{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 60210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 1.25e-06,
      "loss": 7.7769,
      "step": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 2.5e-06,
      "loss": 4.9602,
      "step": 1000
    },
    {
      "epoch": 0.12,
      "learning_rate": 3.75e-06,
      "loss": 4.1666,
      "step": 1500
    },
    {
      "epoch": 0.17,
      "learning_rate": 5e-06,
      "loss": 3.688,
      "step": 2000
    },
    {
      "epoch": 0.21,
      "learning_rate": 6.25e-06,
      "loss": 3.4594,
      "step": 2500
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.5e-06,
      "loss": 3.2406,
      "step": 3000
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.75e-06,
      "loss": 3.1113,
      "step": 3500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1e-05,
      "loss": 2.9628,
      "step": 4000
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.125e-05,
      "loss": 2.8324,
      "step": 4500
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.25e-05,
      "loss": 2.7406,
      "step": 5000
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.3750000000000002e-05,
      "loss": 2.6505,
      "step": 5500
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.5e-05,
      "loss": 2.5864,
      "step": 6000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 2.5592,
      "step": 6500
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.75e-05,
      "loss": 2.513,
      "step": 7000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 2.4164,
      "step": 7500
    },
    {
      "epoch": 0.66,
      "learning_rate": 2e-05,
      "loss": 2.4196,
      "step": 8000
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.125e-05,
      "loss": 2.387,
      "step": 8500
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.25e-05,
      "loss": 2.3328,
      "step": 9000
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.375e-05,
      "loss": 2.307,
      "step": 9500
    },
    {
      "epoch": 0.83,
      "learning_rate": 2.5e-05,
      "loss": 2.2919,
      "step": 10000
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.625e-05,
      "loss": 2.2488,
      "step": 10500
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 2.2451,
      "step": 11000
    },
    {
      "epoch": 0.95,
      "learning_rate": 2.8749999999999997e-05,
      "loss": 2.228,
      "step": 11500
    },
    {
      "epoch": 1.0,
      "learning_rate": 3e-05,
      "loss": 2.2017,
      "step": 12000
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.125e-05,
      "loss": 2.1533,
      "step": 12500
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 2.1322,
      "step": 13000
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.375000000000001e-05,
      "loss": 2.1557,
      "step": 13500
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.5e-05,
      "loss": 2.1209,
      "step": 14000
    },
    {
      "epoch": 1.2,
      "learning_rate": 3.625e-05,
      "loss": 2.103,
      "step": 14500
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 2.1099,
      "step": 15000
    },
    {
      "epoch": 1.29,
      "learning_rate": 3.875e-05,
      "loss": 2.1024,
      "step": 15500
    },
    {
      "epoch": 1.33,
      "learning_rate": 4e-05,
      "loss": 2.0618,
      "step": 16000
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.125e-05,
      "loss": 2.073,
      "step": 16500
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.25e-05,
      "loss": 2.0616,
      "step": 17000
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.375e-05,
      "loss": 2.0967,
      "step": 17500
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.5e-05,
      "loss": 2.0628,
      "step": 18000
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.6250000000000006e-05,
      "loss": 2.0786,
      "step": 18500
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.75e-05,
      "loss": 2.0524,
      "step": 19000
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.875e-05,
      "loss": 2.0508,
      "step": 19500
    },
    {
      "epoch": 1.66,
      "learning_rate": 5e-05,
      "loss": 2.0352,
      "step": 20000
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.937826411340463e-05,
      "loss": 2.0348,
      "step": 20500
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.8756528226809256e-05,
      "loss": 2.0173,
      "step": 21000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.813479234021388e-05,
      "loss": 2.0102,
      "step": 21500
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.7513056453618504e-05,
      "loss": 1.9995,
      "step": 22000
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.689132056702313e-05,
      "loss": 2.0089,
      "step": 22500
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.626958468042776e-05,
      "loss": 1.9712,
      "step": 23000
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.5647848793832385e-05,
      "loss": 1.9776,
      "step": 23500
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.5026112907237005e-05,
      "loss": 1.9573,
      "step": 24000
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.440437702064163e-05,
      "loss": 1.877,
      "step": 24500
    },
    {
      "epoch": 2.08,
      "learning_rate": 4.378264113404626e-05,
      "loss": 1.8312,
      "step": 25000
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.3160905247450886e-05,
      "loss": 1.8694,
      "step": 25500
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.253916936085551e-05,
      "loss": 1.8549,
      "step": 26000
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.191743347426014e-05,
      "loss": 1.8445,
      "step": 26500
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.129569758766476e-05,
      "loss": 1.8301,
      "step": 27000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.067396170106939e-05,
      "loss": 1.8494,
      "step": 27500
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.0052225814474015e-05,
      "loss": 1.8504,
      "step": 28000
    },
    {
      "epoch": 2.37,
      "learning_rate": 3.943048992787864e-05,
      "loss": 1.8388,
      "step": 28500
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.880875404128326e-05,
      "loss": 1.8036,
      "step": 29000
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.8187018154687896e-05,
      "loss": 1.825,
      "step": 29500
    },
    {
      "epoch": 2.49,
      "learning_rate": 3.7565282268092516e-05,
      "loss": 1.8174,
      "step": 30000
    },
    {
      "epoch": 2.53,
      "learning_rate": 3.694354638149714e-05,
      "loss": 1.8188,
      "step": 30500
    },
    {
      "epoch": 2.57,
      "learning_rate": 3.632181049490177e-05,
      "loss": 1.8334,
      "step": 31000
    },
    {
      "epoch": 2.62,
      "learning_rate": 3.57000746083064e-05,
      "loss": 1.8342,
      "step": 31500
    },
    {
      "epoch": 2.66,
      "learning_rate": 3.507833872171102e-05,
      "loss": 1.8119,
      "step": 32000
    },
    {
      "epoch": 2.7,
      "learning_rate": 3.4456602835115645e-05,
      "loss": 1.8034,
      "step": 32500
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.383486694852027e-05,
      "loss": 1.7964,
      "step": 33000
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.32131310619249e-05,
      "loss": 1.7865,
      "step": 33500
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.259139517532952e-05,
      "loss": 1.7937,
      "step": 34000
    },
    {
      "epoch": 2.86,
      "learning_rate": 3.196965928873415e-05,
      "loss": 1.7982,
      "step": 34500
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.134792340213877e-05,
      "loss": 1.7969,
      "step": 35000
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.07261875155434e-05,
      "loss": 1.7892,
      "step": 35500
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.010445162894802e-05,
      "loss": 1.769,
      "step": 36000
    },
    {
      "epoch": 3.03,
      "learning_rate": 2.948271574235265e-05,
      "loss": 1.6569,
      "step": 36500
    },
    {
      "epoch": 3.07,
      "learning_rate": 2.8860979855757274e-05,
      "loss": 1.6269,
      "step": 37000
    },
    {
      "epoch": 3.11,
      "learning_rate": 2.8239243969161898e-05,
      "loss": 1.6806,
      "step": 37500
    },
    {
      "epoch": 3.16,
      "learning_rate": 2.761750808256653e-05,
      "loss": 1.6658,
      "step": 38000
    },
    {
      "epoch": 3.2,
      "learning_rate": 2.6995772195971152e-05,
      "loss": 1.654,
      "step": 38500
    },
    {
      "epoch": 3.24,
      "learning_rate": 2.6374036309375776e-05,
      "loss": 1.6672,
      "step": 39000
    },
    {
      "epoch": 3.28,
      "learning_rate": 2.5752300422780406e-05,
      "loss": 1.6427,
      "step": 39500
    },
    {
      "epoch": 3.32,
      "learning_rate": 2.513056453618503e-05,
      "loss": 1.6182,
      "step": 40000
    },
    {
      "epoch": 3.36,
      "learning_rate": 2.4508828649589653e-05,
      "loss": 1.6515,
      "step": 40500
    },
    {
      "epoch": 3.4,
      "learning_rate": 2.388709276299428e-05,
      "loss": 1.6646,
      "step": 41000
    },
    {
      "epoch": 3.45,
      "learning_rate": 2.3265356876398904e-05,
      "loss": 1.644,
      "step": 41500
    },
    {
      "epoch": 3.49,
      "learning_rate": 2.264362098980353e-05,
      "loss": 1.6337,
      "step": 42000
    },
    {
      "epoch": 3.53,
      "learning_rate": 2.2021885103208158e-05,
      "loss": 1.6412,
      "step": 42500
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.1400149216612782e-05,
      "loss": 1.6222,
      "step": 43000
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.077841333001741e-05,
      "loss": 1.6513,
      "step": 43500
    },
    {
      "epoch": 3.65,
      "learning_rate": 2.0156677443422033e-05,
      "loss": 1.6642,
      "step": 44000
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.953494155682666e-05,
      "loss": 1.6336,
      "step": 44500
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.8913205670231287e-05,
      "loss": 1.6307,
      "step": 45000
    },
    {
      "epoch": 3.78,
      "learning_rate": 1.829146978363591e-05,
      "loss": 1.6365,
      "step": 45500
    },
    {
      "epoch": 3.82,
      "learning_rate": 1.7669733897040537e-05,
      "loss": 1.6427,
      "step": 46000
    },
    {
      "epoch": 3.86,
      "learning_rate": 1.704799801044516e-05,
      "loss": 1.6439,
      "step": 46500
    },
    {
      "epoch": 3.9,
      "learning_rate": 1.6426262123849788e-05,
      "loss": 1.6309,
      "step": 47000
    },
    {
      "epoch": 3.94,
      "learning_rate": 1.5804526237254415e-05,
      "loss": 1.6301,
      "step": 47500
    },
    {
      "epoch": 3.99,
      "learning_rate": 1.518279035065904e-05,
      "loss": 1.6172,
      "step": 48000
    },
    {
      "epoch": 4.03,
      "learning_rate": 1.4561054464063667e-05,
      "loss": 1.55,
      "step": 48500
    },
    {
      "epoch": 4.07,
      "learning_rate": 1.3939318577468291e-05,
      "loss": 1.5255,
      "step": 49000
    },
    {
      "epoch": 4.11,
      "learning_rate": 1.3317582690872918e-05,
      "loss": 1.5083,
      "step": 49500
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.2695846804277545e-05,
      "loss": 1.524,
      "step": 50000
    },
    {
      "epoch": 4.19,
      "learning_rate": 1.2074110917682169e-05,
      "loss": 1.5256,
      "step": 50500
    },
    {
      "epoch": 4.24,
      "learning_rate": 1.1452375031086796e-05,
      "loss": 1.5113,
      "step": 51000
    },
    {
      "epoch": 4.28,
      "learning_rate": 1.0830639144491421e-05,
      "loss": 1.5289,
      "step": 51500
    },
    {
      "epoch": 4.32,
      "learning_rate": 1.0208903257896047e-05,
      "loss": 1.5078,
      "step": 52000
    },
    {
      "epoch": 4.36,
      "learning_rate": 9.587167371300672e-06,
      "loss": 1.5072,
      "step": 52500
    },
    {
      "epoch": 4.4,
      "learning_rate": 8.965431484705297e-06,
      "loss": 1.4986,
      "step": 53000
    },
    {
      "epoch": 4.44,
      "learning_rate": 8.343695598109923e-06,
      "loss": 1.5014,
      "step": 53500
    },
    {
      "epoch": 4.48,
      "learning_rate": 7.72195971151455e-06,
      "loss": 1.5196,
      "step": 54000
    },
    {
      "epoch": 4.53,
      "learning_rate": 7.100223824919175e-06,
      "loss": 1.5027,
      "step": 54500
    },
    {
      "epoch": 4.57,
      "learning_rate": 6.4784879383238e-06,
      "loss": 1.4911,
      "step": 55000
    },
    {
      "epoch": 4.61,
      "learning_rate": 5.856752051728426e-06,
      "loss": 1.4912,
      "step": 55500
    },
    {
      "epoch": 4.65,
      "learning_rate": 5.235016165133052e-06,
      "loss": 1.4958,
      "step": 56000
    },
    {
      "epoch": 4.69,
      "learning_rate": 4.613280278537677e-06,
      "loss": 1.5077,
      "step": 56500
    },
    {
      "epoch": 4.73,
      "learning_rate": 3.9915443919423026e-06,
      "loss": 1.4834,
      "step": 57000
    },
    {
      "epoch": 4.77,
      "learning_rate": 3.3698085053469288e-06,
      "loss": 1.4964,
      "step": 57500
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.748072618751554e-06,
      "loss": 1.5195,
      "step": 58000
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.12633673215618e-06,
      "loss": 1.5117,
      "step": 58500
    },
    {
      "epoch": 4.9,
      "learning_rate": 1.5046008455608059e-06,
      "loss": 1.507,
      "step": 59000
    },
    {
      "epoch": 4.94,
      "learning_rate": 8.828649589654314e-07,
      "loss": 1.5086,
      "step": 59500
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.6112907237005724e-07,
      "loss": 1.5051,
      "step": 60000
    },
    {
      "epoch": 5.0,
      "step": 60210,
      "total_flos": 4721678852567040.0,
      "train_loss": 2.007782585069282,
      "train_runtime": 8390.77,
      "train_samples_per_second": 14.351,
      "train_steps_per_second": 7.176
    }
  ],
  "logging_steps": 500,
  "max_steps": 60210,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 4721678852567040.0,
  "trial_name": null,
  "trial_params": null
}