|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 451623, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.920484962997953e-07, |
|
"loss": 10.3537, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.840969925995906e-07, |
|
"loss": 3.8759, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4761454888993861e-06, |
|
"loss": 3.3657, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9681939851991812e-06, |
|
"loss": 3.2292, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.4602424814989765e-06, |
|
"loss": 3.1654, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9522909777987723e-06, |
|
"loss": 3.1135, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.444339474098567e-06, |
|
"loss": 3.0643, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9363879703983625e-06, |
|
"loss": 3.0263, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.428436466698158e-06, |
|
"loss": 2.9939, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.920484962997953e-06, |
|
"loss": 2.9628, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.412533459297749e-06, |
|
"loss": 2.9285, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.9045819555975445e-06, |
|
"loss": 2.897, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.396630451897339e-06, |
|
"loss": 2.8763, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.888678948197134e-06, |
|
"loss": 2.8517, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.380727444496929e-06, |
|
"loss": 2.8288, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.872775940796725e-06, |
|
"loss": 2.8043, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.36482443709652e-06, |
|
"loss": 2.7839, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.856872933396316e-06, |
|
"loss": 2.7614, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.348921429696111e-06, |
|
"loss": 2.7452, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.840969925995906e-06, |
|
"loss": 2.7313, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0333018422295703e-05, |
|
"loss": 2.7144, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0825066918595498e-05, |
|
"loss": 2.6997, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1317115414895292e-05, |
|
"loss": 2.6785, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1809163911195089e-05, |
|
"loss": 2.6693, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2301212407494884e-05, |
|
"loss": 2.6494, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2793260903794679e-05, |
|
"loss": 2.6353, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3285309400094472e-05, |
|
"loss": 2.6276, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3777357896394269e-05, |
|
"loss": 2.6187, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4269406392694065e-05, |
|
"loss": 2.6011, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4761454888993858e-05, |
|
"loss": 2.5932, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5253503385293655e-05, |
|
"loss": 2.5815, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.574555188159345e-05, |
|
"loss": 2.5736, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6237600377893248e-05, |
|
"loss": 2.5592, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.672964887419304e-05, |
|
"loss": 2.5488, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7221697370492838e-05, |
|
"loss": 2.5394, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7713745866792633e-05, |
|
"loss": 2.5307, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8205794363092428e-05, |
|
"loss": 2.5219, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8697842859392223e-05, |
|
"loss": 2.514, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9189891355692017e-05, |
|
"loss": 2.5038, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9681939851991812e-05, |
|
"loss": 2.4932, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0173988348291607e-05, |
|
"loss": 2.4895, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0666036844591405e-05, |
|
"loss": 2.4754, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1158085340891197e-05, |
|
"loss": 2.4703, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1650133837190995e-05, |
|
"loss": 2.4602, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.214218233349079e-05, |
|
"loss": 2.4609, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2634230829790585e-05, |
|
"loss": 2.4458, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.312627932609038e-05, |
|
"loss": 2.439, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3618327822390178e-05, |
|
"loss": 2.4354, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.411037631868997e-05, |
|
"loss": 2.4263, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4602424814989768e-05, |
|
"loss": 2.4238, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5094473311289563e-05, |
|
"loss": 2.4135, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5586521807589358e-05, |
|
"loss": 2.4098, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6078570303889156e-05, |
|
"loss": 2.4012, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6570618800188944e-05, |
|
"loss": 2.3966, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7062667296488742e-05, |
|
"loss": 2.386, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7554715792788537e-05, |
|
"loss": 2.3838, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8046764289088335e-05, |
|
"loss": 2.3787, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.853881278538813e-05, |
|
"loss": 2.3711, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9030861281687925e-05, |
|
"loss": 2.365, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9522909777987717e-05, |
|
"loss": 2.3625, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0014958274287515e-05, |
|
"loss": 2.3555, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.050700677058731e-05, |
|
"loss": 2.349, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.099905526688711e-05, |
|
"loss": 2.3439, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.14911037631869e-05, |
|
"loss": 2.3382, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.19831522594867e-05, |
|
"loss": 2.3355, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2475200755786496e-05, |
|
"loss": 2.3265, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.296724925208629e-05, |
|
"loss": 2.3289, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.345929774838608e-05, |
|
"loss": 2.3275, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.395134624468588e-05, |
|
"loss": 2.3242, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4443394740985676e-05, |
|
"loss": 2.311, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.493544323728547e-05, |
|
"loss": 2.3148, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5427491733585265e-05, |
|
"loss": 2.3088, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.591954022988506e-05, |
|
"loss": 2.301, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6411588726184855e-05, |
|
"loss": 2.2969, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.690363722248465e-05, |
|
"loss": 2.2989, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7395685718784445e-05, |
|
"loss": 2.2882, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.788773421508424e-05, |
|
"loss": 2.2869, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8379782711384035e-05, |
|
"loss": 2.279, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8871831207683826e-05, |
|
"loss": 2.2817, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9363879703983625e-05, |
|
"loss": 2.2769, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.985592820028342e-05, |
|
"loss": 2.271, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0347976696583214e-05, |
|
"loss": 2.2703, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.084002519288301e-05, |
|
"loss": 2.2644, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133207368918281e-05, |
|
"loss": 2.2578, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.18241221854826e-05, |
|
"loss": 2.2568, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.2316170681782394e-05, |
|
"loss": 2.2565, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.280821917808219e-05, |
|
"loss": 2.2539, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.330026767438199e-05, |
|
"loss": 2.2493, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.379231617068179e-05, |
|
"loss": 2.2488, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.428436466698158e-05, |
|
"loss": 2.2388, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.477641316328138e-05, |
|
"loss": 2.2368, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.526846165958117e-05, |
|
"loss": 2.2366, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.576051015588096e-05, |
|
"loss": 2.2323, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.625255865218076e-05, |
|
"loss": 2.2288, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.674460714848056e-05, |
|
"loss": 2.23, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.7236655644780356e-05, |
|
"loss": 2.223, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.772870414108015e-05, |
|
"loss": 2.2232, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.822075263737994e-05, |
|
"loss": 2.2188, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.871280113367974e-05, |
|
"loss": 2.217, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9204849629979536e-05, |
|
"loss": 2.2118, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.969689812627933e-05, |
|
"loss": 2.2089, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.997900574717671e-05, |
|
"loss": 2.2101, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.992433321378273e-05, |
|
"loss": 2.2048, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.986966068038875e-05, |
|
"loss": 2.2026, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.981498814699476e-05, |
|
"loss": 2.2, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976031561360078e-05, |
|
"loss": 2.1998, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.97056430802068e-05, |
|
"loss": 2.1905, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.965097054681281e-05, |
|
"loss": 2.1896, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.959629801341883e-05, |
|
"loss": 2.1886, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.954162548002484e-05, |
|
"loss": 2.1825, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.948695294663086e-05, |
|
"loss": 2.1807, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.943228041323688e-05, |
|
"loss": 2.1814, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9377607879842896e-05, |
|
"loss": 2.1797, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9322935346448914e-05, |
|
"loss": 2.1714, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9268262813054925e-05, |
|
"loss": 2.1766, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.921359027966094e-05, |
|
"loss": 2.1725, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.915891774626696e-05, |
|
"loss": 2.1672, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.910424521287298e-05, |
|
"loss": 2.1707, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9049572679479e-05, |
|
"loss": 2.1636, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.899490014608501e-05, |
|
"loss": 2.1627, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.894022761269103e-05, |
|
"loss": 2.1598, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8885555079297044e-05, |
|
"loss": 2.1535, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.883088254590306e-05, |
|
"loss": 2.1531, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8776210012509074e-05, |
|
"loss": 2.1548, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.872153747911509e-05, |
|
"loss": 2.1451, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8666864945721116e-05, |
|
"loss": 2.1497, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.861219241232713e-05, |
|
"loss": 2.1474, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8557519878933146e-05, |
|
"loss": 2.1442, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.850284734553916e-05, |
|
"loss": 2.1417, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8448174812145175e-05, |
|
"loss": 2.1426, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.839350227875119e-05, |
|
"loss": 2.1396, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.833882974535721e-05, |
|
"loss": 2.1371, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.828415721196323e-05, |
|
"loss": 2.1352, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.822948467856924e-05, |
|
"loss": 2.1342, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8174812145175265e-05, |
|
"loss": 2.1329, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8120139611781276e-05, |
|
"loss": 2.1272, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8065467078387294e-05, |
|
"loss": 2.1278, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.801079454499331e-05, |
|
"loss": 2.1251, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7956122011599323e-05, |
|
"loss": 2.1222, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.790144947820535e-05, |
|
"loss": 2.1157, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.784677694481136e-05, |
|
"loss": 2.1234, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.779210441141738e-05, |
|
"loss": 2.1167, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.773743187802339e-05, |
|
"loss": 2.1182, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.768275934462941e-05, |
|
"loss": 2.121, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.762808681123543e-05, |
|
"loss": 2.1115, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.757341427784144e-05, |
|
"loss": 2.1171, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.751874174444746e-05, |
|
"loss": 2.11, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.746406921105347e-05, |
|
"loss": 2.1108, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.74093966776595e-05, |
|
"loss": 2.1058, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.735472414426551e-05, |
|
"loss": 2.1005, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7300051610871526e-05, |
|
"loss": 2.1043, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7245379077477544e-05, |
|
"loss": 2.1027, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7190706544083555e-05, |
|
"loss": 2.0973, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.713603401068958e-05, |
|
"loss": 2.0947, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.708136147729559e-05, |
|
"loss": 2.1008, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.702668894390161e-05, |
|
"loss": 2.0995, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.697201641050763e-05, |
|
"loss": 2.0932, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.691734387711364e-05, |
|
"loss": 2.0966, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.686267134371966e-05, |
|
"loss": 2.0932, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6807998810325674e-05, |
|
"loss": 2.0949, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.675332627693169e-05, |
|
"loss": 2.0912, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.669865374353771e-05, |
|
"loss": 2.0885, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.664398121014373e-05, |
|
"loss": 2.0904, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6589308676749746e-05, |
|
"loss": 2.0858, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.653463614335576e-05, |
|
"loss": 2.0828, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6479963609961776e-05, |
|
"loss": 2.0827, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.642529107656779e-05, |
|
"loss": 2.0816, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.637061854317381e-05, |
|
"loss": 2.0789, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.631594600977982e-05, |
|
"loss": 2.0789, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.626127347638584e-05, |
|
"loss": 2.0811, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.620660094299186e-05, |
|
"loss": 2.0819, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.615192840959787e-05, |
|
"loss": 2.0768, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6097255876203895e-05, |
|
"loss": 2.0715, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6042583342809906e-05, |
|
"loss": 2.0742, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5987910809415924e-05, |
|
"loss": 2.0699, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.593323827602194e-05, |
|
"loss": 2.0743, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.587856574262796e-05, |
|
"loss": 2.0694, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.582389320923398e-05, |
|
"loss": 2.0675, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.576922067583999e-05, |
|
"loss": 2.0656, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.571454814244601e-05, |
|
"loss": 2.0637, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5659875609052025e-05, |
|
"loss": 2.0667, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.560520307565804e-05, |
|
"loss": 2.062, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.555053054226406e-05, |
|
"loss": 2.0659, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.549585800887007e-05, |
|
"loss": 2.0597, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.544118547547609e-05, |
|
"loss": 2.067, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.53865129420821e-05, |
|
"loss": 2.0593, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5331840408688126e-05, |
|
"loss": 2.0562, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.527716787529414e-05, |
|
"loss": 2.0589, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5222495341900156e-05, |
|
"loss": 2.0566, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5167822808506174e-05, |
|
"loss": 2.0576, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.511315027511219e-05, |
|
"loss": 2.058, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.505847774171821e-05, |
|
"loss": 2.0528, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.500380520832422e-05, |
|
"loss": 2.0562, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.494913267493024e-05, |
|
"loss": 2.049, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.489446014153626e-05, |
|
"loss": 2.0536, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4839787608142275e-05, |
|
"loss": 2.0538, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.478511507474829e-05, |
|
"loss": 2.0466, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4730442541354304e-05, |
|
"loss": 2.049, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.467577000796032e-05, |
|
"loss": 2.0455, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.462109747456634e-05, |
|
"loss": 2.0441, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.456642494117236e-05, |
|
"loss": 2.0477, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4511752407778376e-05, |
|
"loss": 2.0425, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.445707987438439e-05, |
|
"loss": 2.0475, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4402407340990405e-05, |
|
"loss": 2.0482, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.434773480759642e-05, |
|
"loss": 2.0483, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.429306227420244e-05, |
|
"loss": 2.0375, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.423838974080845e-05, |
|
"loss": 2.0384, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.418371720741447e-05, |
|
"loss": 2.0383, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.412904467402049e-05, |
|
"loss": 2.037, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4074372140626507e-05, |
|
"loss": 2.0315, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4019699607232525e-05, |
|
"loss": 2.0375, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3965027073838536e-05, |
|
"loss": 2.0414, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3910354540444554e-05, |
|
"loss": 2.0324, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.385568200705057e-05, |
|
"loss": 2.0316, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.380100947365659e-05, |
|
"loss": 2.0343, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.374633694026261e-05, |
|
"loss": 2.0314, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.369166440686862e-05, |
|
"loss": 2.0347, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.363699187347464e-05, |
|
"loss": 2.0268, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3582319340080655e-05, |
|
"loss": 2.0323, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.352764680668667e-05, |
|
"loss": 2.03, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.347297427329269e-05, |
|
"loss": 2.0231, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.34183017398987e-05, |
|
"loss": 2.0279, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.336362920650472e-05, |
|
"loss": 2.0298, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.330895667311074e-05, |
|
"loss": 2.0408, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3254284139716756e-05, |
|
"loss": 2.0199, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.319961160632277e-05, |
|
"loss": 2.0297, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3144939072928786e-05, |
|
"loss": 2.0287, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.309026653953481e-05, |
|
"loss": 2.0256, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.303559400614082e-05, |
|
"loss": 2.0197, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.298092147274684e-05, |
|
"loss": 2.0234, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.292624893935285e-05, |
|
"loss": 2.0391, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.287157640595887e-05, |
|
"loss": 2.0772, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.281690387256489e-05, |
|
"loss": 2.0586, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2762231339170905e-05, |
|
"loss": 2.0336, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.270755880577692e-05, |
|
"loss": 2.0207, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2652886272382934e-05, |
|
"loss": 2.0197, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.259821373898895e-05, |
|
"loss": 2.0184, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.254354120559497e-05, |
|
"loss": 2.0162, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.248886867220099e-05, |
|
"loss": 2.0139, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2434196138807006e-05, |
|
"loss": 2.0141, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.237952360541302e-05, |
|
"loss": 2.0119, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.232485107201904e-05, |
|
"loss": 2.0109, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.227017853862505e-05, |
|
"loss": 2.0112, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.221550600523107e-05, |
|
"loss": 2.0106, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.216083347183708e-05, |
|
"loss": 2.0115, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.21061609384431e-05, |
|
"loss": 2.0164, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2051488405049125e-05, |
|
"loss": 2.0055, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.1996815871655136e-05, |
|
"loss": 2.0117, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1942143338261154e-05, |
|
"loss": 2.008, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1887470804867166e-05, |
|
"loss": 2.0077, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1832798271473184e-05, |
|
"loss": 2.009, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.17781257380792e-05, |
|
"loss": 2.0096, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.172345320468522e-05, |
|
"loss": 2.0025, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.166878067129124e-05, |
|
"loss": 2.0013, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.161410813789725e-05, |
|
"loss": 2.0031, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1559435604503274e-05, |
|
"loss": 2.0039, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1504763071109285e-05, |
|
"loss": 2.0002, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.14500905377153e-05, |
|
"loss": 2.0007, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.139541800432132e-05, |
|
"loss": 2.0078, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.134074547092733e-05, |
|
"loss": 1.9981, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.128607293753336e-05, |
|
"loss": 1.9989, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.123140040413937e-05, |
|
"loss": 1.997, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1176727870745386e-05, |
|
"loss": 2.0004, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.11220553373514e-05, |
|
"loss": 1.9983, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1067382803957415e-05, |
|
"loss": 1.9995, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.101271027056344e-05, |
|
"loss": 1.9994, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.095803773716945e-05, |
|
"loss": 1.9956, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.090336520377547e-05, |
|
"loss": 1.994, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.084869267038148e-05, |
|
"loss": 1.9955, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0794020136987505e-05, |
|
"loss": 1.9994, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0739347603593517e-05, |
|
"loss": 1.9897, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0684675070199535e-05, |
|
"loss": 1.9961, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.063000253680555e-05, |
|
"loss": 1.9946, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0575330003411564e-05, |
|
"loss": 1.991, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.052065747001759e-05, |
|
"loss": 1.9952, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.04659849366236e-05, |
|
"loss": 1.9848, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.041131240322962e-05, |
|
"loss": 1.9937, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0356639869835636e-05, |
|
"loss": 1.9893, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.030196733644165e-05, |
|
"loss": 1.9871, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.024729480304767e-05, |
|
"loss": 1.9855, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.019262226965368e-05, |
|
"loss": 1.9887, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.01379497362597e-05, |
|
"loss": 1.9863, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.008327720286571e-05, |
|
"loss": 1.9904, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.002860466947174e-05, |
|
"loss": 1.9886, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9973932136077755e-05, |
|
"loss": 1.9871, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9919259602683766e-05, |
|
"loss": 1.9843, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9864587069289784e-05, |
|
"loss": 1.9891, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.9809914535895795e-05, |
|
"loss": 1.985, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.975524200250182e-05, |
|
"loss": 1.9826, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.970056946910783e-05, |
|
"loss": 1.9839, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.964589693571385e-05, |
|
"loss": 1.9831, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.959122440231987e-05, |
|
"loss": 1.9843, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.953655186892588e-05, |
|
"loss": 1.9825, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9481879335531903e-05, |
|
"loss": 1.9752, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9427206802137915e-05, |
|
"loss": 1.9787, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.937253426874393e-05, |
|
"loss": 1.984, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.931786173534995e-05, |
|
"loss": 1.9783, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.926318920195597e-05, |
|
"loss": 1.9785, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.920851666856199e-05, |
|
"loss": 1.9759, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.9153844135168e-05, |
|
"loss": 1.9803, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.9099171601774016e-05, |
|
"loss": 1.9733, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.904449906838003e-05, |
|
"loss": 1.9759, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.898982653498605e-05, |
|
"loss": 1.9767, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.893515400159207e-05, |
|
"loss": 1.9773, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.888048146819808e-05, |
|
"loss": 1.9736, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.88258089348041e-05, |
|
"loss": 1.9729, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.877113640141012e-05, |
|
"loss": 1.9756, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8716463868016135e-05, |
|
"loss": 1.9747, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8661791334622146e-05, |
|
"loss": 1.9732, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8607118801228164e-05, |
|
"loss": 1.9696, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.855244626783418e-05, |
|
"loss": 1.9731, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.84977737344402e-05, |
|
"loss": 1.9741, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.844310120104622e-05, |
|
"loss": 1.9747, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.838842866765223e-05, |
|
"loss": 1.9717, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.833375613425825e-05, |
|
"loss": 1.9724, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8279083600864266e-05, |
|
"loss": 1.9663, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8224411067470284e-05, |
|
"loss": 1.9671, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.81697385340763e-05, |
|
"loss": 1.9684, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.811506600068231e-05, |
|
"loss": 1.9683, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.806039346728833e-05, |
|
"loss": 1.9682, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.800572093389435e-05, |
|
"loss": 1.9673, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.795104840050037e-05, |
|
"loss": 1.9639, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7896375867106385e-05, |
|
"loss": 1.9633, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7841703333712396e-05, |
|
"loss": 1.964, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7787030800318414e-05, |
|
"loss": 1.9648, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.773235826692443e-05, |
|
"loss": 1.9636, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.767768573353045e-05, |
|
"loss": 1.9623, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.762301320013646e-05, |
|
"loss": 1.9656, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.756834066674248e-05, |
|
"loss": 1.9636, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.75136681333485e-05, |
|
"loss": 1.9672, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7458995599954515e-05, |
|
"loss": 1.9639, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.740432306656053e-05, |
|
"loss": 1.9628, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7349650533166545e-05, |
|
"loss": 1.9647, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.729497799977256e-05, |
|
"loss": 1.9633, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.724030546637858e-05, |
|
"loss": 1.9585, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.71856329329846e-05, |
|
"loss": 1.9599, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7130960399590617e-05, |
|
"loss": 1.9601, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.707628786619663e-05, |
|
"loss": 1.9617, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7021615332802646e-05, |
|
"loss": 1.9583, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.6966942799408664e-05, |
|
"loss": 1.9606, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.691227026601468e-05, |
|
"loss": 1.955, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.68575977326207e-05, |
|
"loss": 1.956, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.680292519922671e-05, |
|
"loss": 1.9584, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.674825266583273e-05, |
|
"loss": 1.9575, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.669358013243875e-05, |
|
"loss": 1.9531, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6638907599044765e-05, |
|
"loss": 1.9603, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6584235065650776e-05, |
|
"loss": 1.9542, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6529562532256794e-05, |
|
"loss": 1.9515, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.647488999886282e-05, |
|
"loss": 1.9594, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.642021746546883e-05, |
|
"loss": 1.955, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.636554493207485e-05, |
|
"loss": 1.9521, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.631087239868086e-05, |
|
"loss": 1.9564, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.625619986528688e-05, |
|
"loss": 1.9556, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6201527331892895e-05, |
|
"loss": 1.9517, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6146854798498913e-05, |
|
"loss": 1.9569, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.609218226510493e-05, |
|
"loss": 1.9482, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.603750973171094e-05, |
|
"loss": 1.9496, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.598283719831696e-05, |
|
"loss": 1.95, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.592816466492298e-05, |
|
"loss": 1.9519, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.5873492131529e-05, |
|
"loss": 1.9477, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5818819598135015e-05, |
|
"loss": 1.9482, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5764147064741026e-05, |
|
"loss": 1.9511, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.570947453134705e-05, |
|
"loss": 1.9464, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.565480199795306e-05, |
|
"loss": 1.9493, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.560012946455908e-05, |
|
"loss": 1.9462, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.554545693116509e-05, |
|
"loss": 1.9493, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.549078439777111e-05, |
|
"loss": 1.9472, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5436111864377134e-05, |
|
"loss": 1.9475, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5381439330983145e-05, |
|
"loss": 1.9461, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.532676679758916e-05, |
|
"loss": 1.9409, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5272094264195174e-05, |
|
"loss": 1.9498, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.521742173080119e-05, |
|
"loss": 1.9475, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.516274919740721e-05, |
|
"loss": 1.9491, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.510807666401323e-05, |
|
"loss": 1.9433, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5053404130619246e-05, |
|
"loss": 1.9436, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.499873159722526e-05, |
|
"loss": 1.9425, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.494405906383128e-05, |
|
"loss": 1.9384, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4889386530437294e-05, |
|
"loss": 1.9398, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.483471399704331e-05, |
|
"loss": 1.9428, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.478004146364933e-05, |
|
"loss": 1.9416, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.472536893025534e-05, |
|
"loss": 1.9409, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4670696396861366e-05, |
|
"loss": 1.9423, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.461602386346738e-05, |
|
"loss": 1.9409, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4561351330073395e-05, |
|
"loss": 1.9399, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4506678796679406e-05, |
|
"loss": 1.94, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4452006263285424e-05, |
|
"loss": 1.9386, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.439733372989145e-05, |
|
"loss": 1.9391, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.434266119649746e-05, |
|
"loss": 1.9412, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.428798866310348e-05, |
|
"loss": 1.9384, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.423331612970949e-05, |
|
"loss": 1.9364, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4178643596315514e-05, |
|
"loss": 1.9354, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4123971062921525e-05, |
|
"loss": 1.9374, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.406929852952754e-05, |
|
"loss": 1.9363, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.401462599613356e-05, |
|
"loss": 1.9333, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.395995346273957e-05, |
|
"loss": 1.9375, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.39052809293456e-05, |
|
"loss": 1.9422, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.385060839595161e-05, |
|
"loss": 1.9363, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3795935862557626e-05, |
|
"loss": 1.9335, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3741263329163644e-05, |
|
"loss": 1.9378, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3686590795769656e-05, |
|
"loss": 1.9394, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.363191826237568e-05, |
|
"loss": 1.927, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.357724572898169e-05, |
|
"loss": 1.9303, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.352257319558771e-05, |
|
"loss": 1.9333, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.346790066219372e-05, |
|
"loss": 1.9285, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3413228128799746e-05, |
|
"loss": 1.9337, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3358555595405764e-05, |
|
"loss": 1.9339, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3303883062011775e-05, |
|
"loss": 1.9368, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.324921052861779e-05, |
|
"loss": 1.934, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3194537995223804e-05, |
|
"loss": 1.9356, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.313986546182983e-05, |
|
"loss": 1.9305, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.308519292843584e-05, |
|
"loss": 1.9291, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.303052039504186e-05, |
|
"loss": 1.9323, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2975847861647876e-05, |
|
"loss": 1.9343, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2921175328253894e-05, |
|
"loss": 1.9315, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.286650279485991e-05, |
|
"loss": 1.9278, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2811830261465923e-05, |
|
"loss": 1.9295, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.275715772807194e-05, |
|
"loss": 1.9323, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.270248519467796e-05, |
|
"loss": 1.9287, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.264781266128398e-05, |
|
"loss": 1.9327, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2593140127889995e-05, |
|
"loss": 1.9311, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.253846759449601e-05, |
|
"loss": 1.9262, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2483795061102025e-05, |
|
"loss": 1.9273, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2429122527708036e-05, |
|
"loss": 1.9276, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.237444999431406e-05, |
|
"loss": 1.9307, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.231977746092008e-05, |
|
"loss": 1.9261, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.226510492752609e-05, |
|
"loss": 1.9286, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.221043239413211e-05, |
|
"loss": 1.9315, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2155759860738126e-05, |
|
"loss": 1.9297, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2101087327344144e-05, |
|
"loss": 1.9317, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2046414793950155e-05, |
|
"loss": 1.927, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.199174226055617e-05, |
|
"loss": 1.9279, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.193706972716219e-05, |
|
"loss": 1.922, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.188239719376821e-05, |
|
"loss": 1.9259, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.182772466037423e-05, |
|
"loss": 1.9234, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.177305212698024e-05, |
|
"loss": 1.9232, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1718379593586256e-05, |
|
"loss": 1.9226, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1663707060192274e-05, |
|
"loss": 1.922, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.160903452679829e-05, |
|
"loss": 1.9215, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.155436199340431e-05, |
|
"loss": 1.9207, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.149968946001032e-05, |
|
"loss": 1.9232, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.144501692661634e-05, |
|
"loss": 1.9223, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.139034439322236e-05, |
|
"loss": 1.9224, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1335671859828376e-05, |
|
"loss": 1.9219, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1280999326434394e-05, |
|
"loss": 1.9235, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1226326793040405e-05, |
|
"loss": 1.9225, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.117165425964642e-05, |
|
"loss": 1.92, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.111698172625244e-05, |
|
"loss": 1.9148, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.106230919285846e-05, |
|
"loss": 1.9245, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.100763665946447e-05, |
|
"loss": 1.9184, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.095296412607049e-05, |
|
"loss": 1.9193, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0898291592676506e-05, |
|
"loss": 1.917, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0843619059282524e-05, |
|
"loss": 1.9092, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.078894652588854e-05, |
|
"loss": 1.9185, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.073427399249455e-05, |
|
"loss": 1.9165, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.067960145910057e-05, |
|
"loss": 1.9171, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.062492892570659e-05, |
|
"loss": 1.9182, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.057025639231261e-05, |
|
"loss": 1.9182, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0515583858918622e-05, |
|
"loss": 1.9155, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0460911325524636e-05, |
|
"loss": 1.9137, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0406238792130654e-05, |
|
"loss": 1.9162, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0351566258736676e-05, |
|
"loss": 1.9165, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.029689372534269e-05, |
|
"loss": 1.9184, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0242221191948705e-05, |
|
"loss": 1.9127, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.018754865855472e-05, |
|
"loss": 1.9197, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0132876125160738e-05, |
|
"loss": 1.915, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0078203591766756e-05, |
|
"loss": 1.9189, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0023531058372774e-05, |
|
"loss": 1.9154, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9968858524978788e-05, |
|
"loss": 1.9117, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9914185991584803e-05, |
|
"loss": 1.911, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9859513458190824e-05, |
|
"loss": 1.9191, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.980484092479684e-05, |
|
"loss": 1.9158, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9750168391402854e-05, |
|
"loss": 1.9149, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.969549585800887e-05, |
|
"loss": 1.9146, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9640823324614886e-05, |
|
"loss": 1.9111, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9586150791220908e-05, |
|
"loss": 1.9105, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9531478257826922e-05, |
|
"loss": 1.9125, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9476805724432937e-05, |
|
"loss": 1.9051, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.942213319103895e-05, |
|
"loss": 1.913, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.936746065764497e-05, |
|
"loss": 1.9067, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.931278812425099e-05, |
|
"loss": 1.9093, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9258115590857005e-05, |
|
"loss": 1.9095, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.920344305746302e-05, |
|
"loss": 1.9129, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9148770524069035e-05, |
|
"loss": 1.9119, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9094097990675056e-05, |
|
"loss": 1.911, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.903942545728107e-05, |
|
"loss": 1.9078, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.898475292388709e-05, |
|
"loss": 1.9113, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8930080390493103e-05, |
|
"loss": 1.9058, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8875407857099118e-05, |
|
"loss": 1.9114, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.882073532370514e-05, |
|
"loss": 1.9073, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8766062790311154e-05, |
|
"loss": 1.906, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.871139025691717e-05, |
|
"loss": 1.9072, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8656717723523186e-05, |
|
"loss": 1.9083, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.86020451901292e-05, |
|
"loss": 1.9047, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8547372656735222e-05, |
|
"loss": 1.9051, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8492700123341237e-05, |
|
"loss": 1.9053, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.843802758994725e-05, |
|
"loss": 1.9107, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.838335505655327e-05, |
|
"loss": 1.9094, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8328682523159288e-05, |
|
"loss": 1.9081, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8274009989765306e-05, |
|
"loss": 1.9067, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.821933745637132e-05, |
|
"loss": 1.9054, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8164664922977335e-05, |
|
"loss": 1.9007, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.810999238958335e-05, |
|
"loss": 1.9064, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.805531985618937e-05, |
|
"loss": 1.8992, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8000647322795386e-05, |
|
"loss": 1.9074, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7945974789401404e-05, |
|
"loss": 1.9061, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7891302256007418e-05, |
|
"loss": 1.9017, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7836629722613433e-05, |
|
"loss": 1.9018, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7781957189219454e-05, |
|
"loss": 1.9027, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.772728465582547e-05, |
|
"loss": 1.9062, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7672612122431483e-05, |
|
"loss": 1.9026, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.76179395890375e-05, |
|
"loss": 1.9, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7563267055643523e-05, |
|
"loss": 1.9043, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7508594522249537e-05, |
|
"loss": 1.9041, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7453921988855552e-05, |
|
"loss": 1.9025, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7399249455461567e-05, |
|
"loss": 1.8992, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7344576922067585e-05, |
|
"loss": 1.9006, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7289904388673603e-05, |
|
"loss": 1.8998, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.723523185527962e-05, |
|
"loss": 1.8978, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7180559321885635e-05, |
|
"loss": 1.8991, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.712588678849165e-05, |
|
"loss": 1.898, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.707121425509767e-05, |
|
"loss": 1.9011, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7016541721703686e-05, |
|
"loss": 1.8968, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.69618691883097e-05, |
|
"loss": 1.9062, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.690719665491572e-05, |
|
"loss": 1.9018, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6852524121521733e-05, |
|
"loss": 1.9018, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6797851588127754e-05, |
|
"loss": 1.8995, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.674317905473377e-05, |
|
"loss": 1.8954, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6688506521339784e-05, |
|
"loss": 1.8992, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6633833987945798e-05, |
|
"loss": 1.8957, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6579161454551816e-05, |
|
"loss": 1.9017, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6524488921157838e-05, |
|
"loss": 1.8975, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6469816387763852e-05, |
|
"loss": 1.8987, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6415143854369867e-05, |
|
"loss": 1.8962, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.636047132097588e-05, |
|
"loss": 1.9013, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6305798787581903e-05, |
|
"loss": 1.9004, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6251126254187917e-05, |
|
"loss": 1.8955, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6196453720793935e-05, |
|
"loss": 1.8956, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.614178118739995e-05, |
|
"loss": 1.8941, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6087108654005965e-05, |
|
"loss": 1.9004, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6032436120611986e-05, |
|
"loss": 1.8978, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.5977763587218e-05, |
|
"loss": 1.8954, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.5923091053824015e-05, |
|
"loss": 1.8948, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.5868418520430033e-05, |
|
"loss": 1.8946, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5813745987036048e-05, |
|
"loss": 1.897, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575907345364207e-05, |
|
"loss": 1.897, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5704400920248084e-05, |
|
"loss": 1.8961, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.56497283868541e-05, |
|
"loss": 1.8925, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5595055853460113e-05, |
|
"loss": 1.8938, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5540383320066135e-05, |
|
"loss": 1.8928, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5485710786672153e-05, |
|
"loss": 1.8963, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5431038253278167e-05, |
|
"loss": 1.8923, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5376365719884182e-05, |
|
"loss": 1.8882, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5321693186490196e-05, |
|
"loss": 1.8887, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5267020653096218e-05, |
|
"loss": 1.8903, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5212348119702232e-05, |
|
"loss": 1.8941, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.515767558630825e-05, |
|
"loss": 1.8943, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5103003052914265e-05, |
|
"loss": 1.8924, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.504833051952028e-05, |
|
"loss": 1.8855, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4993657986126298e-05, |
|
"loss": 1.8918, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4938985452732316e-05, |
|
"loss": 1.8891, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.488431291933833e-05, |
|
"loss": 1.8874, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4829640385944348e-05, |
|
"loss": 1.8907, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4774967852550366e-05, |
|
"loss": 1.8913, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.472029531915638e-05, |
|
"loss": 1.8888, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.46656227857624e-05, |
|
"loss": 1.8924, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4610950252368413e-05, |
|
"loss": 1.8889, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.455627771897443e-05, |
|
"loss": 1.8866, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.450160518558045e-05, |
|
"loss": 1.8915, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4446932652186467e-05, |
|
"loss": 1.8896, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4392260118792482e-05, |
|
"loss": 1.8921, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.43375875853985e-05, |
|
"loss": 1.8854, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4282915052004515e-05, |
|
"loss": 1.8887, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.422824251861053e-05, |
|
"loss": 1.8892, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4173569985216547e-05, |
|
"loss": 1.8908, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4118897451822565e-05, |
|
"loss": 1.8901, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4064224918428583e-05, |
|
"loss": 1.8906, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4009552385034598e-05, |
|
"loss": 1.8862, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3954879851640616e-05, |
|
"loss": 1.8915, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.390020731824663e-05, |
|
"loss": 1.8891, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3845534784852645e-05, |
|
"loss": 1.8855, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3790862251458667e-05, |
|
"loss": 1.8828, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.373618971806468e-05, |
|
"loss": 1.8845, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.36815171846707e-05, |
|
"loss": 1.8824, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3626844651276714e-05, |
|
"loss": 1.8832, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3572172117882732e-05, |
|
"loss": 1.8862, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3517499584488746e-05, |
|
"loss": 1.8862, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3462827051094764e-05, |
|
"loss": 1.8837, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3408154517700782e-05, |
|
"loss": 1.8864, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3353481984306797e-05, |
|
"loss": 1.886, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3298809450912815e-05, |
|
"loss": 1.884, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.324413691751883e-05, |
|
"loss": 1.8834, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3189464384124848e-05, |
|
"loss": 1.8848, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3134791850730862e-05, |
|
"loss": 1.8856, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.308011931733688e-05, |
|
"loss": 1.8874, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.3025446783942898e-05, |
|
"loss": 1.8775, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2970774250548913e-05, |
|
"loss": 1.8859, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.291610171715493e-05, |
|
"loss": 1.8799, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2861429183760945e-05, |
|
"loss": 1.8833, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2806756650366963e-05, |
|
"loss": 1.882, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.275208411697298e-05, |
|
"loss": 1.8872, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8590144171628205e-05, |
|
"loss": 1.8825, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.852863750430547e-05, |
|
"loss": 1.8818, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.846713083698273e-05, |
|
"loss": 1.8859, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8405624169659992e-05, |
|
"loss": 1.8773, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8344117502337256e-05, |
|
"loss": 1.8764, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8282610835014516e-05, |
|
"loss": 1.8826, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.822110416769178e-05, |
|
"loss": 1.8805, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.815959750036904e-05, |
|
"loss": 1.8787, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8098090833046304e-05, |
|
"loss": 1.8793, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8036584165723567e-05, |
|
"loss": 1.8797, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7975077498400827e-05, |
|
"loss": 1.875, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7913570831078088e-05, |
|
"loss": 1.8747, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.785206416375535e-05, |
|
"loss": 1.8795, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7790557496432615e-05, |
|
"loss": 1.8761, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7729050829109875e-05, |
|
"loss": 1.878, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.766754416178714e-05, |
|
"loss": 1.8743, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.76060374944644e-05, |
|
"loss": 1.8762, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7544530827141666e-05, |
|
"loss": 1.8739, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7483024159818926e-05, |
|
"loss": 1.8747, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7421517492496186e-05, |
|
"loss": 1.8703, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.736001082517345e-05, |
|
"loss": 1.8709, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.729850415785071e-05, |
|
"loss": 1.8757, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7236997490527974e-05, |
|
"loss": 1.8746, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7175490823205237e-05, |
|
"loss": 1.8744, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7113984155882498e-05, |
|
"loss": 1.8742, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7052477488559758e-05, |
|
"loss": 1.8791, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6990970821237025e-05, |
|
"loss": 1.871, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6929464153914285e-05, |
|
"loss": 1.8742, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.686795748659155e-05, |
|
"loss": 1.873, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.680645081926881e-05, |
|
"loss": 1.8775, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6744944151946072e-05, |
|
"loss": 1.8746, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6683437484623336e-05, |
|
"loss": 1.8686, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6621930817300596e-05, |
|
"loss": 1.8667, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6560424149977856e-05, |
|
"loss": 1.8716, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.649891748265512e-05, |
|
"loss": 1.8693, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6437410815332384e-05, |
|
"loss": 1.8752, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6375904148009644e-05, |
|
"loss": 1.8677, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6314397480686908e-05, |
|
"loss": 1.8724, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6252890813364168e-05, |
|
"loss": 1.8675, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.619138414604143e-05, |
|
"loss": 1.8674, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6129877478718695e-05, |
|
"loss": 1.8689, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6068370811395955e-05, |
|
"loss": 1.8717, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.600686414407322e-05, |
|
"loss": 1.8695, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.594535747675048e-05, |
|
"loss": 1.869, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5883850809427743e-05, |
|
"loss": 1.8737, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5822344142105006e-05, |
|
"loss": 1.868, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5760837474782266e-05, |
|
"loss": 1.87, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5699330807459527e-05, |
|
"loss": 1.8717, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5637824140136794e-05, |
|
"loss": 1.8675, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5576317472814054e-05, |
|
"loss": 1.8716, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5514810805491317e-05, |
|
"loss": 1.8706, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5453304138168578e-05, |
|
"loss": 1.867, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5391797470845838e-05, |
|
"loss": 1.8638, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5330290803523105e-05, |
|
"loss": 1.8642, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5268784136200365e-05, |
|
"loss": 1.8672, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5207277468877625e-05, |
|
"loss": 1.8677, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.514577080155489e-05, |
|
"loss": 1.867, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.508426413423215e-05, |
|
"loss": 1.8665, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5022757466909413e-05, |
|
"loss": 1.8673, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4961250799586676e-05, |
|
"loss": 1.8719, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4899744132263938e-05, |
|
"loss": 1.8652, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4838237464941202e-05, |
|
"loss": 1.865, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4776730797618462e-05, |
|
"loss": 1.8645, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4715224130295724e-05, |
|
"loss": 1.8661, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4653717462972988e-05, |
|
"loss": 1.8658, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.459221079565025e-05, |
|
"loss": 1.8679, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.453070412832751e-05, |
|
"loss": 1.867, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4469197461004775e-05, |
|
"loss": 1.8648, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4407690793682035e-05, |
|
"loss": 1.8678, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4346184126359297e-05, |
|
"loss": 1.8665, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.428467745903656e-05, |
|
"loss": 1.8717, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4223170791713821e-05, |
|
"loss": 1.8683, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4161664124391086e-05, |
|
"loss": 1.8657, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4100157457068347e-05, |
|
"loss": 1.864, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4038650789745608e-05, |
|
"loss": 1.8622, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3977144122422872e-05, |
|
"loss": 1.8679, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3915637455100134e-05, |
|
"loss": 1.8636, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3854130787777394e-05, |
|
"loss": 1.8688, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3792624120454658e-05, |
|
"loss": 1.8667, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.373111745313192e-05, |
|
"loss": 1.8619, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3669610785809183e-05, |
|
"loss": 1.8677, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3608104118486445e-05, |
|
"loss": 1.8635, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3546597451163705e-05, |
|
"loss": 1.8602, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.348509078384097e-05, |
|
"loss": 1.8627, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3423584116518231e-05, |
|
"loss": 1.8631, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3362077449195493e-05, |
|
"loss": 1.8664, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3300570781872756e-05, |
|
"loss": 1.8601, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3239064114550018e-05, |
|
"loss": 1.8602, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3177557447227279e-05, |
|
"loss": 1.8646, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3116050779904542e-05, |
|
"loss": 1.8606, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3054544112581804e-05, |
|
"loss": 1.8605, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2993037445259068e-05, |
|
"loss": 1.8622, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.293153077793633e-05, |
|
"loss": 1.8609, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.287002411061359e-05, |
|
"loss": 1.8543, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2808517443290855e-05, |
|
"loss": 1.8598, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2747010775968115e-05, |
|
"loss": 1.8589, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2685504108645377e-05, |
|
"loss": 1.8633, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2623997441322641e-05, |
|
"loss": 1.8633, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2562490773999903e-05, |
|
"loss": 1.8596, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2500984106677163e-05, |
|
"loss": 1.8577, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2439477439354427e-05, |
|
"loss": 1.8595, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2377970772031689e-05, |
|
"loss": 1.8702, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.231646410470895e-05, |
|
"loss": 1.8531, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2254957437386214e-05, |
|
"loss": 1.8599, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2193450770063474e-05, |
|
"loss": 1.862, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2131944102740738e-05, |
|
"loss": 1.8601, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2070437435418e-05, |
|
"loss": 1.8608, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2008930768095263e-05, |
|
"loss": 1.8589, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1947424100772524e-05, |
|
"loss": 1.8623, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1885917433449786e-05, |
|
"loss": 1.8616, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1824410766127049e-05, |
|
"loss": 1.8555, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1762904098804311e-05, |
|
"loss": 1.8579, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1701397431481573e-05, |
|
"loss": 1.8634, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1639890764158835e-05, |
|
"loss": 1.8557, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1578384096836098e-05, |
|
"loss": 1.8579, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1516877429513359e-05, |
|
"loss": 1.8614, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1455370762190622e-05, |
|
"loss": 1.8598, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1393864094867884e-05, |
|
"loss": 1.8567, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1332357427545146e-05, |
|
"loss": 1.855, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1270850760222408e-05, |
|
"loss": 1.8578, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.120934409289967e-05, |
|
"loss": 1.856, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1147837425576934e-05, |
|
"loss": 1.8532, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1086330758254195e-05, |
|
"loss": 1.8625, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1024824090931457e-05, |
|
"loss": 1.8591, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.096331742360872e-05, |
|
"loss": 1.8595, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0901810756285983e-05, |
|
"loss": 1.8557, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0840304088963243e-05, |
|
"loss": 1.8576, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0778797421640507e-05, |
|
"loss": 1.8548, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0717290754317769e-05, |
|
"loss": 1.8605, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.065578408699503e-05, |
|
"loss": 1.8505, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0594277419672292e-05, |
|
"loss": 1.8578, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0532770752349554e-05, |
|
"loss": 1.857, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0471264085026818e-05, |
|
"loss": 1.8545, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.040975741770408e-05, |
|
"loss": 1.8557, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0348250750381342e-05, |
|
"loss": 1.8554, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0286744083058604e-05, |
|
"loss": 1.8548, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0225237415735867e-05, |
|
"loss": 1.8558, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0163730748413128e-05, |
|
"loss": 1.8564, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0102224081090391e-05, |
|
"loss": 1.8581, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0040717413767653e-05, |
|
"loss": 1.8541, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.979210746444915e-06, |
|
"loss": 1.8522, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.917704079122177e-06, |
|
"loss": 1.8551, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.856197411799439e-06, |
|
"loss": 1.8572, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.794690744476702e-06, |
|
"loss": 1.8544, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.733184077153964e-06, |
|
"loss": 1.8509, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.671677409831226e-06, |
|
"loss": 1.8538, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.610170742508488e-06, |
|
"loss": 1.8561, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.54866407518575e-06, |
|
"loss": 1.8559, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.487157407863014e-06, |
|
"loss": 1.8559, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.425650740540274e-06, |
|
"loss": 1.8507, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.364144073217537e-06, |
|
"loss": 1.8526, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.3026374058948e-06, |
|
"loss": 1.8552, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.241130738572061e-06, |
|
"loss": 1.8526, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.179624071249323e-06, |
|
"loss": 1.8534, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.118117403926587e-06, |
|
"loss": 1.8539, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.056610736603849e-06, |
|
"loss": 1.8558, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.99510406928111e-06, |
|
"loss": 1.8532, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.933597401958373e-06, |
|
"loss": 1.8557, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.872090734635634e-06, |
|
"loss": 1.8528, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.810584067312898e-06, |
|
"loss": 1.8554, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.749077399990158e-06, |
|
"loss": 1.8508, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.687570732667422e-06, |
|
"loss": 1.8505, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.626064065344684e-06, |
|
"loss": 1.8489, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.564557398021946e-06, |
|
"loss": 1.8519, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.503050730699208e-06, |
|
"loss": 1.8565, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.441544063376471e-06, |
|
"loss": 1.852, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.380037396053733e-06, |
|
"loss": 1.8553, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.318530728730995e-06, |
|
"loss": 1.8512, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.257024061408257e-06, |
|
"loss": 1.8521, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.195517394085519e-06, |
|
"loss": 1.8495, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.134010726762783e-06, |
|
"loss": 1.8563, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.072504059440043e-06, |
|
"loss": 1.8524, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.010997392117306e-06, |
|
"loss": 1.8537, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.949490724794568e-06, |
|
"loss": 1.8481, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.88798405747183e-06, |
|
"loss": 1.8521, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.826477390149092e-06, |
|
"loss": 1.8488, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.764970722826356e-06, |
|
"loss": 1.856, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.703464055503618e-06, |
|
"loss": 1.8502, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.64195738818088e-06, |
|
"loss": 1.8534, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.580450720858141e-06, |
|
"loss": 1.8481, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.518944053535404e-06, |
|
"loss": 1.8516, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.457437386212666e-06, |
|
"loss": 1.8508, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.395930718889928e-06, |
|
"loss": 1.8442, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.33442405156719e-06, |
|
"loss": 1.8469, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.272917384244453e-06, |
|
"loss": 1.85, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.211410716921714e-06, |
|
"loss": 1.8454, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.1499040495989765e-06, |
|
"loss": 1.8523, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.088397382276239e-06, |
|
"loss": 1.8479, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.026890714953502e-06, |
|
"loss": 1.8438, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.965384047630763e-06, |
|
"loss": 1.8491, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.903877380308026e-06, |
|
"loss": 1.8492, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.842370712985289e-06, |
|
"loss": 1.8506, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.7808640456625505e-06, |
|
"loss": 1.8511, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.719357378339812e-06, |
|
"loss": 1.8479, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.657850711017074e-06, |
|
"loss": 1.8474, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.596344043694337e-06, |
|
"loss": 1.8472, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.534837376371598e-06, |
|
"loss": 1.8536, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.473330709048861e-06, |
|
"loss": 1.8487, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.411824041726124e-06, |
|
"loss": 1.8507, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.3503173744033864e-06, |
|
"loss": 1.8478, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.2888107070806475e-06, |
|
"loss": 1.8488, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.22730403975791e-06, |
|
"loss": 1.8462, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.165797372435172e-06, |
|
"loss": 1.8501, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.104290705112434e-06, |
|
"loss": 1.8495, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.042784037789697e-06, |
|
"loss": 1.8479, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.981277370466959e-06, |
|
"loss": 1.8474, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.919770703144221e-06, |
|
"loss": 1.851, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.8582640358214834e-06, |
|
"loss": 1.8451, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.796757368498745e-06, |
|
"loss": 1.8458, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.735250701176008e-06, |
|
"loss": 1.8485, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.67374403385327e-06, |
|
"loss": 1.8494, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.612237366530533e-06, |
|
"loss": 1.8437, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.550730699207794e-06, |
|
"loss": 1.8435, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.489224031885057e-06, |
|
"loss": 1.8472, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.4277173645623185e-06, |
|
"loss": 1.8469, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.366210697239581e-06, |
|
"loss": 1.845, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.304704029916843e-06, |
|
"loss": 1.8451, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.243197362594105e-06, |
|
"loss": 1.85, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.181690695271368e-06, |
|
"loss": 1.8436, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.12018402794863e-06, |
|
"loss": 1.8435, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.0586773606258925e-06, |
|
"loss": 1.8447, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.9971706933031544e-06, |
|
"loss": 1.847, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.935664025980416e-06, |
|
"loss": 1.8522, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.874157358657678e-06, |
|
"loss": 1.8474, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.812650691334941e-06, |
|
"loss": 1.8473, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.751144024012203e-06, |
|
"loss": 1.8446, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.689637356689466e-06, |
|
"loss": 1.8471, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.628130689366728e-06, |
|
"loss": 1.8472, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.5666240220439895e-06, |
|
"loss": 1.8456, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.505117354721252e-06, |
|
"loss": 1.8446, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.443610687398514e-06, |
|
"loss": 1.8441, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.382104020075776e-06, |
|
"loss": 1.8466, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.320597352753038e-06, |
|
"loss": 1.8424, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.259090685430301e-06, |
|
"loss": 1.8472, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.197584018107563e-06, |
|
"loss": 1.8423, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.1360773507848255e-06, |
|
"loss": 1.8463, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.074570683462087e-06, |
|
"loss": 1.8438, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.01306401613935e-06, |
|
"loss": 1.8399, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.951557348816612e-06, |
|
"loss": 1.8463, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.890050681493874e-06, |
|
"loss": 1.8404, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.828544014171137e-06, |
|
"loss": 1.8457, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.767037346848398e-06, |
|
"loss": 1.8451, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.705530679525661e-06, |
|
"loss": 1.8447, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.644024012202923e-06, |
|
"loss": 1.8427, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.582517344880185e-06, |
|
"loss": 1.8406, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.521010677557447e-06, |
|
"loss": 1.842, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.45950401023471e-06, |
|
"loss": 1.8426, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.397997342911972e-06, |
|
"loss": 1.8455, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.336490675589234e-06, |
|
"loss": 1.841, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.274984008266496e-06, |
|
"loss": 1.8418, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.213477340943759e-06, |
|
"loss": 1.8415, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1519706736210207e-06, |
|
"loss": 1.8452, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.090464006298283e-06, |
|
"loss": 1.8472, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.0289573389755454e-06, |
|
"loss": 1.8403, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9674506716528073e-06, |
|
"loss": 1.8406, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9059440043300696e-06, |
|
"loss": 1.8425, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.8444373370073315e-06, |
|
"loss": 1.8417, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.782930669684594e-06, |
|
"loss": 1.8402, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.7214240023618562e-06, |
|
"loss": 1.8465, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.659917335039118e-06, |
|
"loss": 1.8454, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.5984106677163805e-06, |
|
"loss": 1.8389, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.536904000393643e-06, |
|
"loss": 1.8439, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.475397333070905e-06, |
|
"loss": 1.8451, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.413890665748167e-06, |
|
"loss": 1.8393, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3523839984254294e-06, |
|
"loss": 1.8428, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2908773311026917e-06, |
|
"loss": 1.8397, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.229370663779954e-06, |
|
"loss": 1.8435, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1678639964572164e-06, |
|
"loss": 1.8411, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1063573291344783e-06, |
|
"loss": 1.8435, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0448506618117402e-06, |
|
"loss": 1.8427, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9833439944890026e-06, |
|
"loss": 1.8371, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.921837327166265e-06, |
|
"loss": 1.8378, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.860330659843527e-06, |
|
"loss": 1.8403, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7988239925207894e-06, |
|
"loss": 1.8427, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7373173251980517e-06, |
|
"loss": 1.8414, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6758106578753138e-06, |
|
"loss": 1.8371, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6143039905525761e-06, |
|
"loss": 1.8388, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.552797323229838e-06, |
|
"loss": 1.8447, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4912906559071004e-06, |
|
"loss": 1.8438, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4297839885843625e-06, |
|
"loss": 1.84, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3682773212616249e-06, |
|
"loss": 1.844, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3067706539388872e-06, |
|
"loss": 1.8395, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.245263986616149e-06, |
|
"loss": 1.8408, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1837573192934114e-06, |
|
"loss": 1.8426, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1222506519706736e-06, |
|
"loss": 1.8412, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.060743984647936e-06, |
|
"loss": 1.8389, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.992373173251982e-07, |
|
"loss": 1.839, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.377306500024604e-07, |
|
"loss": 1.8411, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.762239826797225e-07, |
|
"loss": 1.843, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.147173153569847e-07, |
|
"loss": 1.84, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.532106480342469e-07, |
|
"loss": 1.847, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.917039807115092e-07, |
|
"loss": 1.8363, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.301973133887713e-07, |
|
"loss": 1.8402, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.686906460660336e-07, |
|
"loss": 1.8397, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.071839787432959e-07, |
|
"loss": 1.8424, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.45677311420558e-07, |
|
"loss": 1.8357, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.841706440978202e-07, |
|
"loss": 1.843, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.2266397677508245e-07, |
|
"loss": 1.8371, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.6115730945234463e-07, |
|
"loss": 1.8429, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.9965064212960688e-07, |
|
"loss": 1.8424, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.381439748068691e-07, |
|
"loss": 1.8414, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.663730748413129e-08, |
|
"loss": 1.8446, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5130640161393495e-08, |
|
"loss": 1.8381, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 451623, |
|
"total_flos": 1.5894400168611545e+19, |
|
"train_loss": 0.6229404193166468, |
|
"train_runtime": 298967.217, |
|
"train_samples_per_second": 870.111, |
|
"train_steps_per_second": 1.511 |
|
} |
|
], |
|
"max_steps": 451623, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.5894400168611545e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|