naija-xlm-twitter-base / trainer_state.json
manueltonneau's picture
Upload 10 files
13c7a54 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 451623,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.920484962997953e-07,
"loss": 10.3537,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 9.840969925995906e-07,
"loss": 3.8759,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 1.4761454888993861e-06,
"loss": 3.3657,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 1.9681939851991812e-06,
"loss": 3.2292,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 2.4602424814989765e-06,
"loss": 3.1654,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 2.9522909777987723e-06,
"loss": 3.1135,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 3.444339474098567e-06,
"loss": 3.0643,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 3.9363879703983625e-06,
"loss": 3.0263,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.428436466698158e-06,
"loss": 2.9939,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.920484962997953e-06,
"loss": 2.9628,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 5.412533459297749e-06,
"loss": 2.9285,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 5.9045819555975445e-06,
"loss": 2.897,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 6.396630451897339e-06,
"loss": 2.8763,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 6.888678948197134e-06,
"loss": 2.8517,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 7.380727444496929e-06,
"loss": 2.8288,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 7.872775940796725e-06,
"loss": 2.8043,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 8.36482443709652e-06,
"loss": 2.7839,
"step": 8500
},
{
"epoch": 0.02,
"learning_rate": 8.856872933396316e-06,
"loss": 2.7614,
"step": 9000
},
{
"epoch": 0.02,
"learning_rate": 9.348921429696111e-06,
"loss": 2.7452,
"step": 9500
},
{
"epoch": 0.02,
"learning_rate": 9.840969925995906e-06,
"loss": 2.7313,
"step": 10000
},
{
"epoch": 0.02,
"learning_rate": 1.0333018422295703e-05,
"loss": 2.7144,
"step": 10500
},
{
"epoch": 0.02,
"learning_rate": 1.0825066918595498e-05,
"loss": 2.6997,
"step": 11000
},
{
"epoch": 0.02,
"learning_rate": 1.1317115414895292e-05,
"loss": 2.6785,
"step": 11500
},
{
"epoch": 0.02,
"learning_rate": 1.1809163911195089e-05,
"loss": 2.6693,
"step": 12000
},
{
"epoch": 0.02,
"learning_rate": 1.2301212407494884e-05,
"loss": 2.6494,
"step": 12500
},
{
"epoch": 0.03,
"learning_rate": 1.2793260903794679e-05,
"loss": 2.6353,
"step": 13000
},
{
"epoch": 0.03,
"learning_rate": 1.3285309400094472e-05,
"loss": 2.6276,
"step": 13500
},
{
"epoch": 0.03,
"learning_rate": 1.3777357896394269e-05,
"loss": 2.6187,
"step": 14000
},
{
"epoch": 0.03,
"learning_rate": 1.4269406392694065e-05,
"loss": 2.6011,
"step": 14500
},
{
"epoch": 0.03,
"learning_rate": 1.4761454888993858e-05,
"loss": 2.5932,
"step": 15000
},
{
"epoch": 0.03,
"learning_rate": 1.5253503385293655e-05,
"loss": 2.5815,
"step": 15500
},
{
"epoch": 0.03,
"learning_rate": 1.574555188159345e-05,
"loss": 2.5736,
"step": 16000
},
{
"epoch": 0.03,
"learning_rate": 1.6237600377893248e-05,
"loss": 2.5592,
"step": 16500
},
{
"epoch": 0.03,
"learning_rate": 1.672964887419304e-05,
"loss": 2.5488,
"step": 17000
},
{
"epoch": 0.03,
"learning_rate": 1.7221697370492838e-05,
"loss": 2.5394,
"step": 17500
},
{
"epoch": 0.04,
"learning_rate": 1.7713745866792633e-05,
"loss": 2.5307,
"step": 18000
},
{
"epoch": 0.04,
"learning_rate": 1.8205794363092428e-05,
"loss": 2.5219,
"step": 18500
},
{
"epoch": 0.04,
"learning_rate": 1.8697842859392223e-05,
"loss": 2.514,
"step": 19000
},
{
"epoch": 0.04,
"learning_rate": 1.9189891355692017e-05,
"loss": 2.5038,
"step": 19500
},
{
"epoch": 0.04,
"learning_rate": 1.9681939851991812e-05,
"loss": 2.4932,
"step": 20000
},
{
"epoch": 0.04,
"learning_rate": 2.0173988348291607e-05,
"loss": 2.4895,
"step": 20500
},
{
"epoch": 0.04,
"learning_rate": 2.0666036844591405e-05,
"loss": 2.4754,
"step": 21000
},
{
"epoch": 0.04,
"learning_rate": 2.1158085340891197e-05,
"loss": 2.4703,
"step": 21500
},
{
"epoch": 0.04,
"learning_rate": 2.1650133837190995e-05,
"loss": 2.4602,
"step": 22000
},
{
"epoch": 0.04,
"learning_rate": 2.214218233349079e-05,
"loss": 2.4609,
"step": 22500
},
{
"epoch": 0.05,
"learning_rate": 2.2634230829790585e-05,
"loss": 2.4458,
"step": 23000
},
{
"epoch": 0.05,
"learning_rate": 2.312627932609038e-05,
"loss": 2.439,
"step": 23500
},
{
"epoch": 0.05,
"learning_rate": 2.3618327822390178e-05,
"loss": 2.4354,
"step": 24000
},
{
"epoch": 0.05,
"learning_rate": 2.411037631868997e-05,
"loss": 2.4263,
"step": 24500
},
{
"epoch": 0.05,
"learning_rate": 2.4602424814989768e-05,
"loss": 2.4238,
"step": 25000
},
{
"epoch": 0.05,
"learning_rate": 2.5094473311289563e-05,
"loss": 2.4135,
"step": 25500
},
{
"epoch": 0.05,
"learning_rate": 2.5586521807589358e-05,
"loss": 2.4098,
"step": 26000
},
{
"epoch": 0.05,
"learning_rate": 2.6078570303889156e-05,
"loss": 2.4012,
"step": 26500
},
{
"epoch": 0.05,
"learning_rate": 2.6570618800188944e-05,
"loss": 2.3966,
"step": 27000
},
{
"epoch": 0.05,
"learning_rate": 2.7062667296488742e-05,
"loss": 2.386,
"step": 27500
},
{
"epoch": 0.06,
"learning_rate": 2.7554715792788537e-05,
"loss": 2.3838,
"step": 28000
},
{
"epoch": 0.06,
"learning_rate": 2.8046764289088335e-05,
"loss": 2.3787,
"step": 28500
},
{
"epoch": 0.06,
"learning_rate": 2.853881278538813e-05,
"loss": 2.3711,
"step": 29000
},
{
"epoch": 0.06,
"learning_rate": 2.9030861281687925e-05,
"loss": 2.365,
"step": 29500
},
{
"epoch": 0.06,
"learning_rate": 2.9522909777987717e-05,
"loss": 2.3625,
"step": 30000
},
{
"epoch": 0.06,
"learning_rate": 3.0014958274287515e-05,
"loss": 2.3555,
"step": 30500
},
{
"epoch": 0.06,
"learning_rate": 3.050700677058731e-05,
"loss": 2.349,
"step": 31000
},
{
"epoch": 0.06,
"learning_rate": 3.099905526688711e-05,
"loss": 2.3439,
"step": 31500
},
{
"epoch": 0.06,
"learning_rate": 3.14911037631869e-05,
"loss": 2.3382,
"step": 32000
},
{
"epoch": 0.06,
"learning_rate": 3.19831522594867e-05,
"loss": 2.3355,
"step": 32500
},
{
"epoch": 0.06,
"learning_rate": 3.2475200755786496e-05,
"loss": 2.3265,
"step": 33000
},
{
"epoch": 0.07,
"learning_rate": 3.296724925208629e-05,
"loss": 2.3289,
"step": 33500
},
{
"epoch": 0.07,
"learning_rate": 3.345929774838608e-05,
"loss": 2.3275,
"step": 34000
},
{
"epoch": 0.07,
"learning_rate": 3.395134624468588e-05,
"loss": 2.3242,
"step": 34500
},
{
"epoch": 0.07,
"learning_rate": 3.4443394740985676e-05,
"loss": 2.311,
"step": 35000
},
{
"epoch": 0.07,
"learning_rate": 3.493544323728547e-05,
"loss": 2.3148,
"step": 35500
},
{
"epoch": 0.07,
"learning_rate": 3.5427491733585265e-05,
"loss": 2.3088,
"step": 36000
},
{
"epoch": 0.07,
"learning_rate": 3.591954022988506e-05,
"loss": 2.301,
"step": 36500
},
{
"epoch": 0.07,
"learning_rate": 3.6411588726184855e-05,
"loss": 2.2969,
"step": 37000
},
{
"epoch": 0.07,
"learning_rate": 3.690363722248465e-05,
"loss": 2.2989,
"step": 37500
},
{
"epoch": 0.07,
"learning_rate": 3.7395685718784445e-05,
"loss": 2.2882,
"step": 38000
},
{
"epoch": 0.08,
"learning_rate": 3.788773421508424e-05,
"loss": 2.2869,
"step": 38500
},
{
"epoch": 0.08,
"learning_rate": 3.8379782711384035e-05,
"loss": 2.279,
"step": 39000
},
{
"epoch": 0.08,
"learning_rate": 3.8871831207683826e-05,
"loss": 2.2817,
"step": 39500
},
{
"epoch": 0.08,
"learning_rate": 3.9363879703983625e-05,
"loss": 2.2769,
"step": 40000
},
{
"epoch": 0.08,
"learning_rate": 3.985592820028342e-05,
"loss": 2.271,
"step": 40500
},
{
"epoch": 0.08,
"learning_rate": 4.0347976696583214e-05,
"loss": 2.2703,
"step": 41000
},
{
"epoch": 0.08,
"learning_rate": 4.084002519288301e-05,
"loss": 2.2644,
"step": 41500
},
{
"epoch": 0.08,
"learning_rate": 4.133207368918281e-05,
"loss": 2.2578,
"step": 42000
},
{
"epoch": 0.08,
"learning_rate": 4.18241221854826e-05,
"loss": 2.2568,
"step": 42500
},
{
"epoch": 0.08,
"learning_rate": 4.2316170681782394e-05,
"loss": 2.2565,
"step": 43000
},
{
"epoch": 0.09,
"learning_rate": 4.280821917808219e-05,
"loss": 2.2539,
"step": 43500
},
{
"epoch": 0.09,
"learning_rate": 4.330026767438199e-05,
"loss": 2.2493,
"step": 44000
},
{
"epoch": 0.09,
"learning_rate": 4.379231617068179e-05,
"loss": 2.2488,
"step": 44500
},
{
"epoch": 0.09,
"learning_rate": 4.428436466698158e-05,
"loss": 2.2388,
"step": 45000
},
{
"epoch": 0.09,
"learning_rate": 4.477641316328138e-05,
"loss": 2.2368,
"step": 45500
},
{
"epoch": 0.09,
"learning_rate": 4.526846165958117e-05,
"loss": 2.2366,
"step": 46000
},
{
"epoch": 0.09,
"learning_rate": 4.576051015588096e-05,
"loss": 2.2323,
"step": 46500
},
{
"epoch": 0.09,
"learning_rate": 4.625255865218076e-05,
"loss": 2.2288,
"step": 47000
},
{
"epoch": 0.09,
"learning_rate": 4.674460714848056e-05,
"loss": 2.23,
"step": 47500
},
{
"epoch": 0.09,
"learning_rate": 4.7236655644780356e-05,
"loss": 2.223,
"step": 48000
},
{
"epoch": 0.1,
"learning_rate": 4.772870414108015e-05,
"loss": 2.2232,
"step": 48500
},
{
"epoch": 0.1,
"learning_rate": 4.822075263737994e-05,
"loss": 2.2188,
"step": 49000
},
{
"epoch": 0.1,
"learning_rate": 4.871280113367974e-05,
"loss": 2.217,
"step": 49500
},
{
"epoch": 0.1,
"learning_rate": 4.9204849629979536e-05,
"loss": 2.2118,
"step": 50000
},
{
"epoch": 0.1,
"learning_rate": 4.969689812627933e-05,
"loss": 2.2089,
"step": 50500
},
{
"epoch": 0.1,
"learning_rate": 4.997900574717671e-05,
"loss": 2.2101,
"step": 51000
},
{
"epoch": 0.1,
"learning_rate": 4.992433321378273e-05,
"loss": 2.2048,
"step": 51500
},
{
"epoch": 0.1,
"learning_rate": 4.986966068038875e-05,
"loss": 2.2026,
"step": 52000
},
{
"epoch": 0.1,
"learning_rate": 4.981498814699476e-05,
"loss": 2.2,
"step": 52500
},
{
"epoch": 0.1,
"learning_rate": 4.976031561360078e-05,
"loss": 2.1998,
"step": 53000
},
{
"epoch": 0.11,
"learning_rate": 4.97056430802068e-05,
"loss": 2.1905,
"step": 53500
},
{
"epoch": 0.11,
"learning_rate": 4.965097054681281e-05,
"loss": 2.1896,
"step": 54000
},
{
"epoch": 0.11,
"learning_rate": 4.959629801341883e-05,
"loss": 2.1886,
"step": 54500
},
{
"epoch": 0.11,
"learning_rate": 4.954162548002484e-05,
"loss": 2.1825,
"step": 55000
},
{
"epoch": 0.11,
"learning_rate": 4.948695294663086e-05,
"loss": 2.1807,
"step": 55500
},
{
"epoch": 0.11,
"learning_rate": 4.943228041323688e-05,
"loss": 2.1814,
"step": 56000
},
{
"epoch": 0.11,
"learning_rate": 4.9377607879842896e-05,
"loss": 2.1797,
"step": 56500
},
{
"epoch": 0.11,
"learning_rate": 4.9322935346448914e-05,
"loss": 2.1714,
"step": 57000
},
{
"epoch": 0.11,
"learning_rate": 4.9268262813054925e-05,
"loss": 2.1766,
"step": 57500
},
{
"epoch": 0.11,
"learning_rate": 4.921359027966094e-05,
"loss": 2.1725,
"step": 58000
},
{
"epoch": 0.12,
"learning_rate": 4.915891774626696e-05,
"loss": 2.1672,
"step": 58500
},
{
"epoch": 0.12,
"learning_rate": 4.910424521287298e-05,
"loss": 2.1707,
"step": 59000
},
{
"epoch": 0.12,
"learning_rate": 4.9049572679479e-05,
"loss": 2.1636,
"step": 59500
},
{
"epoch": 0.12,
"learning_rate": 4.899490014608501e-05,
"loss": 2.1627,
"step": 60000
},
{
"epoch": 0.12,
"learning_rate": 4.894022761269103e-05,
"loss": 2.1598,
"step": 60500
},
{
"epoch": 0.12,
"learning_rate": 4.8885555079297044e-05,
"loss": 2.1535,
"step": 61000
},
{
"epoch": 0.12,
"learning_rate": 4.883088254590306e-05,
"loss": 2.1531,
"step": 61500
},
{
"epoch": 0.12,
"learning_rate": 4.8776210012509074e-05,
"loss": 2.1548,
"step": 62000
},
{
"epoch": 0.12,
"learning_rate": 4.872153747911509e-05,
"loss": 2.1451,
"step": 62500
},
{
"epoch": 0.12,
"learning_rate": 4.8666864945721116e-05,
"loss": 2.1497,
"step": 63000
},
{
"epoch": 0.12,
"learning_rate": 4.861219241232713e-05,
"loss": 2.1474,
"step": 63500
},
{
"epoch": 0.13,
"learning_rate": 4.8557519878933146e-05,
"loss": 2.1442,
"step": 64000
},
{
"epoch": 0.13,
"learning_rate": 4.850284734553916e-05,
"loss": 2.1417,
"step": 64500
},
{
"epoch": 0.13,
"learning_rate": 4.8448174812145175e-05,
"loss": 2.1426,
"step": 65000
},
{
"epoch": 0.13,
"learning_rate": 4.839350227875119e-05,
"loss": 2.1396,
"step": 65500
},
{
"epoch": 0.13,
"learning_rate": 4.833882974535721e-05,
"loss": 2.1371,
"step": 66000
},
{
"epoch": 0.13,
"learning_rate": 4.828415721196323e-05,
"loss": 2.1352,
"step": 66500
},
{
"epoch": 0.13,
"learning_rate": 4.822948467856924e-05,
"loss": 2.1342,
"step": 67000
},
{
"epoch": 0.13,
"learning_rate": 4.8174812145175265e-05,
"loss": 2.1329,
"step": 67500
},
{
"epoch": 0.13,
"learning_rate": 4.8120139611781276e-05,
"loss": 2.1272,
"step": 68000
},
{
"epoch": 0.13,
"learning_rate": 4.8065467078387294e-05,
"loss": 2.1278,
"step": 68500
},
{
"epoch": 0.14,
"learning_rate": 4.801079454499331e-05,
"loss": 2.1251,
"step": 69000
},
{
"epoch": 0.14,
"learning_rate": 4.7956122011599323e-05,
"loss": 2.1222,
"step": 69500
},
{
"epoch": 0.14,
"learning_rate": 4.790144947820535e-05,
"loss": 2.1157,
"step": 70000
},
{
"epoch": 0.14,
"learning_rate": 4.784677694481136e-05,
"loss": 2.1234,
"step": 70500
},
{
"epoch": 0.14,
"learning_rate": 4.779210441141738e-05,
"loss": 2.1167,
"step": 71000
},
{
"epoch": 0.14,
"learning_rate": 4.773743187802339e-05,
"loss": 2.1182,
"step": 71500
},
{
"epoch": 0.14,
"learning_rate": 4.768275934462941e-05,
"loss": 2.121,
"step": 72000
},
{
"epoch": 0.14,
"learning_rate": 4.762808681123543e-05,
"loss": 2.1115,
"step": 72500
},
{
"epoch": 0.14,
"learning_rate": 4.757341427784144e-05,
"loss": 2.1171,
"step": 73000
},
{
"epoch": 0.14,
"learning_rate": 4.751874174444746e-05,
"loss": 2.11,
"step": 73500
},
{
"epoch": 0.15,
"learning_rate": 4.746406921105347e-05,
"loss": 2.1108,
"step": 74000
},
{
"epoch": 0.15,
"learning_rate": 4.74093966776595e-05,
"loss": 2.1058,
"step": 74500
},
{
"epoch": 0.15,
"learning_rate": 4.735472414426551e-05,
"loss": 2.1005,
"step": 75000
},
{
"epoch": 0.15,
"learning_rate": 4.7300051610871526e-05,
"loss": 2.1043,
"step": 75500
},
{
"epoch": 0.15,
"learning_rate": 4.7245379077477544e-05,
"loss": 2.1027,
"step": 76000
},
{
"epoch": 0.15,
"learning_rate": 4.7190706544083555e-05,
"loss": 2.0973,
"step": 76500
},
{
"epoch": 0.15,
"learning_rate": 4.713603401068958e-05,
"loss": 2.0947,
"step": 77000
},
{
"epoch": 0.15,
"learning_rate": 4.708136147729559e-05,
"loss": 2.1008,
"step": 77500
},
{
"epoch": 0.15,
"learning_rate": 4.702668894390161e-05,
"loss": 2.0995,
"step": 78000
},
{
"epoch": 0.15,
"learning_rate": 4.697201641050763e-05,
"loss": 2.0932,
"step": 78500
},
{
"epoch": 0.16,
"learning_rate": 4.691734387711364e-05,
"loss": 2.0966,
"step": 79000
},
{
"epoch": 0.16,
"learning_rate": 4.686267134371966e-05,
"loss": 2.0932,
"step": 79500
},
{
"epoch": 0.16,
"learning_rate": 4.6807998810325674e-05,
"loss": 2.0949,
"step": 80000
},
{
"epoch": 0.16,
"learning_rate": 4.675332627693169e-05,
"loss": 2.0912,
"step": 80500
},
{
"epoch": 0.16,
"learning_rate": 4.669865374353771e-05,
"loss": 2.0885,
"step": 81000
},
{
"epoch": 0.16,
"learning_rate": 4.664398121014373e-05,
"loss": 2.0904,
"step": 81500
},
{
"epoch": 0.16,
"learning_rate": 4.6589308676749746e-05,
"loss": 2.0858,
"step": 82000
},
{
"epoch": 0.16,
"learning_rate": 4.653463614335576e-05,
"loss": 2.0828,
"step": 82500
},
{
"epoch": 0.16,
"learning_rate": 4.6479963609961776e-05,
"loss": 2.0827,
"step": 83000
},
{
"epoch": 0.16,
"learning_rate": 4.642529107656779e-05,
"loss": 2.0816,
"step": 83500
},
{
"epoch": 0.17,
"learning_rate": 4.637061854317381e-05,
"loss": 2.0789,
"step": 84000
},
{
"epoch": 0.17,
"learning_rate": 4.631594600977982e-05,
"loss": 2.0789,
"step": 84500
},
{
"epoch": 0.17,
"learning_rate": 4.626127347638584e-05,
"loss": 2.0811,
"step": 85000
},
{
"epoch": 0.17,
"learning_rate": 4.620660094299186e-05,
"loss": 2.0819,
"step": 85500
},
{
"epoch": 0.17,
"learning_rate": 4.615192840959787e-05,
"loss": 2.0768,
"step": 86000
},
{
"epoch": 0.17,
"learning_rate": 4.6097255876203895e-05,
"loss": 2.0715,
"step": 86500
},
{
"epoch": 0.17,
"learning_rate": 4.6042583342809906e-05,
"loss": 2.0742,
"step": 87000
},
{
"epoch": 0.17,
"learning_rate": 4.5987910809415924e-05,
"loss": 2.0699,
"step": 87500
},
{
"epoch": 0.17,
"learning_rate": 4.593323827602194e-05,
"loss": 2.0743,
"step": 88000
},
{
"epoch": 0.17,
"learning_rate": 4.587856574262796e-05,
"loss": 2.0694,
"step": 88500
},
{
"epoch": 0.18,
"learning_rate": 4.582389320923398e-05,
"loss": 2.0675,
"step": 89000
},
{
"epoch": 0.18,
"learning_rate": 4.576922067583999e-05,
"loss": 2.0656,
"step": 89500
},
{
"epoch": 0.18,
"learning_rate": 4.571454814244601e-05,
"loss": 2.0637,
"step": 90000
},
{
"epoch": 0.18,
"learning_rate": 4.5659875609052025e-05,
"loss": 2.0667,
"step": 90500
},
{
"epoch": 0.18,
"learning_rate": 4.560520307565804e-05,
"loss": 2.062,
"step": 91000
},
{
"epoch": 0.18,
"learning_rate": 4.555053054226406e-05,
"loss": 2.0659,
"step": 91500
},
{
"epoch": 0.18,
"learning_rate": 4.549585800887007e-05,
"loss": 2.0597,
"step": 92000
},
{
"epoch": 0.18,
"learning_rate": 4.544118547547609e-05,
"loss": 2.067,
"step": 92500
},
{
"epoch": 0.18,
"learning_rate": 4.53865129420821e-05,
"loss": 2.0593,
"step": 93000
},
{
"epoch": 0.18,
"learning_rate": 4.5331840408688126e-05,
"loss": 2.0562,
"step": 93500
},
{
"epoch": 0.19,
"learning_rate": 4.527716787529414e-05,
"loss": 2.0589,
"step": 94000
},
{
"epoch": 0.19,
"learning_rate": 4.5222495341900156e-05,
"loss": 2.0566,
"step": 94500
},
{
"epoch": 0.19,
"learning_rate": 4.5167822808506174e-05,
"loss": 2.0576,
"step": 95000
},
{
"epoch": 0.19,
"learning_rate": 4.511315027511219e-05,
"loss": 2.058,
"step": 95500
},
{
"epoch": 0.19,
"learning_rate": 4.505847774171821e-05,
"loss": 2.0528,
"step": 96000
},
{
"epoch": 0.19,
"learning_rate": 4.500380520832422e-05,
"loss": 2.0562,
"step": 96500
},
{
"epoch": 0.19,
"learning_rate": 4.494913267493024e-05,
"loss": 2.049,
"step": 97000
},
{
"epoch": 0.19,
"learning_rate": 4.489446014153626e-05,
"loss": 2.0536,
"step": 97500
},
{
"epoch": 0.19,
"learning_rate": 4.4839787608142275e-05,
"loss": 2.0538,
"step": 98000
},
{
"epoch": 0.19,
"learning_rate": 4.478511507474829e-05,
"loss": 2.0466,
"step": 98500
},
{
"epoch": 0.19,
"learning_rate": 4.4730442541354304e-05,
"loss": 2.049,
"step": 99000
},
{
"epoch": 0.2,
"learning_rate": 4.467577000796032e-05,
"loss": 2.0455,
"step": 99500
},
{
"epoch": 0.2,
"learning_rate": 4.462109747456634e-05,
"loss": 2.0441,
"step": 100000
},
{
"epoch": 0.2,
"learning_rate": 4.456642494117236e-05,
"loss": 2.0477,
"step": 100500
},
{
"epoch": 0.2,
"learning_rate": 4.4511752407778376e-05,
"loss": 2.0425,
"step": 101000
},
{
"epoch": 0.2,
"learning_rate": 4.445707987438439e-05,
"loss": 2.0475,
"step": 101500
},
{
"epoch": 0.2,
"learning_rate": 4.4402407340990405e-05,
"loss": 2.0482,
"step": 102000
},
{
"epoch": 0.2,
"learning_rate": 4.434773480759642e-05,
"loss": 2.0483,
"step": 102500
},
{
"epoch": 0.2,
"learning_rate": 4.429306227420244e-05,
"loss": 2.0375,
"step": 103000
},
{
"epoch": 0.2,
"learning_rate": 4.423838974080845e-05,
"loss": 2.0384,
"step": 103500
},
{
"epoch": 0.2,
"learning_rate": 4.418371720741447e-05,
"loss": 2.0383,
"step": 104000
},
{
"epoch": 0.21,
"learning_rate": 4.412904467402049e-05,
"loss": 2.037,
"step": 104500
},
{
"epoch": 0.21,
"learning_rate": 4.4074372140626507e-05,
"loss": 2.0315,
"step": 105000
},
{
"epoch": 0.21,
"learning_rate": 4.4019699607232525e-05,
"loss": 2.0375,
"step": 105500
},
{
"epoch": 0.21,
"learning_rate": 4.3965027073838536e-05,
"loss": 2.0414,
"step": 106000
},
{
"epoch": 0.21,
"learning_rate": 4.3910354540444554e-05,
"loss": 2.0324,
"step": 106500
},
{
"epoch": 0.21,
"learning_rate": 4.385568200705057e-05,
"loss": 2.0316,
"step": 107000
},
{
"epoch": 0.21,
"learning_rate": 4.380100947365659e-05,
"loss": 2.0343,
"step": 107500
},
{
"epoch": 0.21,
"learning_rate": 4.374633694026261e-05,
"loss": 2.0314,
"step": 108000
},
{
"epoch": 0.21,
"learning_rate": 4.369166440686862e-05,
"loss": 2.0347,
"step": 108500
},
{
"epoch": 0.21,
"learning_rate": 4.363699187347464e-05,
"loss": 2.0268,
"step": 109000
},
{
"epoch": 0.22,
"learning_rate": 4.3582319340080655e-05,
"loss": 2.0323,
"step": 109500
},
{
"epoch": 0.22,
"learning_rate": 4.352764680668667e-05,
"loss": 2.03,
"step": 110000
},
{
"epoch": 0.22,
"learning_rate": 4.347297427329269e-05,
"loss": 2.0231,
"step": 110500
},
{
"epoch": 0.22,
"learning_rate": 4.34183017398987e-05,
"loss": 2.0279,
"step": 111000
},
{
"epoch": 0.22,
"learning_rate": 4.336362920650472e-05,
"loss": 2.0298,
"step": 111500
},
{
"epoch": 0.22,
"learning_rate": 4.330895667311074e-05,
"loss": 2.0408,
"step": 112000
},
{
"epoch": 0.22,
"learning_rate": 4.3254284139716756e-05,
"loss": 2.0199,
"step": 112500
},
{
"epoch": 0.22,
"learning_rate": 4.319961160632277e-05,
"loss": 2.0297,
"step": 113000
},
{
"epoch": 0.22,
"learning_rate": 4.3144939072928786e-05,
"loss": 2.0287,
"step": 113500
},
{
"epoch": 0.22,
"learning_rate": 4.309026653953481e-05,
"loss": 2.0256,
"step": 114000
},
{
"epoch": 0.23,
"learning_rate": 4.303559400614082e-05,
"loss": 2.0197,
"step": 114500
},
{
"epoch": 0.23,
"learning_rate": 4.298092147274684e-05,
"loss": 2.0234,
"step": 115000
},
{
"epoch": 0.23,
"learning_rate": 4.292624893935285e-05,
"loss": 2.0391,
"step": 115500
},
{
"epoch": 0.23,
"learning_rate": 4.287157640595887e-05,
"loss": 2.0772,
"step": 116000
},
{
"epoch": 0.23,
"learning_rate": 4.281690387256489e-05,
"loss": 2.0586,
"step": 116500
},
{
"epoch": 0.23,
"learning_rate": 4.2762231339170905e-05,
"loss": 2.0336,
"step": 117000
},
{
"epoch": 0.23,
"learning_rate": 4.270755880577692e-05,
"loss": 2.0207,
"step": 117500
},
{
"epoch": 0.23,
"learning_rate": 4.2652886272382934e-05,
"loss": 2.0197,
"step": 118000
},
{
"epoch": 0.23,
"learning_rate": 4.259821373898895e-05,
"loss": 2.0184,
"step": 118500
},
{
"epoch": 0.23,
"learning_rate": 4.254354120559497e-05,
"loss": 2.0162,
"step": 119000
},
{
"epoch": 0.24,
"learning_rate": 4.248886867220099e-05,
"loss": 2.0139,
"step": 119500
},
{
"epoch": 0.24,
"learning_rate": 4.2434196138807006e-05,
"loss": 2.0141,
"step": 120000
},
{
"epoch": 0.24,
"learning_rate": 4.237952360541302e-05,
"loss": 2.0119,
"step": 120500
},
{
"epoch": 0.24,
"learning_rate": 4.232485107201904e-05,
"loss": 2.0109,
"step": 121000
},
{
"epoch": 0.24,
"learning_rate": 4.227017853862505e-05,
"loss": 2.0112,
"step": 121500
},
{
"epoch": 0.24,
"learning_rate": 4.221550600523107e-05,
"loss": 2.0106,
"step": 122000
},
{
"epoch": 0.24,
"learning_rate": 4.216083347183708e-05,
"loss": 2.0115,
"step": 122500
},
{
"epoch": 0.24,
"learning_rate": 4.21061609384431e-05,
"loss": 2.0164,
"step": 123000
},
{
"epoch": 0.24,
"learning_rate": 4.2051488405049125e-05,
"loss": 2.0055,
"step": 123500
},
{
"epoch": 0.24,
"learning_rate": 4.1996815871655136e-05,
"loss": 2.0117,
"step": 124000
},
{
"epoch": 0.25,
"learning_rate": 4.1942143338261154e-05,
"loss": 2.008,
"step": 124500
},
{
"epoch": 0.25,
"learning_rate": 4.1887470804867166e-05,
"loss": 2.0077,
"step": 125000
},
{
"epoch": 0.25,
"learning_rate": 4.1832798271473184e-05,
"loss": 2.009,
"step": 125500
},
{
"epoch": 0.25,
"learning_rate": 4.17781257380792e-05,
"loss": 2.0096,
"step": 126000
},
{
"epoch": 0.25,
"learning_rate": 4.172345320468522e-05,
"loss": 2.0025,
"step": 126500
},
{
"epoch": 0.25,
"learning_rate": 4.166878067129124e-05,
"loss": 2.0013,
"step": 127000
},
{
"epoch": 0.25,
"learning_rate": 4.161410813789725e-05,
"loss": 2.0031,
"step": 127500
},
{
"epoch": 0.25,
"learning_rate": 4.1559435604503274e-05,
"loss": 2.0039,
"step": 128000
},
{
"epoch": 0.25,
"learning_rate": 4.1504763071109285e-05,
"loss": 2.0002,
"step": 128500
},
{
"epoch": 0.25,
"learning_rate": 4.14500905377153e-05,
"loss": 2.0007,
"step": 129000
},
{
"epoch": 0.25,
"learning_rate": 4.139541800432132e-05,
"loss": 2.0078,
"step": 129500
},
{
"epoch": 0.26,
"learning_rate": 4.134074547092733e-05,
"loss": 1.9981,
"step": 130000
},
{
"epoch": 0.26,
"learning_rate": 4.128607293753336e-05,
"loss": 1.9989,
"step": 130500
},
{
"epoch": 0.26,
"learning_rate": 4.123140040413937e-05,
"loss": 1.997,
"step": 131000
},
{
"epoch": 0.26,
"learning_rate": 4.1176727870745386e-05,
"loss": 2.0004,
"step": 131500
},
{
"epoch": 0.26,
"learning_rate": 4.11220553373514e-05,
"loss": 1.9983,
"step": 132000
},
{
"epoch": 0.26,
"learning_rate": 4.1067382803957415e-05,
"loss": 1.9995,
"step": 132500
},
{
"epoch": 0.26,
"learning_rate": 4.101271027056344e-05,
"loss": 1.9994,
"step": 133000
},
{
"epoch": 0.26,
"learning_rate": 4.095803773716945e-05,
"loss": 1.9956,
"step": 133500
},
{
"epoch": 0.26,
"learning_rate": 4.090336520377547e-05,
"loss": 1.994,
"step": 134000
},
{
"epoch": 0.26,
"learning_rate": 4.084869267038148e-05,
"loss": 1.9955,
"step": 134500
},
{
"epoch": 0.27,
"learning_rate": 4.0794020136987505e-05,
"loss": 1.9994,
"step": 135000
},
{
"epoch": 0.27,
"learning_rate": 4.0739347603593517e-05,
"loss": 1.9897,
"step": 135500
},
{
"epoch": 0.27,
"learning_rate": 4.0684675070199535e-05,
"loss": 1.9961,
"step": 136000
},
{
"epoch": 0.27,
"learning_rate": 4.063000253680555e-05,
"loss": 1.9946,
"step": 136500
},
{
"epoch": 0.27,
"learning_rate": 4.0575330003411564e-05,
"loss": 1.991,
"step": 137000
},
{
"epoch": 0.27,
"learning_rate": 4.052065747001759e-05,
"loss": 1.9952,
"step": 137500
},
{
"epoch": 0.27,
"learning_rate": 4.04659849366236e-05,
"loss": 1.9848,
"step": 138000
},
{
"epoch": 0.27,
"learning_rate": 4.041131240322962e-05,
"loss": 1.9937,
"step": 138500
},
{
"epoch": 0.27,
"learning_rate": 4.0356639869835636e-05,
"loss": 1.9893,
"step": 139000
},
{
"epoch": 0.27,
"learning_rate": 4.030196733644165e-05,
"loss": 1.9871,
"step": 139500
},
{
"epoch": 0.28,
"learning_rate": 4.024729480304767e-05,
"loss": 1.9855,
"step": 140000
},
{
"epoch": 0.28,
"learning_rate": 4.019262226965368e-05,
"loss": 1.9887,
"step": 140500
},
{
"epoch": 0.28,
"learning_rate": 4.01379497362597e-05,
"loss": 1.9863,
"step": 141000
},
{
"epoch": 0.28,
"learning_rate": 4.008327720286571e-05,
"loss": 1.9904,
"step": 141500
},
{
"epoch": 0.28,
"learning_rate": 4.002860466947174e-05,
"loss": 1.9886,
"step": 142000
},
{
"epoch": 0.28,
"learning_rate": 3.9973932136077755e-05,
"loss": 1.9871,
"step": 142500
},
{
"epoch": 0.28,
"learning_rate": 3.9919259602683766e-05,
"loss": 1.9843,
"step": 143000
},
{
"epoch": 0.28,
"learning_rate": 3.9864587069289784e-05,
"loss": 1.9891,
"step": 143500
},
{
"epoch": 0.28,
"learning_rate": 3.9809914535895795e-05,
"loss": 1.985,
"step": 144000
},
{
"epoch": 0.28,
"learning_rate": 3.975524200250182e-05,
"loss": 1.9826,
"step": 144500
},
{
"epoch": 0.29,
"learning_rate": 3.970056946910783e-05,
"loss": 1.9839,
"step": 145000
},
{
"epoch": 0.29,
"learning_rate": 3.964589693571385e-05,
"loss": 1.9831,
"step": 145500
},
{
"epoch": 0.29,
"learning_rate": 3.959122440231987e-05,
"loss": 1.9843,
"step": 146000
},
{
"epoch": 0.29,
"learning_rate": 3.953655186892588e-05,
"loss": 1.9825,
"step": 146500
},
{
"epoch": 0.29,
"learning_rate": 3.9481879335531903e-05,
"loss": 1.9752,
"step": 147000
},
{
"epoch": 0.29,
"learning_rate": 3.9427206802137915e-05,
"loss": 1.9787,
"step": 147500
},
{
"epoch": 0.29,
"learning_rate": 3.937253426874393e-05,
"loss": 1.984,
"step": 148000
},
{
"epoch": 0.29,
"learning_rate": 3.931786173534995e-05,
"loss": 1.9783,
"step": 148500
},
{
"epoch": 0.29,
"learning_rate": 3.926318920195597e-05,
"loss": 1.9785,
"step": 149000
},
{
"epoch": 0.29,
"learning_rate": 3.920851666856199e-05,
"loss": 1.9759,
"step": 149500
},
{
"epoch": 0.3,
"learning_rate": 3.9153844135168e-05,
"loss": 1.9803,
"step": 150000
},
{
"epoch": 0.3,
"learning_rate": 3.9099171601774016e-05,
"loss": 1.9733,
"step": 150500
},
{
"epoch": 0.3,
"learning_rate": 3.904449906838003e-05,
"loss": 1.9759,
"step": 151000
},
{
"epoch": 0.3,
"learning_rate": 3.898982653498605e-05,
"loss": 1.9767,
"step": 151500
},
{
"epoch": 0.3,
"learning_rate": 3.893515400159207e-05,
"loss": 1.9773,
"step": 152000
},
{
"epoch": 0.3,
"learning_rate": 3.888048146819808e-05,
"loss": 1.9736,
"step": 152500
},
{
"epoch": 0.3,
"learning_rate": 3.88258089348041e-05,
"loss": 1.9729,
"step": 153000
},
{
"epoch": 0.3,
"learning_rate": 3.877113640141012e-05,
"loss": 1.9756,
"step": 153500
},
{
"epoch": 0.3,
"learning_rate": 3.8716463868016135e-05,
"loss": 1.9747,
"step": 154000
},
{
"epoch": 0.3,
"learning_rate": 3.8661791334622146e-05,
"loss": 1.9732,
"step": 154500
},
{
"epoch": 0.31,
"learning_rate": 3.8607118801228164e-05,
"loss": 1.9696,
"step": 155000
},
{
"epoch": 0.31,
"learning_rate": 3.855244626783418e-05,
"loss": 1.9731,
"step": 155500
},
{
"epoch": 0.31,
"learning_rate": 3.84977737344402e-05,
"loss": 1.9741,
"step": 156000
},
{
"epoch": 0.31,
"learning_rate": 3.844310120104622e-05,
"loss": 1.9747,
"step": 156500
},
{
"epoch": 0.31,
"learning_rate": 3.838842866765223e-05,
"loss": 1.9717,
"step": 157000
},
{
"epoch": 0.31,
"learning_rate": 3.833375613425825e-05,
"loss": 1.9724,
"step": 157500
},
{
"epoch": 0.31,
"learning_rate": 3.8279083600864266e-05,
"loss": 1.9663,
"step": 158000
},
{
"epoch": 0.31,
"learning_rate": 3.8224411067470284e-05,
"loss": 1.9671,
"step": 158500
},
{
"epoch": 0.31,
"learning_rate": 3.81697385340763e-05,
"loss": 1.9684,
"step": 159000
},
{
"epoch": 0.31,
"learning_rate": 3.811506600068231e-05,
"loss": 1.9683,
"step": 159500
},
{
"epoch": 0.31,
"learning_rate": 3.806039346728833e-05,
"loss": 1.9682,
"step": 160000
},
{
"epoch": 0.32,
"learning_rate": 3.800572093389435e-05,
"loss": 1.9673,
"step": 160500
},
{
"epoch": 0.32,
"learning_rate": 3.795104840050037e-05,
"loss": 1.9639,
"step": 161000
},
{
"epoch": 0.32,
"learning_rate": 3.7896375867106385e-05,
"loss": 1.9633,
"step": 161500
},
{
"epoch": 0.32,
"learning_rate": 3.7841703333712396e-05,
"loss": 1.964,
"step": 162000
},
{
"epoch": 0.32,
"learning_rate": 3.7787030800318414e-05,
"loss": 1.9648,
"step": 162500
},
{
"epoch": 0.32,
"learning_rate": 3.773235826692443e-05,
"loss": 1.9636,
"step": 163000
},
{
"epoch": 0.32,
"learning_rate": 3.767768573353045e-05,
"loss": 1.9623,
"step": 163500
},
{
"epoch": 0.32,
"learning_rate": 3.762301320013646e-05,
"loss": 1.9656,
"step": 164000
},
{
"epoch": 0.32,
"learning_rate": 3.756834066674248e-05,
"loss": 1.9636,
"step": 164500
},
{
"epoch": 0.32,
"learning_rate": 3.75136681333485e-05,
"loss": 1.9672,
"step": 165000
},
{
"epoch": 0.33,
"learning_rate": 3.7458995599954515e-05,
"loss": 1.9639,
"step": 165500
},
{
"epoch": 0.33,
"learning_rate": 3.740432306656053e-05,
"loss": 1.9628,
"step": 166000
},
{
"epoch": 0.33,
"learning_rate": 3.7349650533166545e-05,
"loss": 1.9647,
"step": 166500
},
{
"epoch": 0.33,
"learning_rate": 3.729497799977256e-05,
"loss": 1.9633,
"step": 167000
},
{
"epoch": 0.33,
"learning_rate": 3.724030546637858e-05,
"loss": 1.9585,
"step": 167500
},
{
"epoch": 0.33,
"learning_rate": 3.71856329329846e-05,
"loss": 1.9599,
"step": 168000
},
{
"epoch": 0.33,
"learning_rate": 3.7130960399590617e-05,
"loss": 1.9601,
"step": 168500
},
{
"epoch": 0.33,
"learning_rate": 3.707628786619663e-05,
"loss": 1.9617,
"step": 169000
},
{
"epoch": 0.33,
"learning_rate": 3.7021615332802646e-05,
"loss": 1.9583,
"step": 169500
},
{
"epoch": 0.33,
"learning_rate": 3.6966942799408664e-05,
"loss": 1.9606,
"step": 170000
},
{
"epoch": 0.34,
"learning_rate": 3.691227026601468e-05,
"loss": 1.955,
"step": 170500
},
{
"epoch": 0.34,
"learning_rate": 3.68575977326207e-05,
"loss": 1.956,
"step": 171000
},
{
"epoch": 0.34,
"learning_rate": 3.680292519922671e-05,
"loss": 1.9584,
"step": 171500
},
{
"epoch": 0.34,
"learning_rate": 3.674825266583273e-05,
"loss": 1.9575,
"step": 172000
},
{
"epoch": 0.34,
"learning_rate": 3.669358013243875e-05,
"loss": 1.9531,
"step": 172500
},
{
"epoch": 0.34,
"learning_rate": 3.6638907599044765e-05,
"loss": 1.9603,
"step": 173000
},
{
"epoch": 0.34,
"learning_rate": 3.6584235065650776e-05,
"loss": 1.9542,
"step": 173500
},
{
"epoch": 0.34,
"learning_rate": 3.6529562532256794e-05,
"loss": 1.9515,
"step": 174000
},
{
"epoch": 0.34,
"learning_rate": 3.647488999886282e-05,
"loss": 1.9594,
"step": 174500
},
{
"epoch": 0.34,
"learning_rate": 3.642021746546883e-05,
"loss": 1.955,
"step": 175000
},
{
"epoch": 0.35,
"learning_rate": 3.636554493207485e-05,
"loss": 1.9521,
"step": 175500
},
{
"epoch": 0.35,
"learning_rate": 3.631087239868086e-05,
"loss": 1.9564,
"step": 176000
},
{
"epoch": 0.35,
"learning_rate": 3.625619986528688e-05,
"loss": 1.9556,
"step": 176500
},
{
"epoch": 0.35,
"learning_rate": 3.6201527331892895e-05,
"loss": 1.9517,
"step": 177000
},
{
"epoch": 0.35,
"learning_rate": 3.6146854798498913e-05,
"loss": 1.9569,
"step": 177500
},
{
"epoch": 0.35,
"learning_rate": 3.609218226510493e-05,
"loss": 1.9482,
"step": 178000
},
{
"epoch": 0.35,
"learning_rate": 3.603750973171094e-05,
"loss": 1.9496,
"step": 178500
},
{
"epoch": 0.35,
"learning_rate": 3.598283719831696e-05,
"loss": 1.95,
"step": 179000
},
{
"epoch": 0.35,
"learning_rate": 3.592816466492298e-05,
"loss": 1.9519,
"step": 179500
},
{
"epoch": 0.35,
"learning_rate": 3.5873492131529e-05,
"loss": 1.9477,
"step": 180000
},
{
"epoch": 0.36,
"learning_rate": 3.5818819598135015e-05,
"loss": 1.9482,
"step": 180500
},
{
"epoch": 0.36,
"learning_rate": 3.5764147064741026e-05,
"loss": 1.9511,
"step": 181000
},
{
"epoch": 0.36,
"learning_rate": 3.570947453134705e-05,
"loss": 1.9464,
"step": 181500
},
{
"epoch": 0.36,
"learning_rate": 3.565480199795306e-05,
"loss": 1.9493,
"step": 182000
},
{
"epoch": 0.36,
"learning_rate": 3.560012946455908e-05,
"loss": 1.9462,
"step": 182500
},
{
"epoch": 0.36,
"learning_rate": 3.554545693116509e-05,
"loss": 1.9493,
"step": 183000
},
{
"epoch": 0.36,
"learning_rate": 3.549078439777111e-05,
"loss": 1.9472,
"step": 183500
},
{
"epoch": 0.36,
"learning_rate": 3.5436111864377134e-05,
"loss": 1.9475,
"step": 184000
},
{
"epoch": 0.36,
"learning_rate": 3.5381439330983145e-05,
"loss": 1.9461,
"step": 184500
},
{
"epoch": 0.36,
"learning_rate": 3.532676679758916e-05,
"loss": 1.9409,
"step": 185000
},
{
"epoch": 0.37,
"learning_rate": 3.5272094264195174e-05,
"loss": 1.9498,
"step": 185500
},
{
"epoch": 0.37,
"learning_rate": 3.521742173080119e-05,
"loss": 1.9475,
"step": 186000
},
{
"epoch": 0.37,
"learning_rate": 3.516274919740721e-05,
"loss": 1.9491,
"step": 186500
},
{
"epoch": 0.37,
"learning_rate": 3.510807666401323e-05,
"loss": 1.9433,
"step": 187000
},
{
"epoch": 0.37,
"learning_rate": 3.5053404130619246e-05,
"loss": 1.9436,
"step": 187500
},
{
"epoch": 0.37,
"learning_rate": 3.499873159722526e-05,
"loss": 1.9425,
"step": 188000
},
{
"epoch": 0.37,
"learning_rate": 3.494405906383128e-05,
"loss": 1.9384,
"step": 188500
},
{
"epoch": 0.37,
"learning_rate": 3.4889386530437294e-05,
"loss": 1.9398,
"step": 189000
},
{
"epoch": 0.37,
"learning_rate": 3.483471399704331e-05,
"loss": 1.9428,
"step": 189500
},
{
"epoch": 0.37,
"learning_rate": 3.478004146364933e-05,
"loss": 1.9416,
"step": 190000
},
{
"epoch": 0.37,
"learning_rate": 3.472536893025534e-05,
"loss": 1.9409,
"step": 190500
},
{
"epoch": 0.38,
"learning_rate": 3.4670696396861366e-05,
"loss": 1.9423,
"step": 191000
},
{
"epoch": 0.38,
"learning_rate": 3.461602386346738e-05,
"loss": 1.9409,
"step": 191500
},
{
"epoch": 0.38,
"learning_rate": 3.4561351330073395e-05,
"loss": 1.9399,
"step": 192000
},
{
"epoch": 0.38,
"learning_rate": 3.4506678796679406e-05,
"loss": 1.94,
"step": 192500
},
{
"epoch": 0.38,
"learning_rate": 3.4452006263285424e-05,
"loss": 1.9386,
"step": 193000
},
{
"epoch": 0.38,
"learning_rate": 3.439733372989145e-05,
"loss": 1.9391,
"step": 193500
},
{
"epoch": 0.38,
"learning_rate": 3.434266119649746e-05,
"loss": 1.9412,
"step": 194000
},
{
"epoch": 0.38,
"learning_rate": 3.428798866310348e-05,
"loss": 1.9384,
"step": 194500
},
{
"epoch": 0.38,
"learning_rate": 3.423331612970949e-05,
"loss": 1.9364,
"step": 195000
},
{
"epoch": 0.38,
"learning_rate": 3.4178643596315514e-05,
"loss": 1.9354,
"step": 195500
},
{
"epoch": 0.39,
"learning_rate": 3.4123971062921525e-05,
"loss": 1.9374,
"step": 196000
},
{
"epoch": 0.39,
"learning_rate": 3.406929852952754e-05,
"loss": 1.9363,
"step": 196500
},
{
"epoch": 0.39,
"learning_rate": 3.401462599613356e-05,
"loss": 1.9333,
"step": 197000
},
{
"epoch": 0.39,
"learning_rate": 3.395995346273957e-05,
"loss": 1.9375,
"step": 197500
},
{
"epoch": 0.39,
"learning_rate": 3.39052809293456e-05,
"loss": 1.9422,
"step": 198000
},
{
"epoch": 0.39,
"learning_rate": 3.385060839595161e-05,
"loss": 1.9363,
"step": 198500
},
{
"epoch": 0.39,
"learning_rate": 3.3795935862557626e-05,
"loss": 1.9335,
"step": 199000
},
{
"epoch": 0.39,
"learning_rate": 3.3741263329163644e-05,
"loss": 1.9378,
"step": 199500
},
{
"epoch": 0.39,
"learning_rate": 3.3686590795769656e-05,
"loss": 1.9394,
"step": 200000
},
{
"epoch": 0.39,
"learning_rate": 3.363191826237568e-05,
"loss": 1.927,
"step": 200500
},
{
"epoch": 0.4,
"learning_rate": 3.357724572898169e-05,
"loss": 1.9303,
"step": 201000
},
{
"epoch": 0.4,
"learning_rate": 3.352257319558771e-05,
"loss": 1.9333,
"step": 201500
},
{
"epoch": 0.4,
"learning_rate": 3.346790066219372e-05,
"loss": 1.9285,
"step": 202000
},
{
"epoch": 0.4,
"learning_rate": 3.3413228128799746e-05,
"loss": 1.9337,
"step": 202500
},
{
"epoch": 0.4,
"learning_rate": 3.3358555595405764e-05,
"loss": 1.9339,
"step": 203000
},
{
"epoch": 0.4,
"learning_rate": 3.3303883062011775e-05,
"loss": 1.9368,
"step": 203500
},
{
"epoch": 0.4,
"learning_rate": 3.324921052861779e-05,
"loss": 1.934,
"step": 204000
},
{
"epoch": 0.4,
"learning_rate": 3.3194537995223804e-05,
"loss": 1.9356,
"step": 204500
},
{
"epoch": 0.4,
"learning_rate": 3.313986546182983e-05,
"loss": 1.9305,
"step": 205000
},
{
"epoch": 0.4,
"learning_rate": 3.308519292843584e-05,
"loss": 1.9291,
"step": 205500
},
{
"epoch": 0.41,
"learning_rate": 3.303052039504186e-05,
"loss": 1.9323,
"step": 206000
},
{
"epoch": 0.41,
"learning_rate": 3.2975847861647876e-05,
"loss": 1.9343,
"step": 206500
},
{
"epoch": 0.41,
"learning_rate": 3.2921175328253894e-05,
"loss": 1.9315,
"step": 207000
},
{
"epoch": 0.41,
"learning_rate": 3.286650279485991e-05,
"loss": 1.9278,
"step": 207500
},
{
"epoch": 0.41,
"learning_rate": 3.2811830261465923e-05,
"loss": 1.9295,
"step": 208000
},
{
"epoch": 0.41,
"learning_rate": 3.275715772807194e-05,
"loss": 1.9323,
"step": 208500
},
{
"epoch": 0.41,
"learning_rate": 3.270248519467796e-05,
"loss": 1.9287,
"step": 209000
},
{
"epoch": 0.41,
"learning_rate": 3.264781266128398e-05,
"loss": 1.9327,
"step": 209500
},
{
"epoch": 0.41,
"learning_rate": 3.2593140127889995e-05,
"loss": 1.9311,
"step": 210000
},
{
"epoch": 0.41,
"learning_rate": 3.253846759449601e-05,
"loss": 1.9262,
"step": 210500
},
{
"epoch": 0.42,
"learning_rate": 3.2483795061102025e-05,
"loss": 1.9273,
"step": 211000
},
{
"epoch": 0.42,
"learning_rate": 3.2429122527708036e-05,
"loss": 1.9276,
"step": 211500
},
{
"epoch": 0.42,
"learning_rate": 3.237444999431406e-05,
"loss": 1.9307,
"step": 212000
},
{
"epoch": 0.42,
"learning_rate": 3.231977746092008e-05,
"loss": 1.9261,
"step": 212500
},
{
"epoch": 0.42,
"learning_rate": 3.226510492752609e-05,
"loss": 1.9286,
"step": 213000
},
{
"epoch": 0.42,
"learning_rate": 3.221043239413211e-05,
"loss": 1.9315,
"step": 213500
},
{
"epoch": 0.42,
"learning_rate": 3.2155759860738126e-05,
"loss": 1.9297,
"step": 214000
},
{
"epoch": 0.42,
"learning_rate": 3.2101087327344144e-05,
"loss": 1.9317,
"step": 214500
},
{
"epoch": 0.42,
"learning_rate": 3.2046414793950155e-05,
"loss": 1.927,
"step": 215000
},
{
"epoch": 0.42,
"learning_rate": 3.199174226055617e-05,
"loss": 1.9279,
"step": 215500
},
{
"epoch": 0.43,
"learning_rate": 3.193706972716219e-05,
"loss": 1.922,
"step": 216000
},
{
"epoch": 0.43,
"learning_rate": 3.188239719376821e-05,
"loss": 1.9259,
"step": 216500
},
{
"epoch": 0.43,
"learning_rate": 3.182772466037423e-05,
"loss": 1.9234,
"step": 217000
},
{
"epoch": 0.43,
"learning_rate": 3.177305212698024e-05,
"loss": 1.9232,
"step": 217500
},
{
"epoch": 0.43,
"learning_rate": 3.1718379593586256e-05,
"loss": 1.9226,
"step": 218000
},
{
"epoch": 0.43,
"learning_rate": 3.1663707060192274e-05,
"loss": 1.922,
"step": 218500
},
{
"epoch": 0.43,
"learning_rate": 3.160903452679829e-05,
"loss": 1.9215,
"step": 219000
},
{
"epoch": 0.43,
"learning_rate": 3.155436199340431e-05,
"loss": 1.9207,
"step": 219500
},
{
"epoch": 0.43,
"learning_rate": 3.149968946001032e-05,
"loss": 1.9232,
"step": 220000
},
{
"epoch": 0.43,
"learning_rate": 3.144501692661634e-05,
"loss": 1.9223,
"step": 220500
},
{
"epoch": 0.43,
"learning_rate": 3.139034439322236e-05,
"loss": 1.9224,
"step": 221000
},
{
"epoch": 0.44,
"learning_rate": 3.1335671859828376e-05,
"loss": 1.9219,
"step": 221500
},
{
"epoch": 0.44,
"learning_rate": 3.1280999326434394e-05,
"loss": 1.9235,
"step": 222000
},
{
"epoch": 0.44,
"learning_rate": 3.1226326793040405e-05,
"loss": 1.9225,
"step": 222500
},
{
"epoch": 0.44,
"learning_rate": 3.117165425964642e-05,
"loss": 1.92,
"step": 223000
},
{
"epoch": 0.44,
"learning_rate": 3.111698172625244e-05,
"loss": 1.9148,
"step": 223500
},
{
"epoch": 0.44,
"learning_rate": 3.106230919285846e-05,
"loss": 1.9245,
"step": 224000
},
{
"epoch": 0.44,
"learning_rate": 3.100763665946447e-05,
"loss": 1.9184,
"step": 224500
},
{
"epoch": 0.44,
"learning_rate": 3.095296412607049e-05,
"loss": 1.9193,
"step": 225000
},
{
"epoch": 0.44,
"learning_rate": 3.0898291592676506e-05,
"loss": 1.917,
"step": 225500
},
{
"epoch": 0.44,
"learning_rate": 3.0843619059282524e-05,
"loss": 1.9092,
"step": 226000
},
{
"epoch": 0.45,
"learning_rate": 3.078894652588854e-05,
"loss": 1.9185,
"step": 226500
},
{
"epoch": 0.45,
"learning_rate": 3.073427399249455e-05,
"loss": 1.9165,
"step": 227000
},
{
"epoch": 0.45,
"learning_rate": 3.067960145910057e-05,
"loss": 1.9171,
"step": 227500
},
{
"epoch": 0.45,
"learning_rate": 3.062492892570659e-05,
"loss": 1.9182,
"step": 228000
},
{
"epoch": 0.45,
"learning_rate": 3.057025639231261e-05,
"loss": 1.9182,
"step": 228500
},
{
"epoch": 0.45,
"learning_rate": 3.0515583858918622e-05,
"loss": 1.9155,
"step": 229000
},
{
"epoch": 0.45,
"learning_rate": 3.0460911325524636e-05,
"loss": 1.9137,
"step": 229500
},
{
"epoch": 0.45,
"learning_rate": 3.0406238792130654e-05,
"loss": 1.9162,
"step": 230000
},
{
"epoch": 0.45,
"learning_rate": 3.0351566258736676e-05,
"loss": 1.9165,
"step": 230500
},
{
"epoch": 0.45,
"learning_rate": 3.029689372534269e-05,
"loss": 1.9184,
"step": 231000
},
{
"epoch": 0.46,
"learning_rate": 3.0242221191948705e-05,
"loss": 1.9127,
"step": 231500
},
{
"epoch": 0.46,
"learning_rate": 3.018754865855472e-05,
"loss": 1.9197,
"step": 232000
},
{
"epoch": 0.46,
"learning_rate": 3.0132876125160738e-05,
"loss": 1.915,
"step": 232500
},
{
"epoch": 0.46,
"learning_rate": 3.0078203591766756e-05,
"loss": 1.9189,
"step": 233000
},
{
"epoch": 0.46,
"learning_rate": 3.0023531058372774e-05,
"loss": 1.9154,
"step": 233500
},
{
"epoch": 0.46,
"learning_rate": 2.9968858524978788e-05,
"loss": 1.9117,
"step": 234000
},
{
"epoch": 0.46,
"learning_rate": 2.9914185991584803e-05,
"loss": 1.911,
"step": 234500
},
{
"epoch": 0.46,
"learning_rate": 2.9859513458190824e-05,
"loss": 1.9191,
"step": 235000
},
{
"epoch": 0.46,
"learning_rate": 2.980484092479684e-05,
"loss": 1.9158,
"step": 235500
},
{
"epoch": 0.46,
"learning_rate": 2.9750168391402854e-05,
"loss": 1.9149,
"step": 236000
},
{
"epoch": 0.47,
"learning_rate": 2.969549585800887e-05,
"loss": 1.9146,
"step": 236500
},
{
"epoch": 0.47,
"learning_rate": 2.9640823324614886e-05,
"loss": 1.9111,
"step": 237000
},
{
"epoch": 0.47,
"learning_rate": 2.9586150791220908e-05,
"loss": 1.9105,
"step": 237500
},
{
"epoch": 0.47,
"learning_rate": 2.9531478257826922e-05,
"loss": 1.9125,
"step": 238000
},
{
"epoch": 0.47,
"learning_rate": 2.9476805724432937e-05,
"loss": 1.9051,
"step": 238500
},
{
"epoch": 0.47,
"learning_rate": 2.942213319103895e-05,
"loss": 1.913,
"step": 239000
},
{
"epoch": 0.47,
"learning_rate": 2.936746065764497e-05,
"loss": 1.9067,
"step": 239500
},
{
"epoch": 0.47,
"learning_rate": 2.931278812425099e-05,
"loss": 1.9093,
"step": 240000
},
{
"epoch": 0.47,
"learning_rate": 2.9258115590857005e-05,
"loss": 1.9095,
"step": 240500
},
{
"epoch": 0.47,
"learning_rate": 2.920344305746302e-05,
"loss": 1.9129,
"step": 241000
},
{
"epoch": 0.48,
"learning_rate": 2.9148770524069035e-05,
"loss": 1.9119,
"step": 241500
},
{
"epoch": 0.48,
"learning_rate": 2.9094097990675056e-05,
"loss": 1.911,
"step": 242000
},
{
"epoch": 0.48,
"learning_rate": 2.903942545728107e-05,
"loss": 1.9078,
"step": 242500
},
{
"epoch": 0.48,
"learning_rate": 2.898475292388709e-05,
"loss": 1.9113,
"step": 243000
},
{
"epoch": 0.48,
"learning_rate": 2.8930080390493103e-05,
"loss": 1.9058,
"step": 243500
},
{
"epoch": 0.48,
"learning_rate": 2.8875407857099118e-05,
"loss": 1.9114,
"step": 244000
},
{
"epoch": 0.48,
"learning_rate": 2.882073532370514e-05,
"loss": 1.9073,
"step": 244500
},
{
"epoch": 0.48,
"learning_rate": 2.8766062790311154e-05,
"loss": 1.906,
"step": 245000
},
{
"epoch": 0.48,
"learning_rate": 2.871139025691717e-05,
"loss": 1.9072,
"step": 245500
},
{
"epoch": 0.48,
"learning_rate": 2.8656717723523186e-05,
"loss": 1.9083,
"step": 246000
},
{
"epoch": 0.49,
"learning_rate": 2.86020451901292e-05,
"loss": 1.9047,
"step": 246500
},
{
"epoch": 0.49,
"learning_rate": 2.8547372656735222e-05,
"loss": 1.9051,
"step": 247000
},
{
"epoch": 0.49,
"learning_rate": 2.8492700123341237e-05,
"loss": 1.9053,
"step": 247500
},
{
"epoch": 0.49,
"learning_rate": 2.843802758994725e-05,
"loss": 1.9107,
"step": 248000
},
{
"epoch": 0.49,
"learning_rate": 2.838335505655327e-05,
"loss": 1.9094,
"step": 248500
},
{
"epoch": 0.49,
"learning_rate": 2.8328682523159288e-05,
"loss": 1.9081,
"step": 249000
},
{
"epoch": 0.49,
"learning_rate": 2.8274009989765306e-05,
"loss": 1.9067,
"step": 249500
},
{
"epoch": 0.49,
"learning_rate": 2.821933745637132e-05,
"loss": 1.9054,
"step": 250000
},
{
"epoch": 0.49,
"learning_rate": 2.8164664922977335e-05,
"loss": 1.9007,
"step": 250500
},
{
"epoch": 0.49,
"learning_rate": 2.810999238958335e-05,
"loss": 1.9064,
"step": 251000
},
{
"epoch": 0.5,
"learning_rate": 2.805531985618937e-05,
"loss": 1.8992,
"step": 251500
},
{
"epoch": 0.5,
"learning_rate": 2.8000647322795386e-05,
"loss": 1.9074,
"step": 252000
},
{
"epoch": 0.5,
"learning_rate": 2.7945974789401404e-05,
"loss": 1.9061,
"step": 252500
},
{
"epoch": 0.5,
"learning_rate": 2.7891302256007418e-05,
"loss": 1.9017,
"step": 253000
},
{
"epoch": 0.5,
"learning_rate": 2.7836629722613433e-05,
"loss": 1.9018,
"step": 253500
},
{
"epoch": 0.5,
"learning_rate": 2.7781957189219454e-05,
"loss": 1.9027,
"step": 254000
},
{
"epoch": 0.5,
"learning_rate": 2.772728465582547e-05,
"loss": 1.9062,
"step": 254500
},
{
"epoch": 0.5,
"learning_rate": 2.7672612122431483e-05,
"loss": 1.9026,
"step": 255000
},
{
"epoch": 0.5,
"learning_rate": 2.76179395890375e-05,
"loss": 1.9,
"step": 255500
},
{
"epoch": 0.5,
"learning_rate": 2.7563267055643523e-05,
"loss": 1.9043,
"step": 256000
},
{
"epoch": 0.5,
"learning_rate": 2.7508594522249537e-05,
"loss": 1.9041,
"step": 256500
},
{
"epoch": 0.51,
"learning_rate": 2.7453921988855552e-05,
"loss": 1.9025,
"step": 257000
},
{
"epoch": 0.51,
"learning_rate": 2.7399249455461567e-05,
"loss": 1.8992,
"step": 257500
},
{
"epoch": 0.51,
"learning_rate": 2.7344576922067585e-05,
"loss": 1.9006,
"step": 258000
},
{
"epoch": 0.51,
"learning_rate": 2.7289904388673603e-05,
"loss": 1.8998,
"step": 258500
},
{
"epoch": 0.51,
"learning_rate": 2.723523185527962e-05,
"loss": 1.8978,
"step": 259000
},
{
"epoch": 0.51,
"learning_rate": 2.7180559321885635e-05,
"loss": 1.8991,
"step": 259500
},
{
"epoch": 0.51,
"learning_rate": 2.712588678849165e-05,
"loss": 1.898,
"step": 260000
},
{
"epoch": 0.51,
"learning_rate": 2.707121425509767e-05,
"loss": 1.9011,
"step": 260500
},
{
"epoch": 0.51,
"learning_rate": 2.7016541721703686e-05,
"loss": 1.8968,
"step": 261000
},
{
"epoch": 0.51,
"learning_rate": 2.69618691883097e-05,
"loss": 1.9062,
"step": 261500
},
{
"epoch": 0.52,
"learning_rate": 2.690719665491572e-05,
"loss": 1.9018,
"step": 262000
},
{
"epoch": 0.52,
"learning_rate": 2.6852524121521733e-05,
"loss": 1.9018,
"step": 262500
},
{
"epoch": 0.52,
"learning_rate": 2.6797851588127754e-05,
"loss": 1.8995,
"step": 263000
},
{
"epoch": 0.52,
"learning_rate": 2.674317905473377e-05,
"loss": 1.8954,
"step": 263500
},
{
"epoch": 0.52,
"learning_rate": 2.6688506521339784e-05,
"loss": 1.8992,
"step": 264000
},
{
"epoch": 0.52,
"learning_rate": 2.6633833987945798e-05,
"loss": 1.8957,
"step": 264500
},
{
"epoch": 0.52,
"learning_rate": 2.6579161454551816e-05,
"loss": 1.9017,
"step": 265000
},
{
"epoch": 0.52,
"learning_rate": 2.6524488921157838e-05,
"loss": 1.8975,
"step": 265500
},
{
"epoch": 0.52,
"learning_rate": 2.6469816387763852e-05,
"loss": 1.8987,
"step": 266000
},
{
"epoch": 0.52,
"learning_rate": 2.6415143854369867e-05,
"loss": 1.8962,
"step": 266500
},
{
"epoch": 0.53,
"learning_rate": 2.636047132097588e-05,
"loss": 1.9013,
"step": 267000
},
{
"epoch": 0.53,
"learning_rate": 2.6305798787581903e-05,
"loss": 1.9004,
"step": 267500
},
{
"epoch": 0.53,
"learning_rate": 2.6251126254187917e-05,
"loss": 1.8955,
"step": 268000
},
{
"epoch": 0.53,
"learning_rate": 2.6196453720793935e-05,
"loss": 1.8956,
"step": 268500
},
{
"epoch": 0.53,
"learning_rate": 2.614178118739995e-05,
"loss": 1.8941,
"step": 269000
},
{
"epoch": 0.53,
"learning_rate": 2.6087108654005965e-05,
"loss": 1.9004,
"step": 269500
},
{
"epoch": 0.53,
"learning_rate": 2.6032436120611986e-05,
"loss": 1.8978,
"step": 270000
},
{
"epoch": 0.53,
"learning_rate": 2.5977763587218e-05,
"loss": 1.8954,
"step": 270500
},
{
"epoch": 0.53,
"learning_rate": 2.5923091053824015e-05,
"loss": 1.8948,
"step": 271000
},
{
"epoch": 0.53,
"learning_rate": 2.5868418520430033e-05,
"loss": 1.8946,
"step": 271500
},
{
"epoch": 0.54,
"learning_rate": 2.5813745987036048e-05,
"loss": 1.897,
"step": 272000
},
{
"epoch": 0.54,
"learning_rate": 2.575907345364207e-05,
"loss": 1.897,
"step": 272500
},
{
"epoch": 0.54,
"learning_rate": 2.5704400920248084e-05,
"loss": 1.8961,
"step": 273000
},
{
"epoch": 0.54,
"learning_rate": 2.56497283868541e-05,
"loss": 1.8925,
"step": 273500
},
{
"epoch": 0.54,
"learning_rate": 2.5595055853460113e-05,
"loss": 1.8938,
"step": 274000
},
{
"epoch": 0.54,
"learning_rate": 2.5540383320066135e-05,
"loss": 1.8928,
"step": 274500
},
{
"epoch": 0.54,
"learning_rate": 2.5485710786672153e-05,
"loss": 1.8963,
"step": 275000
},
{
"epoch": 0.54,
"learning_rate": 2.5431038253278167e-05,
"loss": 1.8923,
"step": 275500
},
{
"epoch": 0.54,
"learning_rate": 2.5376365719884182e-05,
"loss": 1.8882,
"step": 276000
},
{
"epoch": 0.54,
"learning_rate": 2.5321693186490196e-05,
"loss": 1.8887,
"step": 276500
},
{
"epoch": 0.55,
"learning_rate": 2.5267020653096218e-05,
"loss": 1.8903,
"step": 277000
},
{
"epoch": 0.55,
"learning_rate": 2.5212348119702232e-05,
"loss": 1.8941,
"step": 277500
},
{
"epoch": 0.55,
"learning_rate": 2.515767558630825e-05,
"loss": 1.8943,
"step": 278000
},
{
"epoch": 0.55,
"learning_rate": 2.5103003052914265e-05,
"loss": 1.8924,
"step": 278500
},
{
"epoch": 0.55,
"learning_rate": 2.504833051952028e-05,
"loss": 1.8855,
"step": 279000
},
{
"epoch": 0.55,
"learning_rate": 2.4993657986126298e-05,
"loss": 1.8918,
"step": 279500
},
{
"epoch": 0.55,
"learning_rate": 2.4938985452732316e-05,
"loss": 1.8891,
"step": 280000
},
{
"epoch": 0.55,
"learning_rate": 2.488431291933833e-05,
"loss": 1.8874,
"step": 280500
},
{
"epoch": 0.55,
"learning_rate": 2.4829640385944348e-05,
"loss": 1.8907,
"step": 281000
},
{
"epoch": 0.55,
"learning_rate": 2.4774967852550366e-05,
"loss": 1.8913,
"step": 281500
},
{
"epoch": 0.56,
"learning_rate": 2.472029531915638e-05,
"loss": 1.8888,
"step": 282000
},
{
"epoch": 0.56,
"learning_rate": 2.46656227857624e-05,
"loss": 1.8924,
"step": 282500
},
{
"epoch": 0.56,
"learning_rate": 2.4610950252368413e-05,
"loss": 1.8889,
"step": 283000
},
{
"epoch": 0.56,
"learning_rate": 2.455627771897443e-05,
"loss": 1.8866,
"step": 283500
},
{
"epoch": 0.56,
"learning_rate": 2.450160518558045e-05,
"loss": 1.8915,
"step": 284000
},
{
"epoch": 0.56,
"learning_rate": 2.4446932652186467e-05,
"loss": 1.8896,
"step": 284500
},
{
"epoch": 0.56,
"learning_rate": 2.4392260118792482e-05,
"loss": 1.8921,
"step": 285000
},
{
"epoch": 0.56,
"learning_rate": 2.43375875853985e-05,
"loss": 1.8854,
"step": 285500
},
{
"epoch": 0.56,
"learning_rate": 2.4282915052004515e-05,
"loss": 1.8887,
"step": 286000
},
{
"epoch": 0.56,
"learning_rate": 2.422824251861053e-05,
"loss": 1.8892,
"step": 286500
},
{
"epoch": 0.56,
"learning_rate": 2.4173569985216547e-05,
"loss": 1.8908,
"step": 287000
},
{
"epoch": 0.57,
"learning_rate": 2.4118897451822565e-05,
"loss": 1.8901,
"step": 287500
},
{
"epoch": 0.57,
"learning_rate": 2.4064224918428583e-05,
"loss": 1.8906,
"step": 288000
},
{
"epoch": 0.57,
"learning_rate": 2.4009552385034598e-05,
"loss": 1.8862,
"step": 288500
},
{
"epoch": 0.57,
"learning_rate": 2.3954879851640616e-05,
"loss": 1.8915,
"step": 289000
},
{
"epoch": 0.57,
"learning_rate": 2.390020731824663e-05,
"loss": 1.8891,
"step": 289500
},
{
"epoch": 0.57,
"learning_rate": 2.3845534784852645e-05,
"loss": 1.8855,
"step": 290000
},
{
"epoch": 0.57,
"learning_rate": 2.3790862251458667e-05,
"loss": 1.8828,
"step": 290500
},
{
"epoch": 0.57,
"learning_rate": 2.373618971806468e-05,
"loss": 1.8845,
"step": 291000
},
{
"epoch": 0.57,
"learning_rate": 2.36815171846707e-05,
"loss": 1.8824,
"step": 291500
},
{
"epoch": 0.57,
"learning_rate": 2.3626844651276714e-05,
"loss": 1.8832,
"step": 292000
},
{
"epoch": 0.58,
"learning_rate": 2.3572172117882732e-05,
"loss": 1.8862,
"step": 292500
},
{
"epoch": 0.58,
"learning_rate": 2.3517499584488746e-05,
"loss": 1.8862,
"step": 293000
},
{
"epoch": 0.58,
"learning_rate": 2.3462827051094764e-05,
"loss": 1.8837,
"step": 293500
},
{
"epoch": 0.58,
"learning_rate": 2.3408154517700782e-05,
"loss": 1.8864,
"step": 294000
},
{
"epoch": 0.58,
"learning_rate": 2.3353481984306797e-05,
"loss": 1.886,
"step": 294500
},
{
"epoch": 0.58,
"learning_rate": 2.3298809450912815e-05,
"loss": 1.884,
"step": 295000
},
{
"epoch": 0.58,
"learning_rate": 2.324413691751883e-05,
"loss": 1.8834,
"step": 295500
},
{
"epoch": 0.58,
"learning_rate": 2.3189464384124848e-05,
"loss": 1.8848,
"step": 296000
},
{
"epoch": 0.58,
"learning_rate": 2.3134791850730862e-05,
"loss": 1.8856,
"step": 296500
},
{
"epoch": 0.58,
"learning_rate": 2.308011931733688e-05,
"loss": 1.8874,
"step": 297000
},
{
"epoch": 0.59,
"learning_rate": 2.3025446783942898e-05,
"loss": 1.8775,
"step": 297500
},
{
"epoch": 0.59,
"learning_rate": 2.2970774250548913e-05,
"loss": 1.8859,
"step": 298000
},
{
"epoch": 0.59,
"learning_rate": 2.291610171715493e-05,
"loss": 1.8799,
"step": 298500
},
{
"epoch": 0.59,
"learning_rate": 2.2861429183760945e-05,
"loss": 1.8833,
"step": 299000
},
{
"epoch": 0.59,
"learning_rate": 2.2806756650366963e-05,
"loss": 1.882,
"step": 299500
},
{
"epoch": 0.59,
"learning_rate": 2.275208411697298e-05,
"loss": 1.8872,
"step": 300000
},
{
"epoch": 0.67,
"learning_rate": 1.8590144171628205e-05,
"loss": 1.8825,
"step": 300500
},
{
"epoch": 0.67,
"learning_rate": 1.852863750430547e-05,
"loss": 1.8818,
"step": 301000
},
{
"epoch": 0.67,
"learning_rate": 1.846713083698273e-05,
"loss": 1.8859,
"step": 301500
},
{
"epoch": 0.67,
"learning_rate": 1.8405624169659992e-05,
"loss": 1.8773,
"step": 302000
},
{
"epoch": 0.67,
"learning_rate": 1.8344117502337256e-05,
"loss": 1.8764,
"step": 302500
},
{
"epoch": 0.67,
"learning_rate": 1.8282610835014516e-05,
"loss": 1.8826,
"step": 303000
},
{
"epoch": 0.67,
"learning_rate": 1.822110416769178e-05,
"loss": 1.8805,
"step": 303500
},
{
"epoch": 0.67,
"learning_rate": 1.815959750036904e-05,
"loss": 1.8787,
"step": 304000
},
{
"epoch": 0.67,
"learning_rate": 1.8098090833046304e-05,
"loss": 1.8793,
"step": 304500
},
{
"epoch": 0.68,
"learning_rate": 1.8036584165723567e-05,
"loss": 1.8797,
"step": 305000
},
{
"epoch": 0.68,
"learning_rate": 1.7975077498400827e-05,
"loss": 1.875,
"step": 305500
},
{
"epoch": 0.68,
"learning_rate": 1.7913570831078088e-05,
"loss": 1.8747,
"step": 306000
},
{
"epoch": 0.68,
"learning_rate": 1.785206416375535e-05,
"loss": 1.8795,
"step": 306500
},
{
"epoch": 0.68,
"learning_rate": 1.7790557496432615e-05,
"loss": 1.8761,
"step": 307000
},
{
"epoch": 0.68,
"learning_rate": 1.7729050829109875e-05,
"loss": 1.878,
"step": 307500
},
{
"epoch": 0.68,
"learning_rate": 1.766754416178714e-05,
"loss": 1.8743,
"step": 308000
},
{
"epoch": 0.68,
"learning_rate": 1.76060374944644e-05,
"loss": 1.8762,
"step": 308500
},
{
"epoch": 0.68,
"learning_rate": 1.7544530827141666e-05,
"loss": 1.8739,
"step": 309000
},
{
"epoch": 0.69,
"learning_rate": 1.7483024159818926e-05,
"loss": 1.8747,
"step": 309500
},
{
"epoch": 0.69,
"learning_rate": 1.7421517492496186e-05,
"loss": 1.8703,
"step": 310000
},
{
"epoch": 0.69,
"learning_rate": 1.736001082517345e-05,
"loss": 1.8709,
"step": 310500
},
{
"epoch": 0.69,
"learning_rate": 1.729850415785071e-05,
"loss": 1.8757,
"step": 311000
},
{
"epoch": 0.69,
"learning_rate": 1.7236997490527974e-05,
"loss": 1.8746,
"step": 311500
},
{
"epoch": 0.69,
"learning_rate": 1.7175490823205237e-05,
"loss": 1.8744,
"step": 312000
},
{
"epoch": 0.69,
"learning_rate": 1.7113984155882498e-05,
"loss": 1.8742,
"step": 312500
},
{
"epoch": 0.69,
"learning_rate": 1.7052477488559758e-05,
"loss": 1.8791,
"step": 313000
},
{
"epoch": 0.69,
"learning_rate": 1.6990970821237025e-05,
"loss": 1.871,
"step": 313500
},
{
"epoch": 0.7,
"learning_rate": 1.6929464153914285e-05,
"loss": 1.8742,
"step": 314000
},
{
"epoch": 0.7,
"learning_rate": 1.686795748659155e-05,
"loss": 1.873,
"step": 314500
},
{
"epoch": 0.7,
"learning_rate": 1.680645081926881e-05,
"loss": 1.8775,
"step": 315000
},
{
"epoch": 0.7,
"learning_rate": 1.6744944151946072e-05,
"loss": 1.8746,
"step": 315500
},
{
"epoch": 0.7,
"learning_rate": 1.6683437484623336e-05,
"loss": 1.8686,
"step": 316000
},
{
"epoch": 0.7,
"learning_rate": 1.6621930817300596e-05,
"loss": 1.8667,
"step": 316500
},
{
"epoch": 0.7,
"learning_rate": 1.6560424149977856e-05,
"loss": 1.8716,
"step": 317000
},
{
"epoch": 0.7,
"learning_rate": 1.649891748265512e-05,
"loss": 1.8693,
"step": 317500
},
{
"epoch": 0.7,
"learning_rate": 1.6437410815332384e-05,
"loss": 1.8752,
"step": 318000
},
{
"epoch": 0.71,
"learning_rate": 1.6375904148009644e-05,
"loss": 1.8677,
"step": 318500
},
{
"epoch": 0.71,
"learning_rate": 1.6314397480686908e-05,
"loss": 1.8724,
"step": 319000
},
{
"epoch": 0.71,
"learning_rate": 1.6252890813364168e-05,
"loss": 1.8675,
"step": 319500
},
{
"epoch": 0.71,
"learning_rate": 1.619138414604143e-05,
"loss": 1.8674,
"step": 320000
},
{
"epoch": 0.71,
"learning_rate": 1.6129877478718695e-05,
"loss": 1.8689,
"step": 320500
},
{
"epoch": 0.71,
"learning_rate": 1.6068370811395955e-05,
"loss": 1.8717,
"step": 321000
},
{
"epoch": 0.71,
"learning_rate": 1.600686414407322e-05,
"loss": 1.8695,
"step": 321500
},
{
"epoch": 0.71,
"learning_rate": 1.594535747675048e-05,
"loss": 1.869,
"step": 322000
},
{
"epoch": 0.71,
"learning_rate": 1.5883850809427743e-05,
"loss": 1.8737,
"step": 322500
},
{
"epoch": 0.72,
"learning_rate": 1.5822344142105006e-05,
"loss": 1.868,
"step": 323000
},
{
"epoch": 0.72,
"learning_rate": 1.5760837474782266e-05,
"loss": 1.87,
"step": 323500
},
{
"epoch": 0.72,
"learning_rate": 1.5699330807459527e-05,
"loss": 1.8717,
"step": 324000
},
{
"epoch": 0.72,
"learning_rate": 1.5637824140136794e-05,
"loss": 1.8675,
"step": 324500
},
{
"epoch": 0.72,
"learning_rate": 1.5576317472814054e-05,
"loss": 1.8716,
"step": 325000
},
{
"epoch": 0.72,
"learning_rate": 1.5514810805491317e-05,
"loss": 1.8706,
"step": 325500
},
{
"epoch": 0.72,
"learning_rate": 1.5453304138168578e-05,
"loss": 1.867,
"step": 326000
},
{
"epoch": 0.72,
"learning_rate": 1.5391797470845838e-05,
"loss": 1.8638,
"step": 326500
},
{
"epoch": 0.72,
"learning_rate": 1.5330290803523105e-05,
"loss": 1.8642,
"step": 327000
},
{
"epoch": 0.73,
"learning_rate": 1.5268784136200365e-05,
"loss": 1.8672,
"step": 327500
},
{
"epoch": 0.73,
"learning_rate": 1.5207277468877625e-05,
"loss": 1.8677,
"step": 328000
},
{
"epoch": 0.73,
"learning_rate": 1.514577080155489e-05,
"loss": 1.867,
"step": 328500
},
{
"epoch": 0.73,
"learning_rate": 1.508426413423215e-05,
"loss": 1.8665,
"step": 329000
},
{
"epoch": 0.73,
"learning_rate": 1.5022757466909413e-05,
"loss": 1.8673,
"step": 329500
},
{
"epoch": 0.73,
"learning_rate": 1.4961250799586676e-05,
"loss": 1.8719,
"step": 330000
},
{
"epoch": 0.73,
"learning_rate": 1.4899744132263938e-05,
"loss": 1.8652,
"step": 330500
},
{
"epoch": 0.73,
"learning_rate": 1.4838237464941202e-05,
"loss": 1.865,
"step": 331000
},
{
"epoch": 0.73,
"learning_rate": 1.4776730797618462e-05,
"loss": 1.8645,
"step": 331500
},
{
"epoch": 0.74,
"learning_rate": 1.4715224130295724e-05,
"loss": 1.8661,
"step": 332000
},
{
"epoch": 0.74,
"learning_rate": 1.4653717462972988e-05,
"loss": 1.8658,
"step": 332500
},
{
"epoch": 0.74,
"learning_rate": 1.459221079565025e-05,
"loss": 1.8679,
"step": 333000
},
{
"epoch": 0.74,
"learning_rate": 1.453070412832751e-05,
"loss": 1.867,
"step": 333500
},
{
"epoch": 0.74,
"learning_rate": 1.4469197461004775e-05,
"loss": 1.8648,
"step": 334000
},
{
"epoch": 0.74,
"learning_rate": 1.4407690793682035e-05,
"loss": 1.8678,
"step": 334500
},
{
"epoch": 0.74,
"learning_rate": 1.4346184126359297e-05,
"loss": 1.8665,
"step": 335000
},
{
"epoch": 0.74,
"learning_rate": 1.428467745903656e-05,
"loss": 1.8717,
"step": 335500
},
{
"epoch": 0.74,
"learning_rate": 1.4223170791713821e-05,
"loss": 1.8683,
"step": 336000
},
{
"epoch": 0.75,
"learning_rate": 1.4161664124391086e-05,
"loss": 1.8657,
"step": 336500
},
{
"epoch": 0.75,
"learning_rate": 1.4100157457068347e-05,
"loss": 1.864,
"step": 337000
},
{
"epoch": 0.75,
"learning_rate": 1.4038650789745608e-05,
"loss": 1.8622,
"step": 337500
},
{
"epoch": 0.75,
"learning_rate": 1.3977144122422872e-05,
"loss": 1.8679,
"step": 338000
},
{
"epoch": 0.75,
"learning_rate": 1.3915637455100134e-05,
"loss": 1.8636,
"step": 338500
},
{
"epoch": 0.75,
"learning_rate": 1.3854130787777394e-05,
"loss": 1.8688,
"step": 339000
},
{
"epoch": 0.75,
"learning_rate": 1.3792624120454658e-05,
"loss": 1.8667,
"step": 339500
},
{
"epoch": 0.75,
"learning_rate": 1.373111745313192e-05,
"loss": 1.8619,
"step": 340000
},
{
"epoch": 0.75,
"learning_rate": 1.3669610785809183e-05,
"loss": 1.8677,
"step": 340500
},
{
"epoch": 0.76,
"learning_rate": 1.3608104118486445e-05,
"loss": 1.8635,
"step": 341000
},
{
"epoch": 0.76,
"learning_rate": 1.3546597451163705e-05,
"loss": 1.8602,
"step": 341500
},
{
"epoch": 0.76,
"learning_rate": 1.348509078384097e-05,
"loss": 1.8627,
"step": 342000
},
{
"epoch": 0.76,
"learning_rate": 1.3423584116518231e-05,
"loss": 1.8631,
"step": 342500
},
{
"epoch": 0.76,
"learning_rate": 1.3362077449195493e-05,
"loss": 1.8664,
"step": 343000
},
{
"epoch": 0.76,
"learning_rate": 1.3300570781872756e-05,
"loss": 1.8601,
"step": 343500
},
{
"epoch": 0.76,
"learning_rate": 1.3239064114550018e-05,
"loss": 1.8602,
"step": 344000
},
{
"epoch": 0.76,
"learning_rate": 1.3177557447227279e-05,
"loss": 1.8646,
"step": 344500
},
{
"epoch": 0.76,
"learning_rate": 1.3116050779904542e-05,
"loss": 1.8606,
"step": 345000
},
{
"epoch": 0.77,
"learning_rate": 1.3054544112581804e-05,
"loss": 1.8605,
"step": 345500
},
{
"epoch": 0.77,
"learning_rate": 1.2993037445259068e-05,
"loss": 1.8622,
"step": 346000
},
{
"epoch": 0.77,
"learning_rate": 1.293153077793633e-05,
"loss": 1.8609,
"step": 346500
},
{
"epoch": 0.77,
"learning_rate": 1.287002411061359e-05,
"loss": 1.8543,
"step": 347000
},
{
"epoch": 0.77,
"learning_rate": 1.2808517443290855e-05,
"loss": 1.8598,
"step": 347500
},
{
"epoch": 0.77,
"learning_rate": 1.2747010775968115e-05,
"loss": 1.8589,
"step": 348000
},
{
"epoch": 0.77,
"learning_rate": 1.2685504108645377e-05,
"loss": 1.8633,
"step": 348500
},
{
"epoch": 0.77,
"learning_rate": 1.2623997441322641e-05,
"loss": 1.8633,
"step": 349000
},
{
"epoch": 0.77,
"learning_rate": 1.2562490773999903e-05,
"loss": 1.8596,
"step": 349500
},
{
"epoch": 0.77,
"learning_rate": 1.2500984106677163e-05,
"loss": 1.8577,
"step": 350000
},
{
"epoch": 0.78,
"learning_rate": 1.2439477439354427e-05,
"loss": 1.8595,
"step": 350500
},
{
"epoch": 0.78,
"learning_rate": 1.2377970772031689e-05,
"loss": 1.8702,
"step": 351000
},
{
"epoch": 0.78,
"learning_rate": 1.231646410470895e-05,
"loss": 1.8531,
"step": 351500
},
{
"epoch": 0.78,
"learning_rate": 1.2254957437386214e-05,
"loss": 1.8599,
"step": 352000
},
{
"epoch": 0.78,
"learning_rate": 1.2193450770063474e-05,
"loss": 1.862,
"step": 352500
},
{
"epoch": 0.78,
"learning_rate": 1.2131944102740738e-05,
"loss": 1.8601,
"step": 353000
},
{
"epoch": 0.78,
"learning_rate": 1.2070437435418e-05,
"loss": 1.8608,
"step": 353500
},
{
"epoch": 0.78,
"learning_rate": 1.2008930768095263e-05,
"loss": 1.8589,
"step": 354000
},
{
"epoch": 0.78,
"learning_rate": 1.1947424100772524e-05,
"loss": 1.8623,
"step": 354500
},
{
"epoch": 0.79,
"learning_rate": 1.1885917433449786e-05,
"loss": 1.8616,
"step": 355000
},
{
"epoch": 0.79,
"learning_rate": 1.1824410766127049e-05,
"loss": 1.8555,
"step": 355500
},
{
"epoch": 0.79,
"learning_rate": 1.1762904098804311e-05,
"loss": 1.8579,
"step": 356000
},
{
"epoch": 0.79,
"learning_rate": 1.1701397431481573e-05,
"loss": 1.8634,
"step": 356500
},
{
"epoch": 0.79,
"learning_rate": 1.1639890764158835e-05,
"loss": 1.8557,
"step": 357000
},
{
"epoch": 0.79,
"learning_rate": 1.1578384096836098e-05,
"loss": 1.8579,
"step": 357500
},
{
"epoch": 0.79,
"learning_rate": 1.1516877429513359e-05,
"loss": 1.8614,
"step": 358000
},
{
"epoch": 0.79,
"learning_rate": 1.1455370762190622e-05,
"loss": 1.8598,
"step": 358500
},
{
"epoch": 0.79,
"learning_rate": 1.1393864094867884e-05,
"loss": 1.8567,
"step": 359000
},
{
"epoch": 0.8,
"learning_rate": 1.1332357427545146e-05,
"loss": 1.855,
"step": 359500
},
{
"epoch": 0.8,
"learning_rate": 1.1270850760222408e-05,
"loss": 1.8578,
"step": 360000
},
{
"epoch": 0.8,
"learning_rate": 1.120934409289967e-05,
"loss": 1.856,
"step": 360500
},
{
"epoch": 0.8,
"learning_rate": 1.1147837425576934e-05,
"loss": 1.8532,
"step": 361000
},
{
"epoch": 0.8,
"learning_rate": 1.1086330758254195e-05,
"loss": 1.8625,
"step": 361500
},
{
"epoch": 0.8,
"learning_rate": 1.1024824090931457e-05,
"loss": 1.8591,
"step": 362000
},
{
"epoch": 0.8,
"learning_rate": 1.096331742360872e-05,
"loss": 1.8595,
"step": 362500
},
{
"epoch": 0.8,
"learning_rate": 1.0901810756285983e-05,
"loss": 1.8557,
"step": 363000
},
{
"epoch": 0.8,
"learning_rate": 1.0840304088963243e-05,
"loss": 1.8576,
"step": 363500
},
{
"epoch": 0.81,
"learning_rate": 1.0778797421640507e-05,
"loss": 1.8548,
"step": 364000
},
{
"epoch": 0.81,
"learning_rate": 1.0717290754317769e-05,
"loss": 1.8605,
"step": 364500
},
{
"epoch": 0.81,
"learning_rate": 1.065578408699503e-05,
"loss": 1.8505,
"step": 365000
},
{
"epoch": 0.81,
"learning_rate": 1.0594277419672292e-05,
"loss": 1.8578,
"step": 365500
},
{
"epoch": 0.81,
"learning_rate": 1.0532770752349554e-05,
"loss": 1.857,
"step": 366000
},
{
"epoch": 0.81,
"learning_rate": 1.0471264085026818e-05,
"loss": 1.8545,
"step": 366500
},
{
"epoch": 0.81,
"learning_rate": 1.040975741770408e-05,
"loss": 1.8557,
"step": 367000
},
{
"epoch": 0.81,
"learning_rate": 1.0348250750381342e-05,
"loss": 1.8554,
"step": 367500
},
{
"epoch": 0.81,
"learning_rate": 1.0286744083058604e-05,
"loss": 1.8548,
"step": 368000
},
{
"epoch": 0.82,
"learning_rate": 1.0225237415735867e-05,
"loss": 1.8558,
"step": 368500
},
{
"epoch": 0.82,
"learning_rate": 1.0163730748413128e-05,
"loss": 1.8564,
"step": 369000
},
{
"epoch": 0.82,
"learning_rate": 1.0102224081090391e-05,
"loss": 1.8581,
"step": 369500
},
{
"epoch": 0.82,
"learning_rate": 1.0040717413767653e-05,
"loss": 1.8541,
"step": 370000
},
{
"epoch": 0.82,
"learning_rate": 9.979210746444915e-06,
"loss": 1.8522,
"step": 370500
},
{
"epoch": 0.82,
"learning_rate": 9.917704079122177e-06,
"loss": 1.8551,
"step": 371000
},
{
"epoch": 0.82,
"learning_rate": 9.856197411799439e-06,
"loss": 1.8572,
"step": 371500
},
{
"epoch": 0.82,
"learning_rate": 9.794690744476702e-06,
"loss": 1.8544,
"step": 372000
},
{
"epoch": 0.82,
"learning_rate": 9.733184077153964e-06,
"loss": 1.8509,
"step": 372500
},
{
"epoch": 0.83,
"learning_rate": 9.671677409831226e-06,
"loss": 1.8538,
"step": 373000
},
{
"epoch": 0.83,
"learning_rate": 9.610170742508488e-06,
"loss": 1.8561,
"step": 373500
},
{
"epoch": 0.83,
"learning_rate": 9.54866407518575e-06,
"loss": 1.8559,
"step": 374000
},
{
"epoch": 0.83,
"learning_rate": 9.487157407863014e-06,
"loss": 1.8559,
"step": 374500
},
{
"epoch": 0.83,
"learning_rate": 9.425650740540274e-06,
"loss": 1.8507,
"step": 375000
},
{
"epoch": 0.83,
"learning_rate": 9.364144073217537e-06,
"loss": 1.8526,
"step": 375500
},
{
"epoch": 0.83,
"learning_rate": 9.3026374058948e-06,
"loss": 1.8552,
"step": 376000
},
{
"epoch": 0.83,
"learning_rate": 9.241130738572061e-06,
"loss": 1.8526,
"step": 376500
},
{
"epoch": 0.83,
"learning_rate": 9.179624071249323e-06,
"loss": 1.8534,
"step": 377000
},
{
"epoch": 0.84,
"learning_rate": 9.118117403926587e-06,
"loss": 1.8539,
"step": 377500
},
{
"epoch": 0.84,
"learning_rate": 9.056610736603849e-06,
"loss": 1.8558,
"step": 378000
},
{
"epoch": 0.84,
"learning_rate": 8.99510406928111e-06,
"loss": 1.8532,
"step": 378500
},
{
"epoch": 0.84,
"learning_rate": 8.933597401958373e-06,
"loss": 1.8557,
"step": 379000
},
{
"epoch": 0.84,
"learning_rate": 8.872090734635634e-06,
"loss": 1.8528,
"step": 379500
},
{
"epoch": 0.84,
"learning_rate": 8.810584067312898e-06,
"loss": 1.8554,
"step": 380000
},
{
"epoch": 0.84,
"learning_rate": 8.749077399990158e-06,
"loss": 1.8508,
"step": 380500
},
{
"epoch": 0.84,
"learning_rate": 8.687570732667422e-06,
"loss": 1.8505,
"step": 381000
},
{
"epoch": 0.84,
"learning_rate": 8.626064065344684e-06,
"loss": 1.8489,
"step": 381500
},
{
"epoch": 0.85,
"learning_rate": 8.564557398021946e-06,
"loss": 1.8519,
"step": 382000
},
{
"epoch": 0.85,
"learning_rate": 8.503050730699208e-06,
"loss": 1.8565,
"step": 382500
},
{
"epoch": 0.85,
"learning_rate": 8.441544063376471e-06,
"loss": 1.852,
"step": 383000
},
{
"epoch": 0.85,
"learning_rate": 8.380037396053733e-06,
"loss": 1.8553,
"step": 383500
},
{
"epoch": 0.85,
"learning_rate": 8.318530728730995e-06,
"loss": 1.8512,
"step": 384000
},
{
"epoch": 0.85,
"learning_rate": 8.257024061408257e-06,
"loss": 1.8521,
"step": 384500
},
{
"epoch": 0.85,
"learning_rate": 8.195517394085519e-06,
"loss": 1.8495,
"step": 385000
},
{
"epoch": 0.85,
"learning_rate": 8.134010726762783e-06,
"loss": 1.8563,
"step": 385500
},
{
"epoch": 0.85,
"learning_rate": 8.072504059440043e-06,
"loss": 1.8524,
"step": 386000
},
{
"epoch": 0.86,
"learning_rate": 8.010997392117306e-06,
"loss": 1.8537,
"step": 386500
},
{
"epoch": 0.86,
"learning_rate": 7.949490724794568e-06,
"loss": 1.8481,
"step": 387000
},
{
"epoch": 0.86,
"learning_rate": 7.88798405747183e-06,
"loss": 1.8521,
"step": 387500
},
{
"epoch": 0.86,
"learning_rate": 7.826477390149092e-06,
"loss": 1.8488,
"step": 388000
},
{
"epoch": 0.86,
"learning_rate": 7.764970722826356e-06,
"loss": 1.856,
"step": 388500
},
{
"epoch": 0.86,
"learning_rate": 7.703464055503618e-06,
"loss": 1.8502,
"step": 389000
},
{
"epoch": 0.86,
"learning_rate": 7.64195738818088e-06,
"loss": 1.8534,
"step": 389500
},
{
"epoch": 0.86,
"learning_rate": 7.580450720858141e-06,
"loss": 1.8481,
"step": 390000
},
{
"epoch": 0.86,
"learning_rate": 7.518944053535404e-06,
"loss": 1.8516,
"step": 390500
},
{
"epoch": 0.87,
"learning_rate": 7.457437386212666e-06,
"loss": 1.8508,
"step": 391000
},
{
"epoch": 0.87,
"learning_rate": 7.395930718889928e-06,
"loss": 1.8442,
"step": 391500
},
{
"epoch": 0.87,
"learning_rate": 7.33442405156719e-06,
"loss": 1.8469,
"step": 392000
},
{
"epoch": 0.87,
"learning_rate": 7.272917384244453e-06,
"loss": 1.85,
"step": 392500
},
{
"epoch": 0.87,
"learning_rate": 7.211410716921714e-06,
"loss": 1.8454,
"step": 393000
},
{
"epoch": 0.87,
"learning_rate": 7.1499040495989765e-06,
"loss": 1.8523,
"step": 393500
},
{
"epoch": 0.87,
"learning_rate": 7.088397382276239e-06,
"loss": 1.8479,
"step": 394000
},
{
"epoch": 0.87,
"learning_rate": 7.026890714953502e-06,
"loss": 1.8438,
"step": 394500
},
{
"epoch": 0.87,
"learning_rate": 6.965384047630763e-06,
"loss": 1.8491,
"step": 395000
},
{
"epoch": 0.88,
"learning_rate": 6.903877380308026e-06,
"loss": 1.8492,
"step": 395500
},
{
"epoch": 0.88,
"learning_rate": 6.842370712985289e-06,
"loss": 1.8506,
"step": 396000
},
{
"epoch": 0.88,
"learning_rate": 6.7808640456625505e-06,
"loss": 1.8511,
"step": 396500
},
{
"epoch": 0.88,
"learning_rate": 6.719357378339812e-06,
"loss": 1.8479,
"step": 397000
},
{
"epoch": 0.88,
"learning_rate": 6.657850711017074e-06,
"loss": 1.8474,
"step": 397500
},
{
"epoch": 0.88,
"learning_rate": 6.596344043694337e-06,
"loss": 1.8472,
"step": 398000
},
{
"epoch": 0.88,
"learning_rate": 6.534837376371598e-06,
"loss": 1.8536,
"step": 398500
},
{
"epoch": 0.88,
"learning_rate": 6.473330709048861e-06,
"loss": 1.8487,
"step": 399000
},
{
"epoch": 0.88,
"learning_rate": 6.411824041726124e-06,
"loss": 1.8507,
"step": 399500
},
{
"epoch": 0.89,
"learning_rate": 6.3503173744033864e-06,
"loss": 1.8478,
"step": 400000
},
{
"epoch": 0.89,
"learning_rate": 6.2888107070806475e-06,
"loss": 1.8488,
"step": 400500
},
{
"epoch": 0.89,
"learning_rate": 6.22730403975791e-06,
"loss": 1.8462,
"step": 401000
},
{
"epoch": 0.89,
"learning_rate": 6.165797372435172e-06,
"loss": 1.8501,
"step": 401500
},
{
"epoch": 0.89,
"learning_rate": 6.104290705112434e-06,
"loss": 1.8495,
"step": 402000
},
{
"epoch": 0.89,
"learning_rate": 6.042784037789697e-06,
"loss": 1.8479,
"step": 402500
},
{
"epoch": 0.89,
"learning_rate": 5.981277370466959e-06,
"loss": 1.8474,
"step": 403000
},
{
"epoch": 0.89,
"learning_rate": 5.919770703144221e-06,
"loss": 1.851,
"step": 403500
},
{
"epoch": 0.89,
"learning_rate": 5.8582640358214834e-06,
"loss": 1.8451,
"step": 404000
},
{
"epoch": 0.9,
"learning_rate": 5.796757368498745e-06,
"loss": 1.8458,
"step": 404500
},
{
"epoch": 0.9,
"learning_rate": 5.735250701176008e-06,
"loss": 1.8485,
"step": 405000
},
{
"epoch": 0.9,
"learning_rate": 5.67374403385327e-06,
"loss": 1.8494,
"step": 405500
},
{
"epoch": 0.9,
"learning_rate": 5.612237366530533e-06,
"loss": 1.8437,
"step": 406000
},
{
"epoch": 0.9,
"learning_rate": 5.550730699207794e-06,
"loss": 1.8435,
"step": 406500
},
{
"epoch": 0.9,
"learning_rate": 5.489224031885057e-06,
"loss": 1.8472,
"step": 407000
},
{
"epoch": 0.9,
"learning_rate": 5.4277173645623185e-06,
"loss": 1.8469,
"step": 407500
},
{
"epoch": 0.9,
"learning_rate": 5.366210697239581e-06,
"loss": 1.845,
"step": 408000
},
{
"epoch": 0.9,
"learning_rate": 5.304704029916843e-06,
"loss": 1.8451,
"step": 408500
},
{
"epoch": 0.91,
"learning_rate": 5.243197362594105e-06,
"loss": 1.85,
"step": 409000
},
{
"epoch": 0.91,
"learning_rate": 5.181690695271368e-06,
"loss": 1.8436,
"step": 409500
},
{
"epoch": 0.91,
"learning_rate": 5.12018402794863e-06,
"loss": 1.8435,
"step": 410000
},
{
"epoch": 0.91,
"learning_rate": 5.0586773606258925e-06,
"loss": 1.8447,
"step": 410500
},
{
"epoch": 0.91,
"learning_rate": 4.9971706933031544e-06,
"loss": 1.847,
"step": 411000
},
{
"epoch": 0.91,
"learning_rate": 4.935664025980416e-06,
"loss": 1.8522,
"step": 411500
},
{
"epoch": 0.91,
"learning_rate": 4.874157358657678e-06,
"loss": 1.8474,
"step": 412000
},
{
"epoch": 0.91,
"learning_rate": 4.812650691334941e-06,
"loss": 1.8473,
"step": 412500
},
{
"epoch": 0.91,
"learning_rate": 4.751144024012203e-06,
"loss": 1.8446,
"step": 413000
},
{
"epoch": 0.92,
"learning_rate": 4.689637356689466e-06,
"loss": 1.8471,
"step": 413500
},
{
"epoch": 0.92,
"learning_rate": 4.628130689366728e-06,
"loss": 1.8472,
"step": 414000
},
{
"epoch": 0.92,
"learning_rate": 4.5666240220439895e-06,
"loss": 1.8456,
"step": 414500
},
{
"epoch": 0.92,
"learning_rate": 4.505117354721252e-06,
"loss": 1.8446,
"step": 415000
},
{
"epoch": 0.92,
"learning_rate": 4.443610687398514e-06,
"loss": 1.8441,
"step": 415500
},
{
"epoch": 0.92,
"learning_rate": 4.382104020075776e-06,
"loss": 1.8466,
"step": 416000
},
{
"epoch": 0.92,
"learning_rate": 4.320597352753038e-06,
"loss": 1.8424,
"step": 416500
},
{
"epoch": 0.92,
"learning_rate": 4.259090685430301e-06,
"loss": 1.8472,
"step": 417000
},
{
"epoch": 0.92,
"learning_rate": 4.197584018107563e-06,
"loss": 1.8423,
"step": 417500
},
{
"epoch": 0.93,
"learning_rate": 4.1360773507848255e-06,
"loss": 1.8463,
"step": 418000
},
{
"epoch": 0.93,
"learning_rate": 4.074570683462087e-06,
"loss": 1.8438,
"step": 418500
},
{
"epoch": 0.93,
"learning_rate": 4.01306401613935e-06,
"loss": 1.8399,
"step": 419000
},
{
"epoch": 0.93,
"learning_rate": 3.951557348816612e-06,
"loss": 1.8463,
"step": 419500
},
{
"epoch": 0.93,
"learning_rate": 3.890050681493874e-06,
"loss": 1.8404,
"step": 420000
},
{
"epoch": 0.93,
"learning_rate": 3.828544014171137e-06,
"loss": 1.8457,
"step": 420500
},
{
"epoch": 0.93,
"learning_rate": 3.767037346848398e-06,
"loss": 1.8451,
"step": 421000
},
{
"epoch": 0.93,
"learning_rate": 3.705530679525661e-06,
"loss": 1.8447,
"step": 421500
},
{
"epoch": 0.93,
"learning_rate": 3.644024012202923e-06,
"loss": 1.8427,
"step": 422000
},
{
"epoch": 0.94,
"learning_rate": 3.582517344880185e-06,
"loss": 1.8406,
"step": 422500
},
{
"epoch": 0.94,
"learning_rate": 3.521010677557447e-06,
"loss": 1.842,
"step": 423000
},
{
"epoch": 0.94,
"learning_rate": 3.45950401023471e-06,
"loss": 1.8426,
"step": 423500
},
{
"epoch": 0.94,
"learning_rate": 3.397997342911972e-06,
"loss": 1.8455,
"step": 424000
},
{
"epoch": 0.94,
"learning_rate": 3.336490675589234e-06,
"loss": 1.841,
"step": 424500
},
{
"epoch": 0.94,
"learning_rate": 3.274984008266496e-06,
"loss": 1.8418,
"step": 425000
},
{
"epoch": 0.94,
"learning_rate": 3.213477340943759e-06,
"loss": 1.8415,
"step": 425500
},
{
"epoch": 0.94,
"learning_rate": 3.1519706736210207e-06,
"loss": 1.8452,
"step": 426000
},
{
"epoch": 0.94,
"learning_rate": 3.090464006298283e-06,
"loss": 1.8472,
"step": 426500
},
{
"epoch": 0.95,
"learning_rate": 3.0289573389755454e-06,
"loss": 1.8403,
"step": 427000
},
{
"epoch": 0.95,
"learning_rate": 2.9674506716528073e-06,
"loss": 1.8406,
"step": 427500
},
{
"epoch": 0.95,
"learning_rate": 2.9059440043300696e-06,
"loss": 1.8425,
"step": 428000
},
{
"epoch": 0.95,
"learning_rate": 2.8444373370073315e-06,
"loss": 1.8417,
"step": 428500
},
{
"epoch": 0.95,
"learning_rate": 2.782930669684594e-06,
"loss": 1.8402,
"step": 429000
},
{
"epoch": 0.95,
"learning_rate": 2.7214240023618562e-06,
"loss": 1.8465,
"step": 429500
},
{
"epoch": 0.95,
"learning_rate": 2.659917335039118e-06,
"loss": 1.8454,
"step": 430000
},
{
"epoch": 0.95,
"learning_rate": 2.5984106677163805e-06,
"loss": 1.8389,
"step": 430500
},
{
"epoch": 0.95,
"learning_rate": 2.536904000393643e-06,
"loss": 1.8439,
"step": 431000
},
{
"epoch": 0.96,
"learning_rate": 2.475397333070905e-06,
"loss": 1.8451,
"step": 431500
},
{
"epoch": 0.96,
"learning_rate": 2.413890665748167e-06,
"loss": 1.8393,
"step": 432000
},
{
"epoch": 0.96,
"learning_rate": 2.3523839984254294e-06,
"loss": 1.8428,
"step": 432500
},
{
"epoch": 0.96,
"learning_rate": 2.2908773311026917e-06,
"loss": 1.8397,
"step": 433000
},
{
"epoch": 0.96,
"learning_rate": 2.229370663779954e-06,
"loss": 1.8435,
"step": 433500
},
{
"epoch": 0.96,
"learning_rate": 2.1678639964572164e-06,
"loss": 1.8411,
"step": 434000
},
{
"epoch": 0.96,
"learning_rate": 2.1063573291344783e-06,
"loss": 1.8435,
"step": 434500
},
{
"epoch": 0.96,
"learning_rate": 2.0448506618117402e-06,
"loss": 1.8427,
"step": 435000
},
{
"epoch": 0.96,
"learning_rate": 1.9833439944890026e-06,
"loss": 1.8371,
"step": 435500
},
{
"epoch": 0.97,
"learning_rate": 1.921837327166265e-06,
"loss": 1.8378,
"step": 436000
},
{
"epoch": 0.97,
"learning_rate": 1.860330659843527e-06,
"loss": 1.8403,
"step": 436500
},
{
"epoch": 0.97,
"learning_rate": 1.7988239925207894e-06,
"loss": 1.8427,
"step": 437000
},
{
"epoch": 0.97,
"learning_rate": 1.7373173251980517e-06,
"loss": 1.8414,
"step": 437500
},
{
"epoch": 0.97,
"learning_rate": 1.6758106578753138e-06,
"loss": 1.8371,
"step": 438000
},
{
"epoch": 0.97,
"learning_rate": 1.6143039905525761e-06,
"loss": 1.8388,
"step": 438500
},
{
"epoch": 0.97,
"learning_rate": 1.552797323229838e-06,
"loss": 1.8447,
"step": 439000
},
{
"epoch": 0.97,
"learning_rate": 1.4912906559071004e-06,
"loss": 1.8438,
"step": 439500
},
{
"epoch": 0.97,
"learning_rate": 1.4297839885843625e-06,
"loss": 1.84,
"step": 440000
},
{
"epoch": 0.98,
"learning_rate": 1.3682773212616249e-06,
"loss": 1.844,
"step": 440500
},
{
"epoch": 0.98,
"learning_rate": 1.3067706539388872e-06,
"loss": 1.8395,
"step": 441000
},
{
"epoch": 0.98,
"learning_rate": 1.245263986616149e-06,
"loss": 1.8408,
"step": 441500
},
{
"epoch": 0.98,
"learning_rate": 1.1837573192934114e-06,
"loss": 1.8426,
"step": 442000
},
{
"epoch": 0.98,
"learning_rate": 1.1222506519706736e-06,
"loss": 1.8412,
"step": 442500
},
{
"epoch": 0.98,
"learning_rate": 1.060743984647936e-06,
"loss": 1.8389,
"step": 443000
},
{
"epoch": 0.98,
"learning_rate": 9.992373173251982e-07,
"loss": 1.839,
"step": 443500
},
{
"epoch": 0.98,
"learning_rate": 9.377306500024604e-07,
"loss": 1.8411,
"step": 444000
},
{
"epoch": 0.98,
"learning_rate": 8.762239826797225e-07,
"loss": 1.843,
"step": 444500
},
{
"epoch": 0.99,
"learning_rate": 8.147173153569847e-07,
"loss": 1.84,
"step": 445000
},
{
"epoch": 0.99,
"learning_rate": 7.532106480342469e-07,
"loss": 1.847,
"step": 445500
},
{
"epoch": 0.99,
"learning_rate": 6.917039807115092e-07,
"loss": 1.8363,
"step": 446000
},
{
"epoch": 0.99,
"learning_rate": 6.301973133887713e-07,
"loss": 1.8402,
"step": 446500
},
{
"epoch": 0.99,
"learning_rate": 5.686906460660336e-07,
"loss": 1.8397,
"step": 447000
},
{
"epoch": 0.99,
"learning_rate": 5.071839787432959e-07,
"loss": 1.8424,
"step": 447500
},
{
"epoch": 0.99,
"learning_rate": 4.45677311420558e-07,
"loss": 1.8357,
"step": 448000
},
{
"epoch": 0.99,
"learning_rate": 3.841706440978202e-07,
"loss": 1.843,
"step": 448500
},
{
"epoch": 0.99,
"learning_rate": 3.2266397677508245e-07,
"loss": 1.8371,
"step": 449000
},
{
"epoch": 1.0,
"learning_rate": 2.6115730945234463e-07,
"loss": 1.8429,
"step": 449500
},
{
"epoch": 1.0,
"learning_rate": 1.9965064212960688e-07,
"loss": 1.8424,
"step": 450000
},
{
"epoch": 1.0,
"learning_rate": 1.381439748068691e-07,
"loss": 1.8414,
"step": 450500
},
{
"epoch": 1.0,
"learning_rate": 7.663730748413129e-08,
"loss": 1.8446,
"step": 451000
},
{
"epoch": 1.0,
"learning_rate": 1.5130640161393495e-08,
"loss": 1.8381,
"step": 451500
},
{
"epoch": 1.0,
"step": 451623,
"total_flos": 1.5894400168611545e+19,
"train_loss": 0.6229404193166468,
"train_runtime": 298967.217,
"train_samples_per_second": 870.111,
"train_steps_per_second": 1.511
}
],
"max_steps": 451623,
"num_train_epochs": 1,
"total_flos": 1.5894400168611545e+19,
"trial_name": null,
"trial_params": null
}