diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5443 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 451623, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.920484962997953e-07, + "loss": 10.3537, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 9.840969925995906e-07, + "loss": 3.8759, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 1.4761454888993861e-06, + "loss": 3.3657, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 1.9681939851991812e-06, + "loss": 3.2292, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 2.4602424814989765e-06, + "loss": 3.1654, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 2.9522909777987723e-06, + "loss": 3.1135, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 3.444339474098567e-06, + "loss": 3.0643, + "step": 3500 + }, + { + "epoch": 0.01, + "learning_rate": 3.9363879703983625e-06, + "loss": 3.0263, + "step": 4000 + }, + { + "epoch": 0.01, + "learning_rate": 4.428436466698158e-06, + "loss": 2.9939, + "step": 4500 + }, + { + "epoch": 0.01, + "learning_rate": 4.920484962997953e-06, + "loss": 2.9628, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 5.412533459297749e-06, + "loss": 2.9285, + "step": 5500 + }, + { + "epoch": 0.01, + "learning_rate": 5.9045819555975445e-06, + "loss": 2.897, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 6.396630451897339e-06, + "loss": 2.8763, + "step": 6500 + }, + { + "epoch": 0.01, + "learning_rate": 6.888678948197134e-06, + "loss": 2.8517, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 7.380727444496929e-06, + "loss": 2.8288, + "step": 7500 + }, + { + "epoch": 0.02, + "learning_rate": 7.872775940796725e-06, + "loss": 2.8043, + "step": 8000 + }, + { + "epoch": 0.02, + "learning_rate": 8.36482443709652e-06, + "loss": 2.7839, + "step": 8500 + }, + { + "epoch": 0.02, + "learning_rate": 8.856872933396316e-06, + "loss": 2.7614, + "step": 9000 + }, + { + "epoch": 0.02, + "learning_rate": 9.348921429696111e-06, + "loss": 2.7452, + "step": 9500 + }, + { + "epoch": 0.02, + "learning_rate": 9.840969925995906e-06, + "loss": 2.7313, + "step": 10000 + }, + { + "epoch": 0.02, + "learning_rate": 1.0333018422295703e-05, + "loss": 2.7144, + "step": 10500 + }, + { + "epoch": 0.02, + "learning_rate": 1.0825066918595498e-05, + "loss": 2.6997, + "step": 11000 + }, + { + "epoch": 0.02, + "learning_rate": 1.1317115414895292e-05, + "loss": 2.6785, + "step": 11500 + }, + { + "epoch": 0.02, + "learning_rate": 1.1809163911195089e-05, + "loss": 2.6693, + "step": 12000 + }, + { + "epoch": 0.02, + "learning_rate": 1.2301212407494884e-05, + "loss": 2.6494, + "step": 12500 + }, + { + "epoch": 0.03, + "learning_rate": 1.2793260903794679e-05, + "loss": 2.6353, + "step": 13000 + }, + { + "epoch": 0.03, + "learning_rate": 1.3285309400094472e-05, + "loss": 2.6276, + "step": 13500 + }, + { + "epoch": 0.03, + "learning_rate": 1.3777357896394269e-05, + "loss": 2.6187, + "step": 14000 + }, + { + "epoch": 0.03, + "learning_rate": 1.4269406392694065e-05, + "loss": 2.6011, + "step": 14500 + }, + { + "epoch": 0.03, + "learning_rate": 1.4761454888993858e-05, + "loss": 2.5932, + "step": 15000 + }, + { + "epoch": 0.03, + "learning_rate": 1.5253503385293655e-05, + "loss": 2.5815, + "step": 15500 + }, + { + "epoch": 0.03, + "learning_rate": 1.574555188159345e-05, + "loss": 2.5736, + "step": 16000 + }, + { + "epoch": 0.03, + "learning_rate": 1.6237600377893248e-05, + "loss": 2.5592, + "step": 16500 + }, + { + "epoch": 0.03, + "learning_rate": 1.672964887419304e-05, + "loss": 2.5488, + "step": 17000 + }, + { + "epoch": 0.03, + "learning_rate": 1.7221697370492838e-05, + "loss": 2.5394, + "step": 17500 + }, + { + "epoch": 0.04, + "learning_rate": 1.7713745866792633e-05, + "loss": 2.5307, + "step": 18000 + }, + { + "epoch": 0.04, + "learning_rate": 1.8205794363092428e-05, + "loss": 2.5219, + "step": 18500 + }, + { + "epoch": 0.04, + "learning_rate": 1.8697842859392223e-05, + "loss": 2.514, + "step": 19000 + }, + { + "epoch": 0.04, + "learning_rate": 1.9189891355692017e-05, + "loss": 2.5038, + "step": 19500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9681939851991812e-05, + "loss": 2.4932, + "step": 20000 + }, + { + "epoch": 0.04, + "learning_rate": 2.0173988348291607e-05, + "loss": 2.4895, + "step": 20500 + }, + { + "epoch": 0.04, + "learning_rate": 2.0666036844591405e-05, + "loss": 2.4754, + "step": 21000 + }, + { + "epoch": 0.04, + "learning_rate": 2.1158085340891197e-05, + "loss": 2.4703, + "step": 21500 + }, + { + "epoch": 0.04, + "learning_rate": 2.1650133837190995e-05, + "loss": 2.4602, + "step": 22000 + }, + { + "epoch": 0.04, + "learning_rate": 2.214218233349079e-05, + "loss": 2.4609, + "step": 22500 + }, + { + "epoch": 0.05, + "learning_rate": 2.2634230829790585e-05, + "loss": 2.4458, + "step": 23000 + }, + { + "epoch": 0.05, + "learning_rate": 2.312627932609038e-05, + "loss": 2.439, + "step": 23500 + }, + { + "epoch": 0.05, + "learning_rate": 2.3618327822390178e-05, + "loss": 2.4354, + "step": 24000 + }, + { + "epoch": 0.05, + "learning_rate": 2.411037631868997e-05, + "loss": 2.4263, + "step": 24500 + }, + { + "epoch": 0.05, + "learning_rate": 2.4602424814989768e-05, + "loss": 2.4238, + "step": 25000 + }, + { + "epoch": 0.05, + "learning_rate": 2.5094473311289563e-05, + "loss": 2.4135, + "step": 25500 + }, + { + "epoch": 0.05, + "learning_rate": 2.5586521807589358e-05, + "loss": 2.4098, + "step": 26000 + }, + { + "epoch": 0.05, + "learning_rate": 2.6078570303889156e-05, + "loss": 2.4012, + "step": 26500 + }, + { + "epoch": 0.05, + "learning_rate": 2.6570618800188944e-05, + "loss": 2.3966, + "step": 27000 + }, + { + "epoch": 0.05, + "learning_rate": 2.7062667296488742e-05, + "loss": 2.386, + "step": 27500 + }, + { + "epoch": 0.06, + "learning_rate": 2.7554715792788537e-05, + "loss": 2.3838, + "step": 28000 + }, + { + "epoch": 0.06, + "learning_rate": 2.8046764289088335e-05, + "loss": 2.3787, + "step": 28500 + }, + { + "epoch": 0.06, + "learning_rate": 2.853881278538813e-05, + "loss": 2.3711, + "step": 29000 + }, + { + "epoch": 0.06, + "learning_rate": 2.9030861281687925e-05, + "loss": 2.365, + "step": 29500 + }, + { + "epoch": 0.06, + "learning_rate": 2.9522909777987717e-05, + "loss": 2.3625, + "step": 30000 + }, + { + "epoch": 0.06, + "learning_rate": 3.0014958274287515e-05, + "loss": 2.3555, + "step": 30500 + }, + { + "epoch": 0.06, + "learning_rate": 3.050700677058731e-05, + "loss": 2.349, + "step": 31000 + }, + { + "epoch": 0.06, + "learning_rate": 3.099905526688711e-05, + "loss": 2.3439, + "step": 31500 + }, + { + "epoch": 0.06, + "learning_rate": 3.14911037631869e-05, + "loss": 2.3382, + "step": 32000 + }, + { + "epoch": 0.06, + "learning_rate": 3.19831522594867e-05, + "loss": 2.3355, + "step": 32500 + }, + { + "epoch": 0.06, + "learning_rate": 3.2475200755786496e-05, + "loss": 2.3265, + "step": 33000 + }, + { + "epoch": 0.07, + "learning_rate": 3.296724925208629e-05, + "loss": 2.3289, + "step": 33500 + }, + { + "epoch": 0.07, + "learning_rate": 3.345929774838608e-05, + "loss": 2.3275, + "step": 34000 + }, + { + "epoch": 0.07, + "learning_rate": 3.395134624468588e-05, + "loss": 2.3242, + "step": 34500 + }, + { + "epoch": 0.07, + "learning_rate": 3.4443394740985676e-05, + "loss": 2.311, + "step": 35000 + }, + { + "epoch": 0.07, + "learning_rate": 3.493544323728547e-05, + "loss": 2.3148, + "step": 35500 + }, + { + "epoch": 0.07, + "learning_rate": 3.5427491733585265e-05, + "loss": 2.3088, + "step": 36000 + }, + { + "epoch": 0.07, + "learning_rate": 3.591954022988506e-05, + "loss": 2.301, + "step": 36500 + }, + { + "epoch": 0.07, + "learning_rate": 3.6411588726184855e-05, + "loss": 2.2969, + "step": 37000 + }, + { + "epoch": 0.07, + "learning_rate": 3.690363722248465e-05, + "loss": 2.2989, + "step": 37500 + }, + { + "epoch": 0.07, + "learning_rate": 3.7395685718784445e-05, + "loss": 2.2882, + "step": 38000 + }, + { + "epoch": 0.08, + "learning_rate": 3.788773421508424e-05, + "loss": 2.2869, + "step": 38500 + }, + { + "epoch": 0.08, + "learning_rate": 3.8379782711384035e-05, + "loss": 2.279, + "step": 39000 + }, + { + "epoch": 0.08, + "learning_rate": 3.8871831207683826e-05, + "loss": 2.2817, + "step": 39500 + }, + { + "epoch": 0.08, + "learning_rate": 3.9363879703983625e-05, + "loss": 2.2769, + "step": 40000 + }, + { + "epoch": 0.08, + "learning_rate": 3.985592820028342e-05, + "loss": 2.271, + "step": 40500 + }, + { + "epoch": 0.08, + "learning_rate": 4.0347976696583214e-05, + "loss": 2.2703, + "step": 41000 + }, + { + "epoch": 0.08, + "learning_rate": 4.084002519288301e-05, + "loss": 2.2644, + "step": 41500 + }, + { + "epoch": 0.08, + "learning_rate": 4.133207368918281e-05, + "loss": 2.2578, + "step": 42000 + }, + { + "epoch": 0.08, + "learning_rate": 4.18241221854826e-05, + "loss": 2.2568, + "step": 42500 + }, + { + "epoch": 0.08, + "learning_rate": 4.2316170681782394e-05, + "loss": 2.2565, + "step": 43000 + }, + { + "epoch": 0.09, + "learning_rate": 4.280821917808219e-05, + "loss": 2.2539, + "step": 43500 + }, + { + "epoch": 0.09, + "learning_rate": 4.330026767438199e-05, + "loss": 2.2493, + "step": 44000 + }, + { + "epoch": 0.09, + "learning_rate": 4.379231617068179e-05, + "loss": 2.2488, + "step": 44500 + }, + { + "epoch": 0.09, + "learning_rate": 4.428436466698158e-05, + "loss": 2.2388, + "step": 45000 + }, + { + "epoch": 0.09, + "learning_rate": 4.477641316328138e-05, + "loss": 2.2368, + "step": 45500 + }, + { + "epoch": 0.09, + "learning_rate": 4.526846165958117e-05, + "loss": 2.2366, + "step": 46000 + }, + { + "epoch": 0.09, + "learning_rate": 4.576051015588096e-05, + "loss": 2.2323, + "step": 46500 + }, + { + "epoch": 0.09, + "learning_rate": 4.625255865218076e-05, + "loss": 2.2288, + "step": 47000 + }, + { + "epoch": 0.09, + "learning_rate": 4.674460714848056e-05, + "loss": 2.23, + "step": 47500 + }, + { + "epoch": 0.09, + "learning_rate": 4.7236655644780356e-05, + "loss": 2.223, + "step": 48000 + }, + { + "epoch": 0.1, + "learning_rate": 4.772870414108015e-05, + "loss": 2.2232, + "step": 48500 + }, + { + "epoch": 0.1, + "learning_rate": 4.822075263737994e-05, + "loss": 2.2188, + "step": 49000 + }, + { + "epoch": 0.1, + "learning_rate": 4.871280113367974e-05, + "loss": 2.217, + "step": 49500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9204849629979536e-05, + "loss": 2.2118, + "step": 50000 + }, + { + "epoch": 0.1, + "learning_rate": 4.969689812627933e-05, + "loss": 2.2089, + "step": 50500 + }, + { + "epoch": 0.1, + "learning_rate": 4.997900574717671e-05, + "loss": 2.2101, + "step": 51000 + }, + { + "epoch": 0.1, + "learning_rate": 4.992433321378273e-05, + "loss": 2.2048, + "step": 51500 + }, + { + "epoch": 0.1, + "learning_rate": 4.986966068038875e-05, + "loss": 2.2026, + "step": 52000 + }, + { + "epoch": 0.1, + "learning_rate": 4.981498814699476e-05, + "loss": 2.2, + "step": 52500 + }, + { + "epoch": 0.1, + "learning_rate": 4.976031561360078e-05, + "loss": 2.1998, + "step": 53000 + }, + { + "epoch": 0.11, + "learning_rate": 4.97056430802068e-05, + "loss": 2.1905, + "step": 53500 + }, + { + "epoch": 0.11, + "learning_rate": 4.965097054681281e-05, + "loss": 2.1896, + "step": 54000 + }, + { + "epoch": 0.11, + "learning_rate": 4.959629801341883e-05, + "loss": 2.1886, + "step": 54500 + }, + { + "epoch": 0.11, + "learning_rate": 4.954162548002484e-05, + "loss": 2.1825, + "step": 55000 + }, + { + "epoch": 0.11, + "learning_rate": 4.948695294663086e-05, + "loss": 2.1807, + "step": 55500 + }, + { + "epoch": 0.11, + "learning_rate": 4.943228041323688e-05, + "loss": 2.1814, + "step": 56000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9377607879842896e-05, + "loss": 2.1797, + "step": 56500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9322935346448914e-05, + "loss": 2.1714, + "step": 57000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9268262813054925e-05, + "loss": 2.1766, + "step": 57500 + }, + { + "epoch": 0.11, + "learning_rate": 4.921359027966094e-05, + "loss": 2.1725, + "step": 58000 + }, + { + "epoch": 0.12, + "learning_rate": 4.915891774626696e-05, + "loss": 2.1672, + "step": 58500 + }, + { + "epoch": 0.12, + "learning_rate": 4.910424521287298e-05, + "loss": 2.1707, + "step": 59000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9049572679479e-05, + "loss": 2.1636, + "step": 59500 + }, + { + "epoch": 0.12, + "learning_rate": 4.899490014608501e-05, + "loss": 2.1627, + "step": 60000 + }, + { + "epoch": 0.12, + "learning_rate": 4.894022761269103e-05, + "loss": 2.1598, + "step": 60500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8885555079297044e-05, + "loss": 2.1535, + "step": 61000 + }, + { + "epoch": 0.12, + "learning_rate": 4.883088254590306e-05, + "loss": 2.1531, + "step": 61500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8776210012509074e-05, + "loss": 2.1548, + "step": 62000 + }, + { + "epoch": 0.12, + "learning_rate": 4.872153747911509e-05, + "loss": 2.1451, + "step": 62500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8666864945721116e-05, + "loss": 2.1497, + "step": 63000 + }, + { + "epoch": 0.12, + "learning_rate": 4.861219241232713e-05, + "loss": 2.1474, + "step": 63500 + }, + { + "epoch": 0.13, + "learning_rate": 4.8557519878933146e-05, + "loss": 2.1442, + "step": 64000 + }, + { + "epoch": 0.13, + "learning_rate": 4.850284734553916e-05, + "loss": 2.1417, + "step": 64500 + }, + { + "epoch": 0.13, + "learning_rate": 4.8448174812145175e-05, + "loss": 2.1426, + "step": 65000 + }, + { + "epoch": 0.13, + "learning_rate": 4.839350227875119e-05, + "loss": 2.1396, + "step": 65500 + }, + { + "epoch": 0.13, + "learning_rate": 4.833882974535721e-05, + "loss": 2.1371, + "step": 66000 + }, + { + "epoch": 0.13, + "learning_rate": 4.828415721196323e-05, + "loss": 2.1352, + "step": 66500 + }, + { + "epoch": 0.13, + "learning_rate": 4.822948467856924e-05, + "loss": 2.1342, + "step": 67000 + }, + { + "epoch": 0.13, + "learning_rate": 4.8174812145175265e-05, + "loss": 2.1329, + "step": 67500 + }, + { + "epoch": 0.13, + "learning_rate": 4.8120139611781276e-05, + "loss": 2.1272, + "step": 68000 + }, + { + "epoch": 0.13, + "learning_rate": 4.8065467078387294e-05, + "loss": 2.1278, + "step": 68500 + }, + { + "epoch": 0.14, + "learning_rate": 4.801079454499331e-05, + "loss": 2.1251, + "step": 69000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7956122011599323e-05, + "loss": 2.1222, + "step": 69500 + }, + { + "epoch": 0.14, + "learning_rate": 4.790144947820535e-05, + "loss": 2.1157, + "step": 70000 + }, + { + "epoch": 0.14, + "learning_rate": 4.784677694481136e-05, + "loss": 2.1234, + "step": 70500 + }, + { + "epoch": 0.14, + "learning_rate": 4.779210441141738e-05, + "loss": 2.1167, + "step": 71000 + }, + { + "epoch": 0.14, + "learning_rate": 4.773743187802339e-05, + "loss": 2.1182, + "step": 71500 + }, + { + "epoch": 0.14, + "learning_rate": 4.768275934462941e-05, + "loss": 2.121, + "step": 72000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762808681123543e-05, + "loss": 2.1115, + "step": 72500 + }, + { + "epoch": 0.14, + "learning_rate": 4.757341427784144e-05, + "loss": 2.1171, + "step": 73000 + }, + { + "epoch": 0.14, + "learning_rate": 4.751874174444746e-05, + "loss": 2.11, + "step": 73500 + }, + { + "epoch": 0.15, + "learning_rate": 4.746406921105347e-05, + "loss": 2.1108, + "step": 74000 + }, + { + "epoch": 0.15, + "learning_rate": 4.74093966776595e-05, + "loss": 2.1058, + "step": 74500 + }, + { + "epoch": 0.15, + "learning_rate": 4.735472414426551e-05, + "loss": 2.1005, + "step": 75000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7300051610871526e-05, + "loss": 2.1043, + "step": 75500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7245379077477544e-05, + "loss": 2.1027, + "step": 76000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7190706544083555e-05, + "loss": 2.0973, + "step": 76500 + }, + { + "epoch": 0.15, + "learning_rate": 4.713603401068958e-05, + "loss": 2.0947, + "step": 77000 + }, + { + "epoch": 0.15, + "learning_rate": 4.708136147729559e-05, + "loss": 2.1008, + "step": 77500 + }, + { + "epoch": 0.15, + "learning_rate": 4.702668894390161e-05, + "loss": 2.0995, + "step": 78000 + }, + { + "epoch": 0.15, + "learning_rate": 4.697201641050763e-05, + "loss": 2.0932, + "step": 78500 + }, + { + "epoch": 0.16, + "learning_rate": 4.691734387711364e-05, + "loss": 2.0966, + "step": 79000 + }, + { + "epoch": 0.16, + "learning_rate": 4.686267134371966e-05, + "loss": 2.0932, + "step": 79500 + }, + { + "epoch": 0.16, + "learning_rate": 4.6807998810325674e-05, + "loss": 2.0949, + "step": 80000 + }, + { + "epoch": 0.16, + "learning_rate": 4.675332627693169e-05, + "loss": 2.0912, + "step": 80500 + }, + { + "epoch": 0.16, + "learning_rate": 4.669865374353771e-05, + "loss": 2.0885, + "step": 81000 + }, + { + "epoch": 0.16, + "learning_rate": 4.664398121014373e-05, + "loss": 2.0904, + "step": 81500 + }, + { + "epoch": 0.16, + "learning_rate": 4.6589308676749746e-05, + "loss": 2.0858, + "step": 82000 + }, + { + "epoch": 0.16, + "learning_rate": 4.653463614335576e-05, + "loss": 2.0828, + "step": 82500 + }, + { + "epoch": 0.16, + "learning_rate": 4.6479963609961776e-05, + "loss": 2.0827, + "step": 83000 + }, + { + "epoch": 0.16, + "learning_rate": 4.642529107656779e-05, + "loss": 2.0816, + "step": 83500 + }, + { + "epoch": 0.17, + "learning_rate": 4.637061854317381e-05, + "loss": 2.0789, + "step": 84000 + }, + { + "epoch": 0.17, + "learning_rate": 4.631594600977982e-05, + "loss": 2.0789, + "step": 84500 + }, + { + "epoch": 0.17, + "learning_rate": 4.626127347638584e-05, + "loss": 2.0811, + "step": 85000 + }, + { + "epoch": 0.17, + "learning_rate": 4.620660094299186e-05, + "loss": 2.0819, + "step": 85500 + }, + { + "epoch": 0.17, + "learning_rate": 4.615192840959787e-05, + "loss": 2.0768, + "step": 86000 + }, + { + "epoch": 0.17, + "learning_rate": 4.6097255876203895e-05, + "loss": 2.0715, + "step": 86500 + }, + { + "epoch": 0.17, + "learning_rate": 4.6042583342809906e-05, + "loss": 2.0742, + "step": 87000 + }, + { + "epoch": 0.17, + "learning_rate": 4.5987910809415924e-05, + "loss": 2.0699, + "step": 87500 + }, + { + "epoch": 0.17, + "learning_rate": 4.593323827602194e-05, + "loss": 2.0743, + "step": 88000 + }, + { + "epoch": 0.17, + "learning_rate": 4.587856574262796e-05, + "loss": 2.0694, + "step": 88500 + }, + { + "epoch": 0.18, + "learning_rate": 4.582389320923398e-05, + "loss": 2.0675, + "step": 89000 + }, + { + "epoch": 0.18, + "learning_rate": 4.576922067583999e-05, + "loss": 2.0656, + "step": 89500 + }, + { + "epoch": 0.18, + "learning_rate": 4.571454814244601e-05, + "loss": 2.0637, + "step": 90000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5659875609052025e-05, + "loss": 2.0667, + "step": 90500 + }, + { + "epoch": 0.18, + "learning_rate": 4.560520307565804e-05, + "loss": 2.062, + "step": 91000 + }, + { + "epoch": 0.18, + "learning_rate": 4.555053054226406e-05, + "loss": 2.0659, + "step": 91500 + }, + { + "epoch": 0.18, + "learning_rate": 4.549585800887007e-05, + "loss": 2.0597, + "step": 92000 + }, + { + "epoch": 0.18, + "learning_rate": 4.544118547547609e-05, + "loss": 2.067, + "step": 92500 + }, + { + "epoch": 0.18, + "learning_rate": 4.53865129420821e-05, + "loss": 2.0593, + "step": 93000 + }, + { + "epoch": 0.18, + "learning_rate": 4.5331840408688126e-05, + "loss": 2.0562, + "step": 93500 + }, + { + "epoch": 0.19, + "learning_rate": 4.527716787529414e-05, + "loss": 2.0589, + "step": 94000 + }, + { + "epoch": 0.19, + "learning_rate": 4.5222495341900156e-05, + "loss": 2.0566, + "step": 94500 + }, + { + "epoch": 0.19, + "learning_rate": 4.5167822808506174e-05, + "loss": 2.0576, + "step": 95000 + }, + { + "epoch": 0.19, + "learning_rate": 4.511315027511219e-05, + "loss": 2.058, + "step": 95500 + }, + { + "epoch": 0.19, + "learning_rate": 4.505847774171821e-05, + "loss": 2.0528, + "step": 96000 + }, + { + "epoch": 0.19, + "learning_rate": 4.500380520832422e-05, + "loss": 2.0562, + "step": 96500 + }, + { + "epoch": 0.19, + "learning_rate": 4.494913267493024e-05, + "loss": 2.049, + "step": 97000 + }, + { + "epoch": 0.19, + "learning_rate": 4.489446014153626e-05, + "loss": 2.0536, + "step": 97500 + }, + { + "epoch": 0.19, + "learning_rate": 4.4839787608142275e-05, + "loss": 2.0538, + "step": 98000 + }, + { + "epoch": 0.19, + "learning_rate": 4.478511507474829e-05, + "loss": 2.0466, + "step": 98500 + }, + { + "epoch": 0.19, + "learning_rate": 4.4730442541354304e-05, + "loss": 2.049, + "step": 99000 + }, + { + "epoch": 0.2, + "learning_rate": 4.467577000796032e-05, + "loss": 2.0455, + "step": 99500 + }, + { + "epoch": 0.2, + "learning_rate": 4.462109747456634e-05, + "loss": 2.0441, + "step": 100000 + }, + { + "epoch": 0.2, + "learning_rate": 4.456642494117236e-05, + "loss": 2.0477, + "step": 100500 + }, + { + "epoch": 0.2, + "learning_rate": 4.4511752407778376e-05, + "loss": 2.0425, + "step": 101000 + }, + { + "epoch": 0.2, + "learning_rate": 4.445707987438439e-05, + "loss": 2.0475, + "step": 101500 + }, + { + "epoch": 0.2, + "learning_rate": 4.4402407340990405e-05, + "loss": 2.0482, + "step": 102000 + }, + { + "epoch": 0.2, + "learning_rate": 4.434773480759642e-05, + "loss": 2.0483, + "step": 102500 + }, + { + "epoch": 0.2, + "learning_rate": 4.429306227420244e-05, + "loss": 2.0375, + "step": 103000 + }, + { + "epoch": 0.2, + "learning_rate": 4.423838974080845e-05, + "loss": 2.0384, + "step": 103500 + }, + { + "epoch": 0.2, + "learning_rate": 4.418371720741447e-05, + "loss": 2.0383, + "step": 104000 + }, + { + "epoch": 0.21, + "learning_rate": 4.412904467402049e-05, + "loss": 2.037, + "step": 104500 + }, + { + "epoch": 0.21, + "learning_rate": 4.4074372140626507e-05, + "loss": 2.0315, + "step": 105000 + }, + { + "epoch": 0.21, + "learning_rate": 4.4019699607232525e-05, + "loss": 2.0375, + "step": 105500 + }, + { + "epoch": 0.21, + "learning_rate": 4.3965027073838536e-05, + "loss": 2.0414, + "step": 106000 + }, + { + "epoch": 0.21, + "learning_rate": 4.3910354540444554e-05, + "loss": 2.0324, + "step": 106500 + }, + { + "epoch": 0.21, + "learning_rate": 4.385568200705057e-05, + "loss": 2.0316, + "step": 107000 + }, + { + "epoch": 0.21, + "learning_rate": 4.380100947365659e-05, + "loss": 2.0343, + "step": 107500 + }, + { + "epoch": 0.21, + "learning_rate": 4.374633694026261e-05, + "loss": 2.0314, + "step": 108000 + }, + { + "epoch": 0.21, + "learning_rate": 4.369166440686862e-05, + "loss": 2.0347, + "step": 108500 + }, + { + "epoch": 0.21, + "learning_rate": 4.363699187347464e-05, + "loss": 2.0268, + "step": 109000 + }, + { + "epoch": 0.22, + "learning_rate": 4.3582319340080655e-05, + "loss": 2.0323, + "step": 109500 + }, + { + "epoch": 0.22, + "learning_rate": 4.352764680668667e-05, + "loss": 2.03, + "step": 110000 + }, + { + "epoch": 0.22, + "learning_rate": 4.347297427329269e-05, + "loss": 2.0231, + "step": 110500 + }, + { + "epoch": 0.22, + "learning_rate": 4.34183017398987e-05, + "loss": 2.0279, + "step": 111000 + }, + { + "epoch": 0.22, + "learning_rate": 4.336362920650472e-05, + "loss": 2.0298, + "step": 111500 + }, + { + "epoch": 0.22, + "learning_rate": 4.330895667311074e-05, + "loss": 2.0408, + "step": 112000 + }, + { + "epoch": 0.22, + "learning_rate": 4.3254284139716756e-05, + "loss": 2.0199, + "step": 112500 + }, + { + "epoch": 0.22, + "learning_rate": 4.319961160632277e-05, + "loss": 2.0297, + "step": 113000 + }, + { + "epoch": 0.22, + "learning_rate": 4.3144939072928786e-05, + "loss": 2.0287, + "step": 113500 + }, + { + "epoch": 0.22, + "learning_rate": 4.309026653953481e-05, + "loss": 2.0256, + "step": 114000 + }, + { + "epoch": 0.23, + "learning_rate": 4.303559400614082e-05, + "loss": 2.0197, + "step": 114500 + }, + { + "epoch": 0.23, + "learning_rate": 4.298092147274684e-05, + "loss": 2.0234, + "step": 115000 + }, + { + "epoch": 0.23, + "learning_rate": 4.292624893935285e-05, + "loss": 2.0391, + "step": 115500 + }, + { + "epoch": 0.23, + "learning_rate": 4.287157640595887e-05, + "loss": 2.0772, + "step": 116000 + }, + { + "epoch": 0.23, + "learning_rate": 4.281690387256489e-05, + "loss": 2.0586, + "step": 116500 + }, + { + "epoch": 0.23, + "learning_rate": 4.2762231339170905e-05, + "loss": 2.0336, + "step": 117000 + }, + { + "epoch": 0.23, + "learning_rate": 4.270755880577692e-05, + "loss": 2.0207, + "step": 117500 + }, + { + "epoch": 0.23, + "learning_rate": 4.2652886272382934e-05, + "loss": 2.0197, + "step": 118000 + }, + { + "epoch": 0.23, + "learning_rate": 4.259821373898895e-05, + "loss": 2.0184, + "step": 118500 + }, + { + "epoch": 0.23, + "learning_rate": 4.254354120559497e-05, + "loss": 2.0162, + "step": 119000 + }, + { + "epoch": 0.24, + "learning_rate": 4.248886867220099e-05, + "loss": 2.0139, + "step": 119500 + }, + { + "epoch": 0.24, + "learning_rate": 4.2434196138807006e-05, + "loss": 2.0141, + "step": 120000 + }, + { + "epoch": 0.24, + "learning_rate": 4.237952360541302e-05, + "loss": 2.0119, + "step": 120500 + }, + { + "epoch": 0.24, + "learning_rate": 4.232485107201904e-05, + "loss": 2.0109, + "step": 121000 + }, + { + "epoch": 0.24, + "learning_rate": 4.227017853862505e-05, + "loss": 2.0112, + "step": 121500 + }, + { + "epoch": 0.24, + "learning_rate": 4.221550600523107e-05, + "loss": 2.0106, + "step": 122000 + }, + { + "epoch": 0.24, + "learning_rate": 4.216083347183708e-05, + "loss": 2.0115, + "step": 122500 + }, + { + "epoch": 0.24, + "learning_rate": 4.21061609384431e-05, + "loss": 2.0164, + "step": 123000 + }, + { + "epoch": 0.24, + "learning_rate": 4.2051488405049125e-05, + "loss": 2.0055, + "step": 123500 + }, + { + "epoch": 0.24, + "learning_rate": 4.1996815871655136e-05, + "loss": 2.0117, + "step": 124000 + }, + { + "epoch": 0.25, + "learning_rate": 4.1942143338261154e-05, + "loss": 2.008, + "step": 124500 + }, + { + "epoch": 0.25, + "learning_rate": 4.1887470804867166e-05, + "loss": 2.0077, + "step": 125000 + }, + { + "epoch": 0.25, + "learning_rate": 4.1832798271473184e-05, + "loss": 2.009, + "step": 125500 + }, + { + "epoch": 0.25, + "learning_rate": 4.17781257380792e-05, + "loss": 2.0096, + "step": 126000 + }, + { + "epoch": 0.25, + "learning_rate": 4.172345320468522e-05, + "loss": 2.0025, + "step": 126500 + }, + { + "epoch": 0.25, + "learning_rate": 4.166878067129124e-05, + "loss": 2.0013, + "step": 127000 + }, + { + "epoch": 0.25, + "learning_rate": 4.161410813789725e-05, + "loss": 2.0031, + "step": 127500 + }, + { + "epoch": 0.25, + "learning_rate": 4.1559435604503274e-05, + "loss": 2.0039, + "step": 128000 + }, + { + "epoch": 0.25, + "learning_rate": 4.1504763071109285e-05, + "loss": 2.0002, + "step": 128500 + }, + { + "epoch": 0.25, + "learning_rate": 4.14500905377153e-05, + "loss": 2.0007, + "step": 129000 + }, + { + "epoch": 0.25, + "learning_rate": 4.139541800432132e-05, + "loss": 2.0078, + "step": 129500 + }, + { + "epoch": 0.26, + "learning_rate": 4.134074547092733e-05, + "loss": 1.9981, + "step": 130000 + }, + { + "epoch": 0.26, + "learning_rate": 4.128607293753336e-05, + "loss": 1.9989, + "step": 130500 + }, + { + "epoch": 0.26, + "learning_rate": 4.123140040413937e-05, + "loss": 1.997, + "step": 131000 + }, + { + "epoch": 0.26, + "learning_rate": 4.1176727870745386e-05, + "loss": 2.0004, + "step": 131500 + }, + { + "epoch": 0.26, + "learning_rate": 4.11220553373514e-05, + "loss": 1.9983, + "step": 132000 + }, + { + "epoch": 0.26, + "learning_rate": 4.1067382803957415e-05, + "loss": 1.9995, + "step": 132500 + }, + { + "epoch": 0.26, + "learning_rate": 4.101271027056344e-05, + "loss": 1.9994, + "step": 133000 + }, + { + "epoch": 0.26, + "learning_rate": 4.095803773716945e-05, + "loss": 1.9956, + "step": 133500 + }, + { + "epoch": 0.26, + "learning_rate": 4.090336520377547e-05, + "loss": 1.994, + "step": 134000 + }, + { + "epoch": 0.26, + "learning_rate": 4.084869267038148e-05, + "loss": 1.9955, + "step": 134500 + }, + { + "epoch": 0.27, + "learning_rate": 4.0794020136987505e-05, + "loss": 1.9994, + "step": 135000 + }, + { + "epoch": 0.27, + "learning_rate": 4.0739347603593517e-05, + "loss": 1.9897, + "step": 135500 + }, + { + "epoch": 0.27, + "learning_rate": 4.0684675070199535e-05, + "loss": 1.9961, + "step": 136000 + }, + { + "epoch": 0.27, + "learning_rate": 4.063000253680555e-05, + "loss": 1.9946, + "step": 136500 + }, + { + "epoch": 0.27, + "learning_rate": 4.0575330003411564e-05, + "loss": 1.991, + "step": 137000 + }, + { + "epoch": 0.27, + "learning_rate": 4.052065747001759e-05, + "loss": 1.9952, + "step": 137500 + }, + { + "epoch": 0.27, + "learning_rate": 4.04659849366236e-05, + "loss": 1.9848, + "step": 138000 + }, + { + "epoch": 0.27, + "learning_rate": 4.041131240322962e-05, + "loss": 1.9937, + "step": 138500 + }, + { + "epoch": 0.27, + "learning_rate": 4.0356639869835636e-05, + "loss": 1.9893, + "step": 139000 + }, + { + "epoch": 0.27, + "learning_rate": 4.030196733644165e-05, + "loss": 1.9871, + "step": 139500 + }, + { + "epoch": 0.28, + "learning_rate": 4.024729480304767e-05, + "loss": 1.9855, + "step": 140000 + }, + { + "epoch": 0.28, + "learning_rate": 4.019262226965368e-05, + "loss": 1.9887, + "step": 140500 + }, + { + "epoch": 0.28, + "learning_rate": 4.01379497362597e-05, + "loss": 1.9863, + "step": 141000 + }, + { + "epoch": 0.28, + "learning_rate": 4.008327720286571e-05, + "loss": 1.9904, + "step": 141500 + }, + { + "epoch": 0.28, + "learning_rate": 4.002860466947174e-05, + "loss": 1.9886, + "step": 142000 + }, + { + "epoch": 0.28, + "learning_rate": 3.9973932136077755e-05, + "loss": 1.9871, + "step": 142500 + }, + { + "epoch": 0.28, + "learning_rate": 3.9919259602683766e-05, + "loss": 1.9843, + "step": 143000 + }, + { + "epoch": 0.28, + "learning_rate": 3.9864587069289784e-05, + "loss": 1.9891, + "step": 143500 + }, + { + "epoch": 0.28, + "learning_rate": 3.9809914535895795e-05, + "loss": 1.985, + "step": 144000 + }, + { + "epoch": 0.28, + "learning_rate": 3.975524200250182e-05, + "loss": 1.9826, + "step": 144500 + }, + { + "epoch": 0.29, + "learning_rate": 3.970056946910783e-05, + "loss": 1.9839, + "step": 145000 + }, + { + "epoch": 0.29, + "learning_rate": 3.964589693571385e-05, + "loss": 1.9831, + "step": 145500 + }, + { + "epoch": 0.29, + "learning_rate": 3.959122440231987e-05, + "loss": 1.9843, + "step": 146000 + }, + { + "epoch": 0.29, + "learning_rate": 3.953655186892588e-05, + "loss": 1.9825, + "step": 146500 + }, + { + "epoch": 0.29, + "learning_rate": 3.9481879335531903e-05, + "loss": 1.9752, + "step": 147000 + }, + { + "epoch": 0.29, + "learning_rate": 3.9427206802137915e-05, + "loss": 1.9787, + "step": 147500 + }, + { + "epoch": 0.29, + "learning_rate": 3.937253426874393e-05, + "loss": 1.984, + "step": 148000 + }, + { + "epoch": 0.29, + "learning_rate": 3.931786173534995e-05, + "loss": 1.9783, + "step": 148500 + }, + { + "epoch": 0.29, + "learning_rate": 3.926318920195597e-05, + "loss": 1.9785, + "step": 149000 + }, + { + "epoch": 0.29, + "learning_rate": 3.920851666856199e-05, + "loss": 1.9759, + "step": 149500 + }, + { + "epoch": 0.3, + "learning_rate": 3.9153844135168e-05, + "loss": 1.9803, + "step": 150000 + }, + { + "epoch": 0.3, + "learning_rate": 3.9099171601774016e-05, + "loss": 1.9733, + "step": 150500 + }, + { + "epoch": 0.3, + "learning_rate": 3.904449906838003e-05, + "loss": 1.9759, + "step": 151000 + }, + { + "epoch": 0.3, + "learning_rate": 3.898982653498605e-05, + "loss": 1.9767, + "step": 151500 + }, + { + "epoch": 0.3, + "learning_rate": 3.893515400159207e-05, + "loss": 1.9773, + "step": 152000 + }, + { + "epoch": 0.3, + "learning_rate": 3.888048146819808e-05, + "loss": 1.9736, + "step": 152500 + }, + { + "epoch": 0.3, + "learning_rate": 3.88258089348041e-05, + "loss": 1.9729, + "step": 153000 + }, + { + "epoch": 0.3, + "learning_rate": 3.877113640141012e-05, + "loss": 1.9756, + "step": 153500 + }, + { + "epoch": 0.3, + "learning_rate": 3.8716463868016135e-05, + "loss": 1.9747, + "step": 154000 + }, + { + "epoch": 0.3, + "learning_rate": 3.8661791334622146e-05, + "loss": 1.9732, + "step": 154500 + }, + { + "epoch": 0.31, + "learning_rate": 3.8607118801228164e-05, + "loss": 1.9696, + "step": 155000 + }, + { + "epoch": 0.31, + "learning_rate": 3.855244626783418e-05, + "loss": 1.9731, + "step": 155500 + }, + { + "epoch": 0.31, + "learning_rate": 3.84977737344402e-05, + "loss": 1.9741, + "step": 156000 + }, + { + "epoch": 0.31, + "learning_rate": 3.844310120104622e-05, + "loss": 1.9747, + "step": 156500 + }, + { + "epoch": 0.31, + "learning_rate": 3.838842866765223e-05, + "loss": 1.9717, + "step": 157000 + }, + { + "epoch": 0.31, + "learning_rate": 3.833375613425825e-05, + "loss": 1.9724, + "step": 157500 + }, + { + "epoch": 0.31, + "learning_rate": 3.8279083600864266e-05, + "loss": 1.9663, + "step": 158000 + }, + { + "epoch": 0.31, + "learning_rate": 3.8224411067470284e-05, + "loss": 1.9671, + "step": 158500 + }, + { + "epoch": 0.31, + "learning_rate": 3.81697385340763e-05, + "loss": 1.9684, + "step": 159000 + }, + { + "epoch": 0.31, + "learning_rate": 3.811506600068231e-05, + "loss": 1.9683, + "step": 159500 + }, + { + "epoch": 0.31, + "learning_rate": 3.806039346728833e-05, + "loss": 1.9682, + "step": 160000 + }, + { + "epoch": 0.32, + "learning_rate": 3.800572093389435e-05, + "loss": 1.9673, + "step": 160500 + }, + { + "epoch": 0.32, + "learning_rate": 3.795104840050037e-05, + "loss": 1.9639, + "step": 161000 + }, + { + "epoch": 0.32, + "learning_rate": 3.7896375867106385e-05, + "loss": 1.9633, + "step": 161500 + }, + { + "epoch": 0.32, + "learning_rate": 3.7841703333712396e-05, + "loss": 1.964, + "step": 162000 + }, + { + "epoch": 0.32, + "learning_rate": 3.7787030800318414e-05, + "loss": 1.9648, + "step": 162500 + }, + { + "epoch": 0.32, + "learning_rate": 3.773235826692443e-05, + "loss": 1.9636, + "step": 163000 + }, + { + "epoch": 0.32, + "learning_rate": 3.767768573353045e-05, + "loss": 1.9623, + "step": 163500 + }, + { + "epoch": 0.32, + "learning_rate": 3.762301320013646e-05, + "loss": 1.9656, + "step": 164000 + }, + { + "epoch": 0.32, + "learning_rate": 3.756834066674248e-05, + "loss": 1.9636, + "step": 164500 + }, + { + "epoch": 0.32, + "learning_rate": 3.75136681333485e-05, + "loss": 1.9672, + "step": 165000 + }, + { + "epoch": 0.33, + "learning_rate": 3.7458995599954515e-05, + "loss": 1.9639, + "step": 165500 + }, + { + "epoch": 0.33, + "learning_rate": 3.740432306656053e-05, + "loss": 1.9628, + "step": 166000 + }, + { + "epoch": 0.33, + "learning_rate": 3.7349650533166545e-05, + "loss": 1.9647, + "step": 166500 + }, + { + "epoch": 0.33, + "learning_rate": 3.729497799977256e-05, + "loss": 1.9633, + "step": 167000 + }, + { + "epoch": 0.33, + "learning_rate": 3.724030546637858e-05, + "loss": 1.9585, + "step": 167500 + }, + { + "epoch": 0.33, + "learning_rate": 3.71856329329846e-05, + "loss": 1.9599, + "step": 168000 + }, + { + "epoch": 0.33, + "learning_rate": 3.7130960399590617e-05, + "loss": 1.9601, + "step": 168500 + }, + { + "epoch": 0.33, + "learning_rate": 3.707628786619663e-05, + "loss": 1.9617, + "step": 169000 + }, + { + "epoch": 0.33, + "learning_rate": 3.7021615332802646e-05, + "loss": 1.9583, + "step": 169500 + }, + { + "epoch": 0.33, + "learning_rate": 3.6966942799408664e-05, + "loss": 1.9606, + "step": 170000 + }, + { + "epoch": 0.34, + "learning_rate": 3.691227026601468e-05, + "loss": 1.955, + "step": 170500 + }, + { + "epoch": 0.34, + "learning_rate": 3.68575977326207e-05, + "loss": 1.956, + "step": 171000 + }, + { + "epoch": 0.34, + "learning_rate": 3.680292519922671e-05, + "loss": 1.9584, + "step": 171500 + }, + { + "epoch": 0.34, + "learning_rate": 3.674825266583273e-05, + "loss": 1.9575, + "step": 172000 + }, + { + "epoch": 0.34, + "learning_rate": 3.669358013243875e-05, + "loss": 1.9531, + "step": 172500 + }, + { + "epoch": 0.34, + "learning_rate": 3.6638907599044765e-05, + "loss": 1.9603, + "step": 173000 + }, + { + "epoch": 0.34, + "learning_rate": 3.6584235065650776e-05, + "loss": 1.9542, + "step": 173500 + }, + { + "epoch": 0.34, + "learning_rate": 3.6529562532256794e-05, + "loss": 1.9515, + "step": 174000 + }, + { + "epoch": 0.34, + "learning_rate": 3.647488999886282e-05, + "loss": 1.9594, + "step": 174500 + }, + { + "epoch": 0.34, + "learning_rate": 3.642021746546883e-05, + "loss": 1.955, + "step": 175000 + }, + { + "epoch": 0.35, + "learning_rate": 3.636554493207485e-05, + "loss": 1.9521, + "step": 175500 + }, + { + "epoch": 0.35, + "learning_rate": 3.631087239868086e-05, + "loss": 1.9564, + "step": 176000 + }, + { + "epoch": 0.35, + "learning_rate": 3.625619986528688e-05, + "loss": 1.9556, + "step": 176500 + }, + { + "epoch": 0.35, + "learning_rate": 3.6201527331892895e-05, + "loss": 1.9517, + "step": 177000 + }, + { + "epoch": 0.35, + "learning_rate": 3.6146854798498913e-05, + "loss": 1.9569, + "step": 177500 + }, + { + "epoch": 0.35, + "learning_rate": 3.609218226510493e-05, + "loss": 1.9482, + "step": 178000 + }, + { + "epoch": 0.35, + "learning_rate": 3.603750973171094e-05, + "loss": 1.9496, + "step": 178500 + }, + { + "epoch": 0.35, + "learning_rate": 3.598283719831696e-05, + "loss": 1.95, + "step": 179000 + }, + { + "epoch": 0.35, + "learning_rate": 3.592816466492298e-05, + "loss": 1.9519, + "step": 179500 + }, + { + "epoch": 0.35, + "learning_rate": 3.5873492131529e-05, + "loss": 1.9477, + "step": 180000 + }, + { + "epoch": 0.36, + "learning_rate": 3.5818819598135015e-05, + "loss": 1.9482, + "step": 180500 + }, + { + "epoch": 0.36, + "learning_rate": 3.5764147064741026e-05, + "loss": 1.9511, + "step": 181000 + }, + { + "epoch": 0.36, + "learning_rate": 3.570947453134705e-05, + "loss": 1.9464, + "step": 181500 + }, + { + "epoch": 0.36, + "learning_rate": 3.565480199795306e-05, + "loss": 1.9493, + "step": 182000 + }, + { + "epoch": 0.36, + "learning_rate": 3.560012946455908e-05, + "loss": 1.9462, + "step": 182500 + }, + { + "epoch": 0.36, + "learning_rate": 3.554545693116509e-05, + "loss": 1.9493, + "step": 183000 + }, + { + "epoch": 0.36, + "learning_rate": 3.549078439777111e-05, + "loss": 1.9472, + "step": 183500 + }, + { + "epoch": 0.36, + "learning_rate": 3.5436111864377134e-05, + "loss": 1.9475, + "step": 184000 + }, + { + "epoch": 0.36, + "learning_rate": 3.5381439330983145e-05, + "loss": 1.9461, + "step": 184500 + }, + { + "epoch": 0.36, + "learning_rate": 3.532676679758916e-05, + "loss": 1.9409, + "step": 185000 + }, + { + "epoch": 0.37, + "learning_rate": 3.5272094264195174e-05, + "loss": 1.9498, + "step": 185500 + }, + { + "epoch": 0.37, + "learning_rate": 3.521742173080119e-05, + "loss": 1.9475, + "step": 186000 + }, + { + "epoch": 0.37, + "learning_rate": 3.516274919740721e-05, + "loss": 1.9491, + "step": 186500 + }, + { + "epoch": 0.37, + "learning_rate": 3.510807666401323e-05, + "loss": 1.9433, + "step": 187000 + }, + { + "epoch": 0.37, + "learning_rate": 3.5053404130619246e-05, + "loss": 1.9436, + "step": 187500 + }, + { + "epoch": 0.37, + "learning_rate": 3.499873159722526e-05, + "loss": 1.9425, + "step": 188000 + }, + { + "epoch": 0.37, + "learning_rate": 3.494405906383128e-05, + "loss": 1.9384, + "step": 188500 + }, + { + "epoch": 0.37, + "learning_rate": 3.4889386530437294e-05, + "loss": 1.9398, + "step": 189000 + }, + { + "epoch": 0.37, + "learning_rate": 3.483471399704331e-05, + "loss": 1.9428, + "step": 189500 + }, + { + "epoch": 0.37, + "learning_rate": 3.478004146364933e-05, + "loss": 1.9416, + "step": 190000 + }, + { + "epoch": 0.37, + "learning_rate": 3.472536893025534e-05, + "loss": 1.9409, + "step": 190500 + }, + { + "epoch": 0.38, + "learning_rate": 3.4670696396861366e-05, + "loss": 1.9423, + "step": 191000 + }, + { + "epoch": 0.38, + "learning_rate": 3.461602386346738e-05, + "loss": 1.9409, + "step": 191500 + }, + { + "epoch": 0.38, + "learning_rate": 3.4561351330073395e-05, + "loss": 1.9399, + "step": 192000 + }, + { + "epoch": 0.38, + "learning_rate": 3.4506678796679406e-05, + "loss": 1.94, + "step": 192500 + }, + { + "epoch": 0.38, + "learning_rate": 3.4452006263285424e-05, + "loss": 1.9386, + "step": 193000 + }, + { + "epoch": 0.38, + "learning_rate": 3.439733372989145e-05, + "loss": 1.9391, + "step": 193500 + }, + { + "epoch": 0.38, + "learning_rate": 3.434266119649746e-05, + "loss": 1.9412, + "step": 194000 + }, + { + "epoch": 0.38, + "learning_rate": 3.428798866310348e-05, + "loss": 1.9384, + "step": 194500 + }, + { + "epoch": 0.38, + "learning_rate": 3.423331612970949e-05, + "loss": 1.9364, + "step": 195000 + }, + { + "epoch": 0.38, + "learning_rate": 3.4178643596315514e-05, + "loss": 1.9354, + "step": 195500 + }, + { + "epoch": 0.39, + "learning_rate": 3.4123971062921525e-05, + "loss": 1.9374, + "step": 196000 + }, + { + "epoch": 0.39, + "learning_rate": 3.406929852952754e-05, + "loss": 1.9363, + "step": 196500 + }, + { + "epoch": 0.39, + "learning_rate": 3.401462599613356e-05, + "loss": 1.9333, + "step": 197000 + }, + { + "epoch": 0.39, + "learning_rate": 3.395995346273957e-05, + "loss": 1.9375, + "step": 197500 + }, + { + "epoch": 0.39, + "learning_rate": 3.39052809293456e-05, + "loss": 1.9422, + "step": 198000 + }, + { + "epoch": 0.39, + "learning_rate": 3.385060839595161e-05, + "loss": 1.9363, + "step": 198500 + }, + { + "epoch": 0.39, + "learning_rate": 3.3795935862557626e-05, + "loss": 1.9335, + "step": 199000 + }, + { + "epoch": 0.39, + "learning_rate": 3.3741263329163644e-05, + "loss": 1.9378, + "step": 199500 + }, + { + "epoch": 0.39, + "learning_rate": 3.3686590795769656e-05, + "loss": 1.9394, + "step": 200000 + }, + { + "epoch": 0.39, + "learning_rate": 3.363191826237568e-05, + "loss": 1.927, + "step": 200500 + }, + { + "epoch": 0.4, + "learning_rate": 3.357724572898169e-05, + "loss": 1.9303, + "step": 201000 + }, + { + "epoch": 0.4, + "learning_rate": 3.352257319558771e-05, + "loss": 1.9333, + "step": 201500 + }, + { + "epoch": 0.4, + "learning_rate": 3.346790066219372e-05, + "loss": 1.9285, + "step": 202000 + }, + { + "epoch": 0.4, + "learning_rate": 3.3413228128799746e-05, + "loss": 1.9337, + "step": 202500 + }, + { + "epoch": 0.4, + "learning_rate": 3.3358555595405764e-05, + "loss": 1.9339, + "step": 203000 + }, + { + "epoch": 0.4, + "learning_rate": 3.3303883062011775e-05, + "loss": 1.9368, + "step": 203500 + }, + { + "epoch": 0.4, + "learning_rate": 3.324921052861779e-05, + "loss": 1.934, + "step": 204000 + }, + { + "epoch": 0.4, + "learning_rate": 3.3194537995223804e-05, + "loss": 1.9356, + "step": 204500 + }, + { + "epoch": 0.4, + "learning_rate": 3.313986546182983e-05, + "loss": 1.9305, + "step": 205000 + }, + { + "epoch": 0.4, + "learning_rate": 3.308519292843584e-05, + "loss": 1.9291, + "step": 205500 + }, + { + "epoch": 0.41, + "learning_rate": 3.303052039504186e-05, + "loss": 1.9323, + "step": 206000 + }, + { + "epoch": 0.41, + "learning_rate": 3.2975847861647876e-05, + "loss": 1.9343, + "step": 206500 + }, + { + "epoch": 0.41, + "learning_rate": 3.2921175328253894e-05, + "loss": 1.9315, + "step": 207000 + }, + { + "epoch": 0.41, + "learning_rate": 3.286650279485991e-05, + "loss": 1.9278, + "step": 207500 + }, + { + "epoch": 0.41, + "learning_rate": 3.2811830261465923e-05, + "loss": 1.9295, + "step": 208000 + }, + { + "epoch": 0.41, + "learning_rate": 3.275715772807194e-05, + "loss": 1.9323, + "step": 208500 + }, + { + "epoch": 0.41, + "learning_rate": 3.270248519467796e-05, + "loss": 1.9287, + "step": 209000 + }, + { + "epoch": 0.41, + "learning_rate": 3.264781266128398e-05, + "loss": 1.9327, + "step": 209500 + }, + { + "epoch": 0.41, + "learning_rate": 3.2593140127889995e-05, + "loss": 1.9311, + "step": 210000 + }, + { + "epoch": 0.41, + "learning_rate": 3.253846759449601e-05, + "loss": 1.9262, + "step": 210500 + }, + { + "epoch": 0.42, + "learning_rate": 3.2483795061102025e-05, + "loss": 1.9273, + "step": 211000 + }, + { + "epoch": 0.42, + "learning_rate": 3.2429122527708036e-05, + "loss": 1.9276, + "step": 211500 + }, + { + "epoch": 0.42, + "learning_rate": 3.237444999431406e-05, + "loss": 1.9307, + "step": 212000 + }, + { + "epoch": 0.42, + "learning_rate": 3.231977746092008e-05, + "loss": 1.9261, + "step": 212500 + }, + { + "epoch": 0.42, + "learning_rate": 3.226510492752609e-05, + "loss": 1.9286, + "step": 213000 + }, + { + "epoch": 0.42, + "learning_rate": 3.221043239413211e-05, + "loss": 1.9315, + "step": 213500 + }, + { + "epoch": 0.42, + "learning_rate": 3.2155759860738126e-05, + "loss": 1.9297, + "step": 214000 + }, + { + "epoch": 0.42, + "learning_rate": 3.2101087327344144e-05, + "loss": 1.9317, + "step": 214500 + }, + { + "epoch": 0.42, + "learning_rate": 3.2046414793950155e-05, + "loss": 1.927, + "step": 215000 + }, + { + "epoch": 0.42, + "learning_rate": 3.199174226055617e-05, + "loss": 1.9279, + "step": 215500 + }, + { + "epoch": 0.43, + "learning_rate": 3.193706972716219e-05, + "loss": 1.922, + "step": 216000 + }, + { + "epoch": 0.43, + "learning_rate": 3.188239719376821e-05, + "loss": 1.9259, + "step": 216500 + }, + { + "epoch": 0.43, + "learning_rate": 3.182772466037423e-05, + "loss": 1.9234, + "step": 217000 + }, + { + "epoch": 0.43, + "learning_rate": 3.177305212698024e-05, + "loss": 1.9232, + "step": 217500 + }, + { + "epoch": 0.43, + "learning_rate": 3.1718379593586256e-05, + "loss": 1.9226, + "step": 218000 + }, + { + "epoch": 0.43, + "learning_rate": 3.1663707060192274e-05, + "loss": 1.922, + "step": 218500 + }, + { + "epoch": 0.43, + "learning_rate": 3.160903452679829e-05, + "loss": 1.9215, + "step": 219000 + }, + { + "epoch": 0.43, + "learning_rate": 3.155436199340431e-05, + "loss": 1.9207, + "step": 219500 + }, + { + "epoch": 0.43, + "learning_rate": 3.149968946001032e-05, + "loss": 1.9232, + "step": 220000 + }, + { + "epoch": 0.43, + "learning_rate": 3.144501692661634e-05, + "loss": 1.9223, + "step": 220500 + }, + { + "epoch": 0.43, + "learning_rate": 3.139034439322236e-05, + "loss": 1.9224, + "step": 221000 + }, + { + "epoch": 0.44, + "learning_rate": 3.1335671859828376e-05, + "loss": 1.9219, + "step": 221500 + }, + { + "epoch": 0.44, + "learning_rate": 3.1280999326434394e-05, + "loss": 1.9235, + "step": 222000 + }, + { + "epoch": 0.44, + "learning_rate": 3.1226326793040405e-05, + "loss": 1.9225, + "step": 222500 + }, + { + "epoch": 0.44, + "learning_rate": 3.117165425964642e-05, + "loss": 1.92, + "step": 223000 + }, + { + "epoch": 0.44, + "learning_rate": 3.111698172625244e-05, + "loss": 1.9148, + "step": 223500 + }, + { + "epoch": 0.44, + "learning_rate": 3.106230919285846e-05, + "loss": 1.9245, + "step": 224000 + }, + { + "epoch": 0.44, + "learning_rate": 3.100763665946447e-05, + "loss": 1.9184, + "step": 224500 + }, + { + "epoch": 0.44, + "learning_rate": 3.095296412607049e-05, + "loss": 1.9193, + "step": 225000 + }, + { + "epoch": 0.44, + "learning_rate": 3.0898291592676506e-05, + "loss": 1.917, + "step": 225500 + }, + { + "epoch": 0.44, + "learning_rate": 3.0843619059282524e-05, + "loss": 1.9092, + "step": 226000 + }, + { + "epoch": 0.45, + "learning_rate": 3.078894652588854e-05, + "loss": 1.9185, + "step": 226500 + }, + { + "epoch": 0.45, + "learning_rate": 3.073427399249455e-05, + "loss": 1.9165, + "step": 227000 + }, + { + "epoch": 0.45, + "learning_rate": 3.067960145910057e-05, + "loss": 1.9171, + "step": 227500 + }, + { + "epoch": 0.45, + "learning_rate": 3.062492892570659e-05, + "loss": 1.9182, + "step": 228000 + }, + { + "epoch": 0.45, + "learning_rate": 3.057025639231261e-05, + "loss": 1.9182, + "step": 228500 + }, + { + "epoch": 0.45, + "learning_rate": 3.0515583858918622e-05, + "loss": 1.9155, + "step": 229000 + }, + { + "epoch": 0.45, + "learning_rate": 3.0460911325524636e-05, + "loss": 1.9137, + "step": 229500 + }, + { + "epoch": 0.45, + "learning_rate": 3.0406238792130654e-05, + "loss": 1.9162, + "step": 230000 + }, + { + "epoch": 0.45, + "learning_rate": 3.0351566258736676e-05, + "loss": 1.9165, + "step": 230500 + }, + { + "epoch": 0.45, + "learning_rate": 3.029689372534269e-05, + "loss": 1.9184, + "step": 231000 + }, + { + "epoch": 0.46, + "learning_rate": 3.0242221191948705e-05, + "loss": 1.9127, + "step": 231500 + }, + { + "epoch": 0.46, + "learning_rate": 3.018754865855472e-05, + "loss": 1.9197, + "step": 232000 + }, + { + "epoch": 0.46, + "learning_rate": 3.0132876125160738e-05, + "loss": 1.915, + "step": 232500 + }, + { + "epoch": 0.46, + "learning_rate": 3.0078203591766756e-05, + "loss": 1.9189, + "step": 233000 + }, + { + "epoch": 0.46, + "learning_rate": 3.0023531058372774e-05, + "loss": 1.9154, + "step": 233500 + }, + { + "epoch": 0.46, + "learning_rate": 2.9968858524978788e-05, + "loss": 1.9117, + "step": 234000 + }, + { + "epoch": 0.46, + "learning_rate": 2.9914185991584803e-05, + "loss": 1.911, + "step": 234500 + }, + { + "epoch": 0.46, + "learning_rate": 2.9859513458190824e-05, + "loss": 1.9191, + "step": 235000 + }, + { + "epoch": 0.46, + "learning_rate": 2.980484092479684e-05, + "loss": 1.9158, + "step": 235500 + }, + { + "epoch": 0.46, + "learning_rate": 2.9750168391402854e-05, + "loss": 1.9149, + "step": 236000 + }, + { + "epoch": 0.47, + "learning_rate": 2.969549585800887e-05, + "loss": 1.9146, + "step": 236500 + }, + { + "epoch": 0.47, + "learning_rate": 2.9640823324614886e-05, + "loss": 1.9111, + "step": 237000 + }, + { + "epoch": 0.47, + "learning_rate": 2.9586150791220908e-05, + "loss": 1.9105, + "step": 237500 + }, + { + "epoch": 0.47, + "learning_rate": 2.9531478257826922e-05, + "loss": 1.9125, + "step": 238000 + }, + { + "epoch": 0.47, + "learning_rate": 2.9476805724432937e-05, + "loss": 1.9051, + "step": 238500 + }, + { + "epoch": 0.47, + "learning_rate": 2.942213319103895e-05, + "loss": 1.913, + "step": 239000 + }, + { + "epoch": 0.47, + "learning_rate": 2.936746065764497e-05, + "loss": 1.9067, + "step": 239500 + }, + { + "epoch": 0.47, + "learning_rate": 2.931278812425099e-05, + "loss": 1.9093, + "step": 240000 + }, + { + "epoch": 0.47, + "learning_rate": 2.9258115590857005e-05, + "loss": 1.9095, + "step": 240500 + }, + { + "epoch": 0.47, + "learning_rate": 2.920344305746302e-05, + "loss": 1.9129, + "step": 241000 + }, + { + "epoch": 0.48, + "learning_rate": 2.9148770524069035e-05, + "loss": 1.9119, + "step": 241500 + }, + { + "epoch": 0.48, + "learning_rate": 2.9094097990675056e-05, + "loss": 1.911, + "step": 242000 + }, + { + "epoch": 0.48, + "learning_rate": 2.903942545728107e-05, + "loss": 1.9078, + "step": 242500 + }, + { + "epoch": 0.48, + "learning_rate": 2.898475292388709e-05, + "loss": 1.9113, + "step": 243000 + }, + { + "epoch": 0.48, + "learning_rate": 2.8930080390493103e-05, + "loss": 1.9058, + "step": 243500 + }, + { + "epoch": 0.48, + "learning_rate": 2.8875407857099118e-05, + "loss": 1.9114, + "step": 244000 + }, + { + "epoch": 0.48, + "learning_rate": 2.882073532370514e-05, + "loss": 1.9073, + "step": 244500 + }, + { + "epoch": 0.48, + "learning_rate": 2.8766062790311154e-05, + "loss": 1.906, + "step": 245000 + }, + { + "epoch": 0.48, + "learning_rate": 2.871139025691717e-05, + "loss": 1.9072, + "step": 245500 + }, + { + "epoch": 0.48, + "learning_rate": 2.8656717723523186e-05, + "loss": 1.9083, + "step": 246000 + }, + { + "epoch": 0.49, + "learning_rate": 2.86020451901292e-05, + "loss": 1.9047, + "step": 246500 + }, + { + "epoch": 0.49, + "learning_rate": 2.8547372656735222e-05, + "loss": 1.9051, + "step": 247000 + }, + { + "epoch": 0.49, + "learning_rate": 2.8492700123341237e-05, + "loss": 1.9053, + "step": 247500 + }, + { + "epoch": 0.49, + "learning_rate": 2.843802758994725e-05, + "loss": 1.9107, + "step": 248000 + }, + { + "epoch": 0.49, + "learning_rate": 2.838335505655327e-05, + "loss": 1.9094, + "step": 248500 + }, + { + "epoch": 0.49, + "learning_rate": 2.8328682523159288e-05, + "loss": 1.9081, + "step": 249000 + }, + { + "epoch": 0.49, + "learning_rate": 2.8274009989765306e-05, + "loss": 1.9067, + "step": 249500 + }, + { + "epoch": 0.49, + "learning_rate": 2.821933745637132e-05, + "loss": 1.9054, + "step": 250000 + }, + { + "epoch": 0.49, + "learning_rate": 2.8164664922977335e-05, + "loss": 1.9007, + "step": 250500 + }, + { + "epoch": 0.49, + "learning_rate": 2.810999238958335e-05, + "loss": 1.9064, + "step": 251000 + }, + { + "epoch": 0.5, + "learning_rate": 2.805531985618937e-05, + "loss": 1.8992, + "step": 251500 + }, + { + "epoch": 0.5, + "learning_rate": 2.8000647322795386e-05, + "loss": 1.9074, + "step": 252000 + }, + { + "epoch": 0.5, + "learning_rate": 2.7945974789401404e-05, + "loss": 1.9061, + "step": 252500 + }, + { + "epoch": 0.5, + "learning_rate": 2.7891302256007418e-05, + "loss": 1.9017, + "step": 253000 + }, + { + "epoch": 0.5, + "learning_rate": 2.7836629722613433e-05, + "loss": 1.9018, + "step": 253500 + }, + { + "epoch": 0.5, + "learning_rate": 2.7781957189219454e-05, + "loss": 1.9027, + "step": 254000 + }, + { + "epoch": 0.5, + "learning_rate": 2.772728465582547e-05, + "loss": 1.9062, + "step": 254500 + }, + { + "epoch": 0.5, + "learning_rate": 2.7672612122431483e-05, + "loss": 1.9026, + "step": 255000 + }, + { + "epoch": 0.5, + "learning_rate": 2.76179395890375e-05, + "loss": 1.9, + "step": 255500 + }, + { + "epoch": 0.5, + "learning_rate": 2.7563267055643523e-05, + "loss": 1.9043, + "step": 256000 + }, + { + "epoch": 0.5, + "learning_rate": 2.7508594522249537e-05, + "loss": 1.9041, + "step": 256500 + }, + { + "epoch": 0.51, + "learning_rate": 2.7453921988855552e-05, + "loss": 1.9025, + "step": 257000 + }, + { + "epoch": 0.51, + "learning_rate": 2.7399249455461567e-05, + "loss": 1.8992, + "step": 257500 + }, + { + "epoch": 0.51, + "learning_rate": 2.7344576922067585e-05, + "loss": 1.9006, + "step": 258000 + }, + { + "epoch": 0.51, + "learning_rate": 2.7289904388673603e-05, + "loss": 1.8998, + "step": 258500 + }, + { + "epoch": 0.51, + "learning_rate": 2.723523185527962e-05, + "loss": 1.8978, + "step": 259000 + }, + { + "epoch": 0.51, + "learning_rate": 2.7180559321885635e-05, + "loss": 1.8991, + "step": 259500 + }, + { + "epoch": 0.51, + "learning_rate": 2.712588678849165e-05, + "loss": 1.898, + "step": 260000 + }, + { + "epoch": 0.51, + "learning_rate": 2.707121425509767e-05, + "loss": 1.9011, + "step": 260500 + }, + { + "epoch": 0.51, + "learning_rate": 2.7016541721703686e-05, + "loss": 1.8968, + "step": 261000 + }, + { + "epoch": 0.51, + "learning_rate": 2.69618691883097e-05, + "loss": 1.9062, + "step": 261500 + }, + { + "epoch": 0.52, + "learning_rate": 2.690719665491572e-05, + "loss": 1.9018, + "step": 262000 + }, + { + "epoch": 0.52, + "learning_rate": 2.6852524121521733e-05, + "loss": 1.9018, + "step": 262500 + }, + { + "epoch": 0.52, + "learning_rate": 2.6797851588127754e-05, + "loss": 1.8995, + "step": 263000 + }, + { + "epoch": 0.52, + "learning_rate": 2.674317905473377e-05, + "loss": 1.8954, + "step": 263500 + }, + { + "epoch": 0.52, + "learning_rate": 2.6688506521339784e-05, + "loss": 1.8992, + "step": 264000 + }, + { + "epoch": 0.52, + "learning_rate": 2.6633833987945798e-05, + "loss": 1.8957, + "step": 264500 + }, + { + "epoch": 0.52, + "learning_rate": 2.6579161454551816e-05, + "loss": 1.9017, + "step": 265000 + }, + { + "epoch": 0.52, + "learning_rate": 2.6524488921157838e-05, + "loss": 1.8975, + "step": 265500 + }, + { + "epoch": 0.52, + "learning_rate": 2.6469816387763852e-05, + "loss": 1.8987, + "step": 266000 + }, + { + "epoch": 0.52, + "learning_rate": 2.6415143854369867e-05, + "loss": 1.8962, + "step": 266500 + }, + { + "epoch": 0.53, + "learning_rate": 2.636047132097588e-05, + "loss": 1.9013, + "step": 267000 + }, + { + "epoch": 0.53, + "learning_rate": 2.6305798787581903e-05, + "loss": 1.9004, + "step": 267500 + }, + { + "epoch": 0.53, + "learning_rate": 2.6251126254187917e-05, + "loss": 1.8955, + "step": 268000 + }, + { + "epoch": 0.53, + "learning_rate": 2.6196453720793935e-05, + "loss": 1.8956, + "step": 268500 + }, + { + "epoch": 0.53, + "learning_rate": 2.614178118739995e-05, + "loss": 1.8941, + "step": 269000 + }, + { + "epoch": 0.53, + "learning_rate": 2.6087108654005965e-05, + "loss": 1.9004, + "step": 269500 + }, + { + "epoch": 0.53, + "learning_rate": 2.6032436120611986e-05, + "loss": 1.8978, + "step": 270000 + }, + { + "epoch": 0.53, + "learning_rate": 2.5977763587218e-05, + "loss": 1.8954, + "step": 270500 + }, + { + "epoch": 0.53, + "learning_rate": 2.5923091053824015e-05, + "loss": 1.8948, + "step": 271000 + }, + { + "epoch": 0.53, + "learning_rate": 2.5868418520430033e-05, + "loss": 1.8946, + "step": 271500 + }, + { + "epoch": 0.54, + "learning_rate": 2.5813745987036048e-05, + "loss": 1.897, + "step": 272000 + }, + { + "epoch": 0.54, + "learning_rate": 2.575907345364207e-05, + "loss": 1.897, + "step": 272500 + }, + { + "epoch": 0.54, + "learning_rate": 2.5704400920248084e-05, + "loss": 1.8961, + "step": 273000 + }, + { + "epoch": 0.54, + "learning_rate": 2.56497283868541e-05, + "loss": 1.8925, + "step": 273500 + }, + { + "epoch": 0.54, + "learning_rate": 2.5595055853460113e-05, + "loss": 1.8938, + "step": 274000 + }, + { + "epoch": 0.54, + "learning_rate": 2.5540383320066135e-05, + "loss": 1.8928, + "step": 274500 + }, + { + "epoch": 0.54, + "learning_rate": 2.5485710786672153e-05, + "loss": 1.8963, + "step": 275000 + }, + { + "epoch": 0.54, + "learning_rate": 2.5431038253278167e-05, + "loss": 1.8923, + "step": 275500 + }, + { + "epoch": 0.54, + "learning_rate": 2.5376365719884182e-05, + "loss": 1.8882, + "step": 276000 + }, + { + "epoch": 0.54, + "learning_rate": 2.5321693186490196e-05, + "loss": 1.8887, + "step": 276500 + }, + { + "epoch": 0.55, + "learning_rate": 2.5267020653096218e-05, + "loss": 1.8903, + "step": 277000 + }, + { + "epoch": 0.55, + "learning_rate": 2.5212348119702232e-05, + "loss": 1.8941, + "step": 277500 + }, + { + "epoch": 0.55, + "learning_rate": 2.515767558630825e-05, + "loss": 1.8943, + "step": 278000 + }, + { + "epoch": 0.55, + "learning_rate": 2.5103003052914265e-05, + "loss": 1.8924, + "step": 278500 + }, + { + "epoch": 0.55, + "learning_rate": 2.504833051952028e-05, + "loss": 1.8855, + "step": 279000 + }, + { + "epoch": 0.55, + "learning_rate": 2.4993657986126298e-05, + "loss": 1.8918, + "step": 279500 + }, + { + "epoch": 0.55, + "learning_rate": 2.4938985452732316e-05, + "loss": 1.8891, + "step": 280000 + }, + { + "epoch": 0.55, + "learning_rate": 2.488431291933833e-05, + "loss": 1.8874, + "step": 280500 + }, + { + "epoch": 0.55, + "learning_rate": 2.4829640385944348e-05, + "loss": 1.8907, + "step": 281000 + }, + { + "epoch": 0.55, + "learning_rate": 2.4774967852550366e-05, + "loss": 1.8913, + "step": 281500 + }, + { + "epoch": 0.56, + "learning_rate": 2.472029531915638e-05, + "loss": 1.8888, + "step": 282000 + }, + { + "epoch": 0.56, + "learning_rate": 2.46656227857624e-05, + "loss": 1.8924, + "step": 282500 + }, + { + "epoch": 0.56, + "learning_rate": 2.4610950252368413e-05, + "loss": 1.8889, + "step": 283000 + }, + { + "epoch": 0.56, + "learning_rate": 2.455627771897443e-05, + "loss": 1.8866, + "step": 283500 + }, + { + "epoch": 0.56, + "learning_rate": 2.450160518558045e-05, + "loss": 1.8915, + "step": 284000 + }, + { + "epoch": 0.56, + "learning_rate": 2.4446932652186467e-05, + "loss": 1.8896, + "step": 284500 + }, + { + "epoch": 0.56, + "learning_rate": 2.4392260118792482e-05, + "loss": 1.8921, + "step": 285000 + }, + { + "epoch": 0.56, + "learning_rate": 2.43375875853985e-05, + "loss": 1.8854, + "step": 285500 + }, + { + "epoch": 0.56, + "learning_rate": 2.4282915052004515e-05, + "loss": 1.8887, + "step": 286000 + }, + { + "epoch": 0.56, + "learning_rate": 2.422824251861053e-05, + "loss": 1.8892, + "step": 286500 + }, + { + "epoch": 0.56, + "learning_rate": 2.4173569985216547e-05, + "loss": 1.8908, + "step": 287000 + }, + { + "epoch": 0.57, + "learning_rate": 2.4118897451822565e-05, + "loss": 1.8901, + "step": 287500 + }, + { + "epoch": 0.57, + "learning_rate": 2.4064224918428583e-05, + "loss": 1.8906, + "step": 288000 + }, + { + "epoch": 0.57, + "learning_rate": 2.4009552385034598e-05, + "loss": 1.8862, + "step": 288500 + }, + { + "epoch": 0.57, + "learning_rate": 2.3954879851640616e-05, + "loss": 1.8915, + "step": 289000 + }, + { + "epoch": 0.57, + "learning_rate": 2.390020731824663e-05, + "loss": 1.8891, + "step": 289500 + }, + { + "epoch": 0.57, + "learning_rate": 2.3845534784852645e-05, + "loss": 1.8855, + "step": 290000 + }, + { + "epoch": 0.57, + "learning_rate": 2.3790862251458667e-05, + "loss": 1.8828, + "step": 290500 + }, + { + "epoch": 0.57, + "learning_rate": 2.373618971806468e-05, + "loss": 1.8845, + "step": 291000 + }, + { + "epoch": 0.57, + "learning_rate": 2.36815171846707e-05, + "loss": 1.8824, + "step": 291500 + }, + { + "epoch": 0.57, + "learning_rate": 2.3626844651276714e-05, + "loss": 1.8832, + "step": 292000 + }, + { + "epoch": 0.58, + "learning_rate": 2.3572172117882732e-05, + "loss": 1.8862, + "step": 292500 + }, + { + "epoch": 0.58, + "learning_rate": 2.3517499584488746e-05, + "loss": 1.8862, + "step": 293000 + }, + { + "epoch": 0.58, + "learning_rate": 2.3462827051094764e-05, + "loss": 1.8837, + "step": 293500 + }, + { + "epoch": 0.58, + "learning_rate": 2.3408154517700782e-05, + "loss": 1.8864, + "step": 294000 + }, + { + "epoch": 0.58, + "learning_rate": 2.3353481984306797e-05, + "loss": 1.886, + "step": 294500 + }, + { + "epoch": 0.58, + "learning_rate": 2.3298809450912815e-05, + "loss": 1.884, + "step": 295000 + }, + { + "epoch": 0.58, + "learning_rate": 2.324413691751883e-05, + "loss": 1.8834, + "step": 295500 + }, + { + "epoch": 0.58, + "learning_rate": 2.3189464384124848e-05, + "loss": 1.8848, + "step": 296000 + }, + { + "epoch": 0.58, + "learning_rate": 2.3134791850730862e-05, + "loss": 1.8856, + "step": 296500 + }, + { + "epoch": 0.58, + "learning_rate": 2.308011931733688e-05, + "loss": 1.8874, + "step": 297000 + }, + { + "epoch": 0.59, + "learning_rate": 2.3025446783942898e-05, + "loss": 1.8775, + "step": 297500 + }, + { + "epoch": 0.59, + "learning_rate": 2.2970774250548913e-05, + "loss": 1.8859, + "step": 298000 + }, + { + "epoch": 0.59, + "learning_rate": 2.291610171715493e-05, + "loss": 1.8799, + "step": 298500 + }, + { + "epoch": 0.59, + "learning_rate": 2.2861429183760945e-05, + "loss": 1.8833, + "step": 299000 + }, + { + "epoch": 0.59, + "learning_rate": 2.2806756650366963e-05, + "loss": 1.882, + "step": 299500 + }, + { + "epoch": 0.59, + "learning_rate": 2.275208411697298e-05, + "loss": 1.8872, + "step": 300000 + }, + { + "epoch": 0.67, + "learning_rate": 1.8590144171628205e-05, + "loss": 1.8825, + "step": 300500 + }, + { + "epoch": 0.67, + "learning_rate": 1.852863750430547e-05, + "loss": 1.8818, + "step": 301000 + }, + { + "epoch": 0.67, + "learning_rate": 1.846713083698273e-05, + "loss": 1.8859, + "step": 301500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8405624169659992e-05, + "loss": 1.8773, + "step": 302000 + }, + { + "epoch": 0.67, + "learning_rate": 1.8344117502337256e-05, + "loss": 1.8764, + "step": 302500 + }, + { + "epoch": 0.67, + "learning_rate": 1.8282610835014516e-05, + "loss": 1.8826, + "step": 303000 + }, + { + "epoch": 0.67, + "learning_rate": 1.822110416769178e-05, + "loss": 1.8805, + "step": 303500 + }, + { + "epoch": 0.67, + "learning_rate": 1.815959750036904e-05, + "loss": 1.8787, + "step": 304000 + }, + { + "epoch": 0.67, + "learning_rate": 1.8098090833046304e-05, + "loss": 1.8793, + "step": 304500 + }, + { + "epoch": 0.68, + "learning_rate": 1.8036584165723567e-05, + "loss": 1.8797, + "step": 305000 + }, + { + "epoch": 0.68, + "learning_rate": 1.7975077498400827e-05, + "loss": 1.875, + "step": 305500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7913570831078088e-05, + "loss": 1.8747, + "step": 306000 + }, + { + "epoch": 0.68, + "learning_rate": 1.785206416375535e-05, + "loss": 1.8795, + "step": 306500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7790557496432615e-05, + "loss": 1.8761, + "step": 307000 + }, + { + "epoch": 0.68, + "learning_rate": 1.7729050829109875e-05, + "loss": 1.878, + "step": 307500 + }, + { + "epoch": 0.68, + "learning_rate": 1.766754416178714e-05, + "loss": 1.8743, + "step": 308000 + }, + { + "epoch": 0.68, + "learning_rate": 1.76060374944644e-05, + "loss": 1.8762, + "step": 308500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7544530827141666e-05, + "loss": 1.8739, + "step": 309000 + }, + { + "epoch": 0.69, + "learning_rate": 1.7483024159818926e-05, + "loss": 1.8747, + "step": 309500 + }, + { + "epoch": 0.69, + "learning_rate": 1.7421517492496186e-05, + "loss": 1.8703, + "step": 310000 + }, + { + "epoch": 0.69, + "learning_rate": 1.736001082517345e-05, + "loss": 1.8709, + "step": 310500 + }, + { + "epoch": 0.69, + "learning_rate": 1.729850415785071e-05, + "loss": 1.8757, + "step": 311000 + }, + { + "epoch": 0.69, + "learning_rate": 1.7236997490527974e-05, + "loss": 1.8746, + "step": 311500 + }, + { + "epoch": 0.69, + "learning_rate": 1.7175490823205237e-05, + "loss": 1.8744, + "step": 312000 + }, + { + "epoch": 0.69, + "learning_rate": 1.7113984155882498e-05, + "loss": 1.8742, + "step": 312500 + }, + { + "epoch": 0.69, + "learning_rate": 1.7052477488559758e-05, + "loss": 1.8791, + "step": 313000 + }, + { + "epoch": 0.69, + "learning_rate": 1.6990970821237025e-05, + "loss": 1.871, + "step": 313500 + }, + { + "epoch": 0.7, + "learning_rate": 1.6929464153914285e-05, + "loss": 1.8742, + "step": 314000 + }, + { + "epoch": 0.7, + "learning_rate": 1.686795748659155e-05, + "loss": 1.873, + "step": 314500 + }, + { + "epoch": 0.7, + "learning_rate": 1.680645081926881e-05, + "loss": 1.8775, + "step": 315000 + }, + { + "epoch": 0.7, + "learning_rate": 1.6744944151946072e-05, + "loss": 1.8746, + "step": 315500 + }, + { + "epoch": 0.7, + "learning_rate": 1.6683437484623336e-05, + "loss": 1.8686, + "step": 316000 + }, + { + "epoch": 0.7, + "learning_rate": 1.6621930817300596e-05, + "loss": 1.8667, + "step": 316500 + }, + { + "epoch": 0.7, + "learning_rate": 1.6560424149977856e-05, + "loss": 1.8716, + "step": 317000 + }, + { + "epoch": 0.7, + "learning_rate": 1.649891748265512e-05, + "loss": 1.8693, + "step": 317500 + }, + { + "epoch": 0.7, + "learning_rate": 1.6437410815332384e-05, + "loss": 1.8752, + "step": 318000 + }, + { + "epoch": 0.71, + "learning_rate": 1.6375904148009644e-05, + "loss": 1.8677, + "step": 318500 + }, + { + "epoch": 0.71, + "learning_rate": 1.6314397480686908e-05, + "loss": 1.8724, + "step": 319000 + }, + { + "epoch": 0.71, + "learning_rate": 1.6252890813364168e-05, + "loss": 1.8675, + "step": 319500 + }, + { + "epoch": 0.71, + "learning_rate": 1.619138414604143e-05, + "loss": 1.8674, + "step": 320000 + }, + { + "epoch": 0.71, + "learning_rate": 1.6129877478718695e-05, + "loss": 1.8689, + "step": 320500 + }, + { + "epoch": 0.71, + "learning_rate": 1.6068370811395955e-05, + "loss": 1.8717, + "step": 321000 + }, + { + "epoch": 0.71, + "learning_rate": 1.600686414407322e-05, + "loss": 1.8695, + "step": 321500 + }, + { + "epoch": 0.71, + "learning_rate": 1.594535747675048e-05, + "loss": 1.869, + "step": 322000 + }, + { + "epoch": 0.71, + "learning_rate": 1.5883850809427743e-05, + "loss": 1.8737, + "step": 322500 + }, + { + "epoch": 0.72, + "learning_rate": 1.5822344142105006e-05, + "loss": 1.868, + "step": 323000 + }, + { + "epoch": 0.72, + "learning_rate": 1.5760837474782266e-05, + "loss": 1.87, + "step": 323500 + }, + { + "epoch": 0.72, + "learning_rate": 1.5699330807459527e-05, + "loss": 1.8717, + "step": 324000 + }, + { + "epoch": 0.72, + "learning_rate": 1.5637824140136794e-05, + "loss": 1.8675, + "step": 324500 + }, + { + "epoch": 0.72, + "learning_rate": 1.5576317472814054e-05, + "loss": 1.8716, + "step": 325000 + }, + { + "epoch": 0.72, + "learning_rate": 1.5514810805491317e-05, + "loss": 1.8706, + "step": 325500 + }, + { + "epoch": 0.72, + "learning_rate": 1.5453304138168578e-05, + "loss": 1.867, + "step": 326000 + }, + { + "epoch": 0.72, + "learning_rate": 1.5391797470845838e-05, + "loss": 1.8638, + "step": 326500 + }, + { + "epoch": 0.72, + "learning_rate": 1.5330290803523105e-05, + "loss": 1.8642, + "step": 327000 + }, + { + "epoch": 0.73, + "learning_rate": 1.5268784136200365e-05, + "loss": 1.8672, + "step": 327500 + }, + { + "epoch": 0.73, + "learning_rate": 1.5207277468877625e-05, + "loss": 1.8677, + "step": 328000 + }, + { + "epoch": 0.73, + "learning_rate": 1.514577080155489e-05, + "loss": 1.867, + "step": 328500 + }, + { + "epoch": 0.73, + "learning_rate": 1.508426413423215e-05, + "loss": 1.8665, + "step": 329000 + }, + { + "epoch": 0.73, + "learning_rate": 1.5022757466909413e-05, + "loss": 1.8673, + "step": 329500 + }, + { + "epoch": 0.73, + "learning_rate": 1.4961250799586676e-05, + "loss": 1.8719, + "step": 330000 + }, + { + "epoch": 0.73, + "learning_rate": 1.4899744132263938e-05, + "loss": 1.8652, + "step": 330500 + }, + { + "epoch": 0.73, + "learning_rate": 1.4838237464941202e-05, + "loss": 1.865, + "step": 331000 + }, + { + "epoch": 0.73, + "learning_rate": 1.4776730797618462e-05, + "loss": 1.8645, + "step": 331500 + }, + { + "epoch": 0.74, + "learning_rate": 1.4715224130295724e-05, + "loss": 1.8661, + "step": 332000 + }, + { + "epoch": 0.74, + "learning_rate": 1.4653717462972988e-05, + "loss": 1.8658, + "step": 332500 + }, + { + "epoch": 0.74, + "learning_rate": 1.459221079565025e-05, + "loss": 1.8679, + "step": 333000 + }, + { + "epoch": 0.74, + "learning_rate": 1.453070412832751e-05, + "loss": 1.867, + "step": 333500 + }, + { + "epoch": 0.74, + "learning_rate": 1.4469197461004775e-05, + "loss": 1.8648, + "step": 334000 + }, + { + "epoch": 0.74, + "learning_rate": 1.4407690793682035e-05, + "loss": 1.8678, + "step": 334500 + }, + { + "epoch": 0.74, + "learning_rate": 1.4346184126359297e-05, + "loss": 1.8665, + "step": 335000 + }, + { + "epoch": 0.74, + "learning_rate": 1.428467745903656e-05, + "loss": 1.8717, + "step": 335500 + }, + { + "epoch": 0.74, + "learning_rate": 1.4223170791713821e-05, + "loss": 1.8683, + "step": 336000 + }, + { + "epoch": 0.75, + "learning_rate": 1.4161664124391086e-05, + "loss": 1.8657, + "step": 336500 + }, + { + "epoch": 0.75, + "learning_rate": 1.4100157457068347e-05, + "loss": 1.864, + "step": 337000 + }, + { + "epoch": 0.75, + "learning_rate": 1.4038650789745608e-05, + "loss": 1.8622, + "step": 337500 + }, + { + "epoch": 0.75, + "learning_rate": 1.3977144122422872e-05, + "loss": 1.8679, + "step": 338000 + }, + { + "epoch": 0.75, + "learning_rate": 1.3915637455100134e-05, + "loss": 1.8636, + "step": 338500 + }, + { + "epoch": 0.75, + "learning_rate": 1.3854130787777394e-05, + "loss": 1.8688, + "step": 339000 + }, + { + "epoch": 0.75, + "learning_rate": 1.3792624120454658e-05, + "loss": 1.8667, + "step": 339500 + }, + { + "epoch": 0.75, + "learning_rate": 1.373111745313192e-05, + "loss": 1.8619, + "step": 340000 + }, + { + "epoch": 0.75, + "learning_rate": 1.3669610785809183e-05, + "loss": 1.8677, + "step": 340500 + }, + { + "epoch": 0.76, + "learning_rate": 1.3608104118486445e-05, + "loss": 1.8635, + "step": 341000 + }, + { + "epoch": 0.76, + "learning_rate": 1.3546597451163705e-05, + "loss": 1.8602, + "step": 341500 + }, + { + "epoch": 0.76, + "learning_rate": 1.348509078384097e-05, + "loss": 1.8627, + "step": 342000 + }, + { + "epoch": 0.76, + "learning_rate": 1.3423584116518231e-05, + "loss": 1.8631, + "step": 342500 + }, + { + "epoch": 0.76, + "learning_rate": 1.3362077449195493e-05, + "loss": 1.8664, + "step": 343000 + }, + { + "epoch": 0.76, + "learning_rate": 1.3300570781872756e-05, + "loss": 1.8601, + "step": 343500 + }, + { + "epoch": 0.76, + "learning_rate": 1.3239064114550018e-05, + "loss": 1.8602, + "step": 344000 + }, + { + "epoch": 0.76, + "learning_rate": 1.3177557447227279e-05, + "loss": 1.8646, + "step": 344500 + }, + { + "epoch": 0.76, + "learning_rate": 1.3116050779904542e-05, + "loss": 1.8606, + "step": 345000 + }, + { + "epoch": 0.77, + "learning_rate": 1.3054544112581804e-05, + "loss": 1.8605, + "step": 345500 + }, + { + "epoch": 0.77, + "learning_rate": 1.2993037445259068e-05, + "loss": 1.8622, + "step": 346000 + }, + { + "epoch": 0.77, + "learning_rate": 1.293153077793633e-05, + "loss": 1.8609, + "step": 346500 + }, + { + "epoch": 0.77, + "learning_rate": 1.287002411061359e-05, + "loss": 1.8543, + "step": 347000 + }, + { + "epoch": 0.77, + "learning_rate": 1.2808517443290855e-05, + "loss": 1.8598, + "step": 347500 + }, + { + "epoch": 0.77, + "learning_rate": 1.2747010775968115e-05, + "loss": 1.8589, + "step": 348000 + }, + { + "epoch": 0.77, + "learning_rate": 1.2685504108645377e-05, + "loss": 1.8633, + "step": 348500 + }, + { + "epoch": 0.77, + "learning_rate": 1.2623997441322641e-05, + "loss": 1.8633, + "step": 349000 + }, + { + "epoch": 0.77, + "learning_rate": 1.2562490773999903e-05, + "loss": 1.8596, + "step": 349500 + }, + { + "epoch": 0.77, + "learning_rate": 1.2500984106677163e-05, + "loss": 1.8577, + "step": 350000 + }, + { + "epoch": 0.78, + "learning_rate": 1.2439477439354427e-05, + "loss": 1.8595, + "step": 350500 + }, + { + "epoch": 0.78, + "learning_rate": 1.2377970772031689e-05, + "loss": 1.8702, + "step": 351000 + }, + { + "epoch": 0.78, + "learning_rate": 1.231646410470895e-05, + "loss": 1.8531, + "step": 351500 + }, + { + "epoch": 0.78, + "learning_rate": 1.2254957437386214e-05, + "loss": 1.8599, + "step": 352000 + }, + { + "epoch": 0.78, + "learning_rate": 1.2193450770063474e-05, + "loss": 1.862, + "step": 352500 + }, + { + "epoch": 0.78, + "learning_rate": 1.2131944102740738e-05, + "loss": 1.8601, + "step": 353000 + }, + { + "epoch": 0.78, + "learning_rate": 1.2070437435418e-05, + "loss": 1.8608, + "step": 353500 + }, + { + "epoch": 0.78, + "learning_rate": 1.2008930768095263e-05, + "loss": 1.8589, + "step": 354000 + }, + { + "epoch": 0.78, + "learning_rate": 1.1947424100772524e-05, + "loss": 1.8623, + "step": 354500 + }, + { + "epoch": 0.79, + "learning_rate": 1.1885917433449786e-05, + "loss": 1.8616, + "step": 355000 + }, + { + "epoch": 0.79, + "learning_rate": 1.1824410766127049e-05, + "loss": 1.8555, + "step": 355500 + }, + { + "epoch": 0.79, + "learning_rate": 1.1762904098804311e-05, + "loss": 1.8579, + "step": 356000 + }, + { + "epoch": 0.79, + "learning_rate": 1.1701397431481573e-05, + "loss": 1.8634, + "step": 356500 + }, + { + "epoch": 0.79, + "learning_rate": 1.1639890764158835e-05, + "loss": 1.8557, + "step": 357000 + }, + { + "epoch": 0.79, + "learning_rate": 1.1578384096836098e-05, + "loss": 1.8579, + "step": 357500 + }, + { + "epoch": 0.79, + "learning_rate": 1.1516877429513359e-05, + "loss": 1.8614, + "step": 358000 + }, + { + "epoch": 0.79, + "learning_rate": 1.1455370762190622e-05, + "loss": 1.8598, + "step": 358500 + }, + { + "epoch": 0.79, + "learning_rate": 1.1393864094867884e-05, + "loss": 1.8567, + "step": 359000 + }, + { + "epoch": 0.8, + "learning_rate": 1.1332357427545146e-05, + "loss": 1.855, + "step": 359500 + }, + { + "epoch": 0.8, + "learning_rate": 1.1270850760222408e-05, + "loss": 1.8578, + "step": 360000 + }, + { + "epoch": 0.8, + "learning_rate": 1.120934409289967e-05, + "loss": 1.856, + "step": 360500 + }, + { + "epoch": 0.8, + "learning_rate": 1.1147837425576934e-05, + "loss": 1.8532, + "step": 361000 + }, + { + "epoch": 0.8, + "learning_rate": 1.1086330758254195e-05, + "loss": 1.8625, + "step": 361500 + }, + { + "epoch": 0.8, + "learning_rate": 1.1024824090931457e-05, + "loss": 1.8591, + "step": 362000 + }, + { + "epoch": 0.8, + "learning_rate": 1.096331742360872e-05, + "loss": 1.8595, + "step": 362500 + }, + { + "epoch": 0.8, + "learning_rate": 1.0901810756285983e-05, + "loss": 1.8557, + "step": 363000 + }, + { + "epoch": 0.8, + "learning_rate": 1.0840304088963243e-05, + "loss": 1.8576, + "step": 363500 + }, + { + "epoch": 0.81, + "learning_rate": 1.0778797421640507e-05, + "loss": 1.8548, + "step": 364000 + }, + { + "epoch": 0.81, + "learning_rate": 1.0717290754317769e-05, + "loss": 1.8605, + "step": 364500 + }, + { + "epoch": 0.81, + "learning_rate": 1.065578408699503e-05, + "loss": 1.8505, + "step": 365000 + }, + { + "epoch": 0.81, + "learning_rate": 1.0594277419672292e-05, + "loss": 1.8578, + "step": 365500 + }, + { + "epoch": 0.81, + "learning_rate": 1.0532770752349554e-05, + "loss": 1.857, + "step": 366000 + }, + { + "epoch": 0.81, + "learning_rate": 1.0471264085026818e-05, + "loss": 1.8545, + "step": 366500 + }, + { + "epoch": 0.81, + "learning_rate": 1.040975741770408e-05, + "loss": 1.8557, + "step": 367000 + }, + { + "epoch": 0.81, + "learning_rate": 1.0348250750381342e-05, + "loss": 1.8554, + "step": 367500 + }, + { + "epoch": 0.81, + "learning_rate": 1.0286744083058604e-05, + "loss": 1.8548, + "step": 368000 + }, + { + "epoch": 0.82, + "learning_rate": 1.0225237415735867e-05, + "loss": 1.8558, + "step": 368500 + }, + { + "epoch": 0.82, + "learning_rate": 1.0163730748413128e-05, + "loss": 1.8564, + "step": 369000 + }, + { + "epoch": 0.82, + "learning_rate": 1.0102224081090391e-05, + "loss": 1.8581, + "step": 369500 + }, + { + "epoch": 0.82, + "learning_rate": 1.0040717413767653e-05, + "loss": 1.8541, + "step": 370000 + }, + { + "epoch": 0.82, + "learning_rate": 9.979210746444915e-06, + "loss": 1.8522, + "step": 370500 + }, + { + "epoch": 0.82, + "learning_rate": 9.917704079122177e-06, + "loss": 1.8551, + "step": 371000 + }, + { + "epoch": 0.82, + "learning_rate": 9.856197411799439e-06, + "loss": 1.8572, + "step": 371500 + }, + { + "epoch": 0.82, + "learning_rate": 9.794690744476702e-06, + "loss": 1.8544, + "step": 372000 + }, + { + "epoch": 0.82, + "learning_rate": 9.733184077153964e-06, + "loss": 1.8509, + "step": 372500 + }, + { + "epoch": 0.83, + "learning_rate": 9.671677409831226e-06, + "loss": 1.8538, + "step": 373000 + }, + { + "epoch": 0.83, + "learning_rate": 9.610170742508488e-06, + "loss": 1.8561, + "step": 373500 + }, + { + "epoch": 0.83, + "learning_rate": 9.54866407518575e-06, + "loss": 1.8559, + "step": 374000 + }, + { + "epoch": 0.83, + "learning_rate": 9.487157407863014e-06, + "loss": 1.8559, + "step": 374500 + }, + { + "epoch": 0.83, + "learning_rate": 9.425650740540274e-06, + "loss": 1.8507, + "step": 375000 + }, + { + "epoch": 0.83, + "learning_rate": 9.364144073217537e-06, + "loss": 1.8526, + "step": 375500 + }, + { + "epoch": 0.83, + "learning_rate": 9.3026374058948e-06, + "loss": 1.8552, + "step": 376000 + }, + { + "epoch": 0.83, + "learning_rate": 9.241130738572061e-06, + "loss": 1.8526, + "step": 376500 + }, + { + "epoch": 0.83, + "learning_rate": 9.179624071249323e-06, + "loss": 1.8534, + "step": 377000 + }, + { + "epoch": 0.84, + "learning_rate": 9.118117403926587e-06, + "loss": 1.8539, + "step": 377500 + }, + { + "epoch": 0.84, + "learning_rate": 9.056610736603849e-06, + "loss": 1.8558, + "step": 378000 + }, + { + "epoch": 0.84, + "learning_rate": 8.99510406928111e-06, + "loss": 1.8532, + "step": 378500 + }, + { + "epoch": 0.84, + "learning_rate": 8.933597401958373e-06, + "loss": 1.8557, + "step": 379000 + }, + { + "epoch": 0.84, + "learning_rate": 8.872090734635634e-06, + "loss": 1.8528, + "step": 379500 + }, + { + "epoch": 0.84, + "learning_rate": 8.810584067312898e-06, + "loss": 1.8554, + "step": 380000 + }, + { + "epoch": 0.84, + "learning_rate": 8.749077399990158e-06, + "loss": 1.8508, + "step": 380500 + }, + { + "epoch": 0.84, + "learning_rate": 8.687570732667422e-06, + "loss": 1.8505, + "step": 381000 + }, + { + "epoch": 0.84, + "learning_rate": 8.626064065344684e-06, + "loss": 1.8489, + "step": 381500 + }, + { + "epoch": 0.85, + "learning_rate": 8.564557398021946e-06, + "loss": 1.8519, + "step": 382000 + }, + { + "epoch": 0.85, + "learning_rate": 8.503050730699208e-06, + "loss": 1.8565, + "step": 382500 + }, + { + "epoch": 0.85, + "learning_rate": 8.441544063376471e-06, + "loss": 1.852, + "step": 383000 + }, + { + "epoch": 0.85, + "learning_rate": 8.380037396053733e-06, + "loss": 1.8553, + "step": 383500 + }, + { + "epoch": 0.85, + "learning_rate": 8.318530728730995e-06, + "loss": 1.8512, + "step": 384000 + }, + { + "epoch": 0.85, + "learning_rate": 8.257024061408257e-06, + "loss": 1.8521, + "step": 384500 + }, + { + "epoch": 0.85, + "learning_rate": 8.195517394085519e-06, + "loss": 1.8495, + "step": 385000 + }, + { + "epoch": 0.85, + "learning_rate": 8.134010726762783e-06, + "loss": 1.8563, + "step": 385500 + }, + { + "epoch": 0.85, + "learning_rate": 8.072504059440043e-06, + "loss": 1.8524, + "step": 386000 + }, + { + "epoch": 0.86, + "learning_rate": 8.010997392117306e-06, + "loss": 1.8537, + "step": 386500 + }, + { + "epoch": 0.86, + "learning_rate": 7.949490724794568e-06, + "loss": 1.8481, + "step": 387000 + }, + { + "epoch": 0.86, + "learning_rate": 7.88798405747183e-06, + "loss": 1.8521, + "step": 387500 + }, + { + "epoch": 0.86, + "learning_rate": 7.826477390149092e-06, + "loss": 1.8488, + "step": 388000 + }, + { + "epoch": 0.86, + "learning_rate": 7.764970722826356e-06, + "loss": 1.856, + "step": 388500 + }, + { + "epoch": 0.86, + "learning_rate": 7.703464055503618e-06, + "loss": 1.8502, + "step": 389000 + }, + { + "epoch": 0.86, + "learning_rate": 7.64195738818088e-06, + "loss": 1.8534, + "step": 389500 + }, + { + "epoch": 0.86, + "learning_rate": 7.580450720858141e-06, + "loss": 1.8481, + "step": 390000 + }, + { + "epoch": 0.86, + "learning_rate": 7.518944053535404e-06, + "loss": 1.8516, + "step": 390500 + }, + { + "epoch": 0.87, + "learning_rate": 7.457437386212666e-06, + "loss": 1.8508, + "step": 391000 + }, + { + "epoch": 0.87, + "learning_rate": 7.395930718889928e-06, + "loss": 1.8442, + "step": 391500 + }, + { + "epoch": 0.87, + "learning_rate": 7.33442405156719e-06, + "loss": 1.8469, + "step": 392000 + }, + { + "epoch": 0.87, + "learning_rate": 7.272917384244453e-06, + "loss": 1.85, + "step": 392500 + }, + { + "epoch": 0.87, + "learning_rate": 7.211410716921714e-06, + "loss": 1.8454, + "step": 393000 + }, + { + "epoch": 0.87, + "learning_rate": 7.1499040495989765e-06, + "loss": 1.8523, + "step": 393500 + }, + { + "epoch": 0.87, + "learning_rate": 7.088397382276239e-06, + "loss": 1.8479, + "step": 394000 + }, + { + "epoch": 0.87, + "learning_rate": 7.026890714953502e-06, + "loss": 1.8438, + "step": 394500 + }, + { + "epoch": 0.87, + "learning_rate": 6.965384047630763e-06, + "loss": 1.8491, + "step": 395000 + }, + { + "epoch": 0.88, + "learning_rate": 6.903877380308026e-06, + "loss": 1.8492, + "step": 395500 + }, + { + "epoch": 0.88, + "learning_rate": 6.842370712985289e-06, + "loss": 1.8506, + "step": 396000 + }, + { + "epoch": 0.88, + "learning_rate": 6.7808640456625505e-06, + "loss": 1.8511, + "step": 396500 + }, + { + "epoch": 0.88, + "learning_rate": 6.719357378339812e-06, + "loss": 1.8479, + "step": 397000 + }, + { + "epoch": 0.88, + "learning_rate": 6.657850711017074e-06, + "loss": 1.8474, + "step": 397500 + }, + { + "epoch": 0.88, + "learning_rate": 6.596344043694337e-06, + "loss": 1.8472, + "step": 398000 + }, + { + "epoch": 0.88, + "learning_rate": 6.534837376371598e-06, + "loss": 1.8536, + "step": 398500 + }, + { + "epoch": 0.88, + "learning_rate": 6.473330709048861e-06, + "loss": 1.8487, + "step": 399000 + }, + { + "epoch": 0.88, + "learning_rate": 6.411824041726124e-06, + "loss": 1.8507, + "step": 399500 + }, + { + "epoch": 0.89, + "learning_rate": 6.3503173744033864e-06, + "loss": 1.8478, + "step": 400000 + }, + { + "epoch": 0.89, + "learning_rate": 6.2888107070806475e-06, + "loss": 1.8488, + "step": 400500 + }, + { + "epoch": 0.89, + "learning_rate": 6.22730403975791e-06, + "loss": 1.8462, + "step": 401000 + }, + { + "epoch": 0.89, + "learning_rate": 6.165797372435172e-06, + "loss": 1.8501, + "step": 401500 + }, + { + "epoch": 0.89, + "learning_rate": 6.104290705112434e-06, + "loss": 1.8495, + "step": 402000 + }, + { + "epoch": 0.89, + "learning_rate": 6.042784037789697e-06, + "loss": 1.8479, + "step": 402500 + }, + { + "epoch": 0.89, + "learning_rate": 5.981277370466959e-06, + "loss": 1.8474, + "step": 403000 + }, + { + "epoch": 0.89, + "learning_rate": 5.919770703144221e-06, + "loss": 1.851, + "step": 403500 + }, + { + "epoch": 0.89, + "learning_rate": 5.8582640358214834e-06, + "loss": 1.8451, + "step": 404000 + }, + { + "epoch": 0.9, + "learning_rate": 5.796757368498745e-06, + "loss": 1.8458, + "step": 404500 + }, + { + "epoch": 0.9, + "learning_rate": 5.735250701176008e-06, + "loss": 1.8485, + "step": 405000 + }, + { + "epoch": 0.9, + "learning_rate": 5.67374403385327e-06, + "loss": 1.8494, + "step": 405500 + }, + { + "epoch": 0.9, + "learning_rate": 5.612237366530533e-06, + "loss": 1.8437, + "step": 406000 + }, + { + "epoch": 0.9, + "learning_rate": 5.550730699207794e-06, + "loss": 1.8435, + "step": 406500 + }, + { + "epoch": 0.9, + "learning_rate": 5.489224031885057e-06, + "loss": 1.8472, + "step": 407000 + }, + { + "epoch": 0.9, + "learning_rate": 5.4277173645623185e-06, + "loss": 1.8469, + "step": 407500 + }, + { + "epoch": 0.9, + "learning_rate": 5.366210697239581e-06, + "loss": 1.845, + "step": 408000 + }, + { + "epoch": 0.9, + "learning_rate": 5.304704029916843e-06, + "loss": 1.8451, + "step": 408500 + }, + { + "epoch": 0.91, + "learning_rate": 5.243197362594105e-06, + "loss": 1.85, + "step": 409000 + }, + { + "epoch": 0.91, + "learning_rate": 5.181690695271368e-06, + "loss": 1.8436, + "step": 409500 + }, + { + "epoch": 0.91, + "learning_rate": 5.12018402794863e-06, + "loss": 1.8435, + "step": 410000 + }, + { + "epoch": 0.91, + "learning_rate": 5.0586773606258925e-06, + "loss": 1.8447, + "step": 410500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9971706933031544e-06, + "loss": 1.847, + "step": 411000 + }, + { + "epoch": 0.91, + "learning_rate": 4.935664025980416e-06, + "loss": 1.8522, + "step": 411500 + }, + { + "epoch": 0.91, + "learning_rate": 4.874157358657678e-06, + "loss": 1.8474, + "step": 412000 + }, + { + "epoch": 0.91, + "learning_rate": 4.812650691334941e-06, + "loss": 1.8473, + "step": 412500 + }, + { + "epoch": 0.91, + "learning_rate": 4.751144024012203e-06, + "loss": 1.8446, + "step": 413000 + }, + { + "epoch": 0.92, + "learning_rate": 4.689637356689466e-06, + "loss": 1.8471, + "step": 413500 + }, + { + "epoch": 0.92, + "learning_rate": 4.628130689366728e-06, + "loss": 1.8472, + "step": 414000 + }, + { + "epoch": 0.92, + "learning_rate": 4.5666240220439895e-06, + "loss": 1.8456, + "step": 414500 + }, + { + "epoch": 0.92, + "learning_rate": 4.505117354721252e-06, + "loss": 1.8446, + "step": 415000 + }, + { + "epoch": 0.92, + "learning_rate": 4.443610687398514e-06, + "loss": 1.8441, + "step": 415500 + }, + { + "epoch": 0.92, + "learning_rate": 4.382104020075776e-06, + "loss": 1.8466, + "step": 416000 + }, + { + "epoch": 0.92, + "learning_rate": 4.320597352753038e-06, + "loss": 1.8424, + "step": 416500 + }, + { + "epoch": 0.92, + "learning_rate": 4.259090685430301e-06, + "loss": 1.8472, + "step": 417000 + }, + { + "epoch": 0.92, + "learning_rate": 4.197584018107563e-06, + "loss": 1.8423, + "step": 417500 + }, + { + "epoch": 0.93, + "learning_rate": 4.1360773507848255e-06, + "loss": 1.8463, + "step": 418000 + }, + { + "epoch": 0.93, + "learning_rate": 4.074570683462087e-06, + "loss": 1.8438, + "step": 418500 + }, + { + "epoch": 0.93, + "learning_rate": 4.01306401613935e-06, + "loss": 1.8399, + "step": 419000 + }, + { + "epoch": 0.93, + "learning_rate": 3.951557348816612e-06, + "loss": 1.8463, + "step": 419500 + }, + { + "epoch": 0.93, + "learning_rate": 3.890050681493874e-06, + "loss": 1.8404, + "step": 420000 + }, + { + "epoch": 0.93, + "learning_rate": 3.828544014171137e-06, + "loss": 1.8457, + "step": 420500 + }, + { + "epoch": 0.93, + "learning_rate": 3.767037346848398e-06, + "loss": 1.8451, + "step": 421000 + }, + { + "epoch": 0.93, + "learning_rate": 3.705530679525661e-06, + "loss": 1.8447, + "step": 421500 + }, + { + "epoch": 0.93, + "learning_rate": 3.644024012202923e-06, + "loss": 1.8427, + "step": 422000 + }, + { + "epoch": 0.94, + "learning_rate": 3.582517344880185e-06, + "loss": 1.8406, + "step": 422500 + }, + { + "epoch": 0.94, + "learning_rate": 3.521010677557447e-06, + "loss": 1.842, + "step": 423000 + }, + { + "epoch": 0.94, + "learning_rate": 3.45950401023471e-06, + "loss": 1.8426, + "step": 423500 + }, + { + "epoch": 0.94, + "learning_rate": 3.397997342911972e-06, + "loss": 1.8455, + "step": 424000 + }, + { + "epoch": 0.94, + "learning_rate": 3.336490675589234e-06, + "loss": 1.841, + "step": 424500 + }, + { + "epoch": 0.94, + "learning_rate": 3.274984008266496e-06, + "loss": 1.8418, + "step": 425000 + }, + { + "epoch": 0.94, + "learning_rate": 3.213477340943759e-06, + "loss": 1.8415, + "step": 425500 + }, + { + "epoch": 0.94, + "learning_rate": 3.1519706736210207e-06, + "loss": 1.8452, + "step": 426000 + }, + { + "epoch": 0.94, + "learning_rate": 3.090464006298283e-06, + "loss": 1.8472, + "step": 426500 + }, + { + "epoch": 0.95, + "learning_rate": 3.0289573389755454e-06, + "loss": 1.8403, + "step": 427000 + }, + { + "epoch": 0.95, + "learning_rate": 2.9674506716528073e-06, + "loss": 1.8406, + "step": 427500 + }, + { + "epoch": 0.95, + "learning_rate": 2.9059440043300696e-06, + "loss": 1.8425, + "step": 428000 + }, + { + "epoch": 0.95, + "learning_rate": 2.8444373370073315e-06, + "loss": 1.8417, + "step": 428500 + }, + { + "epoch": 0.95, + "learning_rate": 2.782930669684594e-06, + "loss": 1.8402, + "step": 429000 + }, + { + "epoch": 0.95, + "learning_rate": 2.7214240023618562e-06, + "loss": 1.8465, + "step": 429500 + }, + { + "epoch": 0.95, + "learning_rate": 2.659917335039118e-06, + "loss": 1.8454, + "step": 430000 + }, + { + "epoch": 0.95, + "learning_rate": 2.5984106677163805e-06, + "loss": 1.8389, + "step": 430500 + }, + { + "epoch": 0.95, + "learning_rate": 2.536904000393643e-06, + "loss": 1.8439, + "step": 431000 + }, + { + "epoch": 0.96, + "learning_rate": 2.475397333070905e-06, + "loss": 1.8451, + "step": 431500 + }, + { + "epoch": 0.96, + "learning_rate": 2.413890665748167e-06, + "loss": 1.8393, + "step": 432000 + }, + { + "epoch": 0.96, + "learning_rate": 2.3523839984254294e-06, + "loss": 1.8428, + "step": 432500 + }, + { + "epoch": 0.96, + "learning_rate": 2.2908773311026917e-06, + "loss": 1.8397, + "step": 433000 + }, + { + "epoch": 0.96, + "learning_rate": 2.229370663779954e-06, + "loss": 1.8435, + "step": 433500 + }, + { + "epoch": 0.96, + "learning_rate": 2.1678639964572164e-06, + "loss": 1.8411, + "step": 434000 + }, + { + "epoch": 0.96, + "learning_rate": 2.1063573291344783e-06, + "loss": 1.8435, + "step": 434500 + }, + { + "epoch": 0.96, + "learning_rate": 2.0448506618117402e-06, + "loss": 1.8427, + "step": 435000 + }, + { + "epoch": 0.96, + "learning_rate": 1.9833439944890026e-06, + "loss": 1.8371, + "step": 435500 + }, + { + "epoch": 0.97, + "learning_rate": 1.921837327166265e-06, + "loss": 1.8378, + "step": 436000 + }, + { + "epoch": 0.97, + "learning_rate": 1.860330659843527e-06, + "loss": 1.8403, + "step": 436500 + }, + { + "epoch": 0.97, + "learning_rate": 1.7988239925207894e-06, + "loss": 1.8427, + "step": 437000 + }, + { + "epoch": 0.97, + "learning_rate": 1.7373173251980517e-06, + "loss": 1.8414, + "step": 437500 + }, + { + "epoch": 0.97, + "learning_rate": 1.6758106578753138e-06, + "loss": 1.8371, + "step": 438000 + }, + { + "epoch": 0.97, + "learning_rate": 1.6143039905525761e-06, + "loss": 1.8388, + "step": 438500 + }, + { + "epoch": 0.97, + "learning_rate": 1.552797323229838e-06, + "loss": 1.8447, + "step": 439000 + }, + { + "epoch": 0.97, + "learning_rate": 1.4912906559071004e-06, + "loss": 1.8438, + "step": 439500 + }, + { + "epoch": 0.97, + "learning_rate": 1.4297839885843625e-06, + "loss": 1.84, + "step": 440000 + }, + { + "epoch": 0.98, + "learning_rate": 1.3682773212616249e-06, + "loss": 1.844, + "step": 440500 + }, + { + "epoch": 0.98, + "learning_rate": 1.3067706539388872e-06, + "loss": 1.8395, + "step": 441000 + }, + { + "epoch": 0.98, + "learning_rate": 1.245263986616149e-06, + "loss": 1.8408, + "step": 441500 + }, + { + "epoch": 0.98, + "learning_rate": 1.1837573192934114e-06, + "loss": 1.8426, + "step": 442000 + }, + { + "epoch": 0.98, + "learning_rate": 1.1222506519706736e-06, + "loss": 1.8412, + "step": 442500 + }, + { + "epoch": 0.98, + "learning_rate": 1.060743984647936e-06, + "loss": 1.8389, + "step": 443000 + }, + { + "epoch": 0.98, + "learning_rate": 9.992373173251982e-07, + "loss": 1.839, + "step": 443500 + }, + { + "epoch": 0.98, + "learning_rate": 9.377306500024604e-07, + "loss": 1.8411, + "step": 444000 + }, + { + "epoch": 0.98, + "learning_rate": 8.762239826797225e-07, + "loss": 1.843, + "step": 444500 + }, + { + "epoch": 0.99, + "learning_rate": 8.147173153569847e-07, + "loss": 1.84, + "step": 445000 + }, + { + "epoch": 0.99, + "learning_rate": 7.532106480342469e-07, + "loss": 1.847, + "step": 445500 + }, + { + "epoch": 0.99, + "learning_rate": 6.917039807115092e-07, + "loss": 1.8363, + "step": 446000 + }, + { + "epoch": 0.99, + "learning_rate": 6.301973133887713e-07, + "loss": 1.8402, + "step": 446500 + }, + { + "epoch": 0.99, + "learning_rate": 5.686906460660336e-07, + "loss": 1.8397, + "step": 447000 + }, + { + "epoch": 0.99, + "learning_rate": 5.071839787432959e-07, + "loss": 1.8424, + "step": 447500 + }, + { + "epoch": 0.99, + "learning_rate": 4.45677311420558e-07, + "loss": 1.8357, + "step": 448000 + }, + { + "epoch": 0.99, + "learning_rate": 3.841706440978202e-07, + "loss": 1.843, + "step": 448500 + }, + { + "epoch": 0.99, + "learning_rate": 3.2266397677508245e-07, + "loss": 1.8371, + "step": 449000 + }, + { + "epoch": 1.0, + "learning_rate": 2.6115730945234463e-07, + "loss": 1.8429, + "step": 449500 + }, + { + "epoch": 1.0, + "learning_rate": 1.9965064212960688e-07, + "loss": 1.8424, + "step": 450000 + }, + { + "epoch": 1.0, + "learning_rate": 1.381439748068691e-07, + "loss": 1.8414, + "step": 450500 + }, + { + "epoch": 1.0, + "learning_rate": 7.663730748413129e-08, + "loss": 1.8446, + "step": 451000 + }, + { + "epoch": 1.0, + "learning_rate": 1.5130640161393495e-08, + "loss": 1.8381, + "step": 451500 + }, + { + "epoch": 1.0, + "step": 451623, + "total_flos": 1.5894400168611545e+19, + "train_loss": 0.6229404193166468, + "train_runtime": 298967.217, + "train_samples_per_second": 870.111, + "train_steps_per_second": 1.511 + } + ], + "max_steps": 451623, + "num_train_epochs": 1, + "total_flos": 1.5894400168611545e+19, + "trial_name": null, + "trial_params": null +}