AngOFA / trainer_state.json
cx-olquinjica's picture
Upload 7 files
002e13a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.99630204866504,
"eval_steps": 500,
"global_step": 253500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 4.990295857988166e-05,
"loss": 6.3355,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 4.98043392504931e-05,
"loss": 4.6153,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 4.9705719921104535e-05,
"loss": 4.0788,
"step": 1500
},
{
"epoch": 0.39,
"learning_rate": 4.9607100591715975e-05,
"loss": 3.7655,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 4.9508481262327415e-05,
"loss": 3.5154,
"step": 2500
},
{
"epoch": 0.59,
"learning_rate": 4.9409861932938855e-05,
"loss": 3.3402,
"step": 3000
},
{
"epoch": 0.69,
"learning_rate": 4.93112426035503e-05,
"loss": 3.1954,
"step": 3500
},
{
"epoch": 0.79,
"learning_rate": 4.921262327416174e-05,
"loss": 3.0717,
"step": 4000
},
{
"epoch": 0.89,
"learning_rate": 4.9114003944773176e-05,
"loss": 2.9541,
"step": 4500
},
{
"epoch": 0.99,
"learning_rate": 4.9015384615384616e-05,
"loss": 2.8672,
"step": 5000
},
{
"epoch": 1.08,
"learning_rate": 4.8916765285996056e-05,
"loss": 2.7782,
"step": 5500
},
{
"epoch": 1.18,
"learning_rate": 4.88181459566075e-05,
"loss": 2.7074,
"step": 6000
},
{
"epoch": 1.28,
"learning_rate": 4.871952662721894e-05,
"loss": 2.6362,
"step": 6500
},
{
"epoch": 1.38,
"learning_rate": 4.862090729783038e-05,
"loss": 2.5867,
"step": 7000
},
{
"epoch": 1.48,
"learning_rate": 4.852228796844182e-05,
"loss": 2.5227,
"step": 7500
},
{
"epoch": 1.58,
"learning_rate": 4.842366863905326e-05,
"loss": 2.4813,
"step": 8000
},
{
"epoch": 1.68,
"learning_rate": 4.832504930966469e-05,
"loss": 2.4355,
"step": 8500
},
{
"epoch": 1.78,
"learning_rate": 4.822642998027614e-05,
"loss": 2.3717,
"step": 9000
},
{
"epoch": 1.87,
"learning_rate": 4.812781065088758e-05,
"loss": 2.345,
"step": 9500
},
{
"epoch": 1.97,
"learning_rate": 4.802919132149902e-05,
"loss": 2.3076,
"step": 10000
},
{
"epoch": 2.07,
"learning_rate": 4.793057199211046e-05,
"loss": 2.2789,
"step": 10500
},
{
"epoch": 2.17,
"learning_rate": 4.78319526627219e-05,
"loss": 2.2335,
"step": 11000
},
{
"epoch": 2.27,
"learning_rate": 4.773333333333333e-05,
"loss": 2.2165,
"step": 11500
},
{
"epoch": 2.37,
"learning_rate": 4.763471400394477e-05,
"loss": 2.185,
"step": 12000
},
{
"epoch": 2.47,
"learning_rate": 4.753609467455621e-05,
"loss": 2.1461,
"step": 12500
},
{
"epoch": 2.56,
"learning_rate": 4.743747534516765e-05,
"loss": 2.1114,
"step": 13000
},
{
"epoch": 2.66,
"learning_rate": 4.7338856015779094e-05,
"loss": 2.0961,
"step": 13500
},
{
"epoch": 2.76,
"learning_rate": 4.7240236686390534e-05,
"loss": 2.0726,
"step": 14000
},
{
"epoch": 2.86,
"learning_rate": 4.714181459566075e-05,
"loss": 2.0446,
"step": 14500
},
{
"epoch": 2.96,
"learning_rate": 4.70431952662722e-05,
"loss": 2.0268,
"step": 15000
},
{
"epoch": 3.06,
"learning_rate": 4.6944773175542406e-05,
"loss": 2.0076,
"step": 15500
},
{
"epoch": 3.16,
"learning_rate": 4.684615384615385e-05,
"loss": 1.9788,
"step": 16000
},
{
"epoch": 3.25,
"learning_rate": 4.674753451676529e-05,
"loss": 1.9637,
"step": 16500
},
{
"epoch": 3.35,
"learning_rate": 4.6648915187376726e-05,
"loss": 1.9425,
"step": 17000
},
{
"epoch": 3.45,
"learning_rate": 4.655029585798817e-05,
"loss": 1.92,
"step": 17500
},
{
"epoch": 3.55,
"learning_rate": 4.645167652859961e-05,
"loss": 1.9069,
"step": 18000
},
{
"epoch": 3.65,
"learning_rate": 4.635305719921105e-05,
"loss": 1.8903,
"step": 18500
},
{
"epoch": 3.75,
"learning_rate": 4.625443786982249e-05,
"loss": 1.8661,
"step": 19000
},
{
"epoch": 3.85,
"learning_rate": 4.61560157790927e-05,
"loss": 1.8637,
"step": 19500
},
{
"epoch": 3.94,
"learning_rate": 4.6057396449704143e-05,
"loss": 1.8444,
"step": 20000
},
{
"epoch": 4.04,
"learning_rate": 4.5958777120315584e-05,
"loss": 1.8321,
"step": 20500
},
{
"epoch": 4.14,
"learning_rate": 4.5860157790927024e-05,
"loss": 1.8032,
"step": 21000
},
{
"epoch": 4.24,
"learning_rate": 4.5761538461538464e-05,
"loss": 1.7901,
"step": 21500
},
{
"epoch": 4.34,
"learning_rate": 4.5662919132149904e-05,
"loss": 1.7781,
"step": 22000
},
{
"epoch": 4.44,
"learning_rate": 4.556449704142012e-05,
"loss": 1.7769,
"step": 22500
},
{
"epoch": 4.54,
"learning_rate": 4.546587771203156e-05,
"loss": 1.7731,
"step": 23000
},
{
"epoch": 4.63,
"learning_rate": 4.5367258382643e-05,
"loss": 1.7503,
"step": 23500
},
{
"epoch": 4.73,
"learning_rate": 4.526863905325444e-05,
"loss": 1.7306,
"step": 24000
},
{
"epoch": 4.83,
"learning_rate": 4.517021696252466e-05,
"loss": 1.729,
"step": 24500
},
{
"epoch": 4.93,
"learning_rate": 4.50715976331361e-05,
"loss": 1.7238,
"step": 25000
},
{
"epoch": 5.03,
"learning_rate": 4.497297830374754e-05,
"loss": 1.7157,
"step": 25500
},
{
"epoch": 5.13,
"learning_rate": 4.487435897435898e-05,
"loss": 1.6753,
"step": 26000
},
{
"epoch": 5.23,
"learning_rate": 4.477573964497042e-05,
"loss": 1.6714,
"step": 26500
},
{
"epoch": 5.33,
"learning_rate": 4.4677317554240634e-05,
"loss": 1.6588,
"step": 27000
},
{
"epoch": 5.42,
"learning_rate": 4.4578698224852074e-05,
"loss": 1.6559,
"step": 27500
},
{
"epoch": 5.52,
"learning_rate": 4.4480078895463514e-05,
"loss": 1.6571,
"step": 28000
},
{
"epoch": 5.62,
"learning_rate": 4.4381459566074954e-05,
"loss": 1.6449,
"step": 28500
},
{
"epoch": 5.72,
"learning_rate": 4.428284023668639e-05,
"loss": 1.6439,
"step": 29000
},
{
"epoch": 5.82,
"learning_rate": 4.4184220907297835e-05,
"loss": 1.6303,
"step": 29500
},
{
"epoch": 5.92,
"learning_rate": 4.4085798816568044e-05,
"loss": 1.6159,
"step": 30000
},
{
"epoch": 6.02,
"learning_rate": 4.398717948717949e-05,
"loss": 1.6218,
"step": 30500
},
{
"epoch": 6.11,
"learning_rate": 4.3888757396449707e-05,
"loss": 1.5967,
"step": 31000
},
{
"epoch": 6.21,
"learning_rate": 4.379013806706115e-05,
"loss": 1.5843,
"step": 31500
},
{
"epoch": 6.31,
"learning_rate": 4.369151873767259e-05,
"loss": 1.581,
"step": 32000
},
{
"epoch": 6.41,
"learning_rate": 4.359289940828403e-05,
"loss": 1.5737,
"step": 32500
},
{
"epoch": 6.51,
"learning_rate": 4.349428007889547e-05,
"loss": 1.5591,
"step": 33000
},
{
"epoch": 6.61,
"learning_rate": 4.339566074950691e-05,
"loss": 1.5654,
"step": 33500
},
{
"epoch": 6.71,
"learning_rate": 4.3297238658777124e-05,
"loss": 1.5536,
"step": 34000
},
{
"epoch": 6.8,
"learning_rate": 4.3198619329388564e-05,
"loss": 1.5458,
"step": 34500
},
{
"epoch": 6.9,
"learning_rate": 4.3100000000000004e-05,
"loss": 1.5353,
"step": 35000
},
{
"epoch": 7.0,
"learning_rate": 4.300138067061144e-05,
"loss": 1.5321,
"step": 35500
},
{
"epoch": 7.1,
"learning_rate": 4.290276134122288e-05,
"loss": 1.5217,
"step": 36000
},
{
"epoch": 7.2,
"learning_rate": 4.280414201183432e-05,
"loss": 1.5073,
"step": 36500
},
{
"epoch": 7.3,
"learning_rate": 4.270552268244576e-05,
"loss": 1.5073,
"step": 37000
},
{
"epoch": 7.4,
"learning_rate": 4.2606903353057205e-05,
"loss": 1.4939,
"step": 37500
},
{
"epoch": 7.49,
"learning_rate": 4.2508284023668646e-05,
"loss": 1.4914,
"step": 38000
},
{
"epoch": 7.59,
"learning_rate": 4.2409664694280086e-05,
"loss": 1.489,
"step": 38500
},
{
"epoch": 7.69,
"learning_rate": 4.23112426035503e-05,
"loss": 1.4797,
"step": 39000
},
{
"epoch": 7.79,
"learning_rate": 4.221262327416174e-05,
"loss": 1.4836,
"step": 39500
},
{
"epoch": 7.89,
"learning_rate": 4.2114003944773175e-05,
"loss": 1.4689,
"step": 40000
},
{
"epoch": 7.99,
"learning_rate": 4.20155818540434e-05,
"loss": 1.4639,
"step": 40500
},
{
"epoch": 8.09,
"learning_rate": 4.191696252465484e-05,
"loss": 1.4526,
"step": 41000
},
{
"epoch": 8.18,
"learning_rate": 4.181834319526627e-05,
"loss": 1.4486,
"step": 41500
},
{
"epoch": 8.28,
"learning_rate": 4.171972386587771e-05,
"loss": 1.4381,
"step": 42000
},
{
"epoch": 8.38,
"learning_rate": 4.162110453648915e-05,
"loss": 1.4321,
"step": 42500
},
{
"epoch": 8.48,
"learning_rate": 4.152248520710059e-05,
"loss": 1.4366,
"step": 43000
},
{
"epoch": 8.58,
"learning_rate": 4.142386587771203e-05,
"loss": 1.4324,
"step": 43500
},
{
"epoch": 8.68,
"learning_rate": 4.132524654832347e-05,
"loss": 1.4236,
"step": 44000
},
{
"epoch": 8.78,
"learning_rate": 4.122662721893491e-05,
"loss": 1.4231,
"step": 44500
},
{
"epoch": 8.88,
"learning_rate": 4.112800788954635e-05,
"loss": 1.416,
"step": 45000
},
{
"epoch": 8.97,
"learning_rate": 4.102958579881657e-05,
"loss": 1.4205,
"step": 45500
},
{
"epoch": 9.07,
"learning_rate": 4.093096646942801e-05,
"loss": 1.3942,
"step": 46000
},
{
"epoch": 9.17,
"learning_rate": 4.083234714003945e-05,
"loss": 1.402,
"step": 46500
},
{
"epoch": 9.27,
"learning_rate": 4.073372781065089e-05,
"loss": 1.3927,
"step": 47000
},
{
"epoch": 9.37,
"learning_rate": 4.063510848126233e-05,
"loss": 1.3817,
"step": 47500
},
{
"epoch": 9.47,
"learning_rate": 4.053648915187377e-05,
"loss": 1.3762,
"step": 48000
},
{
"epoch": 9.57,
"learning_rate": 4.043786982248521e-05,
"loss": 1.3781,
"step": 48500
},
{
"epoch": 9.66,
"learning_rate": 4.0339447731755426e-05,
"loss": 1.3754,
"step": 49000
},
{
"epoch": 9.76,
"learning_rate": 4.0240828402366867e-05,
"loss": 1.3764,
"step": 49500
},
{
"epoch": 9.86,
"learning_rate": 4.014240631163708e-05,
"loss": 1.3621,
"step": 50000
},
{
"epoch": 9.96,
"learning_rate": 4.004378698224852e-05,
"loss": 1.3692,
"step": 50500
},
{
"epoch": 10.06,
"learning_rate": 3.994516765285996e-05,
"loss": 1.3485,
"step": 51000
},
{
"epoch": 10.16,
"learning_rate": 3.98465483234714e-05,
"loss": 1.3479,
"step": 51500
},
{
"epoch": 10.26,
"learning_rate": 3.974792899408284e-05,
"loss": 1.343,
"step": 52000
},
{
"epoch": 10.35,
"learning_rate": 3.9649309664694284e-05,
"loss": 1.3521,
"step": 52500
},
{
"epoch": 10.45,
"learning_rate": 3.9550690335305724e-05,
"loss": 1.336,
"step": 53000
},
{
"epoch": 10.55,
"learning_rate": 3.9452071005917164e-05,
"loss": 1.3361,
"step": 53500
},
{
"epoch": 10.65,
"learning_rate": 3.9353451676528604e-05,
"loss": 1.3395,
"step": 54000
},
{
"epoch": 10.75,
"learning_rate": 3.9254832347140045e-05,
"loss": 1.3308,
"step": 54500
},
{
"epoch": 10.85,
"learning_rate": 3.915621301775148e-05,
"loss": 1.3235,
"step": 55000
},
{
"epoch": 10.95,
"learning_rate": 3.90577909270217e-05,
"loss": 1.3222,
"step": 55500
},
{
"epoch": 11.04,
"learning_rate": 3.8959171597633134e-05,
"loss": 1.322,
"step": 56000
},
{
"epoch": 11.14,
"learning_rate": 3.8860552268244574e-05,
"loss": 1.3015,
"step": 56500
},
{
"epoch": 11.24,
"learning_rate": 3.8761932938856015e-05,
"loss": 1.3081,
"step": 57000
},
{
"epoch": 11.34,
"learning_rate": 3.8663313609467455e-05,
"loss": 1.3034,
"step": 57500
},
{
"epoch": 11.44,
"learning_rate": 3.8564694280078895e-05,
"loss": 1.2988,
"step": 58000
},
{
"epoch": 11.54,
"learning_rate": 3.846607495069034e-05,
"loss": 1.287,
"step": 58500
},
{
"epoch": 11.64,
"learning_rate": 3.836765285996056e-05,
"loss": 1.2945,
"step": 59000
},
{
"epoch": 11.73,
"learning_rate": 3.8269033530572e-05,
"loss": 1.2901,
"step": 59500
},
{
"epoch": 11.83,
"learning_rate": 3.817041420118344e-05,
"loss": 1.2921,
"step": 60000
},
{
"epoch": 11.93,
"learning_rate": 3.8071992110453654e-05,
"loss": 1.2868,
"step": 60500
},
{
"epoch": 12.03,
"learning_rate": 3.7973372781065094e-05,
"loss": 1.2843,
"step": 61000
},
{
"epoch": 12.13,
"learning_rate": 3.787475345167653e-05,
"loss": 1.2763,
"step": 61500
},
{
"epoch": 12.23,
"learning_rate": 3.777613412228797e-05,
"loss": 1.2681,
"step": 62000
},
{
"epoch": 12.33,
"learning_rate": 3.767751479289941e-05,
"loss": 1.2677,
"step": 62500
},
{
"epoch": 12.43,
"learning_rate": 3.757889546351085e-05,
"loss": 1.2675,
"step": 63000
},
{
"epoch": 12.52,
"learning_rate": 3.7480473372781064e-05,
"loss": 1.2605,
"step": 63500
},
{
"epoch": 12.62,
"learning_rate": 3.7381854043392505e-05,
"loss": 1.262,
"step": 64000
},
{
"epoch": 12.72,
"learning_rate": 3.7283234714003945e-05,
"loss": 1.2483,
"step": 64500
},
{
"epoch": 12.82,
"learning_rate": 3.7184615384615385e-05,
"loss": 1.2615,
"step": 65000
},
{
"epoch": 12.92,
"learning_rate": 3.7085996055226825e-05,
"loss": 1.2499,
"step": 65500
},
{
"epoch": 13.02,
"learning_rate": 3.6987376725838266e-05,
"loss": 1.2475,
"step": 66000
},
{
"epoch": 13.12,
"learning_rate": 3.6888757396449706e-05,
"loss": 1.2385,
"step": 66500
},
{
"epoch": 13.21,
"learning_rate": 3.6790138067061146e-05,
"loss": 1.2381,
"step": 67000
},
{
"epoch": 13.31,
"learning_rate": 3.6691518737672586e-05,
"loss": 1.2394,
"step": 67500
},
{
"epoch": 13.41,
"learning_rate": 3.65930966469428e-05,
"loss": 1.2345,
"step": 68000
},
{
"epoch": 13.51,
"learning_rate": 3.649447731755424e-05,
"loss": 1.2348,
"step": 68500
},
{
"epoch": 13.61,
"learning_rate": 3.639585798816568e-05,
"loss": 1.2237,
"step": 69000
},
{
"epoch": 13.71,
"learning_rate": 3.62974358974359e-05,
"loss": 1.2346,
"step": 69500
},
{
"epoch": 13.81,
"learning_rate": 3.619881656804734e-05,
"loss": 1.2205,
"step": 70000
},
{
"epoch": 13.9,
"learning_rate": 3.610019723865878e-05,
"loss": 1.2187,
"step": 70500
},
{
"epoch": 14.0,
"learning_rate": 3.600157790927022e-05,
"loss": 1.222,
"step": 71000
},
{
"epoch": 14.1,
"learning_rate": 3.590295857988166e-05,
"loss": 1.2185,
"step": 71500
},
{
"epoch": 14.2,
"learning_rate": 3.58043392504931e-05,
"loss": 1.2069,
"step": 72000
},
{
"epoch": 14.3,
"learning_rate": 3.570571992110453e-05,
"loss": 1.2022,
"step": 72500
},
{
"epoch": 14.4,
"learning_rate": 3.5607297830374756e-05,
"loss": 1.2077,
"step": 73000
},
{
"epoch": 14.5,
"learning_rate": 3.5508678500986196e-05,
"loss": 1.2086,
"step": 73500
},
{
"epoch": 14.59,
"learning_rate": 3.5410059171597636e-05,
"loss": 1.2018,
"step": 74000
},
{
"epoch": 14.69,
"learning_rate": 3.5311439842209076e-05,
"loss": 1.1921,
"step": 74500
},
{
"epoch": 14.79,
"learning_rate": 3.521282051282052e-05,
"loss": 1.1921,
"step": 75000
},
{
"epoch": 14.89,
"learning_rate": 3.511420118343196e-05,
"loss": 1.195,
"step": 75500
},
{
"epoch": 14.99,
"learning_rate": 3.50155818540434e-05,
"loss": 1.1917,
"step": 76000
},
{
"epoch": 15.09,
"learning_rate": 3.491715976331361e-05,
"loss": 1.1868,
"step": 76500
},
{
"epoch": 15.19,
"learning_rate": 3.481854043392505e-05,
"loss": 1.188,
"step": 77000
},
{
"epoch": 15.28,
"learning_rate": 3.4719921104536493e-05,
"loss": 1.1823,
"step": 77500
},
{
"epoch": 15.38,
"learning_rate": 3.462130177514793e-05,
"loss": 1.1842,
"step": 78000
},
{
"epoch": 15.48,
"learning_rate": 3.452268244575937e-05,
"loss": 1.1753,
"step": 78500
},
{
"epoch": 15.58,
"learning_rate": 3.442406311637081e-05,
"loss": 1.1706,
"step": 79000
},
{
"epoch": 15.68,
"learning_rate": 3.432544378698225e-05,
"loss": 1.176,
"step": 79500
},
{
"epoch": 15.78,
"learning_rate": 3.4226824457593695e-05,
"loss": 1.1707,
"step": 80000
},
{
"epoch": 15.88,
"learning_rate": 3.4128205128205135e-05,
"loss": 1.1627,
"step": 80500
},
{
"epoch": 15.98,
"learning_rate": 3.402978303747535e-05,
"loss": 1.166,
"step": 81000
},
{
"epoch": 16.07,
"learning_rate": 3.393116370808679e-05,
"loss": 1.1525,
"step": 81500
},
{
"epoch": 16.17,
"learning_rate": 3.3832544378698224e-05,
"loss": 1.1591,
"step": 82000
},
{
"epoch": 16.27,
"learning_rate": 3.3733925049309665e-05,
"loss": 1.159,
"step": 82500
},
{
"epoch": 16.37,
"learning_rate": 3.3635305719921105e-05,
"loss": 1.1587,
"step": 83000
},
{
"epoch": 16.47,
"learning_rate": 3.353688362919132e-05,
"loss": 1.156,
"step": 83500
},
{
"epoch": 16.57,
"learning_rate": 3.343826429980276e-05,
"loss": 1.1462,
"step": 84000
},
{
"epoch": 16.67,
"learning_rate": 3.33396449704142e-05,
"loss": 1.1573,
"step": 84500
},
{
"epoch": 16.76,
"learning_rate": 3.324102564102564e-05,
"loss": 1.1403,
"step": 85000
},
{
"epoch": 16.86,
"learning_rate": 3.314260355029586e-05,
"loss": 1.1419,
"step": 85500
},
{
"epoch": 16.96,
"learning_rate": 3.30439842209073e-05,
"loss": 1.1434,
"step": 86000
},
{
"epoch": 17.06,
"learning_rate": 3.294536489151874e-05,
"loss": 1.1452,
"step": 86500
},
{
"epoch": 17.16,
"learning_rate": 3.2846942800788954e-05,
"loss": 1.128,
"step": 87000
},
{
"epoch": 17.26,
"learning_rate": 3.2748323471400394e-05,
"loss": 1.1396,
"step": 87500
},
{
"epoch": 17.36,
"learning_rate": 3.2649704142011834e-05,
"loss": 1.13,
"step": 88000
},
{
"epoch": 17.45,
"learning_rate": 3.255108481262328e-05,
"loss": 1.1303,
"step": 88500
},
{
"epoch": 17.55,
"learning_rate": 3.2452465483234715e-05,
"loss": 1.1292,
"step": 89000
},
{
"epoch": 17.65,
"learning_rate": 3.2353846153846155e-05,
"loss": 1.1313,
"step": 89500
},
{
"epoch": 17.75,
"learning_rate": 3.2255226824457595e-05,
"loss": 1.1294,
"step": 90000
},
{
"epoch": 17.85,
"learning_rate": 3.2156607495069035e-05,
"loss": 1.1237,
"step": 90500
},
{
"epoch": 17.95,
"learning_rate": 3.2057988165680475e-05,
"loss": 1.1252,
"step": 91000
},
{
"epoch": 18.05,
"learning_rate": 3.1959368836291916e-05,
"loss": 1.1231,
"step": 91500
},
{
"epoch": 18.14,
"learning_rate": 3.1860749506903356e-05,
"loss": 1.1169,
"step": 92000
},
{
"epoch": 18.24,
"learning_rate": 3.176232741617357e-05,
"loss": 1.1104,
"step": 92500
},
{
"epoch": 18.34,
"learning_rate": 3.166370808678501e-05,
"loss": 1.1108,
"step": 93000
},
{
"epoch": 18.44,
"learning_rate": 3.156508875739645e-05,
"loss": 1.1034,
"step": 93500
},
{
"epoch": 18.54,
"learning_rate": 3.1466469428007886e-05,
"loss": 1.1067,
"step": 94000
},
{
"epoch": 18.64,
"learning_rate": 3.136785009861933e-05,
"loss": 1.1096,
"step": 94500
},
{
"epoch": 18.74,
"learning_rate": 3.126923076923077e-05,
"loss": 1.103,
"step": 95000
},
{
"epoch": 18.83,
"learning_rate": 3.117080867850099e-05,
"loss": 1.1128,
"step": 95500
},
{
"epoch": 18.93,
"learning_rate": 3.107218934911243e-05,
"loss": 1.0998,
"step": 96000
},
{
"epoch": 19.03,
"learning_rate": 3.097357001972387e-05,
"loss": 1.0949,
"step": 96500
},
{
"epoch": 19.13,
"learning_rate": 3.087495069033531e-05,
"loss": 1.0935,
"step": 97000
},
{
"epoch": 19.23,
"learning_rate": 3.077633136094675e-05,
"loss": 1.1058,
"step": 97500
},
{
"epoch": 19.33,
"learning_rate": 3.067771203155819e-05,
"loss": 1.0914,
"step": 98000
},
{
"epoch": 19.43,
"learning_rate": 3.0579092702169623e-05,
"loss": 1.0907,
"step": 98500
},
{
"epoch": 19.53,
"learning_rate": 3.0480473372781067e-05,
"loss": 1.0849,
"step": 99000
},
{
"epoch": 19.62,
"learning_rate": 3.0382248520710062e-05,
"loss": 1.0866,
"step": 99500
},
{
"epoch": 19.72,
"learning_rate": 3.02836291913215e-05,
"loss": 1.0957,
"step": 100000
},
{
"epoch": 19.82,
"learning_rate": 3.018500986193294e-05,
"loss": 1.0817,
"step": 100500
},
{
"epoch": 19.92,
"learning_rate": 3.008639053254438e-05,
"loss": 1.0924,
"step": 101000
},
{
"epoch": 20.02,
"learning_rate": 2.998777120315582e-05,
"loss": 1.0742,
"step": 101500
},
{
"epoch": 20.12,
"learning_rate": 2.9889151873767256e-05,
"loss": 1.0749,
"step": 102000
},
{
"epoch": 20.22,
"learning_rate": 2.9790729783037475e-05,
"loss": 1.0764,
"step": 102500
},
{
"epoch": 20.31,
"learning_rate": 2.969211045364892e-05,
"loss": 1.0746,
"step": 103000
},
{
"epoch": 20.41,
"learning_rate": 2.959349112426036e-05,
"loss": 1.073,
"step": 103500
},
{
"epoch": 20.51,
"learning_rate": 2.9494871794871796e-05,
"loss": 1.0738,
"step": 104000
},
{
"epoch": 20.61,
"learning_rate": 2.9396252465483236e-05,
"loss": 1.0642,
"step": 104500
},
{
"epoch": 20.71,
"learning_rate": 2.9297633136094677e-05,
"loss": 1.0707,
"step": 105000
},
{
"epoch": 20.81,
"learning_rate": 2.9199013806706117e-05,
"loss": 1.0765,
"step": 105500
},
{
"epoch": 20.91,
"learning_rate": 2.9100394477317554e-05,
"loss": 1.0729,
"step": 106000
},
{
"epoch": 21.0,
"learning_rate": 2.9001775147928994e-05,
"loss": 1.0632,
"step": 106500
},
{
"epoch": 21.1,
"learning_rate": 2.890355029585799e-05,
"loss": 1.0581,
"step": 107000
},
{
"epoch": 21.2,
"learning_rate": 2.880493096646943e-05,
"loss": 1.058,
"step": 107500
},
{
"epoch": 21.3,
"learning_rate": 2.870631163708087e-05,
"loss": 1.0627,
"step": 108000
},
{
"epoch": 21.4,
"learning_rate": 2.8607692307692306e-05,
"loss": 1.0581,
"step": 108500
},
{
"epoch": 21.5,
"learning_rate": 2.8509270216962525e-05,
"loss": 1.0568,
"step": 109000
},
{
"epoch": 21.6,
"learning_rate": 2.8410650887573966e-05,
"loss": 1.05,
"step": 109500
},
{
"epoch": 21.69,
"learning_rate": 2.8312031558185402e-05,
"loss": 1.0602,
"step": 110000
},
{
"epoch": 21.79,
"learning_rate": 2.8213412228796843e-05,
"loss": 1.0533,
"step": 110500
},
{
"epoch": 21.89,
"learning_rate": 2.8114792899408286e-05,
"loss": 1.0573,
"step": 111000
},
{
"epoch": 21.99,
"learning_rate": 2.8016370808678506e-05,
"loss": 1.0523,
"step": 111500
},
{
"epoch": 22.09,
"learning_rate": 2.7917751479289946e-05,
"loss": 1.0414,
"step": 112000
},
{
"epoch": 22.19,
"learning_rate": 2.7819132149901383e-05,
"loss": 1.0416,
"step": 112500
},
{
"epoch": 22.29,
"learning_rate": 2.7720512820512823e-05,
"loss": 1.042,
"step": 113000
},
{
"epoch": 22.38,
"learning_rate": 2.7621893491124263e-05,
"loss": 1.0446,
"step": 113500
},
{
"epoch": 22.48,
"learning_rate": 2.752347140039448e-05,
"loss": 1.0371,
"step": 114000
},
{
"epoch": 22.58,
"learning_rate": 2.742485207100592e-05,
"loss": 1.0334,
"step": 114500
},
{
"epoch": 22.68,
"learning_rate": 2.732623274161736e-05,
"loss": 1.0409,
"step": 115000
},
{
"epoch": 22.78,
"learning_rate": 2.7227810650887575e-05,
"loss": 1.043,
"step": 115500
},
{
"epoch": 22.88,
"learning_rate": 2.7129191321499015e-05,
"loss": 1.0459,
"step": 116000
},
{
"epoch": 22.98,
"learning_rate": 2.7030571992110452e-05,
"loss": 1.0381,
"step": 116500
},
{
"epoch": 23.08,
"learning_rate": 2.6931952662721893e-05,
"loss": 1.0289,
"step": 117000
},
{
"epoch": 23.17,
"learning_rate": 2.6833333333333333e-05,
"loss": 1.0328,
"step": 117500
},
{
"epoch": 23.27,
"learning_rate": 2.6734714003944773e-05,
"loss": 1.0265,
"step": 118000
},
{
"epoch": 23.37,
"learning_rate": 2.6636094674556217e-05,
"loss": 1.0294,
"step": 118500
},
{
"epoch": 23.47,
"learning_rate": 2.6537475345167657e-05,
"loss": 1.0243,
"step": 119000
},
{
"epoch": 23.57,
"learning_rate": 2.6438856015779094e-05,
"loss": 1.0317,
"step": 119500
},
{
"epoch": 23.67,
"learning_rate": 2.6340236686390534e-05,
"loss": 1.0321,
"step": 120000
},
{
"epoch": 23.77,
"learning_rate": 2.6241617357001974e-05,
"loss": 1.0258,
"step": 120500
},
{
"epoch": 23.86,
"learning_rate": 2.614319526627219e-05,
"loss": 1.0245,
"step": 121000
},
{
"epoch": 23.96,
"learning_rate": 2.604457593688363e-05,
"loss": 1.0255,
"step": 121500
},
{
"epoch": 24.06,
"learning_rate": 2.594615384615385e-05,
"loss": 1.0241,
"step": 122000
},
{
"epoch": 24.16,
"learning_rate": 2.5847534516765286e-05,
"loss": 1.0119,
"step": 122500
},
{
"epoch": 24.26,
"learning_rate": 2.5748915187376727e-05,
"loss": 1.0134,
"step": 123000
},
{
"epoch": 24.36,
"learning_rate": 2.5650295857988167e-05,
"loss": 1.0086,
"step": 123500
},
{
"epoch": 24.46,
"learning_rate": 2.5551676528599604e-05,
"loss": 1.0143,
"step": 124000
},
{
"epoch": 24.55,
"learning_rate": 2.5453057199211044e-05,
"loss": 1.018,
"step": 124500
},
{
"epoch": 24.65,
"learning_rate": 2.5354437869822484e-05,
"loss": 1.0141,
"step": 125000
},
{
"epoch": 24.75,
"learning_rate": 2.5255818540433928e-05,
"loss": 1.0027,
"step": 125500
},
{
"epoch": 24.85,
"learning_rate": 2.515759368836292e-05,
"loss": 1.0171,
"step": 126000
},
{
"epoch": 24.95,
"learning_rate": 2.5058974358974356e-05,
"loss": 1.0162,
"step": 126500
},
{
"epoch": 25.05,
"learning_rate": 2.49603550295858e-05,
"loss": 1.0012,
"step": 127000
},
{
"epoch": 25.15,
"learning_rate": 2.486173570019724e-05,
"loss": 0.9968,
"step": 127500
},
{
"epoch": 25.24,
"learning_rate": 2.4763116370808677e-05,
"loss": 1.0019,
"step": 128000
},
{
"epoch": 25.34,
"learning_rate": 2.466449704142012e-05,
"loss": 0.9994,
"step": 128500
},
{
"epoch": 25.44,
"learning_rate": 2.456587771203156e-05,
"loss": 1.0005,
"step": 129000
},
{
"epoch": 25.54,
"learning_rate": 2.4467258382642997e-05,
"loss": 1.0004,
"step": 129500
},
{
"epoch": 25.64,
"learning_rate": 2.4368639053254438e-05,
"loss": 1.0023,
"step": 130000
},
{
"epoch": 25.74,
"learning_rate": 2.4270019723865878e-05,
"loss": 0.9932,
"step": 130500
},
{
"epoch": 25.84,
"learning_rate": 2.4171400394477318e-05,
"loss": 0.9891,
"step": 131000
},
{
"epoch": 25.93,
"learning_rate": 2.4072978303747537e-05,
"loss": 1.0018,
"step": 131500
},
{
"epoch": 26.03,
"learning_rate": 2.3974358974358978e-05,
"loss": 0.9974,
"step": 132000
},
{
"epoch": 26.13,
"learning_rate": 2.3875739644970414e-05,
"loss": 0.9823,
"step": 132500
},
{
"epoch": 26.23,
"learning_rate": 2.3777317554240634e-05,
"loss": 0.989,
"step": 133000
},
{
"epoch": 26.33,
"learning_rate": 2.367889546351085e-05,
"loss": 0.9932,
"step": 133500
},
{
"epoch": 26.43,
"learning_rate": 2.358027613412229e-05,
"loss": 0.9848,
"step": 134000
},
{
"epoch": 26.53,
"learning_rate": 2.348165680473373e-05,
"loss": 0.9874,
"step": 134500
},
{
"epoch": 26.63,
"learning_rate": 2.3383037475345167e-05,
"loss": 0.9876,
"step": 135000
},
{
"epoch": 26.72,
"learning_rate": 2.3284418145956607e-05,
"loss": 0.9832,
"step": 135500
},
{
"epoch": 26.82,
"learning_rate": 2.318579881656805e-05,
"loss": 0.9902,
"step": 136000
},
{
"epoch": 26.92,
"learning_rate": 2.3087179487179488e-05,
"loss": 0.9808,
"step": 136500
},
{
"epoch": 27.02,
"learning_rate": 2.2988560157790928e-05,
"loss": 0.9789,
"step": 137000
},
{
"epoch": 27.12,
"learning_rate": 2.2890138067061147e-05,
"loss": 0.9742,
"step": 137500
},
{
"epoch": 27.22,
"learning_rate": 2.2791518737672584e-05,
"loss": 0.9733,
"step": 138000
},
{
"epoch": 27.32,
"learning_rate": 2.2692899408284024e-05,
"loss": 0.9864,
"step": 138500
},
{
"epoch": 27.41,
"learning_rate": 2.2594280078895464e-05,
"loss": 0.9769,
"step": 139000
},
{
"epoch": 27.51,
"learning_rate": 2.2495660749506905e-05,
"loss": 0.9751,
"step": 139500
},
{
"epoch": 27.61,
"learning_rate": 2.2397041420118345e-05,
"loss": 0.9776,
"step": 140000
},
{
"epoch": 27.71,
"learning_rate": 2.2298422090729785e-05,
"loss": 0.9781,
"step": 140500
},
{
"epoch": 27.81,
"learning_rate": 2.2199802761341222e-05,
"loss": 0.9687,
"step": 141000
},
{
"epoch": 27.91,
"learning_rate": 2.210138067061144e-05,
"loss": 0.9738,
"step": 141500
},
{
"epoch": 28.01,
"learning_rate": 2.200276134122288e-05,
"loss": 0.9686,
"step": 142000
},
{
"epoch": 28.1,
"learning_rate": 2.1904339250493097e-05,
"loss": 0.9642,
"step": 142500
},
{
"epoch": 28.2,
"learning_rate": 2.1805719921104537e-05,
"loss": 0.9571,
"step": 143000
},
{
"epoch": 28.3,
"learning_rate": 2.1707100591715978e-05,
"loss": 0.9686,
"step": 143500
},
{
"epoch": 28.4,
"learning_rate": 2.1608481262327418e-05,
"loss": 0.9633,
"step": 144000
},
{
"epoch": 28.5,
"learning_rate": 2.1509861932938858e-05,
"loss": 0.9627,
"step": 144500
},
{
"epoch": 28.6,
"learning_rate": 2.1411242603550295e-05,
"loss": 0.9615,
"step": 145000
},
{
"epoch": 28.7,
"learning_rate": 2.1312623274161735e-05,
"loss": 0.9633,
"step": 145500
},
{
"epoch": 28.79,
"learning_rate": 2.1214003944773175e-05,
"loss": 0.9609,
"step": 146000
},
{
"epoch": 28.89,
"learning_rate": 2.111538461538462e-05,
"loss": 0.962,
"step": 146500
},
{
"epoch": 28.99,
"learning_rate": 2.1016765285996056e-05,
"loss": 0.9561,
"step": 147000
},
{
"epoch": 29.09,
"learning_rate": 2.0918145956607496e-05,
"loss": 0.9611,
"step": 147500
},
{
"epoch": 29.19,
"learning_rate": 2.0819526627218936e-05,
"loss": 0.9502,
"step": 148000
},
{
"epoch": 29.29,
"learning_rate": 2.0720907297830373e-05,
"loss": 0.9651,
"step": 148500
},
{
"epoch": 29.39,
"learning_rate": 2.062268244575937e-05,
"loss": 0.9502,
"step": 149000
},
{
"epoch": 29.48,
"learning_rate": 2.052406311637081e-05,
"loss": 0.9558,
"step": 149500
},
{
"epoch": 29.58,
"learning_rate": 2.042544378698225e-05,
"loss": 0.9614,
"step": 150000
},
{
"epoch": 29.68,
"learning_rate": 2.0326824457593692e-05,
"loss": 0.9507,
"step": 150500
},
{
"epoch": 29.78,
"learning_rate": 2.0228402366863905e-05,
"loss": 0.9465,
"step": 151000
},
{
"epoch": 29.88,
"learning_rate": 2.0129783037475348e-05,
"loss": 0.9483,
"step": 151500
},
{
"epoch": 29.98,
"learning_rate": 2.0031163708086785e-05,
"loss": 0.9488,
"step": 152000
},
{
"epoch": 30.08,
"learning_rate": 1.9932544378698225e-05,
"loss": 0.9501,
"step": 152500
},
{
"epoch": 30.18,
"learning_rate": 1.9833925049309666e-05,
"loss": 0.9477,
"step": 153000
},
{
"epoch": 30.27,
"learning_rate": 1.9735305719921106e-05,
"loss": 0.9401,
"step": 153500
},
{
"epoch": 30.37,
"learning_rate": 1.9636686390532546e-05,
"loss": 0.9528,
"step": 154000
},
{
"epoch": 30.47,
"learning_rate": 1.9538067061143986e-05,
"loss": 0.9412,
"step": 154500
},
{
"epoch": 30.57,
"learning_rate": 1.9439644970414202e-05,
"loss": 0.9421,
"step": 155000
},
{
"epoch": 30.67,
"learning_rate": 1.934122287968442e-05,
"loss": 0.9455,
"step": 155500
},
{
"epoch": 30.77,
"learning_rate": 1.9242603550295858e-05,
"loss": 0.9401,
"step": 156000
},
{
"epoch": 30.87,
"learning_rate": 1.91439842209073e-05,
"loss": 0.9396,
"step": 156500
},
{
"epoch": 30.96,
"learning_rate": 1.904536489151874e-05,
"loss": 0.937,
"step": 157000
},
{
"epoch": 31.06,
"learning_rate": 1.894674556213018e-05,
"loss": 0.9382,
"step": 157500
},
{
"epoch": 31.16,
"learning_rate": 1.8848126232741616e-05,
"loss": 0.9368,
"step": 158000
},
{
"epoch": 31.26,
"learning_rate": 1.874950690335306e-05,
"loss": 0.9322,
"step": 158500
},
{
"epoch": 31.36,
"learning_rate": 1.86508875739645e-05,
"loss": 0.938,
"step": 159000
},
{
"epoch": 31.46,
"learning_rate": 1.8552268244575936e-05,
"loss": 0.9279,
"step": 159500
},
{
"epoch": 31.56,
"learning_rate": 1.8453648915187377e-05,
"loss": 0.9364,
"step": 160000
},
{
"epoch": 31.65,
"learning_rate": 1.8355226824457593e-05,
"loss": 0.9404,
"step": 160500
},
{
"epoch": 31.75,
"learning_rate": 1.8256607495069033e-05,
"loss": 0.9264,
"step": 161000
},
{
"epoch": 31.85,
"learning_rate": 1.8158185404339252e-05,
"loss": 0.9406,
"step": 161500
},
{
"epoch": 31.95,
"learning_rate": 1.805956607495069e-05,
"loss": 0.9316,
"step": 162000
},
{
"epoch": 32.05,
"learning_rate": 1.7961143984220908e-05,
"loss": 0.9257,
"step": 162500
},
{
"epoch": 32.15,
"learning_rate": 1.7862524654832348e-05,
"loss": 0.9292,
"step": 163000
},
{
"epoch": 32.25,
"learning_rate": 1.776390532544379e-05,
"loss": 0.9198,
"step": 163500
},
{
"epoch": 32.34,
"learning_rate": 1.766528599605523e-05,
"loss": 0.9245,
"step": 164000
},
{
"epoch": 32.44,
"learning_rate": 1.756666666666667e-05,
"loss": 0.9244,
"step": 164500
},
{
"epoch": 32.54,
"learning_rate": 1.7468047337278106e-05,
"loss": 0.9294,
"step": 165000
},
{
"epoch": 32.64,
"learning_rate": 1.7369428007889546e-05,
"loss": 0.9284,
"step": 165500
},
{
"epoch": 32.74,
"learning_rate": 1.727080867850099e-05,
"loss": 0.9224,
"step": 166000
},
{
"epoch": 32.84,
"learning_rate": 1.7172189349112427e-05,
"loss": 0.9269,
"step": 166500
},
{
"epoch": 32.94,
"learning_rate": 1.7073570019723867e-05,
"loss": 0.9204,
"step": 167000
},
{
"epoch": 33.04,
"learning_rate": 1.6974950690335307e-05,
"loss": 0.9203,
"step": 167500
},
{
"epoch": 33.13,
"learning_rate": 1.6876331360946744e-05,
"loss": 0.9213,
"step": 168000
},
{
"epoch": 33.23,
"learning_rate": 1.6777712031558184e-05,
"loss": 0.9177,
"step": 168500
},
{
"epoch": 33.33,
"learning_rate": 1.6679289940828403e-05,
"loss": 0.9117,
"step": 169000
},
{
"epoch": 33.43,
"learning_rate": 1.6580670611439844e-05,
"loss": 0.9109,
"step": 169500
},
{
"epoch": 33.53,
"learning_rate": 1.6482051282051284e-05,
"loss": 0.919,
"step": 170000
},
{
"epoch": 33.63,
"learning_rate": 1.6383431952662724e-05,
"loss": 0.917,
"step": 170500
},
{
"epoch": 33.73,
"learning_rate": 1.628500986193294e-05,
"loss": 0.9071,
"step": 171000
},
{
"epoch": 33.82,
"learning_rate": 1.618639053254438e-05,
"loss": 0.9128,
"step": 171500
},
{
"epoch": 33.92,
"learning_rate": 1.6087771203155817e-05,
"loss": 0.9237,
"step": 172000
},
{
"epoch": 34.02,
"learning_rate": 1.5989151873767257e-05,
"loss": 0.9179,
"step": 172500
},
{
"epoch": 34.12,
"learning_rate": 1.58905325443787e-05,
"loss": 0.9095,
"step": 173000
},
{
"epoch": 34.22,
"learning_rate": 1.579191321499014e-05,
"loss": 0.9104,
"step": 173500
},
{
"epoch": 34.32,
"learning_rate": 1.5693491124260357e-05,
"loss": 0.9134,
"step": 174000
},
{
"epoch": 34.42,
"learning_rate": 1.5594871794871797e-05,
"loss": 0.9078,
"step": 174500
},
{
"epoch": 34.51,
"learning_rate": 1.5496449704142013e-05,
"loss": 0.9037,
"step": 175000
},
{
"epoch": 34.61,
"learning_rate": 1.5397830374753453e-05,
"loss": 0.9107,
"step": 175500
},
{
"epoch": 34.71,
"learning_rate": 1.5299211045364893e-05,
"loss": 0.9082,
"step": 176000
},
{
"epoch": 34.81,
"learning_rate": 1.520059171597633e-05,
"loss": 0.914,
"step": 176500
},
{
"epoch": 34.91,
"learning_rate": 1.5101972386587774e-05,
"loss": 0.9087,
"step": 177000
},
{
"epoch": 35.01,
"learning_rate": 1.5003353057199212e-05,
"loss": 0.9059,
"step": 177500
},
{
"epoch": 35.11,
"learning_rate": 1.490493096646943e-05,
"loss": 0.8985,
"step": 178000
},
{
"epoch": 35.2,
"learning_rate": 1.4806311637080869e-05,
"loss": 0.9007,
"step": 178500
},
{
"epoch": 35.3,
"learning_rate": 1.4707692307692309e-05,
"loss": 0.9071,
"step": 179000
},
{
"epoch": 35.4,
"learning_rate": 1.4609072978303747e-05,
"loss": 0.9013,
"step": 179500
},
{
"epoch": 35.5,
"learning_rate": 1.4510453648915188e-05,
"loss": 0.9006,
"step": 180000
},
{
"epoch": 35.6,
"learning_rate": 1.4412031558185405e-05,
"loss": 0.9026,
"step": 180500
},
{
"epoch": 35.7,
"learning_rate": 1.4313412228796844e-05,
"loss": 0.9051,
"step": 181000
},
{
"epoch": 35.8,
"learning_rate": 1.4214792899408286e-05,
"loss": 0.9061,
"step": 181500
},
{
"epoch": 35.89,
"learning_rate": 1.4116173570019726e-05,
"loss": 0.9025,
"step": 182000
},
{
"epoch": 35.99,
"learning_rate": 1.4017554240631164e-05,
"loss": 0.8981,
"step": 182500
},
{
"epoch": 36.09,
"learning_rate": 1.3918934911242603e-05,
"loss": 0.8951,
"step": 183000
},
{
"epoch": 36.19,
"learning_rate": 1.3820315581854043e-05,
"loss": 0.8995,
"step": 183500
},
{
"epoch": 36.29,
"learning_rate": 1.3721696252465485e-05,
"loss": 0.8995,
"step": 184000
},
{
"epoch": 36.39,
"learning_rate": 1.36232741617357e-05,
"loss": 0.8954,
"step": 184500
},
{
"epoch": 36.49,
"learning_rate": 1.3524654832347141e-05,
"loss": 0.8927,
"step": 185000
},
{
"epoch": 36.59,
"learning_rate": 1.3426035502958581e-05,
"loss": 0.8843,
"step": 185500
},
{
"epoch": 36.68,
"learning_rate": 1.332741617357002e-05,
"loss": 0.891,
"step": 186000
},
{
"epoch": 36.78,
"learning_rate": 1.322879684418146e-05,
"loss": 0.9012,
"step": 186500
},
{
"epoch": 36.88,
"learning_rate": 1.3130177514792899e-05,
"loss": 0.8937,
"step": 187000
},
{
"epoch": 36.98,
"learning_rate": 1.303155818540434e-05,
"loss": 0.8982,
"step": 187500
},
{
"epoch": 37.08,
"learning_rate": 1.2932938856015781e-05,
"loss": 0.8958,
"step": 188000
},
{
"epoch": 37.18,
"learning_rate": 1.283431952662722e-05,
"loss": 0.893,
"step": 188500
},
{
"epoch": 37.28,
"learning_rate": 1.273570019723866e-05,
"loss": 0.8875,
"step": 189000
},
{
"epoch": 37.37,
"learning_rate": 1.2637278106508877e-05,
"loss": 0.8863,
"step": 189500
},
{
"epoch": 37.47,
"learning_rate": 1.2538658777120316e-05,
"loss": 0.881,
"step": 190000
},
{
"epoch": 37.57,
"learning_rate": 1.2440039447731756e-05,
"loss": 0.8837,
"step": 190500
},
{
"epoch": 37.67,
"learning_rate": 1.2341420118343196e-05,
"loss": 0.8899,
"step": 191000
},
{
"epoch": 37.77,
"learning_rate": 1.2242800788954635e-05,
"loss": 0.8834,
"step": 191500
},
{
"epoch": 37.87,
"learning_rate": 1.2144181459566075e-05,
"loss": 0.889,
"step": 192000
},
{
"epoch": 37.97,
"learning_rate": 1.2045562130177515e-05,
"loss": 0.8911,
"step": 192500
},
{
"epoch": 38.06,
"learning_rate": 1.1947140039447733e-05,
"loss": 0.887,
"step": 193000
},
{
"epoch": 38.16,
"learning_rate": 1.1848520710059171e-05,
"loss": 0.8768,
"step": 193500
},
{
"epoch": 38.26,
"learning_rate": 1.1749901380670612e-05,
"loss": 0.8869,
"step": 194000
},
{
"epoch": 38.36,
"learning_rate": 1.1651282051282052e-05,
"loss": 0.8802,
"step": 194500
},
{
"epoch": 38.46,
"learning_rate": 1.155266272189349e-05,
"loss": 0.8752,
"step": 195000
},
{
"epoch": 38.56,
"learning_rate": 1.1454240631163708e-05,
"loss": 0.8792,
"step": 195500
},
{
"epoch": 38.66,
"learning_rate": 1.135562130177515e-05,
"loss": 0.8815,
"step": 196000
},
{
"epoch": 38.75,
"learning_rate": 1.1257199211045366e-05,
"loss": 0.8719,
"step": 196500
},
{
"epoch": 38.85,
"learning_rate": 1.1158579881656806e-05,
"loss": 0.8821,
"step": 197000
},
{
"epoch": 38.95,
"learning_rate": 1.1059960552268244e-05,
"loss": 0.88,
"step": 197500
},
{
"epoch": 39.05,
"learning_rate": 1.0961341222879686e-05,
"loss": 0.878,
"step": 198000
},
{
"epoch": 39.15,
"learning_rate": 1.0862919132149902e-05,
"loss": 0.8813,
"step": 198500
},
{
"epoch": 39.25,
"learning_rate": 1.0764299802761342e-05,
"loss": 0.8756,
"step": 199000
},
{
"epoch": 39.35,
"learning_rate": 1.0665680473372781e-05,
"loss": 0.8748,
"step": 199500
},
{
"epoch": 39.44,
"learning_rate": 1.0567061143984223e-05,
"loss": 0.8762,
"step": 200000
},
{
"epoch": 39.54,
"learning_rate": 1.0468441814595661e-05,
"loss": 0.876,
"step": 200500
},
{
"epoch": 39.64,
"learning_rate": 1.03698224852071e-05,
"loss": 0.8688,
"step": 201000
},
{
"epoch": 39.74,
"learning_rate": 1.0271203155818542e-05,
"loss": 0.8772,
"step": 201500
},
{
"epoch": 39.84,
"learning_rate": 1.017258382642998e-05,
"loss": 0.8721,
"step": 202000
},
{
"epoch": 39.94,
"learning_rate": 1.007396449704142e-05,
"loss": 0.8711,
"step": 202500
},
{
"epoch": 40.04,
"learning_rate": 9.975542406311638e-06,
"loss": 0.8737,
"step": 203000
},
{
"epoch": 40.14,
"learning_rate": 9.876923076923078e-06,
"loss": 0.8682,
"step": 203500
},
{
"epoch": 40.23,
"learning_rate": 9.778303747534517e-06,
"loss": 0.8691,
"step": 204000
},
{
"epoch": 40.33,
"learning_rate": 9.679684418145957e-06,
"loss": 0.8727,
"step": 204500
},
{
"epoch": 40.43,
"learning_rate": 9.581065088757397e-06,
"loss": 0.8725,
"step": 205000
},
{
"epoch": 40.53,
"learning_rate": 9.482642998027613e-06,
"loss": 0.8707,
"step": 205500
},
{
"epoch": 40.63,
"learning_rate": 9.384023668639053e-06,
"loss": 0.8761,
"step": 206000
},
{
"epoch": 40.73,
"learning_rate": 9.285404339250494e-06,
"loss": 0.8674,
"step": 206500
},
{
"epoch": 40.83,
"learning_rate": 9.186785009861934e-06,
"loss": 0.8629,
"step": 207000
},
{
"epoch": 40.92,
"learning_rate": 9.088165680473374e-06,
"loss": 0.8733,
"step": 207500
},
{
"epoch": 41.02,
"learning_rate": 8.989546351084813e-06,
"loss": 0.8634,
"step": 208000
},
{
"epoch": 41.12,
"learning_rate": 8.89112426035503e-06,
"loss": 0.8653,
"step": 208500
},
{
"epoch": 41.22,
"learning_rate": 8.792504930966469e-06,
"loss": 0.8645,
"step": 209000
},
{
"epoch": 41.32,
"learning_rate": 8.69388560157791e-06,
"loss": 0.8623,
"step": 209500
},
{
"epoch": 41.42,
"learning_rate": 8.59526627218935e-06,
"loss": 0.8655,
"step": 210000
},
{
"epoch": 41.52,
"learning_rate": 8.496844181459567e-06,
"loss": 0.8619,
"step": 210500
},
{
"epoch": 41.61,
"learning_rate": 8.398224852071005e-06,
"loss": 0.8728,
"step": 211000
},
{
"epoch": 41.71,
"learning_rate": 8.299605522682447e-06,
"loss": 0.8671,
"step": 211500
},
{
"epoch": 41.81,
"learning_rate": 8.200986193293886e-06,
"loss": 0.8606,
"step": 212000
},
{
"epoch": 41.91,
"learning_rate": 8.102366863905324e-06,
"loss": 0.8578,
"step": 212500
},
{
"epoch": 42.01,
"learning_rate": 8.003747534516766e-06,
"loss": 0.8644,
"step": 213000
},
{
"epoch": 42.11,
"learning_rate": 7.905128205128205e-06,
"loss": 0.8576,
"step": 213500
},
{
"epoch": 42.21,
"learning_rate": 7.806508875739647e-06,
"loss": 0.855,
"step": 214000
},
{
"epoch": 42.3,
"learning_rate": 7.707889546351085e-06,
"loss": 0.85,
"step": 214500
},
{
"epoch": 42.4,
"learning_rate": 7.609270216962525e-06,
"loss": 0.8654,
"step": 215000
},
{
"epoch": 42.5,
"learning_rate": 7.510650887573965e-06,
"loss": 0.8571,
"step": 215500
},
{
"epoch": 42.6,
"learning_rate": 7.412031558185404e-06,
"loss": 0.8673,
"step": 216000
},
{
"epoch": 42.7,
"learning_rate": 7.313609467455622e-06,
"loss": 0.8577,
"step": 216500
},
{
"epoch": 42.8,
"learning_rate": 7.214990138067061e-06,
"loss": 0.8535,
"step": 217000
},
{
"epoch": 42.9,
"learning_rate": 7.116370808678502e-06,
"loss": 0.8613,
"step": 217500
},
{
"epoch": 42.99,
"learning_rate": 7.017751479289941e-06,
"loss": 0.8586,
"step": 218000
},
{
"epoch": 43.09,
"learning_rate": 6.919329388560158e-06,
"loss": 0.8546,
"step": 218500
},
{
"epoch": 43.19,
"learning_rate": 6.820710059171598e-06,
"loss": 0.8521,
"step": 219000
},
{
"epoch": 43.29,
"learning_rate": 6.722090729783037e-06,
"loss": 0.8598,
"step": 219500
},
{
"epoch": 43.39,
"learning_rate": 6.623668639053255e-06,
"loss": 0.8527,
"step": 220000
},
{
"epoch": 43.49,
"learning_rate": 6.525049309664695e-06,
"loss": 0.8512,
"step": 220500
},
{
"epoch": 43.59,
"learning_rate": 6.426429980276134e-06,
"loss": 0.8522,
"step": 221000
},
{
"epoch": 43.69,
"learning_rate": 6.327810650887574e-06,
"loss": 0.855,
"step": 221500
},
{
"epoch": 43.78,
"learning_rate": 6.229191321499015e-06,
"loss": 0.854,
"step": 222000
},
{
"epoch": 43.88,
"learning_rate": 6.130769230769231e-06,
"loss": 0.8585,
"step": 222500
},
{
"epoch": 43.98,
"learning_rate": 6.032149901380671e-06,
"loss": 0.8577,
"step": 223000
},
{
"epoch": 44.08,
"learning_rate": 5.933530571992111e-06,
"loss": 0.8501,
"step": 223500
},
{
"epoch": 44.18,
"learning_rate": 5.8349112426035505e-06,
"loss": 0.8473,
"step": 224000
},
{
"epoch": 44.28,
"learning_rate": 5.73629191321499e-06,
"loss": 0.8528,
"step": 224500
},
{
"epoch": 44.38,
"learning_rate": 5.6378698224852074e-06,
"loss": 0.8459,
"step": 225000
},
{
"epoch": 44.47,
"learning_rate": 5.539250493096648e-06,
"loss": 0.8507,
"step": 225500
},
{
"epoch": 44.57,
"learning_rate": 5.440631163708087e-06,
"loss": 0.844,
"step": 226000
},
{
"epoch": 44.67,
"learning_rate": 5.342011834319527e-06,
"loss": 0.8505,
"step": 226500
},
{
"epoch": 44.77,
"learning_rate": 5.243392504930967e-06,
"loss": 0.8497,
"step": 227000
},
{
"epoch": 44.87,
"learning_rate": 5.144773175542406e-06,
"loss": 0.8534,
"step": 227500
},
{
"epoch": 44.97,
"learning_rate": 5.046153846153846e-06,
"loss": 0.8451,
"step": 228000
},
{
"epoch": 45.07,
"learning_rate": 4.9475345167652866e-06,
"loss": 0.8426,
"step": 228500
},
{
"epoch": 45.16,
"learning_rate": 4.848915187376726e-06,
"loss": 0.8503,
"step": 229000
},
{
"epoch": 45.26,
"learning_rate": 4.750493096646943e-06,
"loss": 0.8476,
"step": 229500
},
{
"epoch": 45.36,
"learning_rate": 4.65207100591716e-06,
"loss": 0.8432,
"step": 230000
},
{
"epoch": 45.46,
"learning_rate": 4.5534516765286e-06,
"loss": 0.8452,
"step": 230500
},
{
"epoch": 45.56,
"learning_rate": 4.45483234714004e-06,
"loss": 0.8435,
"step": 231000
},
{
"epoch": 45.66,
"learning_rate": 4.356213017751479e-06,
"loss": 0.8441,
"step": 231500
},
{
"epoch": 45.76,
"learning_rate": 4.257593688362919e-06,
"loss": 0.8435,
"step": 232000
},
{
"epoch": 45.85,
"learning_rate": 4.15897435897436e-06,
"loss": 0.8458,
"step": 232500
},
{
"epoch": 45.95,
"learning_rate": 4.060355029585799e-06,
"loss": 0.8538,
"step": 233000
},
{
"epoch": 46.05,
"learning_rate": 3.961932938856016e-06,
"loss": 0.8452,
"step": 233500
},
{
"epoch": 46.15,
"learning_rate": 3.863510848126233e-06,
"loss": 0.8372,
"step": 234000
},
{
"epoch": 46.25,
"learning_rate": 3.7648915187376726e-06,
"loss": 0.8456,
"step": 234500
},
{
"epoch": 46.35,
"learning_rate": 3.6662721893491124e-06,
"loss": 0.8453,
"step": 235000
},
{
"epoch": 46.45,
"learning_rate": 3.5676528599605527e-06,
"loss": 0.8438,
"step": 235500
},
{
"epoch": 46.54,
"learning_rate": 3.4690335305719925e-06,
"loss": 0.8473,
"step": 236000
},
{
"epoch": 46.64,
"learning_rate": 3.370414201183432e-06,
"loss": 0.8404,
"step": 236500
},
{
"epoch": 46.74,
"learning_rate": 3.271794871794872e-06,
"loss": 0.843,
"step": 237000
},
{
"epoch": 46.84,
"learning_rate": 3.173175542406312e-06,
"loss": 0.8439,
"step": 237500
},
{
"epoch": 46.94,
"learning_rate": 3.0745562130177517e-06,
"loss": 0.8492,
"step": 238000
},
{
"epoch": 47.04,
"learning_rate": 2.975936883629191e-06,
"loss": 0.8425,
"step": 238500
},
{
"epoch": 47.14,
"learning_rate": 2.8775147928994087e-06,
"loss": 0.8401,
"step": 239000
},
{
"epoch": 47.24,
"learning_rate": 2.778895463510848e-06,
"loss": 0.8437,
"step": 239500
},
{
"epoch": 47.33,
"learning_rate": 2.6802761341222883e-06,
"loss": 0.8375,
"step": 240000
},
{
"epoch": 47.43,
"learning_rate": 2.581656804733728e-06,
"loss": 0.8448,
"step": 240500
},
{
"epoch": 47.53,
"learning_rate": 2.4830374753451675e-06,
"loss": 0.8421,
"step": 241000
},
{
"epoch": 47.63,
"learning_rate": 2.3844181459566077e-06,
"loss": 0.8446,
"step": 241500
},
{
"epoch": 47.73,
"learning_rate": 2.2859960552268244e-06,
"loss": 0.8403,
"step": 242000
},
{
"epoch": 47.83,
"learning_rate": 2.1873767258382646e-06,
"loss": 0.8343,
"step": 242500
},
{
"epoch": 47.93,
"learning_rate": 2.088757396449704e-06,
"loss": 0.8386,
"step": 243000
},
{
"epoch": 48.02,
"learning_rate": 1.9901380670611443e-06,
"loss": 0.8284,
"step": 243500
},
{
"epoch": 48.12,
"learning_rate": 1.8915187376725839e-06,
"loss": 0.8372,
"step": 244000
},
{
"epoch": 48.22,
"learning_rate": 1.7930966469428008e-06,
"loss": 0.8392,
"step": 244500
},
{
"epoch": 48.32,
"learning_rate": 1.6944773175542408e-06,
"loss": 0.8418,
"step": 245000
},
{
"epoch": 48.42,
"learning_rate": 1.5958579881656804e-06,
"loss": 0.8393,
"step": 245500
},
{
"epoch": 48.52,
"learning_rate": 1.4972386587771204e-06,
"loss": 0.838,
"step": 246000
},
{
"epoch": 48.62,
"learning_rate": 1.3986193293885602e-06,
"loss": 0.8403,
"step": 246500
},
{
"epoch": 48.71,
"learning_rate": 1.3001972386587771e-06,
"loss": 0.8346,
"step": 247000
},
{
"epoch": 48.81,
"learning_rate": 1.201577909270217e-06,
"loss": 0.8348,
"step": 247500
},
{
"epoch": 48.91,
"learning_rate": 1.102958579881657e-06,
"loss": 0.8375,
"step": 248000
},
{
"epoch": 49.01,
"learning_rate": 1.0043392504930968e-06,
"loss": 0.838,
"step": 248500
},
{
"epoch": 49.11,
"learning_rate": 9.057199211045366e-07,
"loss": 0.831,
"step": 249000
},
{
"epoch": 49.21,
"learning_rate": 8.072978303747534e-07,
"loss": 0.8363,
"step": 249500
},
{
"epoch": 49.31,
"learning_rate": 7.086785009861934e-07,
"loss": 0.8378,
"step": 250000
},
{
"epoch": 49.4,
"learning_rate": 6.102564102564103e-07,
"loss": 0.8329,
"step": 250500
},
{
"epoch": 49.5,
"learning_rate": 5.116370808678501e-07,
"loss": 0.8419,
"step": 251000
},
{
"epoch": 49.6,
"learning_rate": 4.1301775147929e-07,
"loss": 0.8304,
"step": 251500
},
{
"epoch": 49.7,
"learning_rate": 3.1439842209072983e-07,
"loss": 0.8399,
"step": 252000
},
{
"epoch": 49.8,
"learning_rate": 2.1577909270216962e-07,
"loss": 0.834,
"step": 252500
},
{
"epoch": 49.9,
"learning_rate": 1.1715976331360947e-07,
"loss": 0.838,
"step": 253000
},
{
"epoch": 50.0,
"learning_rate": 1.854043392504931e-08,
"loss": 0.8346,
"step": 253500
},
{
"epoch": 50.0,
"step": 253500,
"total_flos": 2.1443533265845617e+18,
"train_loss": 1.184410398825621,
"train_runtime": 354987.1489,
"train_samples_per_second": 22.853,
"train_steps_per_second": 0.714
}
],
"logging_steps": 500,
"max_steps": 253500,
"num_train_epochs": 50,
"save_steps": 10000,
"total_flos": 2.1443533265845617e+18,
"trial_name": null,
"trial_params": null
}