SentencePieceCharacters-NACHOS-FR / trainer_state.json
qanastek's picture
Upload 40 files
65e52e0
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"global_step": 100387,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-09,
"loss": 4.5137,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 2.5e-06,
"loss": 3.2562,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 5e-06,
"loss": 2.8722,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 7.5e-06,
"loss": 2.8448,
"step": 1500
},
{
"epoch": 0.14,
"learning_rate": 1e-05,
"loss": 2.8325,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 1.25e-05,
"loss": 2.8247,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 1.5e-05,
"loss": 2.8201,
"step": 3000
},
{
"epoch": 0.24,
"learning_rate": 1.75e-05,
"loss": 2.8138,
"step": 3500
},
{
"epoch": 0.28,
"learning_rate": 2e-05,
"loss": 2.8128,
"step": 4000
},
{
"epoch": 0.31,
"learning_rate": 2.25e-05,
"loss": 2.8111,
"step": 4500
},
{
"epoch": 0.35,
"learning_rate": 2.5e-05,
"loss": 2.8084,
"step": 5000
},
{
"epoch": 0.38,
"learning_rate": 2.7500000000000004e-05,
"loss": 2.8094,
"step": 5500
},
{
"epoch": 0.42,
"learning_rate": 3e-05,
"loss": 2.8076,
"step": 6000
},
{
"epoch": 0.45,
"learning_rate": 3.2500000000000004e-05,
"loss": 2.807,
"step": 6500
},
{
"epoch": 0.49,
"learning_rate": 3.5e-05,
"loss": 2.8051,
"step": 7000
},
{
"epoch": 0.52,
"learning_rate": 3.7500000000000003e-05,
"loss": 2.8055,
"step": 7500
},
{
"epoch": 0.56,
"learning_rate": 4e-05,
"loss": 2.8039,
"step": 8000
},
{
"epoch": 0.59,
"learning_rate": 4.2495e-05,
"loss": 2.8025,
"step": 8500
},
{
"epoch": 0.63,
"learning_rate": 4.4995000000000005e-05,
"loss": 2.804,
"step": 9000
},
{
"epoch": 0.66,
"learning_rate": 4.7495e-05,
"loss": 2.802,
"step": 9500
},
{
"epoch": 0.7,
"learning_rate": 4.9995000000000005e-05,
"loss": 2.8034,
"step": 10000
},
{
"epoch": 0.73,
"learning_rate": 4.999566044984633e-05,
"loss": 2.7957,
"step": 10500
},
{
"epoch": 0.77,
"learning_rate": 4.999129470522493e-05,
"loss": 2.5224,
"step": 11000
},
{
"epoch": 0.8,
"learning_rate": 4.998692896060352e-05,
"loss": 1.773,
"step": 11500
},
{
"epoch": 0.84,
"learning_rate": 4.998256321598212e-05,
"loss": 1.2906,
"step": 12000
},
{
"epoch": 0.87,
"learning_rate": 4.997819747136072e-05,
"loss": 1.0042,
"step": 12500
},
{
"epoch": 0.91,
"learning_rate": 4.997383172673932e-05,
"loss": 0.8464,
"step": 13000
},
{
"epoch": 0.94,
"learning_rate": 4.996946598211791e-05,
"loss": 0.7509,
"step": 13500
},
{
"epoch": 0.98,
"learning_rate": 4.996510023749651e-05,
"loss": 0.6852,
"step": 14000
},
{
"epoch": 1.01,
"learning_rate": 4.9960734492875106e-05,
"loss": 0.633,
"step": 14500
},
{
"epoch": 1.05,
"learning_rate": 4.9956377479742946e-05,
"loss": 0.5919,
"step": 15000
},
{
"epoch": 1.08,
"learning_rate": 4.995201173512155e-05,
"loss": 0.5582,
"step": 15500
},
{
"epoch": 1.12,
"learning_rate": 4.994764599050014e-05,
"loss": 0.5288,
"step": 16000
},
{
"epoch": 1.15,
"learning_rate": 4.9943280245878735e-05,
"loss": 0.5045,
"step": 16500
},
{
"epoch": 1.19,
"learning_rate": 4.9938914501257336e-05,
"loss": 0.4818,
"step": 17000
},
{
"epoch": 1.22,
"learning_rate": 4.993454875663594e-05,
"loss": 0.4622,
"step": 17500
},
{
"epoch": 1.26,
"learning_rate": 4.993018301201453e-05,
"loss": 0.4451,
"step": 18000
},
{
"epoch": 1.29,
"learning_rate": 4.9925817267393125e-05,
"loss": 0.4285,
"step": 18500
},
{
"epoch": 1.32,
"learning_rate": 4.992146025426097e-05,
"loss": 0.4142,
"step": 19000
},
{
"epoch": 1.36,
"learning_rate": 4.9917094509639565e-05,
"loss": 0.4016,
"step": 19500
},
{
"epoch": 1.39,
"learning_rate": 4.9912728765018166e-05,
"loss": 0.3895,
"step": 20000
},
{
"epoch": 1.43,
"learning_rate": 4.990836302039676e-05,
"loss": 0.3783,
"step": 20500
},
{
"epoch": 1.46,
"learning_rate": 4.99040060072646e-05,
"loss": 0.3702,
"step": 21000
},
{
"epoch": 1.5,
"learning_rate": 4.989964899413244e-05,
"loss": 0.3607,
"step": 21500
},
{
"epoch": 1.53,
"learning_rate": 4.989528324951104e-05,
"loss": 0.3522,
"step": 22000
},
{
"epoch": 1.57,
"learning_rate": 4.9890917504889635e-05,
"loss": 0.3446,
"step": 22500
},
{
"epoch": 1.6,
"learning_rate": 4.9886551760268236e-05,
"loss": 0.3375,
"step": 23000
},
{
"epoch": 1.64,
"learning_rate": 4.988218601564683e-05,
"loss": 0.3297,
"step": 23500
},
{
"epoch": 1.67,
"learning_rate": 4.987782027102543e-05,
"loss": 0.3247,
"step": 24000
},
{
"epoch": 1.71,
"learning_rate": 4.9873454526404024e-05,
"loss": 0.3196,
"step": 24500
},
{
"epoch": 1.74,
"learning_rate": 4.9869088781782625e-05,
"loss": 0.3133,
"step": 25000
},
{
"epoch": 1.78,
"learning_rate": 4.9864731768650465e-05,
"loss": 0.3086,
"step": 25500
},
{
"epoch": 1.81,
"learning_rate": 4.986036602402906e-05,
"loss": 0.3033,
"step": 26000
},
{
"epoch": 1.85,
"learning_rate": 4.985600027940766e-05,
"loss": 0.2998,
"step": 26500
},
{
"epoch": 1.88,
"learning_rate": 4.9851634534786254e-05,
"loss": 0.2948,
"step": 27000
},
{
"epoch": 1.92,
"learning_rate": 4.9847277521654094e-05,
"loss": 0.2912,
"step": 27500
},
{
"epoch": 1.95,
"learning_rate": 4.9842911777032695e-05,
"loss": 0.2873,
"step": 28000
},
{
"epoch": 1.99,
"learning_rate": 4.9838546032411295e-05,
"loss": 0.2837,
"step": 28500
},
{
"epoch": 2.02,
"learning_rate": 4.983418028778989e-05,
"loss": 0.2809,
"step": 29000
},
{
"epoch": 2.06,
"learning_rate": 4.982982327465773e-05,
"loss": 0.277,
"step": 29500
},
{
"epoch": 2.09,
"learning_rate": 4.982546626152557e-05,
"loss": 0.2733,
"step": 30000
},
{
"epoch": 2.13,
"learning_rate": 4.982110051690417e-05,
"loss": 0.2703,
"step": 30500
},
{
"epoch": 2.16,
"learning_rate": 4.9816734772282764e-05,
"loss": 0.2679,
"step": 31000
},
{
"epoch": 2.2,
"learning_rate": 4.981236902766136e-05,
"loss": 0.2656,
"step": 31500
},
{
"epoch": 2.23,
"learning_rate": 4.980800328303996e-05,
"loss": 0.2617,
"step": 32000
},
{
"epoch": 2.27,
"learning_rate": 4.980363753841856e-05,
"loss": 0.2603,
"step": 32500
},
{
"epoch": 2.3,
"learning_rate": 4.9799271793797154e-05,
"loss": 0.2578,
"step": 33000
},
{
"epoch": 2.34,
"learning_rate": 4.9794914780664993e-05,
"loss": 0.2558,
"step": 33500
},
{
"epoch": 2.37,
"learning_rate": 4.979054903604359e-05,
"loss": 0.2528,
"step": 34000
},
{
"epoch": 2.41,
"learning_rate": 4.978618329142219e-05,
"loss": 0.2508,
"step": 34500
},
{
"epoch": 2.44,
"learning_rate": 4.978181754680078e-05,
"loss": 0.2484,
"step": 35000
},
{
"epoch": 2.48,
"learning_rate": 4.977745180217938e-05,
"loss": 0.247,
"step": 35500
},
{
"epoch": 2.51,
"learning_rate": 4.977309478904722e-05,
"loss": 0.2451,
"step": 36000
},
{
"epoch": 2.55,
"learning_rate": 4.9768729044425824e-05,
"loss": 0.2424,
"step": 36500
},
{
"epoch": 2.58,
"learning_rate": 4.976436329980442e-05,
"loss": 0.2401,
"step": 37000
},
{
"epoch": 2.61,
"learning_rate": 4.975999755518301e-05,
"loss": 0.2385,
"step": 37500
},
{
"epoch": 2.65,
"learning_rate": 4.975564054205085e-05,
"loss": 0.2369,
"step": 38000
},
{
"epoch": 2.68,
"learning_rate": 4.975127479742945e-05,
"loss": 0.2347,
"step": 38500
},
{
"epoch": 2.72,
"learning_rate": 4.9746909052808046e-05,
"loss": 0.2335,
"step": 39000
},
{
"epoch": 2.75,
"learning_rate": 4.974254330818665e-05,
"loss": 0.232,
"step": 39500
},
{
"epoch": 2.79,
"learning_rate": 4.973817756356524e-05,
"loss": 0.2303,
"step": 40000
},
{
"epoch": 2.82,
"learning_rate": 4.973382055043309e-05,
"loss": 0.2291,
"step": 40500
},
{
"epoch": 2.86,
"learning_rate": 4.972945480581168e-05,
"loss": 0.2277,
"step": 41000
},
{
"epoch": 2.89,
"learning_rate": 4.9725089061190276e-05,
"loss": 0.2254,
"step": 41500
},
{
"epoch": 2.93,
"learning_rate": 4.972072331656888e-05,
"loss": 0.2244,
"step": 42000
},
{
"epoch": 2.96,
"learning_rate": 4.971636630343672e-05,
"loss": 0.2233,
"step": 42500
},
{
"epoch": 3.0,
"learning_rate": 4.971200055881531e-05,
"loss": 0.2215,
"step": 43000
},
{
"epoch": 3.03,
"learning_rate": 4.970763481419391e-05,
"loss": 0.2207,
"step": 43500
},
{
"epoch": 3.07,
"learning_rate": 4.970326906957251e-05,
"loss": 0.2197,
"step": 44000
},
{
"epoch": 3.1,
"learning_rate": 4.969891205644035e-05,
"loss": 0.2179,
"step": 44500
},
{
"epoch": 3.14,
"learning_rate": 4.9694546311818946e-05,
"loss": 0.2173,
"step": 45000
},
{
"epoch": 3.17,
"learning_rate": 4.969018056719754e-05,
"loss": 0.2155,
"step": 45500
},
{
"epoch": 3.21,
"learning_rate": 4.968581482257614e-05,
"loss": 0.2144,
"step": 46000
},
{
"epoch": 3.24,
"learning_rate": 4.968145780944398e-05,
"loss": 0.2133,
"step": 46500
},
{
"epoch": 3.28,
"learning_rate": 4.9677092064822575e-05,
"loss": 0.2126,
"step": 47000
},
{
"epoch": 3.31,
"learning_rate": 4.9672726320201176e-05,
"loss": 0.2115,
"step": 47500
},
{
"epoch": 3.35,
"learning_rate": 4.9668360575579776e-05,
"loss": 0.2105,
"step": 48000
},
{
"epoch": 3.38,
"learning_rate": 4.9664003562447616e-05,
"loss": 0.2088,
"step": 48500
},
{
"epoch": 3.42,
"learning_rate": 4.965963781782621e-05,
"loss": 0.2086,
"step": 49000
},
{
"epoch": 3.45,
"learning_rate": 4.9655272073204804e-05,
"loss": 0.207,
"step": 49500
},
{
"epoch": 3.49,
"learning_rate": 4.9650906328583405e-05,
"loss": 0.2055,
"step": 50000
},
{
"epoch": 3.52,
"learning_rate": 4.9646549315451245e-05,
"loss": 0.2044,
"step": 50500
},
{
"epoch": 3.56,
"learning_rate": 4.964218357082984e-05,
"loss": 0.2045,
"step": 51000
},
{
"epoch": 3.59,
"learning_rate": 4.963781782620844e-05,
"loss": 0.2041,
"step": 51500
},
{
"epoch": 3.63,
"learning_rate": 4.963345208158704e-05,
"loss": 0.2026,
"step": 52000
},
{
"epoch": 3.66,
"learning_rate": 4.962909506845488e-05,
"loss": 0.2015,
"step": 52500
},
{
"epoch": 3.7,
"learning_rate": 4.9624729323833474e-05,
"loss": 0.2006,
"step": 53000
},
{
"epoch": 3.73,
"learning_rate": 4.962036357921207e-05,
"loss": 0.2,
"step": 53500
},
{
"epoch": 3.77,
"learning_rate": 4.961599783459067e-05,
"loss": 0.1989,
"step": 54000
},
{
"epoch": 3.8,
"learning_rate": 4.961164082145851e-05,
"loss": 0.1977,
"step": 54500
},
{
"epoch": 3.84,
"learning_rate": 4.96072750768371e-05,
"loss": 0.1976,
"step": 55000
},
{
"epoch": 3.87,
"learning_rate": 4.9602909332215704e-05,
"loss": 0.1967,
"step": 55500
},
{
"epoch": 3.9,
"learning_rate": 4.9598543587594305e-05,
"loss": 0.196,
"step": 56000
},
{
"epoch": 3.94,
"learning_rate": 4.9594186574462145e-05,
"loss": 0.1944,
"step": 56500
},
{
"epoch": 3.97,
"learning_rate": 4.958982082984074e-05,
"loss": 0.1942,
"step": 57000
},
{
"epoch": 4.01,
"learning_rate": 4.958545508521934e-05,
"loss": 0.1934,
"step": 57500
},
{
"epoch": 4.04,
"learning_rate": 4.9581089340597933e-05,
"loss": 0.1928,
"step": 58000
},
{
"epoch": 4.08,
"learning_rate": 4.957673232746577e-05,
"loss": 0.192,
"step": 58500
},
{
"epoch": 4.11,
"learning_rate": 4.957236658284437e-05,
"loss": 0.1914,
"step": 59000
},
{
"epoch": 4.15,
"learning_rate": 4.9568000838222975e-05,
"loss": 0.19,
"step": 59500
},
{
"epoch": 4.18,
"learning_rate": 4.956363509360157e-05,
"loss": 0.19,
"step": 60000
},
{
"epoch": 4.22,
"learning_rate": 4.955927808046941e-05,
"loss": 0.1895,
"step": 60500
},
{
"epoch": 4.25,
"learning_rate": 4.9554912335848e-05,
"loss": 0.1892,
"step": 61000
},
{
"epoch": 4.29,
"learning_rate": 4.9550546591226604e-05,
"loss": 0.1889,
"step": 61500
},
{
"epoch": 4.32,
"learning_rate": 4.95461808466052e-05,
"loss": 0.1871,
"step": 62000
},
{
"epoch": 4.36,
"learning_rate": 4.954182383347304e-05,
"loss": 0.1872,
"step": 62500
},
{
"epoch": 4.39,
"learning_rate": 4.953745808885163e-05,
"loss": 0.1865,
"step": 63000
},
{
"epoch": 4.43,
"learning_rate": 4.953309234423024e-05,
"loss": 0.186,
"step": 63500
},
{
"epoch": 4.46,
"learning_rate": 4.952872659960883e-05,
"loss": 0.185,
"step": 64000
},
{
"epoch": 4.5,
"learning_rate": 4.952436958647667e-05,
"loss": 0.1843,
"step": 64500
},
{
"epoch": 4.53,
"learning_rate": 4.952000384185527e-05,
"loss": 0.1836,
"step": 65000
},
{
"epoch": 4.57,
"learning_rate": 4.951563809723387e-05,
"loss": 0.183,
"step": 65500
},
{
"epoch": 4.6,
"learning_rate": 4.951127235261246e-05,
"loss": 0.1824,
"step": 66000
},
{
"epoch": 4.64,
"learning_rate": 4.95069153394803e-05,
"loss": 0.1823,
"step": 66500
},
{
"epoch": 4.67,
"learning_rate": 4.9502549594858896e-05,
"loss": 0.1819,
"step": 67000
},
{
"epoch": 4.71,
"learning_rate": 4.94981838502375e-05,
"loss": 0.1812,
"step": 67500
},
{
"epoch": 4.74,
"learning_rate": 4.94938181056161e-05,
"loss": 0.1806,
"step": 68000
},
{
"epoch": 4.78,
"learning_rate": 4.948946109248394e-05,
"loss": 0.1794,
"step": 68500
},
{
"epoch": 4.81,
"learning_rate": 4.948509534786253e-05,
"loss": 0.1795,
"step": 69000
},
{
"epoch": 4.85,
"learning_rate": 4.948072960324113e-05,
"loss": 0.1798,
"step": 69500
},
{
"epoch": 4.88,
"learning_rate": 4.9476363858619726e-05,
"loss": 0.1789,
"step": 70000
},
{
"epoch": 4.92,
"learning_rate": 4.947199811399833e-05,
"loss": 0.1785,
"step": 70500
},
{
"epoch": 4.95,
"learning_rate": 4.946763236937692e-05,
"loss": 0.178,
"step": 71000
},
{
"epoch": 4.99,
"learning_rate": 4.946326662475552e-05,
"loss": 0.1772,
"step": 71500
},
{
"epoch": 5.02,
"learning_rate": 4.9458900880134116e-05,
"loss": 0.177,
"step": 72000
},
{
"epoch": 5.06,
"learning_rate": 4.9454543867001956e-05,
"loss": 0.1769,
"step": 72500
},
{
"epoch": 5.09,
"learning_rate": 4.9450178122380556e-05,
"loss": 0.176,
"step": 73000
},
{
"epoch": 5.13,
"learning_rate": 4.9445821109248396e-05,
"loss": 0.1752,
"step": 73500
},
{
"epoch": 5.16,
"learning_rate": 4.944145536462699e-05,
"loss": 0.1747,
"step": 74000
},
{
"epoch": 5.19,
"learning_rate": 4.943708962000559e-05,
"loss": 0.1757,
"step": 74500
},
{
"epoch": 5.23,
"learning_rate": 4.943272387538419e-05,
"loss": 0.1747,
"step": 75000
},
{
"epoch": 5.26,
"learning_rate": 4.9428358130762786e-05,
"loss": 0.1741,
"step": 75500
},
{
"epoch": 5.3,
"learning_rate": 4.942399238614138e-05,
"loss": 0.1732,
"step": 76000
},
{
"epoch": 5.33,
"learning_rate": 4.941962664151998e-05,
"loss": 0.1726,
"step": 76500
},
{
"epoch": 5.37,
"learning_rate": 4.941526089689858e-05,
"loss": 0.1718,
"step": 77000
},
{
"epoch": 5.4,
"learning_rate": 4.941090388376642e-05,
"loss": 0.172,
"step": 77500
},
{
"epoch": 5.44,
"learning_rate": 4.9406538139145015e-05,
"loss": 0.1713,
"step": 78000
},
{
"epoch": 5.47,
"learning_rate": 4.940217239452361e-05,
"loss": 0.1712,
"step": 78500
},
{
"epoch": 5.51,
"learning_rate": 4.939780664990221e-05,
"loss": 0.1707,
"step": 79000
},
{
"epoch": 5.54,
"learning_rate": 4.939344963677005e-05,
"loss": 0.1702,
"step": 79500
},
{
"epoch": 5.58,
"learning_rate": 4.9389083892148644e-05,
"loss": 0.1696,
"step": 80000
},
{
"epoch": 5.61,
"learning_rate": 4.9384718147527245e-05,
"loss": 0.1698,
"step": 80500
},
{
"epoch": 5.65,
"learning_rate": 4.9380352402905846e-05,
"loss": 0.1691,
"step": 81000
},
{
"epoch": 5.68,
"learning_rate": 4.9375995389773685e-05,
"loss": 0.1683,
"step": 81500
},
{
"epoch": 5.72,
"learning_rate": 4.937162964515228e-05,
"loss": 0.1685,
"step": 82000
},
{
"epoch": 5.75,
"learning_rate": 4.9367263900530874e-05,
"loss": 0.1674,
"step": 82500
},
{
"epoch": 5.79,
"learning_rate": 4.9362898155909474e-05,
"loss": 0.1679,
"step": 83000
},
{
"epoch": 5.82,
"learning_rate": 4.9358541142777314e-05,
"loss": 0.1677,
"step": 83500
},
{
"epoch": 5.86,
"learning_rate": 4.935417539815591e-05,
"loss": 0.1668,
"step": 84000
},
{
"epoch": 5.89,
"learning_rate": 4.934980965353451e-05,
"loss": 0.1664,
"step": 84500
},
{
"epoch": 5.93,
"learning_rate": 4.934544390891311e-05,
"loss": 0.166,
"step": 85000
},
{
"epoch": 5.96,
"learning_rate": 4.934108689578095e-05,
"loss": 0.1654,
"step": 85500
},
{
"epoch": 6.0,
"learning_rate": 4.9336721151159544e-05,
"loss": 0.1657,
"step": 86000
},
{
"epoch": 6.03,
"learning_rate": 4.933235540653814e-05,
"loss": 0.1653,
"step": 86500
},
{
"epoch": 6.07,
"learning_rate": 4.932798966191674e-05,
"loss": 0.1647,
"step": 87000
},
{
"epoch": 6.1,
"learning_rate": 4.932363264878458e-05,
"loss": 0.1643,
"step": 87500
},
{
"epoch": 6.14,
"learning_rate": 4.931926690416317e-05,
"loss": 0.1637,
"step": 88000
},
{
"epoch": 6.17,
"learning_rate": 4.931490115954177e-05,
"loss": 0.1638,
"step": 88500
},
{
"epoch": 6.21,
"learning_rate": 4.9310535414920374e-05,
"loss": 0.163,
"step": 89000
},
{
"epoch": 6.24,
"learning_rate": 4.9306178401788214e-05,
"loss": 0.1625,
"step": 89500
},
{
"epoch": 6.28,
"learning_rate": 4.930181265716681e-05,
"loss": 0.1629,
"step": 90000
},
{
"epoch": 6.31,
"learning_rate": 4.929744691254541e-05,
"loss": 0.1626,
"step": 90500
},
{
"epoch": 6.35,
"learning_rate": 4.9293081167924e-05,
"loss": 0.1618,
"step": 91000
},
{
"epoch": 6.38,
"learning_rate": 4.928872415479184e-05,
"loss": 0.1625,
"step": 91500
},
{
"epoch": 6.42,
"learning_rate": 4.9284358410170437e-05,
"loss": 0.1613,
"step": 92000
},
{
"epoch": 6.45,
"learning_rate": 4.927999266554904e-05,
"loss": 0.1606,
"step": 92500
},
{
"epoch": 6.48,
"learning_rate": 4.927562692092764e-05,
"loss": 0.1615,
"step": 93000
},
{
"epoch": 6.52,
"learning_rate": 4.927126990779548e-05,
"loss": 0.16,
"step": 93500
},
{
"epoch": 6.55,
"learning_rate": 4.926690416317407e-05,
"loss": 0.1609,
"step": 94000
},
{
"epoch": 6.59,
"learning_rate": 4.926253841855267e-05,
"loss": 0.1596,
"step": 94500
},
{
"epoch": 6.62,
"learning_rate": 4.925817267393127e-05,
"loss": 0.1592,
"step": 95000
},
{
"epoch": 6.66,
"learning_rate": 4.925381566079911e-05,
"loss": 0.159,
"step": 95500
},
{
"epoch": 6.69,
"learning_rate": 4.92494499161777e-05,
"loss": 0.1587,
"step": 96000
},
{
"epoch": 6.73,
"learning_rate": 4.92450841715563e-05,
"loss": 0.1585,
"step": 96500
},
{
"epoch": 6.76,
"learning_rate": 4.92407184269349e-05,
"loss": 0.1583,
"step": 97000
},
{
"epoch": 6.8,
"learning_rate": 4.923636141380274e-05,
"loss": 0.1576,
"step": 97500
},
{
"epoch": 6.83,
"learning_rate": 4.9231995669181336e-05,
"loss": 0.1574,
"step": 98000
},
{
"epoch": 6.87,
"learning_rate": 4.922762992455994e-05,
"loss": 0.1574,
"step": 98500
},
{
"epoch": 6.9,
"learning_rate": 4.922326417993853e-05,
"loss": 0.1573,
"step": 99000
},
{
"epoch": 6.94,
"learning_rate": 4.921890716680637e-05,
"loss": 0.1577,
"step": 99500
},
{
"epoch": 6.97,
"learning_rate": 4.9214541422184965e-05,
"loss": 0.1569,
"step": 100000
}
],
"max_steps": 5736400,
"num_train_epochs": 400,
"total_flos": 2.704645145521606e+19,
"trial_name": null,
"trial_params": null
}