gpt-for-est-large / trainer_state2.json
mphi's picture
Upload trainer_state2.json
e673292
raw
history blame
74.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9999949945189983,
"global_step": 299670,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5.989988987886675e-05,
"loss": 3.7278,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 5.979977975773351e-05,
"loss": 3.7216,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 5.9699669636600265e-05,
"loss": 3.715,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 5.9599559515467015e-05,
"loss": 3.7158,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 5.949944939433377e-05,
"loss": 3.7145,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 5.939933927320052e-05,
"loss": 3.7008,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 5.929922915206727e-05,
"loss": 3.6985,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 5.919911903093403e-05,
"loss": 3.6897,
"step": 4000
},
{
"epoch": 0.05,
"learning_rate": 5.9099008909800785e-05,
"loss": 3.6934,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 5.8998898788667535e-05,
"loss": 3.6838,
"step": 5000
},
{
"epoch": 0.06,
"learning_rate": 5.889878866753429e-05,
"loss": 3.6917,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 5.879867854640104e-05,
"loss": 3.6839,
"step": 6000
},
{
"epoch": 0.07,
"learning_rate": 5.869856842526779e-05,
"loss": 3.6839,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 5.8598458304134556e-05,
"loss": 3.6904,
"step": 7000
},
{
"epoch": 0.08,
"learning_rate": 5.8498348183001306e-05,
"loss": 3.687,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 5.8398238061868056e-05,
"loss": 3.6835,
"step": 8000
},
{
"epoch": 0.09,
"learning_rate": 5.8298127940734806e-05,
"loss": 3.6846,
"step": 8500
},
{
"epoch": 0.09,
"learning_rate": 5.819801781960156e-05,
"loss": 3.6819,
"step": 9000
},
{
"epoch": 0.1,
"learning_rate": 5.809790769846831e-05,
"loss": 3.6802,
"step": 9500
},
{
"epoch": 0.1,
"learning_rate": 5.799779757733507e-05,
"loss": 3.6818,
"step": 10000
},
{
"epoch": 0.11,
"learning_rate": 5.7897687456201826e-05,
"loss": 3.6936,
"step": 10500
},
{
"epoch": 0.11,
"learning_rate": 5.7797577335068576e-05,
"loss": 3.6757,
"step": 11000
},
{
"epoch": 0.12,
"learning_rate": 5.7697467213935326e-05,
"loss": 3.6784,
"step": 11500
},
{
"epoch": 0.12,
"learning_rate": 5.759735709280208e-05,
"loss": 3.6812,
"step": 12000
},
{
"epoch": 0.13,
"learning_rate": 5.749724697166884e-05,
"loss": 3.6756,
"step": 12500
},
{
"epoch": 0.13,
"learning_rate": 5.739713685053559e-05,
"loss": 3.6803,
"step": 13000
},
{
"epoch": 0.14,
"learning_rate": 5.7297026729402346e-05,
"loss": 3.6851,
"step": 13500
},
{
"epoch": 0.14,
"learning_rate": 5.7196916608269096e-05,
"loss": 3.668,
"step": 14000
},
{
"epoch": 0.15,
"learning_rate": 5.7096806487135846e-05,
"loss": 3.6732,
"step": 14500
},
{
"epoch": 0.15,
"learning_rate": 5.699669636600261e-05,
"loss": 3.6791,
"step": 15000
},
{
"epoch": 0.16,
"learning_rate": 5.689658624486936e-05,
"loss": 3.6733,
"step": 15500
},
{
"epoch": 0.16,
"learning_rate": 5.679647612373611e-05,
"loss": 3.6656,
"step": 16000
},
{
"epoch": 0.17,
"learning_rate": 5.6696366002602866e-05,
"loss": 3.67,
"step": 16500
},
{
"epoch": 0.17,
"learning_rate": 5.6596255881469616e-05,
"loss": 3.6716,
"step": 17000
},
{
"epoch": 0.18,
"learning_rate": 5.6496145760336366e-05,
"loss": 3.671,
"step": 17500
},
{
"epoch": 0.18,
"learning_rate": 5.639603563920313e-05,
"loss": 3.6723,
"step": 18000
},
{
"epoch": 0.19,
"learning_rate": 5.629592551806988e-05,
"loss": 3.6755,
"step": 18500
},
{
"epoch": 0.19,
"learning_rate": 5.619581539693663e-05,
"loss": 3.6694,
"step": 19000
},
{
"epoch": 0.2,
"learning_rate": 5.6095705275803387e-05,
"loss": 3.6714,
"step": 19500
},
{
"epoch": 0.2,
"learning_rate": 5.5995595154670137e-05,
"loss": 3.6688,
"step": 20000
},
{
"epoch": 0.21,
"learning_rate": 5.589548503353689e-05,
"loss": 3.6638,
"step": 20500
},
{
"epoch": 0.21,
"learning_rate": 5.579537491240365e-05,
"loss": 3.6639,
"step": 21000
},
{
"epoch": 0.22,
"learning_rate": 5.56952647912704e-05,
"loss": 3.6567,
"step": 21500
},
{
"epoch": 0.22,
"learning_rate": 5.559515467013715e-05,
"loss": 3.6708,
"step": 22000
},
{
"epoch": 0.23,
"learning_rate": 5.549504454900391e-05,
"loss": 3.6601,
"step": 22500
},
{
"epoch": 0.23,
"learning_rate": 5.539493442787066e-05,
"loss": 3.6652,
"step": 23000
},
{
"epoch": 0.24,
"learning_rate": 5.5294824306737414e-05,
"loss": 3.6653,
"step": 23500
},
{
"epoch": 0.24,
"learning_rate": 5.519471418560417e-05,
"loss": 3.6492,
"step": 24000
},
{
"epoch": 0.25,
"learning_rate": 5.509460406447092e-05,
"loss": 3.6575,
"step": 24500
},
{
"epoch": 0.25,
"learning_rate": 5.499449394333767e-05,
"loss": 3.656,
"step": 25000
},
{
"epoch": 0.26,
"learning_rate": 5.489438382220442e-05,
"loss": 3.6545,
"step": 25500
},
{
"epoch": 0.26,
"learning_rate": 5.4794273701071184e-05,
"loss": 3.6486,
"step": 26000
},
{
"epoch": 0.27,
"learning_rate": 5.4694163579937934e-05,
"loss": 3.6575,
"step": 26500
},
{
"epoch": 0.27,
"learning_rate": 5.4594053458804684e-05,
"loss": 3.6579,
"step": 27000
},
{
"epoch": 0.28,
"learning_rate": 5.449394333767144e-05,
"loss": 3.6551,
"step": 27500
},
{
"epoch": 0.28,
"learning_rate": 5.439383321653819e-05,
"loss": 3.6531,
"step": 28000
},
{
"epoch": 0.29,
"learning_rate": 5.429372309540494e-05,
"loss": 3.654,
"step": 28500
},
{
"epoch": 0.29,
"learning_rate": 5.4193612974271704e-05,
"loss": 3.6438,
"step": 29000
},
{
"epoch": 0.3,
"learning_rate": 5.4093502853138454e-05,
"loss": 3.6519,
"step": 29500
},
{
"epoch": 0.3,
"learning_rate": 5.3993392732005204e-05,
"loss": 3.6542,
"step": 30000
},
{
"epoch": 0.31,
"learning_rate": 5.389328261087196e-05,
"loss": 3.6478,
"step": 30500
},
{
"epoch": 0.31,
"learning_rate": 5.379317248973871e-05,
"loss": 3.6613,
"step": 31000
},
{
"epoch": 0.32,
"learning_rate": 5.369306236860547e-05,
"loss": 3.6529,
"step": 31500
},
{
"epoch": 0.32,
"learning_rate": 5.3592952247472224e-05,
"loss": 3.6515,
"step": 32000
},
{
"epoch": 0.33,
"learning_rate": 5.3492842126338974e-05,
"loss": 3.6509,
"step": 32500
},
{
"epoch": 0.33,
"learning_rate": 5.3392732005205724e-05,
"loss": 3.66,
"step": 33000
},
{
"epoch": 0.34,
"learning_rate": 5.329262188407248e-05,
"loss": 3.6488,
"step": 33500
},
{
"epoch": 0.34,
"learning_rate": 5.319251176293924e-05,
"loss": 3.6559,
"step": 34000
},
{
"epoch": 0.35,
"learning_rate": 5.309240164180599e-05,
"loss": 3.6479,
"step": 34500
},
{
"epoch": 0.35,
"learning_rate": 5.2992291520672745e-05,
"loss": 3.6433,
"step": 35000
},
{
"epoch": 0.36,
"learning_rate": 5.2892181399539495e-05,
"loss": 3.6491,
"step": 35500
},
{
"epoch": 0.36,
"learning_rate": 5.2792071278406245e-05,
"loss": 3.655,
"step": 36000
},
{
"epoch": 0.37,
"learning_rate": 5.2691961157273e-05,
"loss": 3.6439,
"step": 36500
},
{
"epoch": 0.37,
"learning_rate": 5.259185103613976e-05,
"loss": 3.6434,
"step": 37000
},
{
"epoch": 0.38,
"learning_rate": 5.249174091500651e-05,
"loss": 3.6409,
"step": 37500
},
{
"epoch": 0.38,
"learning_rate": 5.2391630793873265e-05,
"loss": 3.6415,
"step": 38000
},
{
"epoch": 0.39,
"learning_rate": 5.2291520672740015e-05,
"loss": 3.6454,
"step": 38500
},
{
"epoch": 0.39,
"learning_rate": 5.2191410551606765e-05,
"loss": 3.646,
"step": 39000
},
{
"epoch": 0.4,
"learning_rate": 5.209130043047353e-05,
"loss": 3.6498,
"step": 39500
},
{
"epoch": 0.4,
"learning_rate": 5.199119030934028e-05,
"loss": 3.6407,
"step": 40000
},
{
"epoch": 0.41,
"learning_rate": 5.189108018820703e-05,
"loss": 3.6425,
"step": 40500
},
{
"epoch": 0.41,
"learning_rate": 5.1790970067073785e-05,
"loss": 3.6435,
"step": 41000
},
{
"epoch": 0.42,
"learning_rate": 5.1690859945940535e-05,
"loss": 3.6485,
"step": 41500
},
{
"epoch": 0.42,
"learning_rate": 5.1590749824807285e-05,
"loss": 3.6401,
"step": 42000
},
{
"epoch": 0.43,
"learning_rate": 5.149063970367404e-05,
"loss": 3.6369,
"step": 42500
},
{
"epoch": 0.43,
"learning_rate": 5.13905295825408e-05,
"loss": 3.634,
"step": 43000
},
{
"epoch": 0.44,
"learning_rate": 5.129041946140755e-05,
"loss": 3.6396,
"step": 43500
},
{
"epoch": 0.44,
"learning_rate": 5.11903093402743e-05,
"loss": 3.6362,
"step": 44000
},
{
"epoch": 0.45,
"learning_rate": 5.1090199219141055e-05,
"loss": 3.6328,
"step": 44500
},
{
"epoch": 0.45,
"learning_rate": 5.099008909800781e-05,
"loss": 3.6389,
"step": 45000
},
{
"epoch": 0.46,
"learning_rate": 5.088997897687456e-05,
"loss": 3.6455,
"step": 45500
},
{
"epoch": 0.46,
"learning_rate": 5.078986885574132e-05,
"loss": 3.6348,
"step": 46000
},
{
"epoch": 0.47,
"learning_rate": 5.068975873460807e-05,
"loss": 3.6386,
"step": 46500
},
{
"epoch": 0.47,
"learning_rate": 5.058964861347482e-05,
"loss": 3.6311,
"step": 47000
},
{
"epoch": 0.48,
"learning_rate": 5.0489538492341576e-05,
"loss": 3.6348,
"step": 47500
},
{
"epoch": 0.48,
"learning_rate": 5.038942837120833e-05,
"loss": 3.6394,
"step": 48000
},
{
"epoch": 0.49,
"learning_rate": 5.028931825007508e-05,
"loss": 3.6309,
"step": 48500
},
{
"epoch": 0.49,
"learning_rate": 5.018920812894184e-05,
"loss": 3.6308,
"step": 49000
},
{
"epoch": 0.5,
"learning_rate": 5.008909800780859e-05,
"loss": 3.6285,
"step": 49500
},
{
"epoch": 0.5,
"learning_rate": 4.998898788667534e-05,
"loss": 3.6377,
"step": 50000
},
{
"epoch": 0.51,
"learning_rate": 4.98888777655421e-05,
"loss": 3.6318,
"step": 50500
},
{
"epoch": 0.51,
"learning_rate": 4.978876764440885e-05,
"loss": 3.6303,
"step": 51000
},
{
"epoch": 0.52,
"learning_rate": 4.96886575232756e-05,
"loss": 3.6279,
"step": 51500
},
{
"epoch": 0.52,
"learning_rate": 4.958854740214236e-05,
"loss": 3.6245,
"step": 52000
},
{
"epoch": 0.53,
"learning_rate": 4.948843728100911e-05,
"loss": 3.6232,
"step": 52500
},
{
"epoch": 0.53,
"learning_rate": 4.9388327159875866e-05,
"loss": 3.6305,
"step": 53000
},
{
"epoch": 0.54,
"learning_rate": 4.928821703874262e-05,
"loss": 3.6197,
"step": 53500
},
{
"epoch": 0.54,
"learning_rate": 4.918810691760937e-05,
"loss": 3.6225,
"step": 54000
},
{
"epoch": 0.55,
"learning_rate": 4.908799679647612e-05,
"loss": 3.6248,
"step": 54500
},
{
"epoch": 0.55,
"learning_rate": 4.898788667534288e-05,
"loss": 3.6244,
"step": 55000
},
{
"epoch": 0.56,
"learning_rate": 4.888777655420963e-05,
"loss": 3.6323,
"step": 55500
},
{
"epoch": 0.56,
"learning_rate": 4.8787666433076386e-05,
"loss": 3.6215,
"step": 56000
},
{
"epoch": 0.57,
"learning_rate": 4.868755631194314e-05,
"loss": 3.6286,
"step": 56500
},
{
"epoch": 0.57,
"learning_rate": 4.858744619080989e-05,
"loss": 3.6305,
"step": 57000
},
{
"epoch": 0.58,
"learning_rate": 4.848733606967664e-05,
"loss": 3.6207,
"step": 57500
},
{
"epoch": 0.58,
"learning_rate": 4.838722594854339e-05,
"loss": 3.6255,
"step": 58000
},
{
"epoch": 0.59,
"learning_rate": 4.828711582741016e-05,
"loss": 3.6254,
"step": 58500
},
{
"epoch": 0.59,
"learning_rate": 4.818700570627691e-05,
"loss": 3.6274,
"step": 59000
},
{
"epoch": 0.6,
"learning_rate": 4.808689558514366e-05,
"loss": 3.6276,
"step": 59500
},
{
"epoch": 0.6,
"learning_rate": 4.7986785464010413e-05,
"loss": 3.6242,
"step": 60000
},
{
"epoch": 0.61,
"learning_rate": 4.7886675342877163e-05,
"loss": 3.6208,
"step": 60500
},
{
"epoch": 0.61,
"learning_rate": 4.7786565221743913e-05,
"loss": 3.6184,
"step": 61000
},
{
"epoch": 0.62,
"learning_rate": 4.768645510061068e-05,
"loss": 3.6234,
"step": 61500
},
{
"epoch": 0.62,
"learning_rate": 4.758634497947743e-05,
"loss": 3.6217,
"step": 62000
},
{
"epoch": 0.63,
"learning_rate": 4.748623485834418e-05,
"loss": 3.6199,
"step": 62500
},
{
"epoch": 0.63,
"learning_rate": 4.7386124737210934e-05,
"loss": 3.6157,
"step": 63000
},
{
"epoch": 0.64,
"learning_rate": 4.7286014616077684e-05,
"loss": 3.6195,
"step": 63500
},
{
"epoch": 0.64,
"learning_rate": 4.718590449494444e-05,
"loss": 3.6082,
"step": 64000
},
{
"epoch": 0.65,
"learning_rate": 4.70857943738112e-05,
"loss": 3.6117,
"step": 64500
},
{
"epoch": 0.65,
"learning_rate": 4.698568425267795e-05,
"loss": 3.6205,
"step": 65000
},
{
"epoch": 0.66,
"learning_rate": 4.68855741315447e-05,
"loss": 3.6128,
"step": 65500
},
{
"epoch": 0.66,
"learning_rate": 4.6785464010411454e-05,
"loss": 3.6186,
"step": 66000
},
{
"epoch": 0.67,
"learning_rate": 4.6685353889278204e-05,
"loss": 3.6168,
"step": 66500
},
{
"epoch": 0.67,
"learning_rate": 4.658524376814496e-05,
"loss": 3.6224,
"step": 67000
},
{
"epoch": 0.68,
"learning_rate": 4.648513364701172e-05,
"loss": 3.6112,
"step": 67500
},
{
"epoch": 0.68,
"learning_rate": 4.638502352587847e-05,
"loss": 3.6144,
"step": 68000
},
{
"epoch": 0.69,
"learning_rate": 4.628491340474522e-05,
"loss": 3.6178,
"step": 68500
},
{
"epoch": 0.69,
"learning_rate": 4.6184803283611974e-05,
"loss": 3.606,
"step": 69000
},
{
"epoch": 0.7,
"learning_rate": 4.608469316247873e-05,
"loss": 3.6104,
"step": 69500
},
{
"epoch": 0.7,
"learning_rate": 4.598458304134548e-05,
"loss": 3.6055,
"step": 70000
},
{
"epoch": 0.71,
"learning_rate": 4.588447292021224e-05,
"loss": 3.6067,
"step": 70500
},
{
"epoch": 0.71,
"learning_rate": 4.578436279907899e-05,
"loss": 3.6141,
"step": 71000
},
{
"epoch": 0.72,
"learning_rate": 4.568425267794574e-05,
"loss": 3.6147,
"step": 71500
},
{
"epoch": 0.72,
"learning_rate": 4.55841425568125e-05,
"loss": 3.6193,
"step": 72000
},
{
"epoch": 0.73,
"learning_rate": 4.548403243567925e-05,
"loss": 3.6088,
"step": 72500
},
{
"epoch": 0.73,
"learning_rate": 4.5383922314546e-05,
"loss": 3.6106,
"step": 73000
},
{
"epoch": 0.74,
"learning_rate": 4.528381219341276e-05,
"loss": 3.6058,
"step": 73500
},
{
"epoch": 0.74,
"learning_rate": 4.518370207227951e-05,
"loss": 3.615,
"step": 74000
},
{
"epoch": 0.75,
"learning_rate": 4.508359195114626e-05,
"loss": 3.6123,
"step": 74500
},
{
"epoch": 0.75,
"learning_rate": 4.4983481830013015e-05,
"loss": 3.6059,
"step": 75000
},
{
"epoch": 0.76,
"learning_rate": 4.488337170887977e-05,
"loss": 3.602,
"step": 75500
},
{
"epoch": 0.76,
"learning_rate": 4.478326158774652e-05,
"loss": 3.605,
"step": 76000
},
{
"epoch": 0.77,
"learning_rate": 4.468315146661327e-05,
"loss": 3.6099,
"step": 76500
},
{
"epoch": 0.77,
"learning_rate": 4.458304134548003e-05,
"loss": 3.602,
"step": 77000
},
{
"epoch": 0.78,
"learning_rate": 4.4482931224346785e-05,
"loss": 3.6058,
"step": 77500
},
{
"epoch": 0.78,
"learning_rate": 4.4382821103213535e-05,
"loss": 3.6044,
"step": 78000
},
{
"epoch": 0.79,
"learning_rate": 4.428271098208029e-05,
"loss": 3.6072,
"step": 78500
},
{
"epoch": 0.79,
"learning_rate": 4.418260086094704e-05,
"loss": 3.6015,
"step": 79000
},
{
"epoch": 0.8,
"learning_rate": 4.408249073981379e-05,
"loss": 3.6019,
"step": 79500
},
{
"epoch": 0.8,
"learning_rate": 4.398238061868055e-05,
"loss": 3.6099,
"step": 80000
},
{
"epoch": 0.81,
"learning_rate": 4.3882270497547305e-05,
"loss": 3.6005,
"step": 80500
},
{
"epoch": 0.81,
"learning_rate": 4.3782160376414055e-05,
"loss": 3.609,
"step": 81000
},
{
"epoch": 0.82,
"learning_rate": 4.368205025528081e-05,
"loss": 3.601,
"step": 81500
},
{
"epoch": 0.82,
"learning_rate": 4.358194013414756e-05,
"loss": 3.6013,
"step": 82000
},
{
"epoch": 0.83,
"learning_rate": 4.348183001301431e-05,
"loss": 3.6023,
"step": 82500
},
{
"epoch": 0.83,
"learning_rate": 4.3381719891881076e-05,
"loss": 3.6047,
"step": 83000
},
{
"epoch": 0.84,
"learning_rate": 4.3281609770747826e-05,
"loss": 3.5886,
"step": 83500
},
{
"epoch": 0.84,
"learning_rate": 4.3181499649614575e-05,
"loss": 3.5942,
"step": 84000
},
{
"epoch": 0.85,
"learning_rate": 4.308138952848133e-05,
"loss": 3.5961,
"step": 84500
},
{
"epoch": 0.85,
"learning_rate": 4.298127940734808e-05,
"loss": 3.6019,
"step": 85000
},
{
"epoch": 0.86,
"learning_rate": 4.288116928621483e-05,
"loss": 3.6043,
"step": 85500
},
{
"epoch": 0.86,
"learning_rate": 4.2781059165081596e-05,
"loss": 3.5922,
"step": 86000
},
{
"epoch": 0.87,
"learning_rate": 4.2680949043948346e-05,
"loss": 3.5973,
"step": 86500
},
{
"epoch": 0.87,
"learning_rate": 4.2580838922815096e-05,
"loss": 3.603,
"step": 87000
},
{
"epoch": 0.88,
"learning_rate": 4.248072880168185e-05,
"loss": 3.6052,
"step": 87500
},
{
"epoch": 0.88,
"learning_rate": 4.23806186805486e-05,
"loss": 3.5951,
"step": 88000
},
{
"epoch": 0.89,
"learning_rate": 4.228050855941536e-05,
"loss": 3.5904,
"step": 88500
},
{
"epoch": 0.89,
"learning_rate": 4.2180398438282116e-05,
"loss": 3.5861,
"step": 89000
},
{
"epoch": 0.9,
"learning_rate": 4.2080288317148866e-05,
"loss": 3.6006,
"step": 89500
},
{
"epoch": 0.9,
"learning_rate": 4.1980178196015616e-05,
"loss": 3.5911,
"step": 90000
},
{
"epoch": 0.91,
"learning_rate": 4.188006807488237e-05,
"loss": 3.5982,
"step": 90500
},
{
"epoch": 0.91,
"learning_rate": 4.177995795374913e-05,
"loss": 3.5939,
"step": 91000
},
{
"epoch": 0.92,
"learning_rate": 4.167984783261588e-05,
"loss": 3.6002,
"step": 91500
},
{
"epoch": 0.92,
"learning_rate": 4.157973771148263e-05,
"loss": 3.5975,
"step": 92000
},
{
"epoch": 0.93,
"learning_rate": 4.1479627590349386e-05,
"loss": 3.5909,
"step": 92500
},
{
"epoch": 0.93,
"learning_rate": 4.1379517469216136e-05,
"loss": 3.5986,
"step": 93000
},
{
"epoch": 0.94,
"learning_rate": 4.1279407348082886e-05,
"loss": 3.589,
"step": 93500
},
{
"epoch": 0.94,
"learning_rate": 4.117929722694965e-05,
"loss": 3.5922,
"step": 94000
},
{
"epoch": 0.95,
"learning_rate": 4.10791871058164e-05,
"loss": 3.5921,
"step": 94500
},
{
"epoch": 0.95,
"learning_rate": 4.097907698468315e-05,
"loss": 3.5971,
"step": 95000
},
{
"epoch": 0.96,
"learning_rate": 4.0878966863549907e-05,
"loss": 3.5914,
"step": 95500
},
{
"epoch": 0.96,
"learning_rate": 4.0778856742416657e-05,
"loss": 3.5933,
"step": 96000
},
{
"epoch": 0.97,
"learning_rate": 4.067874662128341e-05,
"loss": 3.5823,
"step": 96500
},
{
"epoch": 0.97,
"learning_rate": 4.057863650015017e-05,
"loss": 3.5955,
"step": 97000
},
{
"epoch": 0.98,
"learning_rate": 4.047852637901692e-05,
"loss": 3.587,
"step": 97500
},
{
"epoch": 0.98,
"learning_rate": 4.037841625788367e-05,
"loss": 3.5958,
"step": 98000
},
{
"epoch": 0.99,
"learning_rate": 4.027830613675043e-05,
"loss": 3.5866,
"step": 98500
},
{
"epoch": 0.99,
"learning_rate": 4.017819601561718e-05,
"loss": 3.5848,
"step": 99000
},
{
"epoch": 1.0,
"learning_rate": 4.0078085894483934e-05,
"loss": 3.5803,
"step": 99500
},
{
"epoch": 1.0,
"learning_rate": 3.997797577335069e-05,
"loss": 3.5811,
"step": 100000
},
{
"epoch": 1.01,
"learning_rate": 3.987786565221744e-05,
"loss": 3.5407,
"step": 100500
},
{
"epoch": 1.01,
"learning_rate": 3.977775553108419e-05,
"loss": 3.5453,
"step": 101000
},
{
"epoch": 1.02,
"learning_rate": 3.967764540995095e-05,
"loss": 3.5439,
"step": 101500
},
{
"epoch": 1.02,
"learning_rate": 3.9577535288817704e-05,
"loss": 3.5486,
"step": 102000
},
{
"epoch": 1.03,
"learning_rate": 3.9477425167684454e-05,
"loss": 3.5476,
"step": 102500
},
{
"epoch": 1.03,
"learning_rate": 3.937731504655121e-05,
"loss": 3.548,
"step": 103000
},
{
"epoch": 1.04,
"learning_rate": 3.927720492541796e-05,
"loss": 3.5348,
"step": 103500
},
{
"epoch": 1.04,
"learning_rate": 3.917709480428471e-05,
"loss": 3.5374,
"step": 104000
},
{
"epoch": 1.05,
"learning_rate": 3.907698468315147e-05,
"loss": 3.5443,
"step": 104500
},
{
"epoch": 1.05,
"learning_rate": 3.8976874562018224e-05,
"loss": 3.547,
"step": 105000
},
{
"epoch": 1.06,
"learning_rate": 3.8876764440884974e-05,
"loss": 3.5464,
"step": 105500
},
{
"epoch": 1.06,
"learning_rate": 3.877665431975173e-05,
"loss": 3.5456,
"step": 106000
},
{
"epoch": 1.07,
"learning_rate": 3.867654419861848e-05,
"loss": 3.5402,
"step": 106500
},
{
"epoch": 1.07,
"learning_rate": 3.857643407748523e-05,
"loss": 3.5425,
"step": 107000
},
{
"epoch": 1.08,
"learning_rate": 3.847632395635199e-05,
"loss": 3.5461,
"step": 107500
},
{
"epoch": 1.08,
"learning_rate": 3.8376213835218744e-05,
"loss": 3.547,
"step": 108000
},
{
"epoch": 1.09,
"learning_rate": 3.8276103714085494e-05,
"loss": 3.547,
"step": 108500
},
{
"epoch": 1.09,
"learning_rate": 3.8175993592952244e-05,
"loss": 3.5478,
"step": 109000
},
{
"epoch": 1.1,
"learning_rate": 3.8075883471819e-05,
"loss": 3.5439,
"step": 109500
},
{
"epoch": 1.1,
"learning_rate": 3.797577335068576e-05,
"loss": 3.5428,
"step": 110000
},
{
"epoch": 1.11,
"learning_rate": 3.787566322955251e-05,
"loss": 3.5457,
"step": 110500
},
{
"epoch": 1.11,
"learning_rate": 3.7775553108419265e-05,
"loss": 3.5394,
"step": 111000
},
{
"epoch": 1.12,
"learning_rate": 3.7675442987286015e-05,
"loss": 3.5455,
"step": 111500
},
{
"epoch": 1.12,
"learning_rate": 3.7575332866152765e-05,
"loss": 3.5464,
"step": 112000
},
{
"epoch": 1.13,
"learning_rate": 3.747522274501952e-05,
"loss": 3.5397,
"step": 112500
},
{
"epoch": 1.13,
"learning_rate": 3.737511262388628e-05,
"loss": 3.5395,
"step": 113000
},
{
"epoch": 1.14,
"learning_rate": 3.727500250275303e-05,
"loss": 3.5362,
"step": 113500
},
{
"epoch": 1.14,
"learning_rate": 3.7174892381619785e-05,
"loss": 3.5442,
"step": 114000
},
{
"epoch": 1.15,
"learning_rate": 3.7074782260486535e-05,
"loss": 3.5365,
"step": 114500
},
{
"epoch": 1.15,
"learning_rate": 3.6974672139353285e-05,
"loss": 3.5408,
"step": 115000
},
{
"epoch": 1.16,
"learning_rate": 3.687456201822005e-05,
"loss": 3.5402,
"step": 115500
},
{
"epoch": 1.16,
"learning_rate": 3.67744518970868e-05,
"loss": 3.5353,
"step": 116000
},
{
"epoch": 1.17,
"learning_rate": 3.667434177595355e-05,
"loss": 3.5403,
"step": 116500
},
{
"epoch": 1.17,
"learning_rate": 3.6574231654820305e-05,
"loss": 3.5403,
"step": 117000
},
{
"epoch": 1.18,
"learning_rate": 3.6474121533687055e-05,
"loss": 3.5445,
"step": 117500
},
{
"epoch": 1.18,
"learning_rate": 3.6374011412553805e-05,
"loss": 3.5895,
"step": 118000
},
{
"epoch": 1.19,
"learning_rate": 3.627390129142057e-05,
"loss": 3.5945,
"step": 118500
},
{
"epoch": 1.19,
"learning_rate": 3.617379117028732e-05,
"loss": 3.5952,
"step": 119000
},
{
"epoch": 1.2,
"learning_rate": 3.607368104915407e-05,
"loss": 3.5922,
"step": 119500
},
{
"epoch": 1.2,
"learning_rate": 3.5973570928020825e-05,
"loss": 3.5886,
"step": 120000
},
{
"epoch": 1.21,
"learning_rate": 3.5873460806887575e-05,
"loss": 3.5939,
"step": 120500
},
{
"epoch": 1.21,
"learning_rate": 3.577335068575433e-05,
"loss": 3.5882,
"step": 121000
},
{
"epoch": 1.22,
"learning_rate": 3.567324056462109e-05,
"loss": 3.5827,
"step": 121500
},
{
"epoch": 1.22,
"learning_rate": 3.557313044348784e-05,
"loss": 3.5883,
"step": 122000
},
{
"epoch": 1.23,
"learning_rate": 3.547302032235459e-05,
"loss": 3.5938,
"step": 122500
},
{
"epoch": 1.23,
"learning_rate": 3.5372910201221346e-05,
"loss": 3.5888,
"step": 123000
},
{
"epoch": 1.24,
"learning_rate": 3.5272800080088096e-05,
"loss": 3.5887,
"step": 123500
},
{
"epoch": 1.24,
"learning_rate": 3.517268995895485e-05,
"loss": 3.5872,
"step": 124000
},
{
"epoch": 1.25,
"learning_rate": 3.50725798378216e-05,
"loss": 3.5805,
"step": 124500
},
{
"epoch": 1.25,
"learning_rate": 3.497246971668836e-05,
"loss": 3.5884,
"step": 125000
},
{
"epoch": 1.26,
"learning_rate": 3.487235959555511e-05,
"loss": 3.59,
"step": 125500
},
{
"epoch": 1.26,
"learning_rate": 3.477224947442186e-05,
"loss": 3.5949,
"step": 126000
},
{
"epoch": 1.27,
"learning_rate": 3.467213935328862e-05,
"loss": 3.5853,
"step": 126500
},
{
"epoch": 1.27,
"learning_rate": 3.457202923215537e-05,
"loss": 3.5865,
"step": 127000
},
{
"epoch": 1.28,
"learning_rate": 3.447191911102212e-05,
"loss": 3.5931,
"step": 127500
},
{
"epoch": 1.28,
"learning_rate": 3.437180898988888e-05,
"loss": 3.5939,
"step": 128000
},
{
"epoch": 1.29,
"learning_rate": 3.427169886875563e-05,
"loss": 3.5869,
"step": 128500
},
{
"epoch": 1.29,
"learning_rate": 3.4171588747622386e-05,
"loss": 3.5897,
"step": 129000
},
{
"epoch": 1.3,
"learning_rate": 3.407147862648914e-05,
"loss": 3.586,
"step": 129500
},
{
"epoch": 1.3,
"learning_rate": 3.397136850535589e-05,
"loss": 3.5922,
"step": 130000
},
{
"epoch": 1.31,
"learning_rate": 3.387125838422264e-05,
"loss": 3.5907,
"step": 130500
},
{
"epoch": 1.31,
"learning_rate": 3.37711482630894e-05,
"loss": 3.5943,
"step": 131000
},
{
"epoch": 1.32,
"learning_rate": 3.367103814195615e-05,
"loss": 3.584,
"step": 131500
},
{
"epoch": 1.32,
"learning_rate": 3.3570928020822906e-05,
"loss": 3.5856,
"step": 132000
},
{
"epoch": 1.33,
"learning_rate": 3.347081789968966e-05,
"loss": 3.5853,
"step": 132500
},
{
"epoch": 1.33,
"learning_rate": 3.337070777855641e-05,
"loss": 3.5876,
"step": 133000
},
{
"epoch": 1.34,
"learning_rate": 3.327059765742316e-05,
"loss": 3.5933,
"step": 133500
},
{
"epoch": 1.34,
"learning_rate": 3.317048753628992e-05,
"loss": 3.583,
"step": 134000
},
{
"epoch": 1.35,
"learning_rate": 3.307037741515668e-05,
"loss": 3.5854,
"step": 134500
},
{
"epoch": 1.35,
"learning_rate": 3.297026729402343e-05,
"loss": 3.5859,
"step": 135000
},
{
"epoch": 1.36,
"learning_rate": 3.2870157172890183e-05,
"loss": 3.5905,
"step": 135500
},
{
"epoch": 1.36,
"learning_rate": 3.277004705175693e-05,
"loss": 3.5863,
"step": 136000
},
{
"epoch": 1.37,
"learning_rate": 3.266993693062368e-05,
"loss": 3.5836,
"step": 136500
},
{
"epoch": 1.37,
"learning_rate": 3.256982680949044e-05,
"loss": 3.5912,
"step": 137000
},
{
"epoch": 1.38,
"learning_rate": 3.24697166883572e-05,
"loss": 3.5823,
"step": 137500
},
{
"epoch": 1.38,
"learning_rate": 3.236960656722395e-05,
"loss": 3.581,
"step": 138000
},
{
"epoch": 1.39,
"learning_rate": 3.2269496446090704e-05,
"loss": 3.5875,
"step": 138500
},
{
"epoch": 1.39,
"learning_rate": 3.2169386324957454e-05,
"loss": 3.5897,
"step": 139000
},
{
"epoch": 1.4,
"learning_rate": 3.2069276203824204e-05,
"loss": 3.5845,
"step": 139500
},
{
"epoch": 1.4,
"learning_rate": 3.196916608269096e-05,
"loss": 3.5937,
"step": 140000
},
{
"epoch": 1.41,
"learning_rate": 3.186905596155772e-05,
"loss": 3.5862,
"step": 140500
},
{
"epoch": 1.41,
"learning_rate": 3.176894584042447e-05,
"loss": 3.5894,
"step": 141000
},
{
"epoch": 1.42,
"learning_rate": 3.166883571929122e-05,
"loss": 3.5844,
"step": 141500
},
{
"epoch": 1.42,
"learning_rate": 3.1568725598157974e-05,
"loss": 3.5877,
"step": 142000
},
{
"epoch": 1.43,
"learning_rate": 3.1468615477024724e-05,
"loss": 3.5814,
"step": 142500
},
{
"epoch": 1.43,
"learning_rate": 3.136850535589148e-05,
"loss": 3.5898,
"step": 143000
},
{
"epoch": 1.44,
"learning_rate": 3.126839523475824e-05,
"loss": 3.5867,
"step": 143500
},
{
"epoch": 1.44,
"learning_rate": 3.116828511362499e-05,
"loss": 3.5788,
"step": 144000
},
{
"epoch": 1.45,
"learning_rate": 3.106817499249174e-05,
"loss": 3.5853,
"step": 144500
},
{
"epoch": 1.45,
"learning_rate": 3.0968064871358494e-05,
"loss": 3.5797,
"step": 145000
},
{
"epoch": 1.46,
"learning_rate": 3.086795475022525e-05,
"loss": 3.5808,
"step": 145500
},
{
"epoch": 1.46,
"learning_rate": 3.0767844629092e-05,
"loss": 3.5905,
"step": 146000
},
{
"epoch": 1.47,
"learning_rate": 3.066773450795876e-05,
"loss": 3.5829,
"step": 146500
},
{
"epoch": 1.47,
"learning_rate": 3.056762438682551e-05,
"loss": 3.5814,
"step": 147000
},
{
"epoch": 1.48,
"learning_rate": 3.046751426569226e-05,
"loss": 3.5844,
"step": 147500
},
{
"epoch": 1.48,
"learning_rate": 3.0367404144559018e-05,
"loss": 3.5842,
"step": 148000
},
{
"epoch": 1.49,
"learning_rate": 3.026729402342577e-05,
"loss": 3.5841,
"step": 148500
},
{
"epoch": 1.49,
"learning_rate": 3.0167183902292525e-05,
"loss": 3.5872,
"step": 149000
},
{
"epoch": 1.5,
"learning_rate": 3.0067073781159275e-05,
"loss": 3.5906,
"step": 149500
},
{
"epoch": 1.5,
"learning_rate": 2.996696366002603e-05,
"loss": 3.5784,
"step": 150000
},
{
"epoch": 1.51,
"learning_rate": 2.9866853538892785e-05,
"loss": 3.5907,
"step": 150500
},
{
"epoch": 1.51,
"learning_rate": 2.9766743417759535e-05,
"loss": 3.5839,
"step": 151000
},
{
"epoch": 1.52,
"learning_rate": 2.966663329662629e-05,
"loss": 3.5824,
"step": 151500
},
{
"epoch": 1.52,
"learning_rate": 2.9566523175493045e-05,
"loss": 3.5771,
"step": 152000
},
{
"epoch": 1.53,
"learning_rate": 2.9466413054359795e-05,
"loss": 3.5827,
"step": 152500
},
{
"epoch": 1.53,
"learning_rate": 2.936630293322655e-05,
"loss": 3.5817,
"step": 153000
},
{
"epoch": 1.54,
"learning_rate": 2.92661928120933e-05,
"loss": 3.5869,
"step": 153500
},
{
"epoch": 1.54,
"learning_rate": 2.9166082690960055e-05,
"loss": 3.5867,
"step": 154000
},
{
"epoch": 1.55,
"learning_rate": 2.906597256982681e-05,
"loss": 3.5923,
"step": 154500
},
{
"epoch": 1.55,
"learning_rate": 2.896586244869356e-05,
"loss": 3.5823,
"step": 155000
},
{
"epoch": 1.56,
"learning_rate": 2.886575232756032e-05,
"loss": 3.5769,
"step": 155500
},
{
"epoch": 1.56,
"learning_rate": 2.8765642206427072e-05,
"loss": 3.579,
"step": 156000
},
{
"epoch": 1.57,
"learning_rate": 2.8665532085293822e-05,
"loss": 3.5777,
"step": 156500
},
{
"epoch": 1.57,
"learning_rate": 2.856542196416058e-05,
"loss": 3.5832,
"step": 157000
},
{
"epoch": 1.58,
"learning_rate": 2.8465311843027332e-05,
"loss": 3.5799,
"step": 157500
},
{
"epoch": 1.58,
"learning_rate": 2.8365201721894082e-05,
"loss": 3.5748,
"step": 158000
},
{
"epoch": 1.59,
"learning_rate": 2.826509160076084e-05,
"loss": 3.5888,
"step": 158500
},
{
"epoch": 1.59,
"learning_rate": 2.8164981479627592e-05,
"loss": 3.5878,
"step": 159000
},
{
"epoch": 1.6,
"learning_rate": 2.8064871358494342e-05,
"loss": 3.5815,
"step": 159500
},
{
"epoch": 1.6,
"learning_rate": 2.79647612373611e-05,
"loss": 3.5785,
"step": 160000
},
{
"epoch": 1.61,
"learning_rate": 2.7864651116227852e-05,
"loss": 3.5865,
"step": 160500
},
{
"epoch": 1.61,
"learning_rate": 2.7764540995094606e-05,
"loss": 3.5718,
"step": 161000
},
{
"epoch": 1.62,
"learning_rate": 2.766443087396136e-05,
"loss": 3.5854,
"step": 161500
},
{
"epoch": 1.62,
"learning_rate": 2.756432075282811e-05,
"loss": 3.5721,
"step": 162000
},
{
"epoch": 1.63,
"learning_rate": 2.7464210631694866e-05,
"loss": 3.5775,
"step": 162500
},
{
"epoch": 1.63,
"learning_rate": 2.736410051056162e-05,
"loss": 3.5874,
"step": 163000
},
{
"epoch": 1.64,
"learning_rate": 2.726399038942837e-05,
"loss": 3.5854,
"step": 163500
},
{
"epoch": 1.64,
"learning_rate": 2.7163880268295126e-05,
"loss": 3.582,
"step": 164000
},
{
"epoch": 1.65,
"learning_rate": 2.706377014716188e-05,
"loss": 3.5753,
"step": 164500
},
{
"epoch": 1.65,
"learning_rate": 2.6963660026028633e-05,
"loss": 3.5792,
"step": 165000
},
{
"epoch": 1.66,
"learning_rate": 2.6863549904895386e-05,
"loss": 3.5759,
"step": 165500
},
{
"epoch": 1.66,
"learning_rate": 2.676343978376214e-05,
"loss": 3.5819,
"step": 166000
},
{
"epoch": 1.67,
"learning_rate": 2.6663329662628893e-05,
"loss": 3.5744,
"step": 166500
},
{
"epoch": 1.67,
"learning_rate": 2.6563219541495646e-05,
"loss": 3.5814,
"step": 167000
},
{
"epoch": 1.68,
"learning_rate": 2.64631094203624e-05,
"loss": 3.579,
"step": 167500
},
{
"epoch": 1.68,
"learning_rate": 2.6362999299229153e-05,
"loss": 3.5872,
"step": 168000
},
{
"epoch": 1.69,
"learning_rate": 2.6262889178095906e-05,
"loss": 3.5764,
"step": 168500
},
{
"epoch": 1.69,
"learning_rate": 2.616277905696266e-05,
"loss": 3.576,
"step": 169000
},
{
"epoch": 1.7,
"learning_rate": 2.6062668935829413e-05,
"loss": 3.5744,
"step": 169500
},
{
"epoch": 1.7,
"learning_rate": 2.5962558814696166e-05,
"loss": 3.5742,
"step": 170000
},
{
"epoch": 1.71,
"learning_rate": 2.586244869356292e-05,
"loss": 3.5718,
"step": 170500
},
{
"epoch": 1.71,
"learning_rate": 2.5762338572429673e-05,
"loss": 3.5761,
"step": 171000
},
{
"epoch": 1.72,
"learning_rate": 2.5662228451296426e-05,
"loss": 3.5765,
"step": 171500
},
{
"epoch": 1.72,
"learning_rate": 2.556211833016318e-05,
"loss": 3.5783,
"step": 172000
},
{
"epoch": 1.73,
"learning_rate": 2.5462008209029933e-05,
"loss": 3.5767,
"step": 172500
},
{
"epoch": 1.73,
"learning_rate": 2.5361898087896687e-05,
"loss": 3.5697,
"step": 173000
},
{
"epoch": 1.74,
"learning_rate": 2.526178796676344e-05,
"loss": 3.57,
"step": 173500
},
{
"epoch": 1.74,
"learning_rate": 2.5161677845630193e-05,
"loss": 3.573,
"step": 174000
},
{
"epoch": 1.75,
"learning_rate": 2.506156772449695e-05,
"loss": 3.5741,
"step": 174500
},
{
"epoch": 1.75,
"learning_rate": 2.49614576033637e-05,
"loss": 3.5802,
"step": 175000
},
{
"epoch": 1.76,
"learning_rate": 2.4861347482230453e-05,
"loss": 3.5763,
"step": 175500
},
{
"epoch": 1.76,
"learning_rate": 2.476123736109721e-05,
"loss": 3.5697,
"step": 176000
},
{
"epoch": 1.77,
"learning_rate": 2.466112723996396e-05,
"loss": 3.5797,
"step": 176500
},
{
"epoch": 1.77,
"learning_rate": 2.4561017118830714e-05,
"loss": 3.5779,
"step": 177000
},
{
"epoch": 1.78,
"learning_rate": 2.4460906997697467e-05,
"loss": 3.5754,
"step": 177500
},
{
"epoch": 1.78,
"learning_rate": 2.436079687656422e-05,
"loss": 3.5808,
"step": 178000
},
{
"epoch": 1.79,
"learning_rate": 2.4260686755430974e-05,
"loss": 3.57,
"step": 178500
},
{
"epoch": 1.79,
"learning_rate": 2.4160576634297727e-05,
"loss": 3.5687,
"step": 179000
},
{
"epoch": 1.8,
"learning_rate": 2.406046651316448e-05,
"loss": 3.5777,
"step": 179500
},
{
"epoch": 1.8,
"learning_rate": 2.3960356392031237e-05,
"loss": 3.5793,
"step": 180000
},
{
"epoch": 1.81,
"learning_rate": 2.3860246270897987e-05,
"loss": 3.5826,
"step": 180500
},
{
"epoch": 1.81,
"learning_rate": 2.376013614976474e-05,
"loss": 3.5682,
"step": 181000
},
{
"epoch": 1.82,
"learning_rate": 2.3660026028631497e-05,
"loss": 3.5774,
"step": 181500
},
{
"epoch": 1.82,
"learning_rate": 2.3559915907498247e-05,
"loss": 3.5774,
"step": 182000
},
{
"epoch": 1.83,
"learning_rate": 2.3459805786365e-05,
"loss": 3.5682,
"step": 182500
},
{
"epoch": 1.83,
"learning_rate": 2.3359695665231758e-05,
"loss": 3.5741,
"step": 183000
},
{
"epoch": 1.84,
"learning_rate": 2.3259585544098508e-05,
"loss": 3.5718,
"step": 183500
},
{
"epoch": 1.84,
"learning_rate": 2.3159475422965264e-05,
"loss": 3.5704,
"step": 184000
},
{
"epoch": 1.85,
"learning_rate": 2.3059365301832018e-05,
"loss": 3.5711,
"step": 184500
},
{
"epoch": 1.85,
"learning_rate": 2.2959255180698768e-05,
"loss": 3.5759,
"step": 185000
},
{
"epoch": 1.86,
"learning_rate": 2.2859145059565524e-05,
"loss": 3.5717,
"step": 185500
},
{
"epoch": 1.86,
"learning_rate": 2.2759034938432274e-05,
"loss": 3.5752,
"step": 186000
},
{
"epoch": 1.87,
"learning_rate": 2.2658924817299028e-05,
"loss": 3.5771,
"step": 186500
},
{
"epoch": 1.87,
"learning_rate": 2.2558814696165785e-05,
"loss": 3.5646,
"step": 187000
},
{
"epoch": 1.88,
"learning_rate": 2.2458704575032535e-05,
"loss": 3.5694,
"step": 187500
},
{
"epoch": 1.88,
"learning_rate": 2.2358594453899288e-05,
"loss": 3.5752,
"step": 188000
},
{
"epoch": 1.89,
"learning_rate": 2.2258484332766045e-05,
"loss": 3.5732,
"step": 188500
},
{
"epoch": 1.89,
"learning_rate": 2.2158374211632795e-05,
"loss": 3.576,
"step": 189000
},
{
"epoch": 1.9,
"learning_rate": 2.205826409049955e-05,
"loss": 3.579,
"step": 189500
},
{
"epoch": 1.9,
"learning_rate": 2.1958153969366305e-05,
"loss": 3.577,
"step": 190000
},
{
"epoch": 1.91,
"learning_rate": 2.1858043848233055e-05,
"loss": 3.5761,
"step": 190500
},
{
"epoch": 1.91,
"learning_rate": 2.175793372709981e-05,
"loss": 3.5713,
"step": 191000
},
{
"epoch": 1.92,
"learning_rate": 2.1657823605966565e-05,
"loss": 3.567,
"step": 191500
},
{
"epoch": 1.92,
"learning_rate": 2.1557713484833315e-05,
"loss": 3.5702,
"step": 192000
},
{
"epoch": 1.93,
"learning_rate": 2.145760336370007e-05,
"loss": 3.5675,
"step": 192500
},
{
"epoch": 1.93,
"learning_rate": 2.1357493242566825e-05,
"loss": 3.5677,
"step": 193000
},
{
"epoch": 1.94,
"learning_rate": 2.125738312143358e-05,
"loss": 3.5756,
"step": 193500
},
{
"epoch": 1.94,
"learning_rate": 2.1157273000300332e-05,
"loss": 3.5742,
"step": 194000
},
{
"epoch": 1.95,
"learning_rate": 2.1057162879167082e-05,
"loss": 3.5783,
"step": 194500
},
{
"epoch": 1.95,
"learning_rate": 2.095705275803384e-05,
"loss": 3.5716,
"step": 195000
},
{
"epoch": 1.96,
"learning_rate": 2.0856942636900592e-05,
"loss": 3.5644,
"step": 195500
},
{
"epoch": 1.96,
"learning_rate": 2.0756832515767342e-05,
"loss": 3.5701,
"step": 196000
},
{
"epoch": 1.97,
"learning_rate": 2.06567223946341e-05,
"loss": 3.5744,
"step": 196500
},
{
"epoch": 1.97,
"learning_rate": 2.0556612273500852e-05,
"loss": 3.5715,
"step": 197000
},
{
"epoch": 1.98,
"learning_rate": 2.0456502152367602e-05,
"loss": 3.5673,
"step": 197500
},
{
"epoch": 1.98,
"learning_rate": 2.035639203123436e-05,
"loss": 3.5683,
"step": 198000
},
{
"epoch": 1.99,
"learning_rate": 2.0256281910101112e-05,
"loss": 3.5723,
"step": 198500
},
{
"epoch": 1.99,
"learning_rate": 2.0156171788967866e-05,
"loss": 3.5749,
"step": 199000
},
{
"epoch": 2.0,
"learning_rate": 2.005606166783462e-05,
"loss": 3.5611,
"step": 199500
},
{
"epoch": 2.0,
"learning_rate": 1.9955951546701372e-05,
"loss": 3.561,
"step": 200000
},
{
"epoch": 2.01,
"learning_rate": 1.9855841425568126e-05,
"loss": 3.5365,
"step": 200500
},
{
"epoch": 2.01,
"learning_rate": 1.975573130443488e-05,
"loss": 3.5517,
"step": 201000
},
{
"epoch": 2.02,
"learning_rate": 1.9655621183301632e-05,
"loss": 3.5408,
"step": 201500
},
{
"epoch": 2.02,
"learning_rate": 1.9555511062168386e-05,
"loss": 3.5401,
"step": 202000
},
{
"epoch": 2.03,
"learning_rate": 1.945540094103514e-05,
"loss": 3.5402,
"step": 202500
},
{
"epoch": 2.03,
"learning_rate": 1.9355290819901893e-05,
"loss": 3.5454,
"step": 203000
},
{
"epoch": 2.04,
"learning_rate": 1.9255180698768646e-05,
"loss": 3.537,
"step": 203500
},
{
"epoch": 2.04,
"learning_rate": 1.91550705776354e-05,
"loss": 3.5413,
"step": 204000
},
{
"epoch": 2.05,
"learning_rate": 1.9054960456502153e-05,
"loss": 3.543,
"step": 204500
},
{
"epoch": 2.05,
"learning_rate": 1.8954850335368906e-05,
"loss": 3.5367,
"step": 205000
},
{
"epoch": 2.06,
"learning_rate": 1.885474021423566e-05,
"loss": 3.5449,
"step": 205500
},
{
"epoch": 2.06,
"learning_rate": 1.8754630093102413e-05,
"loss": 3.536,
"step": 206000
},
{
"epoch": 2.07,
"learning_rate": 1.8654519971969166e-05,
"loss": 3.5429,
"step": 206500
},
{
"epoch": 2.07,
"learning_rate": 1.855440985083592e-05,
"loss": 3.5373,
"step": 207000
},
{
"epoch": 2.08,
"learning_rate": 1.8454299729702673e-05,
"loss": 3.5495,
"step": 207500
},
{
"epoch": 2.08,
"learning_rate": 1.8354189608569426e-05,
"loss": 3.5432,
"step": 208000
},
{
"epoch": 2.09,
"learning_rate": 1.8254079487436183e-05,
"loss": 3.5435,
"step": 208500
},
{
"epoch": 2.09,
"learning_rate": 1.8153969366302933e-05,
"loss": 3.545,
"step": 209000
},
{
"epoch": 2.1,
"learning_rate": 1.8053859245169686e-05,
"loss": 3.5331,
"step": 209500
},
{
"epoch": 2.1,
"learning_rate": 1.7953749124036443e-05,
"loss": 3.5315,
"step": 210000
},
{
"epoch": 2.11,
"learning_rate": 1.7853639002903193e-05,
"loss": 3.5333,
"step": 210500
},
{
"epoch": 2.11,
"learning_rate": 1.7753528881769947e-05,
"loss": 3.548,
"step": 211000
},
{
"epoch": 2.12,
"learning_rate": 1.76534187606367e-05,
"loss": 3.5485,
"step": 211500
},
{
"epoch": 2.12,
"learning_rate": 1.7553308639503453e-05,
"loss": 3.5378,
"step": 212000
},
{
"epoch": 2.13,
"learning_rate": 1.745319851837021e-05,
"loss": 3.5438,
"step": 212500
},
{
"epoch": 2.13,
"learning_rate": 1.735308839723696e-05,
"loss": 3.5346,
"step": 213000
},
{
"epoch": 2.14,
"learning_rate": 1.7252978276103713e-05,
"loss": 3.5406,
"step": 213500
},
{
"epoch": 2.14,
"learning_rate": 1.715286815497047e-05,
"loss": 3.5365,
"step": 214000
},
{
"epoch": 2.15,
"learning_rate": 1.705275803383722e-05,
"loss": 3.5403,
"step": 214500
},
{
"epoch": 2.15,
"learning_rate": 1.6952647912703974e-05,
"loss": 3.5449,
"step": 215000
},
{
"epoch": 2.16,
"learning_rate": 1.685253779157073e-05,
"loss": 3.5456,
"step": 215500
},
{
"epoch": 2.16,
"learning_rate": 1.675242767043748e-05,
"loss": 3.5391,
"step": 216000
},
{
"epoch": 2.17,
"learning_rate": 1.6652317549304234e-05,
"loss": 3.5437,
"step": 216500
},
{
"epoch": 2.17,
"learning_rate": 1.655220742817099e-05,
"loss": 3.5374,
"step": 217000
},
{
"epoch": 2.18,
"learning_rate": 1.645209730703774e-05,
"loss": 3.5483,
"step": 217500
},
{
"epoch": 2.18,
"learning_rate": 1.6351987185904497e-05,
"loss": 3.546,
"step": 218000
},
{
"epoch": 2.19,
"learning_rate": 1.6251877064771247e-05,
"loss": 3.5468,
"step": 218500
},
{
"epoch": 2.19,
"learning_rate": 1.6151766943638e-05,
"loss": 3.542,
"step": 219000
},
{
"epoch": 2.2,
"learning_rate": 1.6051656822504757e-05,
"loss": 3.5409,
"step": 219500
},
{
"epoch": 2.2,
"learning_rate": 1.5951546701371507e-05,
"loss": 3.5452,
"step": 220000
},
{
"epoch": 2.21,
"learning_rate": 1.585143658023826e-05,
"loss": 3.5398,
"step": 220500
},
{
"epoch": 2.21,
"learning_rate": 1.5751326459105017e-05,
"loss": 3.5423,
"step": 221000
},
{
"epoch": 2.22,
"learning_rate": 1.5651216337971767e-05,
"loss": 3.5423,
"step": 221500
},
{
"epoch": 2.22,
"learning_rate": 1.5551106216838524e-05,
"loss": 3.5529,
"step": 222000
},
{
"epoch": 2.23,
"learning_rate": 1.5450996095705278e-05,
"loss": 3.5467,
"step": 222500
},
{
"epoch": 2.23,
"learning_rate": 1.5350885974572028e-05,
"loss": 3.5428,
"step": 223000
},
{
"epoch": 2.24,
"learning_rate": 1.5250775853438784e-05,
"loss": 3.5497,
"step": 223500
},
{
"epoch": 2.24,
"learning_rate": 1.5150665732305536e-05,
"loss": 3.5413,
"step": 224000
},
{
"epoch": 2.25,
"learning_rate": 1.505055561117229e-05,
"loss": 3.5399,
"step": 224500
},
{
"epoch": 2.25,
"learning_rate": 1.4950445490039045e-05,
"loss": 3.5404,
"step": 225000
},
{
"epoch": 2.26,
"learning_rate": 1.4850335368905796e-05,
"loss": 3.5494,
"step": 225500
},
{
"epoch": 2.26,
"learning_rate": 1.475022524777255e-05,
"loss": 3.5332,
"step": 226000
},
{
"epoch": 2.27,
"learning_rate": 1.4650115126639305e-05,
"loss": 3.5436,
"step": 226500
},
{
"epoch": 2.27,
"learning_rate": 1.4550005005506056e-05,
"loss": 3.5414,
"step": 227000
},
{
"epoch": 2.28,
"learning_rate": 1.444989488437281e-05,
"loss": 3.5463,
"step": 227500
},
{
"epoch": 2.28,
"learning_rate": 1.4349784763239565e-05,
"loss": 3.5441,
"step": 228000
},
{
"epoch": 2.29,
"learning_rate": 1.4249674642106318e-05,
"loss": 3.5448,
"step": 228500
},
{
"epoch": 2.29,
"learning_rate": 1.414956452097307e-05,
"loss": 3.5414,
"step": 229000
},
{
"epoch": 2.3,
"learning_rate": 1.4049454399839823e-05,
"loss": 3.5428,
"step": 229500
},
{
"epoch": 2.3,
"learning_rate": 1.3949344278706578e-05,
"loss": 3.5371,
"step": 230000
},
{
"epoch": 2.31,
"learning_rate": 1.3849234157573332e-05,
"loss": 3.545,
"step": 230500
},
{
"epoch": 2.31,
"learning_rate": 1.3749124036440083e-05,
"loss": 3.5379,
"step": 231000
},
{
"epoch": 2.32,
"learning_rate": 1.3649013915306838e-05,
"loss": 3.5405,
"step": 231500
},
{
"epoch": 2.32,
"learning_rate": 1.3548903794173592e-05,
"loss": 3.5417,
"step": 232000
},
{
"epoch": 2.33,
"learning_rate": 1.3448793673040345e-05,
"loss": 3.5416,
"step": 232500
},
{
"epoch": 2.33,
"learning_rate": 1.3348683551907097e-05,
"loss": 3.543,
"step": 233000
},
{
"epoch": 2.34,
"learning_rate": 1.3248573430773852e-05,
"loss": 3.5473,
"step": 233500
},
{
"epoch": 2.34,
"learning_rate": 1.3148463309640605e-05,
"loss": 3.5424,
"step": 234000
},
{
"epoch": 2.35,
"learning_rate": 1.3048353188507359e-05,
"loss": 3.5367,
"step": 234500
},
{
"epoch": 2.35,
"learning_rate": 1.2948243067374112e-05,
"loss": 3.5443,
"step": 235000
},
{
"epoch": 2.36,
"learning_rate": 1.2848132946240865e-05,
"loss": 3.5426,
"step": 235500
},
{
"epoch": 2.36,
"learning_rate": 1.2748022825107619e-05,
"loss": 3.5478,
"step": 236000
},
{
"epoch": 2.37,
"learning_rate": 1.2647912703974372e-05,
"loss": 3.5481,
"step": 236500
},
{
"epoch": 2.37,
"learning_rate": 1.2547802582841126e-05,
"loss": 3.5358,
"step": 237000
},
{
"epoch": 2.38,
"learning_rate": 1.2447692461707879e-05,
"loss": 3.5492,
"step": 237500
},
{
"epoch": 2.38,
"learning_rate": 1.2347582340574632e-05,
"loss": 3.5428,
"step": 238000
},
{
"epoch": 2.39,
"learning_rate": 1.2247472219441386e-05,
"loss": 3.5412,
"step": 238500
},
{
"epoch": 2.39,
"learning_rate": 1.2147362098308139e-05,
"loss": 3.5417,
"step": 239000
},
{
"epoch": 2.4,
"learning_rate": 1.2047251977174892e-05,
"loss": 3.5401,
"step": 239500
},
{
"epoch": 2.4,
"learning_rate": 1.1947141856041647e-05,
"loss": 3.5451,
"step": 240000
},
{
"epoch": 2.41,
"learning_rate": 1.18470317349084e-05,
"loss": 3.5367,
"step": 240500
},
{
"epoch": 2.41,
"learning_rate": 1.1746921613775153e-05,
"loss": 3.5391,
"step": 241000
},
{
"epoch": 2.42,
"learning_rate": 1.1646811492641906e-05,
"loss": 3.5472,
"step": 241500
},
{
"epoch": 2.42,
"learning_rate": 1.1546701371508661e-05,
"loss": 3.5384,
"step": 242000
},
{
"epoch": 2.43,
"learning_rate": 1.1446591250375413e-05,
"loss": 3.5407,
"step": 242500
},
{
"epoch": 2.43,
"learning_rate": 1.1346481129242166e-05,
"loss": 3.5353,
"step": 243000
},
{
"epoch": 2.44,
"learning_rate": 1.1246371008108921e-05,
"loss": 3.5362,
"step": 243500
},
{
"epoch": 2.44,
"learning_rate": 1.1146260886975673e-05,
"loss": 3.5466,
"step": 244000
},
{
"epoch": 2.45,
"learning_rate": 1.1046150765842426e-05,
"loss": 3.5367,
"step": 244500
},
{
"epoch": 2.45,
"learning_rate": 1.0946040644709181e-05,
"loss": 3.5408,
"step": 245000
},
{
"epoch": 2.46,
"learning_rate": 1.0845930523575935e-05,
"loss": 3.5473,
"step": 245500
},
{
"epoch": 2.46,
"learning_rate": 1.0745820402442686e-05,
"loss": 3.548,
"step": 246000
},
{
"epoch": 2.47,
"learning_rate": 1.064571028130944e-05,
"loss": 3.5384,
"step": 246500
},
{
"epoch": 2.47,
"learning_rate": 1.0545600160176195e-05,
"loss": 3.5336,
"step": 247000
},
{
"epoch": 2.48,
"learning_rate": 1.0445490039042948e-05,
"loss": 3.5411,
"step": 247500
},
{
"epoch": 2.48,
"learning_rate": 1.03453799179097e-05,
"loss": 3.5506,
"step": 248000
},
{
"epoch": 2.49,
"learning_rate": 1.0245269796776455e-05,
"loss": 3.548,
"step": 248500
},
{
"epoch": 2.49,
"learning_rate": 1.0145159675643208e-05,
"loss": 3.5342,
"step": 249000
},
{
"epoch": 2.5,
"learning_rate": 1.0045049554509962e-05,
"loss": 3.5415,
"step": 249500
},
{
"epoch": 2.5,
"learning_rate": 9.944939433376713e-06,
"loss": 3.5354,
"step": 250000
},
{
"epoch": 2.51,
"learning_rate": 9.844829312243468e-06,
"loss": 3.5313,
"step": 250500
},
{
"epoch": 2.51,
"learning_rate": 9.744719191110222e-06,
"loss": 3.5369,
"step": 251000
},
{
"epoch": 2.52,
"learning_rate": 9.644609069976975e-06,
"loss": 3.5452,
"step": 251500
},
{
"epoch": 2.52,
"learning_rate": 9.544498948843729e-06,
"loss": 3.5447,
"step": 252000
},
{
"epoch": 2.53,
"learning_rate": 9.444388827710482e-06,
"loss": 3.5509,
"step": 252500
},
{
"epoch": 2.53,
"learning_rate": 9.344278706577235e-06,
"loss": 3.538,
"step": 253000
},
{
"epoch": 2.54,
"learning_rate": 9.244168585443989e-06,
"loss": 3.5405,
"step": 253500
},
{
"epoch": 2.54,
"learning_rate": 9.144058464310742e-06,
"loss": 3.5293,
"step": 254000
},
{
"epoch": 2.55,
"learning_rate": 9.043948343177495e-06,
"loss": 3.5478,
"step": 254500
},
{
"epoch": 2.55,
"learning_rate": 8.943838222044249e-06,
"loss": 3.53,
"step": 255000
},
{
"epoch": 2.56,
"learning_rate": 8.843728100911002e-06,
"loss": 3.54,
"step": 255500
},
{
"epoch": 2.56,
"learning_rate": 8.743617979777756e-06,
"loss": 3.5342,
"step": 256000
},
{
"epoch": 2.57,
"learning_rate": 8.643507858644509e-06,
"loss": 3.5373,
"step": 256500
},
{
"epoch": 2.57,
"learning_rate": 8.543397737511264e-06,
"loss": 3.5341,
"step": 257000
},
{
"epoch": 2.58,
"learning_rate": 8.443287616378016e-06,
"loss": 3.5384,
"step": 257500
},
{
"epoch": 2.58,
"learning_rate": 8.343177495244769e-06,
"loss": 3.5415,
"step": 258000
},
{
"epoch": 2.59,
"learning_rate": 8.243067374111522e-06,
"loss": 3.5315,
"step": 258500
},
{
"epoch": 2.59,
"learning_rate": 8.142957252978277e-06,
"loss": 3.5401,
"step": 259000
},
{
"epoch": 2.6,
"learning_rate": 8.042847131845029e-06,
"loss": 3.541,
"step": 259500
},
{
"epoch": 2.6,
"learning_rate": 7.942737010711783e-06,
"loss": 3.5443,
"step": 260000
},
{
"epoch": 2.61,
"learning_rate": 7.842626889578538e-06,
"loss": 3.5413,
"step": 260500
},
{
"epoch": 2.61,
"learning_rate": 7.742516768445291e-06,
"loss": 3.5302,
"step": 261000
},
{
"epoch": 2.62,
"learning_rate": 7.642406647312043e-06,
"loss": 3.5429,
"step": 261500
},
{
"epoch": 2.62,
"learning_rate": 7.542296526178797e-06,
"loss": 3.5417,
"step": 262000
},
{
"epoch": 2.63,
"learning_rate": 7.442186405045551e-06,
"loss": 3.5402,
"step": 262500
},
{
"epoch": 2.63,
"learning_rate": 7.342076283912304e-06,
"loss": 3.5407,
"step": 263000
},
{
"epoch": 2.64,
"learning_rate": 7.241966162779057e-06,
"loss": 3.5383,
"step": 263500
},
{
"epoch": 2.64,
"learning_rate": 7.14185604164581e-06,
"loss": 3.5362,
"step": 264000
},
{
"epoch": 2.65,
"learning_rate": 7.041745920512564e-06,
"loss": 3.5428,
"step": 264500
},
{
"epoch": 2.65,
"learning_rate": 6.941635799379317e-06,
"loss": 3.5374,
"step": 265000
},
{
"epoch": 2.66,
"learning_rate": 6.8415256782460705e-06,
"loss": 3.5414,
"step": 265500
},
{
"epoch": 2.66,
"learning_rate": 6.741415557112825e-06,
"loss": 3.5317,
"step": 266000
},
{
"epoch": 2.67,
"learning_rate": 6.641305435979577e-06,
"loss": 3.54,
"step": 266500
},
{
"epoch": 2.67,
"learning_rate": 6.5411953148463315e-06,
"loss": 3.543,
"step": 267000
},
{
"epoch": 2.68,
"learning_rate": 6.441085193713084e-06,
"loss": 3.5407,
"step": 267500
},
{
"epoch": 2.68,
"learning_rate": 6.340975072579838e-06,
"loss": 3.5433,
"step": 268000
},
{
"epoch": 2.69,
"learning_rate": 6.240864951446592e-06,
"loss": 3.5359,
"step": 268500
},
{
"epoch": 2.69,
"learning_rate": 6.140754830313345e-06,
"loss": 3.5419,
"step": 269000
},
{
"epoch": 2.7,
"learning_rate": 6.040644709180098e-06,
"loss": 3.5488,
"step": 269500
},
{
"epoch": 2.7,
"learning_rate": 5.940534588046852e-06,
"loss": 3.5352,
"step": 270000
},
{
"epoch": 2.71,
"learning_rate": 5.840424466913605e-06,
"loss": 3.5328,
"step": 270500
},
{
"epoch": 2.71,
"learning_rate": 5.7403143457803585e-06,
"loss": 3.5397,
"step": 271000
},
{
"epoch": 2.72,
"learning_rate": 5.640204224647112e-06,
"loss": 3.529,
"step": 271500
},
{
"epoch": 2.72,
"learning_rate": 5.540094103513866e-06,
"loss": 3.5405,
"step": 272000
},
{
"epoch": 2.73,
"learning_rate": 5.439983982380619e-06,
"loss": 3.5309,
"step": 272500
},
{
"epoch": 2.73,
"learning_rate": 5.339873861247372e-06,
"loss": 3.5336,
"step": 273000
},
{
"epoch": 2.74,
"learning_rate": 5.239763740114125e-06,
"loss": 3.5471,
"step": 273500
},
{
"epoch": 2.74,
"learning_rate": 5.139653618980879e-06,
"loss": 3.5301,
"step": 274000
},
{
"epoch": 2.75,
"learning_rate": 5.039543497847633e-06,
"loss": 3.537,
"step": 274500
},
{
"epoch": 2.75,
"learning_rate": 4.9394333767143855e-06,
"loss": 3.5421,
"step": 275000
},
{
"epoch": 2.76,
"learning_rate": 4.83932325558114e-06,
"loss": 3.5365,
"step": 275500
},
{
"epoch": 2.76,
"learning_rate": 4.739213134447892e-06,
"loss": 3.5313,
"step": 276000
},
{
"epoch": 2.77,
"learning_rate": 4.6391030133146465e-06,
"loss": 3.5356,
"step": 276500
},
{
"epoch": 2.77,
"learning_rate": 4.5389928921814e-06,
"loss": 3.5387,
"step": 277000
},
{
"epoch": 2.78,
"learning_rate": 4.438882771048153e-06,
"loss": 3.5386,
"step": 277500
},
{
"epoch": 2.78,
"learning_rate": 4.338772649914907e-06,
"loss": 3.5426,
"step": 278000
},
{
"epoch": 2.79,
"learning_rate": 4.23866252878166e-06,
"loss": 3.5425,
"step": 278500
},
{
"epoch": 2.79,
"learning_rate": 4.138552407648413e-06,
"loss": 3.5396,
"step": 279000
},
{
"epoch": 2.8,
"learning_rate": 4.038442286515167e-06,
"loss": 3.5389,
"step": 279500
},
{
"epoch": 2.8,
"learning_rate": 3.93833216538192e-06,
"loss": 3.5308,
"step": 280000
},
{
"epoch": 2.81,
"learning_rate": 3.838222044248674e-06,
"loss": 3.5359,
"step": 280500
},
{
"epoch": 2.81,
"learning_rate": 3.738111923115427e-06,
"loss": 3.5365,
"step": 281000
},
{
"epoch": 2.82,
"learning_rate": 3.6380018019821807e-06,
"loss": 3.5287,
"step": 281500
},
{
"epoch": 2.82,
"learning_rate": 3.537891680848934e-06,
"loss": 3.5452,
"step": 282000
},
{
"epoch": 2.83,
"learning_rate": 3.4377815597156874e-06,
"loss": 3.545,
"step": 282500
},
{
"epoch": 2.83,
"learning_rate": 3.337671438582441e-06,
"loss": 3.5322,
"step": 283000
},
{
"epoch": 2.84,
"learning_rate": 3.237561317449194e-06,
"loss": 3.5277,
"step": 283500
},
{
"epoch": 2.84,
"learning_rate": 3.1374511963159475e-06,
"loss": 3.5443,
"step": 284000
},
{
"epoch": 2.85,
"learning_rate": 3.0373410751827013e-06,
"loss": 3.5467,
"step": 284500
},
{
"epoch": 2.85,
"learning_rate": 2.9372309540494547e-06,
"loss": 3.5383,
"step": 285000
},
{
"epoch": 2.86,
"learning_rate": 2.837120832916208e-06,
"loss": 3.5327,
"step": 285500
},
{
"epoch": 2.86,
"learning_rate": 2.7370107117829615e-06,
"loss": 3.534,
"step": 286000
},
{
"epoch": 2.87,
"learning_rate": 2.6369005906497144e-06,
"loss": 3.5382,
"step": 286500
},
{
"epoch": 2.87,
"learning_rate": 2.536790469516468e-06,
"loss": 3.5347,
"step": 287000
},
{
"epoch": 2.88,
"learning_rate": 2.4366803483832216e-06,
"loss": 3.5277,
"step": 287500
},
{
"epoch": 2.88,
"learning_rate": 2.336570227249975e-06,
"loss": 3.5319,
"step": 288000
},
{
"epoch": 2.89,
"learning_rate": 2.2364601061167284e-06,
"loss": 3.5436,
"step": 288500
},
{
"epoch": 2.89,
"learning_rate": 2.1363499849834817e-06,
"loss": 3.538,
"step": 289000
},
{
"epoch": 2.9,
"learning_rate": 2.036239863850235e-06,
"loss": 3.5365,
"step": 289500
},
{
"epoch": 2.9,
"learning_rate": 1.936129742716989e-06,
"loss": 3.5385,
"step": 290000
},
{
"epoch": 2.91,
"learning_rate": 1.836019621583742e-06,
"loss": 3.5317,
"step": 290500
},
{
"epoch": 2.91,
"learning_rate": 1.7359095004504957e-06,
"loss": 3.536,
"step": 291000
},
{
"epoch": 2.92,
"learning_rate": 1.635799379317249e-06,
"loss": 3.5383,
"step": 291500
},
{
"epoch": 2.92,
"learning_rate": 1.5356892581840024e-06,
"loss": 3.5316,
"step": 292000
},
{
"epoch": 2.93,
"learning_rate": 1.435579137050756e-06,
"loss": 3.5352,
"step": 292500
},
{
"epoch": 2.93,
"learning_rate": 1.3354690159175094e-06,
"loss": 3.5296,
"step": 293000
},
{
"epoch": 2.94,
"learning_rate": 1.2353588947842625e-06,
"loss": 3.5341,
"step": 293500
},
{
"epoch": 2.94,
"learning_rate": 1.1352487736510161e-06,
"loss": 3.5336,
"step": 294000
},
{
"epoch": 2.95,
"learning_rate": 1.0351386525177695e-06,
"loss": 3.5404,
"step": 294500
},
{
"epoch": 2.95,
"learning_rate": 9.35028531384523e-07,
"loss": 3.5433,
"step": 295000
},
{
"epoch": 2.96,
"learning_rate": 8.349184102512765e-07,
"loss": 3.5247,
"step": 295500
},
{
"epoch": 2.96,
"learning_rate": 7.348082891180298e-07,
"loss": 3.5389,
"step": 296000
},
{
"epoch": 2.97,
"learning_rate": 6.346981679847833e-07,
"loss": 3.539,
"step": 296500
},
{
"epoch": 2.97,
"learning_rate": 5.345880468515367e-07,
"loss": 3.5283,
"step": 297000
},
{
"epoch": 2.98,
"learning_rate": 4.3447792571829013e-07,
"loss": 3.5397,
"step": 297500
},
{
"epoch": 2.98,
"learning_rate": 3.3436780458504356e-07,
"loss": 3.5438,
"step": 298000
},
{
"epoch": 2.99,
"learning_rate": 2.34257683451797e-07,
"loss": 3.5265,
"step": 298500
},
{
"epoch": 2.99,
"learning_rate": 1.341475623185504e-07,
"loss": 3.5346,
"step": 299000
},
{
"epoch": 3.0,
"learning_rate": 3.403744118530384e-08,
"loss": 3.5366,
"step": 299500
},
{
"epoch": 3.0,
"step": 299670,
"total_flos": 4.00615718456918e+19,
"train_loss": 1.773998625050055,
"train_runtime": 174345.3711,
"train_samples_per_second": 55.003,
"train_steps_per_second": 1.719
}
],
"max_steps": 299670,
"num_train_epochs": 3,
"total_flos": 4.00615718456918e+19,
"trial_name": null,
"trial_params": null
}