ConfliBERT-Arabic-Scratch-65K-v2 / trainer_state.json
salsarra's picture
Upload 13 files
c1642d0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 768801,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9967481832099594e-05,
"loss": 6.7212,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.993496366419919e-05,
"loss": 6.2046,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.9902445496298784e-05,
"loss": 6.1099,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.986992732839838e-05,
"loss": 6.0605,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9837409160497974e-05,
"loss": 6.0447,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.980489099259757e-05,
"loss": 6.0184,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.9772372824697164e-05,
"loss": 5.9684,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.9739854656796756e-05,
"loss": 5.9596,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.970733648889635e-05,
"loss": 5.9604,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.967481832099594e-05,
"loss": 5.9346,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.964230015309554e-05,
"loss": 5.9298,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.960978198519513e-05,
"loss": 5.9189,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 4.957726381729472e-05,
"loss": 5.8765,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 4.954474564939432e-05,
"loss": 5.8812,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.951222748149391e-05,
"loss": 5.8845,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.947970931359351e-05,
"loss": 5.867,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.94471911456931e-05,
"loss": 5.8744,
"step": 8500
},
{
"epoch": 0.04,
"learning_rate": 4.94146729777927e-05,
"loss": 5.8506,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.938215480989229e-05,
"loss": 5.8343,
"step": 9500
},
{
"epoch": 0.04,
"learning_rate": 4.934963664199188e-05,
"loss": 5.829,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.931711847409148e-05,
"loss": 5.8303,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.928460030619107e-05,
"loss": 5.8147,
"step": 11000
},
{
"epoch": 0.04,
"learning_rate": 4.9252082138290664e-05,
"loss": 5.8124,
"step": 11500
},
{
"epoch": 0.05,
"learning_rate": 4.9219563970390256e-05,
"loss": 5.8069,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 4.9187045802489855e-05,
"loss": 5.8026,
"step": 12500
},
{
"epoch": 0.05,
"learning_rate": 4.9154527634589446e-05,
"loss": 5.8412,
"step": 13000
},
{
"epoch": 0.05,
"learning_rate": 4.912200946668904e-05,
"loss": 5.8021,
"step": 13500
},
{
"epoch": 0.05,
"learning_rate": 4.9089491298788636e-05,
"loss": 5.7984,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 4.905697313088823e-05,
"loss": 5.7936,
"step": 14500
},
{
"epoch": 0.06,
"learning_rate": 4.9024454962987826e-05,
"loss": 5.7868,
"step": 15000
},
{
"epoch": 0.06,
"learning_rate": 4.899193679508742e-05,
"loss": 5.7637,
"step": 15500
},
{
"epoch": 0.06,
"learning_rate": 4.895941862718701e-05,
"loss": 5.7895,
"step": 16000
},
{
"epoch": 0.06,
"learning_rate": 4.892690045928661e-05,
"loss": 5.7957,
"step": 16500
},
{
"epoch": 0.07,
"learning_rate": 4.88943822913862e-05,
"loss": 5.7764,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 4.88618641234858e-05,
"loss": 5.7723,
"step": 17500
},
{
"epoch": 0.07,
"learning_rate": 4.882934595558539e-05,
"loss": 5.763,
"step": 18000
},
{
"epoch": 0.07,
"learning_rate": 4.879682778768498e-05,
"loss": 5.7695,
"step": 18500
},
{
"epoch": 0.07,
"learning_rate": 4.876430961978457e-05,
"loss": 5.768,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 4.8731791451884165e-05,
"loss": 5.7776,
"step": 19500
},
{
"epoch": 0.08,
"learning_rate": 4.869927328398376e-05,
"loss": 5.7606,
"step": 20000
},
{
"epoch": 0.08,
"learning_rate": 4.8666755116083355e-05,
"loss": 5.7308,
"step": 20500
},
{
"epoch": 0.08,
"learning_rate": 4.863423694818295e-05,
"loss": 5.7412,
"step": 21000
},
{
"epoch": 0.08,
"learning_rate": 4.8601718780282545e-05,
"loss": 5.7627,
"step": 21500
},
{
"epoch": 0.09,
"learning_rate": 4.8569200612382136e-05,
"loss": 5.7449,
"step": 22000
},
{
"epoch": 0.09,
"learning_rate": 4.8536682444481735e-05,
"loss": 5.7572,
"step": 22500
},
{
"epoch": 0.09,
"learning_rate": 4.8504164276581326e-05,
"loss": 5.7505,
"step": 23000
},
{
"epoch": 0.09,
"learning_rate": 4.8471646108680925e-05,
"loss": 5.7188,
"step": 23500
},
{
"epoch": 0.09,
"learning_rate": 4.8439127940780517e-05,
"loss": 5.7193,
"step": 24000
},
{
"epoch": 0.1,
"learning_rate": 4.8406609772880115e-05,
"loss": 5.7303,
"step": 24500
},
{
"epoch": 0.1,
"learning_rate": 4.8374091604979707e-05,
"loss": 5.7279,
"step": 25000
},
{
"epoch": 0.1,
"learning_rate": 4.83415734370793e-05,
"loss": 5.716,
"step": 25500
},
{
"epoch": 0.1,
"learning_rate": 4.83090552691789e-05,
"loss": 5.7237,
"step": 26000
},
{
"epoch": 0.1,
"learning_rate": 4.827653710127849e-05,
"loss": 5.7276,
"step": 26500
},
{
"epoch": 0.11,
"learning_rate": 4.824401893337808e-05,
"loss": 5.7271,
"step": 27000
},
{
"epoch": 0.11,
"learning_rate": 4.821150076547767e-05,
"loss": 5.7384,
"step": 27500
},
{
"epoch": 0.11,
"learning_rate": 4.817898259757727e-05,
"loss": 5.6949,
"step": 28000
},
{
"epoch": 0.11,
"learning_rate": 4.814646442967686e-05,
"loss": 5.7068,
"step": 28500
},
{
"epoch": 0.11,
"learning_rate": 4.811394626177645e-05,
"loss": 5.7147,
"step": 29000
},
{
"epoch": 0.12,
"learning_rate": 4.808142809387605e-05,
"loss": 5.7132,
"step": 29500
},
{
"epoch": 0.12,
"learning_rate": 4.804890992597564e-05,
"loss": 5.7256,
"step": 30000
},
{
"epoch": 0.12,
"learning_rate": 4.801639175807524e-05,
"loss": 5.7195,
"step": 30500
},
{
"epoch": 0.12,
"learning_rate": 4.7983873590174833e-05,
"loss": 5.7022,
"step": 31000
},
{
"epoch": 0.12,
"learning_rate": 4.7951355422274425e-05,
"loss": 5.6978,
"step": 31500
},
{
"epoch": 0.12,
"learning_rate": 4.7918837254374024e-05,
"loss": 5.674,
"step": 32000
},
{
"epoch": 0.13,
"learning_rate": 4.7886319086473615e-05,
"loss": 5.7044,
"step": 32500
},
{
"epoch": 0.13,
"learning_rate": 4.7853800918573214e-05,
"loss": 5.7076,
"step": 33000
},
{
"epoch": 0.13,
"learning_rate": 4.7821282750672805e-05,
"loss": 5.7146,
"step": 33500
},
{
"epoch": 0.13,
"learning_rate": 4.77887645827724e-05,
"loss": 5.6924,
"step": 34000
},
{
"epoch": 0.13,
"learning_rate": 4.775624641487199e-05,
"loss": 5.6972,
"step": 34500
},
{
"epoch": 0.14,
"learning_rate": 4.772372824697158e-05,
"loss": 5.6929,
"step": 35000
},
{
"epoch": 0.14,
"learning_rate": 4.769121007907118e-05,
"loss": 5.6553,
"step": 35500
},
{
"epoch": 0.14,
"learning_rate": 4.765869191117077e-05,
"loss": 5.6857,
"step": 36000
},
{
"epoch": 0.14,
"learning_rate": 4.762617374327037e-05,
"loss": 5.7021,
"step": 36500
},
{
"epoch": 0.14,
"learning_rate": 4.759365557536996e-05,
"loss": 5.695,
"step": 37000
},
{
"epoch": 0.15,
"learning_rate": 4.756113740746956e-05,
"loss": 5.7066,
"step": 37500
},
{
"epoch": 0.15,
"learning_rate": 4.752861923956915e-05,
"loss": 5.6919,
"step": 38000
},
{
"epoch": 0.15,
"learning_rate": 4.749610107166874e-05,
"loss": 5.6823,
"step": 38500
},
{
"epoch": 0.15,
"learning_rate": 4.746358290376834e-05,
"loss": 5.6699,
"step": 39000
},
{
"epoch": 0.15,
"learning_rate": 4.743106473586793e-05,
"loss": 5.6837,
"step": 39500
},
{
"epoch": 0.16,
"learning_rate": 4.739854656796753e-05,
"loss": 5.6773,
"step": 40000
},
{
"epoch": 0.16,
"learning_rate": 4.736602840006712e-05,
"loss": 5.6796,
"step": 40500
},
{
"epoch": 0.16,
"learning_rate": 4.7333510232166714e-05,
"loss": 5.6663,
"step": 41000
},
{
"epoch": 0.16,
"learning_rate": 4.7300992064266305e-05,
"loss": 5.6548,
"step": 41500
},
{
"epoch": 0.16,
"learning_rate": 4.72684738963659e-05,
"loss": 5.6853,
"step": 42000
},
{
"epoch": 0.17,
"learning_rate": 4.7235955728465495e-05,
"loss": 5.691,
"step": 42500
},
{
"epoch": 0.17,
"learning_rate": 4.720343756056509e-05,
"loss": 5.657,
"step": 43000
},
{
"epoch": 0.17,
"learning_rate": 4.7170919392664686e-05,
"loss": 5.6815,
"step": 43500
},
{
"epoch": 0.17,
"learning_rate": 4.713840122476428e-05,
"loss": 5.6602,
"step": 44000
},
{
"epoch": 0.17,
"learning_rate": 4.710588305686387e-05,
"loss": 5.6691,
"step": 44500
},
{
"epoch": 0.18,
"learning_rate": 4.707336488896347e-05,
"loss": 5.6767,
"step": 45000
},
{
"epoch": 0.18,
"learning_rate": 4.704084672106306e-05,
"loss": 5.6499,
"step": 45500
},
{
"epoch": 0.18,
"learning_rate": 4.700832855316266e-05,
"loss": 5.669,
"step": 46000
},
{
"epoch": 0.18,
"learning_rate": 4.697581038526225e-05,
"loss": 5.6641,
"step": 46500
},
{
"epoch": 0.18,
"learning_rate": 4.694329221736185e-05,
"loss": 5.6509,
"step": 47000
},
{
"epoch": 0.19,
"learning_rate": 4.691077404946144e-05,
"loss": 5.6501,
"step": 47500
},
{
"epoch": 0.19,
"learning_rate": 4.687825588156103e-05,
"loss": 5.6611,
"step": 48000
},
{
"epoch": 0.19,
"learning_rate": 4.684573771366062e-05,
"loss": 5.6626,
"step": 48500
},
{
"epoch": 0.19,
"learning_rate": 4.681321954576022e-05,
"loss": 5.6591,
"step": 49000
},
{
"epoch": 0.19,
"learning_rate": 4.678070137785981e-05,
"loss": 5.6519,
"step": 49500
},
{
"epoch": 0.2,
"learning_rate": 4.6748183209959404e-05,
"loss": 5.6552,
"step": 50000
},
{
"epoch": 0.2,
"learning_rate": 4.6715665042058996e-05,
"loss": 5.6542,
"step": 50500
},
{
"epoch": 0.2,
"learning_rate": 4.6683146874158594e-05,
"loss": 5.6521,
"step": 51000
},
{
"epoch": 0.2,
"learning_rate": 4.6650628706258186e-05,
"loss": 5.6631,
"step": 51500
},
{
"epoch": 0.2,
"learning_rate": 4.6618110538357784e-05,
"loss": 5.6296,
"step": 52000
},
{
"epoch": 0.2,
"learning_rate": 4.6585592370457376e-05,
"loss": 5.6541,
"step": 52500
},
{
"epoch": 0.21,
"learning_rate": 4.6553074202556974e-05,
"loss": 5.6373,
"step": 53000
},
{
"epoch": 0.21,
"learning_rate": 4.6520556034656566e-05,
"loss": 5.6358,
"step": 53500
},
{
"epoch": 0.21,
"learning_rate": 4.648803786675616e-05,
"loss": 5.6654,
"step": 54000
},
{
"epoch": 0.21,
"learning_rate": 4.6455519698855756e-05,
"loss": 5.6481,
"step": 54500
},
{
"epoch": 0.21,
"learning_rate": 4.642300153095535e-05,
"loss": 5.6545,
"step": 55000
},
{
"epoch": 0.22,
"learning_rate": 4.6390483363054946e-05,
"loss": 5.6379,
"step": 55500
},
{
"epoch": 0.22,
"learning_rate": 4.635796519515454e-05,
"loss": 5.6537,
"step": 56000
},
{
"epoch": 0.22,
"learning_rate": 4.632544702725413e-05,
"loss": 5.6611,
"step": 56500
},
{
"epoch": 0.22,
"learning_rate": 4.629292885935372e-05,
"loss": 5.6639,
"step": 57000
},
{
"epoch": 0.22,
"learning_rate": 4.626041069145331e-05,
"loss": 5.6579,
"step": 57500
},
{
"epoch": 0.23,
"learning_rate": 4.622789252355291e-05,
"loss": 5.6714,
"step": 58000
},
{
"epoch": 0.23,
"learning_rate": 4.61953743556525e-05,
"loss": 5.6345,
"step": 58500
},
{
"epoch": 0.23,
"learning_rate": 4.61628561877521e-05,
"loss": 5.6457,
"step": 59000
},
{
"epoch": 0.23,
"learning_rate": 4.613033801985169e-05,
"loss": 5.6613,
"step": 59500
},
{
"epoch": 0.23,
"learning_rate": 4.6097819851951284e-05,
"loss": 5.6517,
"step": 60000
},
{
"epoch": 0.24,
"learning_rate": 4.606530168405088e-05,
"loss": 5.6328,
"step": 60500
},
{
"epoch": 0.24,
"learning_rate": 4.6032783516150474e-05,
"loss": 5.6311,
"step": 61000
},
{
"epoch": 0.24,
"learning_rate": 4.600026534825007e-05,
"loss": 5.6228,
"step": 61500
},
{
"epoch": 0.24,
"learning_rate": 4.5967747180349665e-05,
"loss": 5.6283,
"step": 62000
},
{
"epoch": 0.24,
"learning_rate": 4.593522901244926e-05,
"loss": 5.656,
"step": 62500
},
{
"epoch": 0.25,
"learning_rate": 4.5902710844548855e-05,
"loss": 5.6389,
"step": 63000
},
{
"epoch": 0.25,
"learning_rate": 4.5870192676648446e-05,
"loss": 5.6361,
"step": 63500
},
{
"epoch": 0.25,
"learning_rate": 4.583767450874804e-05,
"loss": 5.6305,
"step": 64000
},
{
"epoch": 0.25,
"learning_rate": 4.580515634084763e-05,
"loss": 5.6367,
"step": 64500
},
{
"epoch": 0.25,
"learning_rate": 4.577263817294723e-05,
"loss": 5.6393,
"step": 65000
},
{
"epoch": 0.26,
"learning_rate": 4.574012000504682e-05,
"loss": 5.6073,
"step": 65500
},
{
"epoch": 0.26,
"learning_rate": 4.570760183714641e-05,
"loss": 5.6139,
"step": 66000
},
{
"epoch": 0.26,
"learning_rate": 4.567508366924601e-05,
"loss": 5.619,
"step": 66500
},
{
"epoch": 0.26,
"learning_rate": 4.56425655013456e-05,
"loss": 5.6307,
"step": 67000
},
{
"epoch": 0.26,
"learning_rate": 4.56100473334452e-05,
"loss": 5.634,
"step": 67500
},
{
"epoch": 0.27,
"learning_rate": 4.557752916554479e-05,
"loss": 5.6234,
"step": 68000
},
{
"epoch": 0.27,
"learning_rate": 4.554501099764439e-05,
"loss": 5.6219,
"step": 68500
},
{
"epoch": 0.27,
"learning_rate": 4.551249282974398e-05,
"loss": 5.6326,
"step": 69000
},
{
"epoch": 0.27,
"learning_rate": 4.547997466184357e-05,
"loss": 5.6293,
"step": 69500
},
{
"epoch": 0.27,
"learning_rate": 4.544745649394317e-05,
"loss": 5.6153,
"step": 70000
},
{
"epoch": 0.28,
"learning_rate": 4.541493832604276e-05,
"loss": 5.6193,
"step": 70500
},
{
"epoch": 0.28,
"learning_rate": 4.5382420158142355e-05,
"loss": 5.62,
"step": 71000
},
{
"epoch": 0.28,
"learning_rate": 4.5349901990241946e-05,
"loss": 5.6276,
"step": 71500
},
{
"epoch": 0.28,
"learning_rate": 4.5317383822341545e-05,
"loss": 5.6065,
"step": 72000
},
{
"epoch": 0.28,
"learning_rate": 4.5284865654441136e-05,
"loss": 5.6008,
"step": 72500
},
{
"epoch": 0.28,
"learning_rate": 4.525234748654073e-05,
"loss": 5.624,
"step": 73000
},
{
"epoch": 0.29,
"learning_rate": 4.5219829318640327e-05,
"loss": 5.6111,
"step": 73500
},
{
"epoch": 0.29,
"learning_rate": 4.518731115073992e-05,
"loss": 5.6309,
"step": 74000
},
{
"epoch": 0.29,
"learning_rate": 4.5154792982839517e-05,
"loss": 5.6356,
"step": 74500
},
{
"epoch": 0.29,
"learning_rate": 4.512227481493911e-05,
"loss": 5.6045,
"step": 75000
},
{
"epoch": 0.29,
"learning_rate": 4.50897566470387e-05,
"loss": 5.6079,
"step": 75500
},
{
"epoch": 0.3,
"learning_rate": 4.50572384791383e-05,
"loss": 5.609,
"step": 76000
},
{
"epoch": 0.3,
"learning_rate": 4.502472031123789e-05,
"loss": 5.6262,
"step": 76500
},
{
"epoch": 0.3,
"learning_rate": 4.499220214333749e-05,
"loss": 5.5966,
"step": 77000
},
{
"epoch": 0.3,
"learning_rate": 4.495968397543708e-05,
"loss": 5.598,
"step": 77500
},
{
"epoch": 0.3,
"learning_rate": 4.492716580753668e-05,
"loss": 5.618,
"step": 78000
},
{
"epoch": 0.31,
"learning_rate": 4.489464763963627e-05,
"loss": 5.6125,
"step": 78500
},
{
"epoch": 0.31,
"learning_rate": 4.486212947173586e-05,
"loss": 5.6182,
"step": 79000
},
{
"epoch": 0.31,
"learning_rate": 4.482961130383545e-05,
"loss": 5.5995,
"step": 79500
},
{
"epoch": 0.31,
"learning_rate": 4.4797093135935045e-05,
"loss": 5.6079,
"step": 80000
},
{
"epoch": 0.31,
"learning_rate": 4.4764574968034643e-05,
"loss": 5.6138,
"step": 80500
},
{
"epoch": 0.32,
"learning_rate": 4.4732056800134235e-05,
"loss": 5.6076,
"step": 81000
},
{
"epoch": 0.32,
"learning_rate": 4.4699538632233834e-05,
"loss": 5.598,
"step": 81500
},
{
"epoch": 0.32,
"learning_rate": 4.4667020464333425e-05,
"loss": 5.6038,
"step": 82000
},
{
"epoch": 0.32,
"learning_rate": 4.463450229643302e-05,
"loss": 5.5977,
"step": 82500
},
{
"epoch": 0.32,
"learning_rate": 4.4601984128532615e-05,
"loss": 5.621,
"step": 83000
},
{
"epoch": 0.33,
"learning_rate": 4.456946596063221e-05,
"loss": 5.6076,
"step": 83500
},
{
"epoch": 0.33,
"learning_rate": 4.4536947792731805e-05,
"loss": 5.6156,
"step": 84000
},
{
"epoch": 0.33,
"learning_rate": 4.45044296248314e-05,
"loss": 5.6229,
"step": 84500
},
{
"epoch": 0.33,
"learning_rate": 4.447191145693099e-05,
"loss": 5.5821,
"step": 85000
},
{
"epoch": 0.33,
"learning_rate": 4.443939328903059e-05,
"loss": 5.5937,
"step": 85500
},
{
"epoch": 0.34,
"learning_rate": 4.440687512113018e-05,
"loss": 5.5981,
"step": 86000
},
{
"epoch": 0.34,
"learning_rate": 4.437435695322977e-05,
"loss": 5.6175,
"step": 86500
},
{
"epoch": 0.34,
"learning_rate": 4.434183878532936e-05,
"loss": 5.5978,
"step": 87000
},
{
"epoch": 0.34,
"learning_rate": 4.430932061742896e-05,
"loss": 5.5959,
"step": 87500
},
{
"epoch": 0.34,
"learning_rate": 4.427680244952855e-05,
"loss": 5.5779,
"step": 88000
},
{
"epoch": 0.35,
"learning_rate": 4.4244284281628144e-05,
"loss": 5.6002,
"step": 88500
},
{
"epoch": 0.35,
"learning_rate": 4.421176611372774e-05,
"loss": 5.6177,
"step": 89000
},
{
"epoch": 0.35,
"learning_rate": 4.4179247945827334e-05,
"loss": 5.6038,
"step": 89500
},
{
"epoch": 0.35,
"learning_rate": 4.414672977792693e-05,
"loss": 5.6033,
"step": 90000
},
{
"epoch": 0.35,
"learning_rate": 4.4114211610026524e-05,
"loss": 5.6066,
"step": 90500
},
{
"epoch": 0.36,
"learning_rate": 4.408169344212612e-05,
"loss": 5.605,
"step": 91000
},
{
"epoch": 0.36,
"learning_rate": 4.4049175274225714e-05,
"loss": 5.613,
"step": 91500
},
{
"epoch": 0.36,
"learning_rate": 4.4016657106325305e-05,
"loss": 5.5924,
"step": 92000
},
{
"epoch": 0.36,
"learning_rate": 4.3984138938424904e-05,
"loss": 5.5983,
"step": 92500
},
{
"epoch": 0.36,
"learning_rate": 4.3951620770524496e-05,
"loss": 5.5947,
"step": 93000
},
{
"epoch": 0.36,
"learning_rate": 4.391910260262409e-05,
"loss": 5.5848,
"step": 93500
},
{
"epoch": 0.37,
"learning_rate": 4.388658443472368e-05,
"loss": 5.5853,
"step": 94000
},
{
"epoch": 0.37,
"learning_rate": 4.385406626682327e-05,
"loss": 5.5949,
"step": 94500
},
{
"epoch": 0.37,
"learning_rate": 4.382154809892287e-05,
"loss": 5.5795,
"step": 95000
},
{
"epoch": 0.37,
"learning_rate": 4.378902993102246e-05,
"loss": 5.5979,
"step": 95500
},
{
"epoch": 0.37,
"learning_rate": 4.375651176312206e-05,
"loss": 5.5873,
"step": 96000
},
{
"epoch": 0.38,
"learning_rate": 4.372399359522165e-05,
"loss": 5.6173,
"step": 96500
},
{
"epoch": 0.38,
"learning_rate": 4.369147542732125e-05,
"loss": 5.6014,
"step": 97000
},
{
"epoch": 0.38,
"learning_rate": 4.365895725942084e-05,
"loss": 5.597,
"step": 97500
},
{
"epoch": 0.38,
"learning_rate": 4.362643909152043e-05,
"loss": 5.5682,
"step": 98000
},
{
"epoch": 0.38,
"learning_rate": 4.359392092362003e-05,
"loss": 5.5819,
"step": 98500
},
{
"epoch": 0.39,
"learning_rate": 4.356140275571962e-05,
"loss": 5.5784,
"step": 99000
},
{
"epoch": 0.39,
"learning_rate": 4.352888458781922e-05,
"loss": 5.5703,
"step": 99500
},
{
"epoch": 0.39,
"learning_rate": 4.349636641991881e-05,
"loss": 5.5705,
"step": 100000
},
{
"epoch": 0.39,
"learning_rate": 4.3463848252018404e-05,
"loss": 5.5742,
"step": 100500
},
{
"epoch": 0.39,
"learning_rate": 4.3431330084118e-05,
"loss": 5.6006,
"step": 101000
},
{
"epoch": 0.4,
"learning_rate": 4.3398811916217594e-05,
"loss": 5.5678,
"step": 101500
},
{
"epoch": 0.4,
"learning_rate": 4.3366293748317186e-05,
"loss": 5.5967,
"step": 102000
},
{
"epoch": 0.4,
"learning_rate": 4.333377558041678e-05,
"loss": 5.5938,
"step": 102500
},
{
"epoch": 0.4,
"learning_rate": 4.3301257412516376e-05,
"loss": 5.5844,
"step": 103000
},
{
"epoch": 0.4,
"learning_rate": 4.326873924461597e-05,
"loss": 5.5882,
"step": 103500
},
{
"epoch": 0.41,
"learning_rate": 4.323622107671556e-05,
"loss": 5.5708,
"step": 104000
},
{
"epoch": 0.41,
"learning_rate": 4.320370290881516e-05,
"loss": 5.5687,
"step": 104500
},
{
"epoch": 0.41,
"learning_rate": 4.317118474091475e-05,
"loss": 5.5928,
"step": 105000
},
{
"epoch": 0.41,
"learning_rate": 4.313866657301435e-05,
"loss": 5.5926,
"step": 105500
},
{
"epoch": 0.41,
"learning_rate": 4.310614840511394e-05,
"loss": 5.577,
"step": 106000
},
{
"epoch": 0.42,
"learning_rate": 4.307363023721354e-05,
"loss": 5.5642,
"step": 106500
},
{
"epoch": 0.42,
"learning_rate": 4.304111206931313e-05,
"loss": 5.5927,
"step": 107000
},
{
"epoch": 0.42,
"learning_rate": 4.300859390141272e-05,
"loss": 5.5954,
"step": 107500
},
{
"epoch": 0.42,
"learning_rate": 4.297607573351232e-05,
"loss": 5.5789,
"step": 108000
},
{
"epoch": 0.42,
"learning_rate": 4.294355756561191e-05,
"loss": 5.5541,
"step": 108500
},
{
"epoch": 0.43,
"learning_rate": 4.29110393977115e-05,
"loss": 5.5594,
"step": 109000
},
{
"epoch": 0.43,
"learning_rate": 4.2878521229811094e-05,
"loss": 5.5868,
"step": 109500
},
{
"epoch": 0.43,
"learning_rate": 4.2846003061910686e-05,
"loss": 5.5632,
"step": 110000
},
{
"epoch": 0.43,
"learning_rate": 4.2813484894010284e-05,
"loss": 5.5895,
"step": 110500
},
{
"epoch": 0.43,
"learning_rate": 4.2780966726109876e-05,
"loss": 5.594,
"step": 111000
},
{
"epoch": 0.44,
"learning_rate": 4.2748448558209475e-05,
"loss": 5.5825,
"step": 111500
},
{
"epoch": 0.44,
"learning_rate": 4.2715930390309066e-05,
"loss": 5.588,
"step": 112000
},
{
"epoch": 0.44,
"learning_rate": 4.2683412222408665e-05,
"loss": 5.5711,
"step": 112500
},
{
"epoch": 0.44,
"learning_rate": 4.2650894054508256e-05,
"loss": 5.5995,
"step": 113000
},
{
"epoch": 0.44,
"learning_rate": 4.261837588660785e-05,
"loss": 5.571,
"step": 113500
},
{
"epoch": 0.44,
"learning_rate": 4.2585857718707446e-05,
"loss": 5.5743,
"step": 114000
},
{
"epoch": 0.45,
"learning_rate": 4.255333955080704e-05,
"loss": 5.5766,
"step": 114500
},
{
"epoch": 0.45,
"learning_rate": 4.2520821382906636e-05,
"loss": 5.5705,
"step": 115000
},
{
"epoch": 0.45,
"learning_rate": 4.248830321500623e-05,
"loss": 5.5684,
"step": 115500
},
{
"epoch": 0.45,
"learning_rate": 4.245578504710582e-05,
"loss": 5.5593,
"step": 116000
},
{
"epoch": 0.45,
"learning_rate": 4.242326687920541e-05,
"loss": 5.5969,
"step": 116500
},
{
"epoch": 0.46,
"learning_rate": 4.2390748711305e-05,
"loss": 5.5724,
"step": 117000
},
{
"epoch": 0.46,
"learning_rate": 4.23582305434046e-05,
"loss": 5.5601,
"step": 117500
},
{
"epoch": 0.46,
"learning_rate": 4.232571237550419e-05,
"loss": 5.572,
"step": 118000
},
{
"epoch": 0.46,
"learning_rate": 4.229319420760379e-05,
"loss": 5.5733,
"step": 118500
},
{
"epoch": 0.46,
"learning_rate": 4.226067603970338e-05,
"loss": 5.5782,
"step": 119000
},
{
"epoch": 0.47,
"learning_rate": 4.2228157871802975e-05,
"loss": 5.5429,
"step": 119500
},
{
"epoch": 0.47,
"learning_rate": 4.219563970390257e-05,
"loss": 5.5873,
"step": 120000
},
{
"epoch": 0.47,
"learning_rate": 4.2163121536002165e-05,
"loss": 5.5707,
"step": 120500
},
{
"epoch": 0.47,
"learning_rate": 4.213060336810176e-05,
"loss": 5.5741,
"step": 121000
},
{
"epoch": 0.47,
"learning_rate": 4.2098085200201355e-05,
"loss": 5.5847,
"step": 121500
},
{
"epoch": 0.48,
"learning_rate": 4.206556703230095e-05,
"loss": 5.5845,
"step": 122000
},
{
"epoch": 0.48,
"learning_rate": 4.2033048864400545e-05,
"loss": 5.5769,
"step": 122500
},
{
"epoch": 0.48,
"learning_rate": 4.2000530696500137e-05,
"loss": 5.5777,
"step": 123000
},
{
"epoch": 0.48,
"learning_rate": 4.1968012528599735e-05,
"loss": 5.5624,
"step": 123500
},
{
"epoch": 0.48,
"learning_rate": 4.1935494360699327e-05,
"loss": 5.5753,
"step": 124000
},
{
"epoch": 0.49,
"learning_rate": 4.190297619279892e-05,
"loss": 5.572,
"step": 124500
},
{
"epoch": 0.49,
"learning_rate": 4.187045802489851e-05,
"loss": 5.5825,
"step": 125000
},
{
"epoch": 0.49,
"learning_rate": 4.183793985699811e-05,
"loss": 5.5775,
"step": 125500
},
{
"epoch": 0.49,
"learning_rate": 4.18054216890977e-05,
"loss": 5.5575,
"step": 126000
},
{
"epoch": 0.49,
"learning_rate": 4.177290352119729e-05,
"loss": 5.5643,
"step": 126500
},
{
"epoch": 0.5,
"learning_rate": 4.174038535329689e-05,
"loss": 5.5624,
"step": 127000
},
{
"epoch": 0.5,
"learning_rate": 4.170786718539648e-05,
"loss": 5.57,
"step": 127500
},
{
"epoch": 0.5,
"learning_rate": 4.167534901749608e-05,
"loss": 5.581,
"step": 128000
},
{
"epoch": 0.5,
"learning_rate": 4.164283084959567e-05,
"loss": 5.5746,
"step": 128500
},
{
"epoch": 0.5,
"learning_rate": 4.161031268169526e-05,
"loss": 5.5536,
"step": 129000
},
{
"epoch": 0.51,
"learning_rate": 4.157779451379486e-05,
"loss": 5.5706,
"step": 129500
},
{
"epoch": 0.51,
"learning_rate": 4.1545276345894453e-05,
"loss": 5.5287,
"step": 130000
},
{
"epoch": 0.51,
"learning_rate": 4.151275817799405e-05,
"loss": 5.5688,
"step": 130500
},
{
"epoch": 0.51,
"learning_rate": 4.1480240010093644e-05,
"loss": 5.5577,
"step": 131000
},
{
"epoch": 0.51,
"learning_rate": 4.1447721842193235e-05,
"loss": 5.5561,
"step": 131500
},
{
"epoch": 0.52,
"learning_rate": 4.141520367429283e-05,
"loss": 5.5381,
"step": 132000
},
{
"epoch": 0.52,
"learning_rate": 4.138268550639242e-05,
"loss": 5.5509,
"step": 132500
},
{
"epoch": 0.52,
"learning_rate": 4.135016733849202e-05,
"loss": 5.5589,
"step": 133000
},
{
"epoch": 0.52,
"learning_rate": 4.131764917059161e-05,
"loss": 5.5433,
"step": 133500
},
{
"epoch": 0.52,
"learning_rate": 4.128513100269121e-05,
"loss": 5.5809,
"step": 134000
},
{
"epoch": 0.52,
"learning_rate": 4.12526128347908e-05,
"loss": 5.5478,
"step": 134500
},
{
"epoch": 0.53,
"learning_rate": 4.122009466689039e-05,
"loss": 5.5663,
"step": 135000
},
{
"epoch": 0.53,
"learning_rate": 4.118757649898999e-05,
"loss": 5.529,
"step": 135500
},
{
"epoch": 0.53,
"learning_rate": 4.115505833108958e-05,
"loss": 5.5764,
"step": 136000
},
{
"epoch": 0.53,
"learning_rate": 4.112254016318918e-05,
"loss": 5.5561,
"step": 136500
},
{
"epoch": 0.53,
"learning_rate": 4.109002199528877e-05,
"loss": 5.5781,
"step": 137000
},
{
"epoch": 0.54,
"learning_rate": 4.105750382738837e-05,
"loss": 5.572,
"step": 137500
},
{
"epoch": 0.54,
"learning_rate": 4.102498565948796e-05,
"loss": 5.5631,
"step": 138000
},
{
"epoch": 0.54,
"learning_rate": 4.099246749158755e-05,
"loss": 5.5649,
"step": 138500
},
{
"epoch": 0.54,
"learning_rate": 4.0959949323687144e-05,
"loss": 5.5553,
"step": 139000
},
{
"epoch": 0.54,
"learning_rate": 4.0927431155786735e-05,
"loss": 5.5324,
"step": 139500
},
{
"epoch": 0.55,
"learning_rate": 4.0894912987886334e-05,
"loss": 5.5562,
"step": 140000
},
{
"epoch": 0.55,
"learning_rate": 4.0862394819985925e-05,
"loss": 5.5345,
"step": 140500
},
{
"epoch": 0.55,
"learning_rate": 4.0829876652085524e-05,
"loss": 5.5513,
"step": 141000
},
{
"epoch": 0.55,
"learning_rate": 4.0797358484185115e-05,
"loss": 5.5401,
"step": 141500
},
{
"epoch": 0.55,
"learning_rate": 4.076484031628471e-05,
"loss": 5.5566,
"step": 142000
},
{
"epoch": 0.56,
"learning_rate": 4.0732322148384306e-05,
"loss": 5.538,
"step": 142500
},
{
"epoch": 0.56,
"learning_rate": 4.06998039804839e-05,
"loss": 5.5495,
"step": 143000
},
{
"epoch": 0.56,
"learning_rate": 4.0667285812583496e-05,
"loss": 5.5551,
"step": 143500
},
{
"epoch": 0.56,
"learning_rate": 4.063476764468309e-05,
"loss": 5.5617,
"step": 144000
},
{
"epoch": 0.56,
"learning_rate": 4.060224947678268e-05,
"loss": 5.5654,
"step": 144500
},
{
"epoch": 0.57,
"learning_rate": 4.056973130888228e-05,
"loss": 5.5485,
"step": 145000
},
{
"epoch": 0.57,
"learning_rate": 4.053721314098187e-05,
"loss": 5.5328,
"step": 145500
},
{
"epoch": 0.57,
"learning_rate": 4.050469497308146e-05,
"loss": 5.5312,
"step": 146000
},
{
"epoch": 0.57,
"learning_rate": 4.047217680518106e-05,
"loss": 5.5306,
"step": 146500
},
{
"epoch": 0.57,
"learning_rate": 4.043965863728065e-05,
"loss": 5.5662,
"step": 147000
},
{
"epoch": 0.58,
"learning_rate": 4.040714046938024e-05,
"loss": 5.531,
"step": 147500
},
{
"epoch": 0.58,
"learning_rate": 4.0374622301479834e-05,
"loss": 5.5477,
"step": 148000
},
{
"epoch": 0.58,
"learning_rate": 4.034210413357943e-05,
"loss": 5.5436,
"step": 148500
},
{
"epoch": 0.58,
"learning_rate": 4.0309585965679024e-05,
"loss": 5.5617,
"step": 149000
},
{
"epoch": 0.58,
"learning_rate": 4.027706779777862e-05,
"loss": 5.5636,
"step": 149500
},
{
"epoch": 0.59,
"learning_rate": 4.0244549629878214e-05,
"loss": 5.5282,
"step": 150000
},
{
"epoch": 0.59,
"learning_rate": 4.021203146197781e-05,
"loss": 5.55,
"step": 150500
},
{
"epoch": 0.59,
"learning_rate": 4.0179513294077404e-05,
"loss": 5.5378,
"step": 151000
},
{
"epoch": 0.59,
"learning_rate": 4.0146995126176996e-05,
"loss": 5.5463,
"step": 151500
},
{
"epoch": 0.59,
"learning_rate": 4.0114476958276594e-05,
"loss": 5.5292,
"step": 152000
},
{
"epoch": 0.6,
"learning_rate": 4.0081958790376186e-05,
"loss": 5.5282,
"step": 152500
},
{
"epoch": 0.6,
"learning_rate": 4.0049440622475784e-05,
"loss": 5.5575,
"step": 153000
},
{
"epoch": 0.6,
"learning_rate": 4.0016922454575376e-05,
"loss": 5.5299,
"step": 153500
},
{
"epoch": 0.6,
"learning_rate": 3.998440428667497e-05,
"loss": 5.55,
"step": 154000
},
{
"epoch": 0.6,
"learning_rate": 3.995188611877456e-05,
"loss": 5.5286,
"step": 154500
},
{
"epoch": 0.6,
"learning_rate": 3.991936795087415e-05,
"loss": 5.5405,
"step": 155000
},
{
"epoch": 0.61,
"learning_rate": 3.988684978297375e-05,
"loss": 5.5244,
"step": 155500
},
{
"epoch": 0.61,
"learning_rate": 3.985433161507334e-05,
"loss": 5.5208,
"step": 156000
},
{
"epoch": 0.61,
"learning_rate": 3.982181344717294e-05,
"loss": 5.5472,
"step": 156500
},
{
"epoch": 0.61,
"learning_rate": 3.978929527927253e-05,
"loss": 5.5582,
"step": 157000
},
{
"epoch": 0.61,
"learning_rate": 3.975677711137212e-05,
"loss": 5.5539,
"step": 157500
},
{
"epoch": 0.62,
"learning_rate": 3.972425894347172e-05,
"loss": 5.5463,
"step": 158000
},
{
"epoch": 0.62,
"learning_rate": 3.969174077557131e-05,
"loss": 5.5498,
"step": 158500
},
{
"epoch": 0.62,
"learning_rate": 3.965922260767091e-05,
"loss": 5.5299,
"step": 159000
},
{
"epoch": 0.62,
"learning_rate": 3.96267044397705e-05,
"loss": 5.5384,
"step": 159500
},
{
"epoch": 0.62,
"learning_rate": 3.95941862718701e-05,
"loss": 5.5288,
"step": 160000
},
{
"epoch": 0.63,
"learning_rate": 3.956166810396969e-05,
"loss": 5.5213,
"step": 160500
},
{
"epoch": 0.63,
"learning_rate": 3.9529149936069285e-05,
"loss": 5.5386,
"step": 161000
},
{
"epoch": 0.63,
"learning_rate": 3.9496631768168876e-05,
"loss": 5.5413,
"step": 161500
},
{
"epoch": 0.63,
"learning_rate": 3.946411360026847e-05,
"loss": 5.5387,
"step": 162000
},
{
"epoch": 0.63,
"learning_rate": 3.9431595432368066e-05,
"loss": 5.5342,
"step": 162500
},
{
"epoch": 0.64,
"learning_rate": 3.939907726446766e-05,
"loss": 5.5485,
"step": 163000
},
{
"epoch": 0.64,
"learning_rate": 3.936655909656725e-05,
"loss": 5.532,
"step": 163500
},
{
"epoch": 0.64,
"learning_rate": 3.933404092866685e-05,
"loss": 5.518,
"step": 164000
},
{
"epoch": 0.64,
"learning_rate": 3.930152276076644e-05,
"loss": 5.5319,
"step": 164500
},
{
"epoch": 0.64,
"learning_rate": 3.926900459286604e-05,
"loss": 5.5335,
"step": 165000
},
{
"epoch": 0.65,
"learning_rate": 3.923648642496563e-05,
"loss": 5.5488,
"step": 165500
},
{
"epoch": 0.65,
"learning_rate": 3.920396825706523e-05,
"loss": 5.5252,
"step": 166000
},
{
"epoch": 0.65,
"learning_rate": 3.917145008916482e-05,
"loss": 5.5141,
"step": 166500
},
{
"epoch": 0.65,
"learning_rate": 3.913893192126441e-05,
"loss": 5.5417,
"step": 167000
},
{
"epoch": 0.65,
"learning_rate": 3.910641375336401e-05,
"loss": 5.5355,
"step": 167500
},
{
"epoch": 0.66,
"learning_rate": 3.90738955854636e-05,
"loss": 5.5415,
"step": 168000
},
{
"epoch": 0.66,
"learning_rate": 3.904137741756319e-05,
"loss": 5.534,
"step": 168500
},
{
"epoch": 0.66,
"learning_rate": 3.9008859249662785e-05,
"loss": 5.5257,
"step": 169000
},
{
"epoch": 0.66,
"learning_rate": 3.897634108176238e-05,
"loss": 5.5221,
"step": 169500
},
{
"epoch": 0.66,
"learning_rate": 3.8943822913861975e-05,
"loss": 5.5364,
"step": 170000
},
{
"epoch": 0.67,
"learning_rate": 3.8911304745961566e-05,
"loss": 5.5296,
"step": 170500
},
{
"epoch": 0.67,
"learning_rate": 3.8878786578061165e-05,
"loss": 5.5197,
"step": 171000
},
{
"epoch": 0.67,
"learning_rate": 3.8846268410160756e-05,
"loss": 5.5615,
"step": 171500
},
{
"epoch": 0.67,
"learning_rate": 3.8813750242260355e-05,
"loss": 5.5354,
"step": 172000
},
{
"epoch": 0.67,
"learning_rate": 3.8781232074359947e-05,
"loss": 5.5192,
"step": 172500
},
{
"epoch": 0.68,
"learning_rate": 3.874871390645954e-05,
"loss": 5.5497,
"step": 173000
},
{
"epoch": 0.68,
"learning_rate": 3.8716195738559137e-05,
"loss": 5.5419,
"step": 173500
},
{
"epoch": 0.68,
"learning_rate": 3.868367757065873e-05,
"loss": 5.5366,
"step": 174000
},
{
"epoch": 0.68,
"learning_rate": 3.865115940275833e-05,
"loss": 5.5031,
"step": 174500
},
{
"epoch": 0.68,
"learning_rate": 3.861864123485792e-05,
"loss": 5.5425,
"step": 175000
},
{
"epoch": 0.68,
"learning_rate": 3.858612306695752e-05,
"loss": 5.5195,
"step": 175500
},
{
"epoch": 0.69,
"learning_rate": 3.855360489905711e-05,
"loss": 5.5235,
"step": 176000
},
{
"epoch": 0.69,
"learning_rate": 3.85210867311567e-05,
"loss": 5.5299,
"step": 176500
},
{
"epoch": 0.69,
"learning_rate": 3.848856856325629e-05,
"loss": 5.5233,
"step": 177000
},
{
"epoch": 0.69,
"learning_rate": 3.845605039535588e-05,
"loss": 5.5386,
"step": 177500
},
{
"epoch": 0.69,
"learning_rate": 3.842353222745548e-05,
"loss": 5.5233,
"step": 178000
},
{
"epoch": 0.7,
"learning_rate": 3.839101405955507e-05,
"loss": 5.5128,
"step": 178500
},
{
"epoch": 0.7,
"learning_rate": 3.8358495891654665e-05,
"loss": 5.5286,
"step": 179000
},
{
"epoch": 0.7,
"learning_rate": 3.8325977723754263e-05,
"loss": 5.5022,
"step": 179500
},
{
"epoch": 0.7,
"learning_rate": 3.8293459555853855e-05,
"loss": 5.523,
"step": 180000
},
{
"epoch": 0.7,
"learning_rate": 3.8260941387953454e-05,
"loss": 5.5229,
"step": 180500
},
{
"epoch": 0.71,
"learning_rate": 3.8228423220053045e-05,
"loss": 5.5352,
"step": 181000
},
{
"epoch": 0.71,
"learning_rate": 3.8195905052152644e-05,
"loss": 5.5312,
"step": 181500
},
{
"epoch": 0.71,
"learning_rate": 3.8163386884252235e-05,
"loss": 5.5209,
"step": 182000
},
{
"epoch": 0.71,
"learning_rate": 3.813086871635183e-05,
"loss": 5.5273,
"step": 182500
},
{
"epoch": 0.71,
"learning_rate": 3.8098350548451425e-05,
"loss": 5.5092,
"step": 183000
},
{
"epoch": 0.72,
"learning_rate": 3.806583238055102e-05,
"loss": 5.5154,
"step": 183500
},
{
"epoch": 0.72,
"learning_rate": 3.803331421265061e-05,
"loss": 5.5246,
"step": 184000
},
{
"epoch": 0.72,
"learning_rate": 3.80007960447502e-05,
"loss": 5.5372,
"step": 184500
},
{
"epoch": 0.72,
"learning_rate": 3.79682778768498e-05,
"loss": 5.5258,
"step": 185000
},
{
"epoch": 0.72,
"learning_rate": 3.793575970894939e-05,
"loss": 5.5236,
"step": 185500
},
{
"epoch": 0.73,
"learning_rate": 3.790324154104898e-05,
"loss": 5.4988,
"step": 186000
},
{
"epoch": 0.73,
"learning_rate": 3.787072337314858e-05,
"loss": 5.5234,
"step": 186500
},
{
"epoch": 0.73,
"learning_rate": 3.783820520524817e-05,
"loss": 5.5285,
"step": 187000
},
{
"epoch": 0.73,
"learning_rate": 3.780568703734777e-05,
"loss": 5.5015,
"step": 187500
},
{
"epoch": 0.73,
"learning_rate": 3.777316886944736e-05,
"loss": 5.5222,
"step": 188000
},
{
"epoch": 0.74,
"learning_rate": 3.7740650701546954e-05,
"loss": 5.5113,
"step": 188500
},
{
"epoch": 0.74,
"learning_rate": 3.770813253364655e-05,
"loss": 5.524,
"step": 189000
},
{
"epoch": 0.74,
"learning_rate": 3.7675614365746144e-05,
"loss": 5.5357,
"step": 189500
},
{
"epoch": 0.74,
"learning_rate": 3.764309619784574e-05,
"loss": 5.5221,
"step": 190000
},
{
"epoch": 0.74,
"learning_rate": 3.7610578029945334e-05,
"loss": 5.5226,
"step": 190500
},
{
"epoch": 0.75,
"learning_rate": 3.7578059862044925e-05,
"loss": 5.5204,
"step": 191000
},
{
"epoch": 0.75,
"learning_rate": 3.754554169414452e-05,
"loss": 5.5201,
"step": 191500
},
{
"epoch": 0.75,
"learning_rate": 3.751302352624411e-05,
"loss": 5.521,
"step": 192000
},
{
"epoch": 0.75,
"learning_rate": 3.748050535834371e-05,
"loss": 5.5334,
"step": 192500
},
{
"epoch": 0.75,
"learning_rate": 3.74479871904433e-05,
"loss": 5.4979,
"step": 193000
},
{
"epoch": 0.76,
"learning_rate": 3.74154690225429e-05,
"loss": 5.5098,
"step": 193500
},
{
"epoch": 0.76,
"learning_rate": 3.738295085464249e-05,
"loss": 5.5429,
"step": 194000
},
{
"epoch": 0.76,
"learning_rate": 3.735043268674209e-05,
"loss": 5.506,
"step": 194500
},
{
"epoch": 0.76,
"learning_rate": 3.731791451884168e-05,
"loss": 5.5116,
"step": 195000
},
{
"epoch": 0.76,
"learning_rate": 3.728539635094127e-05,
"loss": 5.5075,
"step": 195500
},
{
"epoch": 0.76,
"learning_rate": 3.725287818304087e-05,
"loss": 5.5159,
"step": 196000
},
{
"epoch": 0.77,
"learning_rate": 3.722036001514046e-05,
"loss": 5.5192,
"step": 196500
},
{
"epoch": 0.77,
"learning_rate": 3.718784184724006e-05,
"loss": 5.5109,
"step": 197000
},
{
"epoch": 0.77,
"learning_rate": 3.715532367933965e-05,
"loss": 5.496,
"step": 197500
},
{
"epoch": 0.77,
"learning_rate": 3.712280551143924e-05,
"loss": 5.5329,
"step": 198000
},
{
"epoch": 0.77,
"learning_rate": 3.709028734353884e-05,
"loss": 5.4942,
"step": 198500
},
{
"epoch": 0.78,
"learning_rate": 3.705776917563843e-05,
"loss": 5.5314,
"step": 199000
},
{
"epoch": 0.78,
"learning_rate": 3.7025251007738024e-05,
"loss": 5.5117,
"step": 199500
},
{
"epoch": 0.78,
"learning_rate": 3.6992732839837616e-05,
"loss": 5.5068,
"step": 200000
},
{
"epoch": 0.78,
"learning_rate": 3.6960214671937214e-05,
"loss": 5.5034,
"step": 200500
},
{
"epoch": 0.78,
"learning_rate": 3.6927696504036806e-05,
"loss": 5.5165,
"step": 201000
},
{
"epoch": 0.79,
"learning_rate": 3.68951783361364e-05,
"loss": 5.5228,
"step": 201500
},
{
"epoch": 0.79,
"learning_rate": 3.6862660168235996e-05,
"loss": 5.522,
"step": 202000
},
{
"epoch": 0.79,
"learning_rate": 3.683014200033559e-05,
"loss": 5.5127,
"step": 202500
},
{
"epoch": 0.79,
"learning_rate": 3.6797623832435186e-05,
"loss": 5.498,
"step": 203000
},
{
"epoch": 0.79,
"learning_rate": 3.676510566453478e-05,
"loss": 5.5175,
"step": 203500
},
{
"epoch": 0.8,
"learning_rate": 3.6732587496634376e-05,
"loss": 5.4934,
"step": 204000
},
{
"epoch": 0.8,
"learning_rate": 3.670006932873397e-05,
"loss": 5.5252,
"step": 204500
},
{
"epoch": 0.8,
"learning_rate": 3.666755116083356e-05,
"loss": 5.4944,
"step": 205000
},
{
"epoch": 0.8,
"learning_rate": 3.663503299293316e-05,
"loss": 5.4928,
"step": 205500
},
{
"epoch": 0.8,
"learning_rate": 3.660251482503275e-05,
"loss": 5.4842,
"step": 206000
},
{
"epoch": 0.81,
"learning_rate": 3.656999665713234e-05,
"loss": 5.4963,
"step": 206500
},
{
"epoch": 0.81,
"learning_rate": 3.653747848923193e-05,
"loss": 5.4839,
"step": 207000
},
{
"epoch": 0.81,
"learning_rate": 3.6504960321331524e-05,
"loss": 5.5047,
"step": 207500
},
{
"epoch": 0.81,
"learning_rate": 3.647244215343112e-05,
"loss": 5.5055,
"step": 208000
},
{
"epoch": 0.81,
"learning_rate": 3.6439923985530714e-05,
"loss": 5.467,
"step": 208500
},
{
"epoch": 0.82,
"learning_rate": 3.640740581763031e-05,
"loss": 5.4914,
"step": 209000
},
{
"epoch": 0.82,
"learning_rate": 3.6374887649729904e-05,
"loss": 5.4803,
"step": 209500
},
{
"epoch": 0.82,
"learning_rate": 3.63423694818295e-05,
"loss": 5.4874,
"step": 210000
},
{
"epoch": 0.82,
"learning_rate": 3.6309851313929095e-05,
"loss": 5.4186,
"step": 210500
},
{
"epoch": 0.82,
"learning_rate": 3.6277333146028686e-05,
"loss": 5.3267,
"step": 211000
},
{
"epoch": 0.83,
"learning_rate": 3.6244814978128285e-05,
"loss": 5.2962,
"step": 211500
},
{
"epoch": 0.83,
"learning_rate": 3.6212296810227876e-05,
"loss": 5.2441,
"step": 212000
},
{
"epoch": 0.83,
"learning_rate": 3.6179778642327475e-05,
"loss": 5.1665,
"step": 212500
},
{
"epoch": 0.83,
"learning_rate": 3.6147260474427066e-05,
"loss": 5.1096,
"step": 213000
},
{
"epoch": 0.83,
"learning_rate": 3.611474230652666e-05,
"loss": 5.0509,
"step": 213500
},
{
"epoch": 0.84,
"learning_rate": 3.608222413862625e-05,
"loss": 5.0168,
"step": 214000
},
{
"epoch": 0.84,
"learning_rate": 3.604970597072584e-05,
"loss": 4.9811,
"step": 214500
},
{
"epoch": 0.84,
"learning_rate": 3.601718780282544e-05,
"loss": 4.9535,
"step": 215000
},
{
"epoch": 0.84,
"learning_rate": 3.598466963492503e-05,
"loss": 4.9113,
"step": 215500
},
{
"epoch": 0.84,
"learning_rate": 3.595215146702463e-05,
"loss": 4.8715,
"step": 216000
},
{
"epoch": 0.84,
"learning_rate": 3.591963329912422e-05,
"loss": 4.8401,
"step": 216500
},
{
"epoch": 0.85,
"learning_rate": 3.588711513122381e-05,
"loss": 4.8103,
"step": 217000
},
{
"epoch": 0.85,
"learning_rate": 3.585459696332341e-05,
"loss": 4.7615,
"step": 217500
},
{
"epoch": 0.85,
"learning_rate": 3.5822078795423e-05,
"loss": 4.7477,
"step": 218000
},
{
"epoch": 0.85,
"learning_rate": 3.57895606275226e-05,
"loss": 4.6929,
"step": 218500
},
{
"epoch": 0.85,
"learning_rate": 3.575704245962219e-05,
"loss": 4.6674,
"step": 219000
},
{
"epoch": 0.86,
"learning_rate": 3.572452429172179e-05,
"loss": 4.6552,
"step": 219500
},
{
"epoch": 0.86,
"learning_rate": 3.569200612382138e-05,
"loss": 4.5982,
"step": 220000
},
{
"epoch": 0.86,
"learning_rate": 3.5659487955920975e-05,
"loss": 4.4937,
"step": 220500
},
{
"epoch": 0.86,
"learning_rate": 3.562696978802057e-05,
"loss": 4.3654,
"step": 221000
},
{
"epoch": 0.86,
"learning_rate": 3.5594451620120165e-05,
"loss": 4.249,
"step": 221500
},
{
"epoch": 0.87,
"learning_rate": 3.5561933452219757e-05,
"loss": 4.122,
"step": 222000
},
{
"epoch": 0.87,
"learning_rate": 3.552941528431935e-05,
"loss": 3.9966,
"step": 222500
},
{
"epoch": 0.87,
"learning_rate": 3.549689711641894e-05,
"loss": 3.8664,
"step": 223000
},
{
"epoch": 0.87,
"learning_rate": 3.546437894851854e-05,
"loss": 3.7561,
"step": 223500
},
{
"epoch": 0.87,
"learning_rate": 3.543186078061813e-05,
"loss": 3.6266,
"step": 224000
},
{
"epoch": 0.88,
"learning_rate": 3.539934261271773e-05,
"loss": 3.5324,
"step": 224500
},
{
"epoch": 0.88,
"learning_rate": 3.536682444481732e-05,
"loss": 3.439,
"step": 225000
},
{
"epoch": 0.88,
"learning_rate": 3.533430627691692e-05,
"loss": 3.3637,
"step": 225500
},
{
"epoch": 0.88,
"learning_rate": 3.530178810901651e-05,
"loss": 3.2801,
"step": 226000
},
{
"epoch": 0.88,
"learning_rate": 3.52692699411161e-05,
"loss": 3.1894,
"step": 226500
},
{
"epoch": 0.89,
"learning_rate": 3.52367517732157e-05,
"loss": 3.1034,
"step": 227000
},
{
"epoch": 0.89,
"learning_rate": 3.520423360531529e-05,
"loss": 3.0252,
"step": 227500
},
{
"epoch": 0.89,
"learning_rate": 3.517171543741489e-05,
"loss": 2.9289,
"step": 228000
},
{
"epoch": 0.89,
"learning_rate": 3.513919726951448e-05,
"loss": 2.8581,
"step": 228500
},
{
"epoch": 0.89,
"learning_rate": 3.5106679101614073e-05,
"loss": 2.8208,
"step": 229000
},
{
"epoch": 0.9,
"learning_rate": 3.5074160933713665e-05,
"loss": 2.7686,
"step": 229500
},
{
"epoch": 0.9,
"learning_rate": 3.504164276581326e-05,
"loss": 2.7256,
"step": 230000
},
{
"epoch": 0.9,
"learning_rate": 3.5009124597912855e-05,
"loss": 2.6784,
"step": 230500
},
{
"epoch": 0.9,
"learning_rate": 3.497660643001245e-05,
"loss": 2.6355,
"step": 231000
},
{
"epoch": 0.9,
"learning_rate": 3.4944088262112045e-05,
"loss": 2.6078,
"step": 231500
},
{
"epoch": 0.91,
"learning_rate": 3.491157009421164e-05,
"loss": 2.575,
"step": 232000
},
{
"epoch": 0.91,
"learning_rate": 3.487905192631123e-05,
"loss": 2.5266,
"step": 232500
},
{
"epoch": 0.91,
"learning_rate": 3.484653375841083e-05,
"loss": 2.5291,
"step": 233000
},
{
"epoch": 0.91,
"learning_rate": 3.481401559051042e-05,
"loss": 2.4816,
"step": 233500
},
{
"epoch": 0.91,
"learning_rate": 3.478149742261002e-05,
"loss": 2.4666,
"step": 234000
},
{
"epoch": 0.92,
"learning_rate": 3.474897925470961e-05,
"loss": 2.4444,
"step": 234500
},
{
"epoch": 0.92,
"learning_rate": 3.471646108680921e-05,
"loss": 2.4253,
"step": 235000
},
{
"epoch": 0.92,
"learning_rate": 3.46839429189088e-05,
"loss": 2.403,
"step": 235500
},
{
"epoch": 0.92,
"learning_rate": 3.465142475100839e-05,
"loss": 2.3755,
"step": 236000
},
{
"epoch": 0.92,
"learning_rate": 3.461890658310798e-05,
"loss": 2.35,
"step": 236500
},
{
"epoch": 0.92,
"learning_rate": 3.4586388415207574e-05,
"loss": 2.3292,
"step": 237000
},
{
"epoch": 0.93,
"learning_rate": 3.455387024730717e-05,
"loss": 2.3076,
"step": 237500
},
{
"epoch": 0.93,
"learning_rate": 3.4521352079406764e-05,
"loss": 2.2888,
"step": 238000
},
{
"epoch": 0.93,
"learning_rate": 3.448883391150636e-05,
"loss": 2.267,
"step": 238500
},
{
"epoch": 0.93,
"learning_rate": 3.4456315743605954e-05,
"loss": 2.2615,
"step": 239000
},
{
"epoch": 0.93,
"learning_rate": 3.4423797575705545e-05,
"loss": 2.2433,
"step": 239500
},
{
"epoch": 0.94,
"learning_rate": 3.4391279407805144e-05,
"loss": 2.2348,
"step": 240000
},
{
"epoch": 0.94,
"learning_rate": 3.4358761239904735e-05,
"loss": 2.2083,
"step": 240500
},
{
"epoch": 0.94,
"learning_rate": 3.4326243072004334e-05,
"loss": 2.2022,
"step": 241000
},
{
"epoch": 0.94,
"learning_rate": 3.4293724904103926e-05,
"loss": 2.2007,
"step": 241500
},
{
"epoch": 0.94,
"learning_rate": 3.426120673620352e-05,
"loss": 2.1813,
"step": 242000
},
{
"epoch": 0.95,
"learning_rate": 3.4228688568303116e-05,
"loss": 2.157,
"step": 242500
},
{
"epoch": 0.95,
"learning_rate": 3.419617040040271e-05,
"loss": 2.1462,
"step": 243000
},
{
"epoch": 0.95,
"learning_rate": 3.41636522325023e-05,
"loss": 2.1346,
"step": 243500
},
{
"epoch": 0.95,
"learning_rate": 3.41311340646019e-05,
"loss": 2.1271,
"step": 244000
},
{
"epoch": 0.95,
"learning_rate": 3.409861589670149e-05,
"loss": 2.1027,
"step": 244500
},
{
"epoch": 0.96,
"learning_rate": 3.406609772880108e-05,
"loss": 2.1023,
"step": 245000
},
{
"epoch": 0.96,
"learning_rate": 3.403357956090067e-05,
"loss": 2.0849,
"step": 245500
},
{
"epoch": 0.96,
"learning_rate": 3.400106139300027e-05,
"loss": 2.0843,
"step": 246000
},
{
"epoch": 0.96,
"learning_rate": 3.396854322509986e-05,
"loss": 2.0666,
"step": 246500
},
{
"epoch": 0.96,
"learning_rate": 3.393602505719946e-05,
"loss": 2.0578,
"step": 247000
},
{
"epoch": 0.97,
"learning_rate": 3.390350688929905e-05,
"loss": 2.0446,
"step": 247500
},
{
"epoch": 0.97,
"learning_rate": 3.3870988721398644e-05,
"loss": 2.0486,
"step": 248000
},
{
"epoch": 0.97,
"learning_rate": 3.383847055349824e-05,
"loss": 2.0247,
"step": 248500
},
{
"epoch": 0.97,
"learning_rate": 3.3805952385597834e-05,
"loss": 2.0286,
"step": 249000
},
{
"epoch": 0.97,
"learning_rate": 3.377343421769743e-05,
"loss": 2.0144,
"step": 249500
},
{
"epoch": 0.98,
"learning_rate": 3.3740916049797024e-05,
"loss": 2.0024,
"step": 250000
},
{
"epoch": 0.98,
"learning_rate": 3.370839788189662e-05,
"loss": 1.9928,
"step": 250500
},
{
"epoch": 0.98,
"learning_rate": 3.3675879713996214e-05,
"loss": 1.9791,
"step": 251000
},
{
"epoch": 0.98,
"learning_rate": 3.3643361546095806e-05,
"loss": 1.9735,
"step": 251500
},
{
"epoch": 0.98,
"learning_rate": 3.36108433781954e-05,
"loss": 1.9699,
"step": 252000
},
{
"epoch": 0.99,
"learning_rate": 3.357832521029499e-05,
"loss": 1.9683,
"step": 252500
},
{
"epoch": 0.99,
"learning_rate": 3.354580704239459e-05,
"loss": 1.9456,
"step": 253000
},
{
"epoch": 0.99,
"learning_rate": 3.351328887449418e-05,
"loss": 1.9365,
"step": 253500
},
{
"epoch": 0.99,
"learning_rate": 3.348077070659378e-05,
"loss": 1.9347,
"step": 254000
},
{
"epoch": 0.99,
"learning_rate": 3.344825253869337e-05,
"loss": 1.9325,
"step": 254500
},
{
"epoch": 1.0,
"learning_rate": 3.341573437079296e-05,
"loss": 1.9314,
"step": 255000
},
{
"epoch": 1.0,
"learning_rate": 3.338321620289256e-05,
"loss": 1.9199,
"step": 255500
},
{
"epoch": 1.0,
"learning_rate": 3.335069803499215e-05,
"loss": 1.905,
"step": 256000
},
{
"epoch": 1.0,
"learning_rate": 3.331817986709175e-05,
"loss": 1.9004,
"step": 256500
},
{
"epoch": 1.0,
"learning_rate": 3.328566169919134e-05,
"loss": 1.8919,
"step": 257000
},
{
"epoch": 1.0,
"learning_rate": 3.325314353129093e-05,
"loss": 1.8801,
"step": 257500
},
{
"epoch": 1.01,
"learning_rate": 3.322062536339053e-05,
"loss": 1.8772,
"step": 258000
},
{
"epoch": 1.01,
"learning_rate": 3.318810719549012e-05,
"loss": 1.8774,
"step": 258500
},
{
"epoch": 1.01,
"learning_rate": 3.3155589027589714e-05,
"loss": 1.8752,
"step": 259000
},
{
"epoch": 1.01,
"learning_rate": 3.3123070859689306e-05,
"loss": 1.852,
"step": 259500
},
{
"epoch": 1.01,
"learning_rate": 3.3090552691788905e-05,
"loss": 1.8492,
"step": 260000
},
{
"epoch": 1.02,
"learning_rate": 3.3058034523888496e-05,
"loss": 1.8562,
"step": 260500
},
{
"epoch": 1.02,
"learning_rate": 3.302551635598809e-05,
"loss": 1.843,
"step": 261000
},
{
"epoch": 1.02,
"learning_rate": 3.2992998188087686e-05,
"loss": 1.8321,
"step": 261500
},
{
"epoch": 1.02,
"learning_rate": 3.296048002018728e-05,
"loss": 1.8233,
"step": 262000
},
{
"epoch": 1.02,
"learning_rate": 3.2927961852286876e-05,
"loss": 1.8265,
"step": 262500
},
{
"epoch": 1.03,
"learning_rate": 3.289544368438647e-05,
"loss": 1.818,
"step": 263000
},
{
"epoch": 1.03,
"learning_rate": 3.2862925516486066e-05,
"loss": 1.8186,
"step": 263500
},
{
"epoch": 1.03,
"learning_rate": 3.283040734858566e-05,
"loss": 1.7974,
"step": 264000
},
{
"epoch": 1.03,
"learning_rate": 3.279788918068525e-05,
"loss": 1.7867,
"step": 264500
},
{
"epoch": 1.03,
"learning_rate": 3.276537101278485e-05,
"loss": 1.7938,
"step": 265000
},
{
"epoch": 1.04,
"learning_rate": 3.273285284488444e-05,
"loss": 1.7877,
"step": 265500
},
{
"epoch": 1.04,
"learning_rate": 3.270033467698403e-05,
"loss": 1.7923,
"step": 266000
},
{
"epoch": 1.04,
"learning_rate": 3.266781650908362e-05,
"loss": 1.7794,
"step": 266500
},
{
"epoch": 1.04,
"learning_rate": 3.263529834118322e-05,
"loss": 1.7809,
"step": 267000
},
{
"epoch": 1.04,
"learning_rate": 3.260278017328281e-05,
"loss": 1.7654,
"step": 267500
},
{
"epoch": 1.05,
"learning_rate": 3.2570262005382405e-05,
"loss": 1.7569,
"step": 268000
},
{
"epoch": 1.05,
"learning_rate": 3.2537743837482e-05,
"loss": 1.7653,
"step": 268500
},
{
"epoch": 1.05,
"learning_rate": 3.2505225669581595e-05,
"loss": 1.7554,
"step": 269000
},
{
"epoch": 1.05,
"learning_rate": 3.247270750168119e-05,
"loss": 1.7461,
"step": 269500
},
{
"epoch": 1.05,
"learning_rate": 3.2440189333780785e-05,
"loss": 1.7386,
"step": 270000
},
{
"epoch": 1.06,
"learning_rate": 3.2407671165880376e-05,
"loss": 1.738,
"step": 270500
},
{
"epoch": 1.06,
"learning_rate": 3.2375152997979975e-05,
"loss": 1.7343,
"step": 271000
},
{
"epoch": 1.06,
"learning_rate": 3.2342634830079567e-05,
"loss": 1.7334,
"step": 271500
},
{
"epoch": 1.06,
"learning_rate": 3.2310116662179165e-05,
"loss": 1.7163,
"step": 272000
},
{
"epoch": 1.06,
"learning_rate": 3.2277598494278757e-05,
"loss": 1.7176,
"step": 272500
},
{
"epoch": 1.07,
"learning_rate": 3.2245080326378355e-05,
"loss": 1.7203,
"step": 273000
},
{
"epoch": 1.07,
"learning_rate": 3.221256215847795e-05,
"loss": 1.7172,
"step": 273500
},
{
"epoch": 1.07,
"learning_rate": 3.218004399057754e-05,
"loss": 1.703,
"step": 274000
},
{
"epoch": 1.07,
"learning_rate": 3.214752582267713e-05,
"loss": 1.6993,
"step": 274500
},
{
"epoch": 1.07,
"learning_rate": 3.211500765477672e-05,
"loss": 1.6958,
"step": 275000
},
{
"epoch": 1.08,
"learning_rate": 3.208248948687632e-05,
"loss": 1.6965,
"step": 275500
},
{
"epoch": 1.08,
"learning_rate": 3.204997131897591e-05,
"loss": 1.6939,
"step": 276000
},
{
"epoch": 1.08,
"learning_rate": 3.20174531510755e-05,
"loss": 1.6792,
"step": 276500
},
{
"epoch": 1.08,
"learning_rate": 3.19849349831751e-05,
"loss": 1.6956,
"step": 277000
},
{
"epoch": 1.08,
"learning_rate": 3.195241681527469e-05,
"loss": 1.6888,
"step": 277500
},
{
"epoch": 1.08,
"learning_rate": 3.191989864737429e-05,
"loss": 1.6803,
"step": 278000
},
{
"epoch": 1.09,
"learning_rate": 3.1887380479473883e-05,
"loss": 1.6753,
"step": 278500
},
{
"epoch": 1.09,
"learning_rate": 3.185486231157348e-05,
"loss": 1.6731,
"step": 279000
},
{
"epoch": 1.09,
"learning_rate": 3.1822344143673074e-05,
"loss": 1.6584,
"step": 279500
},
{
"epoch": 1.09,
"learning_rate": 3.1789825975772665e-05,
"loss": 1.6632,
"step": 280000
},
{
"epoch": 1.09,
"learning_rate": 3.1757307807872264e-05,
"loss": 1.6526,
"step": 280500
},
{
"epoch": 1.1,
"learning_rate": 3.1724789639971855e-05,
"loss": 1.6596,
"step": 281000
},
{
"epoch": 1.1,
"learning_rate": 3.169227147207145e-05,
"loss": 1.6466,
"step": 281500
},
{
"epoch": 1.1,
"learning_rate": 3.165975330417104e-05,
"loss": 1.6503,
"step": 282000
},
{
"epoch": 1.1,
"learning_rate": 3.162723513627063e-05,
"loss": 1.6462,
"step": 282500
},
{
"epoch": 1.1,
"learning_rate": 3.159471696837023e-05,
"loss": 1.6459,
"step": 283000
},
{
"epoch": 1.11,
"learning_rate": 3.156219880046982e-05,
"loss": 1.6391,
"step": 283500
},
{
"epoch": 1.11,
"learning_rate": 3.152968063256942e-05,
"loss": 1.6301,
"step": 284000
},
{
"epoch": 1.11,
"learning_rate": 3.149716246466901e-05,
"loss": 1.627,
"step": 284500
},
{
"epoch": 1.11,
"learning_rate": 3.146464429676861e-05,
"loss": 1.6331,
"step": 285000
},
{
"epoch": 1.11,
"learning_rate": 3.14321261288682e-05,
"loss": 1.6254,
"step": 285500
},
{
"epoch": 1.12,
"learning_rate": 3.139960796096779e-05,
"loss": 1.6201,
"step": 286000
},
{
"epoch": 1.12,
"learning_rate": 3.136708979306739e-05,
"loss": 1.6213,
"step": 286500
},
{
"epoch": 1.12,
"learning_rate": 3.133457162516698e-05,
"loss": 1.6158,
"step": 287000
},
{
"epoch": 1.12,
"learning_rate": 3.130205345726658e-05,
"loss": 1.6142,
"step": 287500
},
{
"epoch": 1.12,
"learning_rate": 3.126953528936617e-05,
"loss": 1.6048,
"step": 288000
},
{
"epoch": 1.13,
"learning_rate": 3.1237017121465764e-05,
"loss": 1.6069,
"step": 288500
},
{
"epoch": 1.13,
"learning_rate": 3.1204498953565355e-05,
"loss": 1.601,
"step": 289000
},
{
"epoch": 1.13,
"learning_rate": 3.117198078566495e-05,
"loss": 1.6069,
"step": 289500
},
{
"epoch": 1.13,
"learning_rate": 3.1139462617764545e-05,
"loss": 1.5992,
"step": 290000
},
{
"epoch": 1.13,
"learning_rate": 3.110694444986414e-05,
"loss": 1.6011,
"step": 290500
},
{
"epoch": 1.14,
"learning_rate": 3.1074426281963736e-05,
"loss": 1.5995,
"step": 291000
},
{
"epoch": 1.14,
"learning_rate": 3.104190811406333e-05,
"loss": 1.5854,
"step": 291500
},
{
"epoch": 1.14,
"learning_rate": 3.100938994616292e-05,
"loss": 1.5855,
"step": 292000
},
{
"epoch": 1.14,
"learning_rate": 3.097687177826252e-05,
"loss": 1.58,
"step": 292500
},
{
"epoch": 1.14,
"learning_rate": 3.094435361036211e-05,
"loss": 1.5733,
"step": 293000
},
{
"epoch": 1.15,
"learning_rate": 3.091183544246171e-05,
"loss": 1.5734,
"step": 293500
},
{
"epoch": 1.15,
"learning_rate": 3.08793172745613e-05,
"loss": 1.5749,
"step": 294000
},
{
"epoch": 1.15,
"learning_rate": 3.08467991066609e-05,
"loss": 1.5793,
"step": 294500
},
{
"epoch": 1.15,
"learning_rate": 3.081428093876049e-05,
"loss": 1.57,
"step": 295000
},
{
"epoch": 1.15,
"learning_rate": 3.078176277086008e-05,
"loss": 1.559,
"step": 295500
},
{
"epoch": 1.16,
"learning_rate": 3.074924460295968e-05,
"loss": 1.5605,
"step": 296000
},
{
"epoch": 1.16,
"learning_rate": 3.071672643505927e-05,
"loss": 1.5528,
"step": 296500
},
{
"epoch": 1.16,
"learning_rate": 3.068420826715886e-05,
"loss": 1.5572,
"step": 297000
},
{
"epoch": 1.16,
"learning_rate": 3.0651690099258454e-05,
"loss": 1.5568,
"step": 297500
},
{
"epoch": 1.16,
"learning_rate": 3.061917193135805e-05,
"loss": 1.556,
"step": 298000
},
{
"epoch": 1.16,
"learning_rate": 3.0586653763457644e-05,
"loss": 1.5599,
"step": 298500
},
{
"epoch": 1.17,
"learning_rate": 3.0554135595557236e-05,
"loss": 1.5531,
"step": 299000
},
{
"epoch": 1.17,
"learning_rate": 3.0521617427656834e-05,
"loss": 1.5557,
"step": 299500
},
{
"epoch": 1.17,
"learning_rate": 3.0489099259756426e-05,
"loss": 1.5438,
"step": 300000
},
{
"epoch": 1.17,
"learning_rate": 3.0456581091856024e-05,
"loss": 1.5526,
"step": 300500
},
{
"epoch": 1.17,
"learning_rate": 3.0424062923955616e-05,
"loss": 1.5352,
"step": 301000
},
{
"epoch": 1.18,
"learning_rate": 3.0391544756055208e-05,
"loss": 1.5359,
"step": 301500
},
{
"epoch": 1.18,
"learning_rate": 3.0359026588154803e-05,
"loss": 1.5354,
"step": 302000
},
{
"epoch": 1.18,
"learning_rate": 3.0326508420254394e-05,
"loss": 1.5295,
"step": 302500
},
{
"epoch": 1.18,
"learning_rate": 3.0293990252353993e-05,
"loss": 1.5296,
"step": 303000
},
{
"epoch": 1.18,
"learning_rate": 3.0261472084453584e-05,
"loss": 1.5247,
"step": 303500
},
{
"epoch": 1.19,
"learning_rate": 3.0228953916553183e-05,
"loss": 1.5268,
"step": 304000
},
{
"epoch": 1.19,
"learning_rate": 3.0196435748652774e-05,
"loss": 1.5214,
"step": 304500
},
{
"epoch": 1.19,
"learning_rate": 3.0163917580752366e-05,
"loss": 1.5246,
"step": 305000
},
{
"epoch": 1.19,
"learning_rate": 3.0131399412851964e-05,
"loss": 1.5274,
"step": 305500
},
{
"epoch": 1.19,
"learning_rate": 3.0098881244951556e-05,
"loss": 1.5078,
"step": 306000
},
{
"epoch": 1.2,
"learning_rate": 3.006636307705115e-05,
"loss": 1.5175,
"step": 306500
},
{
"epoch": 1.2,
"learning_rate": 3.0033844909150743e-05,
"loss": 1.5166,
"step": 307000
},
{
"epoch": 1.2,
"learning_rate": 3.000132674125034e-05,
"loss": 1.5126,
"step": 307500
},
{
"epoch": 1.2,
"learning_rate": 2.9968808573349933e-05,
"loss": 1.5188,
"step": 308000
},
{
"epoch": 1.2,
"learning_rate": 2.9936290405449524e-05,
"loss": 1.5015,
"step": 308500
},
{
"epoch": 1.21,
"learning_rate": 2.9903772237549123e-05,
"loss": 1.505,
"step": 309000
},
{
"epoch": 1.21,
"learning_rate": 2.9871254069648715e-05,
"loss": 1.4986,
"step": 309500
},
{
"epoch": 1.21,
"learning_rate": 2.983873590174831e-05,
"loss": 1.4994,
"step": 310000
},
{
"epoch": 1.21,
"learning_rate": 2.98062177338479e-05,
"loss": 1.4982,
"step": 310500
},
{
"epoch": 1.21,
"learning_rate": 2.9773699565947493e-05,
"loss": 1.4874,
"step": 311000
},
{
"epoch": 1.22,
"learning_rate": 2.974118139804709e-05,
"loss": 1.4915,
"step": 311500
},
{
"epoch": 1.22,
"learning_rate": 2.9708663230146683e-05,
"loss": 1.4996,
"step": 312000
},
{
"epoch": 1.22,
"learning_rate": 2.967614506224628e-05,
"loss": 1.4948,
"step": 312500
},
{
"epoch": 1.22,
"learning_rate": 2.9643626894345873e-05,
"loss": 1.4842,
"step": 313000
},
{
"epoch": 1.22,
"learning_rate": 2.9611108726445468e-05,
"loss": 1.4798,
"step": 313500
},
{
"epoch": 1.23,
"learning_rate": 2.957859055854506e-05,
"loss": 1.491,
"step": 314000
},
{
"epoch": 1.23,
"learning_rate": 2.954607239064465e-05,
"loss": 1.4821,
"step": 314500
},
{
"epoch": 1.23,
"learning_rate": 2.951355422274425e-05,
"loss": 1.4841,
"step": 315000
},
{
"epoch": 1.23,
"learning_rate": 2.948103605484384e-05,
"loss": 1.4746,
"step": 315500
},
{
"epoch": 1.23,
"learning_rate": 2.944851788694344e-05,
"loss": 1.4767,
"step": 316000
},
{
"epoch": 1.24,
"learning_rate": 2.941599971904303e-05,
"loss": 1.4775,
"step": 316500
},
{
"epoch": 1.24,
"learning_rate": 2.9383481551142623e-05,
"loss": 1.4777,
"step": 317000
},
{
"epoch": 1.24,
"learning_rate": 2.9350963383242218e-05,
"loss": 1.4718,
"step": 317500
},
{
"epoch": 1.24,
"learning_rate": 2.931844521534181e-05,
"loss": 1.4654,
"step": 318000
},
{
"epoch": 1.24,
"learning_rate": 2.9285927047441408e-05,
"loss": 1.4667,
"step": 318500
},
{
"epoch": 1.24,
"learning_rate": 2.9253408879541e-05,
"loss": 1.4718,
"step": 319000
},
{
"epoch": 1.25,
"learning_rate": 2.9220890711640598e-05,
"loss": 1.4644,
"step": 319500
},
{
"epoch": 1.25,
"learning_rate": 2.918837254374019e-05,
"loss": 1.4597,
"step": 320000
},
{
"epoch": 1.25,
"learning_rate": 2.915585437583978e-05,
"loss": 1.4532,
"step": 320500
},
{
"epoch": 1.25,
"learning_rate": 2.9123336207939377e-05,
"loss": 1.4635,
"step": 321000
},
{
"epoch": 1.25,
"learning_rate": 2.9090818040038968e-05,
"loss": 1.4501,
"step": 321500
},
{
"epoch": 1.26,
"learning_rate": 2.9058299872138567e-05,
"loss": 1.4521,
"step": 322000
},
{
"epoch": 1.26,
"learning_rate": 2.9025781704238158e-05,
"loss": 1.4588,
"step": 322500
},
{
"epoch": 1.26,
"learning_rate": 2.8993263536337757e-05,
"loss": 1.4562,
"step": 323000
},
{
"epoch": 1.26,
"learning_rate": 2.896074536843735e-05,
"loss": 1.4555,
"step": 323500
},
{
"epoch": 1.26,
"learning_rate": 2.892822720053694e-05,
"loss": 1.4483,
"step": 324000
},
{
"epoch": 1.27,
"learning_rate": 2.8895709032636535e-05,
"loss": 1.4548,
"step": 324500
},
{
"epoch": 1.27,
"learning_rate": 2.8863190864736127e-05,
"loss": 1.4491,
"step": 325000
},
{
"epoch": 1.27,
"learning_rate": 2.8830672696835725e-05,
"loss": 1.4398,
"step": 325500
},
{
"epoch": 1.27,
"learning_rate": 2.8798154528935317e-05,
"loss": 1.4409,
"step": 326000
},
{
"epoch": 1.27,
"learning_rate": 2.876563636103491e-05,
"loss": 1.4472,
"step": 326500
},
{
"epoch": 1.28,
"learning_rate": 2.8733118193134507e-05,
"loss": 1.4316,
"step": 327000
},
{
"epoch": 1.28,
"learning_rate": 2.87006000252341e-05,
"loss": 1.4255,
"step": 327500
},
{
"epoch": 1.28,
"learning_rate": 2.8668081857333693e-05,
"loss": 1.4308,
"step": 328000
},
{
"epoch": 1.28,
"learning_rate": 2.863556368943329e-05,
"loss": 1.4355,
"step": 328500
},
{
"epoch": 1.28,
"learning_rate": 2.8603045521532884e-05,
"loss": 1.4294,
"step": 329000
},
{
"epoch": 1.29,
"learning_rate": 2.8570527353632475e-05,
"loss": 1.4267,
"step": 329500
},
{
"epoch": 1.29,
"learning_rate": 2.8538009185732067e-05,
"loss": 1.4333,
"step": 330000
},
{
"epoch": 1.29,
"learning_rate": 2.8505491017831665e-05,
"loss": 1.4254,
"step": 330500
},
{
"epoch": 1.29,
"learning_rate": 2.8472972849931257e-05,
"loss": 1.4281,
"step": 331000
},
{
"epoch": 1.29,
"learning_rate": 2.8440454682030855e-05,
"loss": 1.426,
"step": 331500
},
{
"epoch": 1.3,
"learning_rate": 2.8407936514130447e-05,
"loss": 1.4294,
"step": 332000
},
{
"epoch": 1.3,
"learning_rate": 2.8375418346230042e-05,
"loss": 1.4208,
"step": 332500
},
{
"epoch": 1.3,
"learning_rate": 2.8342900178329634e-05,
"loss": 1.421,
"step": 333000
},
{
"epoch": 1.3,
"learning_rate": 2.8310382010429225e-05,
"loss": 1.4226,
"step": 333500
},
{
"epoch": 1.3,
"learning_rate": 2.8277863842528824e-05,
"loss": 1.417,
"step": 334000
},
{
"epoch": 1.31,
"learning_rate": 2.8245345674628415e-05,
"loss": 1.4166,
"step": 334500
},
{
"epoch": 1.31,
"learning_rate": 2.8212827506728014e-05,
"loss": 1.4207,
"step": 335000
},
{
"epoch": 1.31,
"learning_rate": 2.8180309338827605e-05,
"loss": 1.408,
"step": 335500
},
{
"epoch": 1.31,
"learning_rate": 2.8147791170927197e-05,
"loss": 1.412,
"step": 336000
},
{
"epoch": 1.31,
"learning_rate": 2.8115273003026792e-05,
"loss": 1.412,
"step": 336500
},
{
"epoch": 1.32,
"learning_rate": 2.8082754835126384e-05,
"loss": 1.4137,
"step": 337000
},
{
"epoch": 1.32,
"learning_rate": 2.8050236667225982e-05,
"loss": 1.4032,
"step": 337500
},
{
"epoch": 1.32,
"learning_rate": 2.8017718499325574e-05,
"loss": 1.4022,
"step": 338000
},
{
"epoch": 1.32,
"learning_rate": 2.7985200331425172e-05,
"loss": 1.4038,
"step": 338500
},
{
"epoch": 1.32,
"learning_rate": 2.7952682163524764e-05,
"loss": 1.3994,
"step": 339000
},
{
"epoch": 1.32,
"learning_rate": 2.7920163995624355e-05,
"loss": 1.3996,
"step": 339500
},
{
"epoch": 1.33,
"learning_rate": 2.788764582772395e-05,
"loss": 1.3937,
"step": 340000
},
{
"epoch": 1.33,
"learning_rate": 2.7855127659823542e-05,
"loss": 1.3994,
"step": 340500
},
{
"epoch": 1.33,
"learning_rate": 2.782260949192314e-05,
"loss": 1.4008,
"step": 341000
},
{
"epoch": 1.33,
"learning_rate": 2.7790091324022732e-05,
"loss": 1.399,
"step": 341500
},
{
"epoch": 1.33,
"learning_rate": 2.775757315612233e-05,
"loss": 1.398,
"step": 342000
},
{
"epoch": 1.34,
"learning_rate": 2.7725054988221922e-05,
"loss": 1.3928,
"step": 342500
},
{
"epoch": 1.34,
"learning_rate": 2.7692536820321514e-05,
"loss": 1.3855,
"step": 343000
},
{
"epoch": 1.34,
"learning_rate": 2.766001865242111e-05,
"loss": 1.394,
"step": 343500
},
{
"epoch": 1.34,
"learning_rate": 2.76275004845207e-05,
"loss": 1.3863,
"step": 344000
},
{
"epoch": 1.34,
"learning_rate": 2.75949823166203e-05,
"loss": 1.3915,
"step": 344500
},
{
"epoch": 1.35,
"learning_rate": 2.756246414871989e-05,
"loss": 1.392,
"step": 345000
},
{
"epoch": 1.35,
"learning_rate": 2.7529945980819482e-05,
"loss": 1.389,
"step": 345500
},
{
"epoch": 1.35,
"learning_rate": 2.749742781291908e-05,
"loss": 1.3836,
"step": 346000
},
{
"epoch": 1.35,
"learning_rate": 2.7464909645018672e-05,
"loss": 1.3852,
"step": 346500
},
{
"epoch": 1.35,
"learning_rate": 2.7432391477118267e-05,
"loss": 1.3736,
"step": 347000
},
{
"epoch": 1.36,
"learning_rate": 2.739987330921786e-05,
"loss": 1.3818,
"step": 347500
},
{
"epoch": 1.36,
"learning_rate": 2.7367355141317457e-05,
"loss": 1.3816,
"step": 348000
},
{
"epoch": 1.36,
"learning_rate": 2.733483697341705e-05,
"loss": 1.3766,
"step": 348500
},
{
"epoch": 1.36,
"learning_rate": 2.730231880551664e-05,
"loss": 1.3835,
"step": 349000
},
{
"epoch": 1.36,
"learning_rate": 2.726980063761624e-05,
"loss": 1.3843,
"step": 349500
},
{
"epoch": 1.37,
"learning_rate": 2.723728246971583e-05,
"loss": 1.3723,
"step": 350000
},
{
"epoch": 1.37,
"learning_rate": 2.7204764301815426e-05,
"loss": 1.3802,
"step": 350500
},
{
"epoch": 1.37,
"learning_rate": 2.7172246133915018e-05,
"loss": 1.3747,
"step": 351000
},
{
"epoch": 1.37,
"learning_rate": 2.7139727966014616e-05,
"loss": 1.3662,
"step": 351500
},
{
"epoch": 1.37,
"learning_rate": 2.7107209798114208e-05,
"loss": 1.3712,
"step": 352000
},
{
"epoch": 1.38,
"learning_rate": 2.70746916302138e-05,
"loss": 1.3805,
"step": 352500
},
{
"epoch": 1.38,
"learning_rate": 2.7042173462313398e-05,
"loss": 1.3647,
"step": 353000
},
{
"epoch": 1.38,
"learning_rate": 2.700965529441299e-05,
"loss": 1.3731,
"step": 353500
},
{
"epoch": 1.38,
"learning_rate": 2.6977137126512588e-05,
"loss": 1.3663,
"step": 354000
},
{
"epoch": 1.38,
"learning_rate": 2.694461895861218e-05,
"loss": 1.3559,
"step": 354500
},
{
"epoch": 1.39,
"learning_rate": 2.691210079071177e-05,
"loss": 1.3642,
"step": 355000
},
{
"epoch": 1.39,
"learning_rate": 2.6879582622811366e-05,
"loss": 1.3679,
"step": 355500
},
{
"epoch": 1.39,
"learning_rate": 2.6847064454910958e-05,
"loss": 1.3576,
"step": 356000
},
{
"epoch": 1.39,
"learning_rate": 2.6814546287010556e-05,
"loss": 1.3536,
"step": 356500
},
{
"epoch": 1.39,
"learning_rate": 2.6782028119110148e-05,
"loss": 1.3635,
"step": 357000
},
{
"epoch": 1.4,
"learning_rate": 2.6749509951209746e-05,
"loss": 1.3622,
"step": 357500
},
{
"epoch": 1.4,
"learning_rate": 2.6716991783309338e-05,
"loss": 1.3594,
"step": 358000
},
{
"epoch": 1.4,
"learning_rate": 2.668447361540893e-05,
"loss": 1.3559,
"step": 358500
},
{
"epoch": 1.4,
"learning_rate": 2.6651955447508525e-05,
"loss": 1.351,
"step": 359000
},
{
"epoch": 1.4,
"learning_rate": 2.6619437279608116e-05,
"loss": 1.3582,
"step": 359500
},
{
"epoch": 1.4,
"learning_rate": 2.6586919111707715e-05,
"loss": 1.3567,
"step": 360000
},
{
"epoch": 1.41,
"learning_rate": 2.6554400943807306e-05,
"loss": 1.3417,
"step": 360500
},
{
"epoch": 1.41,
"learning_rate": 2.6521882775906898e-05,
"loss": 1.3467,
"step": 361000
},
{
"epoch": 1.41,
"learning_rate": 2.6489364608006496e-05,
"loss": 1.3623,
"step": 361500
},
{
"epoch": 1.41,
"learning_rate": 2.6456846440106088e-05,
"loss": 1.3495,
"step": 362000
},
{
"epoch": 1.41,
"learning_rate": 2.6424328272205683e-05,
"loss": 1.3514,
"step": 362500
},
{
"epoch": 1.42,
"learning_rate": 2.6391810104305275e-05,
"loss": 1.342,
"step": 363000
},
{
"epoch": 1.42,
"learning_rate": 2.6359291936404873e-05,
"loss": 1.3363,
"step": 363500
},
{
"epoch": 1.42,
"learning_rate": 2.6326773768504465e-05,
"loss": 1.3467,
"step": 364000
},
{
"epoch": 1.42,
"learning_rate": 2.6294255600604056e-05,
"loss": 1.345,
"step": 364500
},
{
"epoch": 1.42,
"learning_rate": 2.6261737432703655e-05,
"loss": 1.3486,
"step": 365000
},
{
"epoch": 1.43,
"learning_rate": 2.6229219264803246e-05,
"loss": 1.3505,
"step": 365500
},
{
"epoch": 1.43,
"learning_rate": 2.619670109690284e-05,
"loss": 1.3397,
"step": 366000
},
{
"epoch": 1.43,
"learning_rate": 2.6164182929002433e-05,
"loss": 1.3381,
"step": 366500
},
{
"epoch": 1.43,
"learning_rate": 2.613166476110203e-05,
"loss": 1.3367,
"step": 367000
},
{
"epoch": 1.43,
"learning_rate": 2.6099146593201623e-05,
"loss": 1.3375,
"step": 367500
},
{
"epoch": 1.44,
"learning_rate": 2.6066628425301215e-05,
"loss": 1.3405,
"step": 368000
},
{
"epoch": 1.44,
"learning_rate": 2.6034110257400813e-05,
"loss": 1.3384,
"step": 368500
},
{
"epoch": 1.44,
"learning_rate": 2.6001592089500405e-05,
"loss": 1.3357,
"step": 369000
},
{
"epoch": 1.44,
"learning_rate": 2.59690739216e-05,
"loss": 1.3349,
"step": 369500
},
{
"epoch": 1.44,
"learning_rate": 2.593655575369959e-05,
"loss": 1.3386,
"step": 370000
},
{
"epoch": 1.45,
"learning_rate": 2.5904037585799183e-05,
"loss": 1.3269,
"step": 370500
},
{
"epoch": 1.45,
"learning_rate": 2.587151941789878e-05,
"loss": 1.3368,
"step": 371000
},
{
"epoch": 1.45,
"learning_rate": 2.5839001249998373e-05,
"loss": 1.3378,
"step": 371500
},
{
"epoch": 1.45,
"learning_rate": 2.580648308209797e-05,
"loss": 1.3308,
"step": 372000
},
{
"epoch": 1.45,
"learning_rate": 2.5773964914197563e-05,
"loss": 1.3222,
"step": 372500
},
{
"epoch": 1.46,
"learning_rate": 2.574144674629716e-05,
"loss": 1.3313,
"step": 373000
},
{
"epoch": 1.46,
"learning_rate": 2.570892857839675e-05,
"loss": 1.3274,
"step": 373500
},
{
"epoch": 1.46,
"learning_rate": 2.567641041049634e-05,
"loss": 1.3282,
"step": 374000
},
{
"epoch": 1.46,
"learning_rate": 2.564389224259594e-05,
"loss": 1.3198,
"step": 374500
},
{
"epoch": 1.46,
"learning_rate": 2.561137407469553e-05,
"loss": 1.319,
"step": 375000
},
{
"epoch": 1.47,
"learning_rate": 2.557885590679513e-05,
"loss": 1.3279,
"step": 375500
},
{
"epoch": 1.47,
"learning_rate": 2.5546337738894722e-05,
"loss": 1.3206,
"step": 376000
},
{
"epoch": 1.47,
"learning_rate": 2.5513819570994317e-05,
"loss": 1.3237,
"step": 376500
},
{
"epoch": 1.47,
"learning_rate": 2.5481301403093912e-05,
"loss": 1.317,
"step": 377000
},
{
"epoch": 1.47,
"learning_rate": 2.5448783235193503e-05,
"loss": 1.3168,
"step": 377500
},
{
"epoch": 1.48,
"learning_rate": 2.54162650672931e-05,
"loss": 1.3219,
"step": 378000
},
{
"epoch": 1.48,
"learning_rate": 2.538374689939269e-05,
"loss": 1.316,
"step": 378500
},
{
"epoch": 1.48,
"learning_rate": 2.535122873149229e-05,
"loss": 1.3099,
"step": 379000
},
{
"epoch": 1.48,
"learning_rate": 2.531871056359188e-05,
"loss": 1.3184,
"step": 379500
},
{
"epoch": 1.48,
"learning_rate": 2.5286192395691472e-05,
"loss": 1.32,
"step": 380000
},
{
"epoch": 1.48,
"learning_rate": 2.525367422779107e-05,
"loss": 1.3118,
"step": 380500
},
{
"epoch": 1.49,
"learning_rate": 2.5221156059890662e-05,
"loss": 1.301,
"step": 381000
},
{
"epoch": 1.49,
"learning_rate": 2.5188637891990257e-05,
"loss": 1.3119,
"step": 381500
},
{
"epoch": 1.49,
"learning_rate": 2.515611972408985e-05,
"loss": 1.3115,
"step": 382000
},
{
"epoch": 1.49,
"learning_rate": 2.5123601556189447e-05,
"loss": 1.3155,
"step": 382500
},
{
"epoch": 1.49,
"learning_rate": 2.509108338828904e-05,
"loss": 1.318,
"step": 383000
},
{
"epoch": 1.5,
"learning_rate": 2.505856522038863e-05,
"loss": 1.3038,
"step": 383500
},
{
"epoch": 1.5,
"learning_rate": 2.502604705248823e-05,
"loss": 1.3007,
"step": 384000
},
{
"epoch": 1.5,
"learning_rate": 2.499352888458782e-05,
"loss": 1.3096,
"step": 384500
},
{
"epoch": 1.5,
"learning_rate": 2.4961010716687412e-05,
"loss": 1.2961,
"step": 385000
},
{
"epoch": 1.5,
"learning_rate": 2.4928492548787007e-05,
"loss": 1.3118,
"step": 385500
},
{
"epoch": 1.51,
"learning_rate": 2.4895974380886602e-05,
"loss": 1.3076,
"step": 386000
},
{
"epoch": 1.51,
"learning_rate": 2.4863456212986197e-05,
"loss": 1.3118,
"step": 386500
},
{
"epoch": 1.51,
"learning_rate": 2.4830938045085792e-05,
"loss": 1.2965,
"step": 387000
},
{
"epoch": 1.51,
"learning_rate": 2.4798419877185387e-05,
"loss": 1.302,
"step": 387500
},
{
"epoch": 1.51,
"learning_rate": 2.476590170928498e-05,
"loss": 1.3042,
"step": 388000
},
{
"epoch": 1.52,
"learning_rate": 2.473338354138457e-05,
"loss": 1.3032,
"step": 388500
},
{
"epoch": 1.52,
"learning_rate": 2.4700865373484165e-05,
"loss": 1.3031,
"step": 389000
},
{
"epoch": 1.52,
"learning_rate": 2.466834720558376e-05,
"loss": 1.2966,
"step": 389500
},
{
"epoch": 1.52,
"learning_rate": 2.4635829037683356e-05,
"loss": 1.3008,
"step": 390000
},
{
"epoch": 1.52,
"learning_rate": 2.460331086978295e-05,
"loss": 1.2896,
"step": 390500
},
{
"epoch": 1.53,
"learning_rate": 2.4570792701882542e-05,
"loss": 1.2999,
"step": 391000
},
{
"epoch": 1.53,
"learning_rate": 2.4538274533982137e-05,
"loss": 1.3017,
"step": 391500
},
{
"epoch": 1.53,
"learning_rate": 2.4505756366081732e-05,
"loss": 1.2969,
"step": 392000
},
{
"epoch": 1.53,
"learning_rate": 2.4473238198181324e-05,
"loss": 1.297,
"step": 392500
},
{
"epoch": 1.53,
"learning_rate": 2.444072003028092e-05,
"loss": 1.2959,
"step": 393000
},
{
"epoch": 1.54,
"learning_rate": 2.4408201862380514e-05,
"loss": 1.3034,
"step": 393500
},
{
"epoch": 1.54,
"learning_rate": 2.4375683694480106e-05,
"loss": 1.285,
"step": 394000
},
{
"epoch": 1.54,
"learning_rate": 2.43431655265797e-05,
"loss": 1.2913,
"step": 394500
},
{
"epoch": 1.54,
"learning_rate": 2.4310647358679296e-05,
"loss": 1.2806,
"step": 395000
},
{
"epoch": 1.54,
"learning_rate": 2.427812919077889e-05,
"loss": 1.2842,
"step": 395500
},
{
"epoch": 1.55,
"learning_rate": 2.4245611022878482e-05,
"loss": 1.2776,
"step": 396000
},
{
"epoch": 1.55,
"learning_rate": 2.4213092854978077e-05,
"loss": 1.2867,
"step": 396500
},
{
"epoch": 1.55,
"learning_rate": 2.4180574687077672e-05,
"loss": 1.2906,
"step": 397000
},
{
"epoch": 1.55,
"learning_rate": 2.4148056519177264e-05,
"loss": 1.2907,
"step": 397500
},
{
"epoch": 1.55,
"learning_rate": 2.411553835127686e-05,
"loss": 1.2786,
"step": 398000
},
{
"epoch": 1.56,
"learning_rate": 2.4083020183376454e-05,
"loss": 1.2943,
"step": 398500
},
{
"epoch": 1.56,
"learning_rate": 2.405050201547605e-05,
"loss": 1.2758,
"step": 399000
},
{
"epoch": 1.56,
"learning_rate": 2.401798384757564e-05,
"loss": 1.2839,
"step": 399500
},
{
"epoch": 1.56,
"learning_rate": 2.3985465679675236e-05,
"loss": 1.2819,
"step": 400000
},
{
"epoch": 1.56,
"learning_rate": 2.3952947511774828e-05,
"loss": 1.2791,
"step": 400500
},
{
"epoch": 1.56,
"learning_rate": 2.3920429343874423e-05,
"loss": 1.2753,
"step": 401000
},
{
"epoch": 1.57,
"learning_rate": 2.3887911175974018e-05,
"loss": 1.2791,
"step": 401500
},
{
"epoch": 1.57,
"learning_rate": 2.3855393008073613e-05,
"loss": 1.2691,
"step": 402000
},
{
"epoch": 1.57,
"learning_rate": 2.3822874840173208e-05,
"loss": 1.278,
"step": 402500
},
{
"epoch": 1.57,
"learning_rate": 2.3790356672272803e-05,
"loss": 1.2769,
"step": 403000
},
{
"epoch": 1.57,
"learning_rate": 2.3757838504372394e-05,
"loss": 1.282,
"step": 403500
},
{
"epoch": 1.58,
"learning_rate": 2.3725320336471986e-05,
"loss": 1.272,
"step": 404000
},
{
"epoch": 1.58,
"learning_rate": 2.369280216857158e-05,
"loss": 1.2694,
"step": 404500
},
{
"epoch": 1.58,
"learning_rate": 2.3660284000671176e-05,
"loss": 1.2681,
"step": 405000
},
{
"epoch": 1.58,
"learning_rate": 2.362776583277077e-05,
"loss": 1.2774,
"step": 405500
},
{
"epoch": 1.58,
"learning_rate": 2.3595247664870366e-05,
"loss": 1.2685,
"step": 406000
},
{
"epoch": 1.59,
"learning_rate": 2.3562729496969958e-05,
"loss": 1.2703,
"step": 406500
},
{
"epoch": 1.59,
"learning_rate": 2.3530211329069553e-05,
"loss": 1.277,
"step": 407000
},
{
"epoch": 1.59,
"learning_rate": 2.3497693161169144e-05,
"loss": 1.2666,
"step": 407500
},
{
"epoch": 1.59,
"learning_rate": 2.346517499326874e-05,
"loss": 1.2656,
"step": 408000
},
{
"epoch": 1.59,
"learning_rate": 2.3432656825368335e-05,
"loss": 1.2666,
"step": 408500
},
{
"epoch": 1.6,
"learning_rate": 2.340013865746793e-05,
"loss": 1.2639,
"step": 409000
},
{
"epoch": 1.6,
"learning_rate": 2.3367620489567525e-05,
"loss": 1.2686,
"step": 409500
},
{
"epoch": 1.6,
"learning_rate": 2.3335102321667116e-05,
"loss": 1.2689,
"step": 410000
},
{
"epoch": 1.6,
"learning_rate": 2.330258415376671e-05,
"loss": 1.2643,
"step": 410500
},
{
"epoch": 1.6,
"learning_rate": 2.3270065985866303e-05,
"loss": 1.2647,
"step": 411000
},
{
"epoch": 1.61,
"learning_rate": 2.3237547817965898e-05,
"loss": 1.2546,
"step": 411500
},
{
"epoch": 1.61,
"learning_rate": 2.3205029650065493e-05,
"loss": 1.2731,
"step": 412000
},
{
"epoch": 1.61,
"learning_rate": 2.3172511482165088e-05,
"loss": 1.2624,
"step": 412500
},
{
"epoch": 1.61,
"learning_rate": 2.313999331426468e-05,
"loss": 1.2578,
"step": 413000
},
{
"epoch": 1.61,
"learning_rate": 2.3107475146364275e-05,
"loss": 1.2609,
"step": 413500
},
{
"epoch": 1.62,
"learning_rate": 2.307495697846387e-05,
"loss": 1.2538,
"step": 414000
},
{
"epoch": 1.62,
"learning_rate": 2.3042438810563465e-05,
"loss": 1.258,
"step": 414500
},
{
"epoch": 1.62,
"learning_rate": 2.3009920642663056e-05,
"loss": 1.2518,
"step": 415000
},
{
"epoch": 1.62,
"learning_rate": 2.297740247476265e-05,
"loss": 1.2589,
"step": 415500
},
{
"epoch": 1.62,
"learning_rate": 2.2944884306862243e-05,
"loss": 1.2574,
"step": 416000
},
{
"epoch": 1.63,
"learning_rate": 2.2912366138961838e-05,
"loss": 1.2568,
"step": 416500
},
{
"epoch": 1.63,
"learning_rate": 2.2879847971061433e-05,
"loss": 1.2668,
"step": 417000
},
{
"epoch": 1.63,
"learning_rate": 2.2847329803161028e-05,
"loss": 1.2583,
"step": 417500
},
{
"epoch": 1.63,
"learning_rate": 2.2814811635260623e-05,
"loss": 1.2515,
"step": 418000
},
{
"epoch": 1.63,
"learning_rate": 2.2782293467360215e-05,
"loss": 1.2536,
"step": 418500
},
{
"epoch": 1.64,
"learning_rate": 2.2749775299459806e-05,
"loss": 1.2568,
"step": 419000
},
{
"epoch": 1.64,
"learning_rate": 2.27172571315594e-05,
"loss": 1.2572,
"step": 419500
},
{
"epoch": 1.64,
"learning_rate": 2.2684738963658997e-05,
"loss": 1.2556,
"step": 420000
},
{
"epoch": 1.64,
"learning_rate": 2.265222079575859e-05,
"loss": 1.2529,
"step": 420500
},
{
"epoch": 1.64,
"learning_rate": 2.2619702627858187e-05,
"loss": 1.2514,
"step": 421000
},
{
"epoch": 1.64,
"learning_rate": 2.258718445995778e-05,
"loss": 1.2515,
"step": 421500
},
{
"epoch": 1.65,
"learning_rate": 2.2554666292057373e-05,
"loss": 1.2482,
"step": 422000
},
{
"epoch": 1.65,
"learning_rate": 2.2522148124156965e-05,
"loss": 1.25,
"step": 422500
},
{
"epoch": 1.65,
"learning_rate": 2.248962995625656e-05,
"loss": 1.2518,
"step": 423000
},
{
"epoch": 1.65,
"learning_rate": 2.2457111788356155e-05,
"loss": 1.2528,
"step": 423500
},
{
"epoch": 1.65,
"learning_rate": 2.242459362045575e-05,
"loss": 1.2491,
"step": 424000
},
{
"epoch": 1.66,
"learning_rate": 2.2392075452555345e-05,
"loss": 1.2438,
"step": 424500
},
{
"epoch": 1.66,
"learning_rate": 2.235955728465494e-05,
"loss": 1.2541,
"step": 425000
},
{
"epoch": 1.66,
"learning_rate": 2.2327039116754532e-05,
"loss": 1.2482,
"step": 425500
},
{
"epoch": 1.66,
"learning_rate": 2.2294520948854127e-05,
"loss": 1.26,
"step": 426000
},
{
"epoch": 1.66,
"learning_rate": 2.226200278095372e-05,
"loss": 1.2491,
"step": 426500
},
{
"epoch": 1.67,
"learning_rate": 2.2229484613053313e-05,
"loss": 1.2455,
"step": 427000
},
{
"epoch": 1.67,
"learning_rate": 2.219696644515291e-05,
"loss": 1.2415,
"step": 427500
},
{
"epoch": 1.67,
"learning_rate": 2.2164448277252504e-05,
"loss": 1.2429,
"step": 428000
},
{
"epoch": 1.67,
"learning_rate": 2.2131930109352095e-05,
"loss": 1.2357,
"step": 428500
},
{
"epoch": 1.67,
"learning_rate": 2.209941194145169e-05,
"loss": 1.2391,
"step": 429000
},
{
"epoch": 1.68,
"learning_rate": 2.2066893773551285e-05,
"loss": 1.234,
"step": 429500
},
{
"epoch": 1.68,
"learning_rate": 2.2034375605650877e-05,
"loss": 1.2468,
"step": 430000
},
{
"epoch": 1.68,
"learning_rate": 2.2001857437750472e-05,
"loss": 1.2346,
"step": 430500
},
{
"epoch": 1.68,
"learning_rate": 2.1969339269850067e-05,
"loss": 1.2501,
"step": 431000
},
{
"epoch": 1.68,
"learning_rate": 2.1936821101949662e-05,
"loss": 1.2325,
"step": 431500
},
{
"epoch": 1.69,
"learning_rate": 2.1904302934049254e-05,
"loss": 1.2407,
"step": 432000
},
{
"epoch": 1.69,
"learning_rate": 2.187178476614885e-05,
"loss": 1.2381,
"step": 432500
},
{
"epoch": 1.69,
"learning_rate": 2.1839266598248444e-05,
"loss": 1.239,
"step": 433000
},
{
"epoch": 1.69,
"learning_rate": 2.1806748430348035e-05,
"loss": 1.2338,
"step": 433500
},
{
"epoch": 1.69,
"learning_rate": 2.177423026244763e-05,
"loss": 1.2336,
"step": 434000
},
{
"epoch": 1.7,
"learning_rate": 2.1741712094547225e-05,
"loss": 1.2403,
"step": 434500
},
{
"epoch": 1.7,
"learning_rate": 2.1709193926646817e-05,
"loss": 1.2359,
"step": 435000
},
{
"epoch": 1.7,
"learning_rate": 2.1676675758746412e-05,
"loss": 1.2263,
"step": 435500
},
{
"epoch": 1.7,
"learning_rate": 2.1644157590846007e-05,
"loss": 1.2363,
"step": 436000
},
{
"epoch": 1.7,
"learning_rate": 2.1611639422945602e-05,
"loss": 1.2334,
"step": 436500
},
{
"epoch": 1.71,
"learning_rate": 2.1579121255045194e-05,
"loss": 1.2317,
"step": 437000
},
{
"epoch": 1.71,
"learning_rate": 2.154660308714479e-05,
"loss": 1.2421,
"step": 437500
},
{
"epoch": 1.71,
"learning_rate": 2.151408491924438e-05,
"loss": 1.2329,
"step": 438000
},
{
"epoch": 1.71,
"learning_rate": 2.1481566751343975e-05,
"loss": 1.2237,
"step": 438500
},
{
"epoch": 1.71,
"learning_rate": 2.144904858344357e-05,
"loss": 1.2316,
"step": 439000
},
{
"epoch": 1.72,
"learning_rate": 2.1416530415543166e-05,
"loss": 1.2239,
"step": 439500
},
{
"epoch": 1.72,
"learning_rate": 2.138401224764276e-05,
"loss": 1.2344,
"step": 440000
},
{
"epoch": 1.72,
"learning_rate": 2.1351494079742356e-05,
"loss": 1.2243,
"step": 440500
},
{
"epoch": 1.72,
"learning_rate": 2.1318975911841947e-05,
"loss": 1.2329,
"step": 441000
},
{
"epoch": 1.72,
"learning_rate": 2.128645774394154e-05,
"loss": 1.2228,
"step": 441500
},
{
"epoch": 1.72,
"learning_rate": 2.1253939576041134e-05,
"loss": 1.2296,
"step": 442000
},
{
"epoch": 1.73,
"learning_rate": 2.122142140814073e-05,
"loss": 1.22,
"step": 442500
},
{
"epoch": 1.73,
"learning_rate": 2.1188903240240324e-05,
"loss": 1.2317,
"step": 443000
},
{
"epoch": 1.73,
"learning_rate": 2.115638507233992e-05,
"loss": 1.2299,
"step": 443500
},
{
"epoch": 1.73,
"learning_rate": 2.1123866904439514e-05,
"loss": 1.2361,
"step": 444000
},
{
"epoch": 1.73,
"learning_rate": 2.1091348736539106e-05,
"loss": 1.2282,
"step": 444500
},
{
"epoch": 1.74,
"learning_rate": 2.1058830568638697e-05,
"loss": 1.2263,
"step": 445000
},
{
"epoch": 1.74,
"learning_rate": 2.1026312400738292e-05,
"loss": 1.218,
"step": 445500
},
{
"epoch": 1.74,
"learning_rate": 2.0993794232837887e-05,
"loss": 1.228,
"step": 446000
},
{
"epoch": 1.74,
"learning_rate": 2.0961276064937482e-05,
"loss": 1.2248,
"step": 446500
},
{
"epoch": 1.74,
"learning_rate": 2.0928757897037077e-05,
"loss": 1.2275,
"step": 447000
},
{
"epoch": 1.75,
"learning_rate": 2.089623972913667e-05,
"loss": 1.2223,
"step": 447500
},
{
"epoch": 1.75,
"learning_rate": 2.0863721561236264e-05,
"loss": 1.2224,
"step": 448000
},
{
"epoch": 1.75,
"learning_rate": 2.0831203393335856e-05,
"loss": 1.2217,
"step": 448500
},
{
"epoch": 1.75,
"learning_rate": 2.079868522543545e-05,
"loss": 1.2208,
"step": 449000
},
{
"epoch": 1.75,
"learning_rate": 2.0766167057535046e-05,
"loss": 1.217,
"step": 449500
},
{
"epoch": 1.76,
"learning_rate": 2.073364888963464e-05,
"loss": 1.214,
"step": 450000
},
{
"epoch": 1.76,
"learning_rate": 2.0701130721734233e-05,
"loss": 1.2142,
"step": 450500
},
{
"epoch": 1.76,
"learning_rate": 2.0668612553833828e-05,
"loss": 1.2072,
"step": 451000
},
{
"epoch": 1.76,
"learning_rate": 2.0636094385933423e-05,
"loss": 1.2121,
"step": 451500
},
{
"epoch": 1.76,
"learning_rate": 2.0603576218033018e-05,
"loss": 1.2185,
"step": 452000
},
{
"epoch": 1.77,
"learning_rate": 2.057105805013261e-05,
"loss": 1.2129,
"step": 452500
},
{
"epoch": 1.77,
"learning_rate": 2.0538539882232204e-05,
"loss": 1.2091,
"step": 453000
},
{
"epoch": 1.77,
"learning_rate": 2.05060217143318e-05,
"loss": 1.2087,
"step": 453500
},
{
"epoch": 1.77,
"learning_rate": 2.047350354643139e-05,
"loss": 1.2247,
"step": 454000
},
{
"epoch": 1.77,
"learning_rate": 2.0440985378530986e-05,
"loss": 1.2131,
"step": 454500
},
{
"epoch": 1.78,
"learning_rate": 2.040846721063058e-05,
"loss": 1.2172,
"step": 455000
},
{
"epoch": 1.78,
"learning_rate": 2.0375949042730176e-05,
"loss": 1.2044,
"step": 455500
},
{
"epoch": 1.78,
"learning_rate": 2.0343430874829768e-05,
"loss": 1.2159,
"step": 456000
},
{
"epoch": 1.78,
"learning_rate": 2.0310912706929363e-05,
"loss": 1.2087,
"step": 456500
},
{
"epoch": 1.78,
"learning_rate": 2.0278394539028954e-05,
"loss": 1.2067,
"step": 457000
},
{
"epoch": 1.79,
"learning_rate": 2.024587637112855e-05,
"loss": 1.1992,
"step": 457500
},
{
"epoch": 1.79,
"learning_rate": 2.0213358203228144e-05,
"loss": 1.202,
"step": 458000
},
{
"epoch": 1.79,
"learning_rate": 2.018084003532774e-05,
"loss": 1.2078,
"step": 458500
},
{
"epoch": 1.79,
"learning_rate": 2.0148321867427335e-05,
"loss": 1.2087,
"step": 459000
},
{
"epoch": 1.79,
"learning_rate": 2.0115803699526926e-05,
"loss": 1.2119,
"step": 459500
},
{
"epoch": 1.8,
"learning_rate": 2.0083285531626518e-05,
"loss": 1.2056,
"step": 460000
},
{
"epoch": 1.8,
"learning_rate": 2.0050767363726113e-05,
"loss": 1.1988,
"step": 460500
},
{
"epoch": 1.8,
"learning_rate": 2.0018249195825708e-05,
"loss": 1.2137,
"step": 461000
},
{
"epoch": 1.8,
"learning_rate": 1.9985731027925303e-05,
"loss": 1.2125,
"step": 461500
},
{
"epoch": 1.8,
"learning_rate": 1.9953212860024898e-05,
"loss": 1.2043,
"step": 462000
},
{
"epoch": 1.8,
"learning_rate": 1.9920694692124493e-05,
"loss": 1.2074,
"step": 462500
},
{
"epoch": 1.81,
"learning_rate": 1.9888176524224085e-05,
"loss": 1.2036,
"step": 463000
},
{
"epoch": 1.81,
"learning_rate": 1.985565835632368e-05,
"loss": 1.2005,
"step": 463500
},
{
"epoch": 1.81,
"learning_rate": 1.982314018842327e-05,
"loss": 1.2057,
"step": 464000
},
{
"epoch": 1.81,
"learning_rate": 1.9790622020522866e-05,
"loss": 1.2043,
"step": 464500
},
{
"epoch": 1.81,
"learning_rate": 1.975810385262246e-05,
"loss": 1.2036,
"step": 465000
},
{
"epoch": 1.82,
"learning_rate": 1.9725585684722056e-05,
"loss": 1.2022,
"step": 465500
},
{
"epoch": 1.82,
"learning_rate": 1.969306751682165e-05,
"loss": 1.2003,
"step": 466000
},
{
"epoch": 1.82,
"learning_rate": 1.9660549348921243e-05,
"loss": 1.1992,
"step": 466500
},
{
"epoch": 1.82,
"learning_rate": 1.9628031181020838e-05,
"loss": 1.2011,
"step": 467000
},
{
"epoch": 1.82,
"learning_rate": 1.959551301312043e-05,
"loss": 1.1997,
"step": 467500
},
{
"epoch": 1.83,
"learning_rate": 1.9562994845220025e-05,
"loss": 1.2033,
"step": 468000
},
{
"epoch": 1.83,
"learning_rate": 1.953047667731962e-05,
"loss": 1.199,
"step": 468500
},
{
"epoch": 1.83,
"learning_rate": 1.9497958509419215e-05,
"loss": 1.1972,
"step": 469000
},
{
"epoch": 1.83,
"learning_rate": 1.9465440341518807e-05,
"loss": 1.1979,
"step": 469500
},
{
"epoch": 1.83,
"learning_rate": 1.94329221736184e-05,
"loss": 1.2027,
"step": 470000
},
{
"epoch": 1.84,
"learning_rate": 1.9400404005717997e-05,
"loss": 1.1959,
"step": 470500
},
{
"epoch": 1.84,
"learning_rate": 1.9367885837817588e-05,
"loss": 1.1962,
"step": 471000
},
{
"epoch": 1.84,
"learning_rate": 1.9335367669917183e-05,
"loss": 1.1978,
"step": 471500
},
{
"epoch": 1.84,
"learning_rate": 1.930284950201678e-05,
"loss": 1.2043,
"step": 472000
},
{
"epoch": 1.84,
"learning_rate": 1.927033133411637e-05,
"loss": 1.1901,
"step": 472500
},
{
"epoch": 1.85,
"learning_rate": 1.9237813166215965e-05,
"loss": 1.2003,
"step": 473000
},
{
"epoch": 1.85,
"learning_rate": 1.920529499831556e-05,
"loss": 1.1963,
"step": 473500
},
{
"epoch": 1.85,
"learning_rate": 1.9172776830415155e-05,
"loss": 1.197,
"step": 474000
},
{
"epoch": 1.85,
"learning_rate": 1.914025866251475e-05,
"loss": 1.1969,
"step": 474500
},
{
"epoch": 1.85,
"learning_rate": 1.9107740494614342e-05,
"loss": 1.1896,
"step": 475000
},
{
"epoch": 1.86,
"learning_rate": 1.9075222326713933e-05,
"loss": 1.1928,
"step": 475500
},
{
"epoch": 1.86,
"learning_rate": 1.904270415881353e-05,
"loss": 1.1887,
"step": 476000
},
{
"epoch": 1.86,
"learning_rate": 1.9010185990913123e-05,
"loss": 1.1895,
"step": 476500
},
{
"epoch": 1.86,
"learning_rate": 1.897766782301272e-05,
"loss": 1.1926,
"step": 477000
},
{
"epoch": 1.86,
"learning_rate": 1.8945149655112314e-05,
"loss": 1.1861,
"step": 477500
},
{
"epoch": 1.87,
"learning_rate": 1.891263148721191e-05,
"loss": 1.2007,
"step": 478000
},
{
"epoch": 1.87,
"learning_rate": 1.88801133193115e-05,
"loss": 1.1893,
"step": 478500
},
{
"epoch": 1.87,
"learning_rate": 1.8847595151411092e-05,
"loss": 1.1894,
"step": 479000
},
{
"epoch": 1.87,
"learning_rate": 1.8815076983510687e-05,
"loss": 1.1954,
"step": 479500
},
{
"epoch": 1.87,
"learning_rate": 1.8782558815610282e-05,
"loss": 1.186,
"step": 480000
},
{
"epoch": 1.87,
"learning_rate": 1.8750040647709877e-05,
"loss": 1.1876,
"step": 480500
},
{
"epoch": 1.88,
"learning_rate": 1.8717522479809472e-05,
"loss": 1.19,
"step": 481000
},
{
"epoch": 1.88,
"learning_rate": 1.8685004311909067e-05,
"loss": 1.1834,
"step": 481500
},
{
"epoch": 1.88,
"learning_rate": 1.865248614400866e-05,
"loss": 1.1836,
"step": 482000
},
{
"epoch": 1.88,
"learning_rate": 1.861996797610825e-05,
"loss": 1.1895,
"step": 482500
},
{
"epoch": 1.88,
"learning_rate": 1.8587449808207845e-05,
"loss": 1.1894,
"step": 483000
},
{
"epoch": 1.89,
"learning_rate": 1.855493164030744e-05,
"loss": 1.1848,
"step": 483500
},
{
"epoch": 1.89,
"learning_rate": 1.8522413472407035e-05,
"loss": 1.1855,
"step": 484000
},
{
"epoch": 1.89,
"learning_rate": 1.848989530450663e-05,
"loss": 1.1856,
"step": 484500
},
{
"epoch": 1.89,
"learning_rate": 1.8457377136606222e-05,
"loss": 1.1802,
"step": 485000
},
{
"epoch": 1.89,
"learning_rate": 1.8424858968705817e-05,
"loss": 1.1805,
"step": 485500
},
{
"epoch": 1.9,
"learning_rate": 1.8392340800805412e-05,
"loss": 1.1837,
"step": 486000
},
{
"epoch": 1.9,
"learning_rate": 1.8359822632905004e-05,
"loss": 1.1772,
"step": 486500
},
{
"epoch": 1.9,
"learning_rate": 1.83273044650046e-05,
"loss": 1.1849,
"step": 487000
},
{
"epoch": 1.9,
"learning_rate": 1.8294786297104194e-05,
"loss": 1.1802,
"step": 487500
},
{
"epoch": 1.9,
"learning_rate": 1.826226812920379e-05,
"loss": 1.1749,
"step": 488000
},
{
"epoch": 1.91,
"learning_rate": 1.822974996130338e-05,
"loss": 1.1786,
"step": 488500
},
{
"epoch": 1.91,
"learning_rate": 1.8197231793402976e-05,
"loss": 1.182,
"step": 489000
},
{
"epoch": 1.91,
"learning_rate": 1.816471362550257e-05,
"loss": 1.1811,
"step": 489500
},
{
"epoch": 1.91,
"learning_rate": 1.8132195457602162e-05,
"loss": 1.1782,
"step": 490000
},
{
"epoch": 1.91,
"learning_rate": 1.8099677289701757e-05,
"loss": 1.1785,
"step": 490500
},
{
"epoch": 1.92,
"learning_rate": 1.8067159121801352e-05,
"loss": 1.1802,
"step": 491000
},
{
"epoch": 1.92,
"learning_rate": 1.8034640953900944e-05,
"loss": 1.1783,
"step": 491500
},
{
"epoch": 1.92,
"learning_rate": 1.800212278600054e-05,
"loss": 1.1862,
"step": 492000
},
{
"epoch": 1.92,
"learning_rate": 1.7969604618100134e-05,
"loss": 1.1749,
"step": 492500
},
{
"epoch": 1.92,
"learning_rate": 1.793708645019973e-05,
"loss": 1.1816,
"step": 493000
},
{
"epoch": 1.93,
"learning_rate": 1.790456828229932e-05,
"loss": 1.175,
"step": 493500
},
{
"epoch": 1.93,
"learning_rate": 1.7872050114398916e-05,
"loss": 1.178,
"step": 494000
},
{
"epoch": 1.93,
"learning_rate": 1.7839531946498507e-05,
"loss": 1.1739,
"step": 494500
},
{
"epoch": 1.93,
"learning_rate": 1.7807013778598102e-05,
"loss": 1.1811,
"step": 495000
},
{
"epoch": 1.93,
"learning_rate": 1.7774495610697697e-05,
"loss": 1.1812,
"step": 495500
},
{
"epoch": 1.94,
"learning_rate": 1.7741977442797292e-05,
"loss": 1.1772,
"step": 496000
},
{
"epoch": 1.94,
"learning_rate": 1.7709459274896887e-05,
"loss": 1.1761,
"step": 496500
},
{
"epoch": 1.94,
"learning_rate": 1.767694110699648e-05,
"loss": 1.1731,
"step": 497000
},
{
"epoch": 1.94,
"learning_rate": 1.764442293909607e-05,
"loss": 1.1655,
"step": 497500
},
{
"epoch": 1.94,
"learning_rate": 1.7611904771195666e-05,
"loss": 1.1715,
"step": 498000
},
{
"epoch": 1.95,
"learning_rate": 1.757938660329526e-05,
"loss": 1.1777,
"step": 498500
},
{
"epoch": 1.95,
"learning_rate": 1.7546868435394856e-05,
"loss": 1.172,
"step": 499000
},
{
"epoch": 1.95,
"learning_rate": 1.751435026749445e-05,
"loss": 1.1716,
"step": 499500
},
{
"epoch": 1.95,
"learning_rate": 1.7481832099594046e-05,
"loss": 1.1733,
"step": 500000
},
{
"epoch": 1.95,
"learning_rate": 1.744931393169364e-05,
"loss": 1.1745,
"step": 500500
},
{
"epoch": 1.95,
"learning_rate": 1.7416795763793233e-05,
"loss": 1.1722,
"step": 501000
},
{
"epoch": 1.96,
"learning_rate": 1.7384277595892824e-05,
"loss": 1.1628,
"step": 501500
},
{
"epoch": 1.96,
"learning_rate": 1.735175942799242e-05,
"loss": 1.1659,
"step": 502000
},
{
"epoch": 1.96,
"learning_rate": 1.7319241260092014e-05,
"loss": 1.1742,
"step": 502500
},
{
"epoch": 1.96,
"learning_rate": 1.728672309219161e-05,
"loss": 1.1674,
"step": 503000
},
{
"epoch": 1.96,
"learning_rate": 1.7254204924291204e-05,
"loss": 1.1704,
"step": 503500
},
{
"epoch": 1.97,
"learning_rate": 1.7221686756390796e-05,
"loss": 1.164,
"step": 504000
},
{
"epoch": 1.97,
"learning_rate": 1.718916858849039e-05,
"loss": 1.1759,
"step": 504500
},
{
"epoch": 1.97,
"learning_rate": 1.7156650420589983e-05,
"loss": 1.1658,
"step": 505000
},
{
"epoch": 1.97,
"learning_rate": 1.7124132252689578e-05,
"loss": 1.164,
"step": 505500
},
{
"epoch": 1.97,
"learning_rate": 1.7091614084789173e-05,
"loss": 1.1667,
"step": 506000
},
{
"epoch": 1.98,
"learning_rate": 1.7059095916888768e-05,
"loss": 1.1729,
"step": 506500
},
{
"epoch": 1.98,
"learning_rate": 1.702657774898836e-05,
"loss": 1.1668,
"step": 507000
},
{
"epoch": 1.98,
"learning_rate": 1.6994059581087954e-05,
"loss": 1.1709,
"step": 507500
},
{
"epoch": 1.98,
"learning_rate": 1.696154141318755e-05,
"loss": 1.1647,
"step": 508000
},
{
"epoch": 1.98,
"learning_rate": 1.692902324528714e-05,
"loss": 1.1599,
"step": 508500
},
{
"epoch": 1.99,
"learning_rate": 1.6896505077386736e-05,
"loss": 1.1652,
"step": 509000
},
{
"epoch": 1.99,
"learning_rate": 1.686398690948633e-05,
"loss": 1.1587,
"step": 509500
},
{
"epoch": 1.99,
"learning_rate": 1.6831468741585926e-05,
"loss": 1.1691,
"step": 510000
},
{
"epoch": 1.99,
"learning_rate": 1.6798950573685518e-05,
"loss": 1.1643,
"step": 510500
},
{
"epoch": 1.99,
"learning_rate": 1.6766432405785113e-05,
"loss": 1.1606,
"step": 511000
},
{
"epoch": 2.0,
"learning_rate": 1.6733914237884708e-05,
"loss": 1.1571,
"step": 511500
},
{
"epoch": 2.0,
"learning_rate": 1.6701396069984303e-05,
"loss": 1.1628,
"step": 512000
},
{
"epoch": 2.0,
"learning_rate": 1.6668877902083895e-05,
"loss": 1.1619,
"step": 512500
},
{
"epoch": 2.0,
"learning_rate": 1.663635973418349e-05,
"loss": 1.1597,
"step": 513000
},
{
"epoch": 2.0,
"learning_rate": 1.660384156628308e-05,
"loss": 1.1605,
"step": 513500
},
{
"epoch": 2.01,
"learning_rate": 1.6571323398382676e-05,
"loss": 1.1593,
"step": 514000
},
{
"epoch": 2.01,
"learning_rate": 1.653880523048227e-05,
"loss": 1.167,
"step": 514500
},
{
"epoch": 2.01,
"learning_rate": 1.6506287062581866e-05,
"loss": 1.1579,
"step": 515000
},
{
"epoch": 2.01,
"learning_rate": 1.647376889468146e-05,
"loss": 1.1611,
"step": 515500
},
{
"epoch": 2.01,
"learning_rate": 1.6441250726781053e-05,
"loss": 1.1498,
"step": 516000
},
{
"epoch": 2.02,
"learning_rate": 1.6408732558880645e-05,
"loss": 1.1566,
"step": 516500
},
{
"epoch": 2.02,
"learning_rate": 1.637621439098024e-05,
"loss": 1.1596,
"step": 517000
},
{
"epoch": 2.02,
"learning_rate": 1.6343696223079835e-05,
"loss": 1.1514,
"step": 517500
},
{
"epoch": 2.02,
"learning_rate": 1.631117805517943e-05,
"loss": 1.1478,
"step": 518000
},
{
"epoch": 2.02,
"learning_rate": 1.6278659887279025e-05,
"loss": 1.1547,
"step": 518500
},
{
"epoch": 2.03,
"learning_rate": 1.624614171937862e-05,
"loss": 1.1621,
"step": 519000
},
{
"epoch": 2.03,
"learning_rate": 1.621362355147821e-05,
"loss": 1.1557,
"step": 519500
},
{
"epoch": 2.03,
"learning_rate": 1.6181105383577803e-05,
"loss": 1.1481,
"step": 520000
},
{
"epoch": 2.03,
"learning_rate": 1.6148587215677398e-05,
"loss": 1.1573,
"step": 520500
},
{
"epoch": 2.03,
"learning_rate": 1.6116069047776993e-05,
"loss": 1.1651,
"step": 521000
},
{
"epoch": 2.03,
"learning_rate": 1.608355087987659e-05,
"loss": 1.1524,
"step": 521500
},
{
"epoch": 2.04,
"learning_rate": 1.6051032711976183e-05,
"loss": 1.151,
"step": 522000
},
{
"epoch": 2.04,
"learning_rate": 1.601851454407578e-05,
"loss": 1.1594,
"step": 522500
},
{
"epoch": 2.04,
"learning_rate": 1.598599637617537e-05,
"loss": 1.1506,
"step": 523000
},
{
"epoch": 2.04,
"learning_rate": 1.5953478208274965e-05,
"loss": 1.1606,
"step": 523500
},
{
"epoch": 2.04,
"learning_rate": 1.5920960040374557e-05,
"loss": 1.1546,
"step": 524000
},
{
"epoch": 2.05,
"learning_rate": 1.5888441872474152e-05,
"loss": 1.1559,
"step": 524500
},
{
"epoch": 2.05,
"learning_rate": 1.5855923704573747e-05,
"loss": 1.1504,
"step": 525000
},
{
"epoch": 2.05,
"learning_rate": 1.5823405536673342e-05,
"loss": 1.1538,
"step": 525500
},
{
"epoch": 2.05,
"learning_rate": 1.5790887368772933e-05,
"loss": 1.1498,
"step": 526000
},
{
"epoch": 2.05,
"learning_rate": 1.575836920087253e-05,
"loss": 1.1576,
"step": 526500
},
{
"epoch": 2.06,
"learning_rate": 1.5725851032972124e-05,
"loss": 1.1574,
"step": 527000
},
{
"epoch": 2.06,
"learning_rate": 1.5693332865071715e-05,
"loss": 1.1434,
"step": 527500
},
{
"epoch": 2.06,
"learning_rate": 1.566081469717131e-05,
"loss": 1.1451,
"step": 528000
},
{
"epoch": 2.06,
"learning_rate": 1.5628296529270905e-05,
"loss": 1.1555,
"step": 528500
},
{
"epoch": 2.06,
"learning_rate": 1.5595778361370497e-05,
"loss": 1.1537,
"step": 529000
},
{
"epoch": 2.07,
"learning_rate": 1.5563260193470092e-05,
"loss": 1.153,
"step": 529500
},
{
"epoch": 2.07,
"learning_rate": 1.5530742025569687e-05,
"loss": 1.1526,
"step": 530000
},
{
"epoch": 2.07,
"learning_rate": 1.5498223857669282e-05,
"loss": 1.1589,
"step": 530500
},
{
"epoch": 2.07,
"learning_rate": 1.5465705689768874e-05,
"loss": 1.1471,
"step": 531000
},
{
"epoch": 2.07,
"learning_rate": 1.543318752186847e-05,
"loss": 1.1536,
"step": 531500
},
{
"epoch": 2.08,
"learning_rate": 1.540066935396806e-05,
"loss": 1.1498,
"step": 532000
},
{
"epoch": 2.08,
"learning_rate": 1.5368151186067655e-05,
"loss": 1.1515,
"step": 532500
},
{
"epoch": 2.08,
"learning_rate": 1.533563301816725e-05,
"loss": 1.1455,
"step": 533000
},
{
"epoch": 2.08,
"learning_rate": 1.5303114850266845e-05,
"loss": 1.1417,
"step": 533500
},
{
"epoch": 2.08,
"learning_rate": 1.527059668236644e-05,
"loss": 1.1472,
"step": 534000
},
{
"epoch": 2.09,
"learning_rate": 1.5238078514466034e-05,
"loss": 1.1419,
"step": 534500
},
{
"epoch": 2.09,
"learning_rate": 1.5205560346565629e-05,
"loss": 1.1446,
"step": 535000
},
{
"epoch": 2.09,
"learning_rate": 1.517304217866522e-05,
"loss": 1.1467,
"step": 535500
},
{
"epoch": 2.09,
"learning_rate": 1.5140524010764814e-05,
"loss": 1.1511,
"step": 536000
},
{
"epoch": 2.09,
"learning_rate": 1.5108005842864409e-05,
"loss": 1.1476,
"step": 536500
},
{
"epoch": 2.1,
"learning_rate": 1.5075487674964004e-05,
"loss": 1.1456,
"step": 537000
},
{
"epoch": 2.1,
"learning_rate": 1.5042969507063597e-05,
"loss": 1.146,
"step": 537500
},
{
"epoch": 2.1,
"learning_rate": 1.5010451339163192e-05,
"loss": 1.1491,
"step": 538000
},
{
"epoch": 2.1,
"learning_rate": 1.4977933171262784e-05,
"loss": 1.1476,
"step": 538500
},
{
"epoch": 2.1,
"learning_rate": 1.4945415003362379e-05,
"loss": 1.1413,
"step": 539000
},
{
"epoch": 2.11,
"learning_rate": 1.4912896835461972e-05,
"loss": 1.1439,
"step": 539500
},
{
"epoch": 2.11,
"learning_rate": 1.4880378667561567e-05,
"loss": 1.1463,
"step": 540000
},
{
"epoch": 2.11,
"learning_rate": 1.4847860499661162e-05,
"loss": 1.1464,
"step": 540500
},
{
"epoch": 2.11,
"learning_rate": 1.4815342331760756e-05,
"loss": 1.1368,
"step": 541000
},
{
"epoch": 2.11,
"learning_rate": 1.4782824163860349e-05,
"loss": 1.1439,
"step": 541500
},
{
"epoch": 2.11,
"learning_rate": 1.4750305995959942e-05,
"loss": 1.1426,
"step": 542000
},
{
"epoch": 2.12,
"learning_rate": 1.4717787828059537e-05,
"loss": 1.1357,
"step": 542500
},
{
"epoch": 2.12,
"learning_rate": 1.4685269660159132e-05,
"loss": 1.1489,
"step": 543000
},
{
"epoch": 2.12,
"learning_rate": 1.4652751492258726e-05,
"loss": 1.1383,
"step": 543500
},
{
"epoch": 2.12,
"learning_rate": 1.462023332435832e-05,
"loss": 1.1401,
"step": 544000
},
{
"epoch": 2.12,
"learning_rate": 1.4587715156457916e-05,
"loss": 1.1387,
"step": 544500
},
{
"epoch": 2.13,
"learning_rate": 1.4555196988557507e-05,
"loss": 1.1428,
"step": 545000
},
{
"epoch": 2.13,
"learning_rate": 1.45226788206571e-05,
"loss": 1.1404,
"step": 545500
},
{
"epoch": 2.13,
"learning_rate": 1.4490160652756696e-05,
"loss": 1.1366,
"step": 546000
},
{
"epoch": 2.13,
"learning_rate": 1.445764248485629e-05,
"loss": 1.1338,
"step": 546500
},
{
"epoch": 2.13,
"learning_rate": 1.4425124316955884e-05,
"loss": 1.1323,
"step": 547000
},
{
"epoch": 2.14,
"learning_rate": 1.439260614905548e-05,
"loss": 1.1413,
"step": 547500
},
{
"epoch": 2.14,
"learning_rate": 1.4360087981155071e-05,
"loss": 1.1357,
"step": 548000
},
{
"epoch": 2.14,
"learning_rate": 1.4327569813254666e-05,
"loss": 1.1352,
"step": 548500
},
{
"epoch": 2.14,
"learning_rate": 1.429505164535426e-05,
"loss": 1.1406,
"step": 549000
},
{
"epoch": 2.14,
"learning_rate": 1.4262533477453854e-05,
"loss": 1.1359,
"step": 549500
},
{
"epoch": 2.15,
"learning_rate": 1.423001530955345e-05,
"loss": 1.1319,
"step": 550000
},
{
"epoch": 2.15,
"learning_rate": 1.4197497141653043e-05,
"loss": 1.1335,
"step": 550500
},
{
"epoch": 2.15,
"learning_rate": 1.4164978973752634e-05,
"loss": 1.1293,
"step": 551000
},
{
"epoch": 2.15,
"learning_rate": 1.413246080585223e-05,
"loss": 1.1322,
"step": 551500
},
{
"epoch": 2.15,
"learning_rate": 1.4099942637951824e-05,
"loss": 1.135,
"step": 552000
},
{
"epoch": 2.16,
"learning_rate": 1.4067424470051418e-05,
"loss": 1.1328,
"step": 552500
},
{
"epoch": 2.16,
"learning_rate": 1.4034906302151013e-05,
"loss": 1.1359,
"step": 553000
},
{
"epoch": 2.16,
"learning_rate": 1.4002388134250608e-05,
"loss": 1.1387,
"step": 553500
},
{
"epoch": 2.16,
"learning_rate": 1.39698699663502e-05,
"loss": 1.1368,
"step": 554000
},
{
"epoch": 2.16,
"learning_rate": 1.3937351798449794e-05,
"loss": 1.1342,
"step": 554500
},
{
"epoch": 2.17,
"learning_rate": 1.3904833630549388e-05,
"loss": 1.1309,
"step": 555000
},
{
"epoch": 2.17,
"learning_rate": 1.3872315462648983e-05,
"loss": 1.1427,
"step": 555500
},
{
"epoch": 2.17,
"learning_rate": 1.3839797294748578e-05,
"loss": 1.1333,
"step": 556000
},
{
"epoch": 2.17,
"learning_rate": 1.3807279126848171e-05,
"loss": 1.1328,
"step": 556500
},
{
"epoch": 2.17,
"learning_rate": 1.3774760958947766e-05,
"loss": 1.1394,
"step": 557000
},
{
"epoch": 2.18,
"learning_rate": 1.3742242791047358e-05,
"loss": 1.1368,
"step": 557500
},
{
"epoch": 2.18,
"learning_rate": 1.3709724623146953e-05,
"loss": 1.1344,
"step": 558000
},
{
"epoch": 2.18,
"learning_rate": 1.3677206455246546e-05,
"loss": 1.1375,
"step": 558500
},
{
"epoch": 2.18,
"learning_rate": 1.3644688287346141e-05,
"loss": 1.1308,
"step": 559000
},
{
"epoch": 2.18,
"learning_rate": 1.3612170119445736e-05,
"loss": 1.1339,
"step": 559500
},
{
"epoch": 2.19,
"learning_rate": 1.357965195154533e-05,
"loss": 1.1252,
"step": 560000
},
{
"epoch": 2.19,
"learning_rate": 1.3547133783644921e-05,
"loss": 1.1296,
"step": 560500
},
{
"epoch": 2.19,
"learning_rate": 1.3514615615744516e-05,
"loss": 1.1386,
"step": 561000
},
{
"epoch": 2.19,
"learning_rate": 1.3482097447844111e-05,
"loss": 1.1379,
"step": 561500
},
{
"epoch": 2.19,
"learning_rate": 1.3449579279943705e-05,
"loss": 1.1262,
"step": 562000
},
{
"epoch": 2.19,
"learning_rate": 1.34170611120433e-05,
"loss": 1.1237,
"step": 562500
},
{
"epoch": 2.2,
"learning_rate": 1.3384542944142895e-05,
"loss": 1.1263,
"step": 563000
},
{
"epoch": 2.2,
"learning_rate": 1.3352024776242486e-05,
"loss": 1.1255,
"step": 563500
},
{
"epoch": 2.2,
"learning_rate": 1.331950660834208e-05,
"loss": 1.1362,
"step": 564000
},
{
"epoch": 2.2,
"learning_rate": 1.3286988440441675e-05,
"loss": 1.1202,
"step": 564500
},
{
"epoch": 2.2,
"learning_rate": 1.325447027254127e-05,
"loss": 1.1261,
"step": 565000
},
{
"epoch": 2.21,
"learning_rate": 1.3221952104640865e-05,
"loss": 1.128,
"step": 565500
},
{
"epoch": 2.21,
"learning_rate": 1.3189433936740458e-05,
"loss": 1.1336,
"step": 566000
},
{
"epoch": 2.21,
"learning_rate": 1.3156915768840053e-05,
"loss": 1.1263,
"step": 566500
},
{
"epoch": 2.21,
"learning_rate": 1.3124397600939645e-05,
"loss": 1.1338,
"step": 567000
},
{
"epoch": 2.21,
"learning_rate": 1.309187943303924e-05,
"loss": 1.1248,
"step": 567500
},
{
"epoch": 2.22,
"learning_rate": 1.3059361265138833e-05,
"loss": 1.1261,
"step": 568000
},
{
"epoch": 2.22,
"learning_rate": 1.3026843097238428e-05,
"loss": 1.1328,
"step": 568500
},
{
"epoch": 2.22,
"learning_rate": 1.2994324929338023e-05,
"loss": 1.1213,
"step": 569000
},
{
"epoch": 2.22,
"learning_rate": 1.2961806761437617e-05,
"loss": 1.1279,
"step": 569500
},
{
"epoch": 2.22,
"learning_rate": 1.2929288593537208e-05,
"loss": 1.1243,
"step": 570000
},
{
"epoch": 2.23,
"learning_rate": 1.2896770425636803e-05,
"loss": 1.1201,
"step": 570500
},
{
"epoch": 2.23,
"learning_rate": 1.2864252257736398e-05,
"loss": 1.1245,
"step": 571000
},
{
"epoch": 2.23,
"learning_rate": 1.2831734089835992e-05,
"loss": 1.1251,
"step": 571500
},
{
"epoch": 2.23,
"learning_rate": 1.2799215921935587e-05,
"loss": 1.1285,
"step": 572000
},
{
"epoch": 2.23,
"learning_rate": 1.2766697754035182e-05,
"loss": 1.1276,
"step": 572500
},
{
"epoch": 2.24,
"learning_rate": 1.2734179586134773e-05,
"loss": 1.1199,
"step": 573000
},
{
"epoch": 2.24,
"learning_rate": 1.2701661418234367e-05,
"loss": 1.1215,
"step": 573500
},
{
"epoch": 2.24,
"learning_rate": 1.2669143250333962e-05,
"loss": 1.1203,
"step": 574000
},
{
"epoch": 2.24,
"learning_rate": 1.2636625082433557e-05,
"loss": 1.1168,
"step": 574500
},
{
"epoch": 2.24,
"learning_rate": 1.260410691453315e-05,
"loss": 1.1223,
"step": 575000
},
{
"epoch": 2.25,
"learning_rate": 1.2571588746632745e-05,
"loss": 1.117,
"step": 575500
},
{
"epoch": 2.25,
"learning_rate": 1.2539070578732337e-05,
"loss": 1.1251,
"step": 576000
},
{
"epoch": 2.25,
"learning_rate": 1.2506552410831932e-05,
"loss": 1.1169,
"step": 576500
},
{
"epoch": 2.25,
"learning_rate": 1.2474034242931527e-05,
"loss": 1.1183,
"step": 577000
},
{
"epoch": 2.25,
"learning_rate": 1.244151607503112e-05,
"loss": 1.1251,
"step": 577500
},
{
"epoch": 2.26,
"learning_rate": 1.2408997907130715e-05,
"loss": 1.1188,
"step": 578000
},
{
"epoch": 2.26,
"learning_rate": 1.2376479739230309e-05,
"loss": 1.1198,
"step": 578500
},
{
"epoch": 2.26,
"learning_rate": 1.2343961571329902e-05,
"loss": 1.1304,
"step": 579000
},
{
"epoch": 2.26,
"learning_rate": 1.2311443403429497e-05,
"loss": 1.121,
"step": 579500
},
{
"epoch": 2.26,
"learning_rate": 1.227892523552909e-05,
"loss": 1.1215,
"step": 580000
},
{
"epoch": 2.27,
"learning_rate": 1.2246407067628685e-05,
"loss": 1.1213,
"step": 580500
},
{
"epoch": 2.27,
"learning_rate": 1.2213888899728279e-05,
"loss": 1.1229,
"step": 581000
},
{
"epoch": 2.27,
"learning_rate": 1.2181370731827872e-05,
"loss": 1.1167,
"step": 581500
},
{
"epoch": 2.27,
"learning_rate": 1.2148852563927467e-05,
"loss": 1.1308,
"step": 582000
},
{
"epoch": 2.27,
"learning_rate": 1.211633439602706e-05,
"loss": 1.1189,
"step": 582500
},
{
"epoch": 2.27,
"learning_rate": 1.2083816228126654e-05,
"loss": 1.1161,
"step": 583000
},
{
"epoch": 2.28,
"learning_rate": 1.2051298060226249e-05,
"loss": 1.1188,
"step": 583500
},
{
"epoch": 2.28,
"learning_rate": 1.2018779892325844e-05,
"loss": 1.1157,
"step": 584000
},
{
"epoch": 2.28,
"learning_rate": 1.1986261724425437e-05,
"loss": 1.1145,
"step": 584500
},
{
"epoch": 2.28,
"learning_rate": 1.195374355652503e-05,
"loss": 1.1179,
"step": 585000
},
{
"epoch": 2.28,
"learning_rate": 1.1921225388624625e-05,
"loss": 1.1155,
"step": 585500
},
{
"epoch": 2.29,
"learning_rate": 1.188870722072422e-05,
"loss": 1.1277,
"step": 586000
},
{
"epoch": 2.29,
"learning_rate": 1.1856189052823812e-05,
"loss": 1.1162,
"step": 586500
},
{
"epoch": 2.29,
"learning_rate": 1.1823670884923407e-05,
"loss": 1.122,
"step": 587000
},
{
"epoch": 2.29,
"learning_rate": 1.1791152717023002e-05,
"loss": 1.1188,
"step": 587500
},
{
"epoch": 2.29,
"learning_rate": 1.1758634549122596e-05,
"loss": 1.1187,
"step": 588000
},
{
"epoch": 2.3,
"learning_rate": 1.1726116381222189e-05,
"loss": 1.108,
"step": 588500
},
{
"epoch": 2.3,
"learning_rate": 1.1693598213321784e-05,
"loss": 1.1199,
"step": 589000
},
{
"epoch": 2.3,
"learning_rate": 1.1661080045421377e-05,
"loss": 1.1125,
"step": 589500
},
{
"epoch": 2.3,
"learning_rate": 1.1628561877520972e-05,
"loss": 1.117,
"step": 590000
},
{
"epoch": 2.3,
"learning_rate": 1.1596043709620566e-05,
"loss": 1.1159,
"step": 590500
},
{
"epoch": 2.31,
"learning_rate": 1.1563525541720159e-05,
"loss": 1.1172,
"step": 591000
},
{
"epoch": 2.31,
"learning_rate": 1.1531007373819754e-05,
"loss": 1.1136,
"step": 591500
},
{
"epoch": 2.31,
"learning_rate": 1.1498489205919347e-05,
"loss": 1.1146,
"step": 592000
},
{
"epoch": 2.31,
"learning_rate": 1.146597103801894e-05,
"loss": 1.111,
"step": 592500
},
{
"epoch": 2.31,
"learning_rate": 1.1433452870118536e-05,
"loss": 1.1129,
"step": 593000
},
{
"epoch": 2.32,
"learning_rate": 1.140093470221813e-05,
"loss": 1.1069,
"step": 593500
},
{
"epoch": 2.32,
"learning_rate": 1.1368416534317722e-05,
"loss": 1.1168,
"step": 594000
},
{
"epoch": 2.32,
"learning_rate": 1.1335898366417317e-05,
"loss": 1.1157,
"step": 594500
},
{
"epoch": 2.32,
"learning_rate": 1.1303380198516912e-05,
"loss": 1.1111,
"step": 595000
},
{
"epoch": 2.32,
"learning_rate": 1.1270862030616506e-05,
"loss": 1.1084,
"step": 595500
},
{
"epoch": 2.33,
"learning_rate": 1.1238343862716099e-05,
"loss": 1.1132,
"step": 596000
},
{
"epoch": 2.33,
"learning_rate": 1.1205825694815694e-05,
"loss": 1.1139,
"step": 596500
},
{
"epoch": 2.33,
"learning_rate": 1.117330752691529e-05,
"loss": 1.11,
"step": 597000
},
{
"epoch": 2.33,
"learning_rate": 1.1140789359014883e-05,
"loss": 1.1162,
"step": 597500
},
{
"epoch": 2.33,
"learning_rate": 1.1108271191114476e-05,
"loss": 1.11,
"step": 598000
},
{
"epoch": 2.34,
"learning_rate": 1.1075753023214071e-05,
"loss": 1.1106,
"step": 598500
},
{
"epoch": 2.34,
"learning_rate": 1.1043234855313664e-05,
"loss": 1.1054,
"step": 599000
},
{
"epoch": 2.34,
"learning_rate": 1.1010716687413258e-05,
"loss": 1.1091,
"step": 599500
},
{
"epoch": 2.34,
"learning_rate": 1.0978198519512853e-05,
"loss": 1.1067,
"step": 600000
},
{
"epoch": 2.34,
"learning_rate": 1.0945680351612446e-05,
"loss": 1.1033,
"step": 600500
},
{
"epoch": 2.35,
"learning_rate": 1.0913162183712041e-05,
"loss": 1.1133,
"step": 601000
},
{
"epoch": 2.35,
"learning_rate": 1.0880644015811634e-05,
"loss": 1.1101,
"step": 601500
},
{
"epoch": 2.35,
"learning_rate": 1.0848125847911228e-05,
"loss": 1.1105,
"step": 602000
},
{
"epoch": 2.35,
"learning_rate": 1.0815607680010823e-05,
"loss": 1.1083,
"step": 602500
},
{
"epoch": 2.35,
"learning_rate": 1.0783089512110418e-05,
"loss": 1.1077,
"step": 603000
},
{
"epoch": 2.35,
"learning_rate": 1.075057134421001e-05,
"loss": 1.1051,
"step": 603500
},
{
"epoch": 2.36,
"learning_rate": 1.0718053176309604e-05,
"loss": 1.1064,
"step": 604000
},
{
"epoch": 2.36,
"learning_rate": 1.06855350084092e-05,
"loss": 1.1054,
"step": 604500
},
{
"epoch": 2.36,
"learning_rate": 1.0653016840508793e-05,
"loss": 1.1107,
"step": 605000
},
{
"epoch": 2.36,
"learning_rate": 1.0620498672608386e-05,
"loss": 1.1031,
"step": 605500
},
{
"epoch": 2.36,
"learning_rate": 1.0587980504707981e-05,
"loss": 1.1097,
"step": 606000
},
{
"epoch": 2.37,
"learning_rate": 1.0555462336807576e-05,
"loss": 1.1074,
"step": 606500
},
{
"epoch": 2.37,
"learning_rate": 1.0522944168907168e-05,
"loss": 1.1144,
"step": 607000
},
{
"epoch": 2.37,
"learning_rate": 1.0490426001006763e-05,
"loss": 1.1106,
"step": 607500
},
{
"epoch": 2.37,
"learning_rate": 1.0457907833106358e-05,
"loss": 1.1091,
"step": 608000
},
{
"epoch": 2.37,
"learning_rate": 1.0425389665205951e-05,
"loss": 1.1057,
"step": 608500
},
{
"epoch": 2.38,
"learning_rate": 1.0392871497305545e-05,
"loss": 1.1066,
"step": 609000
},
{
"epoch": 2.38,
"learning_rate": 1.036035332940514e-05,
"loss": 1.108,
"step": 609500
},
{
"epoch": 2.38,
"learning_rate": 1.0327835161504733e-05,
"loss": 1.1104,
"step": 610000
},
{
"epoch": 2.38,
"learning_rate": 1.0295316993604328e-05,
"loss": 1.1158,
"step": 610500
},
{
"epoch": 2.38,
"learning_rate": 1.0262798825703921e-05,
"loss": 1.109,
"step": 611000
},
{
"epoch": 2.39,
"learning_rate": 1.0230280657803515e-05,
"loss": 1.1011,
"step": 611500
},
{
"epoch": 2.39,
"learning_rate": 1.019776248990311e-05,
"loss": 1.0989,
"step": 612000
},
{
"epoch": 2.39,
"learning_rate": 1.0165244322002703e-05,
"loss": 1.109,
"step": 612500
},
{
"epoch": 2.39,
"learning_rate": 1.0132726154102296e-05,
"loss": 1.0999,
"step": 613000
},
{
"epoch": 2.39,
"learning_rate": 1.0100207986201891e-05,
"loss": 1.1091,
"step": 613500
},
{
"epoch": 2.4,
"learning_rate": 1.0067689818301486e-05,
"loss": 1.1033,
"step": 614000
},
{
"epoch": 2.4,
"learning_rate": 1.003517165040108e-05,
"loss": 1.1087,
"step": 614500
},
{
"epoch": 2.4,
"learning_rate": 1.0002653482500673e-05,
"loss": 1.0964,
"step": 615000
},
{
"epoch": 2.4,
"learning_rate": 9.970135314600268e-06,
"loss": 1.0946,
"step": 615500
},
{
"epoch": 2.4,
"learning_rate": 9.937617146699861e-06,
"loss": 1.0994,
"step": 616000
},
{
"epoch": 2.41,
"learning_rate": 9.905098978799455e-06,
"loss": 1.1078,
"step": 616500
},
{
"epoch": 2.41,
"learning_rate": 9.87258081089905e-06,
"loss": 1.101,
"step": 617000
},
{
"epoch": 2.41,
"learning_rate": 9.840062642998645e-06,
"loss": 1.1085,
"step": 617500
},
{
"epoch": 2.41,
"learning_rate": 9.807544475098238e-06,
"loss": 1.1083,
"step": 618000
},
{
"epoch": 2.41,
"learning_rate": 9.775026307197832e-06,
"loss": 1.1069,
"step": 618500
},
{
"epoch": 2.42,
"learning_rate": 9.742508139297427e-06,
"loss": 1.1041,
"step": 619000
},
{
"epoch": 2.42,
"learning_rate": 9.70998997139702e-06,
"loss": 1.1056,
"step": 619500
},
{
"epoch": 2.42,
"learning_rate": 9.677471803496615e-06,
"loss": 1.1013,
"step": 620000
},
{
"epoch": 2.42,
"learning_rate": 9.644953635596208e-06,
"loss": 1.1007,
"step": 620500
},
{
"epoch": 2.42,
"learning_rate": 9.612435467695802e-06,
"loss": 1.1092,
"step": 621000
},
{
"epoch": 2.43,
"learning_rate": 9.579917299795397e-06,
"loss": 1.104,
"step": 621500
},
{
"epoch": 2.43,
"learning_rate": 9.54739913189499e-06,
"loss": 1.098,
"step": 622000
},
{
"epoch": 2.43,
"learning_rate": 9.514880963994583e-06,
"loss": 1.0999,
"step": 622500
},
{
"epoch": 2.43,
"learning_rate": 9.482362796094178e-06,
"loss": 1.0956,
"step": 623000
},
{
"epoch": 2.43,
"learning_rate": 9.449844628193773e-06,
"loss": 1.1052,
"step": 623500
},
{
"epoch": 2.43,
"learning_rate": 9.417326460293365e-06,
"loss": 1.1057,
"step": 624000
},
{
"epoch": 2.44,
"learning_rate": 9.38480829239296e-06,
"loss": 1.0962,
"step": 624500
},
{
"epoch": 2.44,
"learning_rate": 9.352290124492555e-06,
"loss": 1.103,
"step": 625000
},
{
"epoch": 2.44,
"learning_rate": 9.319771956592148e-06,
"loss": 1.1092,
"step": 625500
},
{
"epoch": 2.44,
"learning_rate": 9.287253788691742e-06,
"loss": 1.0881,
"step": 626000
},
{
"epoch": 2.44,
"learning_rate": 9.254735620791337e-06,
"loss": 1.0999,
"step": 626500
},
{
"epoch": 2.45,
"learning_rate": 9.22221745289093e-06,
"loss": 1.1054,
"step": 627000
},
{
"epoch": 2.45,
"learning_rate": 9.189699284990525e-06,
"loss": 1.0941,
"step": 627500
},
{
"epoch": 2.45,
"learning_rate": 9.157181117090119e-06,
"loss": 1.1027,
"step": 628000
},
{
"epoch": 2.45,
"learning_rate": 9.124662949189712e-06,
"loss": 1.0986,
"step": 628500
},
{
"epoch": 2.45,
"learning_rate": 9.092144781289307e-06,
"loss": 1.0928,
"step": 629000
},
{
"epoch": 2.46,
"learning_rate": 9.0596266133889e-06,
"loss": 1.0906,
"step": 629500
},
{
"epoch": 2.46,
"learning_rate": 9.027108445488495e-06,
"loss": 1.0925,
"step": 630000
},
{
"epoch": 2.46,
"learning_rate": 8.994590277588089e-06,
"loss": 1.0926,
"step": 630500
},
{
"epoch": 2.46,
"learning_rate": 8.962072109687684e-06,
"loss": 1.0927,
"step": 631000
},
{
"epoch": 2.46,
"learning_rate": 8.929553941787277e-06,
"loss": 1.1047,
"step": 631500
},
{
"epoch": 2.47,
"learning_rate": 8.89703577388687e-06,
"loss": 1.0932,
"step": 632000
},
{
"epoch": 2.47,
"learning_rate": 8.864517605986465e-06,
"loss": 1.099,
"step": 632500
},
{
"epoch": 2.47,
"learning_rate": 8.83199943808606e-06,
"loss": 1.0946,
"step": 633000
},
{
"epoch": 2.47,
"learning_rate": 8.799481270185652e-06,
"loss": 1.1017,
"step": 633500
},
{
"epoch": 2.47,
"learning_rate": 8.766963102285247e-06,
"loss": 1.1008,
"step": 634000
},
{
"epoch": 2.48,
"learning_rate": 8.734444934384842e-06,
"loss": 1.0916,
"step": 634500
},
{
"epoch": 2.48,
"learning_rate": 8.701926766484435e-06,
"loss": 1.0949,
"step": 635000
},
{
"epoch": 2.48,
"learning_rate": 8.669408598584029e-06,
"loss": 1.0958,
"step": 635500
},
{
"epoch": 2.48,
"learning_rate": 8.636890430683624e-06,
"loss": 1.1001,
"step": 636000
},
{
"epoch": 2.48,
"learning_rate": 8.604372262783217e-06,
"loss": 1.0931,
"step": 636500
},
{
"epoch": 2.49,
"learning_rate": 8.57185409488281e-06,
"loss": 1.0975,
"step": 637000
},
{
"epoch": 2.49,
"learning_rate": 8.539335926982406e-06,
"loss": 1.1003,
"step": 637500
},
{
"epoch": 2.49,
"learning_rate": 8.506817759081999e-06,
"loss": 1.0929,
"step": 638000
},
{
"epoch": 2.49,
"learning_rate": 8.474299591181594e-06,
"loss": 1.0987,
"step": 638500
},
{
"epoch": 2.49,
"learning_rate": 8.441781423281187e-06,
"loss": 1.0915,
"step": 639000
},
{
"epoch": 2.5,
"learning_rate": 8.40926325538078e-06,
"loss": 1.0907,
"step": 639500
},
{
"epoch": 2.5,
"learning_rate": 8.376745087480376e-06,
"loss": 1.0959,
"step": 640000
},
{
"epoch": 2.5,
"learning_rate": 8.34422691957997e-06,
"loss": 1.094,
"step": 640500
},
{
"epoch": 2.5,
"learning_rate": 8.311708751679564e-06,
"loss": 1.0932,
"step": 641000
},
{
"epoch": 2.5,
"learning_rate": 8.279190583779157e-06,
"loss": 1.0887,
"step": 641500
},
{
"epoch": 2.51,
"learning_rate": 8.246672415878752e-06,
"loss": 1.0996,
"step": 642000
},
{
"epoch": 2.51,
"learning_rate": 8.214154247978346e-06,
"loss": 1.0919,
"step": 642500
},
{
"epoch": 2.51,
"learning_rate": 8.181636080077939e-06,
"loss": 1.0899,
"step": 643000
},
{
"epoch": 2.51,
"learning_rate": 8.149117912177534e-06,
"loss": 1.0899,
"step": 643500
},
{
"epoch": 2.51,
"learning_rate": 8.116599744277129e-06,
"loss": 1.0939,
"step": 644000
},
{
"epoch": 2.51,
"learning_rate": 8.084081576376722e-06,
"loss": 1.0915,
"step": 644500
},
{
"epoch": 2.52,
"learning_rate": 8.051563408476316e-06,
"loss": 1.0901,
"step": 645000
},
{
"epoch": 2.52,
"learning_rate": 8.01904524057591e-06,
"loss": 1.0844,
"step": 645500
},
{
"epoch": 2.52,
"learning_rate": 7.986527072675504e-06,
"loss": 1.0808,
"step": 646000
},
{
"epoch": 2.52,
"learning_rate": 7.954008904775098e-06,
"loss": 1.0843,
"step": 646500
},
{
"epoch": 2.52,
"learning_rate": 7.921490736874693e-06,
"loss": 1.0931,
"step": 647000
},
{
"epoch": 2.53,
"learning_rate": 7.888972568974286e-06,
"loss": 1.0874,
"step": 647500
},
{
"epoch": 2.53,
"learning_rate": 7.856454401073881e-06,
"loss": 1.0893,
"step": 648000
},
{
"epoch": 2.53,
"learning_rate": 7.823936233173474e-06,
"loss": 1.0879,
"step": 648500
},
{
"epoch": 2.53,
"learning_rate": 7.791418065273068e-06,
"loss": 1.1044,
"step": 649000
},
{
"epoch": 2.53,
"learning_rate": 7.758899897372663e-06,
"loss": 1.0898,
"step": 649500
},
{
"epoch": 2.54,
"learning_rate": 7.726381729472256e-06,
"loss": 1.0876,
"step": 650000
},
{
"epoch": 2.54,
"learning_rate": 7.69386356157185e-06,
"loss": 1.0968,
"step": 650500
},
{
"epoch": 2.54,
"learning_rate": 7.661345393671444e-06,
"loss": 1.087,
"step": 651000
},
{
"epoch": 2.54,
"learning_rate": 7.6288272257710385e-06,
"loss": 1.0867,
"step": 651500
},
{
"epoch": 2.54,
"learning_rate": 7.5963090578706336e-06,
"loss": 1.0834,
"step": 652000
},
{
"epoch": 2.55,
"learning_rate": 7.563790889970226e-06,
"loss": 1.0882,
"step": 652500
},
{
"epoch": 2.55,
"learning_rate": 7.531272722069821e-06,
"loss": 1.0883,
"step": 653000
},
{
"epoch": 2.55,
"learning_rate": 7.498754554169415e-06,
"loss": 1.0811,
"step": 653500
},
{
"epoch": 2.55,
"learning_rate": 7.466236386269009e-06,
"loss": 1.0852,
"step": 654000
},
{
"epoch": 2.55,
"learning_rate": 7.433718218368603e-06,
"loss": 1.0924,
"step": 654500
},
{
"epoch": 2.56,
"learning_rate": 7.401200050468198e-06,
"loss": 1.0938,
"step": 655000
},
{
"epoch": 2.56,
"learning_rate": 7.36868188256779e-06,
"loss": 1.0888,
"step": 655500
},
{
"epoch": 2.56,
"learning_rate": 7.336163714667385e-06,
"loss": 1.0905,
"step": 656000
},
{
"epoch": 2.56,
"learning_rate": 7.3036455467669795e-06,
"loss": 1.0882,
"step": 656500
},
{
"epoch": 2.56,
"learning_rate": 7.271127378866573e-06,
"loss": 1.0871,
"step": 657000
},
{
"epoch": 2.57,
"learning_rate": 7.238609210966167e-06,
"loss": 1.0855,
"step": 657500
},
{
"epoch": 2.57,
"learning_rate": 7.206091043065761e-06,
"loss": 1.0888,
"step": 658000
},
{
"epoch": 2.57,
"learning_rate": 7.173572875165355e-06,
"loss": 1.0819,
"step": 658500
},
{
"epoch": 2.57,
"learning_rate": 7.141054707264949e-06,
"loss": 1.089,
"step": 659000
},
{
"epoch": 2.57,
"learning_rate": 7.108536539364544e-06,
"loss": 1.0899,
"step": 659500
},
{
"epoch": 2.58,
"learning_rate": 7.076018371464137e-06,
"loss": 1.0841,
"step": 660000
},
{
"epoch": 2.58,
"learning_rate": 7.043500203563731e-06,
"loss": 1.0852,
"step": 660500
},
{
"epoch": 2.58,
"learning_rate": 7.0109820356633255e-06,
"loss": 1.0779,
"step": 661000
},
{
"epoch": 2.58,
"learning_rate": 6.978463867762919e-06,
"loss": 1.084,
"step": 661500
},
{
"epoch": 2.58,
"learning_rate": 6.945945699862513e-06,
"loss": 1.0835,
"step": 662000
},
{
"epoch": 2.59,
"learning_rate": 6.913427531962108e-06,
"loss": 1.0893,
"step": 662500
},
{
"epoch": 2.59,
"learning_rate": 6.880909364061702e-06,
"loss": 1.0897,
"step": 663000
},
{
"epoch": 2.59,
"learning_rate": 6.848391196161296e-06,
"loss": 1.0888,
"step": 663500
},
{
"epoch": 2.59,
"learning_rate": 6.81587302826089e-06,
"loss": 1.0907,
"step": 664000
},
{
"epoch": 2.59,
"learning_rate": 6.783354860360484e-06,
"loss": 1.0827,
"step": 664500
},
{
"epoch": 2.59,
"learning_rate": 6.750836692460077e-06,
"loss": 1.0874,
"step": 665000
},
{
"epoch": 2.6,
"learning_rate": 6.718318524559672e-06,
"loss": 1.0859,
"step": 665500
},
{
"epoch": 2.6,
"learning_rate": 6.6858003566592665e-06,
"loss": 1.0768,
"step": 666000
},
{
"epoch": 2.6,
"learning_rate": 6.65328218875886e-06,
"loss": 1.084,
"step": 666500
},
{
"epoch": 2.6,
"learning_rate": 6.620764020858454e-06,
"loss": 1.0814,
"step": 667000
},
{
"epoch": 2.6,
"learning_rate": 6.588245852958048e-06,
"loss": 1.0858,
"step": 667500
},
{
"epoch": 2.61,
"learning_rate": 6.5557276850576416e-06,
"loss": 1.0886,
"step": 668000
},
{
"epoch": 2.61,
"learning_rate": 6.523209517157236e-06,
"loss": 1.077,
"step": 668500
},
{
"epoch": 2.61,
"learning_rate": 6.490691349256831e-06,
"loss": 1.0803,
"step": 669000
},
{
"epoch": 2.61,
"learning_rate": 6.458173181356423e-06,
"loss": 1.0757,
"step": 669500
},
{
"epoch": 2.61,
"learning_rate": 6.425655013456018e-06,
"loss": 1.0778,
"step": 670000
},
{
"epoch": 2.62,
"learning_rate": 6.3931368455556125e-06,
"loss": 1.0805,
"step": 670500
},
{
"epoch": 2.62,
"learning_rate": 6.360618677655206e-06,
"loss": 1.0889,
"step": 671000
},
{
"epoch": 2.62,
"learning_rate": 6.3281005097548e-06,
"loss": 1.0801,
"step": 671500
},
{
"epoch": 2.62,
"learning_rate": 6.295582341854395e-06,
"loss": 1.0801,
"step": 672000
},
{
"epoch": 2.62,
"learning_rate": 6.2630641739539876e-06,
"loss": 1.0843,
"step": 672500
},
{
"epoch": 2.63,
"learning_rate": 6.230546006053583e-06,
"loss": 1.0856,
"step": 673000
},
{
"epoch": 2.63,
"learning_rate": 6.198027838153177e-06,
"loss": 1.0809,
"step": 673500
},
{
"epoch": 2.63,
"learning_rate": 6.16550967025277e-06,
"loss": 1.0775,
"step": 674000
},
{
"epoch": 2.63,
"learning_rate": 6.132991502352364e-06,
"loss": 1.0819,
"step": 674500
},
{
"epoch": 2.63,
"learning_rate": 6.1004733344519585e-06,
"loss": 1.0753,
"step": 675000
},
{
"epoch": 2.64,
"learning_rate": 6.067955166551553e-06,
"loss": 1.0901,
"step": 675500
},
{
"epoch": 2.64,
"learning_rate": 6.035436998651146e-06,
"loss": 1.0796,
"step": 676000
},
{
"epoch": 2.64,
"learning_rate": 6.002918830750741e-06,
"loss": 1.0775,
"step": 676500
},
{
"epoch": 2.64,
"learning_rate": 5.970400662850334e-06,
"loss": 1.0789,
"step": 677000
},
{
"epoch": 2.64,
"learning_rate": 5.937882494949929e-06,
"loss": 1.0898,
"step": 677500
},
{
"epoch": 2.65,
"learning_rate": 5.905364327049523e-06,
"loss": 1.0753,
"step": 678000
},
{
"epoch": 2.65,
"learning_rate": 5.872846159149117e-06,
"loss": 1.0732,
"step": 678500
},
{
"epoch": 2.65,
"learning_rate": 5.840327991248711e-06,
"loss": 1.0776,
"step": 679000
},
{
"epoch": 2.65,
"learning_rate": 5.807809823348305e-06,
"loss": 1.0794,
"step": 679500
},
{
"epoch": 2.65,
"learning_rate": 5.775291655447899e-06,
"loss": 1.0765,
"step": 680000
},
{
"epoch": 2.66,
"learning_rate": 5.742773487547493e-06,
"loss": 1.0737,
"step": 680500
},
{
"epoch": 2.66,
"learning_rate": 5.710255319647087e-06,
"loss": 1.0817,
"step": 681000
},
{
"epoch": 2.66,
"learning_rate": 5.677737151746681e-06,
"loss": 1.0755,
"step": 681500
},
{
"epoch": 2.66,
"learning_rate": 5.645218983846275e-06,
"loss": 1.0853,
"step": 682000
},
{
"epoch": 2.66,
"learning_rate": 5.612700815945869e-06,
"loss": 1.0847,
"step": 682500
},
{
"epoch": 2.67,
"learning_rate": 5.580182648045464e-06,
"loss": 1.086,
"step": 683000
},
{
"epoch": 2.67,
"learning_rate": 5.547664480145057e-06,
"loss": 1.0752,
"step": 683500
},
{
"epoch": 2.67,
"learning_rate": 5.515146312244651e-06,
"loss": 1.0851,
"step": 684000
},
{
"epoch": 2.67,
"learning_rate": 5.4826281443442455e-06,
"loss": 1.0743,
"step": 684500
},
{
"epoch": 2.67,
"learning_rate": 5.45010997644384e-06,
"loss": 1.0807,
"step": 685000
},
{
"epoch": 2.67,
"learning_rate": 5.417591808543433e-06,
"loss": 1.0754,
"step": 685500
},
{
"epoch": 2.68,
"learning_rate": 5.385073640643028e-06,
"loss": 1.0702,
"step": 686000
},
{
"epoch": 2.68,
"learning_rate": 5.352555472742621e-06,
"loss": 1.0796,
"step": 686500
},
{
"epoch": 2.68,
"learning_rate": 5.3200373048422156e-06,
"loss": 1.0742,
"step": 687000
},
{
"epoch": 2.68,
"learning_rate": 5.28751913694181e-06,
"loss": 1.0724,
"step": 687500
},
{
"epoch": 2.68,
"learning_rate": 5.255000969041404e-06,
"loss": 1.0806,
"step": 688000
},
{
"epoch": 2.69,
"learning_rate": 5.222482801140998e-06,
"loss": 1.0784,
"step": 688500
},
{
"epoch": 2.69,
"learning_rate": 5.1899646332405914e-06,
"loss": 1.0779,
"step": 689000
},
{
"epoch": 2.69,
"learning_rate": 5.157446465340186e-06,
"loss": 1.0747,
"step": 689500
},
{
"epoch": 2.69,
"learning_rate": 5.12492829743978e-06,
"loss": 1.0713,
"step": 690000
},
{
"epoch": 2.69,
"learning_rate": 5.092410129539374e-06,
"loss": 1.0813,
"step": 690500
},
{
"epoch": 2.7,
"learning_rate": 5.059891961638967e-06,
"loss": 1.0787,
"step": 691000
},
{
"epoch": 2.7,
"learning_rate": 5.027373793738562e-06,
"loss": 1.0704,
"step": 691500
},
{
"epoch": 2.7,
"learning_rate": 4.994855625838156e-06,
"loss": 1.0789,
"step": 692000
},
{
"epoch": 2.7,
"learning_rate": 4.96233745793775e-06,
"loss": 1.0808,
"step": 692500
},
{
"epoch": 2.7,
"learning_rate": 4.929819290037344e-06,
"loss": 1.077,
"step": 693000
},
{
"epoch": 2.71,
"learning_rate": 4.897301122136938e-06,
"loss": 1.0727,
"step": 693500
},
{
"epoch": 2.71,
"learning_rate": 4.8647829542365325e-06,
"loss": 1.0761,
"step": 694000
},
{
"epoch": 2.71,
"learning_rate": 4.832264786336127e-06,
"loss": 1.072,
"step": 694500
},
{
"epoch": 2.71,
"learning_rate": 4.79974661843572e-06,
"loss": 1.077,
"step": 695000
},
{
"epoch": 2.71,
"learning_rate": 4.767228450535314e-06,
"loss": 1.0719,
"step": 695500
},
{
"epoch": 2.72,
"learning_rate": 4.734710282634908e-06,
"loss": 1.0732,
"step": 696000
},
{
"epoch": 2.72,
"learning_rate": 4.7021921147345025e-06,
"loss": 1.0697,
"step": 696500
},
{
"epoch": 2.72,
"learning_rate": 4.669673946834097e-06,
"loss": 1.0658,
"step": 697000
},
{
"epoch": 2.72,
"learning_rate": 4.63715577893369e-06,
"loss": 1.0835,
"step": 697500
},
{
"epoch": 2.72,
"learning_rate": 4.604637611033284e-06,
"loss": 1.0741,
"step": 698000
},
{
"epoch": 2.73,
"learning_rate": 4.5721194431328784e-06,
"loss": 1.0666,
"step": 698500
},
{
"epoch": 2.73,
"learning_rate": 4.539601275232473e-06,
"loss": 1.0721,
"step": 699000
},
{
"epoch": 2.73,
"learning_rate": 4.507083107332067e-06,
"loss": 1.0756,
"step": 699500
},
{
"epoch": 2.73,
"learning_rate": 4.474564939431661e-06,
"loss": 1.0732,
"step": 700000
},
{
"epoch": 2.73,
"learning_rate": 4.442046771531254e-06,
"loss": 1.0761,
"step": 700500
},
{
"epoch": 2.74,
"learning_rate": 4.409528603630849e-06,
"loss": 1.0742,
"step": 701000
},
{
"epoch": 2.74,
"learning_rate": 4.377010435730443e-06,
"loss": 1.0779,
"step": 701500
},
{
"epoch": 2.74,
"learning_rate": 4.344492267830037e-06,
"loss": 1.072,
"step": 702000
},
{
"epoch": 2.74,
"learning_rate": 4.311974099929631e-06,
"loss": 1.0762,
"step": 702500
},
{
"epoch": 2.74,
"learning_rate": 4.279455932029225e-06,
"loss": 1.0707,
"step": 703000
},
{
"epoch": 2.75,
"learning_rate": 4.246937764128819e-06,
"loss": 1.0772,
"step": 703500
},
{
"epoch": 2.75,
"learning_rate": 4.214419596228413e-06,
"loss": 1.0619,
"step": 704000
},
{
"epoch": 2.75,
"learning_rate": 4.181901428328007e-06,
"loss": 1.0868,
"step": 704500
},
{
"epoch": 2.75,
"learning_rate": 4.149383260427601e-06,
"loss": 1.0695,
"step": 705000
},
{
"epoch": 2.75,
"learning_rate": 4.116865092527195e-06,
"loss": 1.0613,
"step": 705500
},
{
"epoch": 2.75,
"learning_rate": 4.084346924626789e-06,
"loss": 1.0673,
"step": 706000
},
{
"epoch": 2.76,
"learning_rate": 4.051828756726384e-06,
"loss": 1.0672,
"step": 706500
},
{
"epoch": 2.76,
"learning_rate": 4.019310588825977e-06,
"loss": 1.067,
"step": 707000
},
{
"epoch": 2.76,
"learning_rate": 3.986792420925571e-06,
"loss": 1.0703,
"step": 707500
},
{
"epoch": 2.76,
"learning_rate": 3.9542742530251654e-06,
"loss": 1.0661,
"step": 708000
},
{
"epoch": 2.76,
"learning_rate": 3.92175608512476e-06,
"loss": 1.0697,
"step": 708500
},
{
"epoch": 2.77,
"learning_rate": 3.889237917224353e-06,
"loss": 1.072,
"step": 709000
},
{
"epoch": 2.77,
"learning_rate": 3.856719749323948e-06,
"loss": 1.0708,
"step": 709500
},
{
"epoch": 2.77,
"learning_rate": 3.824201581423541e-06,
"loss": 1.0715,
"step": 710000
},
{
"epoch": 2.77,
"learning_rate": 3.791683413523135e-06,
"loss": 1.0731,
"step": 710500
},
{
"epoch": 2.77,
"learning_rate": 3.7591652456227297e-06,
"loss": 1.0731,
"step": 711000
},
{
"epoch": 2.78,
"learning_rate": 3.7266470777223235e-06,
"loss": 1.0659,
"step": 711500
},
{
"epoch": 2.78,
"learning_rate": 3.694128909821918e-06,
"loss": 1.0744,
"step": 712000
},
{
"epoch": 2.78,
"learning_rate": 3.661610741921512e-06,
"loss": 1.0691,
"step": 712500
},
{
"epoch": 2.78,
"learning_rate": 3.6290925740211056e-06,
"loss": 1.078,
"step": 713000
},
{
"epoch": 2.78,
"learning_rate": 3.5965744061207e-06,
"loss": 1.0713,
"step": 713500
},
{
"epoch": 2.79,
"learning_rate": 3.564056238220294e-06,
"loss": 1.0709,
"step": 714000
},
{
"epoch": 2.79,
"learning_rate": 3.5315380703198877e-06,
"loss": 1.0647,
"step": 714500
},
{
"epoch": 2.79,
"learning_rate": 3.499019902419482e-06,
"loss": 1.0637,
"step": 715000
},
{
"epoch": 2.79,
"learning_rate": 3.466501734519076e-06,
"loss": 1.0696,
"step": 715500
},
{
"epoch": 2.79,
"learning_rate": 3.43398356661867e-06,
"loss": 1.0796,
"step": 716000
},
{
"epoch": 2.8,
"learning_rate": 3.401465398718264e-06,
"loss": 1.0665,
"step": 716500
},
{
"epoch": 2.8,
"learning_rate": 3.368947230817858e-06,
"loss": 1.0692,
"step": 717000
},
{
"epoch": 2.8,
"learning_rate": 3.3364290629174524e-06,
"loss": 1.0693,
"step": 717500
},
{
"epoch": 2.8,
"learning_rate": 3.303910895017046e-06,
"loss": 1.0715,
"step": 718000
},
{
"epoch": 2.8,
"learning_rate": 3.27139272711664e-06,
"loss": 1.0652,
"step": 718500
},
{
"epoch": 2.81,
"learning_rate": 3.2388745592162346e-06,
"loss": 1.0846,
"step": 719000
},
{
"epoch": 2.81,
"learning_rate": 3.2063563913158283e-06,
"loss": 1.0657,
"step": 719500
},
{
"epoch": 2.81,
"learning_rate": 3.173838223415422e-06,
"loss": 1.0693,
"step": 720000
},
{
"epoch": 2.81,
"learning_rate": 3.1413200555150167e-06,
"loss": 1.0736,
"step": 720500
},
{
"epoch": 2.81,
"learning_rate": 3.1088018876146104e-06,
"loss": 1.0636,
"step": 721000
},
{
"epoch": 2.82,
"learning_rate": 3.0762837197142046e-06,
"loss": 1.069,
"step": 721500
},
{
"epoch": 2.82,
"learning_rate": 3.043765551813799e-06,
"loss": 1.072,
"step": 722000
},
{
"epoch": 2.82,
"learning_rate": 3.0112473839133926e-06,
"loss": 1.0663,
"step": 722500
},
{
"epoch": 2.82,
"learning_rate": 2.9787292160129868e-06,
"loss": 1.072,
"step": 723000
},
{
"epoch": 2.82,
"learning_rate": 2.9462110481125805e-06,
"loss": 1.0611,
"step": 723500
},
{
"epoch": 2.83,
"learning_rate": 2.9136928802121747e-06,
"loss": 1.0741,
"step": 724000
},
{
"epoch": 2.83,
"learning_rate": 2.8811747123117685e-06,
"loss": 1.0712,
"step": 724500
},
{
"epoch": 2.83,
"learning_rate": 2.8486565444113627e-06,
"loss": 1.07,
"step": 725000
},
{
"epoch": 2.83,
"learning_rate": 2.816138376510957e-06,
"loss": 1.0655,
"step": 725500
},
{
"epoch": 2.83,
"learning_rate": 2.7836202086105506e-06,
"loss": 1.0663,
"step": 726000
},
{
"epoch": 2.83,
"learning_rate": 2.751102040710145e-06,
"loss": 1.0648,
"step": 726500
},
{
"epoch": 2.84,
"learning_rate": 2.718583872809739e-06,
"loss": 1.068,
"step": 727000
},
{
"epoch": 2.84,
"learning_rate": 2.686065704909333e-06,
"loss": 1.0661,
"step": 727500
},
{
"epoch": 2.84,
"learning_rate": 2.653547537008927e-06,
"loss": 1.0593,
"step": 728000
},
{
"epoch": 2.84,
"learning_rate": 2.621029369108521e-06,
"loss": 1.0652,
"step": 728500
},
{
"epoch": 2.84,
"learning_rate": 2.5885112012081153e-06,
"loss": 1.0744,
"step": 729000
},
{
"epoch": 2.85,
"learning_rate": 2.5559930333077095e-06,
"loss": 1.0704,
"step": 729500
},
{
"epoch": 2.85,
"learning_rate": 2.5234748654073033e-06,
"loss": 1.0706,
"step": 730000
},
{
"epoch": 2.85,
"learning_rate": 2.490956697506897e-06,
"loss": 1.0681,
"step": 730500
},
{
"epoch": 2.85,
"learning_rate": 2.458438529606491e-06,
"loss": 1.078,
"step": 731000
},
{
"epoch": 2.85,
"learning_rate": 2.425920361706085e-06,
"loss": 1.0632,
"step": 731500
},
{
"epoch": 2.86,
"learning_rate": 2.393402193805679e-06,
"loss": 1.0671,
"step": 732000
},
{
"epoch": 2.86,
"learning_rate": 2.3608840259052733e-06,
"loss": 1.0681,
"step": 732500
},
{
"epoch": 2.86,
"learning_rate": 2.3283658580048675e-06,
"loss": 1.0653,
"step": 733000
},
{
"epoch": 2.86,
"learning_rate": 2.2958476901044613e-06,
"loss": 1.0697,
"step": 733500
},
{
"epoch": 2.86,
"learning_rate": 2.2633295222040555e-06,
"loss": 1.0691,
"step": 734000
},
{
"epoch": 2.87,
"learning_rate": 2.2308113543036497e-06,
"loss": 1.0584,
"step": 734500
},
{
"epoch": 2.87,
"learning_rate": 2.198293186403244e-06,
"loss": 1.0656,
"step": 735000
},
{
"epoch": 2.87,
"learning_rate": 2.1657750185028376e-06,
"loss": 1.0707,
"step": 735500
},
{
"epoch": 2.87,
"learning_rate": 2.133256850602432e-06,
"loss": 1.062,
"step": 736000
},
{
"epoch": 2.87,
"learning_rate": 2.100738682702026e-06,
"loss": 1.0601,
"step": 736500
},
{
"epoch": 2.88,
"learning_rate": 2.0682205148016197e-06,
"loss": 1.0657,
"step": 737000
},
{
"epoch": 2.88,
"learning_rate": 2.035702346901214e-06,
"loss": 1.0618,
"step": 737500
},
{
"epoch": 2.88,
"learning_rate": 2.0031841790008077e-06,
"loss": 1.0607,
"step": 738000
},
{
"epoch": 2.88,
"learning_rate": 1.970666011100402e-06,
"loss": 1.0701,
"step": 738500
},
{
"epoch": 2.88,
"learning_rate": 1.9381478431999956e-06,
"loss": 1.0678,
"step": 739000
},
{
"epoch": 2.89,
"learning_rate": 1.90562967529959e-06,
"loss": 1.071,
"step": 739500
},
{
"epoch": 2.89,
"learning_rate": 1.873111507399184e-06,
"loss": 1.0649,
"step": 740000
},
{
"epoch": 2.89,
"learning_rate": 1.8405933394987782e-06,
"loss": 1.0644,
"step": 740500
},
{
"epoch": 2.89,
"learning_rate": 1.808075171598372e-06,
"loss": 1.0663,
"step": 741000
},
{
"epoch": 2.89,
"learning_rate": 1.7755570036979661e-06,
"loss": 1.0682,
"step": 741500
},
{
"epoch": 2.9,
"learning_rate": 1.7430388357975603e-06,
"loss": 1.0641,
"step": 742000
},
{
"epoch": 2.9,
"learning_rate": 1.710520667897154e-06,
"loss": 1.0597,
"step": 742500
},
{
"epoch": 2.9,
"learning_rate": 1.6780024999967483e-06,
"loss": 1.0585,
"step": 743000
},
{
"epoch": 2.9,
"learning_rate": 1.6454843320963425e-06,
"loss": 1.0633,
"step": 743500
},
{
"epoch": 2.9,
"learning_rate": 1.6129661641959364e-06,
"loss": 1.0659,
"step": 744000
},
{
"epoch": 2.91,
"learning_rate": 1.5804479962955302e-06,
"loss": 1.0535,
"step": 744500
},
{
"epoch": 2.91,
"learning_rate": 1.5479298283951244e-06,
"loss": 1.0733,
"step": 745000
},
{
"epoch": 2.91,
"learning_rate": 1.5154116604947186e-06,
"loss": 1.0663,
"step": 745500
},
{
"epoch": 2.91,
"learning_rate": 1.4828934925943125e-06,
"loss": 1.0696,
"step": 746000
},
{
"epoch": 2.91,
"learning_rate": 1.4503753246939065e-06,
"loss": 1.0688,
"step": 746500
},
{
"epoch": 2.91,
"learning_rate": 1.4178571567935007e-06,
"loss": 1.0593,
"step": 747000
},
{
"epoch": 2.92,
"learning_rate": 1.3853389888930947e-06,
"loss": 1.0683,
"step": 747500
},
{
"epoch": 2.92,
"learning_rate": 1.3528208209926887e-06,
"loss": 1.0631,
"step": 748000
},
{
"epoch": 2.92,
"learning_rate": 1.3203026530922826e-06,
"loss": 1.0726,
"step": 748500
},
{
"epoch": 2.92,
"learning_rate": 1.2877844851918768e-06,
"loss": 1.0675,
"step": 749000
},
{
"epoch": 2.92,
"learning_rate": 1.2552663172914708e-06,
"loss": 1.0617,
"step": 749500
},
{
"epoch": 2.93,
"learning_rate": 1.222748149391065e-06,
"loss": 1.0617,
"step": 750000
},
{
"epoch": 2.93,
"learning_rate": 1.190229981490659e-06,
"loss": 1.0654,
"step": 750500
},
{
"epoch": 2.93,
"learning_rate": 1.157711813590253e-06,
"loss": 1.0681,
"step": 751000
},
{
"epoch": 2.93,
"learning_rate": 1.1251936456898469e-06,
"loss": 1.0669,
"step": 751500
},
{
"epoch": 2.93,
"learning_rate": 1.0926754777894409e-06,
"loss": 1.0634,
"step": 752000
},
{
"epoch": 2.94,
"learning_rate": 1.060157309889035e-06,
"loss": 1.0596,
"step": 752500
},
{
"epoch": 2.94,
"learning_rate": 1.027639141988629e-06,
"loss": 1.0609,
"step": 753000
},
{
"epoch": 2.94,
"learning_rate": 9.951209740882232e-07,
"loss": 1.0641,
"step": 753500
},
{
"epoch": 2.94,
"learning_rate": 9.626028061878172e-07,
"loss": 1.0555,
"step": 754000
},
{
"epoch": 2.94,
"learning_rate": 9.300846382874113e-07,
"loss": 1.0574,
"step": 754500
},
{
"epoch": 2.95,
"learning_rate": 8.975664703870052e-07,
"loss": 1.0654,
"step": 755000
},
{
"epoch": 2.95,
"learning_rate": 8.650483024865994e-07,
"loss": 1.0592,
"step": 755500
},
{
"epoch": 2.95,
"learning_rate": 8.325301345861933e-07,
"loss": 1.057,
"step": 756000
},
{
"epoch": 2.95,
"learning_rate": 8.000119666857873e-07,
"loss": 1.0593,
"step": 756500
},
{
"epoch": 2.95,
"learning_rate": 7.674937987853815e-07,
"loss": 1.0655,
"step": 757000
},
{
"epoch": 2.96,
"learning_rate": 7.349756308849755e-07,
"loss": 1.071,
"step": 757500
},
{
"epoch": 2.96,
"learning_rate": 7.024574629845695e-07,
"loss": 1.0546,
"step": 758000
},
{
"epoch": 2.96,
"learning_rate": 6.699392950841635e-07,
"loss": 1.0573,
"step": 758500
},
{
"epoch": 2.96,
"learning_rate": 6.374211271837576e-07,
"loss": 1.0652,
"step": 759000
},
{
"epoch": 2.96,
"learning_rate": 6.049029592833516e-07,
"loss": 1.065,
"step": 759500
},
{
"epoch": 2.97,
"learning_rate": 5.723847913829456e-07,
"loss": 1.071,
"step": 760000
},
{
"epoch": 2.97,
"learning_rate": 5.398666234825397e-07,
"loss": 1.0628,
"step": 760500
},
{
"epoch": 2.97,
"learning_rate": 5.073484555821338e-07,
"loss": 1.0618,
"step": 761000
},
{
"epoch": 2.97,
"learning_rate": 4.748302876817278e-07,
"loss": 1.0604,
"step": 761500
},
{
"epoch": 2.97,
"learning_rate": 4.423121197813219e-07,
"loss": 1.0612,
"step": 762000
},
{
"epoch": 2.98,
"learning_rate": 4.097939518809159e-07,
"loss": 1.0581,
"step": 762500
},
{
"epoch": 2.98,
"learning_rate": 3.7727578398050994e-07,
"loss": 1.0603,
"step": 763000
},
{
"epoch": 2.98,
"learning_rate": 3.4475761608010396e-07,
"loss": 1.0605,
"step": 763500
},
{
"epoch": 2.98,
"learning_rate": 3.1223944817969804e-07,
"loss": 1.0618,
"step": 764000
},
{
"epoch": 2.98,
"learning_rate": 2.7972128027929207e-07,
"loss": 1.064,
"step": 764500
},
{
"epoch": 2.99,
"learning_rate": 2.472031123788861e-07,
"loss": 1.0619,
"step": 765000
},
{
"epoch": 2.99,
"learning_rate": 2.1468494447848013e-07,
"loss": 1.0642,
"step": 765500
},
{
"epoch": 2.99,
"learning_rate": 1.8216677657807418e-07,
"loss": 1.0553,
"step": 766000
},
{
"epoch": 2.99,
"learning_rate": 1.4964860867766823e-07,
"loss": 1.0646,
"step": 766500
},
{
"epoch": 2.99,
"learning_rate": 1.1713044077726226e-07,
"loss": 1.0582,
"step": 767000
},
{
"epoch": 2.99,
"learning_rate": 8.46122728768563e-08,
"loss": 1.057,
"step": 767500
},
{
"epoch": 3.0,
"learning_rate": 5.209410497645035e-08,
"loss": 1.0583,
"step": 768000
},
{
"epoch": 3.0,
"learning_rate": 1.9575937076044387e-08,
"loss": 1.0635,
"step": 768500
},
{
"epoch": 3.0,
"step": 768801,
"total_flos": 6.47787050209493e+18,
"train_loss": 2.538804254183028,
"train_runtime": 274559.0377,
"train_samples_per_second": 89.604,
"train_steps_per_second": 2.8
}
],
"max_steps": 768801,
"num_train_epochs": 3,
"total_flos": 6.47787050209493e+18,
"trial_name": null,
"trial_params": null
}