gpt2-4chan-mini / trainer_state.json
niizam
minor fix
f46f2c3
raw
history blame
No virus
138 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6055880638592613,
"global_step": 560000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.999098827285924e-05,
"loss": 5.3192,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.998197654571848e-05,
"loss": 5.4021,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.9972964818577715e-05,
"loss": 5.4284,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 4.996395309143695e-05,
"loss": 5.4341,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 4.995494136429619e-05,
"loss": 5.4215,
"step": 2500
},
{
"epoch": 0.0,
"learning_rate": 4.994592963715543e-05,
"loss": 5.3564,
"step": 3000
},
{
"epoch": 0.0,
"learning_rate": 4.9936917910014664e-05,
"loss": 5.3319,
"step": 3500
},
{
"epoch": 0.0,
"learning_rate": 4.99279061828739e-05,
"loss": 5.3326,
"step": 4000
},
{
"epoch": 0.0,
"learning_rate": 4.991889445573314e-05,
"loss": 5.3575,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.990988272859237e-05,
"loss": 5.3404,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 4.990087100145161e-05,
"loss": 5.339,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 4.989185927431085e-05,
"loss": 5.2714,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 4.988284754717009e-05,
"loss": 5.2691,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 4.9873835820029326e-05,
"loss": 5.2559,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 4.9864824092888563e-05,
"loss": 5.209,
"step": 7500
},
{
"epoch": 0.01,
"learning_rate": 4.9855812365747794e-05,
"loss": 5.26,
"step": 8000
},
{
"epoch": 0.01,
"learning_rate": 4.984680063860703e-05,
"loss": 5.1878,
"step": 8500
},
{
"epoch": 0.01,
"learning_rate": 4.983778891146627e-05,
"loss": 5.212,
"step": 9000
},
{
"epoch": 0.01,
"learning_rate": 4.9828777184325506e-05,
"loss": 5.2063,
"step": 9500
},
{
"epoch": 0.01,
"learning_rate": 4.981976545718475e-05,
"loss": 5.2132,
"step": 10000
},
{
"epoch": 0.01,
"learning_rate": 4.981075373004399e-05,
"loss": 5.221,
"step": 10500
},
{
"epoch": 0.01,
"learning_rate": 4.980174200290322e-05,
"loss": 5.1786,
"step": 11000
},
{
"epoch": 0.01,
"learning_rate": 4.9792730275762456e-05,
"loss": 5.1868,
"step": 11500
},
{
"epoch": 0.01,
"learning_rate": 4.978371854862169e-05,
"loss": 5.1585,
"step": 12000
},
{
"epoch": 0.01,
"learning_rate": 4.977470682148093e-05,
"loss": 5.2465,
"step": 12500
},
{
"epoch": 0.01,
"learning_rate": 4.976569509434017e-05,
"loss": 5.1645,
"step": 13000
},
{
"epoch": 0.01,
"learning_rate": 4.9756683367199405e-05,
"loss": 5.1462,
"step": 13500
},
{
"epoch": 0.02,
"learning_rate": 4.974767164005864e-05,
"loss": 5.1588,
"step": 14000
},
{
"epoch": 0.02,
"learning_rate": 4.973865991291788e-05,
"loss": 5.145,
"step": 14500
},
{
"epoch": 0.02,
"learning_rate": 4.972964818577712e-05,
"loss": 5.1256,
"step": 15000
},
{
"epoch": 0.02,
"learning_rate": 4.9720636458636355e-05,
"loss": 5.1227,
"step": 15500
},
{
"epoch": 0.02,
"learning_rate": 4.971162473149559e-05,
"loss": 5.096,
"step": 16000
},
{
"epoch": 0.02,
"learning_rate": 4.970261300435483e-05,
"loss": 5.1427,
"step": 16500
},
{
"epoch": 0.02,
"learning_rate": 4.969360127721407e-05,
"loss": 5.121,
"step": 17000
},
{
"epoch": 0.02,
"learning_rate": 4.9684589550073305e-05,
"loss": 5.1324,
"step": 17500
},
{
"epoch": 0.02,
"learning_rate": 4.967557782293254e-05,
"loss": 5.1476,
"step": 18000
},
{
"epoch": 0.02,
"learning_rate": 4.966656609579178e-05,
"loss": 5.0538,
"step": 18500
},
{
"epoch": 0.02,
"learning_rate": 4.965755436865102e-05,
"loss": 5.0635,
"step": 19000
},
{
"epoch": 0.02,
"learning_rate": 4.9648542641510254e-05,
"loss": 5.0309,
"step": 19500
},
{
"epoch": 0.02,
"learning_rate": 4.963953091436949e-05,
"loss": 5.0623,
"step": 20000
},
{
"epoch": 0.02,
"learning_rate": 4.963051918722872e-05,
"loss": 5.0624,
"step": 20500
},
{
"epoch": 0.02,
"learning_rate": 4.962150746008796e-05,
"loss": 5.0844,
"step": 21000
},
{
"epoch": 0.02,
"learning_rate": 4.9612495732947204e-05,
"loss": 5.0667,
"step": 21500
},
{
"epoch": 0.02,
"learning_rate": 4.960348400580644e-05,
"loss": 5.0536,
"step": 22000
},
{
"epoch": 0.02,
"learning_rate": 4.959447227866568e-05,
"loss": 5.0783,
"step": 22500
},
{
"epoch": 0.02,
"learning_rate": 4.9585460551524916e-05,
"loss": 5.0335,
"step": 23000
},
{
"epoch": 0.03,
"learning_rate": 4.957644882438415e-05,
"loss": 5.0321,
"step": 23500
},
{
"epoch": 0.03,
"learning_rate": 4.9567437097243384e-05,
"loss": 5.037,
"step": 24000
},
{
"epoch": 0.03,
"learning_rate": 4.955842537010262e-05,
"loss": 5.0187,
"step": 24500
},
{
"epoch": 0.03,
"learning_rate": 4.954941364296186e-05,
"loss": 5.0357,
"step": 25000
},
{
"epoch": 0.03,
"learning_rate": 4.95404019158211e-05,
"loss": 5.0128,
"step": 25500
},
{
"epoch": 0.03,
"learning_rate": 4.953139018868034e-05,
"loss": 5.0553,
"step": 26000
},
{
"epoch": 0.03,
"learning_rate": 4.952237846153957e-05,
"loss": 5.0024,
"step": 26500
},
{
"epoch": 0.03,
"learning_rate": 4.951336673439881e-05,
"loss": 5.0177,
"step": 27000
},
{
"epoch": 0.03,
"learning_rate": 4.9504355007258046e-05,
"loss": 5.0174,
"step": 27500
},
{
"epoch": 0.03,
"learning_rate": 4.949534328011728e-05,
"loss": 5.0167,
"step": 28000
},
{
"epoch": 0.03,
"learning_rate": 4.948633155297652e-05,
"loss": 4.9896,
"step": 28500
},
{
"epoch": 0.03,
"learning_rate": 4.947731982583576e-05,
"loss": 5.0355,
"step": 29000
},
{
"epoch": 0.03,
"learning_rate": 4.9468308098694995e-05,
"loss": 4.9929,
"step": 29500
},
{
"epoch": 0.03,
"learning_rate": 4.945929637155423e-05,
"loss": 4.9702,
"step": 30000
},
{
"epoch": 0.03,
"learning_rate": 4.945028464441347e-05,
"loss": 4.944,
"step": 30500
},
{
"epoch": 0.03,
"learning_rate": 4.944127291727271e-05,
"loss": 4.9957,
"step": 31000
},
{
"epoch": 0.03,
"learning_rate": 4.9432261190131945e-05,
"loss": 4.9908,
"step": 31500
},
{
"epoch": 0.03,
"learning_rate": 4.942324946299118e-05,
"loss": 4.9816,
"step": 32000
},
{
"epoch": 0.04,
"learning_rate": 4.941423773585042e-05,
"loss": 4.9649,
"step": 32500
},
{
"epoch": 0.04,
"learning_rate": 4.940522600870966e-05,
"loss": 4.9434,
"step": 33000
},
{
"epoch": 0.04,
"learning_rate": 4.9396214281568895e-05,
"loss": 5.0387,
"step": 33500
},
{
"epoch": 0.04,
"learning_rate": 4.938720255442813e-05,
"loss": 4.9799,
"step": 34000
},
{
"epoch": 0.04,
"learning_rate": 4.937819082728737e-05,
"loss": 4.9648,
"step": 34500
},
{
"epoch": 0.04,
"learning_rate": 4.936917910014661e-05,
"loss": 4.9593,
"step": 35000
},
{
"epoch": 0.04,
"learning_rate": 4.9360167373005844e-05,
"loss": 4.9687,
"step": 35500
},
{
"epoch": 0.04,
"learning_rate": 4.9351155645865075e-05,
"loss": 4.9474,
"step": 36000
},
{
"epoch": 0.04,
"learning_rate": 4.934214391872431e-05,
"loss": 4.9344,
"step": 36500
},
{
"epoch": 0.04,
"learning_rate": 4.9333132191583556e-05,
"loss": 4.932,
"step": 37000
},
{
"epoch": 0.04,
"learning_rate": 4.9324120464442794e-05,
"loss": 5.0116,
"step": 37500
},
{
"epoch": 0.04,
"learning_rate": 4.931510873730203e-05,
"loss": 4.9311,
"step": 38000
},
{
"epoch": 0.04,
"learning_rate": 4.930609701016127e-05,
"loss": 4.9114,
"step": 38500
},
{
"epoch": 0.04,
"learning_rate": 4.92970852830205e-05,
"loss": 4.9517,
"step": 39000
},
{
"epoch": 0.04,
"learning_rate": 4.928807355587974e-05,
"loss": 4.9541,
"step": 39500
},
{
"epoch": 0.04,
"learning_rate": 4.9279061828738974e-05,
"loss": 4.9637,
"step": 40000
},
{
"epoch": 0.04,
"learning_rate": 4.927005010159821e-05,
"loss": 4.9498,
"step": 40500
},
{
"epoch": 0.04,
"learning_rate": 4.926103837445745e-05,
"loss": 4.8924,
"step": 41000
},
{
"epoch": 0.04,
"learning_rate": 4.925202664731669e-05,
"loss": 4.9596,
"step": 41500
},
{
"epoch": 0.05,
"learning_rate": 4.924301492017593e-05,
"loss": 4.9264,
"step": 42000
},
{
"epoch": 0.05,
"learning_rate": 4.923400319303516e-05,
"loss": 4.9179,
"step": 42500
},
{
"epoch": 0.05,
"learning_rate": 4.92249914658944e-05,
"loss": 4.9151,
"step": 43000
},
{
"epoch": 0.05,
"learning_rate": 4.9215979738753636e-05,
"loss": 4.9101,
"step": 43500
},
{
"epoch": 0.05,
"learning_rate": 4.920696801161287e-05,
"loss": 4.9541,
"step": 44000
},
{
"epoch": 0.05,
"learning_rate": 4.919795628447211e-05,
"loss": 4.9423,
"step": 44500
},
{
"epoch": 0.05,
"learning_rate": 4.918894455733135e-05,
"loss": 4.8763,
"step": 45000
},
{
"epoch": 0.05,
"learning_rate": 4.9179932830190586e-05,
"loss": 4.9015,
"step": 45500
},
{
"epoch": 0.05,
"learning_rate": 4.917092110304982e-05,
"loss": 4.9179,
"step": 46000
},
{
"epoch": 0.05,
"learning_rate": 4.916190937590906e-05,
"loss": 4.8837,
"step": 46500
},
{
"epoch": 0.05,
"learning_rate": 4.91528976487683e-05,
"loss": 4.9141,
"step": 47000
},
{
"epoch": 0.05,
"learning_rate": 4.9143885921627535e-05,
"loss": 4.8766,
"step": 47500
},
{
"epoch": 0.05,
"learning_rate": 4.913487419448677e-05,
"loss": 4.9088,
"step": 48000
},
{
"epoch": 0.05,
"learning_rate": 4.912586246734601e-05,
"loss": 4.9137,
"step": 48500
},
{
"epoch": 0.05,
"learning_rate": 4.911685074020525e-05,
"loss": 4.8692,
"step": 49000
},
{
"epoch": 0.05,
"learning_rate": 4.9107839013064485e-05,
"loss": 4.8607,
"step": 49500
},
{
"epoch": 0.05,
"learning_rate": 4.909882728592372e-05,
"loss": 4.8573,
"step": 50000
},
{
"epoch": 0.05,
"learning_rate": 4.908981555878296e-05,
"loss": 4.9472,
"step": 50500
},
{
"epoch": 0.06,
"learning_rate": 4.90808038316422e-05,
"loss": 4.9144,
"step": 51000
},
{
"epoch": 0.06,
"learning_rate": 4.907179210450143e-05,
"loss": 4.973,
"step": 51500
},
{
"epoch": 0.06,
"learning_rate": 4.9062780377360665e-05,
"loss": 4.9413,
"step": 52000
},
{
"epoch": 0.06,
"learning_rate": 4.90537686502199e-05,
"loss": 4.972,
"step": 52500
},
{
"epoch": 0.06,
"learning_rate": 4.9044756923079147e-05,
"loss": 4.9722,
"step": 53000
},
{
"epoch": 0.06,
"learning_rate": 4.9035745195938384e-05,
"loss": 4.9126,
"step": 53500
},
{
"epoch": 0.06,
"learning_rate": 4.902673346879762e-05,
"loss": 4.9117,
"step": 54000
},
{
"epoch": 0.06,
"learning_rate": 4.901772174165686e-05,
"loss": 4.9233,
"step": 54500
},
{
"epoch": 0.06,
"learning_rate": 4.900871001451609e-05,
"loss": 4.9693,
"step": 55000
},
{
"epoch": 0.06,
"learning_rate": 4.899969828737533e-05,
"loss": 4.9875,
"step": 55500
},
{
"epoch": 0.06,
"learning_rate": 4.8990686560234564e-05,
"loss": 4.9741,
"step": 56000
},
{
"epoch": 0.06,
"learning_rate": 4.89816748330938e-05,
"loss": 4.9411,
"step": 56500
},
{
"epoch": 0.06,
"learning_rate": 4.8972663105953046e-05,
"loss": 4.9281,
"step": 57000
},
{
"epoch": 0.06,
"learning_rate": 4.896365137881228e-05,
"loss": 4.9392,
"step": 57500
},
{
"epoch": 0.06,
"learning_rate": 4.8954639651671514e-05,
"loss": 4.9473,
"step": 58000
},
{
"epoch": 0.06,
"learning_rate": 4.894562792453075e-05,
"loss": 4.9333,
"step": 58500
},
{
"epoch": 0.06,
"learning_rate": 4.893661619738999e-05,
"loss": 4.9547,
"step": 59000
},
{
"epoch": 0.06,
"learning_rate": 4.8927604470249226e-05,
"loss": 4.9422,
"step": 59500
},
{
"epoch": 0.06,
"learning_rate": 4.891859274310846e-05,
"loss": 4.9182,
"step": 60000
},
{
"epoch": 0.07,
"learning_rate": 4.89095810159677e-05,
"loss": 4.9282,
"step": 60500
},
{
"epoch": 0.07,
"learning_rate": 4.890056928882694e-05,
"loss": 4.943,
"step": 61000
},
{
"epoch": 0.07,
"learning_rate": 4.8891557561686176e-05,
"loss": 4.9436,
"step": 61500
},
{
"epoch": 0.07,
"learning_rate": 4.888254583454541e-05,
"loss": 4.9253,
"step": 62000
},
{
"epoch": 0.07,
"learning_rate": 4.887353410740465e-05,
"loss": 4.9442,
"step": 62500
},
{
"epoch": 0.07,
"learning_rate": 4.886452238026389e-05,
"loss": 4.8888,
"step": 63000
},
{
"epoch": 0.07,
"learning_rate": 4.8855510653123125e-05,
"loss": 4.9155,
"step": 63500
},
{
"epoch": 0.07,
"learning_rate": 4.8846498925982356e-05,
"loss": 4.9519,
"step": 64000
},
{
"epoch": 0.07,
"learning_rate": 4.88374871988416e-05,
"loss": 4.9563,
"step": 64500
},
{
"epoch": 0.07,
"learning_rate": 4.882847547170084e-05,
"loss": 4.9553,
"step": 65000
},
{
"epoch": 0.07,
"learning_rate": 4.8819463744560075e-05,
"loss": 4.8865,
"step": 65500
},
{
"epoch": 0.07,
"learning_rate": 4.881045201741931e-05,
"loss": 4.8987,
"step": 66000
},
{
"epoch": 0.07,
"learning_rate": 4.880144029027855e-05,
"loss": 4.9129,
"step": 66500
},
{
"epoch": 0.07,
"learning_rate": 4.879242856313779e-05,
"loss": 4.9565,
"step": 67000
},
{
"epoch": 0.07,
"learning_rate": 4.878341683599702e-05,
"loss": 4.8831,
"step": 67500
},
{
"epoch": 0.07,
"learning_rate": 4.8774405108856255e-05,
"loss": 4.9007,
"step": 68000
},
{
"epoch": 0.07,
"learning_rate": 4.87653933817155e-05,
"loss": 4.9337,
"step": 68500
},
{
"epoch": 0.07,
"learning_rate": 4.8756381654574737e-05,
"loss": 4.8446,
"step": 69000
},
{
"epoch": 0.08,
"learning_rate": 4.8747369927433974e-05,
"loss": 4.9388,
"step": 69500
},
{
"epoch": 0.08,
"learning_rate": 4.873835820029321e-05,
"loss": 4.9655,
"step": 70000
},
{
"epoch": 0.08,
"learning_rate": 4.872934647315244e-05,
"loss": 4.9309,
"step": 70500
},
{
"epoch": 0.08,
"learning_rate": 4.872033474601168e-05,
"loss": 4.9102,
"step": 71000
},
{
"epoch": 0.08,
"learning_rate": 4.871132301887092e-05,
"loss": 4.8491,
"step": 71500
},
{
"epoch": 0.08,
"learning_rate": 4.8702311291730154e-05,
"loss": 4.895,
"step": 72000
},
{
"epoch": 0.08,
"learning_rate": 4.86932995645894e-05,
"loss": 4.9222,
"step": 72500
},
{
"epoch": 0.08,
"learning_rate": 4.8684287837448636e-05,
"loss": 4.8966,
"step": 73000
},
{
"epoch": 0.08,
"learning_rate": 4.8675276110307866e-05,
"loss": 4.8669,
"step": 73500
},
{
"epoch": 0.08,
"learning_rate": 4.8666264383167104e-05,
"loss": 4.8332,
"step": 74000
},
{
"epoch": 0.08,
"learning_rate": 4.865725265602634e-05,
"loss": 4.9127,
"step": 74500
},
{
"epoch": 0.08,
"learning_rate": 4.864824092888558e-05,
"loss": 4.9251,
"step": 75000
},
{
"epoch": 0.08,
"learning_rate": 4.8639229201744816e-05,
"loss": 4.9379,
"step": 75500
},
{
"epoch": 0.08,
"learning_rate": 4.8630217474604053e-05,
"loss": 4.8682,
"step": 76000
},
{
"epoch": 0.08,
"learning_rate": 4.862120574746329e-05,
"loss": 4.8762,
"step": 76500
},
{
"epoch": 0.08,
"learning_rate": 4.861219402032253e-05,
"loss": 4.8544,
"step": 77000
},
{
"epoch": 0.08,
"learning_rate": 4.8603182293181766e-05,
"loss": 4.8835,
"step": 77500
},
{
"epoch": 0.08,
"learning_rate": 4.8594170566041e-05,
"loss": 4.8346,
"step": 78000
},
{
"epoch": 0.08,
"learning_rate": 4.858515883890024e-05,
"loss": 4.9229,
"step": 78500
},
{
"epoch": 0.09,
"learning_rate": 4.857614711175948e-05,
"loss": 4.9159,
"step": 79000
},
{
"epoch": 0.09,
"learning_rate": 4.856713538461871e-05,
"loss": 4.8566,
"step": 79500
},
{
"epoch": 0.09,
"learning_rate": 4.855812365747795e-05,
"loss": 4.8155,
"step": 80000
},
{
"epoch": 0.09,
"learning_rate": 4.854911193033719e-05,
"loss": 4.9222,
"step": 80500
},
{
"epoch": 0.09,
"learning_rate": 4.854010020319643e-05,
"loss": 4.8545,
"step": 81000
},
{
"epoch": 0.09,
"learning_rate": 4.8531088476055665e-05,
"loss": 4.8295,
"step": 81500
},
{
"epoch": 0.09,
"learning_rate": 4.85220767489149e-05,
"loss": 4.8223,
"step": 82000
},
{
"epoch": 0.09,
"learning_rate": 4.851306502177414e-05,
"loss": 4.8637,
"step": 82500
},
{
"epoch": 0.09,
"learning_rate": 4.850405329463337e-05,
"loss": 4.878,
"step": 83000
},
{
"epoch": 0.09,
"learning_rate": 4.849504156749261e-05,
"loss": 4.8677,
"step": 83500
},
{
"epoch": 0.09,
"learning_rate": 4.848602984035185e-05,
"loss": 4.8636,
"step": 84000
},
{
"epoch": 0.09,
"learning_rate": 4.847701811321109e-05,
"loss": 4.8708,
"step": 84500
},
{
"epoch": 0.09,
"learning_rate": 4.846800638607033e-05,
"loss": 4.8608,
"step": 85000
},
{
"epoch": 0.09,
"learning_rate": 4.8458994658929564e-05,
"loss": 4.8347,
"step": 85500
},
{
"epoch": 0.09,
"learning_rate": 4.8449982931788795e-05,
"loss": 4.8385,
"step": 86000
},
{
"epoch": 0.09,
"learning_rate": 4.844097120464803e-05,
"loss": 4.8565,
"step": 86500
},
{
"epoch": 0.09,
"learning_rate": 4.843195947750727e-05,
"loss": 4.867,
"step": 87000
},
{
"epoch": 0.09,
"learning_rate": 4.842294775036651e-05,
"loss": 4.8456,
"step": 87500
},
{
"epoch": 0.1,
"learning_rate": 4.841393602322575e-05,
"loss": 4.8739,
"step": 88000
},
{
"epoch": 0.1,
"learning_rate": 4.840492429608499e-05,
"loss": 4.8473,
"step": 88500
},
{
"epoch": 0.1,
"learning_rate": 4.839591256894422e-05,
"loss": 4.8496,
"step": 89000
},
{
"epoch": 0.1,
"learning_rate": 4.8386900841803456e-05,
"loss": 4.8579,
"step": 89500
},
{
"epoch": 0.1,
"learning_rate": 4.8377889114662694e-05,
"loss": 4.8543,
"step": 90000
},
{
"epoch": 0.1,
"learning_rate": 4.836887738752193e-05,
"loss": 4.8855,
"step": 90500
},
{
"epoch": 0.1,
"learning_rate": 4.835986566038117e-05,
"loss": 4.8511,
"step": 91000
},
{
"epoch": 0.1,
"learning_rate": 4.8350853933240406e-05,
"loss": 4.8682,
"step": 91500
},
{
"epoch": 0.1,
"learning_rate": 4.8341842206099643e-05,
"loss": 4.8074,
"step": 92000
},
{
"epoch": 0.1,
"learning_rate": 4.833283047895888e-05,
"loss": 4.8034,
"step": 92500
},
{
"epoch": 0.1,
"learning_rate": 4.832381875181812e-05,
"loss": 4.842,
"step": 93000
},
{
"epoch": 0.1,
"learning_rate": 4.8314807024677356e-05,
"loss": 4.8146,
"step": 93500
},
{
"epoch": 0.1,
"learning_rate": 4.830579529753659e-05,
"loss": 4.8353,
"step": 94000
},
{
"epoch": 0.1,
"learning_rate": 4.829678357039583e-05,
"loss": 4.8151,
"step": 94500
},
{
"epoch": 0.1,
"learning_rate": 4.828777184325507e-05,
"loss": 4.8127,
"step": 95000
},
{
"epoch": 0.1,
"learning_rate": 4.8278760116114305e-05,
"loss": 4.833,
"step": 95500
},
{
"epoch": 0.1,
"learning_rate": 4.826974838897354e-05,
"loss": 4.8383,
"step": 96000
},
{
"epoch": 0.1,
"learning_rate": 4.826073666183278e-05,
"loss": 4.8441,
"step": 96500
},
{
"epoch": 0.1,
"learning_rate": 4.825172493469202e-05,
"loss": 4.8794,
"step": 97000
},
{
"epoch": 0.11,
"learning_rate": 4.8242713207551255e-05,
"loss": 4.828,
"step": 97500
},
{
"epoch": 0.11,
"learning_rate": 4.823370148041049e-05,
"loss": 4.7572,
"step": 98000
},
{
"epoch": 0.11,
"learning_rate": 4.822468975326972e-05,
"loss": 4.7658,
"step": 98500
},
{
"epoch": 0.11,
"learning_rate": 4.821567802612896e-05,
"loss": 4.8123,
"step": 99000
},
{
"epoch": 0.11,
"learning_rate": 4.8206666298988204e-05,
"loss": 4.8082,
"step": 99500
},
{
"epoch": 0.11,
"learning_rate": 4.819765457184744e-05,
"loss": 4.7542,
"step": 100000
},
{
"epoch": 0.11,
"learning_rate": 4.818864284470668e-05,
"loss": 4.8264,
"step": 100500
},
{
"epoch": 0.11,
"learning_rate": 4.817963111756592e-05,
"loss": 4.7541,
"step": 101000
},
{
"epoch": 0.11,
"learning_rate": 4.817061939042515e-05,
"loss": 4.7992,
"step": 101500
},
{
"epoch": 0.11,
"learning_rate": 4.8161607663284385e-05,
"loss": 4.8078,
"step": 102000
},
{
"epoch": 0.11,
"learning_rate": 4.815259593614362e-05,
"loss": 4.8453,
"step": 102500
},
{
"epoch": 0.11,
"learning_rate": 4.814358420900286e-05,
"loss": 4.8276,
"step": 103000
},
{
"epoch": 0.11,
"learning_rate": 4.81345724818621e-05,
"loss": 4.7253,
"step": 103500
},
{
"epoch": 0.11,
"learning_rate": 4.812556075472134e-05,
"loss": 4.8102,
"step": 104000
},
{
"epoch": 0.11,
"learning_rate": 4.811654902758057e-05,
"loss": 4.8006,
"step": 104500
},
{
"epoch": 0.11,
"learning_rate": 4.810753730043981e-05,
"loss": 4.7603,
"step": 105000
},
{
"epoch": 0.11,
"learning_rate": 4.8098525573299047e-05,
"loss": 4.7124,
"step": 105500
},
{
"epoch": 0.11,
"learning_rate": 4.8089513846158284e-05,
"loss": 4.7757,
"step": 106000
},
{
"epoch": 0.12,
"learning_rate": 4.808050211901752e-05,
"loss": 4.7593,
"step": 106500
},
{
"epoch": 0.12,
"learning_rate": 4.807149039187676e-05,
"loss": 4.8501,
"step": 107000
},
{
"epoch": 0.12,
"learning_rate": 4.8062478664735996e-05,
"loss": 4.8105,
"step": 107500
},
{
"epoch": 0.12,
"learning_rate": 4.8053466937595234e-05,
"loss": 4.7692,
"step": 108000
},
{
"epoch": 0.12,
"learning_rate": 4.804445521045447e-05,
"loss": 4.7855,
"step": 108500
},
{
"epoch": 0.12,
"learning_rate": 4.803544348331371e-05,
"loss": 4.8032,
"step": 109000
},
{
"epoch": 0.12,
"learning_rate": 4.8026431756172946e-05,
"loss": 4.7814,
"step": 109500
},
{
"epoch": 0.12,
"learning_rate": 4.801742002903218e-05,
"loss": 4.8473,
"step": 110000
},
{
"epoch": 0.12,
"learning_rate": 4.800840830189142e-05,
"loss": 4.8047,
"step": 110500
},
{
"epoch": 0.12,
"learning_rate": 4.799939657475066e-05,
"loss": 4.8326,
"step": 111000
},
{
"epoch": 0.12,
"learning_rate": 4.7990384847609895e-05,
"loss": 4.7993,
"step": 111500
},
{
"epoch": 0.12,
"learning_rate": 4.798137312046913e-05,
"loss": 4.7892,
"step": 112000
},
{
"epoch": 0.12,
"learning_rate": 4.797236139332837e-05,
"loss": 4.751,
"step": 112500
},
{
"epoch": 0.12,
"learning_rate": 4.796334966618761e-05,
"loss": 4.7795,
"step": 113000
},
{
"epoch": 0.12,
"learning_rate": 4.7954337939046845e-05,
"loss": 4.7684,
"step": 113500
},
{
"epoch": 0.12,
"learning_rate": 4.7945326211906076e-05,
"loss": 4.7673,
"step": 114000
},
{
"epoch": 0.12,
"learning_rate": 4.793631448476531e-05,
"loss": 4.7614,
"step": 114500
},
{
"epoch": 0.12,
"learning_rate": 4.792730275762455e-05,
"loss": 4.7529,
"step": 115000
},
{
"epoch": 0.12,
"learning_rate": 4.7918291030483795e-05,
"loss": 4.8215,
"step": 115500
},
{
"epoch": 0.13,
"learning_rate": 4.790927930334303e-05,
"loss": 4.7351,
"step": 116000
},
{
"epoch": 0.13,
"learning_rate": 4.790026757620227e-05,
"loss": 4.7878,
"step": 116500
},
{
"epoch": 0.13,
"learning_rate": 4.78912558490615e-05,
"loss": 4.7618,
"step": 117000
},
{
"epoch": 0.13,
"learning_rate": 4.788224412192074e-05,
"loss": 4.846,
"step": 117500
},
{
"epoch": 0.13,
"learning_rate": 4.7873232394779975e-05,
"loss": 4.8027,
"step": 118000
},
{
"epoch": 0.13,
"learning_rate": 4.786422066763921e-05,
"loss": 4.7415,
"step": 118500
},
{
"epoch": 0.13,
"learning_rate": 4.785520894049845e-05,
"loss": 4.7554,
"step": 119000
},
{
"epoch": 0.13,
"learning_rate": 4.7846197213357694e-05,
"loss": 4.7287,
"step": 119500
},
{
"epoch": 0.13,
"learning_rate": 4.783718548621693e-05,
"loss": 4.81,
"step": 120000
},
{
"epoch": 0.13,
"learning_rate": 4.782817375907616e-05,
"loss": 4.7374,
"step": 120500
},
{
"epoch": 0.13,
"learning_rate": 4.78191620319354e-05,
"loss": 4.7541,
"step": 121000
},
{
"epoch": 0.13,
"learning_rate": 4.7810150304794637e-05,
"loss": 4.7704,
"step": 121500
},
{
"epoch": 0.13,
"learning_rate": 4.7801138577653874e-05,
"loss": 4.7743,
"step": 122000
},
{
"epoch": 0.13,
"learning_rate": 4.779212685051311e-05,
"loss": 4.7569,
"step": 122500
},
{
"epoch": 0.13,
"learning_rate": 4.778311512337235e-05,
"loss": 4.69,
"step": 123000
},
{
"epoch": 0.13,
"learning_rate": 4.7774103396231586e-05,
"loss": 4.8213,
"step": 123500
},
{
"epoch": 0.13,
"learning_rate": 4.7765091669090824e-05,
"loss": 4.7616,
"step": 124000
},
{
"epoch": 0.13,
"learning_rate": 4.775607994195006e-05,
"loss": 4.7587,
"step": 124500
},
{
"epoch": 0.14,
"learning_rate": 4.77470682148093e-05,
"loss": 4.7599,
"step": 125000
},
{
"epoch": 0.14,
"learning_rate": 4.7738056487668536e-05,
"loss": 4.692,
"step": 125500
},
{
"epoch": 0.14,
"learning_rate": 4.772904476052777e-05,
"loss": 4.8163,
"step": 126000
},
{
"epoch": 0.14,
"learning_rate": 4.7720033033387004e-05,
"loss": 4.7533,
"step": 126500
},
{
"epoch": 0.14,
"learning_rate": 4.771102130624625e-05,
"loss": 4.7933,
"step": 127000
},
{
"epoch": 0.14,
"learning_rate": 4.7702009579105485e-05,
"loss": 4.7659,
"step": 127500
},
{
"epoch": 0.14,
"learning_rate": 4.769299785196472e-05,
"loss": 4.7502,
"step": 128000
},
{
"epoch": 0.14,
"learning_rate": 4.768398612482396e-05,
"loss": 4.7412,
"step": 128500
},
{
"epoch": 0.14,
"learning_rate": 4.76749743976832e-05,
"loss": 4.7917,
"step": 129000
},
{
"epoch": 0.14,
"learning_rate": 4.766596267054243e-05,
"loss": 4.7984,
"step": 129500
},
{
"epoch": 0.14,
"learning_rate": 4.7656950943401666e-05,
"loss": 4.7151,
"step": 130000
},
{
"epoch": 0.14,
"learning_rate": 4.76479392162609e-05,
"loss": 4.7101,
"step": 130500
},
{
"epoch": 0.14,
"learning_rate": 4.763892748912015e-05,
"loss": 4.7416,
"step": 131000
},
{
"epoch": 0.14,
"learning_rate": 4.7629915761979385e-05,
"loss": 4.7401,
"step": 131500
},
{
"epoch": 0.14,
"learning_rate": 4.762090403483862e-05,
"loss": 4.7234,
"step": 132000
},
{
"epoch": 0.14,
"learning_rate": 4.761189230769785e-05,
"loss": 4.7334,
"step": 132500
},
{
"epoch": 0.14,
"learning_rate": 4.760288058055709e-05,
"loss": 4.7305,
"step": 133000
},
{
"epoch": 0.14,
"learning_rate": 4.759386885341633e-05,
"loss": 4.7889,
"step": 133500
},
{
"epoch": 0.14,
"learning_rate": 4.7584857126275565e-05,
"loss": 4.7615,
"step": 134000
},
{
"epoch": 0.15,
"learning_rate": 4.75758453991348e-05,
"loss": 4.6827,
"step": 134500
},
{
"epoch": 0.15,
"learning_rate": 4.7566833671994046e-05,
"loss": 4.7555,
"step": 135000
},
{
"epoch": 0.15,
"learning_rate": 4.7557821944853284e-05,
"loss": 4.7644,
"step": 135500
},
{
"epoch": 0.15,
"learning_rate": 4.7548810217712514e-05,
"loss": 4.7292,
"step": 136000
},
{
"epoch": 0.15,
"learning_rate": 4.753979849057175e-05,
"loss": 4.7221,
"step": 136500
},
{
"epoch": 0.15,
"learning_rate": 4.753078676343099e-05,
"loss": 4.7045,
"step": 137000
},
{
"epoch": 0.15,
"learning_rate": 4.752177503629023e-05,
"loss": 4.6832,
"step": 137500
},
{
"epoch": 0.15,
"learning_rate": 4.7512763309149464e-05,
"loss": 4.7221,
"step": 138000
},
{
"epoch": 0.15,
"learning_rate": 4.75037515820087e-05,
"loss": 4.6595,
"step": 138500
},
{
"epoch": 0.15,
"learning_rate": 4.749473985486794e-05,
"loss": 4.7322,
"step": 139000
},
{
"epoch": 0.15,
"learning_rate": 4.7485728127727176e-05,
"loss": 4.7332,
"step": 139500
},
{
"epoch": 0.15,
"learning_rate": 4.7476716400586414e-05,
"loss": 4.7665,
"step": 140000
},
{
"epoch": 0.15,
"learning_rate": 4.746770467344565e-05,
"loss": 4.6936,
"step": 140500
},
{
"epoch": 0.15,
"learning_rate": 4.745869294630489e-05,
"loss": 4.7322,
"step": 141000
},
{
"epoch": 0.15,
"learning_rate": 4.7449681219164126e-05,
"loss": 4.7406,
"step": 141500
},
{
"epoch": 0.15,
"learning_rate": 4.7440669492023356e-05,
"loss": 4.757,
"step": 142000
},
{
"epoch": 0.15,
"learning_rate": 4.74316577648826e-05,
"loss": 4.7518,
"step": 142500
},
{
"epoch": 0.15,
"learning_rate": 4.742264603774184e-05,
"loss": 4.6843,
"step": 143000
},
{
"epoch": 0.16,
"learning_rate": 4.7413634310601075e-05,
"loss": 4.6937,
"step": 143500
},
{
"epoch": 0.16,
"learning_rate": 4.740462258346031e-05,
"loss": 4.7167,
"step": 144000
},
{
"epoch": 0.16,
"learning_rate": 4.739561085631955e-05,
"loss": 4.7101,
"step": 144500
},
{
"epoch": 0.16,
"learning_rate": 4.738659912917878e-05,
"loss": 4.7401,
"step": 145000
},
{
"epoch": 0.16,
"learning_rate": 4.737758740203802e-05,
"loss": 4.7357,
"step": 145500
},
{
"epoch": 0.16,
"learning_rate": 4.7368575674897256e-05,
"loss": 4.7034,
"step": 146000
},
{
"epoch": 0.16,
"learning_rate": 4.73595639477565e-05,
"loss": 4.6982,
"step": 146500
},
{
"epoch": 0.16,
"learning_rate": 4.735055222061574e-05,
"loss": 4.729,
"step": 147000
},
{
"epoch": 0.16,
"learning_rate": 4.7341540493474975e-05,
"loss": 4.7402,
"step": 147500
},
{
"epoch": 0.16,
"learning_rate": 4.733252876633421e-05,
"loss": 4.7249,
"step": 148000
},
{
"epoch": 0.16,
"learning_rate": 4.732351703919344e-05,
"loss": 4.6795,
"step": 148500
},
{
"epoch": 0.16,
"learning_rate": 4.731450531205268e-05,
"loss": 4.7496,
"step": 149000
},
{
"epoch": 0.16,
"learning_rate": 4.730549358491192e-05,
"loss": 4.7258,
"step": 149500
},
{
"epoch": 0.16,
"learning_rate": 4.7296481857771155e-05,
"loss": 4.7273,
"step": 150000
},
{
"epoch": 0.16,
"learning_rate": 4.72874701306304e-05,
"loss": 4.6983,
"step": 150500
},
{
"epoch": 0.16,
"learning_rate": 4.7278458403489636e-05,
"loss": 4.6593,
"step": 151000
},
{
"epoch": 0.16,
"learning_rate": 4.726944667634887e-05,
"loss": 4.6931,
"step": 151500
},
{
"epoch": 0.16,
"learning_rate": 4.7260434949208104e-05,
"loss": 4.6728,
"step": 152000
},
{
"epoch": 0.16,
"learning_rate": 4.725142322206734e-05,
"loss": 4.6942,
"step": 152500
},
{
"epoch": 0.17,
"learning_rate": 4.724241149492658e-05,
"loss": 4.655,
"step": 153000
},
{
"epoch": 0.17,
"learning_rate": 4.723339976778582e-05,
"loss": 4.6958,
"step": 153500
},
{
"epoch": 0.17,
"learning_rate": 4.7224388040645054e-05,
"loss": 4.727,
"step": 154000
},
{
"epoch": 0.17,
"learning_rate": 4.721537631350429e-05,
"loss": 4.7039,
"step": 154500
},
{
"epoch": 0.17,
"learning_rate": 4.720636458636353e-05,
"loss": 4.6621,
"step": 155000
},
{
"epoch": 0.17,
"learning_rate": 4.7197352859222766e-05,
"loss": 4.7307,
"step": 155500
},
{
"epoch": 0.17,
"learning_rate": 4.7188341132082004e-05,
"loss": 4.6781,
"step": 156000
},
{
"epoch": 0.17,
"learning_rate": 4.717932940494124e-05,
"loss": 4.6862,
"step": 156500
},
{
"epoch": 0.17,
"learning_rate": 4.717031767780048e-05,
"loss": 4.6321,
"step": 157000
},
{
"epoch": 0.17,
"learning_rate": 4.716130595065971e-05,
"loss": 4.6918,
"step": 157500
},
{
"epoch": 0.17,
"learning_rate": 4.715229422351895e-05,
"loss": 4.7254,
"step": 158000
},
{
"epoch": 0.17,
"learning_rate": 4.714328249637819e-05,
"loss": 4.6808,
"step": 158500
},
{
"epoch": 0.17,
"learning_rate": 4.713427076923743e-05,
"loss": 4.6929,
"step": 159000
},
{
"epoch": 0.17,
"learning_rate": 4.7125259042096665e-05,
"loss": 4.6183,
"step": 159500
},
{
"epoch": 0.17,
"learning_rate": 4.71162473149559e-05,
"loss": 4.6005,
"step": 160000
},
{
"epoch": 0.17,
"learning_rate": 4.710723558781514e-05,
"loss": 4.7159,
"step": 160500
},
{
"epoch": 0.17,
"learning_rate": 4.709822386067437e-05,
"loss": 4.6412,
"step": 161000
},
{
"epoch": 0.17,
"learning_rate": 4.708921213353361e-05,
"loss": 4.6927,
"step": 161500
},
{
"epoch": 0.18,
"learning_rate": 4.708020040639285e-05,
"loss": 4.7037,
"step": 162000
},
{
"epoch": 0.18,
"learning_rate": 4.707118867925209e-05,
"loss": 4.7063,
"step": 162500
},
{
"epoch": 0.18,
"learning_rate": 4.706217695211133e-05,
"loss": 4.739,
"step": 163000
},
{
"epoch": 0.18,
"learning_rate": 4.7053165224970565e-05,
"loss": 4.6985,
"step": 163500
},
{
"epoch": 0.18,
"learning_rate": 4.7044153497829795e-05,
"loss": 4.6828,
"step": 164000
},
{
"epoch": 0.18,
"learning_rate": 4.703514177068903e-05,
"loss": 4.7187,
"step": 164500
},
{
"epoch": 0.18,
"learning_rate": 4.702613004354827e-05,
"loss": 4.7055,
"step": 165000
},
{
"epoch": 0.18,
"learning_rate": 4.701711831640751e-05,
"loss": 4.6414,
"step": 165500
},
{
"epoch": 0.18,
"learning_rate": 4.7008106589266745e-05,
"loss": 4.6793,
"step": 166000
},
{
"epoch": 0.18,
"learning_rate": 4.699909486212599e-05,
"loss": 4.7155,
"step": 166500
},
{
"epoch": 0.18,
"learning_rate": 4.699008313498522e-05,
"loss": 4.6599,
"step": 167000
},
{
"epoch": 0.18,
"learning_rate": 4.698107140784446e-05,
"loss": 4.6949,
"step": 167500
},
{
"epoch": 0.18,
"learning_rate": 4.6972059680703695e-05,
"loss": 4.6781,
"step": 168000
},
{
"epoch": 0.18,
"learning_rate": 4.696304795356293e-05,
"loss": 4.6621,
"step": 168500
},
{
"epoch": 0.18,
"learning_rate": 4.695403622642217e-05,
"loss": 4.675,
"step": 169000
},
{
"epoch": 0.18,
"learning_rate": 4.694502449928141e-05,
"loss": 4.6254,
"step": 169500
},
{
"epoch": 0.18,
"learning_rate": 4.6936012772140644e-05,
"loss": 4.7044,
"step": 170000
},
{
"epoch": 0.18,
"learning_rate": 4.692700104499988e-05,
"loss": 4.6353,
"step": 170500
},
{
"epoch": 0.18,
"learning_rate": 4.691798931785912e-05,
"loss": 4.6393,
"step": 171000
},
{
"epoch": 0.19,
"learning_rate": 4.6908977590718356e-05,
"loss": 4.6692,
"step": 171500
},
{
"epoch": 0.19,
"learning_rate": 4.6899965863577594e-05,
"loss": 4.6501,
"step": 172000
},
{
"epoch": 0.19,
"learning_rate": 4.689095413643683e-05,
"loss": 4.6289,
"step": 172500
},
{
"epoch": 0.19,
"learning_rate": 4.688194240929607e-05,
"loss": 4.6656,
"step": 173000
},
{
"epoch": 0.19,
"learning_rate": 4.6872930682155306e-05,
"loss": 4.6542,
"step": 173500
},
{
"epoch": 0.19,
"learning_rate": 4.686391895501454e-05,
"loss": 4.678,
"step": 174000
},
{
"epoch": 0.19,
"learning_rate": 4.685490722787378e-05,
"loss": 4.648,
"step": 174500
},
{
"epoch": 0.19,
"learning_rate": 4.684589550073302e-05,
"loss": 4.6518,
"step": 175000
},
{
"epoch": 0.19,
"learning_rate": 4.6836883773592256e-05,
"loss": 4.7169,
"step": 175500
},
{
"epoch": 0.19,
"learning_rate": 4.682787204645149e-05,
"loss": 4.6243,
"step": 176000
},
{
"epoch": 0.19,
"learning_rate": 4.6818860319310724e-05,
"loss": 4.6988,
"step": 176500
},
{
"epoch": 0.19,
"learning_rate": 4.680984859216996e-05,
"loss": 4.5944,
"step": 177000
},
{
"epoch": 0.19,
"learning_rate": 4.68008368650292e-05,
"loss": 4.7104,
"step": 177500
},
{
"epoch": 0.19,
"learning_rate": 4.679182513788844e-05,
"loss": 4.6633,
"step": 178000
},
{
"epoch": 0.19,
"learning_rate": 4.678281341074768e-05,
"loss": 4.6841,
"step": 178500
},
{
"epoch": 0.19,
"learning_rate": 4.677380168360692e-05,
"loss": 4.6535,
"step": 179000
},
{
"epoch": 0.19,
"learning_rate": 4.676478995646615e-05,
"loss": 4.7139,
"step": 179500
},
{
"epoch": 0.19,
"learning_rate": 4.6755778229325385e-05,
"loss": 4.6433,
"step": 180000
},
{
"epoch": 0.2,
"learning_rate": 4.674676650218462e-05,
"loss": 4.7148,
"step": 180500
},
{
"epoch": 0.2,
"learning_rate": 4.673775477504386e-05,
"loss": 4.6483,
"step": 181000
},
{
"epoch": 0.2,
"learning_rate": 4.67287430479031e-05,
"loss": 4.6044,
"step": 181500
},
{
"epoch": 0.2,
"learning_rate": 4.671973132076234e-05,
"loss": 4.6271,
"step": 182000
},
{
"epoch": 0.2,
"learning_rate": 4.671071959362157e-05,
"loss": 4.6416,
"step": 182500
},
{
"epoch": 0.2,
"learning_rate": 4.670170786648081e-05,
"loss": 4.6732,
"step": 183000
},
{
"epoch": 0.2,
"learning_rate": 4.669269613934005e-05,
"loss": 4.6461,
"step": 183500
},
{
"epoch": 0.2,
"learning_rate": 4.6683684412199285e-05,
"loss": 4.6583,
"step": 184000
},
{
"epoch": 0.2,
"learning_rate": 4.667467268505852e-05,
"loss": 4.6572,
"step": 184500
},
{
"epoch": 0.2,
"learning_rate": 4.666566095791776e-05,
"loss": 4.6394,
"step": 185000
},
{
"epoch": 0.2,
"learning_rate": 4.6656649230777e-05,
"loss": 4.676,
"step": 185500
},
{
"epoch": 0.2,
"learning_rate": 4.6647637503636234e-05,
"loss": 4.6573,
"step": 186000
},
{
"epoch": 0.2,
"learning_rate": 4.663862577649547e-05,
"loss": 4.6528,
"step": 186500
},
{
"epoch": 0.2,
"learning_rate": 4.662961404935471e-05,
"loss": 4.658,
"step": 187000
},
{
"epoch": 0.2,
"learning_rate": 4.6620602322213946e-05,
"loss": 4.6363,
"step": 187500
},
{
"epoch": 0.2,
"learning_rate": 4.6611590595073184e-05,
"loss": 4.6629,
"step": 188000
},
{
"epoch": 0.2,
"learning_rate": 4.660257886793242e-05,
"loss": 4.6319,
"step": 188500
},
{
"epoch": 0.2,
"learning_rate": 4.659356714079166e-05,
"loss": 4.6833,
"step": 189000
},
{
"epoch": 0.2,
"learning_rate": 4.6584555413650896e-05,
"loss": 4.586,
"step": 189500
},
{
"epoch": 0.21,
"learning_rate": 4.657554368651013e-05,
"loss": 4.6757,
"step": 190000
},
{
"epoch": 0.21,
"learning_rate": 4.656653195936937e-05,
"loss": 4.6509,
"step": 190500
},
{
"epoch": 0.21,
"learning_rate": 4.655752023222861e-05,
"loss": 4.6792,
"step": 191000
},
{
"epoch": 0.21,
"learning_rate": 4.6548508505087846e-05,
"loss": 4.6738,
"step": 191500
},
{
"epoch": 0.21,
"learning_rate": 4.6539496777947076e-05,
"loss": 4.6407,
"step": 192000
},
{
"epoch": 0.21,
"learning_rate": 4.6530485050806314e-05,
"loss": 4.6581,
"step": 192500
},
{
"epoch": 0.21,
"learning_rate": 4.652147332366555e-05,
"loss": 4.688,
"step": 193000
},
{
"epoch": 0.21,
"learning_rate": 4.6512461596524795e-05,
"loss": 4.6858,
"step": 193500
},
{
"epoch": 0.21,
"learning_rate": 4.650344986938403e-05,
"loss": 4.6618,
"step": 194000
},
{
"epoch": 0.21,
"learning_rate": 4.649443814224327e-05,
"loss": 4.6565,
"step": 194500
},
{
"epoch": 0.21,
"learning_rate": 4.64854264151025e-05,
"loss": 4.6477,
"step": 195000
},
{
"epoch": 0.21,
"learning_rate": 4.647641468796174e-05,
"loss": 4.6347,
"step": 195500
},
{
"epoch": 0.21,
"learning_rate": 4.6467402960820975e-05,
"loss": 4.6384,
"step": 196000
},
{
"epoch": 0.21,
"learning_rate": 4.645839123368021e-05,
"loss": 4.6041,
"step": 196500
},
{
"epoch": 0.21,
"learning_rate": 4.644937950653945e-05,
"loss": 4.6302,
"step": 197000
},
{
"epoch": 0.21,
"learning_rate": 4.6440367779398694e-05,
"loss": 4.582,
"step": 197500
},
{
"epoch": 0.21,
"learning_rate": 4.6431356052257925e-05,
"loss": 4.6465,
"step": 198000
},
{
"epoch": 0.21,
"learning_rate": 4.642234432511716e-05,
"loss": 4.6427,
"step": 198500
},
{
"epoch": 0.22,
"learning_rate": 4.64133325979764e-05,
"loss": 4.6421,
"step": 199000
},
{
"epoch": 0.22,
"learning_rate": 4.640432087083564e-05,
"loss": 4.6108,
"step": 199500
},
{
"epoch": 0.22,
"learning_rate": 4.6395309143694875e-05,
"loss": 4.6228,
"step": 200000
},
{
"epoch": 0.22,
"learning_rate": 4.638629741655411e-05,
"loss": 4.5645,
"step": 200500
},
{
"epoch": 0.22,
"learning_rate": 4.637728568941335e-05,
"loss": 4.5875,
"step": 201000
},
{
"epoch": 0.22,
"learning_rate": 4.636827396227259e-05,
"loss": 4.6283,
"step": 201500
},
{
"epoch": 0.22,
"learning_rate": 4.6359262235131824e-05,
"loss": 4.6218,
"step": 202000
},
{
"epoch": 0.22,
"learning_rate": 4.635025050799106e-05,
"loss": 4.6801,
"step": 202500
},
{
"epoch": 0.22,
"learning_rate": 4.63412387808503e-05,
"loss": 4.6695,
"step": 203000
},
{
"epoch": 0.22,
"learning_rate": 4.6332227053709536e-05,
"loss": 4.684,
"step": 203500
},
{
"epoch": 0.22,
"learning_rate": 4.6323215326568774e-05,
"loss": 4.5908,
"step": 204000
},
{
"epoch": 0.22,
"learning_rate": 4.6314203599428004e-05,
"loss": 4.6085,
"step": 204500
},
{
"epoch": 0.22,
"learning_rate": 4.630519187228725e-05,
"loss": 4.6316,
"step": 205000
},
{
"epoch": 0.22,
"learning_rate": 4.6296180145146486e-05,
"loss": 4.6607,
"step": 205500
},
{
"epoch": 0.22,
"learning_rate": 4.6287168418005723e-05,
"loss": 4.6351,
"step": 206000
},
{
"epoch": 0.22,
"learning_rate": 4.627815669086496e-05,
"loss": 4.6443,
"step": 206500
},
{
"epoch": 0.22,
"learning_rate": 4.62691449637242e-05,
"loss": 4.6842,
"step": 207000
},
{
"epoch": 0.22,
"learning_rate": 4.626013323658343e-05,
"loss": 4.6173,
"step": 207500
},
{
"epoch": 0.22,
"learning_rate": 4.6251121509442666e-05,
"loss": 4.593,
"step": 208000
},
{
"epoch": 0.23,
"learning_rate": 4.6242109782301904e-05,
"loss": 4.606,
"step": 208500
},
{
"epoch": 0.23,
"learning_rate": 4.623309805516115e-05,
"loss": 4.6188,
"step": 209000
},
{
"epoch": 0.23,
"learning_rate": 4.6224086328020385e-05,
"loss": 4.6307,
"step": 209500
},
{
"epoch": 0.23,
"learning_rate": 4.621507460087962e-05,
"loss": 4.5919,
"step": 210000
},
{
"epoch": 0.23,
"learning_rate": 4.620606287373885e-05,
"loss": 4.6507,
"step": 210500
},
{
"epoch": 0.23,
"learning_rate": 4.619705114659809e-05,
"loss": 4.6382,
"step": 211000
},
{
"epoch": 0.23,
"learning_rate": 4.618803941945733e-05,
"loss": 4.5784,
"step": 211500
},
{
"epoch": 0.23,
"learning_rate": 4.6179027692316565e-05,
"loss": 4.5813,
"step": 212000
},
{
"epoch": 0.23,
"learning_rate": 4.61700159651758e-05,
"loss": 4.6059,
"step": 212500
},
{
"epoch": 0.23,
"learning_rate": 4.616100423803505e-05,
"loss": 4.5996,
"step": 213000
},
{
"epoch": 0.23,
"learning_rate": 4.6151992510894284e-05,
"loss": 4.6524,
"step": 213500
},
{
"epoch": 0.23,
"learning_rate": 4.6142980783753515e-05,
"loss": 4.6452,
"step": 214000
},
{
"epoch": 0.23,
"learning_rate": 4.613396905661275e-05,
"loss": 4.6752,
"step": 214500
},
{
"epoch": 0.23,
"learning_rate": 4.612495732947199e-05,
"loss": 4.5912,
"step": 215000
},
{
"epoch": 0.23,
"learning_rate": 4.611594560233123e-05,
"loss": 4.6646,
"step": 215500
},
{
"epoch": 0.23,
"learning_rate": 4.6106933875190465e-05,
"loss": 4.6234,
"step": 216000
},
{
"epoch": 0.23,
"learning_rate": 4.60979221480497e-05,
"loss": 4.6457,
"step": 216500
},
{
"epoch": 0.23,
"learning_rate": 4.608891042090894e-05,
"loss": 4.6285,
"step": 217000
},
{
"epoch": 0.24,
"learning_rate": 4.607989869376818e-05,
"loss": 4.6047,
"step": 217500
},
{
"epoch": 0.24,
"learning_rate": 4.6070886966627414e-05,
"loss": 4.5877,
"step": 218000
},
{
"epoch": 0.24,
"learning_rate": 4.606187523948665e-05,
"loss": 4.6101,
"step": 218500
},
{
"epoch": 0.24,
"learning_rate": 4.605286351234589e-05,
"loss": 4.6867,
"step": 219000
},
{
"epoch": 0.24,
"learning_rate": 4.6043851785205126e-05,
"loss": 4.6508,
"step": 219500
},
{
"epoch": 0.24,
"learning_rate": 4.603484005806436e-05,
"loss": 4.6099,
"step": 220000
},
{
"epoch": 0.24,
"learning_rate": 4.60258283309236e-05,
"loss": 4.6508,
"step": 220500
},
{
"epoch": 0.24,
"learning_rate": 4.601681660378284e-05,
"loss": 4.6105,
"step": 221000
},
{
"epoch": 0.24,
"learning_rate": 4.6007804876642076e-05,
"loss": 4.6001,
"step": 221500
},
{
"epoch": 0.24,
"learning_rate": 4.5998793149501313e-05,
"loss": 4.6344,
"step": 222000
},
{
"epoch": 0.24,
"learning_rate": 4.598978142236055e-05,
"loss": 4.585,
"step": 222500
},
{
"epoch": 0.24,
"learning_rate": 4.598076969521978e-05,
"loss": 4.5558,
"step": 223000
},
{
"epoch": 0.24,
"learning_rate": 4.597175796807902e-05,
"loss": 4.5825,
"step": 223500
},
{
"epoch": 0.24,
"learning_rate": 4.5962746240938256e-05,
"loss": 4.5569,
"step": 224000
},
{
"epoch": 0.24,
"learning_rate": 4.59537345137975e-05,
"loss": 4.5647,
"step": 224500
},
{
"epoch": 0.24,
"learning_rate": 4.594472278665674e-05,
"loss": 4.5887,
"step": 225000
},
{
"epoch": 0.24,
"learning_rate": 4.5935711059515975e-05,
"loss": 4.5825,
"step": 225500
},
{
"epoch": 0.24,
"learning_rate": 4.5926699332375206e-05,
"loss": 4.5739,
"step": 226000
},
{
"epoch": 0.24,
"learning_rate": 4.591768760523444e-05,
"loss": 4.5726,
"step": 226500
},
{
"epoch": 0.25,
"learning_rate": 4.590867587809368e-05,
"loss": 4.6447,
"step": 227000
},
{
"epoch": 0.25,
"learning_rate": 4.589966415095292e-05,
"loss": 4.5851,
"step": 227500
},
{
"epoch": 0.25,
"learning_rate": 4.5890652423812155e-05,
"loss": 4.5571,
"step": 228000
},
{
"epoch": 0.25,
"learning_rate": 4.58816406966714e-05,
"loss": 4.5877,
"step": 228500
},
{
"epoch": 0.25,
"learning_rate": 4.587262896953064e-05,
"loss": 4.5896,
"step": 229000
},
{
"epoch": 0.25,
"learning_rate": 4.586361724238987e-05,
"loss": 4.591,
"step": 229500
},
{
"epoch": 0.25,
"learning_rate": 4.5854605515249105e-05,
"loss": 4.5587,
"step": 230000
},
{
"epoch": 0.25,
"learning_rate": 4.584559378810834e-05,
"loss": 4.5871,
"step": 230500
},
{
"epoch": 0.25,
"learning_rate": 4.583658206096758e-05,
"loss": 4.6129,
"step": 231000
},
{
"epoch": 0.25,
"learning_rate": 4.582757033382682e-05,
"loss": 4.5838,
"step": 231500
},
{
"epoch": 0.25,
"learning_rate": 4.5818558606686055e-05,
"loss": 4.6555,
"step": 232000
},
{
"epoch": 0.25,
"learning_rate": 4.580954687954529e-05,
"loss": 4.5784,
"step": 232500
},
{
"epoch": 0.25,
"learning_rate": 4.580053515240453e-05,
"loss": 4.5853,
"step": 233000
},
{
"epoch": 0.25,
"learning_rate": 4.579152342526377e-05,
"loss": 4.5536,
"step": 233500
},
{
"epoch": 0.25,
"learning_rate": 4.5782511698123004e-05,
"loss": 4.6067,
"step": 234000
},
{
"epoch": 0.25,
"learning_rate": 4.577349997098224e-05,
"loss": 4.6091,
"step": 234500
},
{
"epoch": 0.25,
"learning_rate": 4.576448824384148e-05,
"loss": 4.5912,
"step": 235000
},
{
"epoch": 0.25,
"learning_rate": 4.575547651670071e-05,
"loss": 4.5887,
"step": 235500
},
{
"epoch": 0.26,
"learning_rate": 4.5746464789559954e-05,
"loss": 4.5748,
"step": 236000
},
{
"epoch": 0.26,
"learning_rate": 4.573745306241919e-05,
"loss": 4.537,
"step": 236500
},
{
"epoch": 0.26,
"learning_rate": 4.572844133527843e-05,
"loss": 4.518,
"step": 237000
},
{
"epoch": 0.26,
"learning_rate": 4.5719429608137666e-05,
"loss": 4.5982,
"step": 237500
},
{
"epoch": 0.26,
"learning_rate": 4.5710417880996904e-05,
"loss": 4.5996,
"step": 238000
},
{
"epoch": 0.26,
"learning_rate": 4.5701406153856134e-05,
"loss": 4.6103,
"step": 238500
},
{
"epoch": 0.26,
"learning_rate": 4.569239442671537e-05,
"loss": 4.5725,
"step": 239000
},
{
"epoch": 0.26,
"learning_rate": 4.568338269957461e-05,
"loss": 4.6039,
"step": 239500
},
{
"epoch": 0.26,
"learning_rate": 4.567437097243385e-05,
"loss": 4.5271,
"step": 240000
},
{
"epoch": 0.26,
"learning_rate": 4.566535924529309e-05,
"loss": 4.6387,
"step": 240500
},
{
"epoch": 0.26,
"learning_rate": 4.565634751815233e-05,
"loss": 4.5238,
"step": 241000
},
{
"epoch": 0.26,
"learning_rate": 4.5647335791011565e-05,
"loss": 4.5608,
"step": 241500
},
{
"epoch": 0.26,
"learning_rate": 4.5638324063870796e-05,
"loss": 4.582,
"step": 242000
},
{
"epoch": 0.26,
"learning_rate": 4.562931233673003e-05,
"loss": 4.5491,
"step": 242500
},
{
"epoch": 0.26,
"learning_rate": 4.562030060958927e-05,
"loss": 4.5778,
"step": 243000
},
{
"epoch": 0.26,
"learning_rate": 4.561128888244851e-05,
"loss": 4.6373,
"step": 243500
},
{
"epoch": 0.26,
"learning_rate": 4.5602277155307746e-05,
"loss": 4.6209,
"step": 244000
},
{
"epoch": 0.26,
"learning_rate": 4.559326542816699e-05,
"loss": 4.5673,
"step": 244500
},
{
"epoch": 0.26,
"learning_rate": 4.558425370102622e-05,
"loss": 4.5685,
"step": 245000
},
{
"epoch": 0.27,
"learning_rate": 4.557524197388546e-05,
"loss": 4.5357,
"step": 245500
},
{
"epoch": 0.27,
"learning_rate": 4.5566230246744695e-05,
"loss": 4.576,
"step": 246000
},
{
"epoch": 0.27,
"learning_rate": 4.555721851960393e-05,
"loss": 4.602,
"step": 246500
},
{
"epoch": 0.27,
"learning_rate": 4.554820679246317e-05,
"loss": 4.4973,
"step": 247000
},
{
"epoch": 0.27,
"learning_rate": 4.553919506532241e-05,
"loss": 4.5782,
"step": 247500
},
{
"epoch": 0.27,
"learning_rate": 4.5530183338181645e-05,
"loss": 4.5825,
"step": 248000
},
{
"epoch": 0.27,
"learning_rate": 4.552117161104088e-05,
"loss": 4.5145,
"step": 248500
},
{
"epoch": 0.27,
"learning_rate": 4.551215988390012e-05,
"loss": 4.5698,
"step": 249000
},
{
"epoch": 0.27,
"learning_rate": 4.550314815675936e-05,
"loss": 4.5806,
"step": 249500
},
{
"epoch": 0.27,
"learning_rate": 4.5494136429618594e-05,
"loss": 4.5052,
"step": 250000
},
{
"epoch": 0.27,
"learning_rate": 4.548512470247783e-05,
"loss": 4.5976,
"step": 250500
},
{
"epoch": 0.27,
"learning_rate": 4.547611297533706e-05,
"loss": 4.5667,
"step": 251000
},
{
"epoch": 0.27,
"learning_rate": 4.5467101248196307e-05,
"loss": 4.5431,
"step": 251500
},
{
"epoch": 0.27,
"learning_rate": 4.5458089521055544e-05,
"loss": 4.5659,
"step": 252000
},
{
"epoch": 0.27,
"learning_rate": 4.544907779391478e-05,
"loss": 4.5484,
"step": 252500
},
{
"epoch": 0.27,
"learning_rate": 4.544006606677402e-05,
"loss": 4.5668,
"step": 253000
},
{
"epoch": 0.27,
"learning_rate": 4.5431054339633256e-05,
"loss": 4.5605,
"step": 253500
},
{
"epoch": 0.27,
"learning_rate": 4.5422042612492494e-05,
"loss": 4.5686,
"step": 254000
},
{
"epoch": 0.28,
"learning_rate": 4.5413030885351724e-05,
"loss": 4.5157,
"step": 254500
},
{
"epoch": 0.28,
"learning_rate": 4.540401915821096e-05,
"loss": 4.5193,
"step": 255000
},
{
"epoch": 0.28,
"learning_rate": 4.53950074310702e-05,
"loss": 4.5781,
"step": 255500
},
{
"epoch": 0.28,
"learning_rate": 4.538599570392944e-05,
"loss": 4.5719,
"step": 256000
},
{
"epoch": 0.28,
"learning_rate": 4.537698397678868e-05,
"loss": 4.5509,
"step": 256500
},
{
"epoch": 0.28,
"learning_rate": 4.536797224964792e-05,
"loss": 4.5617,
"step": 257000
},
{
"epoch": 0.28,
"learning_rate": 4.535896052250715e-05,
"loss": 4.5149,
"step": 257500
},
{
"epoch": 0.28,
"learning_rate": 4.5349948795366386e-05,
"loss": 4.5628,
"step": 258000
},
{
"epoch": 0.28,
"learning_rate": 4.534093706822562e-05,
"loss": 4.5809,
"step": 258500
},
{
"epoch": 0.28,
"learning_rate": 4.533192534108486e-05,
"loss": 4.525,
"step": 259000
},
{
"epoch": 0.28,
"learning_rate": 4.53229136139441e-05,
"loss": 4.5036,
"step": 259500
},
{
"epoch": 0.28,
"learning_rate": 4.531390188680334e-05,
"loss": 4.5399,
"step": 260000
},
{
"epoch": 0.28,
"learning_rate": 4.530489015966257e-05,
"loss": 4.5939,
"step": 260500
},
{
"epoch": 0.28,
"learning_rate": 4.529587843252181e-05,
"loss": 4.605,
"step": 261000
},
{
"epoch": 0.28,
"learning_rate": 4.528686670538105e-05,
"loss": 4.4758,
"step": 261500
},
{
"epoch": 0.28,
"learning_rate": 4.5277854978240285e-05,
"loss": 4.5757,
"step": 262000
},
{
"epoch": 0.28,
"learning_rate": 4.526884325109952e-05,
"loss": 4.5944,
"step": 262500
},
{
"epoch": 0.28,
"learning_rate": 4.525983152395876e-05,
"loss": 4.5485,
"step": 263000
},
{
"epoch": 0.28,
"learning_rate": 4.5250819796818e-05,
"loss": 4.6034,
"step": 263500
},
{
"epoch": 0.29,
"learning_rate": 4.5241808069677235e-05,
"loss": 4.5887,
"step": 264000
},
{
"epoch": 0.29,
"learning_rate": 4.523279634253647e-05,
"loss": 4.5265,
"step": 264500
},
{
"epoch": 0.29,
"learning_rate": 4.522378461539571e-05,
"loss": 4.5177,
"step": 265000
},
{
"epoch": 0.29,
"learning_rate": 4.521477288825495e-05,
"loss": 4.6046,
"step": 265500
},
{
"epoch": 0.29,
"learning_rate": 4.5205761161114184e-05,
"loss": 4.5481,
"step": 266000
},
{
"epoch": 0.29,
"learning_rate": 4.519674943397342e-05,
"loss": 4.5171,
"step": 266500
},
{
"epoch": 0.29,
"learning_rate": 4.518773770683265e-05,
"loss": 4.522,
"step": 267000
},
{
"epoch": 0.29,
"learning_rate": 4.5178725979691897e-05,
"loss": 4.5399,
"step": 267500
},
{
"epoch": 0.29,
"learning_rate": 4.5169714252551134e-05,
"loss": 4.5763,
"step": 268000
},
{
"epoch": 0.29,
"learning_rate": 4.516070252541037e-05,
"loss": 4.5583,
"step": 268500
},
{
"epoch": 0.29,
"learning_rate": 4.515169079826961e-05,
"loss": 4.569,
"step": 269000
},
{
"epoch": 0.29,
"learning_rate": 4.5142679071128846e-05,
"loss": 4.5812,
"step": 269500
},
{
"epoch": 0.29,
"learning_rate": 4.513366734398808e-05,
"loss": 4.5136,
"step": 270000
},
{
"epoch": 0.29,
"learning_rate": 4.5124655616847314e-05,
"loss": 4.525,
"step": 270500
},
{
"epoch": 0.29,
"learning_rate": 4.511564388970655e-05,
"loss": 4.4892,
"step": 271000
},
{
"epoch": 0.29,
"learning_rate": 4.5106632162565796e-05,
"loss": 4.4955,
"step": 271500
},
{
"epoch": 0.29,
"learning_rate": 4.509762043542503e-05,
"loss": 4.5783,
"step": 272000
},
{
"epoch": 0.29,
"learning_rate": 4.508860870828427e-05,
"loss": 4.5685,
"step": 272500
},
{
"epoch": 0.3,
"learning_rate": 4.50795969811435e-05,
"loss": 4.5577,
"step": 273000
},
{
"epoch": 0.3,
"learning_rate": 4.507058525400274e-05,
"loss": 4.6029,
"step": 273500
},
{
"epoch": 0.3,
"learning_rate": 4.5061573526861976e-05,
"loss": 4.5451,
"step": 274000
},
{
"epoch": 0.3,
"learning_rate": 4.5052561799721213e-05,
"loss": 4.5816,
"step": 274500
},
{
"epoch": 0.3,
"learning_rate": 4.504355007258045e-05,
"loss": 4.4644,
"step": 275000
},
{
"epoch": 0.3,
"learning_rate": 4.5034538345439695e-05,
"loss": 4.5545,
"step": 275500
},
{
"epoch": 0.3,
"learning_rate": 4.5025526618298926e-05,
"loss": 4.5686,
"step": 276000
},
{
"epoch": 0.3,
"learning_rate": 4.501651489115816e-05,
"loss": 4.598,
"step": 276500
},
{
"epoch": 0.3,
"learning_rate": 4.50075031640174e-05,
"loss": 4.5273,
"step": 277000
},
{
"epoch": 0.3,
"learning_rate": 4.499849143687664e-05,
"loss": 4.498,
"step": 277500
},
{
"epoch": 0.3,
"learning_rate": 4.4989479709735875e-05,
"loss": 4.5226,
"step": 278000
},
{
"epoch": 0.3,
"learning_rate": 4.498046798259511e-05,
"loss": 4.5453,
"step": 278500
},
{
"epoch": 0.3,
"learning_rate": 4.497145625545435e-05,
"loss": 4.5878,
"step": 279000
},
{
"epoch": 0.3,
"learning_rate": 4.496244452831359e-05,
"loss": 4.4889,
"step": 279500
},
{
"epoch": 0.3,
"learning_rate": 4.4953432801172825e-05,
"loss": 4.531,
"step": 280000
},
{
"epoch": 0.3,
"learning_rate": 4.494442107403206e-05,
"loss": 4.5446,
"step": 280500
},
{
"epoch": 0.3,
"learning_rate": 4.49354093468913e-05,
"loss": 4.5806,
"step": 281000
},
{
"epoch": 0.3,
"learning_rate": 4.492639761975054e-05,
"loss": 4.5838,
"step": 281500
},
{
"epoch": 0.3,
"learning_rate": 4.4917385892609774e-05,
"loss": 4.5335,
"step": 282000
},
{
"epoch": 0.31,
"learning_rate": 4.4908374165469005e-05,
"loss": 4.4475,
"step": 282500
},
{
"epoch": 0.31,
"learning_rate": 4.489936243832825e-05,
"loss": 4.4819,
"step": 283000
},
{
"epoch": 0.31,
"learning_rate": 4.489035071118749e-05,
"loss": 4.4793,
"step": 283500
},
{
"epoch": 0.31,
"learning_rate": 4.4881338984046724e-05,
"loss": 4.5127,
"step": 284000
},
{
"epoch": 0.31,
"learning_rate": 4.487232725690596e-05,
"loss": 4.5424,
"step": 284500
},
{
"epoch": 0.31,
"learning_rate": 4.48633155297652e-05,
"loss": 4.4961,
"step": 285000
},
{
"epoch": 0.31,
"learning_rate": 4.485430380262443e-05,
"loss": 4.5096,
"step": 285500
},
{
"epoch": 0.31,
"learning_rate": 4.484529207548367e-05,
"loss": 4.5307,
"step": 286000
},
{
"epoch": 0.31,
"learning_rate": 4.4836280348342904e-05,
"loss": 4.5627,
"step": 286500
},
{
"epoch": 0.31,
"learning_rate": 4.482726862120215e-05,
"loss": 4.526,
"step": 287000
},
{
"epoch": 0.31,
"learning_rate": 4.4818256894061386e-05,
"loss": 4.5265,
"step": 287500
},
{
"epoch": 0.31,
"learning_rate": 4.480924516692062e-05,
"loss": 4.5695,
"step": 288000
},
{
"epoch": 0.31,
"learning_rate": 4.4800233439779854e-05,
"loss": 4.5962,
"step": 288500
},
{
"epoch": 0.31,
"learning_rate": 4.479122171263909e-05,
"loss": 4.5317,
"step": 289000
},
{
"epoch": 0.31,
"learning_rate": 4.478220998549833e-05,
"loss": 4.5309,
"step": 289500
},
{
"epoch": 0.31,
"learning_rate": 4.4773198258357566e-05,
"loss": 4.5631,
"step": 290000
},
{
"epoch": 0.31,
"learning_rate": 4.4764186531216803e-05,
"loss": 4.5116,
"step": 290500
},
{
"epoch": 0.31,
"learning_rate": 4.475517480407605e-05,
"loss": 4.5155,
"step": 291000
},
{
"epoch": 0.32,
"learning_rate": 4.474616307693528e-05,
"loss": 4.5214,
"step": 291500
},
{
"epoch": 0.32,
"learning_rate": 4.4737151349794516e-05,
"loss": 4.5764,
"step": 292000
},
{
"epoch": 0.32,
"learning_rate": 4.472813962265375e-05,
"loss": 4.5458,
"step": 292500
},
{
"epoch": 0.32,
"learning_rate": 4.471912789551299e-05,
"loss": 4.5426,
"step": 293000
},
{
"epoch": 0.32,
"learning_rate": 4.471011616837223e-05,
"loss": 4.5371,
"step": 293500
},
{
"epoch": 0.32,
"learning_rate": 4.4701104441231465e-05,
"loss": 4.5577,
"step": 294000
},
{
"epoch": 0.32,
"learning_rate": 4.46920927140907e-05,
"loss": 4.4803,
"step": 294500
},
{
"epoch": 0.32,
"learning_rate": 4.468308098694994e-05,
"loss": 4.5597,
"step": 295000
},
{
"epoch": 0.32,
"learning_rate": 4.467406925980918e-05,
"loss": 4.5193,
"step": 295500
},
{
"epoch": 0.32,
"learning_rate": 4.4665057532668415e-05,
"loss": 4.4773,
"step": 296000
},
{
"epoch": 0.32,
"learning_rate": 4.465604580552765e-05,
"loss": 4.5625,
"step": 296500
},
{
"epoch": 0.32,
"learning_rate": 4.464703407838689e-05,
"loss": 4.5206,
"step": 297000
},
{
"epoch": 0.32,
"learning_rate": 4.463802235124613e-05,
"loss": 4.49,
"step": 297500
},
{
"epoch": 0.32,
"learning_rate": 4.462901062410536e-05,
"loss": 4.511,
"step": 298000
},
{
"epoch": 0.32,
"learning_rate": 4.46199988969646e-05,
"loss": 4.4423,
"step": 298500
},
{
"epoch": 0.32,
"learning_rate": 4.461098716982384e-05,
"loss": 4.5147,
"step": 299000
},
{
"epoch": 0.32,
"learning_rate": 4.460197544268308e-05,
"loss": 4.5474,
"step": 299500
},
{
"epoch": 0.32,
"learning_rate": 4.4592963715542314e-05,
"loss": 4.5493,
"step": 300000
},
{
"epoch": 0.32,
"learning_rate": 4.458395198840155e-05,
"loss": 4.49,
"step": 300500
},
{
"epoch": 0.33,
"learning_rate": 4.457494026126078e-05,
"loss": 4.5583,
"step": 301000
},
{
"epoch": 0.33,
"learning_rate": 4.456592853412002e-05,
"loss": 4.5288,
"step": 301500
},
{
"epoch": 0.33,
"learning_rate": 4.455691680697926e-05,
"loss": 4.5368,
"step": 302000
},
{
"epoch": 0.33,
"learning_rate": 4.45479050798385e-05,
"loss": 4.5171,
"step": 302500
},
{
"epoch": 0.33,
"learning_rate": 4.453889335269774e-05,
"loss": 4.4643,
"step": 303000
},
{
"epoch": 0.33,
"learning_rate": 4.4529881625556976e-05,
"loss": 4.4714,
"step": 303500
},
{
"epoch": 0.33,
"learning_rate": 4.4520869898416207e-05,
"loss": 4.5301,
"step": 304000
},
{
"epoch": 0.33,
"learning_rate": 4.4511858171275444e-05,
"loss": 4.4925,
"step": 304500
},
{
"epoch": 0.33,
"learning_rate": 4.450284644413468e-05,
"loss": 4.534,
"step": 305000
},
{
"epoch": 0.33,
"learning_rate": 4.449383471699392e-05,
"loss": 4.5594,
"step": 305500
},
{
"epoch": 0.33,
"learning_rate": 4.4484822989853156e-05,
"loss": 4.5079,
"step": 306000
},
{
"epoch": 0.33,
"learning_rate": 4.4475811262712394e-05,
"loss": 4.5475,
"step": 306500
},
{
"epoch": 0.33,
"learning_rate": 4.446679953557164e-05,
"loss": 4.4948,
"step": 307000
},
{
"epoch": 0.33,
"learning_rate": 4.445778780843087e-05,
"loss": 4.5257,
"step": 307500
},
{
"epoch": 0.33,
"learning_rate": 4.4448776081290106e-05,
"loss": 4.4913,
"step": 308000
},
{
"epoch": 0.33,
"learning_rate": 4.443976435414934e-05,
"loss": 4.5662,
"step": 308500
},
{
"epoch": 0.33,
"learning_rate": 4.443075262700858e-05,
"loss": 4.5267,
"step": 309000
},
{
"epoch": 0.33,
"learning_rate": 4.442174089986782e-05,
"loss": 4.4999,
"step": 309500
},
{
"epoch": 0.34,
"learning_rate": 4.4412729172727055e-05,
"loss": 4.5111,
"step": 310000
},
{
"epoch": 0.34,
"learning_rate": 4.440371744558629e-05,
"loss": 4.5107,
"step": 310500
},
{
"epoch": 0.34,
"learning_rate": 4.439470571844553e-05,
"loss": 4.5682,
"step": 311000
},
{
"epoch": 0.34,
"learning_rate": 4.438569399130477e-05,
"loss": 4.5016,
"step": 311500
},
{
"epoch": 0.34,
"learning_rate": 4.4376682264164005e-05,
"loss": 4.4973,
"step": 312000
},
{
"epoch": 0.34,
"learning_rate": 4.436767053702324e-05,
"loss": 4.5022,
"step": 312500
},
{
"epoch": 0.34,
"learning_rate": 4.435865880988248e-05,
"loss": 4.5441,
"step": 313000
},
{
"epoch": 0.34,
"learning_rate": 4.434964708274171e-05,
"loss": 4.5459,
"step": 313500
},
{
"epoch": 0.34,
"learning_rate": 4.4340635355600955e-05,
"loss": 4.5141,
"step": 314000
},
{
"epoch": 0.34,
"learning_rate": 4.433162362846019e-05,
"loss": 4.5329,
"step": 314500
},
{
"epoch": 0.34,
"learning_rate": 4.432261190131943e-05,
"loss": 4.5172,
"step": 315000
},
{
"epoch": 0.34,
"learning_rate": 4.431360017417867e-05,
"loss": 4.5264,
"step": 315500
},
{
"epoch": 0.34,
"learning_rate": 4.4304588447037904e-05,
"loss": 4.5352,
"step": 316000
},
{
"epoch": 0.34,
"learning_rate": 4.4295576719897135e-05,
"loss": 4.5287,
"step": 316500
},
{
"epoch": 0.34,
"learning_rate": 4.428656499275637e-05,
"loss": 4.5314,
"step": 317000
},
{
"epoch": 0.34,
"learning_rate": 4.427755326561561e-05,
"loss": 4.5219,
"step": 317500
},
{
"epoch": 0.34,
"learning_rate": 4.426854153847485e-05,
"loss": 4.5136,
"step": 318000
},
{
"epoch": 0.34,
"learning_rate": 4.425952981133409e-05,
"loss": 4.483,
"step": 318500
},
{
"epoch": 0.34,
"learning_rate": 4.425051808419333e-05,
"loss": 4.4569,
"step": 319000
},
{
"epoch": 0.35,
"learning_rate": 4.4241506357052566e-05,
"loss": 4.522,
"step": 319500
},
{
"epoch": 0.35,
"learning_rate": 4.4232494629911797e-05,
"loss": 4.4795,
"step": 320000
},
{
"epoch": 0.35,
"learning_rate": 4.4223482902771034e-05,
"loss": 4.5522,
"step": 320500
},
{
"epoch": 0.35,
"learning_rate": 4.421447117563027e-05,
"loss": 4.49,
"step": 321000
},
{
"epoch": 0.35,
"learning_rate": 4.420545944848951e-05,
"loss": 4.5291,
"step": 321500
},
{
"epoch": 0.35,
"learning_rate": 4.4196447721348746e-05,
"loss": 4.5003,
"step": 322000
},
{
"epoch": 0.35,
"learning_rate": 4.418743599420799e-05,
"loss": 4.4768,
"step": 322500
},
{
"epoch": 0.35,
"learning_rate": 4.417842426706722e-05,
"loss": 4.498,
"step": 323000
},
{
"epoch": 0.35,
"learning_rate": 4.416941253992646e-05,
"loss": 4.4942,
"step": 323500
},
{
"epoch": 0.35,
"learning_rate": 4.4160400812785696e-05,
"loss": 4.5224,
"step": 324000
},
{
"epoch": 0.35,
"learning_rate": 4.415138908564493e-05,
"loss": 4.4901,
"step": 324500
},
{
"epoch": 0.35,
"learning_rate": 4.414237735850417e-05,
"loss": 4.4871,
"step": 325000
},
{
"epoch": 0.35,
"learning_rate": 4.413336563136341e-05,
"loss": 4.4594,
"step": 325500
},
{
"epoch": 0.35,
"learning_rate": 4.4124353904222645e-05,
"loss": 4.515,
"step": 326000
},
{
"epoch": 0.35,
"learning_rate": 4.411534217708188e-05,
"loss": 4.4832,
"step": 326500
},
{
"epoch": 0.35,
"learning_rate": 4.410633044994112e-05,
"loss": 4.5075,
"step": 327000
},
{
"epoch": 0.35,
"learning_rate": 4.409731872280036e-05,
"loss": 4.4815,
"step": 327500
},
{
"epoch": 0.35,
"learning_rate": 4.4088306995659595e-05,
"loss": 4.5843,
"step": 328000
},
{
"epoch": 0.36,
"learning_rate": 4.407929526851883e-05,
"loss": 4.4808,
"step": 328500
},
{
"epoch": 0.36,
"learning_rate": 4.407028354137806e-05,
"loss": 4.5299,
"step": 329000
},
{
"epoch": 0.36,
"learning_rate": 4.40612718142373e-05,
"loss": 4.4625,
"step": 329500
},
{
"epoch": 0.36,
"learning_rate": 4.4052260087096545e-05,
"loss": 4.5026,
"step": 330000
},
{
"epoch": 0.36,
"learning_rate": 4.404324835995578e-05,
"loss": 4.4895,
"step": 330500
},
{
"epoch": 0.36,
"learning_rate": 4.403423663281502e-05,
"loss": 4.5502,
"step": 331000
},
{
"epoch": 0.36,
"learning_rate": 4.402522490567426e-05,
"loss": 4.4647,
"step": 331500
},
{
"epoch": 0.36,
"learning_rate": 4.401621317853349e-05,
"loss": 4.4907,
"step": 332000
},
{
"epoch": 0.36,
"learning_rate": 4.4007201451392725e-05,
"loss": 4.4623,
"step": 332500
},
{
"epoch": 0.36,
"learning_rate": 4.399818972425196e-05,
"loss": 4.5451,
"step": 333000
},
{
"epoch": 0.36,
"learning_rate": 4.39891779971112e-05,
"loss": 4.5007,
"step": 333500
},
{
"epoch": 0.36,
"learning_rate": 4.3980166269970444e-05,
"loss": 4.507,
"step": 334000
},
{
"epoch": 0.36,
"learning_rate": 4.397115454282968e-05,
"loss": 4.4595,
"step": 334500
},
{
"epoch": 0.36,
"learning_rate": 4.396214281568892e-05,
"loss": 4.5234,
"step": 335000
},
{
"epoch": 0.36,
"learning_rate": 4.395313108854815e-05,
"loss": 4.4674,
"step": 335500
},
{
"epoch": 0.36,
"learning_rate": 4.394411936140739e-05,
"loss": 4.4806,
"step": 336000
},
{
"epoch": 0.36,
"learning_rate": 4.3935107634266624e-05,
"loss": 4.4845,
"step": 336500
},
{
"epoch": 0.36,
"learning_rate": 4.392609590712586e-05,
"loss": 4.5202,
"step": 337000
},
{
"epoch": 0.36,
"learning_rate": 4.39170841799851e-05,
"loss": 4.5301,
"step": 337500
},
{
"epoch": 0.37,
"learning_rate": 4.390807245284434e-05,
"loss": 4.5139,
"step": 338000
},
{
"epoch": 0.37,
"learning_rate": 4.3899060725703574e-05,
"loss": 4.4715,
"step": 338500
},
{
"epoch": 0.37,
"learning_rate": 4.389004899856281e-05,
"loss": 4.4752,
"step": 339000
},
{
"epoch": 0.37,
"learning_rate": 4.388103727142205e-05,
"loss": 4.4945,
"step": 339500
},
{
"epoch": 0.37,
"learning_rate": 4.3872025544281286e-05,
"loss": 4.4648,
"step": 340000
},
{
"epoch": 0.37,
"learning_rate": 4.386301381714052e-05,
"loss": 4.5011,
"step": 340500
},
{
"epoch": 0.37,
"learning_rate": 4.385400208999976e-05,
"loss": 4.527,
"step": 341000
},
{
"epoch": 0.37,
"learning_rate": 4.3844990362859e-05,
"loss": 4.5182,
"step": 341500
},
{
"epoch": 0.37,
"learning_rate": 4.3835978635718235e-05,
"loss": 4.4455,
"step": 342000
},
{
"epoch": 0.37,
"learning_rate": 4.382696690857747e-05,
"loss": 4.4408,
"step": 342500
},
{
"epoch": 0.37,
"learning_rate": 4.381795518143671e-05,
"loss": 4.4726,
"step": 343000
},
{
"epoch": 0.37,
"learning_rate": 4.380894345429595e-05,
"loss": 4.4912,
"step": 343500
},
{
"epoch": 0.37,
"learning_rate": 4.3799931727155185e-05,
"loss": 4.4715,
"step": 344000
},
{
"epoch": 0.37,
"learning_rate": 4.3790920000014416e-05,
"loss": 4.4808,
"step": 344500
},
{
"epoch": 0.37,
"learning_rate": 4.378190827287365e-05,
"loss": 4.469,
"step": 345000
},
{
"epoch": 0.37,
"learning_rate": 4.37728965457329e-05,
"loss": 4.5089,
"step": 345500
},
{
"epoch": 0.37,
"learning_rate": 4.3763884818592135e-05,
"loss": 4.4609,
"step": 346000
},
{
"epoch": 0.37,
"learning_rate": 4.375487309145137e-05,
"loss": 4.5222,
"step": 346500
},
{
"epoch": 0.38,
"learning_rate": 4.374586136431061e-05,
"loss": 4.4459,
"step": 347000
},
{
"epoch": 0.38,
"learning_rate": 4.373684963716985e-05,
"loss": 4.5242,
"step": 347500
},
{
"epoch": 0.38,
"learning_rate": 4.372783791002908e-05,
"loss": 4.4754,
"step": 348000
},
{
"epoch": 0.38,
"learning_rate": 4.3718826182888315e-05,
"loss": 4.4723,
"step": 348500
},
{
"epoch": 0.38,
"learning_rate": 4.370981445574755e-05,
"loss": 4.4723,
"step": 349000
},
{
"epoch": 0.38,
"learning_rate": 4.3700802728606796e-05,
"loss": 4.489,
"step": 349500
},
{
"epoch": 0.38,
"learning_rate": 4.3691791001466034e-05,
"loss": 4.4956,
"step": 350000
},
{
"epoch": 0.38,
"learning_rate": 4.368277927432527e-05,
"loss": 4.5123,
"step": 350500
},
{
"epoch": 0.38,
"learning_rate": 4.36737675471845e-05,
"loss": 4.5453,
"step": 351000
},
{
"epoch": 0.38,
"learning_rate": 4.366475582004374e-05,
"loss": 4.4737,
"step": 351500
},
{
"epoch": 0.38,
"learning_rate": 4.365574409290298e-05,
"loss": 4.4692,
"step": 352000
},
{
"epoch": 0.38,
"learning_rate": 4.3646732365762214e-05,
"loss": 4.4465,
"step": 352500
},
{
"epoch": 0.38,
"learning_rate": 4.363772063862145e-05,
"loss": 4.4285,
"step": 353000
},
{
"epoch": 0.38,
"learning_rate": 4.3628708911480696e-05,
"loss": 4.5237,
"step": 353500
},
{
"epoch": 0.38,
"learning_rate": 4.3619697184339926e-05,
"loss": 4.5195,
"step": 354000
},
{
"epoch": 0.38,
"learning_rate": 4.3610685457199164e-05,
"loss": 4.492,
"step": 354500
},
{
"epoch": 0.38,
"learning_rate": 4.36016737300584e-05,
"loss": 4.485,
"step": 355000
},
{
"epoch": 0.38,
"learning_rate": 4.359266200291764e-05,
"loss": 4.4856,
"step": 355500
},
{
"epoch": 0.38,
"learning_rate": 4.3583650275776876e-05,
"loss": 4.5072,
"step": 356000
},
{
"epoch": 0.39,
"learning_rate": 4.357463854863611e-05,
"loss": 4.4757,
"step": 356500
},
{
"epoch": 0.39,
"learning_rate": 4.356562682149535e-05,
"loss": 4.4942,
"step": 357000
},
{
"epoch": 0.39,
"learning_rate": 4.355661509435459e-05,
"loss": 4.4687,
"step": 357500
},
{
"epoch": 0.39,
"learning_rate": 4.3547603367213825e-05,
"loss": 4.4391,
"step": 358000
},
{
"epoch": 0.39,
"learning_rate": 4.353859164007306e-05,
"loss": 4.4766,
"step": 358500
},
{
"epoch": 0.39,
"learning_rate": 4.35295799129323e-05,
"loss": 4.5417,
"step": 359000
},
{
"epoch": 0.39,
"learning_rate": 4.352056818579154e-05,
"loss": 4.4657,
"step": 359500
},
{
"epoch": 0.39,
"learning_rate": 4.3511556458650775e-05,
"loss": 4.5017,
"step": 360000
},
{
"epoch": 0.39,
"learning_rate": 4.3502544731510006e-05,
"loss": 4.4788,
"step": 360500
},
{
"epoch": 0.39,
"learning_rate": 4.349353300436925e-05,
"loss": 4.4984,
"step": 361000
},
{
"epoch": 0.39,
"learning_rate": 4.348452127722849e-05,
"loss": 4.543,
"step": 361500
},
{
"epoch": 0.39,
"learning_rate": 4.3475509550087725e-05,
"loss": 4.4832,
"step": 362000
},
{
"epoch": 0.39,
"learning_rate": 4.346649782294696e-05,
"loss": 4.5137,
"step": 362500
},
{
"epoch": 0.39,
"learning_rate": 4.34574860958062e-05,
"loss": 4.4928,
"step": 363000
},
{
"epoch": 0.39,
"learning_rate": 4.344847436866543e-05,
"loss": 4.4666,
"step": 363500
},
{
"epoch": 0.39,
"learning_rate": 4.343946264152467e-05,
"loss": 4.4647,
"step": 364000
},
{
"epoch": 0.39,
"learning_rate": 4.3430450914383905e-05,
"loss": 4.4378,
"step": 364500
},
{
"epoch": 0.39,
"learning_rate": 4.342143918724315e-05,
"loss": 4.4485,
"step": 365000
},
{
"epoch": 0.4,
"learning_rate": 4.3412427460102386e-05,
"loss": 4.4648,
"step": 365500
},
{
"epoch": 0.4,
"learning_rate": 4.3403415732961624e-05,
"loss": 4.4384,
"step": 366000
},
{
"epoch": 0.4,
"learning_rate": 4.3394404005820855e-05,
"loss": 4.4319,
"step": 366500
},
{
"epoch": 0.4,
"learning_rate": 4.338539227868009e-05,
"loss": 4.4791,
"step": 367000
},
{
"epoch": 0.4,
"learning_rate": 4.337638055153933e-05,
"loss": 4.5161,
"step": 367500
},
{
"epoch": 0.4,
"learning_rate": 4.336736882439857e-05,
"loss": 4.4157,
"step": 368000
},
{
"epoch": 0.4,
"learning_rate": 4.3358357097257804e-05,
"loss": 4.4717,
"step": 368500
},
{
"epoch": 0.4,
"learning_rate": 4.334934537011704e-05,
"loss": 4.479,
"step": 369000
},
{
"epoch": 0.4,
"learning_rate": 4.334033364297628e-05,
"loss": 4.4731,
"step": 369500
},
{
"epoch": 0.4,
"learning_rate": 4.3331321915835516e-05,
"loss": 4.4899,
"step": 370000
},
{
"epoch": 0.4,
"learning_rate": 4.3322310188694754e-05,
"loss": 4.502,
"step": 370500
},
{
"epoch": 0.4,
"learning_rate": 4.331329846155399e-05,
"loss": 4.4384,
"step": 371000
},
{
"epoch": 0.4,
"learning_rate": 4.330428673441323e-05,
"loss": 4.4643,
"step": 371500
},
{
"epoch": 0.4,
"learning_rate": 4.3295275007272466e-05,
"loss": 4.4491,
"step": 372000
},
{
"epoch": 0.4,
"learning_rate": 4.32862632801317e-05,
"loss": 4.409,
"step": 372500
},
{
"epoch": 0.4,
"learning_rate": 4.327725155299094e-05,
"loss": 4.4673,
"step": 373000
},
{
"epoch": 0.4,
"learning_rate": 4.326823982585018e-05,
"loss": 4.4603,
"step": 373500
},
{
"epoch": 0.4,
"learning_rate": 4.3259228098709416e-05,
"loss": 4.4743,
"step": 374000
},
{
"epoch": 0.4,
"learning_rate": 4.325021637156865e-05,
"loss": 4.4634,
"step": 374500
},
{
"epoch": 0.41,
"learning_rate": 4.324120464442789e-05,
"loss": 4.4576,
"step": 375000
},
{
"epoch": 0.41,
"learning_rate": 4.323219291728713e-05,
"loss": 4.484,
"step": 375500
},
{
"epoch": 0.41,
"learning_rate": 4.322318119014636e-05,
"loss": 4.431,
"step": 376000
},
{
"epoch": 0.41,
"learning_rate": 4.32141694630056e-05,
"loss": 4.5062,
"step": 376500
},
{
"epoch": 0.41,
"learning_rate": 4.320515773586484e-05,
"loss": 4.4464,
"step": 377000
},
{
"epoch": 0.41,
"learning_rate": 4.319614600872408e-05,
"loss": 4.4734,
"step": 377500
},
{
"epoch": 0.41,
"learning_rate": 4.3187134281583315e-05,
"loss": 4.4615,
"step": 378000
},
{
"epoch": 0.41,
"learning_rate": 4.317812255444255e-05,
"loss": 4.4707,
"step": 378500
},
{
"epoch": 0.41,
"learning_rate": 4.316911082730178e-05,
"loss": 4.5084,
"step": 379000
},
{
"epoch": 0.41,
"learning_rate": 4.316009910016102e-05,
"loss": 4.4335,
"step": 379500
},
{
"epoch": 0.41,
"learning_rate": 4.315108737302026e-05,
"loss": 4.5115,
"step": 380000
},
{
"epoch": 0.41,
"learning_rate": 4.31420756458795e-05,
"loss": 4.495,
"step": 380500
},
{
"epoch": 0.41,
"learning_rate": 4.313306391873874e-05,
"loss": 4.5261,
"step": 381000
},
{
"epoch": 0.41,
"learning_rate": 4.3124052191597977e-05,
"loss": 4.5579,
"step": 381500
},
{
"epoch": 0.41,
"learning_rate": 4.311504046445721e-05,
"loss": 4.4861,
"step": 382000
},
{
"epoch": 0.41,
"learning_rate": 4.3106028737316445e-05,
"loss": 4.4294,
"step": 382500
},
{
"epoch": 0.41,
"learning_rate": 4.309701701017568e-05,
"loss": 4.4975,
"step": 383000
},
{
"epoch": 0.41,
"learning_rate": 4.308800528303492e-05,
"loss": 4.4527,
"step": 383500
},
{
"epoch": 0.42,
"learning_rate": 4.307899355589416e-05,
"loss": 4.4521,
"step": 384000
},
{
"epoch": 0.42,
"learning_rate": 4.3069981828753394e-05,
"loss": 4.5024,
"step": 384500
},
{
"epoch": 0.42,
"learning_rate": 4.306097010161263e-05,
"loss": 4.4181,
"step": 385000
},
{
"epoch": 0.42,
"learning_rate": 4.305195837447187e-05,
"loss": 4.4786,
"step": 385500
},
{
"epoch": 0.42,
"learning_rate": 4.3042946647331106e-05,
"loss": 4.4245,
"step": 386000
},
{
"epoch": 0.42,
"learning_rate": 4.3033934920190344e-05,
"loss": 4.4878,
"step": 386500
},
{
"epoch": 0.42,
"learning_rate": 4.302492319304958e-05,
"loss": 4.4427,
"step": 387000
},
{
"epoch": 0.42,
"learning_rate": 4.301591146590882e-05,
"loss": 4.3975,
"step": 387500
},
{
"epoch": 0.42,
"learning_rate": 4.3006899738768056e-05,
"loss": 4.4566,
"step": 388000
},
{
"epoch": 0.42,
"learning_rate": 4.299788801162729e-05,
"loss": 4.4441,
"step": 388500
},
{
"epoch": 0.42,
"learning_rate": 4.298887628448653e-05,
"loss": 4.4606,
"step": 389000
},
{
"epoch": 0.42,
"learning_rate": 4.297986455734577e-05,
"loss": 4.5232,
"step": 389500
},
{
"epoch": 0.42,
"learning_rate": 4.2970852830205006e-05,
"loss": 4.464,
"step": 390000
},
{
"epoch": 0.42,
"learning_rate": 4.296184110306424e-05,
"loss": 4.3564,
"step": 390500
},
{
"epoch": 0.42,
"learning_rate": 4.295282937592348e-05,
"loss": 4.4523,
"step": 391000
},
{
"epoch": 0.42,
"learning_rate": 4.294381764878271e-05,
"loss": 4.4288,
"step": 391500
},
{
"epoch": 0.42,
"learning_rate": 4.2934805921641955e-05,
"loss": 4.3775,
"step": 392000
},
{
"epoch": 0.42,
"learning_rate": 4.292579419450119e-05,
"loss": 4.4579,
"step": 392500
},
{
"epoch": 0.42,
"learning_rate": 4.291678246736043e-05,
"loss": 4.4408,
"step": 393000
},
{
"epoch": 0.43,
"learning_rate": 4.290777074021967e-05,
"loss": 4.4396,
"step": 393500
},
{
"epoch": 0.43,
"learning_rate": 4.2898759013078905e-05,
"loss": 4.4438,
"step": 394000
},
{
"epoch": 0.43,
"learning_rate": 4.2889747285938135e-05,
"loss": 4.46,
"step": 394500
},
{
"epoch": 0.43,
"learning_rate": 4.288073555879737e-05,
"loss": 4.522,
"step": 395000
},
{
"epoch": 0.43,
"learning_rate": 4.287172383165661e-05,
"loss": 4.471,
"step": 395500
},
{
"epoch": 0.43,
"learning_rate": 4.286271210451585e-05,
"loss": 4.3984,
"step": 396000
},
{
"epoch": 0.43,
"learning_rate": 4.285370037737509e-05,
"loss": 4.4826,
"step": 396500
},
{
"epoch": 0.43,
"learning_rate": 4.284468865023433e-05,
"loss": 4.4343,
"step": 397000
},
{
"epoch": 0.43,
"learning_rate": 4.283567692309356e-05,
"loss": 4.4479,
"step": 397500
},
{
"epoch": 0.43,
"learning_rate": 4.28266651959528e-05,
"loss": 4.4768,
"step": 398000
},
{
"epoch": 0.43,
"learning_rate": 4.2817653468812035e-05,
"loss": 4.4508,
"step": 398500
},
{
"epoch": 0.43,
"learning_rate": 4.280864174167127e-05,
"loss": 4.4707,
"step": 399000
},
{
"epoch": 0.43,
"learning_rate": 4.279963001453051e-05,
"loss": 4.4679,
"step": 399500
},
{
"epoch": 0.43,
"learning_rate": 4.279061828738975e-05,
"loss": 4.3981,
"step": 400000
},
{
"epoch": 0.43,
"learning_rate": 4.278160656024899e-05,
"loss": 4.4731,
"step": 400500
},
{
"epoch": 0.43,
"learning_rate": 4.277259483310822e-05,
"loss": 4.4104,
"step": 401000
},
{
"epoch": 0.43,
"learning_rate": 4.276358310596746e-05,
"loss": 4.4197,
"step": 401500
},
{
"epoch": 0.43,
"learning_rate": 4.2754571378826696e-05,
"loss": 4.4555,
"step": 402000
},
{
"epoch": 0.44,
"learning_rate": 4.2745559651685934e-05,
"loss": 4.4885,
"step": 402500
},
{
"epoch": 0.44,
"learning_rate": 4.273654792454517e-05,
"loss": 4.3961,
"step": 403000
},
{
"epoch": 0.44,
"learning_rate": 4.272753619740441e-05,
"loss": 4.4982,
"step": 403500
},
{
"epoch": 0.44,
"learning_rate": 4.2718524470263646e-05,
"loss": 4.4241,
"step": 404000
},
{
"epoch": 0.44,
"learning_rate": 4.2709512743122883e-05,
"loss": 4.4929,
"step": 404500
},
{
"epoch": 0.44,
"learning_rate": 4.270050101598212e-05,
"loss": 4.438,
"step": 405000
},
{
"epoch": 0.44,
"learning_rate": 4.269148928884136e-05,
"loss": 4.4581,
"step": 405500
},
{
"epoch": 0.44,
"learning_rate": 4.2682477561700596e-05,
"loss": 4.4261,
"step": 406000
},
{
"epoch": 0.44,
"learning_rate": 4.267346583455983e-05,
"loss": 4.4314,
"step": 406500
},
{
"epoch": 0.44,
"learning_rate": 4.2664454107419064e-05,
"loss": 4.5199,
"step": 407000
},
{
"epoch": 0.44,
"learning_rate": 4.26554423802783e-05,
"loss": 4.3874,
"step": 407500
},
{
"epoch": 0.44,
"learning_rate": 4.2646430653137545e-05,
"loss": 4.4064,
"step": 408000
},
{
"epoch": 0.44,
"learning_rate": 4.263741892599678e-05,
"loss": 4.4102,
"step": 408500
},
{
"epoch": 0.44,
"learning_rate": 4.262840719885602e-05,
"loss": 4.4532,
"step": 409000
},
{
"epoch": 0.44,
"learning_rate": 4.261939547171526e-05,
"loss": 4.4605,
"step": 409500
},
{
"epoch": 0.44,
"learning_rate": 4.261038374457449e-05,
"loss": 4.4169,
"step": 410000
},
{
"epoch": 0.44,
"learning_rate": 4.2601372017433725e-05,
"loss": 4.4427,
"step": 410500
},
{
"epoch": 0.44,
"learning_rate": 4.259236029029296e-05,
"loss": 4.4733,
"step": 411000
},
{
"epoch": 0.44,
"learning_rate": 4.25833485631522e-05,
"loss": 4.5038,
"step": 411500
},
{
"epoch": 0.45,
"learning_rate": 4.2574336836011444e-05,
"loss": 4.4452,
"step": 412000
},
{
"epoch": 0.45,
"learning_rate": 4.256532510887068e-05,
"loss": 4.4282,
"step": 412500
},
{
"epoch": 0.45,
"learning_rate": 4.255631338172992e-05,
"loss": 4.4557,
"step": 413000
},
{
"epoch": 0.45,
"learning_rate": 4.254730165458915e-05,
"loss": 4.4631,
"step": 413500
},
{
"epoch": 0.45,
"learning_rate": 4.253828992744839e-05,
"loss": 4.4623,
"step": 414000
},
{
"epoch": 0.45,
"learning_rate": 4.2529278200307625e-05,
"loss": 4.4419,
"step": 414500
},
{
"epoch": 0.45,
"learning_rate": 4.252026647316686e-05,
"loss": 4.4337,
"step": 415000
},
{
"epoch": 0.45,
"learning_rate": 4.25112547460261e-05,
"loss": 4.4549,
"step": 415500
},
{
"epoch": 0.45,
"learning_rate": 4.2502243018885344e-05,
"loss": 4.4857,
"step": 416000
},
{
"epoch": 0.45,
"learning_rate": 4.2493231291744574e-05,
"loss": 4.4788,
"step": 416500
},
{
"epoch": 0.45,
"learning_rate": 4.248421956460381e-05,
"loss": 4.4158,
"step": 417000
},
{
"epoch": 0.45,
"learning_rate": 4.247520783746305e-05,
"loss": 4.4255,
"step": 417500
},
{
"epoch": 0.45,
"learning_rate": 4.2466196110322286e-05,
"loss": 4.5044,
"step": 418000
},
{
"epoch": 0.45,
"learning_rate": 4.2457184383181524e-05,
"loss": 4.352,
"step": 418500
},
{
"epoch": 0.45,
"learning_rate": 4.244817265604076e-05,
"loss": 4.4666,
"step": 419000
},
{
"epoch": 0.45,
"learning_rate": 4.24391609289e-05,
"loss": 4.4477,
"step": 419500
},
{
"epoch": 0.45,
"learning_rate": 4.2430149201759236e-05,
"loss": 4.4425,
"step": 420000
},
{
"epoch": 0.45,
"learning_rate": 4.2421137474618473e-05,
"loss": 4.5022,
"step": 420500
},
{
"epoch": 0.46,
"learning_rate": 4.241212574747771e-05,
"loss": 4.3832,
"step": 421000
},
{
"epoch": 0.46,
"learning_rate": 4.240311402033695e-05,
"loss": 4.4686,
"step": 421500
},
{
"epoch": 0.46,
"learning_rate": 4.2394102293196186e-05,
"loss": 4.4394,
"step": 422000
},
{
"epoch": 0.46,
"learning_rate": 4.2385090566055416e-05,
"loss": 4.4662,
"step": 422500
},
{
"epoch": 0.46,
"learning_rate": 4.2376078838914654e-05,
"loss": 4.4482,
"step": 423000
},
{
"epoch": 0.46,
"learning_rate": 4.23670671117739e-05,
"loss": 4.4238,
"step": 423500
},
{
"epoch": 0.46,
"learning_rate": 4.2358055384633135e-05,
"loss": 4.399,
"step": 424000
},
{
"epoch": 0.46,
"learning_rate": 4.234904365749237e-05,
"loss": 4.4646,
"step": 424500
},
{
"epoch": 0.46,
"learning_rate": 4.234003193035161e-05,
"loss": 4.4333,
"step": 425000
},
{
"epoch": 0.46,
"learning_rate": 4.233102020321084e-05,
"loss": 4.4222,
"step": 425500
},
{
"epoch": 0.46,
"learning_rate": 4.232200847607008e-05,
"loss": 4.4807,
"step": 426000
},
{
"epoch": 0.46,
"learning_rate": 4.2312996748929315e-05,
"loss": 4.4585,
"step": 426500
},
{
"epoch": 0.46,
"learning_rate": 4.230398502178855e-05,
"loss": 4.4629,
"step": 427000
},
{
"epoch": 0.46,
"learning_rate": 4.22949732946478e-05,
"loss": 4.3969,
"step": 427500
},
{
"epoch": 0.46,
"learning_rate": 4.2285961567507034e-05,
"loss": 4.4375,
"step": 428000
},
{
"epoch": 0.46,
"learning_rate": 4.227694984036627e-05,
"loss": 4.4462,
"step": 428500
},
{
"epoch": 0.46,
"learning_rate": 4.22679381132255e-05,
"loss": 4.3994,
"step": 429000
},
{
"epoch": 0.46,
"learning_rate": 4.225892638608474e-05,
"loss": 4.441,
"step": 429500
},
{
"epoch": 0.47,
"learning_rate": 4.224991465894398e-05,
"loss": 4.4581,
"step": 430000
},
{
"epoch": 0.47,
"learning_rate": 4.2240902931803215e-05,
"loss": 4.4271,
"step": 430500
},
{
"epoch": 0.47,
"learning_rate": 4.223189120466245e-05,
"loss": 4.4268,
"step": 431000
},
{
"epoch": 0.47,
"learning_rate": 4.2222879477521696e-05,
"loss": 4.4299,
"step": 431500
},
{
"epoch": 0.47,
"learning_rate": 4.221386775038093e-05,
"loss": 4.4034,
"step": 432000
},
{
"epoch": 0.47,
"learning_rate": 4.2204856023240164e-05,
"loss": 4.5251,
"step": 432500
},
{
"epoch": 0.47,
"learning_rate": 4.21958442960994e-05,
"loss": 4.4132,
"step": 433000
},
{
"epoch": 0.47,
"learning_rate": 4.218683256895864e-05,
"loss": 4.4342,
"step": 433500
},
{
"epoch": 0.47,
"learning_rate": 4.2177820841817877e-05,
"loss": 4.4125,
"step": 434000
},
{
"epoch": 0.47,
"learning_rate": 4.2168809114677114e-05,
"loss": 4.4599,
"step": 434500
},
{
"epoch": 0.47,
"learning_rate": 4.215979738753635e-05,
"loss": 4.3972,
"step": 435000
},
{
"epoch": 0.47,
"learning_rate": 4.215078566039559e-05,
"loss": 4.5031,
"step": 435500
},
{
"epoch": 0.47,
"learning_rate": 4.2141773933254826e-05,
"loss": 4.4313,
"step": 436000
},
{
"epoch": 0.47,
"learning_rate": 4.2132762206114064e-05,
"loss": 4.4108,
"step": 436500
},
{
"epoch": 0.47,
"learning_rate": 4.21237504789733e-05,
"loss": 4.4509,
"step": 437000
},
{
"epoch": 0.47,
"learning_rate": 4.211473875183254e-05,
"loss": 4.4684,
"step": 437500
},
{
"epoch": 0.47,
"learning_rate": 4.210572702469177e-05,
"loss": 4.4394,
"step": 438000
},
{
"epoch": 0.47,
"learning_rate": 4.2096715297551006e-05,
"loss": 4.3804,
"step": 438500
},
{
"epoch": 0.47,
"learning_rate": 4.208770357041025e-05,
"loss": 4.4641,
"step": 439000
},
{
"epoch": 0.48,
"learning_rate": 4.207869184326949e-05,
"loss": 4.3934,
"step": 439500
},
{
"epoch": 0.48,
"learning_rate": 4.2069680116128725e-05,
"loss": 4.3989,
"step": 440000
},
{
"epoch": 0.48,
"learning_rate": 4.206066838898796e-05,
"loss": 4.447,
"step": 440500
},
{
"epoch": 0.48,
"learning_rate": 4.20516566618472e-05,
"loss": 4.4046,
"step": 441000
},
{
"epoch": 0.48,
"learning_rate": 4.204264493470643e-05,
"loss": 4.4264,
"step": 441500
},
{
"epoch": 0.48,
"learning_rate": 4.203363320756567e-05,
"loss": 4.3891,
"step": 442000
},
{
"epoch": 0.48,
"learning_rate": 4.2024621480424906e-05,
"loss": 4.4143,
"step": 442500
},
{
"epoch": 0.48,
"learning_rate": 4.201560975328415e-05,
"loss": 4.4362,
"step": 443000
},
{
"epoch": 0.48,
"learning_rate": 4.200659802614339e-05,
"loss": 4.4681,
"step": 443500
},
{
"epoch": 0.48,
"learning_rate": 4.1997586299002625e-05,
"loss": 4.4628,
"step": 444000
},
{
"epoch": 0.48,
"learning_rate": 4.1988574571861855e-05,
"loss": 4.4444,
"step": 444500
},
{
"epoch": 0.48,
"learning_rate": 4.197956284472109e-05,
"loss": 4.3894,
"step": 445000
},
{
"epoch": 0.48,
"learning_rate": 4.197055111758033e-05,
"loss": 4.4775,
"step": 445500
},
{
"epoch": 0.48,
"learning_rate": 4.196153939043957e-05,
"loss": 4.3898,
"step": 446000
},
{
"epoch": 0.48,
"learning_rate": 4.1952527663298805e-05,
"loss": 4.4591,
"step": 446500
},
{
"epoch": 0.48,
"learning_rate": 4.194351593615804e-05,
"loss": 4.4336,
"step": 447000
},
{
"epoch": 0.48,
"learning_rate": 4.193450420901728e-05,
"loss": 4.4063,
"step": 447500
},
{
"epoch": 0.48,
"learning_rate": 4.192549248187652e-05,
"loss": 4.4326,
"step": 448000
},
{
"epoch": 0.49,
"learning_rate": 4.1916480754735754e-05,
"loss": 4.4418,
"step": 448500
},
{
"epoch": 0.49,
"learning_rate": 4.190746902759499e-05,
"loss": 4.4141,
"step": 449000
},
{
"epoch": 0.49,
"learning_rate": 4.189845730045423e-05,
"loss": 4.3698,
"step": 449500
},
{
"epoch": 0.49,
"learning_rate": 4.1889445573313467e-05,
"loss": 4.4296,
"step": 450000
},
{
"epoch": 0.49,
"learning_rate": 4.1880433846172704e-05,
"loss": 4.4399,
"step": 450500
},
{
"epoch": 0.49,
"learning_rate": 4.187142211903194e-05,
"loss": 4.4123,
"step": 451000
},
{
"epoch": 0.49,
"learning_rate": 4.186241039189118e-05,
"loss": 4.3735,
"step": 451500
},
{
"epoch": 0.49,
"learning_rate": 4.1853398664750416e-05,
"loss": 4.3984,
"step": 452000
},
{
"epoch": 0.49,
"learning_rate": 4.1844386937609654e-05,
"loss": 4.4167,
"step": 452500
},
{
"epoch": 0.49,
"learning_rate": 4.183537521046889e-05,
"loss": 4.3666,
"step": 453000
},
{
"epoch": 0.49,
"learning_rate": 4.182636348332813e-05,
"loss": 4.4422,
"step": 453500
},
{
"epoch": 0.49,
"learning_rate": 4.181735175618736e-05,
"loss": 4.3986,
"step": 454000
},
{
"epoch": 0.49,
"learning_rate": 4.18083400290466e-05,
"loss": 4.4333,
"step": 454500
},
{
"epoch": 0.49,
"learning_rate": 4.179932830190584e-05,
"loss": 4.4112,
"step": 455000
},
{
"epoch": 0.49,
"learning_rate": 4.179031657476508e-05,
"loss": 4.42,
"step": 455500
},
{
"epoch": 0.49,
"learning_rate": 4.1781304847624315e-05,
"loss": 4.4114,
"step": 456000
},
{
"epoch": 0.49,
"learning_rate": 4.177229312048355e-05,
"loss": 4.3842,
"step": 456500
},
{
"epoch": 0.49,
"learning_rate": 4.1763281393342783e-05,
"loss": 4.4399,
"step": 457000
},
{
"epoch": 0.49,
"learning_rate": 4.175426966620202e-05,
"loss": 4.4283,
"step": 457500
},
{
"epoch": 0.5,
"learning_rate": 4.174525793906126e-05,
"loss": 4.4357,
"step": 458000
},
{
"epoch": 0.5,
"learning_rate": 4.1736246211920496e-05,
"loss": 4.4383,
"step": 458500
},
{
"epoch": 0.5,
"learning_rate": 4.172723448477974e-05,
"loss": 4.4069,
"step": 459000
},
{
"epoch": 0.5,
"learning_rate": 4.171822275763898e-05,
"loss": 4.4093,
"step": 459500
},
{
"epoch": 0.5,
"learning_rate": 4.170921103049821e-05,
"loss": 4.4003,
"step": 460000
},
{
"epoch": 0.5,
"learning_rate": 4.1700199303357445e-05,
"loss": 4.3554,
"step": 460500
},
{
"epoch": 0.5,
"learning_rate": 4.169118757621668e-05,
"loss": 4.4233,
"step": 461000
},
{
"epoch": 0.5,
"learning_rate": 4.168217584907592e-05,
"loss": 4.4297,
"step": 461500
},
{
"epoch": 0.5,
"learning_rate": 4.167316412193516e-05,
"loss": 4.41,
"step": 462000
},
{
"epoch": 0.5,
"learning_rate": 4.1664152394794395e-05,
"loss": 4.4319,
"step": 462500
},
{
"epoch": 0.5,
"learning_rate": 4.165514066765363e-05,
"loss": 4.4113,
"step": 463000
},
{
"epoch": 0.5,
"learning_rate": 4.164612894051287e-05,
"loss": 4.4162,
"step": 463500
},
{
"epoch": 0.5,
"learning_rate": 4.163711721337211e-05,
"loss": 4.437,
"step": 464000
},
{
"epoch": 0.5,
"learning_rate": 4.1628105486231344e-05,
"loss": 4.4412,
"step": 464500
},
{
"epoch": 0.5,
"learning_rate": 4.161909375909058e-05,
"loss": 4.4154,
"step": 465000
},
{
"epoch": 0.5,
"learning_rate": 4.161008203194982e-05,
"loss": 4.4167,
"step": 465500
},
{
"epoch": 0.5,
"learning_rate": 4.1601070304809057e-05,
"loss": 4.4659,
"step": 466000
},
{
"epoch": 0.5,
"learning_rate": 4.1592058577668294e-05,
"loss": 4.4041,
"step": 466500
},
{
"epoch": 0.51,
"learning_rate": 4.158304685052753e-05,
"loss": 4.4115,
"step": 467000
},
{
"epoch": 0.51,
"learning_rate": 4.157403512338677e-05,
"loss": 4.4393,
"step": 467500
},
{
"epoch": 0.51,
"learning_rate": 4.1565023396246006e-05,
"loss": 4.3725,
"step": 468000
},
{
"epoch": 0.51,
"learning_rate": 4.1556011669105244e-05,
"loss": 4.4011,
"step": 468500
},
{
"epoch": 0.51,
"learning_rate": 4.154699994196448e-05,
"loss": 4.353,
"step": 469000
},
{
"epoch": 0.51,
"learning_rate": 4.153798821482371e-05,
"loss": 4.3823,
"step": 469500
},
{
"epoch": 0.51,
"learning_rate": 4.152897648768295e-05,
"loss": 4.4488,
"step": 470000
},
{
"epoch": 0.51,
"learning_rate": 4.151996476054219e-05,
"loss": 4.4014,
"step": 470500
},
{
"epoch": 0.51,
"learning_rate": 4.151095303340143e-05,
"loss": 4.3857,
"step": 471000
},
{
"epoch": 0.51,
"learning_rate": 4.150194130626067e-05,
"loss": 4.4427,
"step": 471500
},
{
"epoch": 0.51,
"learning_rate": 4.1492929579119905e-05,
"loss": 4.4374,
"step": 472000
},
{
"epoch": 0.51,
"learning_rate": 4.1483917851979136e-05,
"loss": 4.3678,
"step": 472500
},
{
"epoch": 0.51,
"learning_rate": 4.1474906124838373e-05,
"loss": 4.3926,
"step": 473000
},
{
"epoch": 0.51,
"learning_rate": 4.146589439769761e-05,
"loss": 4.426,
"step": 473500
},
{
"epoch": 0.51,
"learning_rate": 4.145688267055685e-05,
"loss": 4.4384,
"step": 474000
},
{
"epoch": 0.51,
"learning_rate": 4.144787094341609e-05,
"loss": 4.4226,
"step": 474500
},
{
"epoch": 0.51,
"learning_rate": 4.143885921627533e-05,
"loss": 4.3985,
"step": 475000
},
{
"epoch": 0.51,
"learning_rate": 4.142984748913456e-05,
"loss": 4.3802,
"step": 475500
},
{
"epoch": 0.51,
"learning_rate": 4.14208357619938e-05,
"loss": 4.4457,
"step": 476000
},
{
"epoch": 0.52,
"learning_rate": 4.1411824034853035e-05,
"loss": 4.4333,
"step": 476500
},
{
"epoch": 0.52,
"learning_rate": 4.140281230771227e-05,
"loss": 4.3906,
"step": 477000
},
{
"epoch": 0.52,
"learning_rate": 4.139380058057151e-05,
"loss": 4.3618,
"step": 477500
},
{
"epoch": 0.52,
"learning_rate": 4.138478885343075e-05,
"loss": 4.4389,
"step": 478000
},
{
"epoch": 0.52,
"learning_rate": 4.1375777126289985e-05,
"loss": 4.354,
"step": 478500
},
{
"epoch": 0.52,
"learning_rate": 4.136676539914922e-05,
"loss": 4.4335,
"step": 479000
},
{
"epoch": 0.52,
"learning_rate": 4.135775367200846e-05,
"loss": 4.405,
"step": 479500
},
{
"epoch": 0.52,
"learning_rate": 4.13487419448677e-05,
"loss": 4.4223,
"step": 480000
},
{
"epoch": 0.52,
"learning_rate": 4.1339730217726934e-05,
"loss": 4.4074,
"step": 480500
},
{
"epoch": 0.52,
"learning_rate": 4.133071849058617e-05,
"loss": 4.3557,
"step": 481000
},
{
"epoch": 0.52,
"learning_rate": 4.132170676344541e-05,
"loss": 4.4342,
"step": 481500
},
{
"epoch": 0.52,
"learning_rate": 4.131269503630465e-05,
"loss": 4.3986,
"step": 482000
},
{
"epoch": 0.52,
"learning_rate": 4.1303683309163884e-05,
"loss": 4.4292,
"step": 482500
},
{
"epoch": 0.52,
"learning_rate": 4.129467158202312e-05,
"loss": 4.4526,
"step": 483000
},
{
"epoch": 0.52,
"learning_rate": 4.128565985488236e-05,
"loss": 4.4217,
"step": 483500
},
{
"epoch": 0.52,
"learning_rate": 4.1276648127741596e-05,
"loss": 4.3949,
"step": 484000
},
{
"epoch": 0.52,
"learning_rate": 4.1267636400600834e-05,
"loss": 4.4406,
"step": 484500
},
{
"epoch": 0.52,
"learning_rate": 4.1258624673460064e-05,
"loss": 4.4383,
"step": 485000
},
{
"epoch": 0.53,
"learning_rate": 4.12496129463193e-05,
"loss": 4.4106,
"step": 485500
},
{
"epoch": 0.53,
"learning_rate": 4.1240601219178546e-05,
"loss": 4.382,
"step": 486000
},
{
"epoch": 0.53,
"learning_rate": 4.123158949203778e-05,
"loss": 4.3267,
"step": 486500
},
{
"epoch": 0.53,
"learning_rate": 4.122257776489702e-05,
"loss": 4.4221,
"step": 487000
},
{
"epoch": 0.53,
"learning_rate": 4.121356603775626e-05,
"loss": 4.3966,
"step": 487500
},
{
"epoch": 0.53,
"learning_rate": 4.120455431061549e-05,
"loss": 4.3824,
"step": 488000
},
{
"epoch": 0.53,
"learning_rate": 4.1195542583474726e-05,
"loss": 4.4346,
"step": 488500
},
{
"epoch": 0.53,
"learning_rate": 4.1186530856333963e-05,
"loss": 4.3681,
"step": 489000
},
{
"epoch": 0.53,
"learning_rate": 4.11775191291932e-05,
"loss": 4.405,
"step": 489500
},
{
"epoch": 0.53,
"learning_rate": 4.1168507402052445e-05,
"loss": 4.4267,
"step": 490000
},
{
"epoch": 0.53,
"learning_rate": 4.115949567491168e-05,
"loss": 4.4356,
"step": 490500
},
{
"epoch": 0.53,
"learning_rate": 4.115048394777091e-05,
"loss": 4.3915,
"step": 491000
},
{
"epoch": 0.53,
"learning_rate": 4.114147222063015e-05,
"loss": 4.4071,
"step": 491500
},
{
"epoch": 0.53,
"learning_rate": 4.113246049348939e-05,
"loss": 4.4558,
"step": 492000
},
{
"epoch": 0.53,
"learning_rate": 4.1123448766348625e-05,
"loss": 4.4161,
"step": 492500
},
{
"epoch": 0.53,
"learning_rate": 4.111443703920786e-05,
"loss": 4.4322,
"step": 493000
},
{
"epoch": 0.53,
"learning_rate": 4.11054253120671e-05,
"loss": 4.4165,
"step": 493500
},
{
"epoch": 0.53,
"learning_rate": 4.1096413584926344e-05,
"loss": 4.3936,
"step": 494000
},
{
"epoch": 0.53,
"learning_rate": 4.1087401857785575e-05,
"loss": 4.464,
"step": 494500
},
{
"epoch": 0.54,
"learning_rate": 4.107839013064481e-05,
"loss": 4.4453,
"step": 495000
},
{
"epoch": 0.54,
"learning_rate": 4.106937840350405e-05,
"loss": 4.4496,
"step": 495500
},
{
"epoch": 0.54,
"learning_rate": 4.106036667636329e-05,
"loss": 4.4243,
"step": 496000
},
{
"epoch": 0.54,
"learning_rate": 4.1051354949222524e-05,
"loss": 4.4202,
"step": 496500
},
{
"epoch": 0.54,
"learning_rate": 4.104234322208176e-05,
"loss": 4.4393,
"step": 497000
},
{
"epoch": 0.54,
"learning_rate": 4.1033331494941e-05,
"loss": 4.3986,
"step": 497500
},
{
"epoch": 0.54,
"learning_rate": 4.102431976780024e-05,
"loss": 4.3453,
"step": 498000
},
{
"epoch": 0.54,
"learning_rate": 4.1015308040659474e-05,
"loss": 4.4282,
"step": 498500
},
{
"epoch": 0.54,
"learning_rate": 4.100629631351871e-05,
"loss": 4.4063,
"step": 499000
},
{
"epoch": 0.54,
"learning_rate": 4.099728458637795e-05,
"loss": 4.4041,
"step": 499500
},
{
"epoch": 0.54,
"learning_rate": 4.0988272859237186e-05,
"loss": 4.4121,
"step": 500000
},
{
"epoch": 0.54,
"learning_rate": 4.097926113209642e-05,
"loss": 4.3535,
"step": 500500
},
{
"epoch": 0.54,
"learning_rate": 4.0970249404955654e-05,
"loss": 4.4579,
"step": 501000
},
{
"epoch": 0.54,
"learning_rate": 4.09612376778149e-05,
"loss": 4.4047,
"step": 501500
},
{
"epoch": 0.54,
"learning_rate": 4.0952225950674136e-05,
"loss": 4.3953,
"step": 502000
},
{
"epoch": 0.54,
"learning_rate": 4.094321422353337e-05,
"loss": 4.3709,
"step": 502500
},
{
"epoch": 0.54,
"learning_rate": 4.093420249639261e-05,
"loss": 4.4017,
"step": 503000
},
{
"epoch": 0.54,
"learning_rate": 4.092519076925184e-05,
"loss": 4.3861,
"step": 503500
},
{
"epoch": 0.55,
"learning_rate": 4.091617904211108e-05,
"loss": 4.4664,
"step": 504000
},
{
"epoch": 0.55,
"learning_rate": 4.0907167314970316e-05,
"loss": 4.4029,
"step": 504500
},
{
"epoch": 0.55,
"learning_rate": 4.0898155587829554e-05,
"loss": 4.386,
"step": 505000
},
{
"epoch": 0.55,
"learning_rate": 4.08891438606888e-05,
"loss": 4.3983,
"step": 505500
},
{
"epoch": 0.55,
"learning_rate": 4.0880132133548035e-05,
"loss": 4.3899,
"step": 506000
},
{
"epoch": 0.55,
"learning_rate": 4.087112040640727e-05,
"loss": 4.3988,
"step": 506500
},
{
"epoch": 0.55,
"learning_rate": 4.08621086792665e-05,
"loss": 4.3771,
"step": 507000
},
{
"epoch": 0.55,
"learning_rate": 4.085309695212574e-05,
"loss": 4.3443,
"step": 507500
},
{
"epoch": 0.55,
"learning_rate": 4.084408522498498e-05,
"loss": 4.3714,
"step": 508000
},
{
"epoch": 0.55,
"learning_rate": 4.0835073497844215e-05,
"loss": 4.3909,
"step": 508500
},
{
"epoch": 0.55,
"learning_rate": 4.082606177070345e-05,
"loss": 4.4214,
"step": 509000
},
{
"epoch": 0.55,
"learning_rate": 4.081705004356269e-05,
"loss": 4.4305,
"step": 509500
},
{
"epoch": 0.55,
"learning_rate": 4.080803831642193e-05,
"loss": 4.3784,
"step": 510000
},
{
"epoch": 0.55,
"learning_rate": 4.0799026589281165e-05,
"loss": 4.4198,
"step": 510500
},
{
"epoch": 0.55,
"learning_rate": 4.07900148621404e-05,
"loss": 4.3954,
"step": 511000
},
{
"epoch": 0.55,
"learning_rate": 4.078100313499964e-05,
"loss": 4.4075,
"step": 511500
},
{
"epoch": 0.55,
"learning_rate": 4.077199140785888e-05,
"loss": 4.4065,
"step": 512000
},
{
"epoch": 0.55,
"learning_rate": 4.0762979680718115e-05,
"loss": 4.4122,
"step": 512500
},
{
"epoch": 0.55,
"learning_rate": 4.075396795357735e-05,
"loss": 4.4,
"step": 513000
},
{
"epoch": 0.56,
"learning_rate": 4.074495622643659e-05,
"loss": 4.3722,
"step": 513500
},
{
"epoch": 0.56,
"learning_rate": 4.073594449929583e-05,
"loss": 4.3375,
"step": 514000
},
{
"epoch": 0.56,
"learning_rate": 4.0726932772155064e-05,
"loss": 4.3655,
"step": 514500
},
{
"epoch": 0.56,
"learning_rate": 4.07179210450143e-05,
"loss": 4.3714,
"step": 515000
},
{
"epoch": 0.56,
"learning_rate": 4.070890931787354e-05,
"loss": 4.4154,
"step": 515500
},
{
"epoch": 0.56,
"learning_rate": 4.069989759073277e-05,
"loss": 4.4121,
"step": 516000
},
{
"epoch": 0.56,
"learning_rate": 4.069088586359201e-05,
"loss": 4.4102,
"step": 516500
},
{
"epoch": 0.56,
"learning_rate": 4.068187413645125e-05,
"loss": 4.3882,
"step": 517000
},
{
"epoch": 0.56,
"learning_rate": 4.067286240931049e-05,
"loss": 4.4476,
"step": 517500
},
{
"epoch": 0.56,
"learning_rate": 4.0663850682169726e-05,
"loss": 4.3978,
"step": 518000
},
{
"epoch": 0.56,
"learning_rate": 4.065483895502896e-05,
"loss": 4.4405,
"step": 518500
},
{
"epoch": 0.56,
"learning_rate": 4.06458272278882e-05,
"loss": 4.3647,
"step": 519000
},
{
"epoch": 0.56,
"learning_rate": 4.063681550074743e-05,
"loss": 4.3729,
"step": 519500
},
{
"epoch": 0.56,
"learning_rate": 4.062780377360667e-05,
"loss": 4.4138,
"step": 520000
},
{
"epoch": 0.56,
"learning_rate": 4.0618792046465906e-05,
"loss": 4.3248,
"step": 520500
},
{
"epoch": 0.56,
"learning_rate": 4.0609780319325144e-05,
"loss": 4.422,
"step": 521000
},
{
"epoch": 0.56,
"learning_rate": 4.060076859218439e-05,
"loss": 4.3538,
"step": 521500
},
{
"epoch": 0.56,
"learning_rate": 4.0591756865043625e-05,
"loss": 4.4099,
"step": 522000
},
{
"epoch": 0.57,
"learning_rate": 4.0582745137902856e-05,
"loss": 4.4193,
"step": 522500
},
{
"epoch": 0.57,
"learning_rate": 4.057373341076209e-05,
"loss": 4.3988,
"step": 523000
},
{
"epoch": 0.57,
"learning_rate": 4.056472168362133e-05,
"loss": 4.4022,
"step": 523500
},
{
"epoch": 0.57,
"learning_rate": 4.055570995648057e-05,
"loss": 4.3413,
"step": 524000
},
{
"epoch": 0.57,
"learning_rate": 4.0546698229339805e-05,
"loss": 4.434,
"step": 524500
},
{
"epoch": 0.57,
"learning_rate": 4.053768650219904e-05,
"loss": 4.3744,
"step": 525000
},
{
"epoch": 0.57,
"learning_rate": 4.052867477505828e-05,
"loss": 4.418,
"step": 525500
},
{
"epoch": 0.57,
"learning_rate": 4.051966304791752e-05,
"loss": 4.3814,
"step": 526000
},
{
"epoch": 0.57,
"learning_rate": 4.0510651320776755e-05,
"loss": 4.3454,
"step": 526500
},
{
"epoch": 0.57,
"learning_rate": 4.050163959363599e-05,
"loss": 4.3251,
"step": 527000
},
{
"epoch": 0.57,
"learning_rate": 4.049262786649523e-05,
"loss": 4.4182,
"step": 527500
},
{
"epoch": 0.57,
"learning_rate": 4.048361613935447e-05,
"loss": 4.3319,
"step": 528000
},
{
"epoch": 0.57,
"learning_rate": 4.0474604412213705e-05,
"loss": 4.3861,
"step": 528500
},
{
"epoch": 0.57,
"learning_rate": 4.046559268507294e-05,
"loss": 4.4092,
"step": 529000
},
{
"epoch": 0.57,
"learning_rate": 4.045658095793218e-05,
"loss": 4.397,
"step": 529500
},
{
"epoch": 0.57,
"learning_rate": 4.044756923079142e-05,
"loss": 4.3839,
"step": 530000
},
{
"epoch": 0.57,
"learning_rate": 4.0438557503650654e-05,
"loss": 4.4383,
"step": 530500
},
{
"epoch": 0.57,
"learning_rate": 4.042954577650989e-05,
"loss": 4.4198,
"step": 531000
},
{
"epoch": 0.57,
"learning_rate": 4.042053404936912e-05,
"loss": 4.3632,
"step": 531500
},
{
"epoch": 0.58,
"learning_rate": 4.041152232222836e-05,
"loss": 4.3722,
"step": 532000
},
{
"epoch": 0.58,
"learning_rate": 4.04025105950876e-05,
"loss": 4.3664,
"step": 532500
},
{
"epoch": 0.58,
"learning_rate": 4.039349886794684e-05,
"loss": 4.357,
"step": 533000
},
{
"epoch": 0.58,
"learning_rate": 4.038448714080608e-05,
"loss": 4.3484,
"step": 533500
},
{
"epoch": 0.58,
"learning_rate": 4.0375475413665316e-05,
"loss": 4.4506,
"step": 534000
},
{
"epoch": 0.58,
"learning_rate": 4.0366463686524553e-05,
"loss": 4.37,
"step": 534500
},
{
"epoch": 0.58,
"learning_rate": 4.0357451959383784e-05,
"loss": 4.3452,
"step": 535000
},
{
"epoch": 0.58,
"learning_rate": 4.034844023224302e-05,
"loss": 4.4018,
"step": 535500
},
{
"epoch": 0.58,
"learning_rate": 4.033942850510226e-05,
"loss": 4.4079,
"step": 536000
},
{
"epoch": 0.58,
"learning_rate": 4.0330416777961496e-05,
"loss": 4.3569,
"step": 536500
},
{
"epoch": 0.58,
"learning_rate": 4.032140505082074e-05,
"loss": 4.3495,
"step": 537000
},
{
"epoch": 0.58,
"learning_rate": 4.031239332367998e-05,
"loss": 4.3752,
"step": 537500
},
{
"epoch": 0.58,
"learning_rate": 4.030338159653921e-05,
"loss": 4.3821,
"step": 538000
},
{
"epoch": 0.58,
"learning_rate": 4.0294369869398446e-05,
"loss": 4.431,
"step": 538500
},
{
"epoch": 0.58,
"learning_rate": 4.028535814225768e-05,
"loss": 4.3057,
"step": 539000
},
{
"epoch": 0.58,
"learning_rate": 4.027634641511692e-05,
"loss": 4.3249,
"step": 539500
},
{
"epoch": 0.58,
"learning_rate": 4.026733468797616e-05,
"loss": 4.3181,
"step": 540000
},
{
"epoch": 0.58,
"learning_rate": 4.0258322960835395e-05,
"loss": 4.3905,
"step": 540500
},
{
"epoch": 0.59,
"learning_rate": 4.024931123369463e-05,
"loss": 4.3406,
"step": 541000
},
{
"epoch": 0.59,
"learning_rate": 4.024029950655387e-05,
"loss": 4.3545,
"step": 541500
},
{
"epoch": 0.59,
"learning_rate": 4.023128777941311e-05,
"loss": 4.3554,
"step": 542000
},
{
"epoch": 0.59,
"learning_rate": 4.0222276052272345e-05,
"loss": 4.4182,
"step": 542500
},
{
"epoch": 0.59,
"learning_rate": 4.021326432513158e-05,
"loss": 4.4599,
"step": 543000
},
{
"epoch": 0.59,
"learning_rate": 4.020425259799082e-05,
"loss": 4.326,
"step": 543500
},
{
"epoch": 0.59,
"learning_rate": 4.019524087085006e-05,
"loss": 4.3247,
"step": 544000
},
{
"epoch": 0.59,
"learning_rate": 4.0186229143709295e-05,
"loss": 4.4027,
"step": 544500
},
{
"epoch": 0.59,
"learning_rate": 4.017721741656853e-05,
"loss": 4.315,
"step": 545000
},
{
"epoch": 0.59,
"learning_rate": 4.016820568942777e-05,
"loss": 4.3967,
"step": 545500
},
{
"epoch": 0.59,
"learning_rate": 4.015919396228701e-05,
"loss": 4.3808,
"step": 546000
},
{
"epoch": 0.59,
"learning_rate": 4.0150182235146244e-05,
"loss": 4.3609,
"step": 546500
},
{
"epoch": 0.59,
"learning_rate": 4.014117050800548e-05,
"loss": 4.3969,
"step": 547000
},
{
"epoch": 0.59,
"learning_rate": 4.013215878086471e-05,
"loss": 4.3735,
"step": 547500
},
{
"epoch": 0.59,
"learning_rate": 4.012314705372395e-05,
"loss": 4.3567,
"step": 548000
},
{
"epoch": 0.59,
"learning_rate": 4.0114135326583194e-05,
"loss": 4.3614,
"step": 548500
},
{
"epoch": 0.59,
"learning_rate": 4.010512359944243e-05,
"loss": 4.3611,
"step": 549000
},
{
"epoch": 0.59,
"learning_rate": 4.009611187230167e-05,
"loss": 4.376,
"step": 549500
},
{
"epoch": 0.59,
"learning_rate": 4.0087100145160906e-05,
"loss": 4.3128,
"step": 550000
},
{
"epoch": 0.6,
"learning_rate": 4.007808841802014e-05,
"loss": 4.3885,
"step": 550500
},
{
"epoch": 0.6,
"learning_rate": 4.0069076690879374e-05,
"loss": 4.3767,
"step": 551000
},
{
"epoch": 0.6,
"learning_rate": 4.006006496373861e-05,
"loss": 4.3457,
"step": 551500
},
{
"epoch": 0.6,
"learning_rate": 4.005105323659785e-05,
"loss": 4.462,
"step": 552000
},
{
"epoch": 0.6,
"learning_rate": 4.004204150945709e-05,
"loss": 4.3849,
"step": 552500
},
{
"epoch": 0.6,
"learning_rate": 4.003302978231633e-05,
"loss": 4.3644,
"step": 553000
},
{
"epoch": 0.6,
"learning_rate": 4.002401805517556e-05,
"loss": 4.3445,
"step": 553500
},
{
"epoch": 0.6,
"learning_rate": 4.00150063280348e-05,
"loss": 4.3573,
"step": 554000
},
{
"epoch": 0.6,
"learning_rate": 4.0005994600894036e-05,
"loss": 4.3702,
"step": 554500
},
{
"epoch": 0.6,
"learning_rate": 3.999698287375327e-05,
"loss": 4.335,
"step": 555000
},
{
"epoch": 0.6,
"learning_rate": 3.998797114661251e-05,
"loss": 4.3592,
"step": 555500
},
{
"epoch": 0.6,
"learning_rate": 3.997895941947175e-05,
"loss": 4.3702,
"step": 556000
},
{
"epoch": 0.6,
"learning_rate": 3.9969947692330985e-05,
"loss": 4.3976,
"step": 556500
},
{
"epoch": 0.6,
"learning_rate": 3.996093596519022e-05,
"loss": 4.3542,
"step": 557000
},
{
"epoch": 0.6,
"learning_rate": 3.995192423804946e-05,
"loss": 4.3243,
"step": 557500
},
{
"epoch": 0.6,
"learning_rate": 3.99429125109087e-05,
"loss": 4.3865,
"step": 558000
},
{
"epoch": 0.6,
"learning_rate": 3.9933900783767935e-05,
"loss": 4.3937,
"step": 558500
},
{
"epoch": 0.6,
"learning_rate": 3.992488905662717e-05,
"loss": 4.4588,
"step": 559000
},
{
"epoch": 0.61,
"learning_rate": 3.991587732948641e-05,
"loss": 4.3763,
"step": 559500
},
{
"epoch": 0.61,
"learning_rate": 3.990686560234565e-05,
"loss": 4.3972,
"step": 560000
}
],
"max_steps": 2774163,
"num_train_epochs": 3,
"total_flos": 3.658088448e+16,
"trial_name": null,
"trial_params": null
}