{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6055880638592613, "global_step": 560000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999098827285924e-05, "loss": 5.3192, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.998197654571848e-05, "loss": 5.4021, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9972964818577715e-05, "loss": 5.4284, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.996395309143695e-05, "loss": 5.4341, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.995494136429619e-05, "loss": 5.4215, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.994592963715543e-05, "loss": 5.3564, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.9936917910014664e-05, "loss": 5.3319, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.99279061828739e-05, "loss": 5.3326, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.991889445573314e-05, "loss": 5.3575, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.990988272859237e-05, "loss": 5.3404, "step": 5000 }, { "epoch": 0.01, "learning_rate": 4.990087100145161e-05, "loss": 5.339, "step": 5500 }, { "epoch": 0.01, "learning_rate": 4.989185927431085e-05, "loss": 5.2714, "step": 6000 }, { "epoch": 0.01, "learning_rate": 4.988284754717009e-05, "loss": 5.2691, "step": 6500 }, { "epoch": 0.01, "learning_rate": 4.9873835820029326e-05, "loss": 5.2559, "step": 7000 }, { "epoch": 0.01, "learning_rate": 4.9864824092888563e-05, "loss": 5.209, "step": 7500 }, { "epoch": 0.01, "learning_rate": 4.9855812365747794e-05, "loss": 5.26, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.984680063860703e-05, "loss": 5.1878, "step": 8500 }, { "epoch": 0.01, "learning_rate": 4.983778891146627e-05, "loss": 5.212, "step": 9000 }, { "epoch": 0.01, "learning_rate": 4.9828777184325506e-05, "loss": 5.2063, "step": 9500 }, { "epoch": 0.01, "learning_rate": 4.981976545718475e-05, "loss": 5.2132, "step": 10000 }, { "epoch": 0.01, "learning_rate": 4.981075373004399e-05, "loss": 5.221, "step": 10500 }, { "epoch": 0.01, "learning_rate": 4.980174200290322e-05, "loss": 5.1786, "step": 11000 }, { "epoch": 0.01, "learning_rate": 4.9792730275762456e-05, "loss": 5.1868, "step": 11500 }, { "epoch": 0.01, "learning_rate": 4.978371854862169e-05, "loss": 5.1585, "step": 12000 }, { "epoch": 0.01, "learning_rate": 4.977470682148093e-05, "loss": 5.2465, "step": 12500 }, { "epoch": 0.01, "learning_rate": 4.976569509434017e-05, "loss": 5.1645, "step": 13000 }, { "epoch": 0.01, "learning_rate": 4.9756683367199405e-05, "loss": 5.1462, "step": 13500 }, { "epoch": 0.02, "learning_rate": 4.974767164005864e-05, "loss": 5.1588, "step": 14000 }, { "epoch": 0.02, "learning_rate": 4.973865991291788e-05, "loss": 5.145, "step": 14500 }, { "epoch": 0.02, "learning_rate": 4.972964818577712e-05, "loss": 5.1256, "step": 15000 }, { "epoch": 0.02, "learning_rate": 4.9720636458636355e-05, "loss": 5.1227, "step": 15500 }, { "epoch": 0.02, "learning_rate": 4.971162473149559e-05, "loss": 5.096, "step": 16000 }, { "epoch": 0.02, "learning_rate": 4.970261300435483e-05, "loss": 5.1427, "step": 16500 }, { "epoch": 0.02, "learning_rate": 4.969360127721407e-05, "loss": 5.121, "step": 17000 }, { "epoch": 0.02, "learning_rate": 4.9684589550073305e-05, "loss": 5.1324, "step": 17500 }, { "epoch": 0.02, "learning_rate": 4.967557782293254e-05, "loss": 5.1476, "step": 18000 }, { "epoch": 0.02, "learning_rate": 4.966656609579178e-05, "loss": 5.0538, "step": 18500 }, { "epoch": 0.02, "learning_rate": 4.965755436865102e-05, "loss": 5.0635, "step": 19000 }, { "epoch": 0.02, "learning_rate": 4.9648542641510254e-05, "loss": 5.0309, "step": 19500 }, { "epoch": 0.02, "learning_rate": 4.963953091436949e-05, "loss": 5.0623, "step": 20000 }, { "epoch": 0.02, "learning_rate": 4.963051918722872e-05, "loss": 5.0624, "step": 20500 }, { "epoch": 0.02, "learning_rate": 4.962150746008796e-05, "loss": 5.0844, "step": 21000 }, { "epoch": 0.02, "learning_rate": 4.9612495732947204e-05, "loss": 5.0667, "step": 21500 }, { "epoch": 0.02, "learning_rate": 4.960348400580644e-05, "loss": 5.0536, "step": 22000 }, { "epoch": 0.02, "learning_rate": 4.959447227866568e-05, "loss": 5.0783, "step": 22500 }, { "epoch": 0.02, "learning_rate": 4.9585460551524916e-05, "loss": 5.0335, "step": 23000 }, { "epoch": 0.03, "learning_rate": 4.957644882438415e-05, "loss": 5.0321, "step": 23500 }, { "epoch": 0.03, "learning_rate": 4.9567437097243384e-05, "loss": 5.037, "step": 24000 }, { "epoch": 0.03, "learning_rate": 4.955842537010262e-05, "loss": 5.0187, "step": 24500 }, { "epoch": 0.03, "learning_rate": 4.954941364296186e-05, "loss": 5.0357, "step": 25000 }, { "epoch": 0.03, "learning_rate": 4.95404019158211e-05, "loss": 5.0128, "step": 25500 }, { "epoch": 0.03, "learning_rate": 4.953139018868034e-05, "loss": 5.0553, "step": 26000 }, { "epoch": 0.03, "learning_rate": 4.952237846153957e-05, "loss": 5.0024, "step": 26500 }, { "epoch": 0.03, "learning_rate": 4.951336673439881e-05, "loss": 5.0177, "step": 27000 }, { "epoch": 0.03, "learning_rate": 4.9504355007258046e-05, "loss": 5.0174, "step": 27500 }, { "epoch": 0.03, "learning_rate": 4.949534328011728e-05, "loss": 5.0167, "step": 28000 }, { "epoch": 0.03, "learning_rate": 4.948633155297652e-05, "loss": 4.9896, "step": 28500 }, { "epoch": 0.03, "learning_rate": 4.947731982583576e-05, "loss": 5.0355, "step": 29000 }, { "epoch": 0.03, "learning_rate": 4.9468308098694995e-05, "loss": 4.9929, "step": 29500 }, { "epoch": 0.03, "learning_rate": 4.945929637155423e-05, "loss": 4.9702, "step": 30000 }, { "epoch": 0.03, "learning_rate": 4.945028464441347e-05, "loss": 4.944, "step": 30500 }, { "epoch": 0.03, "learning_rate": 4.944127291727271e-05, "loss": 4.9957, "step": 31000 }, { "epoch": 0.03, "learning_rate": 4.9432261190131945e-05, "loss": 4.9908, "step": 31500 }, { "epoch": 0.03, "learning_rate": 4.942324946299118e-05, "loss": 4.9816, "step": 32000 }, { "epoch": 0.04, "learning_rate": 4.941423773585042e-05, "loss": 4.9649, "step": 32500 }, { "epoch": 0.04, "learning_rate": 4.940522600870966e-05, "loss": 4.9434, "step": 33000 }, { "epoch": 0.04, "learning_rate": 4.9396214281568895e-05, "loss": 5.0387, "step": 33500 }, { "epoch": 0.04, "learning_rate": 4.938720255442813e-05, "loss": 4.9799, "step": 34000 }, { "epoch": 0.04, "learning_rate": 4.937819082728737e-05, "loss": 4.9648, "step": 34500 }, { "epoch": 0.04, "learning_rate": 4.936917910014661e-05, "loss": 4.9593, "step": 35000 }, { "epoch": 0.04, "learning_rate": 4.9360167373005844e-05, "loss": 4.9687, "step": 35500 }, { "epoch": 0.04, "learning_rate": 4.9351155645865075e-05, "loss": 4.9474, "step": 36000 }, { "epoch": 0.04, "learning_rate": 4.934214391872431e-05, "loss": 4.9344, "step": 36500 }, { "epoch": 0.04, "learning_rate": 4.9333132191583556e-05, "loss": 4.932, "step": 37000 }, { "epoch": 0.04, "learning_rate": 4.9324120464442794e-05, "loss": 5.0116, "step": 37500 }, { "epoch": 0.04, "learning_rate": 4.931510873730203e-05, "loss": 4.9311, "step": 38000 }, { "epoch": 0.04, "learning_rate": 4.930609701016127e-05, "loss": 4.9114, "step": 38500 }, { "epoch": 0.04, "learning_rate": 4.92970852830205e-05, "loss": 4.9517, "step": 39000 }, { "epoch": 0.04, "learning_rate": 4.928807355587974e-05, "loss": 4.9541, "step": 39500 }, { "epoch": 0.04, "learning_rate": 4.9279061828738974e-05, "loss": 4.9637, "step": 40000 }, { "epoch": 0.04, "learning_rate": 4.927005010159821e-05, "loss": 4.9498, "step": 40500 }, { "epoch": 0.04, "learning_rate": 4.926103837445745e-05, "loss": 4.8924, "step": 41000 }, { "epoch": 0.04, "learning_rate": 4.925202664731669e-05, "loss": 4.9596, "step": 41500 }, { "epoch": 0.05, "learning_rate": 4.924301492017593e-05, "loss": 4.9264, "step": 42000 }, { "epoch": 0.05, "learning_rate": 4.923400319303516e-05, "loss": 4.9179, "step": 42500 }, { "epoch": 0.05, "learning_rate": 4.92249914658944e-05, "loss": 4.9151, "step": 43000 }, { "epoch": 0.05, "learning_rate": 4.9215979738753636e-05, "loss": 4.9101, "step": 43500 }, { "epoch": 0.05, "learning_rate": 4.920696801161287e-05, "loss": 4.9541, "step": 44000 }, { "epoch": 0.05, "learning_rate": 4.919795628447211e-05, "loss": 4.9423, "step": 44500 }, { "epoch": 0.05, "learning_rate": 4.918894455733135e-05, "loss": 4.8763, "step": 45000 }, { "epoch": 0.05, "learning_rate": 4.9179932830190586e-05, "loss": 4.9015, "step": 45500 }, { "epoch": 0.05, "learning_rate": 4.917092110304982e-05, "loss": 4.9179, "step": 46000 }, { "epoch": 0.05, "learning_rate": 4.916190937590906e-05, "loss": 4.8837, "step": 46500 }, { "epoch": 0.05, "learning_rate": 4.91528976487683e-05, "loss": 4.9141, "step": 47000 }, { "epoch": 0.05, "learning_rate": 4.9143885921627535e-05, "loss": 4.8766, "step": 47500 }, { "epoch": 0.05, "learning_rate": 4.913487419448677e-05, "loss": 4.9088, "step": 48000 }, { "epoch": 0.05, "learning_rate": 4.912586246734601e-05, "loss": 4.9137, "step": 48500 }, { "epoch": 0.05, "learning_rate": 4.911685074020525e-05, "loss": 4.8692, "step": 49000 }, { "epoch": 0.05, "learning_rate": 4.9107839013064485e-05, "loss": 4.8607, "step": 49500 }, { "epoch": 0.05, "learning_rate": 4.909882728592372e-05, "loss": 4.8573, "step": 50000 }, { "epoch": 0.05, "learning_rate": 4.908981555878296e-05, "loss": 4.9472, "step": 50500 }, { "epoch": 0.06, "learning_rate": 4.90808038316422e-05, "loss": 4.9144, "step": 51000 }, { "epoch": 0.06, "learning_rate": 4.907179210450143e-05, "loss": 4.973, "step": 51500 }, { "epoch": 0.06, "learning_rate": 4.9062780377360665e-05, "loss": 4.9413, "step": 52000 }, { "epoch": 0.06, "learning_rate": 4.90537686502199e-05, "loss": 4.972, "step": 52500 }, { "epoch": 0.06, "learning_rate": 4.9044756923079147e-05, "loss": 4.9722, "step": 53000 }, { "epoch": 0.06, "learning_rate": 4.9035745195938384e-05, "loss": 4.9126, "step": 53500 }, { "epoch": 0.06, "learning_rate": 4.902673346879762e-05, "loss": 4.9117, "step": 54000 }, { "epoch": 0.06, "learning_rate": 4.901772174165686e-05, "loss": 4.9233, "step": 54500 }, { "epoch": 0.06, "learning_rate": 4.900871001451609e-05, "loss": 4.9693, "step": 55000 }, { "epoch": 0.06, "learning_rate": 4.899969828737533e-05, "loss": 4.9875, "step": 55500 }, { "epoch": 0.06, "learning_rate": 4.8990686560234564e-05, "loss": 4.9741, "step": 56000 }, { "epoch": 0.06, "learning_rate": 4.89816748330938e-05, "loss": 4.9411, "step": 56500 }, { "epoch": 0.06, "learning_rate": 4.8972663105953046e-05, "loss": 4.9281, "step": 57000 }, { "epoch": 0.06, "learning_rate": 4.896365137881228e-05, "loss": 4.9392, "step": 57500 }, { "epoch": 0.06, "learning_rate": 4.8954639651671514e-05, "loss": 4.9473, "step": 58000 }, { "epoch": 0.06, "learning_rate": 4.894562792453075e-05, "loss": 4.9333, "step": 58500 }, { "epoch": 0.06, "learning_rate": 4.893661619738999e-05, "loss": 4.9547, "step": 59000 }, { "epoch": 0.06, "learning_rate": 4.8927604470249226e-05, "loss": 4.9422, "step": 59500 }, { "epoch": 0.06, "learning_rate": 4.891859274310846e-05, "loss": 4.9182, "step": 60000 }, { "epoch": 0.07, "learning_rate": 4.89095810159677e-05, "loss": 4.9282, "step": 60500 }, { "epoch": 0.07, "learning_rate": 4.890056928882694e-05, "loss": 4.943, "step": 61000 }, { "epoch": 0.07, "learning_rate": 4.8891557561686176e-05, "loss": 4.9436, "step": 61500 }, { "epoch": 0.07, "learning_rate": 4.888254583454541e-05, "loss": 4.9253, "step": 62000 }, { "epoch": 0.07, "learning_rate": 4.887353410740465e-05, "loss": 4.9442, "step": 62500 }, { "epoch": 0.07, "learning_rate": 4.886452238026389e-05, "loss": 4.8888, "step": 63000 }, { "epoch": 0.07, "learning_rate": 4.8855510653123125e-05, "loss": 4.9155, "step": 63500 }, { "epoch": 0.07, "learning_rate": 4.8846498925982356e-05, "loss": 4.9519, "step": 64000 }, { "epoch": 0.07, "learning_rate": 4.88374871988416e-05, "loss": 4.9563, "step": 64500 }, { "epoch": 0.07, "learning_rate": 4.882847547170084e-05, "loss": 4.9553, "step": 65000 }, { "epoch": 0.07, "learning_rate": 4.8819463744560075e-05, "loss": 4.8865, "step": 65500 }, { "epoch": 0.07, "learning_rate": 4.881045201741931e-05, "loss": 4.8987, "step": 66000 }, { "epoch": 0.07, "learning_rate": 4.880144029027855e-05, "loss": 4.9129, "step": 66500 }, { "epoch": 0.07, "learning_rate": 4.879242856313779e-05, "loss": 4.9565, "step": 67000 }, { "epoch": 0.07, "learning_rate": 4.878341683599702e-05, "loss": 4.8831, "step": 67500 }, { "epoch": 0.07, "learning_rate": 4.8774405108856255e-05, "loss": 4.9007, "step": 68000 }, { "epoch": 0.07, "learning_rate": 4.87653933817155e-05, "loss": 4.9337, "step": 68500 }, { "epoch": 0.07, "learning_rate": 4.8756381654574737e-05, "loss": 4.8446, "step": 69000 }, { "epoch": 0.08, "learning_rate": 4.8747369927433974e-05, "loss": 4.9388, "step": 69500 }, { "epoch": 0.08, "learning_rate": 4.873835820029321e-05, "loss": 4.9655, "step": 70000 }, { "epoch": 0.08, "learning_rate": 4.872934647315244e-05, "loss": 4.9309, "step": 70500 }, { "epoch": 0.08, "learning_rate": 4.872033474601168e-05, "loss": 4.9102, "step": 71000 }, { "epoch": 0.08, "learning_rate": 4.871132301887092e-05, "loss": 4.8491, "step": 71500 }, { "epoch": 0.08, "learning_rate": 4.8702311291730154e-05, "loss": 4.895, "step": 72000 }, { "epoch": 0.08, "learning_rate": 4.86932995645894e-05, "loss": 4.9222, "step": 72500 }, { "epoch": 0.08, "learning_rate": 4.8684287837448636e-05, "loss": 4.8966, "step": 73000 }, { "epoch": 0.08, "learning_rate": 4.8675276110307866e-05, "loss": 4.8669, "step": 73500 }, { "epoch": 0.08, "learning_rate": 4.8666264383167104e-05, "loss": 4.8332, "step": 74000 }, { "epoch": 0.08, "learning_rate": 4.865725265602634e-05, "loss": 4.9127, "step": 74500 }, { "epoch": 0.08, "learning_rate": 4.864824092888558e-05, "loss": 4.9251, "step": 75000 }, { "epoch": 0.08, "learning_rate": 4.8639229201744816e-05, "loss": 4.9379, "step": 75500 }, { "epoch": 0.08, "learning_rate": 4.8630217474604053e-05, "loss": 4.8682, "step": 76000 }, { "epoch": 0.08, "learning_rate": 4.862120574746329e-05, "loss": 4.8762, "step": 76500 }, { "epoch": 0.08, "learning_rate": 4.861219402032253e-05, "loss": 4.8544, "step": 77000 }, { "epoch": 0.08, "learning_rate": 4.8603182293181766e-05, "loss": 4.8835, "step": 77500 }, { "epoch": 0.08, "learning_rate": 4.8594170566041e-05, "loss": 4.8346, "step": 78000 }, { "epoch": 0.08, "learning_rate": 4.858515883890024e-05, "loss": 4.9229, "step": 78500 }, { "epoch": 0.09, "learning_rate": 4.857614711175948e-05, "loss": 4.9159, "step": 79000 }, { "epoch": 0.09, "learning_rate": 4.856713538461871e-05, "loss": 4.8566, "step": 79500 }, { "epoch": 0.09, "learning_rate": 4.855812365747795e-05, "loss": 4.8155, "step": 80000 }, { "epoch": 0.09, "learning_rate": 4.854911193033719e-05, "loss": 4.9222, "step": 80500 }, { "epoch": 0.09, "learning_rate": 4.854010020319643e-05, "loss": 4.8545, "step": 81000 }, { "epoch": 0.09, "learning_rate": 4.8531088476055665e-05, "loss": 4.8295, "step": 81500 }, { "epoch": 0.09, "learning_rate": 4.85220767489149e-05, "loss": 4.8223, "step": 82000 }, { "epoch": 0.09, "learning_rate": 4.851306502177414e-05, "loss": 4.8637, "step": 82500 }, { "epoch": 0.09, "learning_rate": 4.850405329463337e-05, "loss": 4.878, "step": 83000 }, { "epoch": 0.09, "learning_rate": 4.849504156749261e-05, "loss": 4.8677, "step": 83500 }, { "epoch": 0.09, "learning_rate": 4.848602984035185e-05, "loss": 4.8636, "step": 84000 }, { "epoch": 0.09, "learning_rate": 4.847701811321109e-05, "loss": 4.8708, "step": 84500 }, { "epoch": 0.09, "learning_rate": 4.846800638607033e-05, "loss": 4.8608, "step": 85000 }, { "epoch": 0.09, "learning_rate": 4.8458994658929564e-05, "loss": 4.8347, "step": 85500 }, { "epoch": 0.09, "learning_rate": 4.8449982931788795e-05, "loss": 4.8385, "step": 86000 }, { "epoch": 0.09, "learning_rate": 4.844097120464803e-05, "loss": 4.8565, "step": 86500 }, { "epoch": 0.09, "learning_rate": 4.843195947750727e-05, "loss": 4.867, "step": 87000 }, { "epoch": 0.09, "learning_rate": 4.842294775036651e-05, "loss": 4.8456, "step": 87500 }, { "epoch": 0.1, "learning_rate": 4.841393602322575e-05, "loss": 4.8739, "step": 88000 }, { "epoch": 0.1, "learning_rate": 4.840492429608499e-05, "loss": 4.8473, "step": 88500 }, { "epoch": 0.1, "learning_rate": 4.839591256894422e-05, "loss": 4.8496, "step": 89000 }, { "epoch": 0.1, "learning_rate": 4.8386900841803456e-05, "loss": 4.8579, "step": 89500 }, { "epoch": 0.1, "learning_rate": 4.8377889114662694e-05, "loss": 4.8543, "step": 90000 }, { "epoch": 0.1, "learning_rate": 4.836887738752193e-05, "loss": 4.8855, "step": 90500 }, { "epoch": 0.1, "learning_rate": 4.835986566038117e-05, "loss": 4.8511, "step": 91000 }, { "epoch": 0.1, "learning_rate": 4.8350853933240406e-05, "loss": 4.8682, "step": 91500 }, { "epoch": 0.1, "learning_rate": 4.8341842206099643e-05, "loss": 4.8074, "step": 92000 }, { "epoch": 0.1, "learning_rate": 4.833283047895888e-05, "loss": 4.8034, "step": 92500 }, { "epoch": 0.1, "learning_rate": 4.832381875181812e-05, "loss": 4.842, "step": 93000 }, { "epoch": 0.1, "learning_rate": 4.8314807024677356e-05, "loss": 4.8146, "step": 93500 }, { "epoch": 0.1, "learning_rate": 4.830579529753659e-05, "loss": 4.8353, "step": 94000 }, { "epoch": 0.1, "learning_rate": 4.829678357039583e-05, "loss": 4.8151, "step": 94500 }, { "epoch": 0.1, "learning_rate": 4.828777184325507e-05, "loss": 4.8127, "step": 95000 }, { "epoch": 0.1, "learning_rate": 4.8278760116114305e-05, "loss": 4.833, "step": 95500 }, { "epoch": 0.1, "learning_rate": 4.826974838897354e-05, "loss": 4.8383, "step": 96000 }, { "epoch": 0.1, "learning_rate": 4.826073666183278e-05, "loss": 4.8441, "step": 96500 }, { "epoch": 0.1, "learning_rate": 4.825172493469202e-05, "loss": 4.8794, "step": 97000 }, { "epoch": 0.11, "learning_rate": 4.8242713207551255e-05, "loss": 4.828, "step": 97500 }, { "epoch": 0.11, "learning_rate": 4.823370148041049e-05, "loss": 4.7572, "step": 98000 }, { "epoch": 0.11, "learning_rate": 4.822468975326972e-05, "loss": 4.7658, "step": 98500 }, { "epoch": 0.11, "learning_rate": 4.821567802612896e-05, "loss": 4.8123, "step": 99000 }, { "epoch": 0.11, "learning_rate": 4.8206666298988204e-05, "loss": 4.8082, "step": 99500 }, { "epoch": 0.11, "learning_rate": 4.819765457184744e-05, "loss": 4.7542, "step": 100000 }, { "epoch": 0.11, "learning_rate": 4.818864284470668e-05, "loss": 4.8264, "step": 100500 }, { "epoch": 0.11, "learning_rate": 4.817963111756592e-05, "loss": 4.7541, "step": 101000 }, { "epoch": 0.11, "learning_rate": 4.817061939042515e-05, "loss": 4.7992, "step": 101500 }, { "epoch": 0.11, "learning_rate": 4.8161607663284385e-05, "loss": 4.8078, "step": 102000 }, { "epoch": 0.11, "learning_rate": 4.815259593614362e-05, "loss": 4.8453, "step": 102500 }, { "epoch": 0.11, "learning_rate": 4.814358420900286e-05, "loss": 4.8276, "step": 103000 }, { "epoch": 0.11, "learning_rate": 4.81345724818621e-05, "loss": 4.7253, "step": 103500 }, { "epoch": 0.11, "learning_rate": 4.812556075472134e-05, "loss": 4.8102, "step": 104000 }, { "epoch": 0.11, "learning_rate": 4.811654902758057e-05, "loss": 4.8006, "step": 104500 }, { "epoch": 0.11, "learning_rate": 4.810753730043981e-05, "loss": 4.7603, "step": 105000 }, { "epoch": 0.11, "learning_rate": 4.8098525573299047e-05, "loss": 4.7124, "step": 105500 }, { "epoch": 0.11, "learning_rate": 4.8089513846158284e-05, "loss": 4.7757, "step": 106000 }, { "epoch": 0.12, "learning_rate": 4.808050211901752e-05, "loss": 4.7593, "step": 106500 }, { "epoch": 0.12, "learning_rate": 4.807149039187676e-05, "loss": 4.8501, "step": 107000 }, { "epoch": 0.12, "learning_rate": 4.8062478664735996e-05, "loss": 4.8105, "step": 107500 }, { "epoch": 0.12, "learning_rate": 4.8053466937595234e-05, "loss": 4.7692, "step": 108000 }, { "epoch": 0.12, "learning_rate": 4.804445521045447e-05, "loss": 4.7855, "step": 108500 }, { "epoch": 0.12, "learning_rate": 4.803544348331371e-05, "loss": 4.8032, "step": 109000 }, { "epoch": 0.12, "learning_rate": 4.8026431756172946e-05, "loss": 4.7814, "step": 109500 }, { "epoch": 0.12, "learning_rate": 4.801742002903218e-05, "loss": 4.8473, "step": 110000 }, { "epoch": 0.12, "learning_rate": 4.800840830189142e-05, "loss": 4.8047, "step": 110500 }, { "epoch": 0.12, "learning_rate": 4.799939657475066e-05, "loss": 4.8326, "step": 111000 }, { "epoch": 0.12, "learning_rate": 4.7990384847609895e-05, "loss": 4.7993, "step": 111500 }, { "epoch": 0.12, "learning_rate": 4.798137312046913e-05, "loss": 4.7892, "step": 112000 }, { "epoch": 0.12, "learning_rate": 4.797236139332837e-05, "loss": 4.751, "step": 112500 }, { "epoch": 0.12, "learning_rate": 4.796334966618761e-05, "loss": 4.7795, "step": 113000 }, { "epoch": 0.12, "learning_rate": 4.7954337939046845e-05, "loss": 4.7684, "step": 113500 }, { "epoch": 0.12, "learning_rate": 4.7945326211906076e-05, "loss": 4.7673, "step": 114000 }, { "epoch": 0.12, "learning_rate": 4.793631448476531e-05, "loss": 4.7614, "step": 114500 }, { "epoch": 0.12, "learning_rate": 4.792730275762455e-05, "loss": 4.7529, "step": 115000 }, { "epoch": 0.12, "learning_rate": 4.7918291030483795e-05, "loss": 4.8215, "step": 115500 }, { "epoch": 0.13, "learning_rate": 4.790927930334303e-05, "loss": 4.7351, "step": 116000 }, { "epoch": 0.13, "learning_rate": 4.790026757620227e-05, "loss": 4.7878, "step": 116500 }, { "epoch": 0.13, "learning_rate": 4.78912558490615e-05, "loss": 4.7618, "step": 117000 }, { "epoch": 0.13, "learning_rate": 4.788224412192074e-05, "loss": 4.846, "step": 117500 }, { "epoch": 0.13, "learning_rate": 4.7873232394779975e-05, "loss": 4.8027, "step": 118000 }, { "epoch": 0.13, "learning_rate": 4.786422066763921e-05, "loss": 4.7415, "step": 118500 }, { "epoch": 0.13, "learning_rate": 4.785520894049845e-05, "loss": 4.7554, "step": 119000 }, { "epoch": 0.13, "learning_rate": 4.7846197213357694e-05, "loss": 4.7287, "step": 119500 }, { "epoch": 0.13, "learning_rate": 4.783718548621693e-05, "loss": 4.81, "step": 120000 }, { "epoch": 0.13, "learning_rate": 4.782817375907616e-05, "loss": 4.7374, "step": 120500 }, { "epoch": 0.13, "learning_rate": 4.78191620319354e-05, "loss": 4.7541, "step": 121000 }, { "epoch": 0.13, "learning_rate": 4.7810150304794637e-05, "loss": 4.7704, "step": 121500 }, { "epoch": 0.13, "learning_rate": 4.7801138577653874e-05, "loss": 4.7743, "step": 122000 }, { "epoch": 0.13, "learning_rate": 4.779212685051311e-05, "loss": 4.7569, "step": 122500 }, { "epoch": 0.13, "learning_rate": 4.778311512337235e-05, "loss": 4.69, "step": 123000 }, { "epoch": 0.13, "learning_rate": 4.7774103396231586e-05, "loss": 4.8213, "step": 123500 }, { "epoch": 0.13, "learning_rate": 4.7765091669090824e-05, "loss": 4.7616, "step": 124000 }, { "epoch": 0.13, "learning_rate": 4.775607994195006e-05, "loss": 4.7587, "step": 124500 }, { "epoch": 0.14, "learning_rate": 4.77470682148093e-05, "loss": 4.7599, "step": 125000 }, { "epoch": 0.14, "learning_rate": 4.7738056487668536e-05, "loss": 4.692, "step": 125500 }, { "epoch": 0.14, "learning_rate": 4.772904476052777e-05, "loss": 4.8163, "step": 126000 }, { "epoch": 0.14, "learning_rate": 4.7720033033387004e-05, "loss": 4.7533, "step": 126500 }, { "epoch": 0.14, "learning_rate": 4.771102130624625e-05, "loss": 4.7933, "step": 127000 }, { "epoch": 0.14, "learning_rate": 4.7702009579105485e-05, "loss": 4.7659, "step": 127500 }, { "epoch": 0.14, "learning_rate": 4.769299785196472e-05, "loss": 4.7502, "step": 128000 }, { "epoch": 0.14, "learning_rate": 4.768398612482396e-05, "loss": 4.7412, "step": 128500 }, { "epoch": 0.14, "learning_rate": 4.76749743976832e-05, "loss": 4.7917, "step": 129000 }, { "epoch": 0.14, "learning_rate": 4.766596267054243e-05, "loss": 4.7984, "step": 129500 }, { "epoch": 0.14, "learning_rate": 4.7656950943401666e-05, "loss": 4.7151, "step": 130000 }, { "epoch": 0.14, "learning_rate": 4.76479392162609e-05, "loss": 4.7101, "step": 130500 }, { "epoch": 0.14, "learning_rate": 4.763892748912015e-05, "loss": 4.7416, "step": 131000 }, { "epoch": 0.14, "learning_rate": 4.7629915761979385e-05, "loss": 4.7401, "step": 131500 }, { "epoch": 0.14, "learning_rate": 4.762090403483862e-05, "loss": 4.7234, "step": 132000 }, { "epoch": 0.14, "learning_rate": 4.761189230769785e-05, "loss": 4.7334, "step": 132500 }, { "epoch": 0.14, "learning_rate": 4.760288058055709e-05, "loss": 4.7305, "step": 133000 }, { "epoch": 0.14, "learning_rate": 4.759386885341633e-05, "loss": 4.7889, "step": 133500 }, { "epoch": 0.14, "learning_rate": 4.7584857126275565e-05, "loss": 4.7615, "step": 134000 }, { "epoch": 0.15, "learning_rate": 4.75758453991348e-05, "loss": 4.6827, "step": 134500 }, { "epoch": 0.15, "learning_rate": 4.7566833671994046e-05, "loss": 4.7555, "step": 135000 }, { "epoch": 0.15, "learning_rate": 4.7557821944853284e-05, "loss": 4.7644, "step": 135500 }, { "epoch": 0.15, "learning_rate": 4.7548810217712514e-05, "loss": 4.7292, "step": 136000 }, { "epoch": 0.15, "learning_rate": 4.753979849057175e-05, "loss": 4.7221, "step": 136500 }, { "epoch": 0.15, "learning_rate": 4.753078676343099e-05, "loss": 4.7045, "step": 137000 }, { "epoch": 0.15, "learning_rate": 4.752177503629023e-05, "loss": 4.6832, "step": 137500 }, { "epoch": 0.15, "learning_rate": 4.7512763309149464e-05, "loss": 4.7221, "step": 138000 }, { "epoch": 0.15, "learning_rate": 4.75037515820087e-05, "loss": 4.6595, "step": 138500 }, { "epoch": 0.15, "learning_rate": 4.749473985486794e-05, "loss": 4.7322, "step": 139000 }, { "epoch": 0.15, "learning_rate": 4.7485728127727176e-05, "loss": 4.7332, "step": 139500 }, { "epoch": 0.15, "learning_rate": 4.7476716400586414e-05, "loss": 4.7665, "step": 140000 }, { "epoch": 0.15, "learning_rate": 4.746770467344565e-05, "loss": 4.6936, "step": 140500 }, { "epoch": 0.15, "learning_rate": 4.745869294630489e-05, "loss": 4.7322, "step": 141000 }, { "epoch": 0.15, "learning_rate": 4.7449681219164126e-05, "loss": 4.7406, "step": 141500 }, { "epoch": 0.15, "learning_rate": 4.7440669492023356e-05, "loss": 4.757, "step": 142000 }, { "epoch": 0.15, "learning_rate": 4.74316577648826e-05, "loss": 4.7518, "step": 142500 }, { "epoch": 0.15, "learning_rate": 4.742264603774184e-05, "loss": 4.6843, "step": 143000 }, { "epoch": 0.16, "learning_rate": 4.7413634310601075e-05, "loss": 4.6937, "step": 143500 }, { "epoch": 0.16, "learning_rate": 4.740462258346031e-05, "loss": 4.7167, "step": 144000 }, { "epoch": 0.16, "learning_rate": 4.739561085631955e-05, "loss": 4.7101, "step": 144500 }, { "epoch": 0.16, "learning_rate": 4.738659912917878e-05, "loss": 4.7401, "step": 145000 }, { "epoch": 0.16, "learning_rate": 4.737758740203802e-05, "loss": 4.7357, "step": 145500 }, { "epoch": 0.16, "learning_rate": 4.7368575674897256e-05, "loss": 4.7034, "step": 146000 }, { "epoch": 0.16, "learning_rate": 4.73595639477565e-05, "loss": 4.6982, "step": 146500 }, { "epoch": 0.16, "learning_rate": 4.735055222061574e-05, "loss": 4.729, "step": 147000 }, { "epoch": 0.16, "learning_rate": 4.7341540493474975e-05, "loss": 4.7402, "step": 147500 }, { "epoch": 0.16, "learning_rate": 4.733252876633421e-05, "loss": 4.7249, "step": 148000 }, { "epoch": 0.16, "learning_rate": 4.732351703919344e-05, "loss": 4.6795, "step": 148500 }, { "epoch": 0.16, "learning_rate": 4.731450531205268e-05, "loss": 4.7496, "step": 149000 }, { "epoch": 0.16, "learning_rate": 4.730549358491192e-05, "loss": 4.7258, "step": 149500 }, { "epoch": 0.16, "learning_rate": 4.7296481857771155e-05, "loss": 4.7273, "step": 150000 }, { "epoch": 0.16, "learning_rate": 4.72874701306304e-05, "loss": 4.6983, "step": 150500 }, { "epoch": 0.16, "learning_rate": 4.7278458403489636e-05, "loss": 4.6593, "step": 151000 }, { "epoch": 0.16, "learning_rate": 4.726944667634887e-05, "loss": 4.6931, "step": 151500 }, { "epoch": 0.16, "learning_rate": 4.7260434949208104e-05, "loss": 4.6728, "step": 152000 }, { "epoch": 0.16, "learning_rate": 4.725142322206734e-05, "loss": 4.6942, "step": 152500 }, { "epoch": 0.17, "learning_rate": 4.724241149492658e-05, "loss": 4.655, "step": 153000 }, { "epoch": 0.17, "learning_rate": 4.723339976778582e-05, "loss": 4.6958, "step": 153500 }, { "epoch": 0.17, "learning_rate": 4.7224388040645054e-05, "loss": 4.727, "step": 154000 }, { "epoch": 0.17, "learning_rate": 4.721537631350429e-05, "loss": 4.7039, "step": 154500 }, { "epoch": 0.17, "learning_rate": 4.720636458636353e-05, "loss": 4.6621, "step": 155000 }, { "epoch": 0.17, "learning_rate": 4.7197352859222766e-05, "loss": 4.7307, "step": 155500 }, { "epoch": 0.17, "learning_rate": 4.7188341132082004e-05, "loss": 4.6781, "step": 156000 }, { "epoch": 0.17, "learning_rate": 4.717932940494124e-05, "loss": 4.6862, "step": 156500 }, { "epoch": 0.17, "learning_rate": 4.717031767780048e-05, "loss": 4.6321, "step": 157000 }, { "epoch": 0.17, "learning_rate": 4.716130595065971e-05, "loss": 4.6918, "step": 157500 }, { "epoch": 0.17, "learning_rate": 4.715229422351895e-05, "loss": 4.7254, "step": 158000 }, { "epoch": 0.17, "learning_rate": 4.714328249637819e-05, "loss": 4.6808, "step": 158500 }, { "epoch": 0.17, "learning_rate": 4.713427076923743e-05, "loss": 4.6929, "step": 159000 }, { "epoch": 0.17, "learning_rate": 4.7125259042096665e-05, "loss": 4.6183, "step": 159500 }, { "epoch": 0.17, "learning_rate": 4.71162473149559e-05, "loss": 4.6005, "step": 160000 }, { "epoch": 0.17, "learning_rate": 4.710723558781514e-05, "loss": 4.7159, "step": 160500 }, { "epoch": 0.17, "learning_rate": 4.709822386067437e-05, "loss": 4.6412, "step": 161000 }, { "epoch": 0.17, "learning_rate": 4.708921213353361e-05, "loss": 4.6927, "step": 161500 }, { "epoch": 0.18, "learning_rate": 4.708020040639285e-05, "loss": 4.7037, "step": 162000 }, { "epoch": 0.18, "learning_rate": 4.707118867925209e-05, "loss": 4.7063, "step": 162500 }, { "epoch": 0.18, "learning_rate": 4.706217695211133e-05, "loss": 4.739, "step": 163000 }, { "epoch": 0.18, "learning_rate": 4.7053165224970565e-05, "loss": 4.6985, "step": 163500 }, { "epoch": 0.18, "learning_rate": 4.7044153497829795e-05, "loss": 4.6828, "step": 164000 }, { "epoch": 0.18, "learning_rate": 4.703514177068903e-05, "loss": 4.7187, "step": 164500 }, { "epoch": 0.18, "learning_rate": 4.702613004354827e-05, "loss": 4.7055, "step": 165000 }, { "epoch": 0.18, "learning_rate": 4.701711831640751e-05, "loss": 4.6414, "step": 165500 }, { "epoch": 0.18, "learning_rate": 4.7008106589266745e-05, "loss": 4.6793, "step": 166000 }, { "epoch": 0.18, "learning_rate": 4.699909486212599e-05, "loss": 4.7155, "step": 166500 }, { "epoch": 0.18, "learning_rate": 4.699008313498522e-05, "loss": 4.6599, "step": 167000 }, { "epoch": 0.18, "learning_rate": 4.698107140784446e-05, "loss": 4.6949, "step": 167500 }, { "epoch": 0.18, "learning_rate": 4.6972059680703695e-05, "loss": 4.6781, "step": 168000 }, { "epoch": 0.18, "learning_rate": 4.696304795356293e-05, "loss": 4.6621, "step": 168500 }, { "epoch": 0.18, "learning_rate": 4.695403622642217e-05, "loss": 4.675, "step": 169000 }, { "epoch": 0.18, "learning_rate": 4.694502449928141e-05, "loss": 4.6254, "step": 169500 }, { "epoch": 0.18, "learning_rate": 4.6936012772140644e-05, "loss": 4.7044, "step": 170000 }, { "epoch": 0.18, "learning_rate": 4.692700104499988e-05, "loss": 4.6353, "step": 170500 }, { "epoch": 0.18, "learning_rate": 4.691798931785912e-05, "loss": 4.6393, "step": 171000 }, { "epoch": 0.19, "learning_rate": 4.6908977590718356e-05, "loss": 4.6692, "step": 171500 }, { "epoch": 0.19, "learning_rate": 4.6899965863577594e-05, "loss": 4.6501, "step": 172000 }, { "epoch": 0.19, "learning_rate": 4.689095413643683e-05, "loss": 4.6289, "step": 172500 }, { "epoch": 0.19, "learning_rate": 4.688194240929607e-05, "loss": 4.6656, "step": 173000 }, { "epoch": 0.19, "learning_rate": 4.6872930682155306e-05, "loss": 4.6542, "step": 173500 }, { "epoch": 0.19, "learning_rate": 4.686391895501454e-05, "loss": 4.678, "step": 174000 }, { "epoch": 0.19, "learning_rate": 4.685490722787378e-05, "loss": 4.648, "step": 174500 }, { "epoch": 0.19, "learning_rate": 4.684589550073302e-05, "loss": 4.6518, "step": 175000 }, { "epoch": 0.19, "learning_rate": 4.6836883773592256e-05, "loss": 4.7169, "step": 175500 }, { "epoch": 0.19, "learning_rate": 4.682787204645149e-05, "loss": 4.6243, "step": 176000 }, { "epoch": 0.19, "learning_rate": 4.6818860319310724e-05, "loss": 4.6988, "step": 176500 }, { "epoch": 0.19, "learning_rate": 4.680984859216996e-05, "loss": 4.5944, "step": 177000 }, { "epoch": 0.19, "learning_rate": 4.68008368650292e-05, "loss": 4.7104, "step": 177500 }, { "epoch": 0.19, "learning_rate": 4.679182513788844e-05, "loss": 4.6633, "step": 178000 }, { "epoch": 0.19, "learning_rate": 4.678281341074768e-05, "loss": 4.6841, "step": 178500 }, { "epoch": 0.19, "learning_rate": 4.677380168360692e-05, "loss": 4.6535, "step": 179000 }, { "epoch": 0.19, "learning_rate": 4.676478995646615e-05, "loss": 4.7139, "step": 179500 }, { "epoch": 0.19, "learning_rate": 4.6755778229325385e-05, "loss": 4.6433, "step": 180000 }, { "epoch": 0.2, "learning_rate": 4.674676650218462e-05, "loss": 4.7148, "step": 180500 }, { "epoch": 0.2, "learning_rate": 4.673775477504386e-05, "loss": 4.6483, "step": 181000 }, { "epoch": 0.2, "learning_rate": 4.67287430479031e-05, "loss": 4.6044, "step": 181500 }, { "epoch": 0.2, "learning_rate": 4.671973132076234e-05, "loss": 4.6271, "step": 182000 }, { "epoch": 0.2, "learning_rate": 4.671071959362157e-05, "loss": 4.6416, "step": 182500 }, { "epoch": 0.2, "learning_rate": 4.670170786648081e-05, "loss": 4.6732, "step": 183000 }, { "epoch": 0.2, "learning_rate": 4.669269613934005e-05, "loss": 4.6461, "step": 183500 }, { "epoch": 0.2, "learning_rate": 4.6683684412199285e-05, "loss": 4.6583, "step": 184000 }, { "epoch": 0.2, "learning_rate": 4.667467268505852e-05, "loss": 4.6572, "step": 184500 }, { "epoch": 0.2, "learning_rate": 4.666566095791776e-05, "loss": 4.6394, "step": 185000 }, { "epoch": 0.2, "learning_rate": 4.6656649230777e-05, "loss": 4.676, "step": 185500 }, { "epoch": 0.2, "learning_rate": 4.6647637503636234e-05, "loss": 4.6573, "step": 186000 }, { "epoch": 0.2, "learning_rate": 4.663862577649547e-05, "loss": 4.6528, "step": 186500 }, { "epoch": 0.2, "learning_rate": 4.662961404935471e-05, "loss": 4.658, "step": 187000 }, { "epoch": 0.2, "learning_rate": 4.6620602322213946e-05, "loss": 4.6363, "step": 187500 }, { "epoch": 0.2, "learning_rate": 4.6611590595073184e-05, "loss": 4.6629, "step": 188000 }, { "epoch": 0.2, "learning_rate": 4.660257886793242e-05, "loss": 4.6319, "step": 188500 }, { "epoch": 0.2, "learning_rate": 4.659356714079166e-05, "loss": 4.6833, "step": 189000 }, { "epoch": 0.2, "learning_rate": 4.6584555413650896e-05, "loss": 4.586, "step": 189500 }, { "epoch": 0.21, "learning_rate": 4.657554368651013e-05, "loss": 4.6757, "step": 190000 }, { "epoch": 0.21, "learning_rate": 4.656653195936937e-05, "loss": 4.6509, "step": 190500 }, { "epoch": 0.21, "learning_rate": 4.655752023222861e-05, "loss": 4.6792, "step": 191000 }, { "epoch": 0.21, "learning_rate": 4.6548508505087846e-05, "loss": 4.6738, "step": 191500 }, { "epoch": 0.21, "learning_rate": 4.6539496777947076e-05, "loss": 4.6407, "step": 192000 }, { "epoch": 0.21, "learning_rate": 4.6530485050806314e-05, "loss": 4.6581, "step": 192500 }, { "epoch": 0.21, "learning_rate": 4.652147332366555e-05, "loss": 4.688, "step": 193000 }, { "epoch": 0.21, "learning_rate": 4.6512461596524795e-05, "loss": 4.6858, "step": 193500 }, { "epoch": 0.21, "learning_rate": 4.650344986938403e-05, "loss": 4.6618, "step": 194000 }, { "epoch": 0.21, "learning_rate": 4.649443814224327e-05, "loss": 4.6565, "step": 194500 }, { "epoch": 0.21, "learning_rate": 4.64854264151025e-05, "loss": 4.6477, "step": 195000 }, { "epoch": 0.21, "learning_rate": 4.647641468796174e-05, "loss": 4.6347, "step": 195500 }, { "epoch": 0.21, "learning_rate": 4.6467402960820975e-05, "loss": 4.6384, "step": 196000 }, { "epoch": 0.21, "learning_rate": 4.645839123368021e-05, "loss": 4.6041, "step": 196500 }, { "epoch": 0.21, "learning_rate": 4.644937950653945e-05, "loss": 4.6302, "step": 197000 }, { "epoch": 0.21, "learning_rate": 4.6440367779398694e-05, "loss": 4.582, "step": 197500 }, { "epoch": 0.21, "learning_rate": 4.6431356052257925e-05, "loss": 4.6465, "step": 198000 }, { "epoch": 0.21, "learning_rate": 4.642234432511716e-05, "loss": 4.6427, "step": 198500 }, { "epoch": 0.22, "learning_rate": 4.64133325979764e-05, "loss": 4.6421, "step": 199000 }, { "epoch": 0.22, "learning_rate": 4.640432087083564e-05, "loss": 4.6108, "step": 199500 }, { "epoch": 0.22, "learning_rate": 4.6395309143694875e-05, "loss": 4.6228, "step": 200000 }, { "epoch": 0.22, "learning_rate": 4.638629741655411e-05, "loss": 4.5645, "step": 200500 }, { "epoch": 0.22, "learning_rate": 4.637728568941335e-05, "loss": 4.5875, "step": 201000 }, { "epoch": 0.22, "learning_rate": 4.636827396227259e-05, "loss": 4.6283, "step": 201500 }, { "epoch": 0.22, "learning_rate": 4.6359262235131824e-05, "loss": 4.6218, "step": 202000 }, { "epoch": 0.22, "learning_rate": 4.635025050799106e-05, "loss": 4.6801, "step": 202500 }, { "epoch": 0.22, "learning_rate": 4.63412387808503e-05, "loss": 4.6695, "step": 203000 }, { "epoch": 0.22, "learning_rate": 4.6332227053709536e-05, "loss": 4.684, "step": 203500 }, { "epoch": 0.22, "learning_rate": 4.6323215326568774e-05, "loss": 4.5908, "step": 204000 }, { "epoch": 0.22, "learning_rate": 4.6314203599428004e-05, "loss": 4.6085, "step": 204500 }, { "epoch": 0.22, "learning_rate": 4.630519187228725e-05, "loss": 4.6316, "step": 205000 }, { "epoch": 0.22, "learning_rate": 4.6296180145146486e-05, "loss": 4.6607, "step": 205500 }, { "epoch": 0.22, "learning_rate": 4.6287168418005723e-05, "loss": 4.6351, "step": 206000 }, { "epoch": 0.22, "learning_rate": 4.627815669086496e-05, "loss": 4.6443, "step": 206500 }, { "epoch": 0.22, "learning_rate": 4.62691449637242e-05, "loss": 4.6842, "step": 207000 }, { "epoch": 0.22, "learning_rate": 4.626013323658343e-05, "loss": 4.6173, "step": 207500 }, { "epoch": 0.22, "learning_rate": 4.6251121509442666e-05, "loss": 4.593, "step": 208000 }, { "epoch": 0.23, "learning_rate": 4.6242109782301904e-05, "loss": 4.606, "step": 208500 }, { "epoch": 0.23, "learning_rate": 4.623309805516115e-05, "loss": 4.6188, "step": 209000 }, { "epoch": 0.23, "learning_rate": 4.6224086328020385e-05, "loss": 4.6307, "step": 209500 }, { "epoch": 0.23, "learning_rate": 4.621507460087962e-05, "loss": 4.5919, "step": 210000 }, { "epoch": 0.23, "learning_rate": 4.620606287373885e-05, "loss": 4.6507, "step": 210500 }, { "epoch": 0.23, "learning_rate": 4.619705114659809e-05, "loss": 4.6382, "step": 211000 }, { "epoch": 0.23, "learning_rate": 4.618803941945733e-05, "loss": 4.5784, "step": 211500 }, { "epoch": 0.23, "learning_rate": 4.6179027692316565e-05, "loss": 4.5813, "step": 212000 }, { "epoch": 0.23, "learning_rate": 4.61700159651758e-05, "loss": 4.6059, "step": 212500 }, { "epoch": 0.23, "learning_rate": 4.616100423803505e-05, "loss": 4.5996, "step": 213000 }, { "epoch": 0.23, "learning_rate": 4.6151992510894284e-05, "loss": 4.6524, "step": 213500 }, { "epoch": 0.23, "learning_rate": 4.6142980783753515e-05, "loss": 4.6452, "step": 214000 }, { "epoch": 0.23, "learning_rate": 4.613396905661275e-05, "loss": 4.6752, "step": 214500 }, { "epoch": 0.23, "learning_rate": 4.612495732947199e-05, "loss": 4.5912, "step": 215000 }, { "epoch": 0.23, "learning_rate": 4.611594560233123e-05, "loss": 4.6646, "step": 215500 }, { "epoch": 0.23, "learning_rate": 4.6106933875190465e-05, "loss": 4.6234, "step": 216000 }, { "epoch": 0.23, "learning_rate": 4.60979221480497e-05, "loss": 4.6457, "step": 216500 }, { "epoch": 0.23, "learning_rate": 4.608891042090894e-05, "loss": 4.6285, "step": 217000 }, { "epoch": 0.24, "learning_rate": 4.607989869376818e-05, "loss": 4.6047, "step": 217500 }, { "epoch": 0.24, "learning_rate": 4.6070886966627414e-05, "loss": 4.5877, "step": 218000 }, { "epoch": 0.24, "learning_rate": 4.606187523948665e-05, "loss": 4.6101, "step": 218500 }, { "epoch": 0.24, "learning_rate": 4.605286351234589e-05, "loss": 4.6867, "step": 219000 }, { "epoch": 0.24, "learning_rate": 4.6043851785205126e-05, "loss": 4.6508, "step": 219500 }, { "epoch": 0.24, "learning_rate": 4.603484005806436e-05, "loss": 4.6099, "step": 220000 }, { "epoch": 0.24, "learning_rate": 4.60258283309236e-05, "loss": 4.6508, "step": 220500 }, { "epoch": 0.24, "learning_rate": 4.601681660378284e-05, "loss": 4.6105, "step": 221000 }, { "epoch": 0.24, "learning_rate": 4.6007804876642076e-05, "loss": 4.6001, "step": 221500 }, { "epoch": 0.24, "learning_rate": 4.5998793149501313e-05, "loss": 4.6344, "step": 222000 }, { "epoch": 0.24, "learning_rate": 4.598978142236055e-05, "loss": 4.585, "step": 222500 }, { "epoch": 0.24, "learning_rate": 4.598076969521978e-05, "loss": 4.5558, "step": 223000 }, { "epoch": 0.24, "learning_rate": 4.597175796807902e-05, "loss": 4.5825, "step": 223500 }, { "epoch": 0.24, "learning_rate": 4.5962746240938256e-05, "loss": 4.5569, "step": 224000 }, { "epoch": 0.24, "learning_rate": 4.59537345137975e-05, "loss": 4.5647, "step": 224500 }, { "epoch": 0.24, "learning_rate": 4.594472278665674e-05, "loss": 4.5887, "step": 225000 }, { "epoch": 0.24, "learning_rate": 4.5935711059515975e-05, "loss": 4.5825, "step": 225500 }, { "epoch": 0.24, "learning_rate": 4.5926699332375206e-05, "loss": 4.5739, "step": 226000 }, { "epoch": 0.24, "learning_rate": 4.591768760523444e-05, "loss": 4.5726, "step": 226500 }, { "epoch": 0.25, "learning_rate": 4.590867587809368e-05, "loss": 4.6447, "step": 227000 }, { "epoch": 0.25, "learning_rate": 4.589966415095292e-05, "loss": 4.5851, "step": 227500 }, { "epoch": 0.25, "learning_rate": 4.5890652423812155e-05, "loss": 4.5571, "step": 228000 }, { "epoch": 0.25, "learning_rate": 4.58816406966714e-05, "loss": 4.5877, "step": 228500 }, { "epoch": 0.25, "learning_rate": 4.587262896953064e-05, "loss": 4.5896, "step": 229000 }, { "epoch": 0.25, "learning_rate": 4.586361724238987e-05, "loss": 4.591, "step": 229500 }, { "epoch": 0.25, "learning_rate": 4.5854605515249105e-05, "loss": 4.5587, "step": 230000 }, { "epoch": 0.25, "learning_rate": 4.584559378810834e-05, "loss": 4.5871, "step": 230500 }, { "epoch": 0.25, "learning_rate": 4.583658206096758e-05, "loss": 4.6129, "step": 231000 }, { "epoch": 0.25, "learning_rate": 4.582757033382682e-05, "loss": 4.5838, "step": 231500 }, { "epoch": 0.25, "learning_rate": 4.5818558606686055e-05, "loss": 4.6555, "step": 232000 }, { "epoch": 0.25, "learning_rate": 4.580954687954529e-05, "loss": 4.5784, "step": 232500 }, { "epoch": 0.25, "learning_rate": 4.580053515240453e-05, "loss": 4.5853, "step": 233000 }, { "epoch": 0.25, "learning_rate": 4.579152342526377e-05, "loss": 4.5536, "step": 233500 }, { "epoch": 0.25, "learning_rate": 4.5782511698123004e-05, "loss": 4.6067, "step": 234000 }, { "epoch": 0.25, "learning_rate": 4.577349997098224e-05, "loss": 4.6091, "step": 234500 }, { "epoch": 0.25, "learning_rate": 4.576448824384148e-05, "loss": 4.5912, "step": 235000 }, { "epoch": 0.25, "learning_rate": 4.575547651670071e-05, "loss": 4.5887, "step": 235500 }, { "epoch": 0.26, "learning_rate": 4.5746464789559954e-05, "loss": 4.5748, "step": 236000 }, { "epoch": 0.26, "learning_rate": 4.573745306241919e-05, "loss": 4.537, "step": 236500 }, { "epoch": 0.26, "learning_rate": 4.572844133527843e-05, "loss": 4.518, "step": 237000 }, { "epoch": 0.26, "learning_rate": 4.5719429608137666e-05, "loss": 4.5982, "step": 237500 }, { "epoch": 0.26, "learning_rate": 4.5710417880996904e-05, "loss": 4.5996, "step": 238000 }, { "epoch": 0.26, "learning_rate": 4.5701406153856134e-05, "loss": 4.6103, "step": 238500 }, { "epoch": 0.26, "learning_rate": 4.569239442671537e-05, "loss": 4.5725, "step": 239000 }, { "epoch": 0.26, "learning_rate": 4.568338269957461e-05, "loss": 4.6039, "step": 239500 }, { "epoch": 0.26, "learning_rate": 4.567437097243385e-05, "loss": 4.5271, "step": 240000 }, { "epoch": 0.26, "learning_rate": 4.566535924529309e-05, "loss": 4.6387, "step": 240500 }, { "epoch": 0.26, "learning_rate": 4.565634751815233e-05, "loss": 4.5238, "step": 241000 }, { "epoch": 0.26, "learning_rate": 4.5647335791011565e-05, "loss": 4.5608, "step": 241500 }, { "epoch": 0.26, "learning_rate": 4.5638324063870796e-05, "loss": 4.582, "step": 242000 }, { "epoch": 0.26, "learning_rate": 4.562931233673003e-05, "loss": 4.5491, "step": 242500 }, { "epoch": 0.26, "learning_rate": 4.562030060958927e-05, "loss": 4.5778, "step": 243000 }, { "epoch": 0.26, "learning_rate": 4.561128888244851e-05, "loss": 4.6373, "step": 243500 }, { "epoch": 0.26, "learning_rate": 4.5602277155307746e-05, "loss": 4.6209, "step": 244000 }, { "epoch": 0.26, "learning_rate": 4.559326542816699e-05, "loss": 4.5673, "step": 244500 }, { "epoch": 0.26, "learning_rate": 4.558425370102622e-05, "loss": 4.5685, "step": 245000 }, { "epoch": 0.27, "learning_rate": 4.557524197388546e-05, "loss": 4.5357, "step": 245500 }, { "epoch": 0.27, "learning_rate": 4.5566230246744695e-05, "loss": 4.576, "step": 246000 }, { "epoch": 0.27, "learning_rate": 4.555721851960393e-05, "loss": 4.602, "step": 246500 }, { "epoch": 0.27, "learning_rate": 4.554820679246317e-05, "loss": 4.4973, "step": 247000 }, { "epoch": 0.27, "learning_rate": 4.553919506532241e-05, "loss": 4.5782, "step": 247500 }, { "epoch": 0.27, "learning_rate": 4.5530183338181645e-05, "loss": 4.5825, "step": 248000 }, { "epoch": 0.27, "learning_rate": 4.552117161104088e-05, "loss": 4.5145, "step": 248500 }, { "epoch": 0.27, "learning_rate": 4.551215988390012e-05, "loss": 4.5698, "step": 249000 }, { "epoch": 0.27, "learning_rate": 4.550314815675936e-05, "loss": 4.5806, "step": 249500 }, { "epoch": 0.27, "learning_rate": 4.5494136429618594e-05, "loss": 4.5052, "step": 250000 }, { "epoch": 0.27, "learning_rate": 4.548512470247783e-05, "loss": 4.5976, "step": 250500 }, { "epoch": 0.27, "learning_rate": 4.547611297533706e-05, "loss": 4.5667, "step": 251000 }, { "epoch": 0.27, "learning_rate": 4.5467101248196307e-05, "loss": 4.5431, "step": 251500 }, { "epoch": 0.27, "learning_rate": 4.5458089521055544e-05, "loss": 4.5659, "step": 252000 }, { "epoch": 0.27, "learning_rate": 4.544907779391478e-05, "loss": 4.5484, "step": 252500 }, { "epoch": 0.27, "learning_rate": 4.544006606677402e-05, "loss": 4.5668, "step": 253000 }, { "epoch": 0.27, "learning_rate": 4.5431054339633256e-05, "loss": 4.5605, "step": 253500 }, { "epoch": 0.27, "learning_rate": 4.5422042612492494e-05, "loss": 4.5686, "step": 254000 }, { "epoch": 0.28, "learning_rate": 4.5413030885351724e-05, "loss": 4.5157, "step": 254500 }, { "epoch": 0.28, "learning_rate": 4.540401915821096e-05, "loss": 4.5193, "step": 255000 }, { "epoch": 0.28, "learning_rate": 4.53950074310702e-05, "loss": 4.5781, "step": 255500 }, { "epoch": 0.28, "learning_rate": 4.538599570392944e-05, "loss": 4.5719, "step": 256000 }, { "epoch": 0.28, "learning_rate": 4.537698397678868e-05, "loss": 4.5509, "step": 256500 }, { "epoch": 0.28, "learning_rate": 4.536797224964792e-05, "loss": 4.5617, "step": 257000 }, { "epoch": 0.28, "learning_rate": 4.535896052250715e-05, "loss": 4.5149, "step": 257500 }, { "epoch": 0.28, "learning_rate": 4.5349948795366386e-05, "loss": 4.5628, "step": 258000 }, { "epoch": 0.28, "learning_rate": 4.534093706822562e-05, "loss": 4.5809, "step": 258500 }, { "epoch": 0.28, "learning_rate": 4.533192534108486e-05, "loss": 4.525, "step": 259000 }, { "epoch": 0.28, "learning_rate": 4.53229136139441e-05, "loss": 4.5036, "step": 259500 }, { "epoch": 0.28, "learning_rate": 4.531390188680334e-05, "loss": 4.5399, "step": 260000 }, { "epoch": 0.28, "learning_rate": 4.530489015966257e-05, "loss": 4.5939, "step": 260500 }, { "epoch": 0.28, "learning_rate": 4.529587843252181e-05, "loss": 4.605, "step": 261000 }, { "epoch": 0.28, "learning_rate": 4.528686670538105e-05, "loss": 4.4758, "step": 261500 }, { "epoch": 0.28, "learning_rate": 4.5277854978240285e-05, "loss": 4.5757, "step": 262000 }, { "epoch": 0.28, "learning_rate": 4.526884325109952e-05, "loss": 4.5944, "step": 262500 }, { "epoch": 0.28, "learning_rate": 4.525983152395876e-05, "loss": 4.5485, "step": 263000 }, { "epoch": 0.28, "learning_rate": 4.5250819796818e-05, "loss": 4.6034, "step": 263500 }, { "epoch": 0.29, "learning_rate": 4.5241808069677235e-05, "loss": 4.5887, "step": 264000 }, { "epoch": 0.29, "learning_rate": 4.523279634253647e-05, "loss": 4.5265, "step": 264500 }, { "epoch": 0.29, "learning_rate": 4.522378461539571e-05, "loss": 4.5177, "step": 265000 }, { "epoch": 0.29, "learning_rate": 4.521477288825495e-05, "loss": 4.6046, "step": 265500 }, { "epoch": 0.29, "learning_rate": 4.5205761161114184e-05, "loss": 4.5481, "step": 266000 }, { "epoch": 0.29, "learning_rate": 4.519674943397342e-05, "loss": 4.5171, "step": 266500 }, { "epoch": 0.29, "learning_rate": 4.518773770683265e-05, "loss": 4.522, "step": 267000 }, { "epoch": 0.29, "learning_rate": 4.5178725979691897e-05, "loss": 4.5399, "step": 267500 }, { "epoch": 0.29, "learning_rate": 4.5169714252551134e-05, "loss": 4.5763, "step": 268000 }, { "epoch": 0.29, "learning_rate": 4.516070252541037e-05, "loss": 4.5583, "step": 268500 }, { "epoch": 0.29, "learning_rate": 4.515169079826961e-05, "loss": 4.569, "step": 269000 }, { "epoch": 0.29, "learning_rate": 4.5142679071128846e-05, "loss": 4.5812, "step": 269500 }, { "epoch": 0.29, "learning_rate": 4.513366734398808e-05, "loss": 4.5136, "step": 270000 }, { "epoch": 0.29, "learning_rate": 4.5124655616847314e-05, "loss": 4.525, "step": 270500 }, { "epoch": 0.29, "learning_rate": 4.511564388970655e-05, "loss": 4.4892, "step": 271000 }, { "epoch": 0.29, "learning_rate": 4.5106632162565796e-05, "loss": 4.4955, "step": 271500 }, { "epoch": 0.29, "learning_rate": 4.509762043542503e-05, "loss": 4.5783, "step": 272000 }, { "epoch": 0.29, "learning_rate": 4.508860870828427e-05, "loss": 4.5685, "step": 272500 }, { "epoch": 0.3, "learning_rate": 4.50795969811435e-05, "loss": 4.5577, "step": 273000 }, { "epoch": 0.3, "learning_rate": 4.507058525400274e-05, "loss": 4.6029, "step": 273500 }, { "epoch": 0.3, "learning_rate": 4.5061573526861976e-05, "loss": 4.5451, "step": 274000 }, { "epoch": 0.3, "learning_rate": 4.5052561799721213e-05, "loss": 4.5816, "step": 274500 }, { "epoch": 0.3, "learning_rate": 4.504355007258045e-05, "loss": 4.4644, "step": 275000 }, { "epoch": 0.3, "learning_rate": 4.5034538345439695e-05, "loss": 4.5545, "step": 275500 }, { "epoch": 0.3, "learning_rate": 4.5025526618298926e-05, "loss": 4.5686, "step": 276000 }, { "epoch": 0.3, "learning_rate": 4.501651489115816e-05, "loss": 4.598, "step": 276500 }, { "epoch": 0.3, "learning_rate": 4.50075031640174e-05, "loss": 4.5273, "step": 277000 }, { "epoch": 0.3, "learning_rate": 4.499849143687664e-05, "loss": 4.498, "step": 277500 }, { "epoch": 0.3, "learning_rate": 4.4989479709735875e-05, "loss": 4.5226, "step": 278000 }, { "epoch": 0.3, "learning_rate": 4.498046798259511e-05, "loss": 4.5453, "step": 278500 }, { "epoch": 0.3, "learning_rate": 4.497145625545435e-05, "loss": 4.5878, "step": 279000 }, { "epoch": 0.3, "learning_rate": 4.496244452831359e-05, "loss": 4.4889, "step": 279500 }, { "epoch": 0.3, "learning_rate": 4.4953432801172825e-05, "loss": 4.531, "step": 280000 }, { "epoch": 0.3, "learning_rate": 4.494442107403206e-05, "loss": 4.5446, "step": 280500 }, { "epoch": 0.3, "learning_rate": 4.49354093468913e-05, "loss": 4.5806, "step": 281000 }, { "epoch": 0.3, "learning_rate": 4.492639761975054e-05, "loss": 4.5838, "step": 281500 }, { "epoch": 0.3, "learning_rate": 4.4917385892609774e-05, "loss": 4.5335, "step": 282000 }, { "epoch": 0.31, "learning_rate": 4.4908374165469005e-05, "loss": 4.4475, "step": 282500 }, { "epoch": 0.31, "learning_rate": 4.489936243832825e-05, "loss": 4.4819, "step": 283000 }, { "epoch": 0.31, "learning_rate": 4.489035071118749e-05, "loss": 4.4793, "step": 283500 }, { "epoch": 0.31, "learning_rate": 4.4881338984046724e-05, "loss": 4.5127, "step": 284000 }, { "epoch": 0.31, "learning_rate": 4.487232725690596e-05, "loss": 4.5424, "step": 284500 }, { "epoch": 0.31, "learning_rate": 4.48633155297652e-05, "loss": 4.4961, "step": 285000 }, { "epoch": 0.31, "learning_rate": 4.485430380262443e-05, "loss": 4.5096, "step": 285500 }, { "epoch": 0.31, "learning_rate": 4.484529207548367e-05, "loss": 4.5307, "step": 286000 }, { "epoch": 0.31, "learning_rate": 4.4836280348342904e-05, "loss": 4.5627, "step": 286500 }, { "epoch": 0.31, "learning_rate": 4.482726862120215e-05, "loss": 4.526, "step": 287000 }, { "epoch": 0.31, "learning_rate": 4.4818256894061386e-05, "loss": 4.5265, "step": 287500 }, { "epoch": 0.31, "learning_rate": 4.480924516692062e-05, "loss": 4.5695, "step": 288000 }, { "epoch": 0.31, "learning_rate": 4.4800233439779854e-05, "loss": 4.5962, "step": 288500 }, { "epoch": 0.31, "learning_rate": 4.479122171263909e-05, "loss": 4.5317, "step": 289000 }, { "epoch": 0.31, "learning_rate": 4.478220998549833e-05, "loss": 4.5309, "step": 289500 }, { "epoch": 0.31, "learning_rate": 4.4773198258357566e-05, "loss": 4.5631, "step": 290000 }, { "epoch": 0.31, "learning_rate": 4.4764186531216803e-05, "loss": 4.5116, "step": 290500 }, { "epoch": 0.31, "learning_rate": 4.475517480407605e-05, "loss": 4.5155, "step": 291000 }, { "epoch": 0.32, "learning_rate": 4.474616307693528e-05, "loss": 4.5214, "step": 291500 }, { "epoch": 0.32, "learning_rate": 4.4737151349794516e-05, "loss": 4.5764, "step": 292000 }, { "epoch": 0.32, "learning_rate": 4.472813962265375e-05, "loss": 4.5458, "step": 292500 }, { "epoch": 0.32, "learning_rate": 4.471912789551299e-05, "loss": 4.5426, "step": 293000 }, { "epoch": 0.32, "learning_rate": 4.471011616837223e-05, "loss": 4.5371, "step": 293500 }, { "epoch": 0.32, "learning_rate": 4.4701104441231465e-05, "loss": 4.5577, "step": 294000 }, { "epoch": 0.32, "learning_rate": 4.46920927140907e-05, "loss": 4.4803, "step": 294500 }, { "epoch": 0.32, "learning_rate": 4.468308098694994e-05, "loss": 4.5597, "step": 295000 }, { "epoch": 0.32, "learning_rate": 4.467406925980918e-05, "loss": 4.5193, "step": 295500 }, { "epoch": 0.32, "learning_rate": 4.4665057532668415e-05, "loss": 4.4773, "step": 296000 }, { "epoch": 0.32, "learning_rate": 4.465604580552765e-05, "loss": 4.5625, "step": 296500 }, { "epoch": 0.32, "learning_rate": 4.464703407838689e-05, "loss": 4.5206, "step": 297000 }, { "epoch": 0.32, "learning_rate": 4.463802235124613e-05, "loss": 4.49, "step": 297500 }, { "epoch": 0.32, "learning_rate": 4.462901062410536e-05, "loss": 4.511, "step": 298000 }, { "epoch": 0.32, "learning_rate": 4.46199988969646e-05, "loss": 4.4423, "step": 298500 }, { "epoch": 0.32, "learning_rate": 4.461098716982384e-05, "loss": 4.5147, "step": 299000 }, { "epoch": 0.32, "learning_rate": 4.460197544268308e-05, "loss": 4.5474, "step": 299500 }, { "epoch": 0.32, "learning_rate": 4.4592963715542314e-05, "loss": 4.5493, "step": 300000 }, { "epoch": 0.32, "learning_rate": 4.458395198840155e-05, "loss": 4.49, "step": 300500 }, { "epoch": 0.33, "learning_rate": 4.457494026126078e-05, "loss": 4.5583, "step": 301000 }, { "epoch": 0.33, "learning_rate": 4.456592853412002e-05, "loss": 4.5288, "step": 301500 }, { "epoch": 0.33, "learning_rate": 4.455691680697926e-05, "loss": 4.5368, "step": 302000 }, { "epoch": 0.33, "learning_rate": 4.45479050798385e-05, "loss": 4.5171, "step": 302500 }, { "epoch": 0.33, "learning_rate": 4.453889335269774e-05, "loss": 4.4643, "step": 303000 }, { "epoch": 0.33, "learning_rate": 4.4529881625556976e-05, "loss": 4.4714, "step": 303500 }, { "epoch": 0.33, "learning_rate": 4.4520869898416207e-05, "loss": 4.5301, "step": 304000 }, { "epoch": 0.33, "learning_rate": 4.4511858171275444e-05, "loss": 4.4925, "step": 304500 }, { "epoch": 0.33, "learning_rate": 4.450284644413468e-05, "loss": 4.534, "step": 305000 }, { "epoch": 0.33, "learning_rate": 4.449383471699392e-05, "loss": 4.5594, "step": 305500 }, { "epoch": 0.33, "learning_rate": 4.4484822989853156e-05, "loss": 4.5079, "step": 306000 }, { "epoch": 0.33, "learning_rate": 4.4475811262712394e-05, "loss": 4.5475, "step": 306500 }, { "epoch": 0.33, "learning_rate": 4.446679953557164e-05, "loss": 4.4948, "step": 307000 }, { "epoch": 0.33, "learning_rate": 4.445778780843087e-05, "loss": 4.5257, "step": 307500 }, { "epoch": 0.33, "learning_rate": 4.4448776081290106e-05, "loss": 4.4913, "step": 308000 }, { "epoch": 0.33, "learning_rate": 4.443976435414934e-05, "loss": 4.5662, "step": 308500 }, { "epoch": 0.33, "learning_rate": 4.443075262700858e-05, "loss": 4.5267, "step": 309000 }, { "epoch": 0.33, "learning_rate": 4.442174089986782e-05, "loss": 4.4999, "step": 309500 }, { "epoch": 0.34, "learning_rate": 4.4412729172727055e-05, "loss": 4.5111, "step": 310000 }, { "epoch": 0.34, "learning_rate": 4.440371744558629e-05, "loss": 4.5107, "step": 310500 }, { "epoch": 0.34, "learning_rate": 4.439470571844553e-05, "loss": 4.5682, "step": 311000 }, { "epoch": 0.34, "learning_rate": 4.438569399130477e-05, "loss": 4.5016, "step": 311500 }, { "epoch": 0.34, "learning_rate": 4.4376682264164005e-05, "loss": 4.4973, "step": 312000 }, { "epoch": 0.34, "learning_rate": 4.436767053702324e-05, "loss": 4.5022, "step": 312500 }, { "epoch": 0.34, "learning_rate": 4.435865880988248e-05, "loss": 4.5441, "step": 313000 }, { "epoch": 0.34, "learning_rate": 4.434964708274171e-05, "loss": 4.5459, "step": 313500 }, { "epoch": 0.34, "learning_rate": 4.4340635355600955e-05, "loss": 4.5141, "step": 314000 }, { "epoch": 0.34, "learning_rate": 4.433162362846019e-05, "loss": 4.5329, "step": 314500 }, { "epoch": 0.34, "learning_rate": 4.432261190131943e-05, "loss": 4.5172, "step": 315000 }, { "epoch": 0.34, "learning_rate": 4.431360017417867e-05, "loss": 4.5264, "step": 315500 }, { "epoch": 0.34, "learning_rate": 4.4304588447037904e-05, "loss": 4.5352, "step": 316000 }, { "epoch": 0.34, "learning_rate": 4.4295576719897135e-05, "loss": 4.5287, "step": 316500 }, { "epoch": 0.34, "learning_rate": 4.428656499275637e-05, "loss": 4.5314, "step": 317000 }, { "epoch": 0.34, "learning_rate": 4.427755326561561e-05, "loss": 4.5219, "step": 317500 }, { "epoch": 0.34, "learning_rate": 4.426854153847485e-05, "loss": 4.5136, "step": 318000 }, { "epoch": 0.34, "learning_rate": 4.425952981133409e-05, "loss": 4.483, "step": 318500 }, { "epoch": 0.34, "learning_rate": 4.425051808419333e-05, "loss": 4.4569, "step": 319000 }, { "epoch": 0.35, "learning_rate": 4.4241506357052566e-05, "loss": 4.522, "step": 319500 }, { "epoch": 0.35, "learning_rate": 4.4232494629911797e-05, "loss": 4.4795, "step": 320000 }, { "epoch": 0.35, "learning_rate": 4.4223482902771034e-05, "loss": 4.5522, "step": 320500 }, { "epoch": 0.35, "learning_rate": 4.421447117563027e-05, "loss": 4.49, "step": 321000 }, { "epoch": 0.35, "learning_rate": 4.420545944848951e-05, "loss": 4.5291, "step": 321500 }, { "epoch": 0.35, "learning_rate": 4.4196447721348746e-05, "loss": 4.5003, "step": 322000 }, { "epoch": 0.35, "learning_rate": 4.418743599420799e-05, "loss": 4.4768, "step": 322500 }, { "epoch": 0.35, "learning_rate": 4.417842426706722e-05, "loss": 4.498, "step": 323000 }, { "epoch": 0.35, "learning_rate": 4.416941253992646e-05, "loss": 4.4942, "step": 323500 }, { "epoch": 0.35, "learning_rate": 4.4160400812785696e-05, "loss": 4.5224, "step": 324000 }, { "epoch": 0.35, "learning_rate": 4.415138908564493e-05, "loss": 4.4901, "step": 324500 }, { "epoch": 0.35, "learning_rate": 4.414237735850417e-05, "loss": 4.4871, "step": 325000 }, { "epoch": 0.35, "learning_rate": 4.413336563136341e-05, "loss": 4.4594, "step": 325500 }, { "epoch": 0.35, "learning_rate": 4.4124353904222645e-05, "loss": 4.515, "step": 326000 }, { "epoch": 0.35, "learning_rate": 4.411534217708188e-05, "loss": 4.4832, "step": 326500 }, { "epoch": 0.35, "learning_rate": 4.410633044994112e-05, "loss": 4.5075, "step": 327000 }, { "epoch": 0.35, "learning_rate": 4.409731872280036e-05, "loss": 4.4815, "step": 327500 }, { "epoch": 0.35, "learning_rate": 4.4088306995659595e-05, "loss": 4.5843, "step": 328000 }, { "epoch": 0.36, "learning_rate": 4.407929526851883e-05, "loss": 4.4808, "step": 328500 }, { "epoch": 0.36, "learning_rate": 4.407028354137806e-05, "loss": 4.5299, "step": 329000 }, { "epoch": 0.36, "learning_rate": 4.40612718142373e-05, "loss": 4.4625, "step": 329500 }, { "epoch": 0.36, "learning_rate": 4.4052260087096545e-05, "loss": 4.5026, "step": 330000 }, { "epoch": 0.36, "learning_rate": 4.404324835995578e-05, "loss": 4.4895, "step": 330500 }, { "epoch": 0.36, "learning_rate": 4.403423663281502e-05, "loss": 4.5502, "step": 331000 }, { "epoch": 0.36, "learning_rate": 4.402522490567426e-05, "loss": 4.4647, "step": 331500 }, { "epoch": 0.36, "learning_rate": 4.401621317853349e-05, "loss": 4.4907, "step": 332000 }, { "epoch": 0.36, "learning_rate": 4.4007201451392725e-05, "loss": 4.4623, "step": 332500 }, { "epoch": 0.36, "learning_rate": 4.399818972425196e-05, "loss": 4.5451, "step": 333000 }, { "epoch": 0.36, "learning_rate": 4.39891779971112e-05, "loss": 4.5007, "step": 333500 }, { "epoch": 0.36, "learning_rate": 4.3980166269970444e-05, "loss": 4.507, "step": 334000 }, { "epoch": 0.36, "learning_rate": 4.397115454282968e-05, "loss": 4.4595, "step": 334500 }, { "epoch": 0.36, "learning_rate": 4.396214281568892e-05, "loss": 4.5234, "step": 335000 }, { "epoch": 0.36, "learning_rate": 4.395313108854815e-05, "loss": 4.4674, "step": 335500 }, { "epoch": 0.36, "learning_rate": 4.394411936140739e-05, "loss": 4.4806, "step": 336000 }, { "epoch": 0.36, "learning_rate": 4.3935107634266624e-05, "loss": 4.4845, "step": 336500 }, { "epoch": 0.36, "learning_rate": 4.392609590712586e-05, "loss": 4.5202, "step": 337000 }, { "epoch": 0.36, "learning_rate": 4.39170841799851e-05, "loss": 4.5301, "step": 337500 }, { "epoch": 0.37, "learning_rate": 4.390807245284434e-05, "loss": 4.5139, "step": 338000 }, { "epoch": 0.37, "learning_rate": 4.3899060725703574e-05, "loss": 4.4715, "step": 338500 }, { "epoch": 0.37, "learning_rate": 4.389004899856281e-05, "loss": 4.4752, "step": 339000 }, { "epoch": 0.37, "learning_rate": 4.388103727142205e-05, "loss": 4.4945, "step": 339500 }, { "epoch": 0.37, "learning_rate": 4.3872025544281286e-05, "loss": 4.4648, "step": 340000 }, { "epoch": 0.37, "learning_rate": 4.386301381714052e-05, "loss": 4.5011, "step": 340500 }, { "epoch": 0.37, "learning_rate": 4.385400208999976e-05, "loss": 4.527, "step": 341000 }, { "epoch": 0.37, "learning_rate": 4.3844990362859e-05, "loss": 4.5182, "step": 341500 }, { "epoch": 0.37, "learning_rate": 4.3835978635718235e-05, "loss": 4.4455, "step": 342000 }, { "epoch": 0.37, "learning_rate": 4.382696690857747e-05, "loss": 4.4408, "step": 342500 }, { "epoch": 0.37, "learning_rate": 4.381795518143671e-05, "loss": 4.4726, "step": 343000 }, { "epoch": 0.37, "learning_rate": 4.380894345429595e-05, "loss": 4.4912, "step": 343500 }, { "epoch": 0.37, "learning_rate": 4.3799931727155185e-05, "loss": 4.4715, "step": 344000 }, { "epoch": 0.37, "learning_rate": 4.3790920000014416e-05, "loss": 4.4808, "step": 344500 }, { "epoch": 0.37, "learning_rate": 4.378190827287365e-05, "loss": 4.469, "step": 345000 }, { "epoch": 0.37, "learning_rate": 4.37728965457329e-05, "loss": 4.5089, "step": 345500 }, { "epoch": 0.37, "learning_rate": 4.3763884818592135e-05, "loss": 4.4609, "step": 346000 }, { "epoch": 0.37, "learning_rate": 4.375487309145137e-05, "loss": 4.5222, "step": 346500 }, { "epoch": 0.38, "learning_rate": 4.374586136431061e-05, "loss": 4.4459, "step": 347000 }, { "epoch": 0.38, "learning_rate": 4.373684963716985e-05, "loss": 4.5242, "step": 347500 }, { "epoch": 0.38, "learning_rate": 4.372783791002908e-05, "loss": 4.4754, "step": 348000 }, { "epoch": 0.38, "learning_rate": 4.3718826182888315e-05, "loss": 4.4723, "step": 348500 }, { "epoch": 0.38, "learning_rate": 4.370981445574755e-05, "loss": 4.4723, "step": 349000 }, { "epoch": 0.38, "learning_rate": 4.3700802728606796e-05, "loss": 4.489, "step": 349500 }, { "epoch": 0.38, "learning_rate": 4.3691791001466034e-05, "loss": 4.4956, "step": 350000 }, { "epoch": 0.38, "learning_rate": 4.368277927432527e-05, "loss": 4.5123, "step": 350500 }, { "epoch": 0.38, "learning_rate": 4.36737675471845e-05, "loss": 4.5453, "step": 351000 }, { "epoch": 0.38, "learning_rate": 4.366475582004374e-05, "loss": 4.4737, "step": 351500 }, { "epoch": 0.38, "learning_rate": 4.365574409290298e-05, "loss": 4.4692, "step": 352000 }, { "epoch": 0.38, "learning_rate": 4.3646732365762214e-05, "loss": 4.4465, "step": 352500 }, { "epoch": 0.38, "learning_rate": 4.363772063862145e-05, "loss": 4.4285, "step": 353000 }, { "epoch": 0.38, "learning_rate": 4.3628708911480696e-05, "loss": 4.5237, "step": 353500 }, { "epoch": 0.38, "learning_rate": 4.3619697184339926e-05, "loss": 4.5195, "step": 354000 }, { "epoch": 0.38, "learning_rate": 4.3610685457199164e-05, "loss": 4.492, "step": 354500 }, { "epoch": 0.38, "learning_rate": 4.36016737300584e-05, "loss": 4.485, "step": 355000 }, { "epoch": 0.38, "learning_rate": 4.359266200291764e-05, "loss": 4.4856, "step": 355500 }, { "epoch": 0.38, "learning_rate": 4.3583650275776876e-05, "loss": 4.5072, "step": 356000 }, { "epoch": 0.39, "learning_rate": 4.357463854863611e-05, "loss": 4.4757, "step": 356500 }, { "epoch": 0.39, "learning_rate": 4.356562682149535e-05, "loss": 4.4942, "step": 357000 }, { "epoch": 0.39, "learning_rate": 4.355661509435459e-05, "loss": 4.4687, "step": 357500 }, { "epoch": 0.39, "learning_rate": 4.3547603367213825e-05, "loss": 4.4391, "step": 358000 }, { "epoch": 0.39, "learning_rate": 4.353859164007306e-05, "loss": 4.4766, "step": 358500 }, { "epoch": 0.39, "learning_rate": 4.35295799129323e-05, "loss": 4.5417, "step": 359000 }, { "epoch": 0.39, "learning_rate": 4.352056818579154e-05, "loss": 4.4657, "step": 359500 }, { "epoch": 0.39, "learning_rate": 4.3511556458650775e-05, "loss": 4.5017, "step": 360000 }, { "epoch": 0.39, "learning_rate": 4.3502544731510006e-05, "loss": 4.4788, "step": 360500 }, { "epoch": 0.39, "learning_rate": 4.349353300436925e-05, "loss": 4.4984, "step": 361000 }, { "epoch": 0.39, "learning_rate": 4.348452127722849e-05, "loss": 4.543, "step": 361500 }, { "epoch": 0.39, "learning_rate": 4.3475509550087725e-05, "loss": 4.4832, "step": 362000 }, { "epoch": 0.39, "learning_rate": 4.346649782294696e-05, "loss": 4.5137, "step": 362500 }, { "epoch": 0.39, "learning_rate": 4.34574860958062e-05, "loss": 4.4928, "step": 363000 }, { "epoch": 0.39, "learning_rate": 4.344847436866543e-05, "loss": 4.4666, "step": 363500 }, { "epoch": 0.39, "learning_rate": 4.343946264152467e-05, "loss": 4.4647, "step": 364000 }, { "epoch": 0.39, "learning_rate": 4.3430450914383905e-05, "loss": 4.4378, "step": 364500 }, { "epoch": 0.39, "learning_rate": 4.342143918724315e-05, "loss": 4.4485, "step": 365000 }, { "epoch": 0.4, "learning_rate": 4.3412427460102386e-05, "loss": 4.4648, "step": 365500 }, { "epoch": 0.4, "learning_rate": 4.3403415732961624e-05, "loss": 4.4384, "step": 366000 }, { "epoch": 0.4, "learning_rate": 4.3394404005820855e-05, "loss": 4.4319, "step": 366500 }, { "epoch": 0.4, "learning_rate": 4.338539227868009e-05, "loss": 4.4791, "step": 367000 }, { "epoch": 0.4, "learning_rate": 4.337638055153933e-05, "loss": 4.5161, "step": 367500 }, { "epoch": 0.4, "learning_rate": 4.336736882439857e-05, "loss": 4.4157, "step": 368000 }, { "epoch": 0.4, "learning_rate": 4.3358357097257804e-05, "loss": 4.4717, "step": 368500 }, { "epoch": 0.4, "learning_rate": 4.334934537011704e-05, "loss": 4.479, "step": 369000 }, { "epoch": 0.4, "learning_rate": 4.334033364297628e-05, "loss": 4.4731, "step": 369500 }, { "epoch": 0.4, "learning_rate": 4.3331321915835516e-05, "loss": 4.4899, "step": 370000 }, { "epoch": 0.4, "learning_rate": 4.3322310188694754e-05, "loss": 4.502, "step": 370500 }, { "epoch": 0.4, "learning_rate": 4.331329846155399e-05, "loss": 4.4384, "step": 371000 }, { "epoch": 0.4, "learning_rate": 4.330428673441323e-05, "loss": 4.4643, "step": 371500 }, { "epoch": 0.4, "learning_rate": 4.3295275007272466e-05, "loss": 4.4491, "step": 372000 }, { "epoch": 0.4, "learning_rate": 4.32862632801317e-05, "loss": 4.409, "step": 372500 }, { "epoch": 0.4, "learning_rate": 4.327725155299094e-05, "loss": 4.4673, "step": 373000 }, { "epoch": 0.4, "learning_rate": 4.326823982585018e-05, "loss": 4.4603, "step": 373500 }, { "epoch": 0.4, "learning_rate": 4.3259228098709416e-05, "loss": 4.4743, "step": 374000 }, { "epoch": 0.4, "learning_rate": 4.325021637156865e-05, "loss": 4.4634, "step": 374500 }, { "epoch": 0.41, "learning_rate": 4.324120464442789e-05, "loss": 4.4576, "step": 375000 }, { "epoch": 0.41, "learning_rate": 4.323219291728713e-05, "loss": 4.484, "step": 375500 }, { "epoch": 0.41, "learning_rate": 4.322318119014636e-05, "loss": 4.431, "step": 376000 }, { "epoch": 0.41, "learning_rate": 4.32141694630056e-05, "loss": 4.5062, "step": 376500 }, { "epoch": 0.41, "learning_rate": 4.320515773586484e-05, "loss": 4.4464, "step": 377000 }, { "epoch": 0.41, "learning_rate": 4.319614600872408e-05, "loss": 4.4734, "step": 377500 }, { "epoch": 0.41, "learning_rate": 4.3187134281583315e-05, "loss": 4.4615, "step": 378000 }, { "epoch": 0.41, "learning_rate": 4.317812255444255e-05, "loss": 4.4707, "step": 378500 }, { "epoch": 0.41, "learning_rate": 4.316911082730178e-05, "loss": 4.5084, "step": 379000 }, { "epoch": 0.41, "learning_rate": 4.316009910016102e-05, "loss": 4.4335, "step": 379500 }, { "epoch": 0.41, "learning_rate": 4.315108737302026e-05, "loss": 4.5115, "step": 380000 }, { "epoch": 0.41, "learning_rate": 4.31420756458795e-05, "loss": 4.495, "step": 380500 }, { "epoch": 0.41, "learning_rate": 4.313306391873874e-05, "loss": 4.5261, "step": 381000 }, { "epoch": 0.41, "learning_rate": 4.3124052191597977e-05, "loss": 4.5579, "step": 381500 }, { "epoch": 0.41, "learning_rate": 4.311504046445721e-05, "loss": 4.4861, "step": 382000 }, { "epoch": 0.41, "learning_rate": 4.3106028737316445e-05, "loss": 4.4294, "step": 382500 }, { "epoch": 0.41, "learning_rate": 4.309701701017568e-05, "loss": 4.4975, "step": 383000 }, { "epoch": 0.41, "learning_rate": 4.308800528303492e-05, "loss": 4.4527, "step": 383500 }, { "epoch": 0.42, "learning_rate": 4.307899355589416e-05, "loss": 4.4521, "step": 384000 }, { "epoch": 0.42, "learning_rate": 4.3069981828753394e-05, "loss": 4.5024, "step": 384500 }, { "epoch": 0.42, "learning_rate": 4.306097010161263e-05, "loss": 4.4181, "step": 385000 }, { "epoch": 0.42, "learning_rate": 4.305195837447187e-05, "loss": 4.4786, "step": 385500 }, { "epoch": 0.42, "learning_rate": 4.3042946647331106e-05, "loss": 4.4245, "step": 386000 }, { "epoch": 0.42, "learning_rate": 4.3033934920190344e-05, "loss": 4.4878, "step": 386500 }, { "epoch": 0.42, "learning_rate": 4.302492319304958e-05, "loss": 4.4427, "step": 387000 }, { "epoch": 0.42, "learning_rate": 4.301591146590882e-05, "loss": 4.3975, "step": 387500 }, { "epoch": 0.42, "learning_rate": 4.3006899738768056e-05, "loss": 4.4566, "step": 388000 }, { "epoch": 0.42, "learning_rate": 4.299788801162729e-05, "loss": 4.4441, "step": 388500 }, { "epoch": 0.42, "learning_rate": 4.298887628448653e-05, "loss": 4.4606, "step": 389000 }, { "epoch": 0.42, "learning_rate": 4.297986455734577e-05, "loss": 4.5232, "step": 389500 }, { "epoch": 0.42, "learning_rate": 4.2970852830205006e-05, "loss": 4.464, "step": 390000 }, { "epoch": 0.42, "learning_rate": 4.296184110306424e-05, "loss": 4.3564, "step": 390500 }, { "epoch": 0.42, "learning_rate": 4.295282937592348e-05, "loss": 4.4523, "step": 391000 }, { "epoch": 0.42, "learning_rate": 4.294381764878271e-05, "loss": 4.4288, "step": 391500 }, { "epoch": 0.42, "learning_rate": 4.2934805921641955e-05, "loss": 4.3775, "step": 392000 }, { "epoch": 0.42, "learning_rate": 4.292579419450119e-05, "loss": 4.4579, "step": 392500 }, { "epoch": 0.42, "learning_rate": 4.291678246736043e-05, "loss": 4.4408, "step": 393000 }, { "epoch": 0.43, "learning_rate": 4.290777074021967e-05, "loss": 4.4396, "step": 393500 }, { "epoch": 0.43, "learning_rate": 4.2898759013078905e-05, "loss": 4.4438, "step": 394000 }, { "epoch": 0.43, "learning_rate": 4.2889747285938135e-05, "loss": 4.46, "step": 394500 }, { "epoch": 0.43, "learning_rate": 4.288073555879737e-05, "loss": 4.522, "step": 395000 }, { "epoch": 0.43, "learning_rate": 4.287172383165661e-05, "loss": 4.471, "step": 395500 }, { "epoch": 0.43, "learning_rate": 4.286271210451585e-05, "loss": 4.3984, "step": 396000 }, { "epoch": 0.43, "learning_rate": 4.285370037737509e-05, "loss": 4.4826, "step": 396500 }, { "epoch": 0.43, "learning_rate": 4.284468865023433e-05, "loss": 4.4343, "step": 397000 }, { "epoch": 0.43, "learning_rate": 4.283567692309356e-05, "loss": 4.4479, "step": 397500 }, { "epoch": 0.43, "learning_rate": 4.28266651959528e-05, "loss": 4.4768, "step": 398000 }, { "epoch": 0.43, "learning_rate": 4.2817653468812035e-05, "loss": 4.4508, "step": 398500 }, { "epoch": 0.43, "learning_rate": 4.280864174167127e-05, "loss": 4.4707, "step": 399000 }, { "epoch": 0.43, "learning_rate": 4.279963001453051e-05, "loss": 4.4679, "step": 399500 }, { "epoch": 0.43, "learning_rate": 4.279061828738975e-05, "loss": 4.3981, "step": 400000 }, { "epoch": 0.43, "learning_rate": 4.278160656024899e-05, "loss": 4.4731, "step": 400500 }, { "epoch": 0.43, "learning_rate": 4.277259483310822e-05, "loss": 4.4104, "step": 401000 }, { "epoch": 0.43, "learning_rate": 4.276358310596746e-05, "loss": 4.4197, "step": 401500 }, { "epoch": 0.43, "learning_rate": 4.2754571378826696e-05, "loss": 4.4555, "step": 402000 }, { "epoch": 0.44, "learning_rate": 4.2745559651685934e-05, "loss": 4.4885, "step": 402500 }, { "epoch": 0.44, "learning_rate": 4.273654792454517e-05, "loss": 4.3961, "step": 403000 }, { "epoch": 0.44, "learning_rate": 4.272753619740441e-05, "loss": 4.4982, "step": 403500 }, { "epoch": 0.44, "learning_rate": 4.2718524470263646e-05, "loss": 4.4241, "step": 404000 }, { "epoch": 0.44, "learning_rate": 4.2709512743122883e-05, "loss": 4.4929, "step": 404500 }, { "epoch": 0.44, "learning_rate": 4.270050101598212e-05, "loss": 4.438, "step": 405000 }, { "epoch": 0.44, "learning_rate": 4.269148928884136e-05, "loss": 4.4581, "step": 405500 }, { "epoch": 0.44, "learning_rate": 4.2682477561700596e-05, "loss": 4.4261, "step": 406000 }, { "epoch": 0.44, "learning_rate": 4.267346583455983e-05, "loss": 4.4314, "step": 406500 }, { "epoch": 0.44, "learning_rate": 4.2664454107419064e-05, "loss": 4.5199, "step": 407000 }, { "epoch": 0.44, "learning_rate": 4.26554423802783e-05, "loss": 4.3874, "step": 407500 }, { "epoch": 0.44, "learning_rate": 4.2646430653137545e-05, "loss": 4.4064, "step": 408000 }, { "epoch": 0.44, "learning_rate": 4.263741892599678e-05, "loss": 4.4102, "step": 408500 }, { "epoch": 0.44, "learning_rate": 4.262840719885602e-05, "loss": 4.4532, "step": 409000 }, { "epoch": 0.44, "learning_rate": 4.261939547171526e-05, "loss": 4.4605, "step": 409500 }, { "epoch": 0.44, "learning_rate": 4.261038374457449e-05, "loss": 4.4169, "step": 410000 }, { "epoch": 0.44, "learning_rate": 4.2601372017433725e-05, "loss": 4.4427, "step": 410500 }, { "epoch": 0.44, "learning_rate": 4.259236029029296e-05, "loss": 4.4733, "step": 411000 }, { "epoch": 0.44, "learning_rate": 4.25833485631522e-05, "loss": 4.5038, "step": 411500 }, { "epoch": 0.45, "learning_rate": 4.2574336836011444e-05, "loss": 4.4452, "step": 412000 }, { "epoch": 0.45, "learning_rate": 4.256532510887068e-05, "loss": 4.4282, "step": 412500 }, { "epoch": 0.45, "learning_rate": 4.255631338172992e-05, "loss": 4.4557, "step": 413000 }, { "epoch": 0.45, "learning_rate": 4.254730165458915e-05, "loss": 4.4631, "step": 413500 }, { "epoch": 0.45, "learning_rate": 4.253828992744839e-05, "loss": 4.4623, "step": 414000 }, { "epoch": 0.45, "learning_rate": 4.2529278200307625e-05, "loss": 4.4419, "step": 414500 }, { "epoch": 0.45, "learning_rate": 4.252026647316686e-05, "loss": 4.4337, "step": 415000 }, { "epoch": 0.45, "learning_rate": 4.25112547460261e-05, "loss": 4.4549, "step": 415500 }, { "epoch": 0.45, "learning_rate": 4.2502243018885344e-05, "loss": 4.4857, "step": 416000 }, { "epoch": 0.45, "learning_rate": 4.2493231291744574e-05, "loss": 4.4788, "step": 416500 }, { "epoch": 0.45, "learning_rate": 4.248421956460381e-05, "loss": 4.4158, "step": 417000 }, { "epoch": 0.45, "learning_rate": 4.247520783746305e-05, "loss": 4.4255, "step": 417500 }, { "epoch": 0.45, "learning_rate": 4.2466196110322286e-05, "loss": 4.5044, "step": 418000 }, { "epoch": 0.45, "learning_rate": 4.2457184383181524e-05, "loss": 4.352, "step": 418500 }, { "epoch": 0.45, "learning_rate": 4.244817265604076e-05, "loss": 4.4666, "step": 419000 }, { "epoch": 0.45, "learning_rate": 4.24391609289e-05, "loss": 4.4477, "step": 419500 }, { "epoch": 0.45, "learning_rate": 4.2430149201759236e-05, "loss": 4.4425, "step": 420000 }, { "epoch": 0.45, "learning_rate": 4.2421137474618473e-05, "loss": 4.5022, "step": 420500 }, { "epoch": 0.46, "learning_rate": 4.241212574747771e-05, "loss": 4.3832, "step": 421000 }, { "epoch": 0.46, "learning_rate": 4.240311402033695e-05, "loss": 4.4686, "step": 421500 }, { "epoch": 0.46, "learning_rate": 4.2394102293196186e-05, "loss": 4.4394, "step": 422000 }, { "epoch": 0.46, "learning_rate": 4.2385090566055416e-05, "loss": 4.4662, "step": 422500 }, { "epoch": 0.46, "learning_rate": 4.2376078838914654e-05, "loss": 4.4482, "step": 423000 }, { "epoch": 0.46, "learning_rate": 4.23670671117739e-05, "loss": 4.4238, "step": 423500 }, { "epoch": 0.46, "learning_rate": 4.2358055384633135e-05, "loss": 4.399, "step": 424000 }, { "epoch": 0.46, "learning_rate": 4.234904365749237e-05, "loss": 4.4646, "step": 424500 }, { "epoch": 0.46, "learning_rate": 4.234003193035161e-05, "loss": 4.4333, "step": 425000 }, { "epoch": 0.46, "learning_rate": 4.233102020321084e-05, "loss": 4.4222, "step": 425500 }, { "epoch": 0.46, "learning_rate": 4.232200847607008e-05, "loss": 4.4807, "step": 426000 }, { "epoch": 0.46, "learning_rate": 4.2312996748929315e-05, "loss": 4.4585, "step": 426500 }, { "epoch": 0.46, "learning_rate": 4.230398502178855e-05, "loss": 4.4629, "step": 427000 }, { "epoch": 0.46, "learning_rate": 4.22949732946478e-05, "loss": 4.3969, "step": 427500 }, { "epoch": 0.46, "learning_rate": 4.2285961567507034e-05, "loss": 4.4375, "step": 428000 }, { "epoch": 0.46, "learning_rate": 4.227694984036627e-05, "loss": 4.4462, "step": 428500 }, { "epoch": 0.46, "learning_rate": 4.22679381132255e-05, "loss": 4.3994, "step": 429000 }, { "epoch": 0.46, "learning_rate": 4.225892638608474e-05, "loss": 4.441, "step": 429500 }, { "epoch": 0.47, "learning_rate": 4.224991465894398e-05, "loss": 4.4581, "step": 430000 }, { "epoch": 0.47, "learning_rate": 4.2240902931803215e-05, "loss": 4.4271, "step": 430500 }, { "epoch": 0.47, "learning_rate": 4.223189120466245e-05, "loss": 4.4268, "step": 431000 }, { "epoch": 0.47, "learning_rate": 4.2222879477521696e-05, "loss": 4.4299, "step": 431500 }, { "epoch": 0.47, "learning_rate": 4.221386775038093e-05, "loss": 4.4034, "step": 432000 }, { "epoch": 0.47, "learning_rate": 4.2204856023240164e-05, "loss": 4.5251, "step": 432500 }, { "epoch": 0.47, "learning_rate": 4.21958442960994e-05, "loss": 4.4132, "step": 433000 }, { "epoch": 0.47, "learning_rate": 4.218683256895864e-05, "loss": 4.4342, "step": 433500 }, { "epoch": 0.47, "learning_rate": 4.2177820841817877e-05, "loss": 4.4125, "step": 434000 }, { "epoch": 0.47, "learning_rate": 4.2168809114677114e-05, "loss": 4.4599, "step": 434500 }, { "epoch": 0.47, "learning_rate": 4.215979738753635e-05, "loss": 4.3972, "step": 435000 }, { "epoch": 0.47, "learning_rate": 4.215078566039559e-05, "loss": 4.5031, "step": 435500 }, { "epoch": 0.47, "learning_rate": 4.2141773933254826e-05, "loss": 4.4313, "step": 436000 }, { "epoch": 0.47, "learning_rate": 4.2132762206114064e-05, "loss": 4.4108, "step": 436500 }, { "epoch": 0.47, "learning_rate": 4.21237504789733e-05, "loss": 4.4509, "step": 437000 }, { "epoch": 0.47, "learning_rate": 4.211473875183254e-05, "loss": 4.4684, "step": 437500 }, { "epoch": 0.47, "learning_rate": 4.210572702469177e-05, "loss": 4.4394, "step": 438000 }, { "epoch": 0.47, "learning_rate": 4.2096715297551006e-05, "loss": 4.3804, "step": 438500 }, { "epoch": 0.47, "learning_rate": 4.208770357041025e-05, "loss": 4.4641, "step": 439000 }, { "epoch": 0.48, "learning_rate": 4.207869184326949e-05, "loss": 4.3934, "step": 439500 }, { "epoch": 0.48, "learning_rate": 4.2069680116128725e-05, "loss": 4.3989, "step": 440000 }, { "epoch": 0.48, "learning_rate": 4.206066838898796e-05, "loss": 4.447, "step": 440500 }, { "epoch": 0.48, "learning_rate": 4.20516566618472e-05, "loss": 4.4046, "step": 441000 }, { "epoch": 0.48, "learning_rate": 4.204264493470643e-05, "loss": 4.4264, "step": 441500 }, { "epoch": 0.48, "learning_rate": 4.203363320756567e-05, "loss": 4.3891, "step": 442000 }, { "epoch": 0.48, "learning_rate": 4.2024621480424906e-05, "loss": 4.4143, "step": 442500 }, { "epoch": 0.48, "learning_rate": 4.201560975328415e-05, "loss": 4.4362, "step": 443000 }, { "epoch": 0.48, "learning_rate": 4.200659802614339e-05, "loss": 4.4681, "step": 443500 }, { "epoch": 0.48, "learning_rate": 4.1997586299002625e-05, "loss": 4.4628, "step": 444000 }, { "epoch": 0.48, "learning_rate": 4.1988574571861855e-05, "loss": 4.4444, "step": 444500 }, { "epoch": 0.48, "learning_rate": 4.197956284472109e-05, "loss": 4.3894, "step": 445000 }, { "epoch": 0.48, "learning_rate": 4.197055111758033e-05, "loss": 4.4775, "step": 445500 }, { "epoch": 0.48, "learning_rate": 4.196153939043957e-05, "loss": 4.3898, "step": 446000 }, { "epoch": 0.48, "learning_rate": 4.1952527663298805e-05, "loss": 4.4591, "step": 446500 }, { "epoch": 0.48, "learning_rate": 4.194351593615804e-05, "loss": 4.4336, "step": 447000 }, { "epoch": 0.48, "learning_rate": 4.193450420901728e-05, "loss": 4.4063, "step": 447500 }, { "epoch": 0.48, "learning_rate": 4.192549248187652e-05, "loss": 4.4326, "step": 448000 }, { "epoch": 0.49, "learning_rate": 4.1916480754735754e-05, "loss": 4.4418, "step": 448500 }, { "epoch": 0.49, "learning_rate": 4.190746902759499e-05, "loss": 4.4141, "step": 449000 }, { "epoch": 0.49, "learning_rate": 4.189845730045423e-05, "loss": 4.3698, "step": 449500 }, { "epoch": 0.49, "learning_rate": 4.1889445573313467e-05, "loss": 4.4296, "step": 450000 }, { "epoch": 0.49, "learning_rate": 4.1880433846172704e-05, "loss": 4.4399, "step": 450500 }, { "epoch": 0.49, "learning_rate": 4.187142211903194e-05, "loss": 4.4123, "step": 451000 }, { "epoch": 0.49, "learning_rate": 4.186241039189118e-05, "loss": 4.3735, "step": 451500 }, { "epoch": 0.49, "learning_rate": 4.1853398664750416e-05, "loss": 4.3984, "step": 452000 }, { "epoch": 0.49, "learning_rate": 4.1844386937609654e-05, "loss": 4.4167, "step": 452500 }, { "epoch": 0.49, "learning_rate": 4.183537521046889e-05, "loss": 4.3666, "step": 453000 }, { "epoch": 0.49, "learning_rate": 4.182636348332813e-05, "loss": 4.4422, "step": 453500 }, { "epoch": 0.49, "learning_rate": 4.181735175618736e-05, "loss": 4.3986, "step": 454000 }, { "epoch": 0.49, "learning_rate": 4.18083400290466e-05, "loss": 4.4333, "step": 454500 }, { "epoch": 0.49, "learning_rate": 4.179932830190584e-05, "loss": 4.4112, "step": 455000 }, { "epoch": 0.49, "learning_rate": 4.179031657476508e-05, "loss": 4.42, "step": 455500 }, { "epoch": 0.49, "learning_rate": 4.1781304847624315e-05, "loss": 4.4114, "step": 456000 }, { "epoch": 0.49, "learning_rate": 4.177229312048355e-05, "loss": 4.3842, "step": 456500 }, { "epoch": 0.49, "learning_rate": 4.1763281393342783e-05, "loss": 4.4399, "step": 457000 }, { "epoch": 0.49, "learning_rate": 4.175426966620202e-05, "loss": 4.4283, "step": 457500 }, { "epoch": 0.5, "learning_rate": 4.174525793906126e-05, "loss": 4.4357, "step": 458000 }, { "epoch": 0.5, "learning_rate": 4.1736246211920496e-05, "loss": 4.4383, "step": 458500 }, { "epoch": 0.5, "learning_rate": 4.172723448477974e-05, "loss": 4.4069, "step": 459000 }, { "epoch": 0.5, "learning_rate": 4.171822275763898e-05, "loss": 4.4093, "step": 459500 }, { "epoch": 0.5, "learning_rate": 4.170921103049821e-05, "loss": 4.4003, "step": 460000 }, { "epoch": 0.5, "learning_rate": 4.1700199303357445e-05, "loss": 4.3554, "step": 460500 }, { "epoch": 0.5, "learning_rate": 4.169118757621668e-05, "loss": 4.4233, "step": 461000 }, { "epoch": 0.5, "learning_rate": 4.168217584907592e-05, "loss": 4.4297, "step": 461500 }, { "epoch": 0.5, "learning_rate": 4.167316412193516e-05, "loss": 4.41, "step": 462000 }, { "epoch": 0.5, "learning_rate": 4.1664152394794395e-05, "loss": 4.4319, "step": 462500 }, { "epoch": 0.5, "learning_rate": 4.165514066765363e-05, "loss": 4.4113, "step": 463000 }, { "epoch": 0.5, "learning_rate": 4.164612894051287e-05, "loss": 4.4162, "step": 463500 }, { "epoch": 0.5, "learning_rate": 4.163711721337211e-05, "loss": 4.437, "step": 464000 }, { "epoch": 0.5, "learning_rate": 4.1628105486231344e-05, "loss": 4.4412, "step": 464500 }, { "epoch": 0.5, "learning_rate": 4.161909375909058e-05, "loss": 4.4154, "step": 465000 }, { "epoch": 0.5, "learning_rate": 4.161008203194982e-05, "loss": 4.4167, "step": 465500 }, { "epoch": 0.5, "learning_rate": 4.1601070304809057e-05, "loss": 4.4659, "step": 466000 }, { "epoch": 0.5, "learning_rate": 4.1592058577668294e-05, "loss": 4.4041, "step": 466500 }, { "epoch": 0.51, "learning_rate": 4.158304685052753e-05, "loss": 4.4115, "step": 467000 }, { "epoch": 0.51, "learning_rate": 4.157403512338677e-05, "loss": 4.4393, "step": 467500 }, { "epoch": 0.51, "learning_rate": 4.1565023396246006e-05, "loss": 4.3725, "step": 468000 }, { "epoch": 0.51, "learning_rate": 4.1556011669105244e-05, "loss": 4.4011, "step": 468500 }, { "epoch": 0.51, "learning_rate": 4.154699994196448e-05, "loss": 4.353, "step": 469000 }, { "epoch": 0.51, "learning_rate": 4.153798821482371e-05, "loss": 4.3823, "step": 469500 }, { "epoch": 0.51, "learning_rate": 4.152897648768295e-05, "loss": 4.4488, "step": 470000 }, { "epoch": 0.51, "learning_rate": 4.151996476054219e-05, "loss": 4.4014, "step": 470500 }, { "epoch": 0.51, "learning_rate": 4.151095303340143e-05, "loss": 4.3857, "step": 471000 }, { "epoch": 0.51, "learning_rate": 4.150194130626067e-05, "loss": 4.4427, "step": 471500 }, { "epoch": 0.51, "learning_rate": 4.1492929579119905e-05, "loss": 4.4374, "step": 472000 }, { "epoch": 0.51, "learning_rate": 4.1483917851979136e-05, "loss": 4.3678, "step": 472500 }, { "epoch": 0.51, "learning_rate": 4.1474906124838373e-05, "loss": 4.3926, "step": 473000 }, { "epoch": 0.51, "learning_rate": 4.146589439769761e-05, "loss": 4.426, "step": 473500 }, { "epoch": 0.51, "learning_rate": 4.145688267055685e-05, "loss": 4.4384, "step": 474000 }, { "epoch": 0.51, "learning_rate": 4.144787094341609e-05, "loss": 4.4226, "step": 474500 }, { "epoch": 0.51, "learning_rate": 4.143885921627533e-05, "loss": 4.3985, "step": 475000 }, { "epoch": 0.51, "learning_rate": 4.142984748913456e-05, "loss": 4.3802, "step": 475500 }, { "epoch": 0.51, "learning_rate": 4.14208357619938e-05, "loss": 4.4457, "step": 476000 }, { "epoch": 0.52, "learning_rate": 4.1411824034853035e-05, "loss": 4.4333, "step": 476500 }, { "epoch": 0.52, "learning_rate": 4.140281230771227e-05, "loss": 4.3906, "step": 477000 }, { "epoch": 0.52, "learning_rate": 4.139380058057151e-05, "loss": 4.3618, "step": 477500 }, { "epoch": 0.52, "learning_rate": 4.138478885343075e-05, "loss": 4.4389, "step": 478000 }, { "epoch": 0.52, "learning_rate": 4.1375777126289985e-05, "loss": 4.354, "step": 478500 }, { "epoch": 0.52, "learning_rate": 4.136676539914922e-05, "loss": 4.4335, "step": 479000 }, { "epoch": 0.52, "learning_rate": 4.135775367200846e-05, "loss": 4.405, "step": 479500 }, { "epoch": 0.52, "learning_rate": 4.13487419448677e-05, "loss": 4.4223, "step": 480000 }, { "epoch": 0.52, "learning_rate": 4.1339730217726934e-05, "loss": 4.4074, "step": 480500 }, { "epoch": 0.52, "learning_rate": 4.133071849058617e-05, "loss": 4.3557, "step": 481000 }, { "epoch": 0.52, "learning_rate": 4.132170676344541e-05, "loss": 4.4342, "step": 481500 }, { "epoch": 0.52, "learning_rate": 4.131269503630465e-05, "loss": 4.3986, "step": 482000 }, { "epoch": 0.52, "learning_rate": 4.1303683309163884e-05, "loss": 4.4292, "step": 482500 }, { "epoch": 0.52, "learning_rate": 4.129467158202312e-05, "loss": 4.4526, "step": 483000 }, { "epoch": 0.52, "learning_rate": 4.128565985488236e-05, "loss": 4.4217, "step": 483500 }, { "epoch": 0.52, "learning_rate": 4.1276648127741596e-05, "loss": 4.3949, "step": 484000 }, { "epoch": 0.52, "learning_rate": 4.1267636400600834e-05, "loss": 4.4406, "step": 484500 }, { "epoch": 0.52, "learning_rate": 4.1258624673460064e-05, "loss": 4.4383, "step": 485000 }, { "epoch": 0.53, "learning_rate": 4.12496129463193e-05, "loss": 4.4106, "step": 485500 }, { "epoch": 0.53, "learning_rate": 4.1240601219178546e-05, "loss": 4.382, "step": 486000 }, { "epoch": 0.53, "learning_rate": 4.123158949203778e-05, "loss": 4.3267, "step": 486500 }, { "epoch": 0.53, "learning_rate": 4.122257776489702e-05, "loss": 4.4221, "step": 487000 }, { "epoch": 0.53, "learning_rate": 4.121356603775626e-05, "loss": 4.3966, "step": 487500 }, { "epoch": 0.53, "learning_rate": 4.120455431061549e-05, "loss": 4.3824, "step": 488000 }, { "epoch": 0.53, "learning_rate": 4.1195542583474726e-05, "loss": 4.4346, "step": 488500 }, { "epoch": 0.53, "learning_rate": 4.1186530856333963e-05, "loss": 4.3681, "step": 489000 }, { "epoch": 0.53, "learning_rate": 4.11775191291932e-05, "loss": 4.405, "step": 489500 }, { "epoch": 0.53, "learning_rate": 4.1168507402052445e-05, "loss": 4.4267, "step": 490000 }, { "epoch": 0.53, "learning_rate": 4.115949567491168e-05, "loss": 4.4356, "step": 490500 }, { "epoch": 0.53, "learning_rate": 4.115048394777091e-05, "loss": 4.3915, "step": 491000 }, { "epoch": 0.53, "learning_rate": 4.114147222063015e-05, "loss": 4.4071, "step": 491500 }, { "epoch": 0.53, "learning_rate": 4.113246049348939e-05, "loss": 4.4558, "step": 492000 }, { "epoch": 0.53, "learning_rate": 4.1123448766348625e-05, "loss": 4.4161, "step": 492500 }, { "epoch": 0.53, "learning_rate": 4.111443703920786e-05, "loss": 4.4322, "step": 493000 }, { "epoch": 0.53, "learning_rate": 4.11054253120671e-05, "loss": 4.4165, "step": 493500 }, { "epoch": 0.53, "learning_rate": 4.1096413584926344e-05, "loss": 4.3936, "step": 494000 }, { "epoch": 0.53, "learning_rate": 4.1087401857785575e-05, "loss": 4.464, "step": 494500 }, { "epoch": 0.54, "learning_rate": 4.107839013064481e-05, "loss": 4.4453, "step": 495000 }, { "epoch": 0.54, "learning_rate": 4.106937840350405e-05, "loss": 4.4496, "step": 495500 }, { "epoch": 0.54, "learning_rate": 4.106036667636329e-05, "loss": 4.4243, "step": 496000 }, { "epoch": 0.54, "learning_rate": 4.1051354949222524e-05, "loss": 4.4202, "step": 496500 }, { "epoch": 0.54, "learning_rate": 4.104234322208176e-05, "loss": 4.4393, "step": 497000 }, { "epoch": 0.54, "learning_rate": 4.1033331494941e-05, "loss": 4.3986, "step": 497500 }, { "epoch": 0.54, "learning_rate": 4.102431976780024e-05, "loss": 4.3453, "step": 498000 }, { "epoch": 0.54, "learning_rate": 4.1015308040659474e-05, "loss": 4.4282, "step": 498500 }, { "epoch": 0.54, "learning_rate": 4.100629631351871e-05, "loss": 4.4063, "step": 499000 }, { "epoch": 0.54, "learning_rate": 4.099728458637795e-05, "loss": 4.4041, "step": 499500 }, { "epoch": 0.54, "learning_rate": 4.0988272859237186e-05, "loss": 4.4121, "step": 500000 }, { "epoch": 0.54, "learning_rate": 4.097926113209642e-05, "loss": 4.3535, "step": 500500 }, { "epoch": 0.54, "learning_rate": 4.0970249404955654e-05, "loss": 4.4579, "step": 501000 }, { "epoch": 0.54, "learning_rate": 4.09612376778149e-05, "loss": 4.4047, "step": 501500 }, { "epoch": 0.54, "learning_rate": 4.0952225950674136e-05, "loss": 4.3953, "step": 502000 }, { "epoch": 0.54, "learning_rate": 4.094321422353337e-05, "loss": 4.3709, "step": 502500 }, { "epoch": 0.54, "learning_rate": 4.093420249639261e-05, "loss": 4.4017, "step": 503000 }, { "epoch": 0.54, "learning_rate": 4.092519076925184e-05, "loss": 4.3861, "step": 503500 }, { "epoch": 0.55, "learning_rate": 4.091617904211108e-05, "loss": 4.4664, "step": 504000 }, { "epoch": 0.55, "learning_rate": 4.0907167314970316e-05, "loss": 4.4029, "step": 504500 }, { "epoch": 0.55, "learning_rate": 4.0898155587829554e-05, "loss": 4.386, "step": 505000 }, { "epoch": 0.55, "learning_rate": 4.08891438606888e-05, "loss": 4.3983, "step": 505500 }, { "epoch": 0.55, "learning_rate": 4.0880132133548035e-05, "loss": 4.3899, "step": 506000 }, { "epoch": 0.55, "learning_rate": 4.087112040640727e-05, "loss": 4.3988, "step": 506500 }, { "epoch": 0.55, "learning_rate": 4.08621086792665e-05, "loss": 4.3771, "step": 507000 }, { "epoch": 0.55, "learning_rate": 4.085309695212574e-05, "loss": 4.3443, "step": 507500 }, { "epoch": 0.55, "learning_rate": 4.084408522498498e-05, "loss": 4.3714, "step": 508000 }, { "epoch": 0.55, "learning_rate": 4.0835073497844215e-05, "loss": 4.3909, "step": 508500 }, { "epoch": 0.55, "learning_rate": 4.082606177070345e-05, "loss": 4.4214, "step": 509000 }, { "epoch": 0.55, "learning_rate": 4.081705004356269e-05, "loss": 4.4305, "step": 509500 }, { "epoch": 0.55, "learning_rate": 4.080803831642193e-05, "loss": 4.3784, "step": 510000 }, { "epoch": 0.55, "learning_rate": 4.0799026589281165e-05, "loss": 4.4198, "step": 510500 }, { "epoch": 0.55, "learning_rate": 4.07900148621404e-05, "loss": 4.3954, "step": 511000 }, { "epoch": 0.55, "learning_rate": 4.078100313499964e-05, "loss": 4.4075, "step": 511500 }, { "epoch": 0.55, "learning_rate": 4.077199140785888e-05, "loss": 4.4065, "step": 512000 }, { "epoch": 0.55, "learning_rate": 4.0762979680718115e-05, "loss": 4.4122, "step": 512500 }, { "epoch": 0.55, "learning_rate": 4.075396795357735e-05, "loss": 4.4, "step": 513000 }, { "epoch": 0.56, "learning_rate": 4.074495622643659e-05, "loss": 4.3722, "step": 513500 }, { "epoch": 0.56, "learning_rate": 4.073594449929583e-05, "loss": 4.3375, "step": 514000 }, { "epoch": 0.56, "learning_rate": 4.0726932772155064e-05, "loss": 4.3655, "step": 514500 }, { "epoch": 0.56, "learning_rate": 4.07179210450143e-05, "loss": 4.3714, "step": 515000 }, { "epoch": 0.56, "learning_rate": 4.070890931787354e-05, "loss": 4.4154, "step": 515500 }, { "epoch": 0.56, "learning_rate": 4.069989759073277e-05, "loss": 4.4121, "step": 516000 }, { "epoch": 0.56, "learning_rate": 4.069088586359201e-05, "loss": 4.4102, "step": 516500 }, { "epoch": 0.56, "learning_rate": 4.068187413645125e-05, "loss": 4.3882, "step": 517000 }, { "epoch": 0.56, "learning_rate": 4.067286240931049e-05, "loss": 4.4476, "step": 517500 }, { "epoch": 0.56, "learning_rate": 4.0663850682169726e-05, "loss": 4.3978, "step": 518000 }, { "epoch": 0.56, "learning_rate": 4.065483895502896e-05, "loss": 4.4405, "step": 518500 }, { "epoch": 0.56, "learning_rate": 4.06458272278882e-05, "loss": 4.3647, "step": 519000 }, { "epoch": 0.56, "learning_rate": 4.063681550074743e-05, "loss": 4.3729, "step": 519500 }, { "epoch": 0.56, "learning_rate": 4.062780377360667e-05, "loss": 4.4138, "step": 520000 }, { "epoch": 0.56, "learning_rate": 4.0618792046465906e-05, "loss": 4.3248, "step": 520500 }, { "epoch": 0.56, "learning_rate": 4.0609780319325144e-05, "loss": 4.422, "step": 521000 }, { "epoch": 0.56, "learning_rate": 4.060076859218439e-05, "loss": 4.3538, "step": 521500 }, { "epoch": 0.56, "learning_rate": 4.0591756865043625e-05, "loss": 4.4099, "step": 522000 }, { "epoch": 0.57, "learning_rate": 4.0582745137902856e-05, "loss": 4.4193, "step": 522500 }, { "epoch": 0.57, "learning_rate": 4.057373341076209e-05, "loss": 4.3988, "step": 523000 }, { "epoch": 0.57, "learning_rate": 4.056472168362133e-05, "loss": 4.4022, "step": 523500 }, { "epoch": 0.57, "learning_rate": 4.055570995648057e-05, "loss": 4.3413, "step": 524000 }, { "epoch": 0.57, "learning_rate": 4.0546698229339805e-05, "loss": 4.434, "step": 524500 }, { "epoch": 0.57, "learning_rate": 4.053768650219904e-05, "loss": 4.3744, "step": 525000 }, { "epoch": 0.57, "learning_rate": 4.052867477505828e-05, "loss": 4.418, "step": 525500 }, { "epoch": 0.57, "learning_rate": 4.051966304791752e-05, "loss": 4.3814, "step": 526000 }, { "epoch": 0.57, "learning_rate": 4.0510651320776755e-05, "loss": 4.3454, "step": 526500 }, { "epoch": 0.57, "learning_rate": 4.050163959363599e-05, "loss": 4.3251, "step": 527000 }, { "epoch": 0.57, "learning_rate": 4.049262786649523e-05, "loss": 4.4182, "step": 527500 }, { "epoch": 0.57, "learning_rate": 4.048361613935447e-05, "loss": 4.3319, "step": 528000 }, { "epoch": 0.57, "learning_rate": 4.0474604412213705e-05, "loss": 4.3861, "step": 528500 }, { "epoch": 0.57, "learning_rate": 4.046559268507294e-05, "loss": 4.4092, "step": 529000 }, { "epoch": 0.57, "learning_rate": 4.045658095793218e-05, "loss": 4.397, "step": 529500 }, { "epoch": 0.57, "learning_rate": 4.044756923079142e-05, "loss": 4.3839, "step": 530000 }, { "epoch": 0.57, "learning_rate": 4.0438557503650654e-05, "loss": 4.4383, "step": 530500 }, { "epoch": 0.57, "learning_rate": 4.042954577650989e-05, "loss": 4.4198, "step": 531000 }, { "epoch": 0.57, "learning_rate": 4.042053404936912e-05, "loss": 4.3632, "step": 531500 }, { "epoch": 0.58, "learning_rate": 4.041152232222836e-05, "loss": 4.3722, "step": 532000 }, { "epoch": 0.58, "learning_rate": 4.04025105950876e-05, "loss": 4.3664, "step": 532500 }, { "epoch": 0.58, "learning_rate": 4.039349886794684e-05, "loss": 4.357, "step": 533000 }, { "epoch": 0.58, "learning_rate": 4.038448714080608e-05, "loss": 4.3484, "step": 533500 }, { "epoch": 0.58, "learning_rate": 4.0375475413665316e-05, "loss": 4.4506, "step": 534000 }, { "epoch": 0.58, "learning_rate": 4.0366463686524553e-05, "loss": 4.37, "step": 534500 }, { "epoch": 0.58, "learning_rate": 4.0357451959383784e-05, "loss": 4.3452, "step": 535000 }, { "epoch": 0.58, "learning_rate": 4.034844023224302e-05, "loss": 4.4018, "step": 535500 }, { "epoch": 0.58, "learning_rate": 4.033942850510226e-05, "loss": 4.4079, "step": 536000 }, { "epoch": 0.58, "learning_rate": 4.0330416777961496e-05, "loss": 4.3569, "step": 536500 }, { "epoch": 0.58, "learning_rate": 4.032140505082074e-05, "loss": 4.3495, "step": 537000 }, { "epoch": 0.58, "learning_rate": 4.031239332367998e-05, "loss": 4.3752, "step": 537500 }, { "epoch": 0.58, "learning_rate": 4.030338159653921e-05, "loss": 4.3821, "step": 538000 }, { "epoch": 0.58, "learning_rate": 4.0294369869398446e-05, "loss": 4.431, "step": 538500 }, { "epoch": 0.58, "learning_rate": 4.028535814225768e-05, "loss": 4.3057, "step": 539000 }, { "epoch": 0.58, "learning_rate": 4.027634641511692e-05, "loss": 4.3249, "step": 539500 }, { "epoch": 0.58, "learning_rate": 4.026733468797616e-05, "loss": 4.3181, "step": 540000 }, { "epoch": 0.58, "learning_rate": 4.0258322960835395e-05, "loss": 4.3905, "step": 540500 }, { "epoch": 0.59, "learning_rate": 4.024931123369463e-05, "loss": 4.3406, "step": 541000 }, { "epoch": 0.59, "learning_rate": 4.024029950655387e-05, "loss": 4.3545, "step": 541500 }, { "epoch": 0.59, "learning_rate": 4.023128777941311e-05, "loss": 4.3554, "step": 542000 }, { "epoch": 0.59, "learning_rate": 4.0222276052272345e-05, "loss": 4.4182, "step": 542500 }, { "epoch": 0.59, "learning_rate": 4.021326432513158e-05, "loss": 4.4599, "step": 543000 }, { "epoch": 0.59, "learning_rate": 4.020425259799082e-05, "loss": 4.326, "step": 543500 }, { "epoch": 0.59, "learning_rate": 4.019524087085006e-05, "loss": 4.3247, "step": 544000 }, { "epoch": 0.59, "learning_rate": 4.0186229143709295e-05, "loss": 4.4027, "step": 544500 }, { "epoch": 0.59, "learning_rate": 4.017721741656853e-05, "loss": 4.315, "step": 545000 }, { "epoch": 0.59, "learning_rate": 4.016820568942777e-05, "loss": 4.3967, "step": 545500 }, { "epoch": 0.59, "learning_rate": 4.015919396228701e-05, "loss": 4.3808, "step": 546000 }, { "epoch": 0.59, "learning_rate": 4.0150182235146244e-05, "loss": 4.3609, "step": 546500 }, { "epoch": 0.59, "learning_rate": 4.014117050800548e-05, "loss": 4.3969, "step": 547000 }, { "epoch": 0.59, "learning_rate": 4.013215878086471e-05, "loss": 4.3735, "step": 547500 }, { "epoch": 0.59, "learning_rate": 4.012314705372395e-05, "loss": 4.3567, "step": 548000 }, { "epoch": 0.59, "learning_rate": 4.0114135326583194e-05, "loss": 4.3614, "step": 548500 }, { "epoch": 0.59, "learning_rate": 4.010512359944243e-05, "loss": 4.3611, "step": 549000 }, { "epoch": 0.59, "learning_rate": 4.009611187230167e-05, "loss": 4.376, "step": 549500 }, { "epoch": 0.59, "learning_rate": 4.0087100145160906e-05, "loss": 4.3128, "step": 550000 }, { "epoch": 0.6, "learning_rate": 4.007808841802014e-05, "loss": 4.3885, "step": 550500 }, { "epoch": 0.6, "learning_rate": 4.0069076690879374e-05, "loss": 4.3767, "step": 551000 }, { "epoch": 0.6, "learning_rate": 4.006006496373861e-05, "loss": 4.3457, "step": 551500 }, { "epoch": 0.6, "learning_rate": 4.005105323659785e-05, "loss": 4.462, "step": 552000 }, { "epoch": 0.6, "learning_rate": 4.004204150945709e-05, "loss": 4.3849, "step": 552500 }, { "epoch": 0.6, "learning_rate": 4.003302978231633e-05, "loss": 4.3644, "step": 553000 }, { "epoch": 0.6, "learning_rate": 4.002401805517556e-05, "loss": 4.3445, "step": 553500 }, { "epoch": 0.6, "learning_rate": 4.00150063280348e-05, "loss": 4.3573, "step": 554000 }, { "epoch": 0.6, "learning_rate": 4.0005994600894036e-05, "loss": 4.3702, "step": 554500 }, { "epoch": 0.6, "learning_rate": 3.999698287375327e-05, "loss": 4.335, "step": 555000 }, { "epoch": 0.6, "learning_rate": 3.998797114661251e-05, "loss": 4.3592, "step": 555500 }, { "epoch": 0.6, "learning_rate": 3.997895941947175e-05, "loss": 4.3702, "step": 556000 }, { "epoch": 0.6, "learning_rate": 3.9969947692330985e-05, "loss": 4.3976, "step": 556500 }, { "epoch": 0.6, "learning_rate": 3.996093596519022e-05, "loss": 4.3542, "step": 557000 }, { "epoch": 0.6, "learning_rate": 3.995192423804946e-05, "loss": 4.3243, "step": 557500 }, { "epoch": 0.6, "learning_rate": 3.99429125109087e-05, "loss": 4.3865, "step": 558000 }, { "epoch": 0.6, "learning_rate": 3.9933900783767935e-05, "loss": 4.3937, "step": 558500 }, { "epoch": 0.6, "learning_rate": 3.992488905662717e-05, "loss": 4.4588, "step": 559000 }, { "epoch": 0.61, "learning_rate": 3.991587732948641e-05, "loss": 4.3763, "step": 559500 }, { "epoch": 0.61, "learning_rate": 3.990686560234565e-05, "loss": 4.3972, "step": 560000 } ], "max_steps": 2774163, "num_train_epochs": 3, "total_flos": 3.658088448e+16, "trial_name": null, "trial_params": null }