diff --git "a/gpt2-4chan-mini/trainer_state.json" "b/gpt2-4chan-mini/trainer_state.json" new file mode 100644--- /dev/null +++ "b/gpt2-4chan-mini/trainer_state.json" @@ -0,0 +1,6736 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6055880638592613, + "global_step": 560000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999098827285924e-05, + "loss": 5.3192, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.998197654571848e-05, + "loss": 5.4021, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9972964818577715e-05, + "loss": 5.4284, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 4.996395309143695e-05, + "loss": 5.4341, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 4.995494136429619e-05, + "loss": 5.4215, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 4.994592963715543e-05, + "loss": 5.3564, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9936917910014664e-05, + "loss": 5.3319, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 4.99279061828739e-05, + "loss": 5.3326, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 4.991889445573314e-05, + "loss": 5.3575, + "step": 4500 + }, + { + "epoch": 0.01, + "learning_rate": 4.990988272859237e-05, + "loss": 5.3404, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990087100145161e-05, + "loss": 5.339, + "step": 5500 + }, + { + "epoch": 0.01, + "learning_rate": 4.989185927431085e-05, + "loss": 5.2714, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988284754717009e-05, + "loss": 5.2691, + "step": 6500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9873835820029326e-05, + "loss": 5.2559, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9864824092888563e-05, + "loss": 5.209, + "step": 7500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9855812365747794e-05, + "loss": 5.26, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984680063860703e-05, + "loss": 5.1878, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 4.983778891146627e-05, + "loss": 5.212, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9828777184325506e-05, + "loss": 5.2063, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981976545718475e-05, + "loss": 5.2132, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 4.981075373004399e-05, + "loss": 5.221, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980174200290322e-05, + "loss": 5.1786, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9792730275762456e-05, + "loss": 5.1868, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 4.978371854862169e-05, + "loss": 5.1585, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 4.977470682148093e-05, + "loss": 5.2465, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 4.976569509434017e-05, + "loss": 5.1645, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9756683367199405e-05, + "loss": 5.1462, + "step": 13500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974767164005864e-05, + "loss": 5.1588, + "step": 14000 + }, + { + "epoch": 0.02, + "learning_rate": 4.973865991291788e-05, + "loss": 5.145, + "step": 14500 + }, + { + "epoch": 0.02, + "learning_rate": 4.972964818577712e-05, + "loss": 5.1256, + "step": 15000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9720636458636355e-05, + "loss": 5.1227, + "step": 15500 + }, + { + "epoch": 0.02, + "learning_rate": 4.971162473149559e-05, + "loss": 5.096, + "step": 16000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970261300435483e-05, + "loss": 5.1427, + "step": 16500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969360127721407e-05, + "loss": 5.121, + "step": 17000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9684589550073305e-05, + "loss": 5.1324, + "step": 17500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967557782293254e-05, + "loss": 5.1476, + "step": 18000 + }, + { + "epoch": 0.02, + "learning_rate": 4.966656609579178e-05, + "loss": 5.0538, + "step": 18500 + }, + { + "epoch": 0.02, + "learning_rate": 4.965755436865102e-05, + "loss": 5.0635, + "step": 19000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9648542641510254e-05, + "loss": 5.0309, + "step": 19500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963953091436949e-05, + "loss": 5.0623, + "step": 20000 + }, + { + "epoch": 0.02, + "learning_rate": 4.963051918722872e-05, + "loss": 5.0624, + "step": 20500 + }, + { + "epoch": 0.02, + "learning_rate": 4.962150746008796e-05, + "loss": 5.0844, + "step": 21000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9612495732947204e-05, + "loss": 5.0667, + "step": 21500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960348400580644e-05, + "loss": 5.0536, + "step": 22000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959447227866568e-05, + "loss": 5.0783, + "step": 22500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9585460551524916e-05, + "loss": 5.0335, + "step": 23000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957644882438415e-05, + "loss": 5.0321, + "step": 23500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9567437097243384e-05, + "loss": 5.037, + "step": 24000 + }, + { + "epoch": 0.03, + "learning_rate": 4.955842537010262e-05, + "loss": 5.0187, + "step": 24500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954941364296186e-05, + "loss": 5.0357, + "step": 25000 + }, + { + "epoch": 0.03, + "learning_rate": 4.95404019158211e-05, + "loss": 5.0128, + "step": 25500 + }, + { + "epoch": 0.03, + "learning_rate": 4.953139018868034e-05, + "loss": 5.0553, + "step": 26000 + }, + { + "epoch": 0.03, + "learning_rate": 4.952237846153957e-05, + "loss": 5.0024, + "step": 26500 + }, + { + "epoch": 0.03, + "learning_rate": 4.951336673439881e-05, + "loss": 5.0177, + "step": 27000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9504355007258046e-05, + "loss": 5.0174, + "step": 27500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949534328011728e-05, + "loss": 5.0167, + "step": 28000 + }, + { + "epoch": 0.03, + "learning_rate": 4.948633155297652e-05, + "loss": 4.9896, + "step": 28500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947731982583576e-05, + "loss": 5.0355, + "step": 29000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9468308098694995e-05, + "loss": 4.9929, + "step": 29500 + }, + { + "epoch": 0.03, + "learning_rate": 4.945929637155423e-05, + "loss": 4.9702, + "step": 30000 + }, + { + "epoch": 0.03, + "learning_rate": 4.945028464441347e-05, + "loss": 4.944, + "step": 30500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944127291727271e-05, + "loss": 4.9957, + "step": 31000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9432261190131945e-05, + "loss": 4.9908, + "step": 31500 + }, + { + "epoch": 0.03, + "learning_rate": 4.942324946299118e-05, + "loss": 4.9816, + "step": 32000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941423773585042e-05, + "loss": 4.9649, + "step": 32500 + }, + { + "epoch": 0.04, + "learning_rate": 4.940522600870966e-05, + "loss": 4.9434, + "step": 33000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9396214281568895e-05, + "loss": 5.0387, + "step": 33500 + }, + { + "epoch": 0.04, + "learning_rate": 4.938720255442813e-05, + "loss": 4.9799, + "step": 34000 + }, + { + "epoch": 0.04, + "learning_rate": 4.937819082728737e-05, + "loss": 4.9648, + "step": 34500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936917910014661e-05, + "loss": 4.9593, + "step": 35000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9360167373005844e-05, + "loss": 4.9687, + "step": 35500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9351155645865075e-05, + "loss": 4.9474, + "step": 36000 + }, + { + "epoch": 0.04, + "learning_rate": 4.934214391872431e-05, + "loss": 4.9344, + "step": 36500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9333132191583556e-05, + "loss": 4.932, + "step": 37000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9324120464442794e-05, + "loss": 5.0116, + "step": 37500 + }, + { + "epoch": 0.04, + "learning_rate": 4.931510873730203e-05, + "loss": 4.9311, + "step": 38000 + }, + { + "epoch": 0.04, + "learning_rate": 4.930609701016127e-05, + "loss": 4.9114, + "step": 38500 + }, + { + "epoch": 0.04, + "learning_rate": 4.92970852830205e-05, + "loss": 4.9517, + "step": 39000 + }, + { + "epoch": 0.04, + "learning_rate": 4.928807355587974e-05, + "loss": 4.9541, + "step": 39500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9279061828738974e-05, + "loss": 4.9637, + "step": 40000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927005010159821e-05, + "loss": 4.9498, + "step": 40500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926103837445745e-05, + "loss": 4.8924, + "step": 41000 + }, + { + "epoch": 0.04, + "learning_rate": 4.925202664731669e-05, + "loss": 4.9596, + "step": 41500 + }, + { + "epoch": 0.05, + "learning_rate": 4.924301492017593e-05, + "loss": 4.9264, + "step": 42000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923400319303516e-05, + "loss": 4.9179, + "step": 42500 + }, + { + "epoch": 0.05, + "learning_rate": 4.92249914658944e-05, + "loss": 4.9151, + "step": 43000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9215979738753636e-05, + "loss": 4.9101, + "step": 43500 + }, + { + "epoch": 0.05, + "learning_rate": 4.920696801161287e-05, + "loss": 4.9541, + "step": 44000 + }, + { + "epoch": 0.05, + "learning_rate": 4.919795628447211e-05, + "loss": 4.9423, + "step": 44500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918894455733135e-05, + "loss": 4.8763, + "step": 45000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9179932830190586e-05, + "loss": 4.9015, + "step": 45500 + }, + { + "epoch": 0.05, + "learning_rate": 4.917092110304982e-05, + "loss": 4.9179, + "step": 46000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916190937590906e-05, + "loss": 4.8837, + "step": 46500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91528976487683e-05, + "loss": 4.9141, + "step": 47000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9143885921627535e-05, + "loss": 4.8766, + "step": 47500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913487419448677e-05, + "loss": 4.9088, + "step": 48000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912586246734601e-05, + "loss": 4.9137, + "step": 48500 + }, + { + "epoch": 0.05, + "learning_rate": 4.911685074020525e-05, + "loss": 4.8692, + "step": 49000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9107839013064485e-05, + "loss": 4.8607, + "step": 49500 + }, + { + "epoch": 0.05, + "learning_rate": 4.909882728592372e-05, + "loss": 4.8573, + "step": 50000 + }, + { + "epoch": 0.05, + "learning_rate": 4.908981555878296e-05, + "loss": 4.9472, + "step": 50500 + }, + { + "epoch": 0.06, + "learning_rate": 4.90808038316422e-05, + "loss": 4.9144, + "step": 51000 + }, + { + "epoch": 0.06, + "learning_rate": 4.907179210450143e-05, + "loss": 4.973, + "step": 51500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9062780377360665e-05, + "loss": 4.9413, + "step": 52000 + }, + { + "epoch": 0.06, + "learning_rate": 4.90537686502199e-05, + "loss": 4.972, + "step": 52500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9044756923079147e-05, + "loss": 4.9722, + "step": 53000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9035745195938384e-05, + "loss": 4.9126, + "step": 53500 + }, + { + "epoch": 0.06, + "learning_rate": 4.902673346879762e-05, + "loss": 4.9117, + "step": 54000 + }, + { + "epoch": 0.06, + "learning_rate": 4.901772174165686e-05, + "loss": 4.9233, + "step": 54500 + }, + { + "epoch": 0.06, + "learning_rate": 4.900871001451609e-05, + "loss": 4.9693, + "step": 55000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899969828737533e-05, + "loss": 4.9875, + "step": 55500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8990686560234564e-05, + "loss": 4.9741, + "step": 56000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89816748330938e-05, + "loss": 4.9411, + "step": 56500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8972663105953046e-05, + "loss": 4.9281, + "step": 57000 + }, + { + "epoch": 0.06, + "learning_rate": 4.896365137881228e-05, + "loss": 4.9392, + "step": 57500 + }, + { + "epoch": 0.06, + "learning_rate": 4.8954639651671514e-05, + "loss": 4.9473, + "step": 58000 + }, + { + "epoch": 0.06, + "learning_rate": 4.894562792453075e-05, + "loss": 4.9333, + "step": 58500 + }, + { + "epoch": 0.06, + "learning_rate": 4.893661619738999e-05, + "loss": 4.9547, + "step": 59000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8927604470249226e-05, + "loss": 4.9422, + "step": 59500 + }, + { + "epoch": 0.06, + "learning_rate": 4.891859274310846e-05, + "loss": 4.9182, + "step": 60000 + }, + { + "epoch": 0.07, + "learning_rate": 4.89095810159677e-05, + "loss": 4.9282, + "step": 60500 + }, + { + "epoch": 0.07, + "learning_rate": 4.890056928882694e-05, + "loss": 4.943, + "step": 61000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8891557561686176e-05, + "loss": 4.9436, + "step": 61500 + }, + { + "epoch": 0.07, + "learning_rate": 4.888254583454541e-05, + "loss": 4.9253, + "step": 62000 + }, + { + "epoch": 0.07, + "learning_rate": 4.887353410740465e-05, + "loss": 4.9442, + "step": 62500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886452238026389e-05, + "loss": 4.8888, + "step": 63000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8855510653123125e-05, + "loss": 4.9155, + "step": 63500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8846498925982356e-05, + "loss": 4.9519, + "step": 64000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88374871988416e-05, + "loss": 4.9563, + "step": 64500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882847547170084e-05, + "loss": 4.9553, + "step": 65000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8819463744560075e-05, + "loss": 4.8865, + "step": 65500 + }, + { + "epoch": 0.07, + "learning_rate": 4.881045201741931e-05, + "loss": 4.8987, + "step": 66000 + }, + { + "epoch": 0.07, + "learning_rate": 4.880144029027855e-05, + "loss": 4.9129, + "step": 66500 + }, + { + "epoch": 0.07, + "learning_rate": 4.879242856313779e-05, + "loss": 4.9565, + "step": 67000 + }, + { + "epoch": 0.07, + "learning_rate": 4.878341683599702e-05, + "loss": 4.8831, + "step": 67500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8774405108856255e-05, + "loss": 4.9007, + "step": 68000 + }, + { + "epoch": 0.07, + "learning_rate": 4.87653933817155e-05, + "loss": 4.9337, + "step": 68500 + }, + { + "epoch": 0.07, + "learning_rate": 4.8756381654574737e-05, + "loss": 4.8446, + "step": 69000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8747369927433974e-05, + "loss": 4.9388, + "step": 69500 + }, + { + "epoch": 0.08, + "learning_rate": 4.873835820029321e-05, + "loss": 4.9655, + "step": 70000 + }, + { + "epoch": 0.08, + "learning_rate": 4.872934647315244e-05, + "loss": 4.9309, + "step": 70500 + }, + { + "epoch": 0.08, + "learning_rate": 4.872033474601168e-05, + "loss": 4.9102, + "step": 71000 + }, + { + "epoch": 0.08, + "learning_rate": 4.871132301887092e-05, + "loss": 4.8491, + "step": 71500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8702311291730154e-05, + "loss": 4.895, + "step": 72000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86932995645894e-05, + "loss": 4.9222, + "step": 72500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8684287837448636e-05, + "loss": 4.8966, + "step": 73000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8675276110307866e-05, + "loss": 4.8669, + "step": 73500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8666264383167104e-05, + "loss": 4.8332, + "step": 74000 + }, + { + "epoch": 0.08, + "learning_rate": 4.865725265602634e-05, + "loss": 4.9127, + "step": 74500 + }, + { + "epoch": 0.08, + "learning_rate": 4.864824092888558e-05, + "loss": 4.9251, + "step": 75000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8639229201744816e-05, + "loss": 4.9379, + "step": 75500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8630217474604053e-05, + "loss": 4.8682, + "step": 76000 + }, + { + "epoch": 0.08, + "learning_rate": 4.862120574746329e-05, + "loss": 4.8762, + "step": 76500 + }, + { + "epoch": 0.08, + "learning_rate": 4.861219402032253e-05, + "loss": 4.8544, + "step": 77000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8603182293181766e-05, + "loss": 4.8835, + "step": 77500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8594170566041e-05, + "loss": 4.8346, + "step": 78000 + }, + { + "epoch": 0.08, + "learning_rate": 4.858515883890024e-05, + "loss": 4.9229, + "step": 78500 + }, + { + "epoch": 0.09, + "learning_rate": 4.857614711175948e-05, + "loss": 4.9159, + "step": 79000 + }, + { + "epoch": 0.09, + "learning_rate": 4.856713538461871e-05, + "loss": 4.8566, + "step": 79500 + }, + { + "epoch": 0.09, + "learning_rate": 4.855812365747795e-05, + "loss": 4.8155, + "step": 80000 + }, + { + "epoch": 0.09, + "learning_rate": 4.854911193033719e-05, + "loss": 4.9222, + "step": 80500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854010020319643e-05, + "loss": 4.8545, + "step": 81000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8531088476055665e-05, + "loss": 4.8295, + "step": 81500 + }, + { + "epoch": 0.09, + "learning_rate": 4.85220767489149e-05, + "loss": 4.8223, + "step": 82000 + }, + { + "epoch": 0.09, + "learning_rate": 4.851306502177414e-05, + "loss": 4.8637, + "step": 82500 + }, + { + "epoch": 0.09, + "learning_rate": 4.850405329463337e-05, + "loss": 4.878, + "step": 83000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849504156749261e-05, + "loss": 4.8677, + "step": 83500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848602984035185e-05, + "loss": 4.8636, + "step": 84000 + }, + { + "epoch": 0.09, + "learning_rate": 4.847701811321109e-05, + "loss": 4.8708, + "step": 84500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846800638607033e-05, + "loss": 4.8608, + "step": 85000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8458994658929564e-05, + "loss": 4.8347, + "step": 85500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8449982931788795e-05, + "loss": 4.8385, + "step": 86000 + }, + { + "epoch": 0.09, + "learning_rate": 4.844097120464803e-05, + "loss": 4.8565, + "step": 86500 + }, + { + "epoch": 0.09, + "learning_rate": 4.843195947750727e-05, + "loss": 4.867, + "step": 87000 + }, + { + "epoch": 0.09, + "learning_rate": 4.842294775036651e-05, + "loss": 4.8456, + "step": 87500 + }, + { + "epoch": 0.1, + "learning_rate": 4.841393602322575e-05, + "loss": 4.8739, + "step": 88000 + }, + { + "epoch": 0.1, + "learning_rate": 4.840492429608499e-05, + "loss": 4.8473, + "step": 88500 + }, + { + "epoch": 0.1, + "learning_rate": 4.839591256894422e-05, + "loss": 4.8496, + "step": 89000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8386900841803456e-05, + "loss": 4.8579, + "step": 89500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8377889114662694e-05, + "loss": 4.8543, + "step": 90000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836887738752193e-05, + "loss": 4.8855, + "step": 90500 + }, + { + "epoch": 0.1, + "learning_rate": 4.835986566038117e-05, + "loss": 4.8511, + "step": 91000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8350853933240406e-05, + "loss": 4.8682, + "step": 91500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8341842206099643e-05, + "loss": 4.8074, + "step": 92000 + }, + { + "epoch": 0.1, + "learning_rate": 4.833283047895888e-05, + "loss": 4.8034, + "step": 92500 + }, + { + "epoch": 0.1, + "learning_rate": 4.832381875181812e-05, + "loss": 4.842, + "step": 93000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8314807024677356e-05, + "loss": 4.8146, + "step": 93500 + }, + { + "epoch": 0.1, + "learning_rate": 4.830579529753659e-05, + "loss": 4.8353, + "step": 94000 + }, + { + "epoch": 0.1, + "learning_rate": 4.829678357039583e-05, + "loss": 4.8151, + "step": 94500 + }, + { + "epoch": 0.1, + "learning_rate": 4.828777184325507e-05, + "loss": 4.8127, + "step": 95000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8278760116114305e-05, + "loss": 4.833, + "step": 95500 + }, + { + "epoch": 0.1, + "learning_rate": 4.826974838897354e-05, + "loss": 4.8383, + "step": 96000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826073666183278e-05, + "loss": 4.8441, + "step": 96500 + }, + { + "epoch": 0.1, + "learning_rate": 4.825172493469202e-05, + "loss": 4.8794, + "step": 97000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8242713207551255e-05, + "loss": 4.828, + "step": 97500 + }, + { + "epoch": 0.11, + "learning_rate": 4.823370148041049e-05, + "loss": 4.7572, + "step": 98000 + }, + { + "epoch": 0.11, + "learning_rate": 4.822468975326972e-05, + "loss": 4.7658, + "step": 98500 + }, + { + "epoch": 0.11, + "learning_rate": 4.821567802612896e-05, + "loss": 4.8123, + "step": 99000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8206666298988204e-05, + "loss": 4.8082, + "step": 99500 + }, + { + "epoch": 0.11, + "learning_rate": 4.819765457184744e-05, + "loss": 4.7542, + "step": 100000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818864284470668e-05, + "loss": 4.8264, + "step": 100500 + }, + { + "epoch": 0.11, + "learning_rate": 4.817963111756592e-05, + "loss": 4.7541, + "step": 101000 + }, + { + "epoch": 0.11, + "learning_rate": 4.817061939042515e-05, + "loss": 4.7992, + "step": 101500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8161607663284385e-05, + "loss": 4.8078, + "step": 102000 + }, + { + "epoch": 0.11, + "learning_rate": 4.815259593614362e-05, + "loss": 4.8453, + "step": 102500 + }, + { + "epoch": 0.11, + "learning_rate": 4.814358420900286e-05, + "loss": 4.8276, + "step": 103000 + }, + { + "epoch": 0.11, + "learning_rate": 4.81345724818621e-05, + "loss": 4.7253, + "step": 103500 + }, + { + "epoch": 0.11, + "learning_rate": 4.812556075472134e-05, + "loss": 4.8102, + "step": 104000 + }, + { + "epoch": 0.11, + "learning_rate": 4.811654902758057e-05, + "loss": 4.8006, + "step": 104500 + }, + { + "epoch": 0.11, + "learning_rate": 4.810753730043981e-05, + "loss": 4.7603, + "step": 105000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8098525573299047e-05, + "loss": 4.7124, + "step": 105500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8089513846158284e-05, + "loss": 4.7757, + "step": 106000 + }, + { + "epoch": 0.12, + "learning_rate": 4.808050211901752e-05, + "loss": 4.7593, + "step": 106500 + }, + { + "epoch": 0.12, + "learning_rate": 4.807149039187676e-05, + "loss": 4.8501, + "step": 107000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8062478664735996e-05, + "loss": 4.8105, + "step": 107500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8053466937595234e-05, + "loss": 4.7692, + "step": 108000 + }, + { + "epoch": 0.12, + "learning_rate": 4.804445521045447e-05, + "loss": 4.7855, + "step": 108500 + }, + { + "epoch": 0.12, + "learning_rate": 4.803544348331371e-05, + "loss": 4.8032, + "step": 109000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8026431756172946e-05, + "loss": 4.7814, + "step": 109500 + }, + { + "epoch": 0.12, + "learning_rate": 4.801742002903218e-05, + "loss": 4.8473, + "step": 110000 + }, + { + "epoch": 0.12, + "learning_rate": 4.800840830189142e-05, + "loss": 4.8047, + "step": 110500 + }, + { + "epoch": 0.12, + "learning_rate": 4.799939657475066e-05, + "loss": 4.8326, + "step": 111000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7990384847609895e-05, + "loss": 4.7993, + "step": 111500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798137312046913e-05, + "loss": 4.7892, + "step": 112000 + }, + { + "epoch": 0.12, + "learning_rate": 4.797236139332837e-05, + "loss": 4.751, + "step": 112500 + }, + { + "epoch": 0.12, + "learning_rate": 4.796334966618761e-05, + "loss": 4.7795, + "step": 113000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7954337939046845e-05, + "loss": 4.7684, + "step": 113500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7945326211906076e-05, + "loss": 4.7673, + "step": 114000 + }, + { + "epoch": 0.12, + "learning_rate": 4.793631448476531e-05, + "loss": 4.7614, + "step": 114500 + }, + { + "epoch": 0.12, + "learning_rate": 4.792730275762455e-05, + "loss": 4.7529, + "step": 115000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7918291030483795e-05, + "loss": 4.8215, + "step": 115500 + }, + { + "epoch": 0.13, + "learning_rate": 4.790927930334303e-05, + "loss": 4.7351, + "step": 116000 + }, + { + "epoch": 0.13, + "learning_rate": 4.790026757620227e-05, + "loss": 4.7878, + "step": 116500 + }, + { + "epoch": 0.13, + "learning_rate": 4.78912558490615e-05, + "loss": 4.7618, + "step": 117000 + }, + { + "epoch": 0.13, + "learning_rate": 4.788224412192074e-05, + "loss": 4.846, + "step": 117500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7873232394779975e-05, + "loss": 4.8027, + "step": 118000 + }, + { + "epoch": 0.13, + "learning_rate": 4.786422066763921e-05, + "loss": 4.7415, + "step": 118500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785520894049845e-05, + "loss": 4.7554, + "step": 119000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7846197213357694e-05, + "loss": 4.7287, + "step": 119500 + }, + { + "epoch": 0.13, + "learning_rate": 4.783718548621693e-05, + "loss": 4.81, + "step": 120000 + }, + { + "epoch": 0.13, + "learning_rate": 4.782817375907616e-05, + "loss": 4.7374, + "step": 120500 + }, + { + "epoch": 0.13, + "learning_rate": 4.78191620319354e-05, + "loss": 4.7541, + "step": 121000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7810150304794637e-05, + "loss": 4.7704, + "step": 121500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7801138577653874e-05, + "loss": 4.7743, + "step": 122000 + }, + { + "epoch": 0.13, + "learning_rate": 4.779212685051311e-05, + "loss": 4.7569, + "step": 122500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778311512337235e-05, + "loss": 4.69, + "step": 123000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7774103396231586e-05, + "loss": 4.8213, + "step": 123500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7765091669090824e-05, + "loss": 4.7616, + "step": 124000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775607994195006e-05, + "loss": 4.7587, + "step": 124500 + }, + { + "epoch": 0.14, + "learning_rate": 4.77470682148093e-05, + "loss": 4.7599, + "step": 125000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7738056487668536e-05, + "loss": 4.692, + "step": 125500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772904476052777e-05, + "loss": 4.8163, + "step": 126000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7720033033387004e-05, + "loss": 4.7533, + "step": 126500 + }, + { + "epoch": 0.14, + "learning_rate": 4.771102130624625e-05, + "loss": 4.7933, + "step": 127000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7702009579105485e-05, + "loss": 4.7659, + "step": 127500 + }, + { + "epoch": 0.14, + "learning_rate": 4.769299785196472e-05, + "loss": 4.7502, + "step": 128000 + }, + { + "epoch": 0.14, + "learning_rate": 4.768398612482396e-05, + "loss": 4.7412, + "step": 128500 + }, + { + "epoch": 0.14, + "learning_rate": 4.76749743976832e-05, + "loss": 4.7917, + "step": 129000 + }, + { + "epoch": 0.14, + "learning_rate": 4.766596267054243e-05, + "loss": 4.7984, + "step": 129500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7656950943401666e-05, + "loss": 4.7151, + "step": 130000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76479392162609e-05, + "loss": 4.7101, + "step": 130500 + }, + { + "epoch": 0.14, + "learning_rate": 4.763892748912015e-05, + "loss": 4.7416, + "step": 131000 + }, + { + "epoch": 0.14, + "learning_rate": 4.7629915761979385e-05, + "loss": 4.7401, + "step": 131500 + }, + { + "epoch": 0.14, + "learning_rate": 4.762090403483862e-05, + "loss": 4.7234, + "step": 132000 + }, + { + "epoch": 0.14, + "learning_rate": 4.761189230769785e-05, + "loss": 4.7334, + "step": 132500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760288058055709e-05, + "loss": 4.7305, + "step": 133000 + }, + { + "epoch": 0.14, + "learning_rate": 4.759386885341633e-05, + "loss": 4.7889, + "step": 133500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7584857126275565e-05, + "loss": 4.7615, + "step": 134000 + }, + { + "epoch": 0.15, + "learning_rate": 4.75758453991348e-05, + "loss": 4.6827, + "step": 134500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7566833671994046e-05, + "loss": 4.7555, + "step": 135000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7557821944853284e-05, + "loss": 4.7644, + "step": 135500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7548810217712514e-05, + "loss": 4.7292, + "step": 136000 + }, + { + "epoch": 0.15, + "learning_rate": 4.753979849057175e-05, + "loss": 4.7221, + "step": 136500 + }, + { + "epoch": 0.15, + "learning_rate": 4.753078676343099e-05, + "loss": 4.7045, + "step": 137000 + }, + { + "epoch": 0.15, + "learning_rate": 4.752177503629023e-05, + "loss": 4.6832, + "step": 137500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7512763309149464e-05, + "loss": 4.7221, + "step": 138000 + }, + { + "epoch": 0.15, + "learning_rate": 4.75037515820087e-05, + "loss": 4.6595, + "step": 138500 + }, + { + "epoch": 0.15, + "learning_rate": 4.749473985486794e-05, + "loss": 4.7322, + "step": 139000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7485728127727176e-05, + "loss": 4.7332, + "step": 139500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7476716400586414e-05, + "loss": 4.7665, + "step": 140000 + }, + { + "epoch": 0.15, + "learning_rate": 4.746770467344565e-05, + "loss": 4.6936, + "step": 140500 + }, + { + "epoch": 0.15, + "learning_rate": 4.745869294630489e-05, + "loss": 4.7322, + "step": 141000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7449681219164126e-05, + "loss": 4.7406, + "step": 141500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7440669492023356e-05, + "loss": 4.757, + "step": 142000 + }, + { + "epoch": 0.15, + "learning_rate": 4.74316577648826e-05, + "loss": 4.7518, + "step": 142500 + }, + { + "epoch": 0.15, + "learning_rate": 4.742264603774184e-05, + "loss": 4.6843, + "step": 143000 + }, + { + "epoch": 0.16, + "learning_rate": 4.7413634310601075e-05, + "loss": 4.6937, + "step": 143500 + }, + { + "epoch": 0.16, + "learning_rate": 4.740462258346031e-05, + "loss": 4.7167, + "step": 144000 + }, + { + "epoch": 0.16, + "learning_rate": 4.739561085631955e-05, + "loss": 4.7101, + "step": 144500 + }, + { + "epoch": 0.16, + "learning_rate": 4.738659912917878e-05, + "loss": 4.7401, + "step": 145000 + }, + { + "epoch": 0.16, + "learning_rate": 4.737758740203802e-05, + "loss": 4.7357, + "step": 145500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7368575674897256e-05, + "loss": 4.7034, + "step": 146000 + }, + { + "epoch": 0.16, + "learning_rate": 4.73595639477565e-05, + "loss": 4.6982, + "step": 146500 + }, + { + "epoch": 0.16, + "learning_rate": 4.735055222061574e-05, + "loss": 4.729, + "step": 147000 + }, + { + "epoch": 0.16, + "learning_rate": 4.7341540493474975e-05, + "loss": 4.7402, + "step": 147500 + }, + { + "epoch": 0.16, + "learning_rate": 4.733252876633421e-05, + "loss": 4.7249, + "step": 148000 + }, + { + "epoch": 0.16, + "learning_rate": 4.732351703919344e-05, + "loss": 4.6795, + "step": 148500 + }, + { + "epoch": 0.16, + "learning_rate": 4.731450531205268e-05, + "loss": 4.7496, + "step": 149000 + }, + { + "epoch": 0.16, + "learning_rate": 4.730549358491192e-05, + "loss": 4.7258, + "step": 149500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7296481857771155e-05, + "loss": 4.7273, + "step": 150000 + }, + { + "epoch": 0.16, + "learning_rate": 4.72874701306304e-05, + "loss": 4.6983, + "step": 150500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7278458403489636e-05, + "loss": 4.6593, + "step": 151000 + }, + { + "epoch": 0.16, + "learning_rate": 4.726944667634887e-05, + "loss": 4.6931, + "step": 151500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7260434949208104e-05, + "loss": 4.6728, + "step": 152000 + }, + { + "epoch": 0.16, + "learning_rate": 4.725142322206734e-05, + "loss": 4.6942, + "step": 152500 + }, + { + "epoch": 0.17, + "learning_rate": 4.724241149492658e-05, + "loss": 4.655, + "step": 153000 + }, + { + "epoch": 0.17, + "learning_rate": 4.723339976778582e-05, + "loss": 4.6958, + "step": 153500 + }, + { + "epoch": 0.17, + "learning_rate": 4.7224388040645054e-05, + "loss": 4.727, + "step": 154000 + }, + { + "epoch": 0.17, + "learning_rate": 4.721537631350429e-05, + "loss": 4.7039, + "step": 154500 + }, + { + "epoch": 0.17, + "learning_rate": 4.720636458636353e-05, + "loss": 4.6621, + "step": 155000 + }, + { + "epoch": 0.17, + "learning_rate": 4.7197352859222766e-05, + "loss": 4.7307, + "step": 155500 + }, + { + "epoch": 0.17, + "learning_rate": 4.7188341132082004e-05, + "loss": 4.6781, + "step": 156000 + }, + { + "epoch": 0.17, + "learning_rate": 4.717932940494124e-05, + "loss": 4.6862, + "step": 156500 + }, + { + "epoch": 0.17, + "learning_rate": 4.717031767780048e-05, + "loss": 4.6321, + "step": 157000 + }, + { + "epoch": 0.17, + "learning_rate": 4.716130595065971e-05, + "loss": 4.6918, + "step": 157500 + }, + { + "epoch": 0.17, + "learning_rate": 4.715229422351895e-05, + "loss": 4.7254, + "step": 158000 + }, + { + "epoch": 0.17, + "learning_rate": 4.714328249637819e-05, + "loss": 4.6808, + "step": 158500 + }, + { + "epoch": 0.17, + "learning_rate": 4.713427076923743e-05, + "loss": 4.6929, + "step": 159000 + }, + { + "epoch": 0.17, + "learning_rate": 4.7125259042096665e-05, + "loss": 4.6183, + "step": 159500 + }, + { + "epoch": 0.17, + "learning_rate": 4.71162473149559e-05, + "loss": 4.6005, + "step": 160000 + }, + { + "epoch": 0.17, + "learning_rate": 4.710723558781514e-05, + "loss": 4.7159, + "step": 160500 + }, + { + "epoch": 0.17, + "learning_rate": 4.709822386067437e-05, + "loss": 4.6412, + "step": 161000 + }, + { + "epoch": 0.17, + "learning_rate": 4.708921213353361e-05, + "loss": 4.6927, + "step": 161500 + }, + { + "epoch": 0.18, + "learning_rate": 4.708020040639285e-05, + "loss": 4.7037, + "step": 162000 + }, + { + "epoch": 0.18, + "learning_rate": 4.707118867925209e-05, + "loss": 4.7063, + "step": 162500 + }, + { + "epoch": 0.18, + "learning_rate": 4.706217695211133e-05, + "loss": 4.739, + "step": 163000 + }, + { + "epoch": 0.18, + "learning_rate": 4.7053165224970565e-05, + "loss": 4.6985, + "step": 163500 + }, + { + "epoch": 0.18, + "learning_rate": 4.7044153497829795e-05, + "loss": 4.6828, + "step": 164000 + }, + { + "epoch": 0.18, + "learning_rate": 4.703514177068903e-05, + "loss": 4.7187, + "step": 164500 + }, + { + "epoch": 0.18, + "learning_rate": 4.702613004354827e-05, + "loss": 4.7055, + "step": 165000 + }, + { + "epoch": 0.18, + "learning_rate": 4.701711831640751e-05, + "loss": 4.6414, + "step": 165500 + }, + { + "epoch": 0.18, + "learning_rate": 4.7008106589266745e-05, + "loss": 4.6793, + "step": 166000 + }, + { + "epoch": 0.18, + "learning_rate": 4.699909486212599e-05, + "loss": 4.7155, + "step": 166500 + }, + { + "epoch": 0.18, + "learning_rate": 4.699008313498522e-05, + "loss": 4.6599, + "step": 167000 + }, + { + "epoch": 0.18, + "learning_rate": 4.698107140784446e-05, + "loss": 4.6949, + "step": 167500 + }, + { + "epoch": 0.18, + "learning_rate": 4.6972059680703695e-05, + "loss": 4.6781, + "step": 168000 + }, + { + "epoch": 0.18, + "learning_rate": 4.696304795356293e-05, + "loss": 4.6621, + "step": 168500 + }, + { + "epoch": 0.18, + "learning_rate": 4.695403622642217e-05, + "loss": 4.675, + "step": 169000 + }, + { + "epoch": 0.18, + "learning_rate": 4.694502449928141e-05, + "loss": 4.6254, + "step": 169500 + }, + { + "epoch": 0.18, + "learning_rate": 4.6936012772140644e-05, + "loss": 4.7044, + "step": 170000 + }, + { + "epoch": 0.18, + "learning_rate": 4.692700104499988e-05, + "loss": 4.6353, + "step": 170500 + }, + { + "epoch": 0.18, + "learning_rate": 4.691798931785912e-05, + "loss": 4.6393, + "step": 171000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6908977590718356e-05, + "loss": 4.6692, + "step": 171500 + }, + { + "epoch": 0.19, + "learning_rate": 4.6899965863577594e-05, + "loss": 4.6501, + "step": 172000 + }, + { + "epoch": 0.19, + "learning_rate": 4.689095413643683e-05, + "loss": 4.6289, + "step": 172500 + }, + { + "epoch": 0.19, + "learning_rate": 4.688194240929607e-05, + "loss": 4.6656, + "step": 173000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6872930682155306e-05, + "loss": 4.6542, + "step": 173500 + }, + { + "epoch": 0.19, + "learning_rate": 4.686391895501454e-05, + "loss": 4.678, + "step": 174000 + }, + { + "epoch": 0.19, + "learning_rate": 4.685490722787378e-05, + "loss": 4.648, + "step": 174500 + }, + { + "epoch": 0.19, + "learning_rate": 4.684589550073302e-05, + "loss": 4.6518, + "step": 175000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6836883773592256e-05, + "loss": 4.7169, + "step": 175500 + }, + { + "epoch": 0.19, + "learning_rate": 4.682787204645149e-05, + "loss": 4.6243, + "step": 176000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6818860319310724e-05, + "loss": 4.6988, + "step": 176500 + }, + { + "epoch": 0.19, + "learning_rate": 4.680984859216996e-05, + "loss": 4.5944, + "step": 177000 + }, + { + "epoch": 0.19, + "learning_rate": 4.68008368650292e-05, + "loss": 4.7104, + "step": 177500 + }, + { + "epoch": 0.19, + "learning_rate": 4.679182513788844e-05, + "loss": 4.6633, + "step": 178000 + }, + { + "epoch": 0.19, + "learning_rate": 4.678281341074768e-05, + "loss": 4.6841, + "step": 178500 + }, + { + "epoch": 0.19, + "learning_rate": 4.677380168360692e-05, + "loss": 4.6535, + "step": 179000 + }, + { + "epoch": 0.19, + "learning_rate": 4.676478995646615e-05, + "loss": 4.7139, + "step": 179500 + }, + { + "epoch": 0.19, + "learning_rate": 4.6755778229325385e-05, + "loss": 4.6433, + "step": 180000 + }, + { + "epoch": 0.2, + "learning_rate": 4.674676650218462e-05, + "loss": 4.7148, + "step": 180500 + }, + { + "epoch": 0.2, + "learning_rate": 4.673775477504386e-05, + "loss": 4.6483, + "step": 181000 + }, + { + "epoch": 0.2, + "learning_rate": 4.67287430479031e-05, + "loss": 4.6044, + "step": 181500 + }, + { + "epoch": 0.2, + "learning_rate": 4.671973132076234e-05, + "loss": 4.6271, + "step": 182000 + }, + { + "epoch": 0.2, + "learning_rate": 4.671071959362157e-05, + "loss": 4.6416, + "step": 182500 + }, + { + "epoch": 0.2, + "learning_rate": 4.670170786648081e-05, + "loss": 4.6732, + "step": 183000 + }, + { + "epoch": 0.2, + "learning_rate": 4.669269613934005e-05, + "loss": 4.6461, + "step": 183500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6683684412199285e-05, + "loss": 4.6583, + "step": 184000 + }, + { + "epoch": 0.2, + "learning_rate": 4.667467268505852e-05, + "loss": 4.6572, + "step": 184500 + }, + { + "epoch": 0.2, + "learning_rate": 4.666566095791776e-05, + "loss": 4.6394, + "step": 185000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6656649230777e-05, + "loss": 4.676, + "step": 185500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6647637503636234e-05, + "loss": 4.6573, + "step": 186000 + }, + { + "epoch": 0.2, + "learning_rate": 4.663862577649547e-05, + "loss": 4.6528, + "step": 186500 + }, + { + "epoch": 0.2, + "learning_rate": 4.662961404935471e-05, + "loss": 4.658, + "step": 187000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6620602322213946e-05, + "loss": 4.6363, + "step": 187500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6611590595073184e-05, + "loss": 4.6629, + "step": 188000 + }, + { + "epoch": 0.2, + "learning_rate": 4.660257886793242e-05, + "loss": 4.6319, + "step": 188500 + }, + { + "epoch": 0.2, + "learning_rate": 4.659356714079166e-05, + "loss": 4.6833, + "step": 189000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6584555413650896e-05, + "loss": 4.586, + "step": 189500 + }, + { + "epoch": 0.21, + "learning_rate": 4.657554368651013e-05, + "loss": 4.6757, + "step": 190000 + }, + { + "epoch": 0.21, + "learning_rate": 4.656653195936937e-05, + "loss": 4.6509, + "step": 190500 + }, + { + "epoch": 0.21, + "learning_rate": 4.655752023222861e-05, + "loss": 4.6792, + "step": 191000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6548508505087846e-05, + "loss": 4.6738, + "step": 191500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6539496777947076e-05, + "loss": 4.6407, + "step": 192000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6530485050806314e-05, + "loss": 4.6581, + "step": 192500 + }, + { + "epoch": 0.21, + "learning_rate": 4.652147332366555e-05, + "loss": 4.688, + "step": 193000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6512461596524795e-05, + "loss": 4.6858, + "step": 193500 + }, + { + "epoch": 0.21, + "learning_rate": 4.650344986938403e-05, + "loss": 4.6618, + "step": 194000 + }, + { + "epoch": 0.21, + "learning_rate": 4.649443814224327e-05, + "loss": 4.6565, + "step": 194500 + }, + { + "epoch": 0.21, + "learning_rate": 4.64854264151025e-05, + "loss": 4.6477, + "step": 195000 + }, + { + "epoch": 0.21, + "learning_rate": 4.647641468796174e-05, + "loss": 4.6347, + "step": 195500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6467402960820975e-05, + "loss": 4.6384, + "step": 196000 + }, + { + "epoch": 0.21, + "learning_rate": 4.645839123368021e-05, + "loss": 4.6041, + "step": 196500 + }, + { + "epoch": 0.21, + "learning_rate": 4.644937950653945e-05, + "loss": 4.6302, + "step": 197000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6440367779398694e-05, + "loss": 4.582, + "step": 197500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6431356052257925e-05, + "loss": 4.6465, + "step": 198000 + }, + { + "epoch": 0.21, + "learning_rate": 4.642234432511716e-05, + "loss": 4.6427, + "step": 198500 + }, + { + "epoch": 0.22, + "learning_rate": 4.64133325979764e-05, + "loss": 4.6421, + "step": 199000 + }, + { + "epoch": 0.22, + "learning_rate": 4.640432087083564e-05, + "loss": 4.6108, + "step": 199500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6395309143694875e-05, + "loss": 4.6228, + "step": 200000 + }, + { + "epoch": 0.22, + "learning_rate": 4.638629741655411e-05, + "loss": 4.5645, + "step": 200500 + }, + { + "epoch": 0.22, + "learning_rate": 4.637728568941335e-05, + "loss": 4.5875, + "step": 201000 + }, + { + "epoch": 0.22, + "learning_rate": 4.636827396227259e-05, + "loss": 4.6283, + "step": 201500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6359262235131824e-05, + "loss": 4.6218, + "step": 202000 + }, + { + "epoch": 0.22, + "learning_rate": 4.635025050799106e-05, + "loss": 4.6801, + "step": 202500 + }, + { + "epoch": 0.22, + "learning_rate": 4.63412387808503e-05, + "loss": 4.6695, + "step": 203000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6332227053709536e-05, + "loss": 4.684, + "step": 203500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6323215326568774e-05, + "loss": 4.5908, + "step": 204000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6314203599428004e-05, + "loss": 4.6085, + "step": 204500 + }, + { + "epoch": 0.22, + "learning_rate": 4.630519187228725e-05, + "loss": 4.6316, + "step": 205000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6296180145146486e-05, + "loss": 4.6607, + "step": 205500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6287168418005723e-05, + "loss": 4.6351, + "step": 206000 + }, + { + "epoch": 0.22, + "learning_rate": 4.627815669086496e-05, + "loss": 4.6443, + "step": 206500 + }, + { + "epoch": 0.22, + "learning_rate": 4.62691449637242e-05, + "loss": 4.6842, + "step": 207000 + }, + { + "epoch": 0.22, + "learning_rate": 4.626013323658343e-05, + "loss": 4.6173, + "step": 207500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6251121509442666e-05, + "loss": 4.593, + "step": 208000 + }, + { + "epoch": 0.23, + "learning_rate": 4.6242109782301904e-05, + "loss": 4.606, + "step": 208500 + }, + { + "epoch": 0.23, + "learning_rate": 4.623309805516115e-05, + "loss": 4.6188, + "step": 209000 + }, + { + "epoch": 0.23, + "learning_rate": 4.6224086328020385e-05, + "loss": 4.6307, + "step": 209500 + }, + { + "epoch": 0.23, + "learning_rate": 4.621507460087962e-05, + "loss": 4.5919, + "step": 210000 + }, + { + "epoch": 0.23, + "learning_rate": 4.620606287373885e-05, + "loss": 4.6507, + "step": 210500 + }, + { + "epoch": 0.23, + "learning_rate": 4.619705114659809e-05, + "loss": 4.6382, + "step": 211000 + }, + { + "epoch": 0.23, + "learning_rate": 4.618803941945733e-05, + "loss": 4.5784, + "step": 211500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6179027692316565e-05, + "loss": 4.5813, + "step": 212000 + }, + { + "epoch": 0.23, + "learning_rate": 4.61700159651758e-05, + "loss": 4.6059, + "step": 212500 + }, + { + "epoch": 0.23, + "learning_rate": 4.616100423803505e-05, + "loss": 4.5996, + "step": 213000 + }, + { + "epoch": 0.23, + "learning_rate": 4.6151992510894284e-05, + "loss": 4.6524, + "step": 213500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6142980783753515e-05, + "loss": 4.6452, + "step": 214000 + }, + { + "epoch": 0.23, + "learning_rate": 4.613396905661275e-05, + "loss": 4.6752, + "step": 214500 + }, + { + "epoch": 0.23, + "learning_rate": 4.612495732947199e-05, + "loss": 4.5912, + "step": 215000 + }, + { + "epoch": 0.23, + "learning_rate": 4.611594560233123e-05, + "loss": 4.6646, + "step": 215500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6106933875190465e-05, + "loss": 4.6234, + "step": 216000 + }, + { + "epoch": 0.23, + "learning_rate": 4.60979221480497e-05, + "loss": 4.6457, + "step": 216500 + }, + { + "epoch": 0.23, + "learning_rate": 4.608891042090894e-05, + "loss": 4.6285, + "step": 217000 + }, + { + "epoch": 0.24, + "learning_rate": 4.607989869376818e-05, + "loss": 4.6047, + "step": 217500 + }, + { + "epoch": 0.24, + "learning_rate": 4.6070886966627414e-05, + "loss": 4.5877, + "step": 218000 + }, + { + "epoch": 0.24, + "learning_rate": 4.606187523948665e-05, + "loss": 4.6101, + "step": 218500 + }, + { + "epoch": 0.24, + "learning_rate": 4.605286351234589e-05, + "loss": 4.6867, + "step": 219000 + }, + { + "epoch": 0.24, + "learning_rate": 4.6043851785205126e-05, + "loss": 4.6508, + "step": 219500 + }, + { + "epoch": 0.24, + "learning_rate": 4.603484005806436e-05, + "loss": 4.6099, + "step": 220000 + }, + { + "epoch": 0.24, + "learning_rate": 4.60258283309236e-05, + "loss": 4.6508, + "step": 220500 + }, + { + "epoch": 0.24, + "learning_rate": 4.601681660378284e-05, + "loss": 4.6105, + "step": 221000 + }, + { + "epoch": 0.24, + "learning_rate": 4.6007804876642076e-05, + "loss": 4.6001, + "step": 221500 + }, + { + "epoch": 0.24, + "learning_rate": 4.5998793149501313e-05, + "loss": 4.6344, + "step": 222000 + }, + { + "epoch": 0.24, + "learning_rate": 4.598978142236055e-05, + "loss": 4.585, + "step": 222500 + }, + { + "epoch": 0.24, + "learning_rate": 4.598076969521978e-05, + "loss": 4.5558, + "step": 223000 + }, + { + "epoch": 0.24, + "learning_rate": 4.597175796807902e-05, + "loss": 4.5825, + "step": 223500 + }, + { + "epoch": 0.24, + "learning_rate": 4.5962746240938256e-05, + "loss": 4.5569, + "step": 224000 + }, + { + "epoch": 0.24, + "learning_rate": 4.59537345137975e-05, + "loss": 4.5647, + "step": 224500 + }, + { + "epoch": 0.24, + "learning_rate": 4.594472278665674e-05, + "loss": 4.5887, + "step": 225000 + }, + { + "epoch": 0.24, + "learning_rate": 4.5935711059515975e-05, + "loss": 4.5825, + "step": 225500 + }, + { + "epoch": 0.24, + "learning_rate": 4.5926699332375206e-05, + "loss": 4.5739, + "step": 226000 + }, + { + "epoch": 0.24, + "learning_rate": 4.591768760523444e-05, + "loss": 4.5726, + "step": 226500 + }, + { + "epoch": 0.25, + "learning_rate": 4.590867587809368e-05, + "loss": 4.6447, + "step": 227000 + }, + { + "epoch": 0.25, + "learning_rate": 4.589966415095292e-05, + "loss": 4.5851, + "step": 227500 + }, + { + "epoch": 0.25, + "learning_rate": 4.5890652423812155e-05, + "loss": 4.5571, + "step": 228000 + }, + { + "epoch": 0.25, + "learning_rate": 4.58816406966714e-05, + "loss": 4.5877, + "step": 228500 + }, + { + "epoch": 0.25, + "learning_rate": 4.587262896953064e-05, + "loss": 4.5896, + "step": 229000 + }, + { + "epoch": 0.25, + "learning_rate": 4.586361724238987e-05, + "loss": 4.591, + "step": 229500 + }, + { + "epoch": 0.25, + "learning_rate": 4.5854605515249105e-05, + "loss": 4.5587, + "step": 230000 + }, + { + "epoch": 0.25, + "learning_rate": 4.584559378810834e-05, + "loss": 4.5871, + "step": 230500 + }, + { + "epoch": 0.25, + "learning_rate": 4.583658206096758e-05, + "loss": 4.6129, + "step": 231000 + }, + { + "epoch": 0.25, + "learning_rate": 4.582757033382682e-05, + "loss": 4.5838, + "step": 231500 + }, + { + "epoch": 0.25, + "learning_rate": 4.5818558606686055e-05, + "loss": 4.6555, + "step": 232000 + }, + { + "epoch": 0.25, + "learning_rate": 4.580954687954529e-05, + "loss": 4.5784, + "step": 232500 + }, + { + "epoch": 0.25, + "learning_rate": 4.580053515240453e-05, + "loss": 4.5853, + "step": 233000 + }, + { + "epoch": 0.25, + "learning_rate": 4.579152342526377e-05, + "loss": 4.5536, + "step": 233500 + }, + { + "epoch": 0.25, + "learning_rate": 4.5782511698123004e-05, + "loss": 4.6067, + "step": 234000 + }, + { + "epoch": 0.25, + "learning_rate": 4.577349997098224e-05, + "loss": 4.6091, + "step": 234500 + }, + { + "epoch": 0.25, + "learning_rate": 4.576448824384148e-05, + "loss": 4.5912, + "step": 235000 + }, + { + "epoch": 0.25, + "learning_rate": 4.575547651670071e-05, + "loss": 4.5887, + "step": 235500 + }, + { + "epoch": 0.26, + "learning_rate": 4.5746464789559954e-05, + "loss": 4.5748, + "step": 236000 + }, + { + "epoch": 0.26, + "learning_rate": 4.573745306241919e-05, + "loss": 4.537, + "step": 236500 + }, + { + "epoch": 0.26, + "learning_rate": 4.572844133527843e-05, + "loss": 4.518, + "step": 237000 + }, + { + "epoch": 0.26, + "learning_rate": 4.5719429608137666e-05, + "loss": 4.5982, + "step": 237500 + }, + { + "epoch": 0.26, + "learning_rate": 4.5710417880996904e-05, + "loss": 4.5996, + "step": 238000 + }, + { + "epoch": 0.26, + "learning_rate": 4.5701406153856134e-05, + "loss": 4.6103, + "step": 238500 + }, + { + "epoch": 0.26, + "learning_rate": 4.569239442671537e-05, + "loss": 4.5725, + "step": 239000 + }, + { + "epoch": 0.26, + "learning_rate": 4.568338269957461e-05, + "loss": 4.6039, + "step": 239500 + }, + { + "epoch": 0.26, + "learning_rate": 4.567437097243385e-05, + "loss": 4.5271, + "step": 240000 + }, + { + "epoch": 0.26, + "learning_rate": 4.566535924529309e-05, + "loss": 4.6387, + "step": 240500 + }, + { + "epoch": 0.26, + "learning_rate": 4.565634751815233e-05, + "loss": 4.5238, + "step": 241000 + }, + { + "epoch": 0.26, + "learning_rate": 4.5647335791011565e-05, + "loss": 4.5608, + "step": 241500 + }, + { + "epoch": 0.26, + "learning_rate": 4.5638324063870796e-05, + "loss": 4.582, + "step": 242000 + }, + { + "epoch": 0.26, + "learning_rate": 4.562931233673003e-05, + "loss": 4.5491, + "step": 242500 + }, + { + "epoch": 0.26, + "learning_rate": 4.562030060958927e-05, + "loss": 4.5778, + "step": 243000 + }, + { + "epoch": 0.26, + "learning_rate": 4.561128888244851e-05, + "loss": 4.6373, + "step": 243500 + }, + { + "epoch": 0.26, + "learning_rate": 4.5602277155307746e-05, + "loss": 4.6209, + "step": 244000 + }, + { + "epoch": 0.26, + "learning_rate": 4.559326542816699e-05, + "loss": 4.5673, + "step": 244500 + }, + { + "epoch": 0.26, + "learning_rate": 4.558425370102622e-05, + "loss": 4.5685, + "step": 245000 + }, + { + "epoch": 0.27, + "learning_rate": 4.557524197388546e-05, + "loss": 4.5357, + "step": 245500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5566230246744695e-05, + "loss": 4.576, + "step": 246000 + }, + { + "epoch": 0.27, + "learning_rate": 4.555721851960393e-05, + "loss": 4.602, + "step": 246500 + }, + { + "epoch": 0.27, + "learning_rate": 4.554820679246317e-05, + "loss": 4.4973, + "step": 247000 + }, + { + "epoch": 0.27, + "learning_rate": 4.553919506532241e-05, + "loss": 4.5782, + "step": 247500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5530183338181645e-05, + "loss": 4.5825, + "step": 248000 + }, + { + "epoch": 0.27, + "learning_rate": 4.552117161104088e-05, + "loss": 4.5145, + "step": 248500 + }, + { + "epoch": 0.27, + "learning_rate": 4.551215988390012e-05, + "loss": 4.5698, + "step": 249000 + }, + { + "epoch": 0.27, + "learning_rate": 4.550314815675936e-05, + "loss": 4.5806, + "step": 249500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5494136429618594e-05, + "loss": 4.5052, + "step": 250000 + }, + { + "epoch": 0.27, + "learning_rate": 4.548512470247783e-05, + "loss": 4.5976, + "step": 250500 + }, + { + "epoch": 0.27, + "learning_rate": 4.547611297533706e-05, + "loss": 4.5667, + "step": 251000 + }, + { + "epoch": 0.27, + "learning_rate": 4.5467101248196307e-05, + "loss": 4.5431, + "step": 251500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5458089521055544e-05, + "loss": 4.5659, + "step": 252000 + }, + { + "epoch": 0.27, + "learning_rate": 4.544907779391478e-05, + "loss": 4.5484, + "step": 252500 + }, + { + "epoch": 0.27, + "learning_rate": 4.544006606677402e-05, + "loss": 4.5668, + "step": 253000 + }, + { + "epoch": 0.27, + "learning_rate": 4.5431054339633256e-05, + "loss": 4.5605, + "step": 253500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5422042612492494e-05, + "loss": 4.5686, + "step": 254000 + }, + { + "epoch": 0.28, + "learning_rate": 4.5413030885351724e-05, + "loss": 4.5157, + "step": 254500 + }, + { + "epoch": 0.28, + "learning_rate": 4.540401915821096e-05, + "loss": 4.5193, + "step": 255000 + }, + { + "epoch": 0.28, + "learning_rate": 4.53950074310702e-05, + "loss": 4.5781, + "step": 255500 + }, + { + "epoch": 0.28, + "learning_rate": 4.538599570392944e-05, + "loss": 4.5719, + "step": 256000 + }, + { + "epoch": 0.28, + "learning_rate": 4.537698397678868e-05, + "loss": 4.5509, + "step": 256500 + }, + { + "epoch": 0.28, + "learning_rate": 4.536797224964792e-05, + "loss": 4.5617, + "step": 257000 + }, + { + "epoch": 0.28, + "learning_rate": 4.535896052250715e-05, + "loss": 4.5149, + "step": 257500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5349948795366386e-05, + "loss": 4.5628, + "step": 258000 + }, + { + "epoch": 0.28, + "learning_rate": 4.534093706822562e-05, + "loss": 4.5809, + "step": 258500 + }, + { + "epoch": 0.28, + "learning_rate": 4.533192534108486e-05, + "loss": 4.525, + "step": 259000 + }, + { + "epoch": 0.28, + "learning_rate": 4.53229136139441e-05, + "loss": 4.5036, + "step": 259500 + }, + { + "epoch": 0.28, + "learning_rate": 4.531390188680334e-05, + "loss": 4.5399, + "step": 260000 + }, + { + "epoch": 0.28, + "learning_rate": 4.530489015966257e-05, + "loss": 4.5939, + "step": 260500 + }, + { + "epoch": 0.28, + "learning_rate": 4.529587843252181e-05, + "loss": 4.605, + "step": 261000 + }, + { + "epoch": 0.28, + "learning_rate": 4.528686670538105e-05, + "loss": 4.4758, + "step": 261500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5277854978240285e-05, + "loss": 4.5757, + "step": 262000 + }, + { + "epoch": 0.28, + "learning_rate": 4.526884325109952e-05, + "loss": 4.5944, + "step": 262500 + }, + { + "epoch": 0.28, + "learning_rate": 4.525983152395876e-05, + "loss": 4.5485, + "step": 263000 + }, + { + "epoch": 0.28, + "learning_rate": 4.5250819796818e-05, + "loss": 4.6034, + "step": 263500 + }, + { + "epoch": 0.29, + "learning_rate": 4.5241808069677235e-05, + "loss": 4.5887, + "step": 264000 + }, + { + "epoch": 0.29, + "learning_rate": 4.523279634253647e-05, + "loss": 4.5265, + "step": 264500 + }, + { + "epoch": 0.29, + "learning_rate": 4.522378461539571e-05, + "loss": 4.5177, + "step": 265000 + }, + { + "epoch": 0.29, + "learning_rate": 4.521477288825495e-05, + "loss": 4.6046, + "step": 265500 + }, + { + "epoch": 0.29, + "learning_rate": 4.5205761161114184e-05, + "loss": 4.5481, + "step": 266000 + }, + { + "epoch": 0.29, + "learning_rate": 4.519674943397342e-05, + "loss": 4.5171, + "step": 266500 + }, + { + "epoch": 0.29, + "learning_rate": 4.518773770683265e-05, + "loss": 4.522, + "step": 267000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5178725979691897e-05, + "loss": 4.5399, + "step": 267500 + }, + { + "epoch": 0.29, + "learning_rate": 4.5169714252551134e-05, + "loss": 4.5763, + "step": 268000 + }, + { + "epoch": 0.29, + "learning_rate": 4.516070252541037e-05, + "loss": 4.5583, + "step": 268500 + }, + { + "epoch": 0.29, + "learning_rate": 4.515169079826961e-05, + "loss": 4.569, + "step": 269000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5142679071128846e-05, + "loss": 4.5812, + "step": 269500 + }, + { + "epoch": 0.29, + "learning_rate": 4.513366734398808e-05, + "loss": 4.5136, + "step": 270000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5124655616847314e-05, + "loss": 4.525, + "step": 270500 + }, + { + "epoch": 0.29, + "learning_rate": 4.511564388970655e-05, + "loss": 4.4892, + "step": 271000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5106632162565796e-05, + "loss": 4.4955, + "step": 271500 + }, + { + "epoch": 0.29, + "learning_rate": 4.509762043542503e-05, + "loss": 4.5783, + "step": 272000 + }, + { + "epoch": 0.29, + "learning_rate": 4.508860870828427e-05, + "loss": 4.5685, + "step": 272500 + }, + { + "epoch": 0.3, + "learning_rate": 4.50795969811435e-05, + "loss": 4.5577, + "step": 273000 + }, + { + "epoch": 0.3, + "learning_rate": 4.507058525400274e-05, + "loss": 4.6029, + "step": 273500 + }, + { + "epoch": 0.3, + "learning_rate": 4.5061573526861976e-05, + "loss": 4.5451, + "step": 274000 + }, + { + "epoch": 0.3, + "learning_rate": 4.5052561799721213e-05, + "loss": 4.5816, + "step": 274500 + }, + { + "epoch": 0.3, + "learning_rate": 4.504355007258045e-05, + "loss": 4.4644, + "step": 275000 + }, + { + "epoch": 0.3, + "learning_rate": 4.5034538345439695e-05, + "loss": 4.5545, + "step": 275500 + }, + { + "epoch": 0.3, + "learning_rate": 4.5025526618298926e-05, + "loss": 4.5686, + "step": 276000 + }, + { + "epoch": 0.3, + "learning_rate": 4.501651489115816e-05, + "loss": 4.598, + "step": 276500 + }, + { + "epoch": 0.3, + "learning_rate": 4.50075031640174e-05, + "loss": 4.5273, + "step": 277000 + }, + { + "epoch": 0.3, + "learning_rate": 4.499849143687664e-05, + "loss": 4.498, + "step": 277500 + }, + { + "epoch": 0.3, + "learning_rate": 4.4989479709735875e-05, + "loss": 4.5226, + "step": 278000 + }, + { + "epoch": 0.3, + "learning_rate": 4.498046798259511e-05, + "loss": 4.5453, + "step": 278500 + }, + { + "epoch": 0.3, + "learning_rate": 4.497145625545435e-05, + "loss": 4.5878, + "step": 279000 + }, + { + "epoch": 0.3, + "learning_rate": 4.496244452831359e-05, + "loss": 4.4889, + "step": 279500 + }, + { + "epoch": 0.3, + "learning_rate": 4.4953432801172825e-05, + "loss": 4.531, + "step": 280000 + }, + { + "epoch": 0.3, + "learning_rate": 4.494442107403206e-05, + "loss": 4.5446, + "step": 280500 + }, + { + "epoch": 0.3, + "learning_rate": 4.49354093468913e-05, + "loss": 4.5806, + "step": 281000 + }, + { + "epoch": 0.3, + "learning_rate": 4.492639761975054e-05, + "loss": 4.5838, + "step": 281500 + }, + { + "epoch": 0.3, + "learning_rate": 4.4917385892609774e-05, + "loss": 4.5335, + "step": 282000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4908374165469005e-05, + "loss": 4.4475, + "step": 282500 + }, + { + "epoch": 0.31, + "learning_rate": 4.489936243832825e-05, + "loss": 4.4819, + "step": 283000 + }, + { + "epoch": 0.31, + "learning_rate": 4.489035071118749e-05, + "loss": 4.4793, + "step": 283500 + }, + { + "epoch": 0.31, + "learning_rate": 4.4881338984046724e-05, + "loss": 4.5127, + "step": 284000 + }, + { + "epoch": 0.31, + "learning_rate": 4.487232725690596e-05, + "loss": 4.5424, + "step": 284500 + }, + { + "epoch": 0.31, + "learning_rate": 4.48633155297652e-05, + "loss": 4.4961, + "step": 285000 + }, + { + "epoch": 0.31, + "learning_rate": 4.485430380262443e-05, + "loss": 4.5096, + "step": 285500 + }, + { + "epoch": 0.31, + "learning_rate": 4.484529207548367e-05, + "loss": 4.5307, + "step": 286000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4836280348342904e-05, + "loss": 4.5627, + "step": 286500 + }, + { + "epoch": 0.31, + "learning_rate": 4.482726862120215e-05, + "loss": 4.526, + "step": 287000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4818256894061386e-05, + "loss": 4.5265, + "step": 287500 + }, + { + "epoch": 0.31, + "learning_rate": 4.480924516692062e-05, + "loss": 4.5695, + "step": 288000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4800233439779854e-05, + "loss": 4.5962, + "step": 288500 + }, + { + "epoch": 0.31, + "learning_rate": 4.479122171263909e-05, + "loss": 4.5317, + "step": 289000 + }, + { + "epoch": 0.31, + "learning_rate": 4.478220998549833e-05, + "loss": 4.5309, + "step": 289500 + }, + { + "epoch": 0.31, + "learning_rate": 4.4773198258357566e-05, + "loss": 4.5631, + "step": 290000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4764186531216803e-05, + "loss": 4.5116, + "step": 290500 + }, + { + "epoch": 0.31, + "learning_rate": 4.475517480407605e-05, + "loss": 4.5155, + "step": 291000 + }, + { + "epoch": 0.32, + "learning_rate": 4.474616307693528e-05, + "loss": 4.5214, + "step": 291500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4737151349794516e-05, + "loss": 4.5764, + "step": 292000 + }, + { + "epoch": 0.32, + "learning_rate": 4.472813962265375e-05, + "loss": 4.5458, + "step": 292500 + }, + { + "epoch": 0.32, + "learning_rate": 4.471912789551299e-05, + "loss": 4.5426, + "step": 293000 + }, + { + "epoch": 0.32, + "learning_rate": 4.471011616837223e-05, + "loss": 4.5371, + "step": 293500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4701104441231465e-05, + "loss": 4.5577, + "step": 294000 + }, + { + "epoch": 0.32, + "learning_rate": 4.46920927140907e-05, + "loss": 4.4803, + "step": 294500 + }, + { + "epoch": 0.32, + "learning_rate": 4.468308098694994e-05, + "loss": 4.5597, + "step": 295000 + }, + { + "epoch": 0.32, + "learning_rate": 4.467406925980918e-05, + "loss": 4.5193, + "step": 295500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4665057532668415e-05, + "loss": 4.4773, + "step": 296000 + }, + { + "epoch": 0.32, + "learning_rate": 4.465604580552765e-05, + "loss": 4.5625, + "step": 296500 + }, + { + "epoch": 0.32, + "learning_rate": 4.464703407838689e-05, + "loss": 4.5206, + "step": 297000 + }, + { + "epoch": 0.32, + "learning_rate": 4.463802235124613e-05, + "loss": 4.49, + "step": 297500 + }, + { + "epoch": 0.32, + "learning_rate": 4.462901062410536e-05, + "loss": 4.511, + "step": 298000 + }, + { + "epoch": 0.32, + "learning_rate": 4.46199988969646e-05, + "loss": 4.4423, + "step": 298500 + }, + { + "epoch": 0.32, + "learning_rate": 4.461098716982384e-05, + "loss": 4.5147, + "step": 299000 + }, + { + "epoch": 0.32, + "learning_rate": 4.460197544268308e-05, + "loss": 4.5474, + "step": 299500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4592963715542314e-05, + "loss": 4.5493, + "step": 300000 + }, + { + "epoch": 0.32, + "learning_rate": 4.458395198840155e-05, + "loss": 4.49, + "step": 300500 + }, + { + "epoch": 0.33, + "learning_rate": 4.457494026126078e-05, + "loss": 4.5583, + "step": 301000 + }, + { + "epoch": 0.33, + "learning_rate": 4.456592853412002e-05, + "loss": 4.5288, + "step": 301500 + }, + { + "epoch": 0.33, + "learning_rate": 4.455691680697926e-05, + "loss": 4.5368, + "step": 302000 + }, + { + "epoch": 0.33, + "learning_rate": 4.45479050798385e-05, + "loss": 4.5171, + "step": 302500 + }, + { + "epoch": 0.33, + "learning_rate": 4.453889335269774e-05, + "loss": 4.4643, + "step": 303000 + }, + { + "epoch": 0.33, + "learning_rate": 4.4529881625556976e-05, + "loss": 4.4714, + "step": 303500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4520869898416207e-05, + "loss": 4.5301, + "step": 304000 + }, + { + "epoch": 0.33, + "learning_rate": 4.4511858171275444e-05, + "loss": 4.4925, + "step": 304500 + }, + { + "epoch": 0.33, + "learning_rate": 4.450284644413468e-05, + "loss": 4.534, + "step": 305000 + }, + { + "epoch": 0.33, + "learning_rate": 4.449383471699392e-05, + "loss": 4.5594, + "step": 305500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4484822989853156e-05, + "loss": 4.5079, + "step": 306000 + }, + { + "epoch": 0.33, + "learning_rate": 4.4475811262712394e-05, + "loss": 4.5475, + "step": 306500 + }, + { + "epoch": 0.33, + "learning_rate": 4.446679953557164e-05, + "loss": 4.4948, + "step": 307000 + }, + { + "epoch": 0.33, + "learning_rate": 4.445778780843087e-05, + "loss": 4.5257, + "step": 307500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4448776081290106e-05, + "loss": 4.4913, + "step": 308000 + }, + { + "epoch": 0.33, + "learning_rate": 4.443976435414934e-05, + "loss": 4.5662, + "step": 308500 + }, + { + "epoch": 0.33, + "learning_rate": 4.443075262700858e-05, + "loss": 4.5267, + "step": 309000 + }, + { + "epoch": 0.33, + "learning_rate": 4.442174089986782e-05, + "loss": 4.4999, + "step": 309500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4412729172727055e-05, + "loss": 4.5111, + "step": 310000 + }, + { + "epoch": 0.34, + "learning_rate": 4.440371744558629e-05, + "loss": 4.5107, + "step": 310500 + }, + { + "epoch": 0.34, + "learning_rate": 4.439470571844553e-05, + "loss": 4.5682, + "step": 311000 + }, + { + "epoch": 0.34, + "learning_rate": 4.438569399130477e-05, + "loss": 4.5016, + "step": 311500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4376682264164005e-05, + "loss": 4.4973, + "step": 312000 + }, + { + "epoch": 0.34, + "learning_rate": 4.436767053702324e-05, + "loss": 4.5022, + "step": 312500 + }, + { + "epoch": 0.34, + "learning_rate": 4.435865880988248e-05, + "loss": 4.5441, + "step": 313000 + }, + { + "epoch": 0.34, + "learning_rate": 4.434964708274171e-05, + "loss": 4.5459, + "step": 313500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4340635355600955e-05, + "loss": 4.5141, + "step": 314000 + }, + { + "epoch": 0.34, + "learning_rate": 4.433162362846019e-05, + "loss": 4.5329, + "step": 314500 + }, + { + "epoch": 0.34, + "learning_rate": 4.432261190131943e-05, + "loss": 4.5172, + "step": 315000 + }, + { + "epoch": 0.34, + "learning_rate": 4.431360017417867e-05, + "loss": 4.5264, + "step": 315500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4304588447037904e-05, + "loss": 4.5352, + "step": 316000 + }, + { + "epoch": 0.34, + "learning_rate": 4.4295576719897135e-05, + "loss": 4.5287, + "step": 316500 + }, + { + "epoch": 0.34, + "learning_rate": 4.428656499275637e-05, + "loss": 4.5314, + "step": 317000 + }, + { + "epoch": 0.34, + "learning_rate": 4.427755326561561e-05, + "loss": 4.5219, + "step": 317500 + }, + { + "epoch": 0.34, + "learning_rate": 4.426854153847485e-05, + "loss": 4.5136, + "step": 318000 + }, + { + "epoch": 0.34, + "learning_rate": 4.425952981133409e-05, + "loss": 4.483, + "step": 318500 + }, + { + "epoch": 0.34, + "learning_rate": 4.425051808419333e-05, + "loss": 4.4569, + "step": 319000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4241506357052566e-05, + "loss": 4.522, + "step": 319500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4232494629911797e-05, + "loss": 4.4795, + "step": 320000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4223482902771034e-05, + "loss": 4.5522, + "step": 320500 + }, + { + "epoch": 0.35, + "learning_rate": 4.421447117563027e-05, + "loss": 4.49, + "step": 321000 + }, + { + "epoch": 0.35, + "learning_rate": 4.420545944848951e-05, + "loss": 4.5291, + "step": 321500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4196447721348746e-05, + "loss": 4.5003, + "step": 322000 + }, + { + "epoch": 0.35, + "learning_rate": 4.418743599420799e-05, + "loss": 4.4768, + "step": 322500 + }, + { + "epoch": 0.35, + "learning_rate": 4.417842426706722e-05, + "loss": 4.498, + "step": 323000 + }, + { + "epoch": 0.35, + "learning_rate": 4.416941253992646e-05, + "loss": 4.4942, + "step": 323500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4160400812785696e-05, + "loss": 4.5224, + "step": 324000 + }, + { + "epoch": 0.35, + "learning_rate": 4.415138908564493e-05, + "loss": 4.4901, + "step": 324500 + }, + { + "epoch": 0.35, + "learning_rate": 4.414237735850417e-05, + "loss": 4.4871, + "step": 325000 + }, + { + "epoch": 0.35, + "learning_rate": 4.413336563136341e-05, + "loss": 4.4594, + "step": 325500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4124353904222645e-05, + "loss": 4.515, + "step": 326000 + }, + { + "epoch": 0.35, + "learning_rate": 4.411534217708188e-05, + "loss": 4.4832, + "step": 326500 + }, + { + "epoch": 0.35, + "learning_rate": 4.410633044994112e-05, + "loss": 4.5075, + "step": 327000 + }, + { + "epoch": 0.35, + "learning_rate": 4.409731872280036e-05, + "loss": 4.4815, + "step": 327500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4088306995659595e-05, + "loss": 4.5843, + "step": 328000 + }, + { + "epoch": 0.36, + "learning_rate": 4.407929526851883e-05, + "loss": 4.4808, + "step": 328500 + }, + { + "epoch": 0.36, + "learning_rate": 4.407028354137806e-05, + "loss": 4.5299, + "step": 329000 + }, + { + "epoch": 0.36, + "learning_rate": 4.40612718142373e-05, + "loss": 4.4625, + "step": 329500 + }, + { + "epoch": 0.36, + "learning_rate": 4.4052260087096545e-05, + "loss": 4.5026, + "step": 330000 + }, + { + "epoch": 0.36, + "learning_rate": 4.404324835995578e-05, + "loss": 4.4895, + "step": 330500 + }, + { + "epoch": 0.36, + "learning_rate": 4.403423663281502e-05, + "loss": 4.5502, + "step": 331000 + }, + { + "epoch": 0.36, + "learning_rate": 4.402522490567426e-05, + "loss": 4.4647, + "step": 331500 + }, + { + "epoch": 0.36, + "learning_rate": 4.401621317853349e-05, + "loss": 4.4907, + "step": 332000 + }, + { + "epoch": 0.36, + "learning_rate": 4.4007201451392725e-05, + "loss": 4.4623, + "step": 332500 + }, + { + "epoch": 0.36, + "learning_rate": 4.399818972425196e-05, + "loss": 4.5451, + "step": 333000 + }, + { + "epoch": 0.36, + "learning_rate": 4.39891779971112e-05, + "loss": 4.5007, + "step": 333500 + }, + { + "epoch": 0.36, + "learning_rate": 4.3980166269970444e-05, + "loss": 4.507, + "step": 334000 + }, + { + "epoch": 0.36, + "learning_rate": 4.397115454282968e-05, + "loss": 4.4595, + "step": 334500 + }, + { + "epoch": 0.36, + "learning_rate": 4.396214281568892e-05, + "loss": 4.5234, + "step": 335000 + }, + { + "epoch": 0.36, + "learning_rate": 4.395313108854815e-05, + "loss": 4.4674, + "step": 335500 + }, + { + "epoch": 0.36, + "learning_rate": 4.394411936140739e-05, + "loss": 4.4806, + "step": 336000 + }, + { + "epoch": 0.36, + "learning_rate": 4.3935107634266624e-05, + "loss": 4.4845, + "step": 336500 + }, + { + "epoch": 0.36, + "learning_rate": 4.392609590712586e-05, + "loss": 4.5202, + "step": 337000 + }, + { + "epoch": 0.36, + "learning_rate": 4.39170841799851e-05, + "loss": 4.5301, + "step": 337500 + }, + { + "epoch": 0.37, + "learning_rate": 4.390807245284434e-05, + "loss": 4.5139, + "step": 338000 + }, + { + "epoch": 0.37, + "learning_rate": 4.3899060725703574e-05, + "loss": 4.4715, + "step": 338500 + }, + { + "epoch": 0.37, + "learning_rate": 4.389004899856281e-05, + "loss": 4.4752, + "step": 339000 + }, + { + "epoch": 0.37, + "learning_rate": 4.388103727142205e-05, + "loss": 4.4945, + "step": 339500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3872025544281286e-05, + "loss": 4.4648, + "step": 340000 + }, + { + "epoch": 0.37, + "learning_rate": 4.386301381714052e-05, + "loss": 4.5011, + "step": 340500 + }, + { + "epoch": 0.37, + "learning_rate": 4.385400208999976e-05, + "loss": 4.527, + "step": 341000 + }, + { + "epoch": 0.37, + "learning_rate": 4.3844990362859e-05, + "loss": 4.5182, + "step": 341500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3835978635718235e-05, + "loss": 4.4455, + "step": 342000 + }, + { + "epoch": 0.37, + "learning_rate": 4.382696690857747e-05, + "loss": 4.4408, + "step": 342500 + }, + { + "epoch": 0.37, + "learning_rate": 4.381795518143671e-05, + "loss": 4.4726, + "step": 343000 + }, + { + "epoch": 0.37, + "learning_rate": 4.380894345429595e-05, + "loss": 4.4912, + "step": 343500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3799931727155185e-05, + "loss": 4.4715, + "step": 344000 + }, + { + "epoch": 0.37, + "learning_rate": 4.3790920000014416e-05, + "loss": 4.4808, + "step": 344500 + }, + { + "epoch": 0.37, + "learning_rate": 4.378190827287365e-05, + "loss": 4.469, + "step": 345000 + }, + { + "epoch": 0.37, + "learning_rate": 4.37728965457329e-05, + "loss": 4.5089, + "step": 345500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3763884818592135e-05, + "loss": 4.4609, + "step": 346000 + }, + { + "epoch": 0.37, + "learning_rate": 4.375487309145137e-05, + "loss": 4.5222, + "step": 346500 + }, + { + "epoch": 0.38, + "learning_rate": 4.374586136431061e-05, + "loss": 4.4459, + "step": 347000 + }, + { + "epoch": 0.38, + "learning_rate": 4.373684963716985e-05, + "loss": 4.5242, + "step": 347500 + }, + { + "epoch": 0.38, + "learning_rate": 4.372783791002908e-05, + "loss": 4.4754, + "step": 348000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3718826182888315e-05, + "loss": 4.4723, + "step": 348500 + }, + { + "epoch": 0.38, + "learning_rate": 4.370981445574755e-05, + "loss": 4.4723, + "step": 349000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3700802728606796e-05, + "loss": 4.489, + "step": 349500 + }, + { + "epoch": 0.38, + "learning_rate": 4.3691791001466034e-05, + "loss": 4.4956, + "step": 350000 + }, + { + "epoch": 0.38, + "learning_rate": 4.368277927432527e-05, + "loss": 4.5123, + "step": 350500 + }, + { + "epoch": 0.38, + "learning_rate": 4.36737675471845e-05, + "loss": 4.5453, + "step": 351000 + }, + { + "epoch": 0.38, + "learning_rate": 4.366475582004374e-05, + "loss": 4.4737, + "step": 351500 + }, + { + "epoch": 0.38, + "learning_rate": 4.365574409290298e-05, + "loss": 4.4692, + "step": 352000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3646732365762214e-05, + "loss": 4.4465, + "step": 352500 + }, + { + "epoch": 0.38, + "learning_rate": 4.363772063862145e-05, + "loss": 4.4285, + "step": 353000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3628708911480696e-05, + "loss": 4.5237, + "step": 353500 + }, + { + "epoch": 0.38, + "learning_rate": 4.3619697184339926e-05, + "loss": 4.5195, + "step": 354000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3610685457199164e-05, + "loss": 4.492, + "step": 354500 + }, + { + "epoch": 0.38, + "learning_rate": 4.36016737300584e-05, + "loss": 4.485, + "step": 355000 + }, + { + "epoch": 0.38, + "learning_rate": 4.359266200291764e-05, + "loss": 4.4856, + "step": 355500 + }, + { + "epoch": 0.38, + "learning_rate": 4.3583650275776876e-05, + "loss": 4.5072, + "step": 356000 + }, + { + "epoch": 0.39, + "learning_rate": 4.357463854863611e-05, + "loss": 4.4757, + "step": 356500 + }, + { + "epoch": 0.39, + "learning_rate": 4.356562682149535e-05, + "loss": 4.4942, + "step": 357000 + }, + { + "epoch": 0.39, + "learning_rate": 4.355661509435459e-05, + "loss": 4.4687, + "step": 357500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3547603367213825e-05, + "loss": 4.4391, + "step": 358000 + }, + { + "epoch": 0.39, + "learning_rate": 4.353859164007306e-05, + "loss": 4.4766, + "step": 358500 + }, + { + "epoch": 0.39, + "learning_rate": 4.35295799129323e-05, + "loss": 4.5417, + "step": 359000 + }, + { + "epoch": 0.39, + "learning_rate": 4.352056818579154e-05, + "loss": 4.4657, + "step": 359500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3511556458650775e-05, + "loss": 4.5017, + "step": 360000 + }, + { + "epoch": 0.39, + "learning_rate": 4.3502544731510006e-05, + "loss": 4.4788, + "step": 360500 + }, + { + "epoch": 0.39, + "learning_rate": 4.349353300436925e-05, + "loss": 4.4984, + "step": 361000 + }, + { + "epoch": 0.39, + "learning_rate": 4.348452127722849e-05, + "loss": 4.543, + "step": 361500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3475509550087725e-05, + "loss": 4.4832, + "step": 362000 + }, + { + "epoch": 0.39, + "learning_rate": 4.346649782294696e-05, + "loss": 4.5137, + "step": 362500 + }, + { + "epoch": 0.39, + "learning_rate": 4.34574860958062e-05, + "loss": 4.4928, + "step": 363000 + }, + { + "epoch": 0.39, + "learning_rate": 4.344847436866543e-05, + "loss": 4.4666, + "step": 363500 + }, + { + "epoch": 0.39, + "learning_rate": 4.343946264152467e-05, + "loss": 4.4647, + "step": 364000 + }, + { + "epoch": 0.39, + "learning_rate": 4.3430450914383905e-05, + "loss": 4.4378, + "step": 364500 + }, + { + "epoch": 0.39, + "learning_rate": 4.342143918724315e-05, + "loss": 4.4485, + "step": 365000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3412427460102386e-05, + "loss": 4.4648, + "step": 365500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3403415732961624e-05, + "loss": 4.4384, + "step": 366000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3394404005820855e-05, + "loss": 4.4319, + "step": 366500 + }, + { + "epoch": 0.4, + "learning_rate": 4.338539227868009e-05, + "loss": 4.4791, + "step": 367000 + }, + { + "epoch": 0.4, + "learning_rate": 4.337638055153933e-05, + "loss": 4.5161, + "step": 367500 + }, + { + "epoch": 0.4, + "learning_rate": 4.336736882439857e-05, + "loss": 4.4157, + "step": 368000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3358357097257804e-05, + "loss": 4.4717, + "step": 368500 + }, + { + "epoch": 0.4, + "learning_rate": 4.334934537011704e-05, + "loss": 4.479, + "step": 369000 + }, + { + "epoch": 0.4, + "learning_rate": 4.334033364297628e-05, + "loss": 4.4731, + "step": 369500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3331321915835516e-05, + "loss": 4.4899, + "step": 370000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3322310188694754e-05, + "loss": 4.502, + "step": 370500 + }, + { + "epoch": 0.4, + "learning_rate": 4.331329846155399e-05, + "loss": 4.4384, + "step": 371000 + }, + { + "epoch": 0.4, + "learning_rate": 4.330428673441323e-05, + "loss": 4.4643, + "step": 371500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3295275007272466e-05, + "loss": 4.4491, + "step": 372000 + }, + { + "epoch": 0.4, + "learning_rate": 4.32862632801317e-05, + "loss": 4.409, + "step": 372500 + }, + { + "epoch": 0.4, + "learning_rate": 4.327725155299094e-05, + "loss": 4.4673, + "step": 373000 + }, + { + "epoch": 0.4, + "learning_rate": 4.326823982585018e-05, + "loss": 4.4603, + "step": 373500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3259228098709416e-05, + "loss": 4.4743, + "step": 374000 + }, + { + "epoch": 0.4, + "learning_rate": 4.325021637156865e-05, + "loss": 4.4634, + "step": 374500 + }, + { + "epoch": 0.41, + "learning_rate": 4.324120464442789e-05, + "loss": 4.4576, + "step": 375000 + }, + { + "epoch": 0.41, + "learning_rate": 4.323219291728713e-05, + "loss": 4.484, + "step": 375500 + }, + { + "epoch": 0.41, + "learning_rate": 4.322318119014636e-05, + "loss": 4.431, + "step": 376000 + }, + { + "epoch": 0.41, + "learning_rate": 4.32141694630056e-05, + "loss": 4.5062, + "step": 376500 + }, + { + "epoch": 0.41, + "learning_rate": 4.320515773586484e-05, + "loss": 4.4464, + "step": 377000 + }, + { + "epoch": 0.41, + "learning_rate": 4.319614600872408e-05, + "loss": 4.4734, + "step": 377500 + }, + { + "epoch": 0.41, + "learning_rate": 4.3187134281583315e-05, + "loss": 4.4615, + "step": 378000 + }, + { + "epoch": 0.41, + "learning_rate": 4.317812255444255e-05, + "loss": 4.4707, + "step": 378500 + }, + { + "epoch": 0.41, + "learning_rate": 4.316911082730178e-05, + "loss": 4.5084, + "step": 379000 + }, + { + "epoch": 0.41, + "learning_rate": 4.316009910016102e-05, + "loss": 4.4335, + "step": 379500 + }, + { + "epoch": 0.41, + "learning_rate": 4.315108737302026e-05, + "loss": 4.5115, + "step": 380000 + }, + { + "epoch": 0.41, + "learning_rate": 4.31420756458795e-05, + "loss": 4.495, + "step": 380500 + }, + { + "epoch": 0.41, + "learning_rate": 4.313306391873874e-05, + "loss": 4.5261, + "step": 381000 + }, + { + "epoch": 0.41, + "learning_rate": 4.3124052191597977e-05, + "loss": 4.5579, + "step": 381500 + }, + { + "epoch": 0.41, + "learning_rate": 4.311504046445721e-05, + "loss": 4.4861, + "step": 382000 + }, + { + "epoch": 0.41, + "learning_rate": 4.3106028737316445e-05, + "loss": 4.4294, + "step": 382500 + }, + { + "epoch": 0.41, + "learning_rate": 4.309701701017568e-05, + "loss": 4.4975, + "step": 383000 + }, + { + "epoch": 0.41, + "learning_rate": 4.308800528303492e-05, + "loss": 4.4527, + "step": 383500 + }, + { + "epoch": 0.42, + "learning_rate": 4.307899355589416e-05, + "loss": 4.4521, + "step": 384000 + }, + { + "epoch": 0.42, + "learning_rate": 4.3069981828753394e-05, + "loss": 4.5024, + "step": 384500 + }, + { + "epoch": 0.42, + "learning_rate": 4.306097010161263e-05, + "loss": 4.4181, + "step": 385000 + }, + { + "epoch": 0.42, + "learning_rate": 4.305195837447187e-05, + "loss": 4.4786, + "step": 385500 + }, + { + "epoch": 0.42, + "learning_rate": 4.3042946647331106e-05, + "loss": 4.4245, + "step": 386000 + }, + { + "epoch": 0.42, + "learning_rate": 4.3033934920190344e-05, + "loss": 4.4878, + "step": 386500 + }, + { + "epoch": 0.42, + "learning_rate": 4.302492319304958e-05, + "loss": 4.4427, + "step": 387000 + }, + { + "epoch": 0.42, + "learning_rate": 4.301591146590882e-05, + "loss": 4.3975, + "step": 387500 + }, + { + "epoch": 0.42, + "learning_rate": 4.3006899738768056e-05, + "loss": 4.4566, + "step": 388000 + }, + { + "epoch": 0.42, + "learning_rate": 4.299788801162729e-05, + "loss": 4.4441, + "step": 388500 + }, + { + "epoch": 0.42, + "learning_rate": 4.298887628448653e-05, + "loss": 4.4606, + "step": 389000 + }, + { + "epoch": 0.42, + "learning_rate": 4.297986455734577e-05, + "loss": 4.5232, + "step": 389500 + }, + { + "epoch": 0.42, + "learning_rate": 4.2970852830205006e-05, + "loss": 4.464, + "step": 390000 + }, + { + "epoch": 0.42, + "learning_rate": 4.296184110306424e-05, + "loss": 4.3564, + "step": 390500 + }, + { + "epoch": 0.42, + "learning_rate": 4.295282937592348e-05, + "loss": 4.4523, + "step": 391000 + }, + { + "epoch": 0.42, + "learning_rate": 4.294381764878271e-05, + "loss": 4.4288, + "step": 391500 + }, + { + "epoch": 0.42, + "learning_rate": 4.2934805921641955e-05, + "loss": 4.3775, + "step": 392000 + }, + { + "epoch": 0.42, + "learning_rate": 4.292579419450119e-05, + "loss": 4.4579, + "step": 392500 + }, + { + "epoch": 0.42, + "learning_rate": 4.291678246736043e-05, + "loss": 4.4408, + "step": 393000 + }, + { + "epoch": 0.43, + "learning_rate": 4.290777074021967e-05, + "loss": 4.4396, + "step": 393500 + }, + { + "epoch": 0.43, + "learning_rate": 4.2898759013078905e-05, + "loss": 4.4438, + "step": 394000 + }, + { + "epoch": 0.43, + "learning_rate": 4.2889747285938135e-05, + "loss": 4.46, + "step": 394500 + }, + { + "epoch": 0.43, + "learning_rate": 4.288073555879737e-05, + "loss": 4.522, + "step": 395000 + }, + { + "epoch": 0.43, + "learning_rate": 4.287172383165661e-05, + "loss": 4.471, + "step": 395500 + }, + { + "epoch": 0.43, + "learning_rate": 4.286271210451585e-05, + "loss": 4.3984, + "step": 396000 + }, + { + "epoch": 0.43, + "learning_rate": 4.285370037737509e-05, + "loss": 4.4826, + "step": 396500 + }, + { + "epoch": 0.43, + "learning_rate": 4.284468865023433e-05, + "loss": 4.4343, + "step": 397000 + }, + { + "epoch": 0.43, + "learning_rate": 4.283567692309356e-05, + "loss": 4.4479, + "step": 397500 + }, + { + "epoch": 0.43, + "learning_rate": 4.28266651959528e-05, + "loss": 4.4768, + "step": 398000 + }, + { + "epoch": 0.43, + "learning_rate": 4.2817653468812035e-05, + "loss": 4.4508, + "step": 398500 + }, + { + "epoch": 0.43, + "learning_rate": 4.280864174167127e-05, + "loss": 4.4707, + "step": 399000 + }, + { + "epoch": 0.43, + "learning_rate": 4.279963001453051e-05, + "loss": 4.4679, + "step": 399500 + }, + { + "epoch": 0.43, + "learning_rate": 4.279061828738975e-05, + "loss": 4.3981, + "step": 400000 + }, + { + "epoch": 0.43, + "learning_rate": 4.278160656024899e-05, + "loss": 4.4731, + "step": 400500 + }, + { + "epoch": 0.43, + "learning_rate": 4.277259483310822e-05, + "loss": 4.4104, + "step": 401000 + }, + { + "epoch": 0.43, + "learning_rate": 4.276358310596746e-05, + "loss": 4.4197, + "step": 401500 + }, + { + "epoch": 0.43, + "learning_rate": 4.2754571378826696e-05, + "loss": 4.4555, + "step": 402000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2745559651685934e-05, + "loss": 4.4885, + "step": 402500 + }, + { + "epoch": 0.44, + "learning_rate": 4.273654792454517e-05, + "loss": 4.3961, + "step": 403000 + }, + { + "epoch": 0.44, + "learning_rate": 4.272753619740441e-05, + "loss": 4.4982, + "step": 403500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2718524470263646e-05, + "loss": 4.4241, + "step": 404000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2709512743122883e-05, + "loss": 4.4929, + "step": 404500 + }, + { + "epoch": 0.44, + "learning_rate": 4.270050101598212e-05, + "loss": 4.438, + "step": 405000 + }, + { + "epoch": 0.44, + "learning_rate": 4.269148928884136e-05, + "loss": 4.4581, + "step": 405500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2682477561700596e-05, + "loss": 4.4261, + "step": 406000 + }, + { + "epoch": 0.44, + "learning_rate": 4.267346583455983e-05, + "loss": 4.4314, + "step": 406500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2664454107419064e-05, + "loss": 4.5199, + "step": 407000 + }, + { + "epoch": 0.44, + "learning_rate": 4.26554423802783e-05, + "loss": 4.3874, + "step": 407500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2646430653137545e-05, + "loss": 4.4064, + "step": 408000 + }, + { + "epoch": 0.44, + "learning_rate": 4.263741892599678e-05, + "loss": 4.4102, + "step": 408500 + }, + { + "epoch": 0.44, + "learning_rate": 4.262840719885602e-05, + "loss": 4.4532, + "step": 409000 + }, + { + "epoch": 0.44, + "learning_rate": 4.261939547171526e-05, + "loss": 4.4605, + "step": 409500 + }, + { + "epoch": 0.44, + "learning_rate": 4.261038374457449e-05, + "loss": 4.4169, + "step": 410000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2601372017433725e-05, + "loss": 4.4427, + "step": 410500 + }, + { + "epoch": 0.44, + "learning_rate": 4.259236029029296e-05, + "loss": 4.4733, + "step": 411000 + }, + { + "epoch": 0.44, + "learning_rate": 4.25833485631522e-05, + "loss": 4.5038, + "step": 411500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2574336836011444e-05, + "loss": 4.4452, + "step": 412000 + }, + { + "epoch": 0.45, + "learning_rate": 4.256532510887068e-05, + "loss": 4.4282, + "step": 412500 + }, + { + "epoch": 0.45, + "learning_rate": 4.255631338172992e-05, + "loss": 4.4557, + "step": 413000 + }, + { + "epoch": 0.45, + "learning_rate": 4.254730165458915e-05, + "loss": 4.4631, + "step": 413500 + }, + { + "epoch": 0.45, + "learning_rate": 4.253828992744839e-05, + "loss": 4.4623, + "step": 414000 + }, + { + "epoch": 0.45, + "learning_rate": 4.2529278200307625e-05, + "loss": 4.4419, + "step": 414500 + }, + { + "epoch": 0.45, + "learning_rate": 4.252026647316686e-05, + "loss": 4.4337, + "step": 415000 + }, + { + "epoch": 0.45, + "learning_rate": 4.25112547460261e-05, + "loss": 4.4549, + "step": 415500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2502243018885344e-05, + "loss": 4.4857, + "step": 416000 + }, + { + "epoch": 0.45, + "learning_rate": 4.2493231291744574e-05, + "loss": 4.4788, + "step": 416500 + }, + { + "epoch": 0.45, + "learning_rate": 4.248421956460381e-05, + "loss": 4.4158, + "step": 417000 + }, + { + "epoch": 0.45, + "learning_rate": 4.247520783746305e-05, + "loss": 4.4255, + "step": 417500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2466196110322286e-05, + "loss": 4.5044, + "step": 418000 + }, + { + "epoch": 0.45, + "learning_rate": 4.2457184383181524e-05, + "loss": 4.352, + "step": 418500 + }, + { + "epoch": 0.45, + "learning_rate": 4.244817265604076e-05, + "loss": 4.4666, + "step": 419000 + }, + { + "epoch": 0.45, + "learning_rate": 4.24391609289e-05, + "loss": 4.4477, + "step": 419500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2430149201759236e-05, + "loss": 4.4425, + "step": 420000 + }, + { + "epoch": 0.45, + "learning_rate": 4.2421137474618473e-05, + "loss": 4.5022, + "step": 420500 + }, + { + "epoch": 0.46, + "learning_rate": 4.241212574747771e-05, + "loss": 4.3832, + "step": 421000 + }, + { + "epoch": 0.46, + "learning_rate": 4.240311402033695e-05, + "loss": 4.4686, + "step": 421500 + }, + { + "epoch": 0.46, + "learning_rate": 4.2394102293196186e-05, + "loss": 4.4394, + "step": 422000 + }, + { + "epoch": 0.46, + "learning_rate": 4.2385090566055416e-05, + "loss": 4.4662, + "step": 422500 + }, + { + "epoch": 0.46, + "learning_rate": 4.2376078838914654e-05, + "loss": 4.4482, + "step": 423000 + }, + { + "epoch": 0.46, + "learning_rate": 4.23670671117739e-05, + "loss": 4.4238, + "step": 423500 + }, + { + "epoch": 0.46, + "learning_rate": 4.2358055384633135e-05, + "loss": 4.399, + "step": 424000 + }, + { + "epoch": 0.46, + "learning_rate": 4.234904365749237e-05, + "loss": 4.4646, + "step": 424500 + }, + { + "epoch": 0.46, + "learning_rate": 4.234003193035161e-05, + "loss": 4.4333, + "step": 425000 + }, + { + "epoch": 0.46, + "learning_rate": 4.233102020321084e-05, + "loss": 4.4222, + "step": 425500 + }, + { + "epoch": 0.46, + "learning_rate": 4.232200847607008e-05, + "loss": 4.4807, + "step": 426000 + }, + { + "epoch": 0.46, + "learning_rate": 4.2312996748929315e-05, + "loss": 4.4585, + "step": 426500 + }, + { + "epoch": 0.46, + "learning_rate": 4.230398502178855e-05, + "loss": 4.4629, + "step": 427000 + }, + { + "epoch": 0.46, + "learning_rate": 4.22949732946478e-05, + "loss": 4.3969, + "step": 427500 + }, + { + "epoch": 0.46, + "learning_rate": 4.2285961567507034e-05, + "loss": 4.4375, + "step": 428000 + }, + { + "epoch": 0.46, + "learning_rate": 4.227694984036627e-05, + "loss": 4.4462, + "step": 428500 + }, + { + "epoch": 0.46, + "learning_rate": 4.22679381132255e-05, + "loss": 4.3994, + "step": 429000 + }, + { + "epoch": 0.46, + "learning_rate": 4.225892638608474e-05, + "loss": 4.441, + "step": 429500 + }, + { + "epoch": 0.47, + "learning_rate": 4.224991465894398e-05, + "loss": 4.4581, + "step": 430000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2240902931803215e-05, + "loss": 4.4271, + "step": 430500 + }, + { + "epoch": 0.47, + "learning_rate": 4.223189120466245e-05, + "loss": 4.4268, + "step": 431000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2222879477521696e-05, + "loss": 4.4299, + "step": 431500 + }, + { + "epoch": 0.47, + "learning_rate": 4.221386775038093e-05, + "loss": 4.4034, + "step": 432000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2204856023240164e-05, + "loss": 4.5251, + "step": 432500 + }, + { + "epoch": 0.47, + "learning_rate": 4.21958442960994e-05, + "loss": 4.4132, + "step": 433000 + }, + { + "epoch": 0.47, + "learning_rate": 4.218683256895864e-05, + "loss": 4.4342, + "step": 433500 + }, + { + "epoch": 0.47, + "learning_rate": 4.2177820841817877e-05, + "loss": 4.4125, + "step": 434000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2168809114677114e-05, + "loss": 4.4599, + "step": 434500 + }, + { + "epoch": 0.47, + "learning_rate": 4.215979738753635e-05, + "loss": 4.3972, + "step": 435000 + }, + { + "epoch": 0.47, + "learning_rate": 4.215078566039559e-05, + "loss": 4.5031, + "step": 435500 + }, + { + "epoch": 0.47, + "learning_rate": 4.2141773933254826e-05, + "loss": 4.4313, + "step": 436000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2132762206114064e-05, + "loss": 4.4108, + "step": 436500 + }, + { + "epoch": 0.47, + "learning_rate": 4.21237504789733e-05, + "loss": 4.4509, + "step": 437000 + }, + { + "epoch": 0.47, + "learning_rate": 4.211473875183254e-05, + "loss": 4.4684, + "step": 437500 + }, + { + "epoch": 0.47, + "learning_rate": 4.210572702469177e-05, + "loss": 4.4394, + "step": 438000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2096715297551006e-05, + "loss": 4.3804, + "step": 438500 + }, + { + "epoch": 0.47, + "learning_rate": 4.208770357041025e-05, + "loss": 4.4641, + "step": 439000 + }, + { + "epoch": 0.48, + "learning_rate": 4.207869184326949e-05, + "loss": 4.3934, + "step": 439500 + }, + { + "epoch": 0.48, + "learning_rate": 4.2069680116128725e-05, + "loss": 4.3989, + "step": 440000 + }, + { + "epoch": 0.48, + "learning_rate": 4.206066838898796e-05, + "loss": 4.447, + "step": 440500 + }, + { + "epoch": 0.48, + "learning_rate": 4.20516566618472e-05, + "loss": 4.4046, + "step": 441000 + }, + { + "epoch": 0.48, + "learning_rate": 4.204264493470643e-05, + "loss": 4.4264, + "step": 441500 + }, + { + "epoch": 0.48, + "learning_rate": 4.203363320756567e-05, + "loss": 4.3891, + "step": 442000 + }, + { + "epoch": 0.48, + "learning_rate": 4.2024621480424906e-05, + "loss": 4.4143, + "step": 442500 + }, + { + "epoch": 0.48, + "learning_rate": 4.201560975328415e-05, + "loss": 4.4362, + "step": 443000 + }, + { + "epoch": 0.48, + "learning_rate": 4.200659802614339e-05, + "loss": 4.4681, + "step": 443500 + }, + { + "epoch": 0.48, + "learning_rate": 4.1997586299002625e-05, + "loss": 4.4628, + "step": 444000 + }, + { + "epoch": 0.48, + "learning_rate": 4.1988574571861855e-05, + "loss": 4.4444, + "step": 444500 + }, + { + "epoch": 0.48, + "learning_rate": 4.197956284472109e-05, + "loss": 4.3894, + "step": 445000 + }, + { + "epoch": 0.48, + "learning_rate": 4.197055111758033e-05, + "loss": 4.4775, + "step": 445500 + }, + { + "epoch": 0.48, + "learning_rate": 4.196153939043957e-05, + "loss": 4.3898, + "step": 446000 + }, + { + "epoch": 0.48, + "learning_rate": 4.1952527663298805e-05, + "loss": 4.4591, + "step": 446500 + }, + { + "epoch": 0.48, + "learning_rate": 4.194351593615804e-05, + "loss": 4.4336, + "step": 447000 + }, + { + "epoch": 0.48, + "learning_rate": 4.193450420901728e-05, + "loss": 4.4063, + "step": 447500 + }, + { + "epoch": 0.48, + "learning_rate": 4.192549248187652e-05, + "loss": 4.4326, + "step": 448000 + }, + { + "epoch": 0.49, + "learning_rate": 4.1916480754735754e-05, + "loss": 4.4418, + "step": 448500 + }, + { + "epoch": 0.49, + "learning_rate": 4.190746902759499e-05, + "loss": 4.4141, + "step": 449000 + }, + { + "epoch": 0.49, + "learning_rate": 4.189845730045423e-05, + "loss": 4.3698, + "step": 449500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1889445573313467e-05, + "loss": 4.4296, + "step": 450000 + }, + { + "epoch": 0.49, + "learning_rate": 4.1880433846172704e-05, + "loss": 4.4399, + "step": 450500 + }, + { + "epoch": 0.49, + "learning_rate": 4.187142211903194e-05, + "loss": 4.4123, + "step": 451000 + }, + { + "epoch": 0.49, + "learning_rate": 4.186241039189118e-05, + "loss": 4.3735, + "step": 451500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1853398664750416e-05, + "loss": 4.3984, + "step": 452000 + }, + { + "epoch": 0.49, + "learning_rate": 4.1844386937609654e-05, + "loss": 4.4167, + "step": 452500 + }, + { + "epoch": 0.49, + "learning_rate": 4.183537521046889e-05, + "loss": 4.3666, + "step": 453000 + }, + { + "epoch": 0.49, + "learning_rate": 4.182636348332813e-05, + "loss": 4.4422, + "step": 453500 + }, + { + "epoch": 0.49, + "learning_rate": 4.181735175618736e-05, + "loss": 4.3986, + "step": 454000 + }, + { + "epoch": 0.49, + "learning_rate": 4.18083400290466e-05, + "loss": 4.4333, + "step": 454500 + }, + { + "epoch": 0.49, + "learning_rate": 4.179932830190584e-05, + "loss": 4.4112, + "step": 455000 + }, + { + "epoch": 0.49, + "learning_rate": 4.179031657476508e-05, + "loss": 4.42, + "step": 455500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1781304847624315e-05, + "loss": 4.4114, + "step": 456000 + }, + { + "epoch": 0.49, + "learning_rate": 4.177229312048355e-05, + "loss": 4.3842, + "step": 456500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1763281393342783e-05, + "loss": 4.4399, + "step": 457000 + }, + { + "epoch": 0.49, + "learning_rate": 4.175426966620202e-05, + "loss": 4.4283, + "step": 457500 + }, + { + "epoch": 0.5, + "learning_rate": 4.174525793906126e-05, + "loss": 4.4357, + "step": 458000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1736246211920496e-05, + "loss": 4.4383, + "step": 458500 + }, + { + "epoch": 0.5, + "learning_rate": 4.172723448477974e-05, + "loss": 4.4069, + "step": 459000 + }, + { + "epoch": 0.5, + "learning_rate": 4.171822275763898e-05, + "loss": 4.4093, + "step": 459500 + }, + { + "epoch": 0.5, + "learning_rate": 4.170921103049821e-05, + "loss": 4.4003, + "step": 460000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1700199303357445e-05, + "loss": 4.3554, + "step": 460500 + }, + { + "epoch": 0.5, + "learning_rate": 4.169118757621668e-05, + "loss": 4.4233, + "step": 461000 + }, + { + "epoch": 0.5, + "learning_rate": 4.168217584907592e-05, + "loss": 4.4297, + "step": 461500 + }, + { + "epoch": 0.5, + "learning_rate": 4.167316412193516e-05, + "loss": 4.41, + "step": 462000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1664152394794395e-05, + "loss": 4.4319, + "step": 462500 + }, + { + "epoch": 0.5, + "learning_rate": 4.165514066765363e-05, + "loss": 4.4113, + "step": 463000 + }, + { + "epoch": 0.5, + "learning_rate": 4.164612894051287e-05, + "loss": 4.4162, + "step": 463500 + }, + { + "epoch": 0.5, + "learning_rate": 4.163711721337211e-05, + "loss": 4.437, + "step": 464000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1628105486231344e-05, + "loss": 4.4412, + "step": 464500 + }, + { + "epoch": 0.5, + "learning_rate": 4.161909375909058e-05, + "loss": 4.4154, + "step": 465000 + }, + { + "epoch": 0.5, + "learning_rate": 4.161008203194982e-05, + "loss": 4.4167, + "step": 465500 + }, + { + "epoch": 0.5, + "learning_rate": 4.1601070304809057e-05, + "loss": 4.4659, + "step": 466000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1592058577668294e-05, + "loss": 4.4041, + "step": 466500 + }, + { + "epoch": 0.51, + "learning_rate": 4.158304685052753e-05, + "loss": 4.4115, + "step": 467000 + }, + { + "epoch": 0.51, + "learning_rate": 4.157403512338677e-05, + "loss": 4.4393, + "step": 467500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1565023396246006e-05, + "loss": 4.3725, + "step": 468000 + }, + { + "epoch": 0.51, + "learning_rate": 4.1556011669105244e-05, + "loss": 4.4011, + "step": 468500 + }, + { + "epoch": 0.51, + "learning_rate": 4.154699994196448e-05, + "loss": 4.353, + "step": 469000 + }, + { + "epoch": 0.51, + "learning_rate": 4.153798821482371e-05, + "loss": 4.3823, + "step": 469500 + }, + { + "epoch": 0.51, + "learning_rate": 4.152897648768295e-05, + "loss": 4.4488, + "step": 470000 + }, + { + "epoch": 0.51, + "learning_rate": 4.151996476054219e-05, + "loss": 4.4014, + "step": 470500 + }, + { + "epoch": 0.51, + "learning_rate": 4.151095303340143e-05, + "loss": 4.3857, + "step": 471000 + }, + { + "epoch": 0.51, + "learning_rate": 4.150194130626067e-05, + "loss": 4.4427, + "step": 471500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1492929579119905e-05, + "loss": 4.4374, + "step": 472000 + }, + { + "epoch": 0.51, + "learning_rate": 4.1483917851979136e-05, + "loss": 4.3678, + "step": 472500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1474906124838373e-05, + "loss": 4.3926, + "step": 473000 + }, + { + "epoch": 0.51, + "learning_rate": 4.146589439769761e-05, + "loss": 4.426, + "step": 473500 + }, + { + "epoch": 0.51, + "learning_rate": 4.145688267055685e-05, + "loss": 4.4384, + "step": 474000 + }, + { + "epoch": 0.51, + "learning_rate": 4.144787094341609e-05, + "loss": 4.4226, + "step": 474500 + }, + { + "epoch": 0.51, + "learning_rate": 4.143885921627533e-05, + "loss": 4.3985, + "step": 475000 + }, + { + "epoch": 0.51, + "learning_rate": 4.142984748913456e-05, + "loss": 4.3802, + "step": 475500 + }, + { + "epoch": 0.51, + "learning_rate": 4.14208357619938e-05, + "loss": 4.4457, + "step": 476000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1411824034853035e-05, + "loss": 4.4333, + "step": 476500 + }, + { + "epoch": 0.52, + "learning_rate": 4.140281230771227e-05, + "loss": 4.3906, + "step": 477000 + }, + { + "epoch": 0.52, + "learning_rate": 4.139380058057151e-05, + "loss": 4.3618, + "step": 477500 + }, + { + "epoch": 0.52, + "learning_rate": 4.138478885343075e-05, + "loss": 4.4389, + "step": 478000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1375777126289985e-05, + "loss": 4.354, + "step": 478500 + }, + { + "epoch": 0.52, + "learning_rate": 4.136676539914922e-05, + "loss": 4.4335, + "step": 479000 + }, + { + "epoch": 0.52, + "learning_rate": 4.135775367200846e-05, + "loss": 4.405, + "step": 479500 + }, + { + "epoch": 0.52, + "learning_rate": 4.13487419448677e-05, + "loss": 4.4223, + "step": 480000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1339730217726934e-05, + "loss": 4.4074, + "step": 480500 + }, + { + "epoch": 0.52, + "learning_rate": 4.133071849058617e-05, + "loss": 4.3557, + "step": 481000 + }, + { + "epoch": 0.52, + "learning_rate": 4.132170676344541e-05, + "loss": 4.4342, + "step": 481500 + }, + { + "epoch": 0.52, + "learning_rate": 4.131269503630465e-05, + "loss": 4.3986, + "step": 482000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1303683309163884e-05, + "loss": 4.4292, + "step": 482500 + }, + { + "epoch": 0.52, + "learning_rate": 4.129467158202312e-05, + "loss": 4.4526, + "step": 483000 + }, + { + "epoch": 0.52, + "learning_rate": 4.128565985488236e-05, + "loss": 4.4217, + "step": 483500 + }, + { + "epoch": 0.52, + "learning_rate": 4.1276648127741596e-05, + "loss": 4.3949, + "step": 484000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1267636400600834e-05, + "loss": 4.4406, + "step": 484500 + }, + { + "epoch": 0.52, + "learning_rate": 4.1258624673460064e-05, + "loss": 4.4383, + "step": 485000 + }, + { + "epoch": 0.53, + "learning_rate": 4.12496129463193e-05, + "loss": 4.4106, + "step": 485500 + }, + { + "epoch": 0.53, + "learning_rate": 4.1240601219178546e-05, + "loss": 4.382, + "step": 486000 + }, + { + "epoch": 0.53, + "learning_rate": 4.123158949203778e-05, + "loss": 4.3267, + "step": 486500 + }, + { + "epoch": 0.53, + "learning_rate": 4.122257776489702e-05, + "loss": 4.4221, + "step": 487000 + }, + { + "epoch": 0.53, + "learning_rate": 4.121356603775626e-05, + "loss": 4.3966, + "step": 487500 + }, + { + "epoch": 0.53, + "learning_rate": 4.120455431061549e-05, + "loss": 4.3824, + "step": 488000 + }, + { + "epoch": 0.53, + "learning_rate": 4.1195542583474726e-05, + "loss": 4.4346, + "step": 488500 + }, + { + "epoch": 0.53, + "learning_rate": 4.1186530856333963e-05, + "loss": 4.3681, + "step": 489000 + }, + { + "epoch": 0.53, + "learning_rate": 4.11775191291932e-05, + "loss": 4.405, + "step": 489500 + }, + { + "epoch": 0.53, + "learning_rate": 4.1168507402052445e-05, + "loss": 4.4267, + "step": 490000 + }, + { + "epoch": 0.53, + "learning_rate": 4.115949567491168e-05, + "loss": 4.4356, + "step": 490500 + }, + { + "epoch": 0.53, + "learning_rate": 4.115048394777091e-05, + "loss": 4.3915, + "step": 491000 + }, + { + "epoch": 0.53, + "learning_rate": 4.114147222063015e-05, + "loss": 4.4071, + "step": 491500 + }, + { + "epoch": 0.53, + "learning_rate": 4.113246049348939e-05, + "loss": 4.4558, + "step": 492000 + }, + { + "epoch": 0.53, + "learning_rate": 4.1123448766348625e-05, + "loss": 4.4161, + "step": 492500 + }, + { + "epoch": 0.53, + "learning_rate": 4.111443703920786e-05, + "loss": 4.4322, + "step": 493000 + }, + { + "epoch": 0.53, + "learning_rate": 4.11054253120671e-05, + "loss": 4.4165, + "step": 493500 + }, + { + "epoch": 0.53, + "learning_rate": 4.1096413584926344e-05, + "loss": 4.3936, + "step": 494000 + }, + { + "epoch": 0.53, + "learning_rate": 4.1087401857785575e-05, + "loss": 4.464, + "step": 494500 + }, + { + "epoch": 0.54, + "learning_rate": 4.107839013064481e-05, + "loss": 4.4453, + "step": 495000 + }, + { + "epoch": 0.54, + "learning_rate": 4.106937840350405e-05, + "loss": 4.4496, + "step": 495500 + }, + { + "epoch": 0.54, + "learning_rate": 4.106036667636329e-05, + "loss": 4.4243, + "step": 496000 + }, + { + "epoch": 0.54, + "learning_rate": 4.1051354949222524e-05, + "loss": 4.4202, + "step": 496500 + }, + { + "epoch": 0.54, + "learning_rate": 4.104234322208176e-05, + "loss": 4.4393, + "step": 497000 + }, + { + "epoch": 0.54, + "learning_rate": 4.1033331494941e-05, + "loss": 4.3986, + "step": 497500 + }, + { + "epoch": 0.54, + "learning_rate": 4.102431976780024e-05, + "loss": 4.3453, + "step": 498000 + }, + { + "epoch": 0.54, + "learning_rate": 4.1015308040659474e-05, + "loss": 4.4282, + "step": 498500 + }, + { + "epoch": 0.54, + "learning_rate": 4.100629631351871e-05, + "loss": 4.4063, + "step": 499000 + }, + { + "epoch": 0.54, + "learning_rate": 4.099728458637795e-05, + "loss": 4.4041, + "step": 499500 + }, + { + "epoch": 0.54, + "learning_rate": 4.0988272859237186e-05, + "loss": 4.4121, + "step": 500000 + }, + { + "epoch": 0.54, + "learning_rate": 4.097926113209642e-05, + "loss": 4.3535, + "step": 500500 + }, + { + "epoch": 0.54, + "learning_rate": 4.0970249404955654e-05, + "loss": 4.4579, + "step": 501000 + }, + { + "epoch": 0.54, + "learning_rate": 4.09612376778149e-05, + "loss": 4.4047, + "step": 501500 + }, + { + "epoch": 0.54, + "learning_rate": 4.0952225950674136e-05, + "loss": 4.3953, + "step": 502000 + }, + { + "epoch": 0.54, + "learning_rate": 4.094321422353337e-05, + "loss": 4.3709, + "step": 502500 + }, + { + "epoch": 0.54, + "learning_rate": 4.093420249639261e-05, + "loss": 4.4017, + "step": 503000 + }, + { + "epoch": 0.54, + "learning_rate": 4.092519076925184e-05, + "loss": 4.3861, + "step": 503500 + }, + { + "epoch": 0.55, + "learning_rate": 4.091617904211108e-05, + "loss": 4.4664, + "step": 504000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0907167314970316e-05, + "loss": 4.4029, + "step": 504500 + }, + { + "epoch": 0.55, + "learning_rate": 4.0898155587829554e-05, + "loss": 4.386, + "step": 505000 + }, + { + "epoch": 0.55, + "learning_rate": 4.08891438606888e-05, + "loss": 4.3983, + "step": 505500 + }, + { + "epoch": 0.55, + "learning_rate": 4.0880132133548035e-05, + "loss": 4.3899, + "step": 506000 + }, + { + "epoch": 0.55, + "learning_rate": 4.087112040640727e-05, + "loss": 4.3988, + "step": 506500 + }, + { + "epoch": 0.55, + "learning_rate": 4.08621086792665e-05, + "loss": 4.3771, + "step": 507000 + }, + { + "epoch": 0.55, + "learning_rate": 4.085309695212574e-05, + "loss": 4.3443, + "step": 507500 + }, + { + "epoch": 0.55, + "learning_rate": 4.084408522498498e-05, + "loss": 4.3714, + "step": 508000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0835073497844215e-05, + "loss": 4.3909, + "step": 508500 + }, + { + "epoch": 0.55, + "learning_rate": 4.082606177070345e-05, + "loss": 4.4214, + "step": 509000 + }, + { + "epoch": 0.55, + "learning_rate": 4.081705004356269e-05, + "loss": 4.4305, + "step": 509500 + }, + { + "epoch": 0.55, + "learning_rate": 4.080803831642193e-05, + "loss": 4.3784, + "step": 510000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0799026589281165e-05, + "loss": 4.4198, + "step": 510500 + }, + { + "epoch": 0.55, + "learning_rate": 4.07900148621404e-05, + "loss": 4.3954, + "step": 511000 + }, + { + "epoch": 0.55, + "learning_rate": 4.078100313499964e-05, + "loss": 4.4075, + "step": 511500 + }, + { + "epoch": 0.55, + "learning_rate": 4.077199140785888e-05, + "loss": 4.4065, + "step": 512000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0762979680718115e-05, + "loss": 4.4122, + "step": 512500 + }, + { + "epoch": 0.55, + "learning_rate": 4.075396795357735e-05, + "loss": 4.4, + "step": 513000 + }, + { + "epoch": 0.56, + "learning_rate": 4.074495622643659e-05, + "loss": 4.3722, + "step": 513500 + }, + { + "epoch": 0.56, + "learning_rate": 4.073594449929583e-05, + "loss": 4.3375, + "step": 514000 + }, + { + "epoch": 0.56, + "learning_rate": 4.0726932772155064e-05, + "loss": 4.3655, + "step": 514500 + }, + { + "epoch": 0.56, + "learning_rate": 4.07179210450143e-05, + "loss": 4.3714, + "step": 515000 + }, + { + "epoch": 0.56, + "learning_rate": 4.070890931787354e-05, + "loss": 4.4154, + "step": 515500 + }, + { + "epoch": 0.56, + "learning_rate": 4.069989759073277e-05, + "loss": 4.4121, + "step": 516000 + }, + { + "epoch": 0.56, + "learning_rate": 4.069088586359201e-05, + "loss": 4.4102, + "step": 516500 + }, + { + "epoch": 0.56, + "learning_rate": 4.068187413645125e-05, + "loss": 4.3882, + "step": 517000 + }, + { + "epoch": 0.56, + "learning_rate": 4.067286240931049e-05, + "loss": 4.4476, + "step": 517500 + }, + { + "epoch": 0.56, + "learning_rate": 4.0663850682169726e-05, + "loss": 4.3978, + "step": 518000 + }, + { + "epoch": 0.56, + "learning_rate": 4.065483895502896e-05, + "loss": 4.4405, + "step": 518500 + }, + { + "epoch": 0.56, + "learning_rate": 4.06458272278882e-05, + "loss": 4.3647, + "step": 519000 + }, + { + "epoch": 0.56, + "learning_rate": 4.063681550074743e-05, + "loss": 4.3729, + "step": 519500 + }, + { + "epoch": 0.56, + "learning_rate": 4.062780377360667e-05, + "loss": 4.4138, + "step": 520000 + }, + { + "epoch": 0.56, + "learning_rate": 4.0618792046465906e-05, + "loss": 4.3248, + "step": 520500 + }, + { + "epoch": 0.56, + "learning_rate": 4.0609780319325144e-05, + "loss": 4.422, + "step": 521000 + }, + { + "epoch": 0.56, + "learning_rate": 4.060076859218439e-05, + "loss": 4.3538, + "step": 521500 + }, + { + "epoch": 0.56, + "learning_rate": 4.0591756865043625e-05, + "loss": 4.4099, + "step": 522000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0582745137902856e-05, + "loss": 4.4193, + "step": 522500 + }, + { + "epoch": 0.57, + "learning_rate": 4.057373341076209e-05, + "loss": 4.3988, + "step": 523000 + }, + { + "epoch": 0.57, + "learning_rate": 4.056472168362133e-05, + "loss": 4.4022, + "step": 523500 + }, + { + "epoch": 0.57, + "learning_rate": 4.055570995648057e-05, + "loss": 4.3413, + "step": 524000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0546698229339805e-05, + "loss": 4.434, + "step": 524500 + }, + { + "epoch": 0.57, + "learning_rate": 4.053768650219904e-05, + "loss": 4.3744, + "step": 525000 + }, + { + "epoch": 0.57, + "learning_rate": 4.052867477505828e-05, + "loss": 4.418, + "step": 525500 + }, + { + "epoch": 0.57, + "learning_rate": 4.051966304791752e-05, + "loss": 4.3814, + "step": 526000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0510651320776755e-05, + "loss": 4.3454, + "step": 526500 + }, + { + "epoch": 0.57, + "learning_rate": 4.050163959363599e-05, + "loss": 4.3251, + "step": 527000 + }, + { + "epoch": 0.57, + "learning_rate": 4.049262786649523e-05, + "loss": 4.4182, + "step": 527500 + }, + { + "epoch": 0.57, + "learning_rate": 4.048361613935447e-05, + "loss": 4.3319, + "step": 528000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0474604412213705e-05, + "loss": 4.3861, + "step": 528500 + }, + { + "epoch": 0.57, + "learning_rate": 4.046559268507294e-05, + "loss": 4.4092, + "step": 529000 + }, + { + "epoch": 0.57, + "learning_rate": 4.045658095793218e-05, + "loss": 4.397, + "step": 529500 + }, + { + "epoch": 0.57, + "learning_rate": 4.044756923079142e-05, + "loss": 4.3839, + "step": 530000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0438557503650654e-05, + "loss": 4.4383, + "step": 530500 + }, + { + "epoch": 0.57, + "learning_rate": 4.042954577650989e-05, + "loss": 4.4198, + "step": 531000 + }, + { + "epoch": 0.57, + "learning_rate": 4.042053404936912e-05, + "loss": 4.3632, + "step": 531500 + }, + { + "epoch": 0.58, + "learning_rate": 4.041152232222836e-05, + "loss": 4.3722, + "step": 532000 + }, + { + "epoch": 0.58, + "learning_rate": 4.04025105950876e-05, + "loss": 4.3664, + "step": 532500 + }, + { + "epoch": 0.58, + "learning_rate": 4.039349886794684e-05, + "loss": 4.357, + "step": 533000 + }, + { + "epoch": 0.58, + "learning_rate": 4.038448714080608e-05, + "loss": 4.3484, + "step": 533500 + }, + { + "epoch": 0.58, + "learning_rate": 4.0375475413665316e-05, + "loss": 4.4506, + "step": 534000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0366463686524553e-05, + "loss": 4.37, + "step": 534500 + }, + { + "epoch": 0.58, + "learning_rate": 4.0357451959383784e-05, + "loss": 4.3452, + "step": 535000 + }, + { + "epoch": 0.58, + "learning_rate": 4.034844023224302e-05, + "loss": 4.4018, + "step": 535500 + }, + { + "epoch": 0.58, + "learning_rate": 4.033942850510226e-05, + "loss": 4.4079, + "step": 536000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0330416777961496e-05, + "loss": 4.3569, + "step": 536500 + }, + { + "epoch": 0.58, + "learning_rate": 4.032140505082074e-05, + "loss": 4.3495, + "step": 537000 + }, + { + "epoch": 0.58, + "learning_rate": 4.031239332367998e-05, + "loss": 4.3752, + "step": 537500 + }, + { + "epoch": 0.58, + "learning_rate": 4.030338159653921e-05, + "loss": 4.3821, + "step": 538000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0294369869398446e-05, + "loss": 4.431, + "step": 538500 + }, + { + "epoch": 0.58, + "learning_rate": 4.028535814225768e-05, + "loss": 4.3057, + "step": 539000 + }, + { + "epoch": 0.58, + "learning_rate": 4.027634641511692e-05, + "loss": 4.3249, + "step": 539500 + }, + { + "epoch": 0.58, + "learning_rate": 4.026733468797616e-05, + "loss": 4.3181, + "step": 540000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0258322960835395e-05, + "loss": 4.3905, + "step": 540500 + }, + { + "epoch": 0.59, + "learning_rate": 4.024931123369463e-05, + "loss": 4.3406, + "step": 541000 + }, + { + "epoch": 0.59, + "learning_rate": 4.024029950655387e-05, + "loss": 4.3545, + "step": 541500 + }, + { + "epoch": 0.59, + "learning_rate": 4.023128777941311e-05, + "loss": 4.3554, + "step": 542000 + }, + { + "epoch": 0.59, + "learning_rate": 4.0222276052272345e-05, + "loss": 4.4182, + "step": 542500 + }, + { + "epoch": 0.59, + "learning_rate": 4.021326432513158e-05, + "loss": 4.4599, + "step": 543000 + }, + { + "epoch": 0.59, + "learning_rate": 4.020425259799082e-05, + "loss": 4.326, + "step": 543500 + }, + { + "epoch": 0.59, + "learning_rate": 4.019524087085006e-05, + "loss": 4.3247, + "step": 544000 + }, + { + "epoch": 0.59, + "learning_rate": 4.0186229143709295e-05, + "loss": 4.4027, + "step": 544500 + }, + { + "epoch": 0.59, + "learning_rate": 4.017721741656853e-05, + "loss": 4.315, + "step": 545000 + }, + { + "epoch": 0.59, + "learning_rate": 4.016820568942777e-05, + "loss": 4.3967, + "step": 545500 + }, + { + "epoch": 0.59, + "learning_rate": 4.015919396228701e-05, + "loss": 4.3808, + "step": 546000 + }, + { + "epoch": 0.59, + "learning_rate": 4.0150182235146244e-05, + "loss": 4.3609, + "step": 546500 + }, + { + "epoch": 0.59, + "learning_rate": 4.014117050800548e-05, + "loss": 4.3969, + "step": 547000 + }, + { + "epoch": 0.59, + "learning_rate": 4.013215878086471e-05, + "loss": 4.3735, + "step": 547500 + }, + { + "epoch": 0.59, + "learning_rate": 4.012314705372395e-05, + "loss": 4.3567, + "step": 548000 + }, + { + "epoch": 0.59, + "learning_rate": 4.0114135326583194e-05, + "loss": 4.3614, + "step": 548500 + }, + { + "epoch": 0.59, + "learning_rate": 4.010512359944243e-05, + "loss": 4.3611, + "step": 549000 + }, + { + "epoch": 0.59, + "learning_rate": 4.009611187230167e-05, + "loss": 4.376, + "step": 549500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0087100145160906e-05, + "loss": 4.3128, + "step": 550000 + }, + { + "epoch": 0.6, + "learning_rate": 4.007808841802014e-05, + "loss": 4.3885, + "step": 550500 + }, + { + "epoch": 0.6, + "learning_rate": 4.0069076690879374e-05, + "loss": 4.3767, + "step": 551000 + }, + { + "epoch": 0.6, + "learning_rate": 4.006006496373861e-05, + "loss": 4.3457, + "step": 551500 + }, + { + "epoch": 0.6, + "learning_rate": 4.005105323659785e-05, + "loss": 4.462, + "step": 552000 + }, + { + "epoch": 0.6, + "learning_rate": 4.004204150945709e-05, + "loss": 4.3849, + "step": 552500 + }, + { + "epoch": 0.6, + "learning_rate": 4.003302978231633e-05, + "loss": 4.3644, + "step": 553000 + }, + { + "epoch": 0.6, + "learning_rate": 4.002401805517556e-05, + "loss": 4.3445, + "step": 553500 + }, + { + "epoch": 0.6, + "learning_rate": 4.00150063280348e-05, + "loss": 4.3573, + "step": 554000 + }, + { + "epoch": 0.6, + "learning_rate": 4.0005994600894036e-05, + "loss": 4.3702, + "step": 554500 + }, + { + "epoch": 0.6, + "learning_rate": 3.999698287375327e-05, + "loss": 4.335, + "step": 555000 + }, + { + "epoch": 0.6, + "learning_rate": 3.998797114661251e-05, + "loss": 4.3592, + "step": 555500 + }, + { + "epoch": 0.6, + "learning_rate": 3.997895941947175e-05, + "loss": 4.3702, + "step": 556000 + }, + { + "epoch": 0.6, + "learning_rate": 3.9969947692330985e-05, + "loss": 4.3976, + "step": 556500 + }, + { + "epoch": 0.6, + "learning_rate": 3.996093596519022e-05, + "loss": 4.3542, + "step": 557000 + }, + { + "epoch": 0.6, + "learning_rate": 3.995192423804946e-05, + "loss": 4.3243, + "step": 557500 + }, + { + "epoch": 0.6, + "learning_rate": 3.99429125109087e-05, + "loss": 4.3865, + "step": 558000 + }, + { + "epoch": 0.6, + "learning_rate": 3.9933900783767935e-05, + "loss": 4.3937, + "step": 558500 + }, + { + "epoch": 0.6, + "learning_rate": 3.992488905662717e-05, + "loss": 4.4588, + "step": 559000 + }, + { + "epoch": 0.61, + "learning_rate": 3.991587732948641e-05, + "loss": 4.3763, + "step": 559500 + }, + { + "epoch": 0.61, + "learning_rate": 3.990686560234565e-05, + "loss": 4.3972, + "step": 560000 + } + ], + "max_steps": 2774163, + "num_train_epochs": 3, + "total_flos": 3.658088448e+16, + "trial_name": null, + "trial_params": null +}