{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.314394248248143, "global_step": 12200000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.7608e-06, "loss": 3.1459, "step": 500 }, { "epoch": 0.0, "learning_rate": 5.5608e-06, "loss": 2.2923, "step": 1000 }, { "epoch": 0.0, "learning_rate": 8.3608e-06, "loss": 2.1518, "step": 1500 }, { "epoch": 0.0, "learning_rate": 1.11608e-05, "loss": 2.0375, "step": 2000 }, { "epoch": 0.0, "learning_rate": 1.39552e-05, "loss": 1.9593, "step": 2500 }, { "epoch": 0.0, "learning_rate": 1.6755200000000002e-05, "loss": 1.8917, "step": 3000 }, { "epoch": 0.0, "learning_rate": 1.95552e-05, "loss": 1.8308, "step": 3500 }, { "epoch": 0.0, "learning_rate": 2.23552e-05, "loss": 1.771, "step": 4000 }, { "epoch": 0.0, "learning_rate": 2.51552e-05, "loss": 1.7222, "step": 4500 }, { "epoch": 0.0, "learning_rate": 2.79552e-05, "loss": 1.6701, "step": 5000 }, { "epoch": 0.0, "learning_rate": 3.07552e-05, "loss": 1.6394, "step": 5500 }, { "epoch": 0.0, "learning_rate": 3.35552e-05, "loss": 1.6025, "step": 6000 }, { "epoch": 0.0, "learning_rate": 3.63552e-05, "loss": 1.558, "step": 6500 }, { "epoch": 0.0, "learning_rate": 3.91552e-05, "loss": 1.5174, "step": 7000 }, { "epoch": 0.0, "learning_rate": 4.19552e-05, "loss": 1.4765, "step": 7500 }, { "epoch": 0.0, "learning_rate": 4.47552e-05, "loss": 1.4487, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.7555199999999995e-05, "loss": 1.407, "step": 8500 }, { "epoch": 0.01, "learning_rate": 5.03552e-05, "loss": 1.3904, "step": 9000 }, { "epoch": 0.01, "learning_rate": 5.31552e-05, "loss": 1.3567, "step": 9500 }, { "epoch": 0.01, "learning_rate": 5.5955199999999996e-05, "loss": 1.3242, "step": 10000 }, { "epoch": 0.01, "learning_rate": 5.59979336338884e-05, "loss": 1.2825, "step": 10500 }, { "epoch": 0.01, "learning_rate": 5.5995833668327834e-05, "loss": 1.2554, "step": 11000 }, { "epoch": 0.01, "learning_rate": 5.5993733702767274e-05, "loss": 1.2237, "step": 11500 }, { "epoch": 0.01, "learning_rate": 5.599163373720671e-05, "loss": 1.191, "step": 12000 }, { "epoch": 0.01, "learning_rate": 5.598953377164614e-05, "loss": 1.1583, "step": 12500 }, { "epoch": 0.01, "learning_rate": 5.598744220594782e-05, "loss": 1.1338, "step": 13000 }, { "epoch": 0.01, "learning_rate": 5.598534224038726e-05, "loss": 1.1141, "step": 13500 }, { "epoch": 0.01, "learning_rate": 5.5983242274826695e-05, "loss": 1.0981, "step": 14000 }, { "epoch": 0.01, "learning_rate": 5.598114230926613e-05, "loss": 1.0626, "step": 14500 }, { "epoch": 0.01, "learning_rate": 5.597904234370556e-05, "loss": 1.0491, "step": 15000 }, { "epoch": 0.01, "learning_rate": 5.5976942378144995e-05, "loss": 1.0267, "step": 15500 }, { "epoch": 0.01, "learning_rate": 5.597484241258443e-05, "loss": 1.0133, "step": 16000 }, { "epoch": 0.01, "learning_rate": 5.597274244702387e-05, "loss": 0.9943, "step": 16500 }, { "epoch": 0.01, "learning_rate": 5.59706424814633e-05, "loss": 0.9781, "step": 17000 }, { "epoch": 0.01, "learning_rate": 5.5968546715833856e-05, "loss": 0.962, "step": 17500 }, { "epoch": 0.01, "learning_rate": 5.5966450950204417e-05, "loss": 0.932, "step": 18000 }, { "epoch": 0.01, "learning_rate": 5.596435098464385e-05, "loss": 0.9308, "step": 18500 }, { "epoch": 0.01, "learning_rate": 5.596225101908329e-05, "loss": 0.9272, "step": 19000 }, { "epoch": 0.01, "learning_rate": 5.596015105352272e-05, "loss": 0.891, "step": 19500 }, { "epoch": 0.01, "learning_rate": 5.595805108796216e-05, "loss": 0.892, "step": 20000 }, { "epoch": 0.01, "learning_rate": 5.595595112240159e-05, "loss": 0.8786, "step": 20500 }, { "epoch": 0.01, "learning_rate": 5.5953851156841024e-05, "loss": 0.8616, "step": 21000 }, { "epoch": 0.01, "learning_rate": 5.5951755391211584e-05, "loss": 0.8534, "step": 21500 }, { "epoch": 0.01, "learning_rate": 5.594965542565102e-05, "loss": 0.8456, "step": 22000 }, { "epoch": 0.01, "learning_rate": 5.594755546009045e-05, "loss": 0.8451, "step": 22500 }, { "epoch": 0.01, "learning_rate": 5.5945455494529885e-05, "loss": 0.8221, "step": 23000 }, { "epoch": 0.01, "learning_rate": 5.5943355528969325e-05, "loss": 0.819, "step": 23500 }, { "epoch": 0.01, "learning_rate": 5.594125556340876e-05, "loss": 0.8183, "step": 24000 }, { "epoch": 0.01, "learning_rate": 5.593915559784819e-05, "loss": 0.7892, "step": 24500 }, { "epoch": 0.01, "learning_rate": 5.593705563228763e-05, "loss": 0.7881, "step": 25000 }, { "epoch": 0.02, "learning_rate": 5.5934955666727066e-05, "loss": 0.7828, "step": 25500 }, { "epoch": 0.02, "learning_rate": 5.59328557011665e-05, "loss": 0.7862, "step": 26000 }, { "epoch": 0.02, "learning_rate": 5.593075573560594e-05, "loss": 0.7678, "step": 26500 }, { "epoch": 0.02, "learning_rate": 5.592865577004537e-05, "loss": 0.7619, "step": 27000 }, { "epoch": 0.02, "learning_rate": 5.5926560004415926e-05, "loss": 0.7673, "step": 27500 }, { "epoch": 0.02, "learning_rate": 5.592446003885537e-05, "loss": 0.7457, "step": 28000 }, { "epoch": 0.02, "learning_rate": 5.59223600732948e-05, "loss": 0.7518, "step": 28500 }, { "epoch": 0.02, "learning_rate": 5.5920260107734234e-05, "loss": 0.7346, "step": 29000 }, { "epoch": 0.02, "learning_rate": 5.5918160142173674e-05, "loss": 0.7264, "step": 29500 }, { "epoch": 0.02, "learning_rate": 5.59160601766131e-05, "loss": 0.7292, "step": 30000 }, { "epoch": 0.02, "learning_rate": 5.5913960211052534e-05, "loss": 0.7319, "step": 30500 }, { "epoch": 0.02, "learning_rate": 5.5911860245491974e-05, "loss": 0.7061, "step": 31000 }, { "epoch": 0.02, "learning_rate": 5.590976027993141e-05, "loss": 0.7116, "step": 31500 }, { "epoch": 0.02, "learning_rate": 5.590766451430196e-05, "loss": 0.699, "step": 32000 }, { "epoch": 0.02, "learning_rate": 5.5905564548741395e-05, "loss": 0.71, "step": 32500 }, { "epoch": 0.02, "learning_rate": 5.5903472983043075e-05, "loss": 0.6935, "step": 33000 }, { "epoch": 0.02, "learning_rate": 5.590137301748251e-05, "loss": 0.6926, "step": 33500 }, { "epoch": 0.02, "learning_rate": 5.589927305192195e-05, "loss": 0.6876, "step": 34000 }, { "epoch": 0.02, "learning_rate": 5.589717308636138e-05, "loss": 0.6931, "step": 34500 }, { "epoch": 0.02, "learning_rate": 5.589507312080082e-05, "loss": 0.6736, "step": 35000 }, { "epoch": 0.02, "learning_rate": 5.5892973155240256e-05, "loss": 0.6877, "step": 35500 }, { "epoch": 0.02, "learning_rate": 5.589087318967969e-05, "loss": 0.6748, "step": 36000 }, { "epoch": 0.02, "learning_rate": 5.588877322411913e-05, "loss": 0.6819, "step": 36500 }, { "epoch": 0.02, "learning_rate": 5.5886673258558556e-05, "loss": 0.6536, "step": 37000 }, { "epoch": 0.02, "learning_rate": 5.588457329299799e-05, "loss": 0.6594, "step": 37500 }, { "epoch": 0.02, "learning_rate": 5.588247332743743e-05, "loss": 0.6567, "step": 38000 }, { "epoch": 0.02, "learning_rate": 5.5880373361876863e-05, "loss": 0.6501, "step": 38500 }, { "epoch": 0.02, "learning_rate": 5.58782733963163e-05, "loss": 0.6611, "step": 39000 }, { "epoch": 0.02, "learning_rate": 5.587617343075574e-05, "loss": 0.6472, "step": 39500 }, { "epoch": 0.02, "learning_rate": 5.587407346519517e-05, "loss": 0.6428, "step": 40000 }, { "epoch": 0.02, "learning_rate": 5.5871973499634604e-05, "loss": 0.6432, "step": 40500 }, { "epoch": 0.02, "learning_rate": 5.5869873534074044e-05, "loss": 0.6463, "step": 41000 }, { "epoch": 0.02, "learning_rate": 5.58677777684446e-05, "loss": 0.6352, "step": 41500 }, { "epoch": 0.03, "learning_rate": 5.586567780288403e-05, "loss": 0.6305, "step": 42000 }, { "epoch": 0.03, "learning_rate": 5.5863577837323465e-05, "loss": 0.6295, "step": 42500 }, { "epoch": 0.03, "learning_rate": 5.5861477871762905e-05, "loss": 0.6243, "step": 43000 }, { "epoch": 0.03, "learning_rate": 5.585937790620234e-05, "loss": 0.6334, "step": 43500 }, { "epoch": 0.03, "learning_rate": 5.585727794064177e-05, "loss": 0.6281, "step": 44000 }, { "epoch": 0.03, "learning_rate": 5.585517797508121e-05, "loss": 0.6229, "step": 44500 }, { "epoch": 0.03, "learning_rate": 5.585307800952064e-05, "loss": 0.6205, "step": 45000 }, { "epoch": 0.03, "learning_rate": 5.585097804396008e-05, "loss": 0.608, "step": 45500 }, { "epoch": 0.03, "learning_rate": 5.584888227833064e-05, "loss": 0.6021, "step": 46000 }, { "epoch": 0.03, "learning_rate": 5.584678231277007e-05, "loss": 0.6154, "step": 46500 }, { "epoch": 0.03, "learning_rate": 5.5844682347209506e-05, "loss": 0.6113, "step": 47000 }, { "epoch": 0.03, "learning_rate": 5.584259078151118e-05, "loss": 0.5978, "step": 47500 }, { "epoch": 0.03, "learning_rate": 5.5840490815950614e-05, "loss": 0.6058, "step": 48000 }, { "epoch": 0.03, "learning_rate": 5.5838390850390054e-05, "loss": 0.6041, "step": 48500 }, { "epoch": 0.03, "learning_rate": 5.583629088482949e-05, "loss": 0.587, "step": 49000 }, { "epoch": 0.03, "learning_rate": 5.583419931913117e-05, "loss": 0.5985, "step": 49500 }, { "epoch": 0.03, "learning_rate": 5.58320993535706e-05, "loss": 0.5861, "step": 50000 }, { "epoch": 0.03, "learning_rate": 5.582999938801004e-05, "loss": 0.5957, "step": 50500 }, { "epoch": 0.03, "learning_rate": 5.5827899422449475e-05, "loss": 0.5793, "step": 51000 }, { "epoch": 0.03, "learning_rate": 5.58257994568889e-05, "loss": 0.5888, "step": 51500 }, { "epoch": 0.03, "learning_rate": 5.582369949132834e-05, "loss": 0.5848, "step": 52000 }, { "epoch": 0.03, "learning_rate": 5.5821599525767775e-05, "loss": 0.5887, "step": 52500 }, { "epoch": 0.03, "learning_rate": 5.581949956020721e-05, "loss": 0.5913, "step": 53000 }, { "epoch": 0.03, "learning_rate": 5.581739959464665e-05, "loss": 0.5806, "step": 53500 }, { "epoch": 0.03, "learning_rate": 5.581529962908608e-05, "loss": 0.5742, "step": 54000 }, { "epoch": 0.03, "learning_rate": 5.5813203863456636e-05, "loss": 0.5717, "step": 54500 }, { "epoch": 0.03, "learning_rate": 5.581110389789607e-05, "loss": 0.5705, "step": 55000 }, { "epoch": 0.03, "learning_rate": 5.580900393233551e-05, "loss": 0.5651, "step": 55500 }, { "epoch": 0.03, "learning_rate": 5.580690396677494e-05, "loss": 0.566, "step": 56000 }, { "epoch": 0.03, "learning_rate": 5.5804804001214377e-05, "loss": 0.5639, "step": 56500 }, { "epoch": 0.03, "learning_rate": 5.580270403565382e-05, "loss": 0.5625, "step": 57000 }, { "epoch": 0.03, "learning_rate": 5.580060827002437e-05, "loss": 0.5732, "step": 57500 }, { "epoch": 0.03, "learning_rate": 5.5798508304463804e-05, "loss": 0.5654, "step": 58000 }, { "epoch": 0.04, "learning_rate": 5.5796408338903244e-05, "loss": 0.5634, "step": 58500 }, { "epoch": 0.04, "learning_rate": 5.579430837334268e-05, "loss": 0.5601, "step": 59000 }, { "epoch": 0.04, "learning_rate": 5.579221260771323e-05, "loss": 0.5543, "step": 59500 }, { "epoch": 0.04, "learning_rate": 5.5790112642152665e-05, "loss": 0.5553, "step": 60000 }, { "epoch": 0.04, "learning_rate": 5.5788012676592105e-05, "loss": 0.5601, "step": 60500 }, { "epoch": 0.04, "learning_rate": 5.578591271103154e-05, "loss": 0.5546, "step": 61000 }, { "epoch": 0.04, "learning_rate": 5.578381694540209e-05, "loss": 0.5519, "step": 61500 }, { "epoch": 0.04, "learning_rate": 5.5781716979841525e-05, "loss": 0.5419, "step": 62000 }, { "epoch": 0.04, "learning_rate": 5.5779617014280966e-05, "loss": 0.5539, "step": 62500 }, { "epoch": 0.04, "learning_rate": 5.57775170487204e-05, "loss": 0.5537, "step": 63000 }, { "epoch": 0.04, "learning_rate": 5.577541708315983e-05, "loss": 0.5453, "step": 63500 }, { "epoch": 0.04, "learning_rate": 5.577331711759927e-05, "loss": 0.5351, "step": 64000 }, { "epoch": 0.04, "learning_rate": 5.5771217152038706e-05, "loss": 0.5424, "step": 64500 }, { "epoch": 0.04, "learning_rate": 5.576911718647814e-05, "loss": 0.5449, "step": 65000 }, { "epoch": 0.04, "learning_rate": 5.576701722091758e-05, "loss": 0.5305, "step": 65500 }, { "epoch": 0.04, "learning_rate": 5.576491725535701e-05, "loss": 0.5493, "step": 66000 }, { "epoch": 0.04, "learning_rate": 5.576281728979645e-05, "loss": 0.5389, "step": 66500 }, { "epoch": 0.04, "learning_rate": 5.576071732423588e-05, "loss": 0.5438, "step": 67000 }, { "epoch": 0.04, "learning_rate": 5.5758617358675314e-05, "loss": 0.5312, "step": 67500 }, { "epoch": 0.04, "learning_rate": 5.5756517393114754e-05, "loss": 0.5406, "step": 68000 }, { "epoch": 0.04, "learning_rate": 5.575441742755419e-05, "loss": 0.5324, "step": 68500 }, { "epoch": 0.04, "learning_rate": 5.575231746199362e-05, "loss": 0.525, "step": 69000 }, { "epoch": 0.04, "learning_rate": 5.575021749643306e-05, "loss": 0.5379, "step": 69500 }, { "epoch": 0.04, "learning_rate": 5.5748117530872495e-05, "loss": 0.525, "step": 70000 }, { "epoch": 0.04, "learning_rate": 5.574602176524305e-05, "loss": 0.5259, "step": 70500 }, { "epoch": 0.04, "learning_rate": 5.574392179968248e-05, "loss": 0.5382, "step": 71000 }, { "epoch": 0.04, "learning_rate": 5.574182183412192e-05, "loss": 0.5267, "step": 71500 }, { "epoch": 0.04, "learning_rate": 5.5739721868561355e-05, "loss": 0.5234, "step": 72000 }, { "epoch": 0.04, "learning_rate": 5.573762190300079e-05, "loss": 0.5185, "step": 72500 }, { "epoch": 0.04, "learning_rate": 5.573552193744023e-05, "loss": 0.5199, "step": 73000 }, { "epoch": 0.04, "learning_rate": 5.573342617181078e-05, "loss": 0.516, "step": 73500 }, { "epoch": 0.04, "learning_rate": 5.5731326206250216e-05, "loss": 0.5283, "step": 74000 }, { "epoch": 0.04, "learning_rate": 5.5729226240689656e-05, "loss": 0.5129, "step": 74500 }, { "epoch": 0.04, "learning_rate": 5.572712627512909e-05, "loss": 0.5237, "step": 75000 }, { "epoch": 0.05, "learning_rate": 5.572502630956852e-05, "loss": 0.5169, "step": 75500 }, { "epoch": 0.05, "learning_rate": 5.572292634400796e-05, "loss": 0.5173, "step": 76000 }, { "epoch": 0.05, "learning_rate": 5.572083057837852e-05, "loss": 0.5154, "step": 76500 }, { "epoch": 0.05, "learning_rate": 5.571873061281795e-05, "loss": 0.5201, "step": 77000 }, { "epoch": 0.05, "learning_rate": 5.5716630647257384e-05, "loss": 0.5189, "step": 77500 }, { "epoch": 0.05, "learning_rate": 5.5714530681696824e-05, "loss": 0.5188, "step": 78000 }, { "epoch": 0.05, "learning_rate": 5.571243071613626e-05, "loss": 0.5234, "step": 78500 }, { "epoch": 0.05, "learning_rate": 5.5710330750575684e-05, "loss": 0.5079, "step": 79000 }, { "epoch": 0.05, "learning_rate": 5.5708230785015124e-05, "loss": 0.5155, "step": 79500 }, { "epoch": 0.05, "learning_rate": 5.570613081945456e-05, "loss": 0.51, "step": 80000 }, { "epoch": 0.05, "learning_rate": 5.570403505382512e-05, "loss": 0.5124, "step": 80500 }, { "epoch": 0.05, "learning_rate": 5.570193508826455e-05, "loss": 0.5136, "step": 81000 }, { "epoch": 0.05, "learning_rate": 5.5699835122703985e-05, "loss": 0.5012, "step": 81500 }, { "epoch": 0.05, "learning_rate": 5.569773515714342e-05, "loss": 0.5043, "step": 82000 }, { "epoch": 0.05, "learning_rate": 5.569563939151398e-05, "loss": 0.5009, "step": 82500 }, { "epoch": 0.05, "learning_rate": 5.569353942595342e-05, "loss": 0.5163, "step": 83000 }, { "epoch": 0.05, "learning_rate": 5.569143946039285e-05, "loss": 0.4996, "step": 83500 }, { "epoch": 0.05, "learning_rate": 5.568933949483228e-05, "loss": 0.4944, "step": 84000 }, { "epoch": 0.05, "learning_rate": 5.568723952927172e-05, "loss": 0.502, "step": 84500 }, { "epoch": 0.05, "learning_rate": 5.568514376364228e-05, "loss": 0.5044, "step": 85000 }, { "epoch": 0.05, "learning_rate": 5.5683047998012834e-05, "loss": 0.4984, "step": 85500 }, { "epoch": 0.05, "learning_rate": 5.568094803245227e-05, "loss": 0.4897, "step": 86000 }, { "epoch": 0.05, "learning_rate": 5.56788480668917e-05, "loss": 0.4983, "step": 86500 }, { "epoch": 0.05, "learning_rate": 5.567674810133114e-05, "loss": 0.4933, "step": 87000 }, { "epoch": 0.05, "learning_rate": 5.5674648135770574e-05, "loss": 0.4931, "step": 87500 }, { "epoch": 0.05, "learning_rate": 5.567255237014113e-05, "loss": 0.4891, "step": 88000 }, { "epoch": 0.05, "learning_rate": 5.567045660451168e-05, "loss": 0.4873, "step": 88500 }, { "epoch": 0.05, "learning_rate": 5.566835663895112e-05, "loss": 0.5022, "step": 89000 }, { "epoch": 0.05, "learning_rate": 5.5666256673390555e-05, "loss": 0.4927, "step": 89500 }, { "epoch": 0.05, "learning_rate": 5.566415670782999e-05, "loss": 0.4897, "step": 90000 }, { "epoch": 0.05, "learning_rate": 5.566205674226943e-05, "loss": 0.4911, "step": 90500 }, { "epoch": 0.05, "learning_rate": 5.565995677670886e-05, "loss": 0.4928, "step": 91000 }, { "epoch": 0.05, "learning_rate": 5.5657856811148296e-05, "loss": 0.4867, "step": 91500 }, { "epoch": 0.06, "learning_rate": 5.5655756845587736e-05, "loss": 0.4917, "step": 92000 }, { "epoch": 0.06, "learning_rate": 5.565365688002717e-05, "loss": 0.4985, "step": 92500 }, { "epoch": 0.06, "learning_rate": 5.56515569144666e-05, "loss": 0.4989, "step": 93000 }, { "epoch": 0.06, "learning_rate": 5.5649456948906036e-05, "loss": 0.4795, "step": 93500 }, { "epoch": 0.06, "learning_rate": 5.564735698334547e-05, "loss": 0.4883, "step": 94000 }, { "epoch": 0.06, "learning_rate": 5.56452570177849e-05, "loss": 0.4935, "step": 94500 }, { "epoch": 0.06, "learning_rate": 5.5643157052224343e-05, "loss": 0.4823, "step": 95000 }, { "epoch": 0.06, "learning_rate": 5.564105708666378e-05, "loss": 0.4875, "step": 95500 }, { "epoch": 0.06, "learning_rate": 5.563895712110321e-05, "loss": 0.4897, "step": 96000 }, { "epoch": 0.06, "learning_rate": 5.563685715554265e-05, "loss": 0.4853, "step": 96500 }, { "epoch": 0.06, "learning_rate": 5.5634757189982084e-05, "loss": 0.4821, "step": 97000 }, { "epoch": 0.06, "learning_rate": 5.563265722442152e-05, "loss": 0.4917, "step": 97500 }, { "epoch": 0.06, "learning_rate": 5.563055725886096e-05, "loss": 0.4819, "step": 98000 }, { "epoch": 0.06, "learning_rate": 5.562846149323151e-05, "loss": 0.4833, "step": 98500 }, { "epoch": 0.06, "learning_rate": 5.5626361527670945e-05, "loss": 0.4791, "step": 99000 }, { "epoch": 0.06, "learning_rate": 5.5624261562110385e-05, "loss": 0.4753, "step": 99500 }, { "epoch": 0.06, "learning_rate": 5.562216579648094e-05, "loss": 0.4758, "step": 100000 }, { "epoch": 0.06, "eval_loss": 0.4188617467880249, "eval_runtime": 1463.8783, "eval_samples_per_second": 359.811, "eval_steps_per_second": 59.969, "step": 100000 }, { "epoch": 0.06, "learning_rate": 5.562006583092037e-05, "loss": 0.4766, "step": 100500 }, { "epoch": 0.06, "learning_rate": 5.5617965865359805e-05, "loss": 0.4784, "step": 101000 }, { "epoch": 0.06, "learning_rate": 5.5615865899799246e-05, "loss": 0.4848, "step": 101500 }, { "epoch": 0.06, "learning_rate": 5.561376593423868e-05, "loss": 0.4756, "step": 102000 }, { "epoch": 0.06, "learning_rate": 5.561167016860923e-05, "loss": 0.4712, "step": 102500 }, { "epoch": 0.06, "learning_rate": 5.5609570203048666e-05, "loss": 0.4643, "step": 103000 }, { "epoch": 0.06, "learning_rate": 5.5607470237488106e-05, "loss": 0.4696, "step": 103500 }, { "epoch": 0.06, "learning_rate": 5.560537027192754e-05, "loss": 0.4663, "step": 104000 }, { "epoch": 0.06, "learning_rate": 5.560327030636697e-05, "loss": 0.4673, "step": 104500 }, { "epoch": 0.06, "learning_rate": 5.5601174540737534e-05, "loss": 0.4698, "step": 105000 }, { "epoch": 0.06, "learning_rate": 5.559907457517697e-05, "loss": 0.4769, "step": 105500 }, { "epoch": 0.06, "learning_rate": 5.559697880954752e-05, "loss": 0.4661, "step": 106000 }, { "epoch": 0.06, "learning_rate": 5.5594878843986954e-05, "loss": 0.4669, "step": 106500 }, { "epoch": 0.06, "learning_rate": 5.5592778878426394e-05, "loss": 0.4755, "step": 107000 }, { "epoch": 0.06, "learning_rate": 5.559067891286583e-05, "loss": 0.4788, "step": 107500 }, { "epoch": 0.06, "learning_rate": 5.558858314723638e-05, "loss": 0.4656, "step": 108000 }, { "epoch": 0.07, "learning_rate": 5.5586483181675815e-05, "loss": 0.4668, "step": 108500 }, { "epoch": 0.07, "learning_rate": 5.5584383216115255e-05, "loss": 0.4627, "step": 109000 }, { "epoch": 0.07, "learning_rate": 5.558228325055469e-05, "loss": 0.4587, "step": 109500 }, { "epoch": 0.07, "learning_rate": 5.558018328499412e-05, "loss": 0.4613, "step": 110000 }, { "epoch": 0.07, "learning_rate": 5.557808331943356e-05, "loss": 0.4696, "step": 110500 }, { "epoch": 0.07, "learning_rate": 5.5575983353872996e-05, "loss": 0.46, "step": 111000 }, { "epoch": 0.07, "learning_rate": 5.557388338831243e-05, "loss": 0.4726, "step": 111500 }, { "epoch": 0.07, "learning_rate": 5.557179182261411e-05, "loss": 0.4659, "step": 112000 }, { "epoch": 0.07, "learning_rate": 5.556969185705354e-05, "loss": 0.4703, "step": 112500 }, { "epoch": 0.07, "learning_rate": 5.5567591891492977e-05, "loss": 0.4658, "step": 113000 }, { "epoch": 0.07, "learning_rate": 5.556549192593241e-05, "loss": 0.4621, "step": 113500 }, { "epoch": 0.07, "learning_rate": 5.556339196037185e-05, "loss": 0.4674, "step": 114000 }, { "epoch": 0.07, "learning_rate": 5.5561291994811284e-05, "loss": 0.458, "step": 114500 }, { "epoch": 0.07, "learning_rate": 5.555919202925072e-05, "loss": 0.4681, "step": 115000 }, { "epoch": 0.07, "learning_rate": 5.555709206369016e-05, "loss": 0.4529, "step": 115500 }, { "epoch": 0.07, "learning_rate": 5.555499209812959e-05, "loss": 0.4601, "step": 116000 }, { "epoch": 0.07, "learning_rate": 5.5552896332500144e-05, "loss": 0.4605, "step": 116500 }, { "epoch": 0.07, "learning_rate": 5.555079636693958e-05, "loss": 0.4651, "step": 117000 }, { "epoch": 0.07, "learning_rate": 5.554869640137902e-05, "loss": 0.4543, "step": 117500 }, { "epoch": 0.07, "learning_rate": 5.55466048356807e-05, "loss": 0.4616, "step": 118000 }, { "epoch": 0.07, "learning_rate": 5.554450487012013e-05, "loss": 0.462, "step": 118500 }, { "epoch": 0.07, "learning_rate": 5.5542404904559566e-05, "loss": 0.4563, "step": 119000 }, { "epoch": 0.07, "learning_rate": 5.5540304938999e-05, "loss": 0.4565, "step": 119500 }, { "epoch": 0.07, "learning_rate": 5.553820497343843e-05, "loss": 0.4569, "step": 120000 }, { "epoch": 0.07, "learning_rate": 5.5536105007877866e-05, "loss": 0.4499, "step": 120500 }, { "epoch": 0.07, "learning_rate": 5.5534005042317306e-05, "loss": 0.4561, "step": 121000 }, { "epoch": 0.07, "learning_rate": 5.553190507675674e-05, "loss": 0.4622, "step": 121500 }, { "epoch": 0.07, "learning_rate": 5.552980511119617e-05, "loss": 0.4593, "step": 122000 }, { "epoch": 0.07, "learning_rate": 5.552770514563561e-05, "loss": 0.4452, "step": 122500 }, { "epoch": 0.07, "learning_rate": 5.552560518007505e-05, "loss": 0.4466, "step": 123000 }, { "epoch": 0.07, "learning_rate": 5.552350521451448e-05, "loss": 0.4473, "step": 123500 }, { "epoch": 0.07, "learning_rate": 5.552140524895392e-05, "loss": 0.4585, "step": 124000 }, { "epoch": 0.07, "learning_rate": 5.5519305283393354e-05, "loss": 0.4428, "step": 124500 }, { "epoch": 0.07, "learning_rate": 5.551720531783278e-05, "loss": 0.457, "step": 125000 }, { "epoch": 0.08, "learning_rate": 5.551510535227222e-05, "loss": 0.4548, "step": 125500 }, { "epoch": 0.08, "learning_rate": 5.551300958664278e-05, "loss": 0.4563, "step": 126000 }, { "epoch": 0.08, "learning_rate": 5.5510909621082215e-05, "loss": 0.4404, "step": 126500 }, { "epoch": 0.08, "learning_rate": 5.5508809655521655e-05, "loss": 0.4466, "step": 127000 }, { "epoch": 0.08, "learning_rate": 5.550670968996108e-05, "loss": 0.4426, "step": 127500 }, { "epoch": 0.08, "learning_rate": 5.5504609724400515e-05, "loss": 0.4459, "step": 128000 }, { "epoch": 0.08, "learning_rate": 5.5502513958771075e-05, "loss": 0.4522, "step": 128500 }, { "epoch": 0.08, "learning_rate": 5.5500413993210516e-05, "loss": 0.4473, "step": 129000 }, { "epoch": 0.08, "learning_rate": 5.549831402764995e-05, "loss": 0.4513, "step": 129500 }, { "epoch": 0.08, "learning_rate": 5.5496214062089376e-05, "loss": 0.4486, "step": 130000 }, { "epoch": 0.08, "learning_rate": 5.5494114096528816e-05, "loss": 0.4447, "step": 130500 }, { "epoch": 0.08, "learning_rate": 5.5492018330899376e-05, "loss": 0.4521, "step": 131000 }, { "epoch": 0.08, "learning_rate": 5.548991836533881e-05, "loss": 0.4408, "step": 131500 }, { "epoch": 0.08, "learning_rate": 5.5487818399778237e-05, "loss": 0.4317, "step": 132000 }, { "epoch": 0.08, "learning_rate": 5.548571843421768e-05, "loss": 0.4462, "step": 132500 }, { "epoch": 0.08, "learning_rate": 5.548361846865711e-05, "loss": 0.4425, "step": 133000 }, { "epoch": 0.08, "learning_rate": 5.5481518503096544e-05, "loss": 0.4411, "step": 133500 }, { "epoch": 0.08, "learning_rate": 5.5479418537535984e-05, "loss": 0.4425, "step": 134000 }, { "epoch": 0.08, "learning_rate": 5.547731857197542e-05, "loss": 0.4325, "step": 134500 }, { "epoch": 0.08, "learning_rate": 5.547522280634597e-05, "loss": 0.4416, "step": 135000 }, { "epoch": 0.08, "learning_rate": 5.547312704071653e-05, "loss": 0.4449, "step": 135500 }, { "epoch": 0.08, "learning_rate": 5.5471031275087085e-05, "loss": 0.4314, "step": 136000 }, { "epoch": 0.08, "learning_rate": 5.5468931309526525e-05, "loss": 0.4423, "step": 136500 }, { "epoch": 0.08, "learning_rate": 5.546683134396596e-05, "loss": 0.4397, "step": 137000 }, { "epoch": 0.08, "learning_rate": 5.546473137840539e-05, "loss": 0.451, "step": 137500 }, { "epoch": 0.08, "learning_rate": 5.546263141284483e-05, "loss": 0.4381, "step": 138000 }, { "epoch": 0.08, "learning_rate": 5.5460531447284266e-05, "loss": 0.4419, "step": 138500 }, { "epoch": 0.08, "learning_rate": 5.54584314817237e-05, "loss": 0.4423, "step": 139000 }, { "epoch": 0.08, "learning_rate": 5.545633151616313e-05, "loss": 0.4388, "step": 139500 }, { "epoch": 0.08, "learning_rate": 5.5454231550602566e-05, "loss": 0.439, "step": 140000 }, { "epoch": 0.08, "learning_rate": 5.5452135784973126e-05, "loss": 0.4423, "step": 140500 }, { "epoch": 0.08, "learning_rate": 5.545004001934368e-05, "loss": 0.4337, "step": 141000 }, { "epoch": 0.08, "learning_rate": 5.544794005378312e-05, "loss": 0.433, "step": 141500 }, { "epoch": 0.09, "learning_rate": 5.5445840088222554e-05, "loss": 0.4321, "step": 142000 }, { "epoch": 0.09, "learning_rate": 5.544374012266199e-05, "loss": 0.4354, "step": 142500 }, { "epoch": 0.09, "learning_rate": 5.544164015710143e-05, "loss": 0.4414, "step": 143000 }, { "epoch": 0.09, "learning_rate": 5.543954019154086e-05, "loss": 0.4383, "step": 143500 }, { "epoch": 0.09, "learning_rate": 5.543744022598029e-05, "loss": 0.4277, "step": 144000 }, { "epoch": 0.09, "learning_rate": 5.543534026041973e-05, "loss": 0.4329, "step": 144500 }, { "epoch": 0.09, "learning_rate": 5.543324449479029e-05, "loss": 0.4383, "step": 145000 }, { "epoch": 0.09, "learning_rate": 5.543114452922972e-05, "loss": 0.4345, "step": 145500 }, { "epoch": 0.09, "learning_rate": 5.5429044563669155e-05, "loss": 0.4318, "step": 146000 }, { "epoch": 0.09, "learning_rate": 5.542694459810859e-05, "loss": 0.4278, "step": 146500 }, { "epoch": 0.09, "learning_rate": 5.542484883247915e-05, "loss": 0.4366, "step": 147000 }, { "epoch": 0.09, "learning_rate": 5.542274886691858e-05, "loss": 0.4337, "step": 147500 }, { "epoch": 0.09, "learning_rate": 5.542064890135802e-05, "loss": 0.4334, "step": 148000 }, { "epoch": 0.09, "learning_rate": 5.5418548935797456e-05, "loss": 0.4361, "step": 148500 }, { "epoch": 0.09, "learning_rate": 5.541644897023688e-05, "loss": 0.4277, "step": 149000 }, { "epoch": 0.09, "learning_rate": 5.541434900467632e-05, "loss": 0.4323, "step": 149500 }, { "epoch": 0.09, "learning_rate": 5.5412249039115756e-05, "loss": 0.4315, "step": 150000 }, { "epoch": 0.09, "learning_rate": 5.541014907355519e-05, "loss": 0.4296, "step": 150500 }, { "epoch": 0.09, "learning_rate": 5.540805330792575e-05, "loss": 0.4193, "step": 151000 }, { "epoch": 0.09, "learning_rate": 5.5405953342365184e-05, "loss": 0.4244, "step": 151500 }, { "epoch": 0.09, "learning_rate": 5.540385337680462e-05, "loss": 0.4283, "step": 152000 }, { "epoch": 0.09, "learning_rate": 5.540175341124405e-05, "loss": 0.4335, "step": 152500 }, { "epoch": 0.09, "learning_rate": 5.539965344568349e-05, "loss": 0.43, "step": 153000 }, { "epoch": 0.09, "learning_rate": 5.5397557680054044e-05, "loss": 0.4356, "step": 153500 }, { "epoch": 0.09, "learning_rate": 5.539545771449348e-05, "loss": 0.4342, "step": 154000 }, { "epoch": 0.09, "learning_rate": 5.539335774893292e-05, "loss": 0.4295, "step": 154500 }, { "epoch": 0.09, "learning_rate": 5.539125778337235e-05, "loss": 0.4247, "step": 155000 }, { "epoch": 0.09, "learning_rate": 5.538916201774291e-05, "loss": 0.4325, "step": 155500 }, { "epoch": 0.09, "learning_rate": 5.538706205218234e-05, "loss": 0.4211, "step": 156000 }, { "epoch": 0.09, "learning_rate": 5.538496208662178e-05, "loss": 0.4278, "step": 156500 }, { "epoch": 0.09, "learning_rate": 5.538286632099234e-05, "loss": 0.4199, "step": 157000 }, { "epoch": 0.09, "learning_rate": 5.538076635543177e-05, "loss": 0.426, "step": 157500 }, { "epoch": 0.09, "learning_rate": 5.5378666389871206e-05, "loss": 0.4256, "step": 158000 }, { "epoch": 0.1, "learning_rate": 5.537656642431064e-05, "loss": 0.4297, "step": 158500 }, { "epoch": 0.1, "learning_rate": 5.537446645875007e-05, "loss": 0.422, "step": 159000 }, { "epoch": 0.1, "learning_rate": 5.5372366493189506e-05, "loss": 0.427, "step": 159500 }, { "epoch": 0.1, "learning_rate": 5.537026652762895e-05, "loss": 0.4286, "step": 160000 }, { "epoch": 0.1, "learning_rate": 5.536816656206838e-05, "loss": 0.4255, "step": 160500 }, { "epoch": 0.1, "learning_rate": 5.5366066596507814e-05, "loss": 0.4217, "step": 161000 }, { "epoch": 0.1, "learning_rate": 5.5363966630947254e-05, "loss": 0.4265, "step": 161500 }, { "epoch": 0.1, "learning_rate": 5.536186666538669e-05, "loss": 0.4225, "step": 162000 }, { "epoch": 0.1, "learning_rate": 5.535976669982612e-05, "loss": 0.4129, "step": 162500 }, { "epoch": 0.1, "learning_rate": 5.5357675134127794e-05, "loss": 0.4189, "step": 163000 }, { "epoch": 0.1, "learning_rate": 5.5355579368498355e-05, "loss": 0.4356, "step": 163500 }, { "epoch": 0.1, "learning_rate": 5.535348360286891e-05, "loss": 0.4138, "step": 164000 }, { "epoch": 0.1, "learning_rate": 5.535138363730835e-05, "loss": 0.4174, "step": 164500 }, { "epoch": 0.1, "learning_rate": 5.534928367174778e-05, "loss": 0.4203, "step": 165000 }, { "epoch": 0.1, "learning_rate": 5.5347183706187216e-05, "loss": 0.416, "step": 165500 }, { "epoch": 0.1, "learning_rate": 5.5345083740626656e-05, "loss": 0.418, "step": 166000 }, { "epoch": 0.1, "learning_rate": 5.534298377506609e-05, "loss": 0.4276, "step": 166500 }, { "epoch": 0.1, "learning_rate": 5.534088800943664e-05, "loss": 0.426, "step": 167000 }, { "epoch": 0.1, "learning_rate": 5.533878804387608e-05, "loss": 0.421, "step": 167500 }, { "epoch": 0.1, "learning_rate": 5.5336688078315516e-05, "loss": 0.4202, "step": 168000 }, { "epoch": 0.1, "learning_rate": 5.533458811275495e-05, "loss": 0.4176, "step": 168500 }, { "epoch": 0.1, "learning_rate": 5.533248814719439e-05, "loss": 0.418, "step": 169000 }, { "epoch": 0.1, "learning_rate": 5.5330388181633824e-05, "loss": 0.4193, "step": 169500 }, { "epoch": 0.1, "learning_rate": 5.532828821607326e-05, "loss": 0.4184, "step": 170000 }, { "epoch": 0.1, "learning_rate": 5.532618825051269e-05, "loss": 0.4254, "step": 170500 }, { "epoch": 0.1, "learning_rate": 5.5324088284952124e-05, "loss": 0.4183, "step": 171000 }, { "epoch": 0.1, "learning_rate": 5.532198831939156e-05, "loss": 0.4116, "step": 171500 }, { "epoch": 0.1, "learning_rate": 5.5319888353831e-05, "loss": 0.4253, "step": 172000 }, { "epoch": 0.1, "learning_rate": 5.531778838827043e-05, "loss": 0.4157, "step": 172500 }, { "epoch": 0.1, "learning_rate": 5.5315688422709865e-05, "loss": 0.4147, "step": 173000 }, { "epoch": 0.1, "learning_rate": 5.5313588457149305e-05, "loss": 0.418, "step": 173500 }, { "epoch": 0.1, "learning_rate": 5.531149269151986e-05, "loss": 0.4159, "step": 174000 }, { "epoch": 0.1, "learning_rate": 5.530939692589042e-05, "loss": 0.4249, "step": 174500 }, { "epoch": 0.1, "learning_rate": 5.5307296960329845e-05, "loss": 0.4197, "step": 175000 }, { "epoch": 0.11, "learning_rate": 5.5305201194700406e-05, "loss": 0.4136, "step": 175500 }, { "epoch": 0.11, "learning_rate": 5.5303101229139846e-05, "loss": 0.4135, "step": 176000 }, { "epoch": 0.11, "learning_rate": 5.530100126357928e-05, "loss": 0.4221, "step": 176500 }, { "epoch": 0.11, "learning_rate": 5.529890129801871e-05, "loss": 0.4109, "step": 177000 }, { "epoch": 0.11, "learning_rate": 5.5296801332458146e-05, "loss": 0.4158, "step": 177500 }, { "epoch": 0.11, "learning_rate": 5.529470136689758e-05, "loss": 0.4116, "step": 178000 }, { "epoch": 0.11, "learning_rate": 5.529260140133701e-05, "loss": 0.4113, "step": 178500 }, { "epoch": 0.11, "learning_rate": 5.5290505635707574e-05, "loss": 0.4084, "step": 179000 }, { "epoch": 0.11, "learning_rate": 5.5288405670147014e-05, "loss": 0.4126, "step": 179500 }, { "epoch": 0.11, "learning_rate": 5.528630570458644e-05, "loss": 0.4181, "step": 180000 }, { "epoch": 0.11, "learning_rate": 5.5284205739025874e-05, "loss": 0.4165, "step": 180500 }, { "epoch": 0.11, "learning_rate": 5.5282105773465314e-05, "loss": 0.4147, "step": 181000 }, { "epoch": 0.11, "learning_rate": 5.528000580790475e-05, "loss": 0.4092, "step": 181500 }, { "epoch": 0.11, "learning_rate": 5.527790584234418e-05, "loss": 0.4088, "step": 182000 }, { "epoch": 0.11, "learning_rate": 5.527581007671474e-05, "loss": 0.412, "step": 182500 }, { "epoch": 0.11, "learning_rate": 5.5273710111154175e-05, "loss": 0.419, "step": 183000 }, { "epoch": 0.11, "learning_rate": 5.527161014559361e-05, "loss": 0.4161, "step": 183500 }, { "epoch": 0.11, "learning_rate": 5.526951018003305e-05, "loss": 0.4086, "step": 184000 }, { "epoch": 0.11, "learning_rate": 5.526741021447248e-05, "loss": 0.4079, "step": 184500 }, { "epoch": 0.11, "learning_rate": 5.5265310248911916e-05, "loss": 0.4116, "step": 185000 }, { "epoch": 0.11, "learning_rate": 5.526321448328247e-05, "loss": 0.4101, "step": 185500 }, { "epoch": 0.11, "learning_rate": 5.526111451772191e-05, "loss": 0.4155, "step": 186000 }, { "epoch": 0.11, "learning_rate": 5.525901455216134e-05, "loss": 0.4071, "step": 186500 }, { "epoch": 0.11, "learning_rate": 5.5256914586600776e-05, "loss": 0.4119, "step": 187000 }, { "epoch": 0.11, "learning_rate": 5.525481462104022e-05, "loss": 0.4115, "step": 187500 }, { "epoch": 0.11, "learning_rate": 5.525271465547965e-05, "loss": 0.4148, "step": 188000 }, { "epoch": 0.11, "learning_rate": 5.5250614689919084e-05, "loss": 0.4143, "step": 188500 }, { "epoch": 0.11, "learning_rate": 5.5248514724358524e-05, "loss": 0.4145, "step": 189000 }, { "epoch": 0.11, "learning_rate": 5.524641475879796e-05, "loss": 0.4082, "step": 189500 }, { "epoch": 0.11, "learning_rate": 5.5244314793237384e-05, "loss": 0.4026, "step": 190000 }, { "epoch": 0.11, "learning_rate": 5.5242214827676824e-05, "loss": 0.4091, "step": 190500 }, { "epoch": 0.11, "learning_rate": 5.524011486211626e-05, "loss": 0.4181, "step": 191000 }, { "epoch": 0.11, "learning_rate": 5.523801489655569e-05, "loss": 0.4103, "step": 191500 }, { "epoch": 0.12, "learning_rate": 5.523591493099513e-05, "loss": 0.4039, "step": 192000 }, { "epoch": 0.12, "learning_rate": 5.5233814965434565e-05, "loss": 0.4091, "step": 192500 }, { "epoch": 0.12, "learning_rate": 5.523171919980512e-05, "loss": 0.4057, "step": 193000 }, { "epoch": 0.12, "learning_rate": 5.522961923424456e-05, "loss": 0.406, "step": 193500 }, { "epoch": 0.12, "learning_rate": 5.522751926868399e-05, "loss": 0.4119, "step": 194000 }, { "epoch": 0.12, "learning_rate": 5.5225419303123425e-05, "loss": 0.4022, "step": 194500 }, { "epoch": 0.12, "learning_rate": 5.5223319337562866e-05, "loss": 0.4067, "step": 195000 }, { "epoch": 0.12, "learning_rate": 5.52212193720023e-05, "loss": 0.405, "step": 195500 }, { "epoch": 0.12, "learning_rate": 5.521911940644173e-05, "loss": 0.4099, "step": 196000 }, { "epoch": 0.12, "learning_rate": 5.5217023640812286e-05, "loss": 0.4029, "step": 196500 }, { "epoch": 0.12, "learning_rate": 5.5214923675251726e-05, "loss": 0.4078, "step": 197000 }, { "epoch": 0.12, "learning_rate": 5.521282370969116e-05, "loss": 0.4075, "step": 197500 }, { "epoch": 0.12, "learning_rate": 5.521072374413059e-05, "loss": 0.4093, "step": 198000 }, { "epoch": 0.12, "learning_rate": 5.5208623778570034e-05, "loss": 0.4008, "step": 198500 }, { "epoch": 0.12, "learning_rate": 5.520652801294059e-05, "loss": 0.4099, "step": 199000 }, { "epoch": 0.12, "learning_rate": 5.520443224731114e-05, "loss": 0.4116, "step": 199500 }, { "epoch": 0.12, "learning_rate": 5.5202332281750574e-05, "loss": 0.4025, "step": 200000 }, { "epoch": 0.12, "eval_loss": 0.3602813482284546, "eval_runtime": 1456.1894, "eval_samples_per_second": 361.711, "eval_steps_per_second": 60.285, "step": 200000 }, { "epoch": 0.12, "learning_rate": 5.5200232316190014e-05, "loss": 0.4132, "step": 200500 }, { "epoch": 0.12, "learning_rate": 5.519813235062945e-05, "loss": 0.4045, "step": 201000 }, { "epoch": 0.12, "learning_rate": 5.519603238506888e-05, "loss": 0.4019, "step": 201500 }, { "epoch": 0.12, "learning_rate": 5.519393241950832e-05, "loss": 0.4016, "step": 202000 }, { "epoch": 0.12, "learning_rate": 5.5191832453947755e-05, "loss": 0.4016, "step": 202500 }, { "epoch": 0.12, "learning_rate": 5.518973248838719e-05, "loss": 0.4096, "step": 203000 }, { "epoch": 0.12, "learning_rate": 5.518763672275774e-05, "loss": 0.4073, "step": 203500 }, { "epoch": 0.12, "learning_rate": 5.518553675719718e-05, "loss": 0.3978, "step": 204000 }, { "epoch": 0.12, "learning_rate": 5.5183436791636616e-05, "loss": 0.4116, "step": 204500 }, { "epoch": 0.12, "learning_rate": 5.5181345225938296e-05, "loss": 0.3938, "step": 205000 }, { "epoch": 0.12, "learning_rate": 5.517924526037773e-05, "loss": 0.4072, "step": 205500 }, { "epoch": 0.12, "learning_rate": 5.517714529481717e-05, "loss": 0.4074, "step": 206000 }, { "epoch": 0.12, "learning_rate": 5.5175045329256603e-05, "loss": 0.3947, "step": 206500 }, { "epoch": 0.12, "learning_rate": 5.517294536369603e-05, "loss": 0.4071, "step": 207000 }, { "epoch": 0.12, "learning_rate": 5.517084539813547e-05, "loss": 0.401, "step": 207500 }, { "epoch": 0.12, "learning_rate": 5.5168745432574904e-05, "loss": 0.4028, "step": 208000 }, { "epoch": 0.13, "learning_rate": 5.516664546701434e-05, "loss": 0.402, "step": 208500 }, { "epoch": 0.13, "learning_rate": 5.516454550145378e-05, "loss": 0.404, "step": 209000 }, { "epoch": 0.13, "learning_rate": 5.516244553589321e-05, "loss": 0.4008, "step": 209500 }, { "epoch": 0.13, "learning_rate": 5.5160345570332644e-05, "loss": 0.3951, "step": 210000 }, { "epoch": 0.13, "learning_rate": 5.5158245604772085e-05, "loss": 0.4028, "step": 210500 }, { "epoch": 0.13, "learning_rate": 5.515614563921152e-05, "loss": 0.3955, "step": 211000 }, { "epoch": 0.13, "learning_rate": 5.515404987358207e-05, "loss": 0.3963, "step": 211500 }, { "epoch": 0.13, "learning_rate": 5.5151954107952625e-05, "loss": 0.4061, "step": 212000 }, { "epoch": 0.13, "learning_rate": 5.5149854142392065e-05, "loss": 0.3977, "step": 212500 }, { "epoch": 0.13, "learning_rate": 5.51477541768315e-05, "loss": 0.3988, "step": 213000 }, { "epoch": 0.13, "learning_rate": 5.514565421127093e-05, "loss": 0.4016, "step": 213500 }, { "epoch": 0.13, "learning_rate": 5.514355424571037e-05, "loss": 0.3964, "step": 214000 }, { "epoch": 0.13, "learning_rate": 5.5141454280149806e-05, "loss": 0.3928, "step": 214500 }, { "epoch": 0.13, "learning_rate": 5.513935431458924e-05, "loss": 0.3944, "step": 215000 }, { "epoch": 0.13, "learning_rate": 5.513725434902868e-05, "loss": 0.3999, "step": 215500 }, { "epoch": 0.13, "learning_rate": 5.513515438346811e-05, "loss": 0.3927, "step": 216000 }, { "epoch": 0.13, "learning_rate": 5.513305861783867e-05, "loss": 0.4025, "step": 216500 }, { "epoch": 0.13, "learning_rate": 5.51309586522781e-05, "loss": 0.403, "step": 217000 }, { "epoch": 0.13, "learning_rate": 5.512885868671754e-05, "loss": 0.3986, "step": 217500 }, { "epoch": 0.13, "learning_rate": 5.5126758721156974e-05, "loss": 0.4009, "step": 218000 }, { "epoch": 0.13, "learning_rate": 5.512465875559641e-05, "loss": 0.3935, "step": 218500 }, { "epoch": 0.13, "learning_rate": 5.512255879003585e-05, "loss": 0.3926, "step": 219000 }, { "epoch": 0.13, "learning_rate": 5.5120458824475274e-05, "loss": 0.397, "step": 219500 }, { "epoch": 0.13, "learning_rate": 5.511835885891471e-05, "loss": 0.3966, "step": 220000 }, { "epoch": 0.13, "learning_rate": 5.511626729321639e-05, "loss": 0.3969, "step": 220500 }, { "epoch": 0.13, "learning_rate": 5.511416732765583e-05, "loss": 0.3954, "step": 221000 }, { "epoch": 0.13, "learning_rate": 5.511206736209526e-05, "loss": 0.395, "step": 221500 }, { "epoch": 0.13, "learning_rate": 5.5109967396534695e-05, "loss": 0.4024, "step": 222000 }, { "epoch": 0.13, "learning_rate": 5.5107867430974136e-05, "loss": 0.3966, "step": 222500 }, { "epoch": 0.13, "learning_rate": 5.510576746541357e-05, "loss": 0.4017, "step": 223000 }, { "epoch": 0.13, "learning_rate": 5.5103667499853e-05, "loss": 0.3943, "step": 223500 }, { "epoch": 0.13, "learning_rate": 5.510156753429244e-05, "loss": 0.3931, "step": 224000 }, { "epoch": 0.13, "learning_rate": 5.5099471768662996e-05, "loss": 0.3857, "step": 224500 }, { "epoch": 0.13, "learning_rate": 5.509737180310243e-05, "loss": 0.3974, "step": 225000 }, { "epoch": 0.14, "learning_rate": 5.509527183754186e-05, "loss": 0.3892, "step": 225500 }, { "epoch": 0.14, "learning_rate": 5.5093171871981304e-05, "loss": 0.3923, "step": 226000 }, { "epoch": 0.14, "learning_rate": 5.509107610635186e-05, "loss": 0.3939, "step": 226500 }, { "epoch": 0.14, "learning_rate": 5.508897614079129e-05, "loss": 0.391, "step": 227000 }, { "epoch": 0.14, "learning_rate": 5.5086876175230724e-05, "loss": 0.3907, "step": 227500 }, { "epoch": 0.14, "learning_rate": 5.5084776209670164e-05, "loss": 0.4009, "step": 228000 }, { "epoch": 0.14, "learning_rate": 5.50826762441096e-05, "loss": 0.3896, "step": 228500 }, { "epoch": 0.14, "learning_rate": 5.508058047848015e-05, "loss": 0.3965, "step": 229000 }, { "epoch": 0.14, "learning_rate": 5.507848051291959e-05, "loss": 0.3958, "step": 229500 }, { "epoch": 0.14, "learning_rate": 5.5076380547359025e-05, "loss": 0.4004, "step": 230000 }, { "epoch": 0.14, "learning_rate": 5.507428058179846e-05, "loss": 0.3997, "step": 230500 }, { "epoch": 0.14, "learning_rate": 5.507218481616901e-05, "loss": 0.3907, "step": 231000 }, { "epoch": 0.14, "learning_rate": 5.507008485060845e-05, "loss": 0.3905, "step": 231500 }, { "epoch": 0.14, "learning_rate": 5.5067984885047886e-05, "loss": 0.3881, "step": 232000 }, { "epoch": 0.14, "learning_rate": 5.506588491948732e-05, "loss": 0.3871, "step": 232500 }, { "epoch": 0.14, "learning_rate": 5.506378915385787e-05, "loss": 0.3885, "step": 233000 }, { "epoch": 0.14, "learning_rate": 5.506169338822843e-05, "loss": 0.3996, "step": 233500 }, { "epoch": 0.14, "learning_rate": 5.5059593422667867e-05, "loss": 0.3862, "step": 234000 }, { "epoch": 0.14, "learning_rate": 5.50574934571073e-05, "loss": 0.3844, "step": 234500 }, { "epoch": 0.14, "learning_rate": 5.505539349154674e-05, "loss": 0.3904, "step": 235000 }, { "epoch": 0.14, "learning_rate": 5.5053293525986174e-05, "loss": 0.389, "step": 235500 }, { "epoch": 0.14, "learning_rate": 5.505119356042561e-05, "loss": 0.3865, "step": 236000 }, { "epoch": 0.14, "learning_rate": 5.504909359486505e-05, "loss": 0.386, "step": 236500 }, { "epoch": 0.14, "learning_rate": 5.504699362930448e-05, "loss": 0.3911, "step": 237000 }, { "epoch": 0.14, "learning_rate": 5.5044893663743914e-05, "loss": 0.3926, "step": 237500 }, { "epoch": 0.14, "learning_rate": 5.504279789811447e-05, "loss": 0.3917, "step": 238000 }, { "epoch": 0.14, "learning_rate": 5.504069793255391e-05, "loss": 0.3949, "step": 238500 }, { "epoch": 0.14, "learning_rate": 5.503859796699334e-05, "loss": 0.386, "step": 239000 }, { "epoch": 0.14, "learning_rate": 5.5036498001432775e-05, "loss": 0.3814, "step": 239500 }, { "epoch": 0.14, "learning_rate": 5.503440223580333e-05, "loss": 0.3902, "step": 240000 }, { "epoch": 0.14, "learning_rate": 5.503230227024277e-05, "loss": 0.3831, "step": 240500 }, { "epoch": 0.14, "learning_rate": 5.50302023046822e-05, "loss": 0.3918, "step": 241000 }, { "epoch": 0.14, "learning_rate": 5.5028102339121636e-05, "loss": 0.3876, "step": 241500 }, { "epoch": 0.15, "learning_rate": 5.5026002373561076e-05, "loss": 0.3936, "step": 242000 }, { "epoch": 0.15, "learning_rate": 5.502390240800051e-05, "loss": 0.3855, "step": 242500 }, { "epoch": 0.15, "learning_rate": 5.502180244243994e-05, "loss": 0.3869, "step": 243000 }, { "epoch": 0.15, "learning_rate": 5.5019702476879376e-05, "loss": 0.3872, "step": 243500 }, { "epoch": 0.15, "learning_rate": 5.501760671124994e-05, "loss": 0.3844, "step": 244000 }, { "epoch": 0.15, "learning_rate": 5.501550674568937e-05, "loss": 0.3823, "step": 244500 }, { "epoch": 0.15, "learning_rate": 5.501340678012881e-05, "loss": 0.3898, "step": 245000 }, { "epoch": 0.15, "learning_rate": 5.501130681456824e-05, "loss": 0.3866, "step": 245500 }, { "epoch": 0.15, "learning_rate": 5.50092110489388e-05, "loss": 0.3907, "step": 246000 }, { "epoch": 0.15, "learning_rate": 5.500711108337823e-05, "loss": 0.3872, "step": 246500 }, { "epoch": 0.15, "learning_rate": 5.500501111781767e-05, "loss": 0.3857, "step": 247000 }, { "epoch": 0.15, "learning_rate": 5.5002911152257105e-05, "loss": 0.3876, "step": 247500 }, { "epoch": 0.15, "learning_rate": 5.500081118669653e-05, "loss": 0.3908, "step": 248000 }, { "epoch": 0.15, "learning_rate": 5.499871542106709e-05, "loss": 0.3881, "step": 248500 }, { "epoch": 0.15, "learning_rate": 5.499661545550653e-05, "loss": 0.3939, "step": 249000 }, { "epoch": 0.15, "learning_rate": 5.4994515489945965e-05, "loss": 0.386, "step": 249500 }, { "epoch": 0.15, "learning_rate": 5.49924155243854e-05, "loss": 0.3843, "step": 250000 }, { "epoch": 0.15, "learning_rate": 5.499031555882483e-05, "loss": 0.3861, "step": 250500 }, { "epoch": 0.15, "learning_rate": 5.4988215593264266e-05, "loss": 0.3855, "step": 251000 }, { "epoch": 0.15, "learning_rate": 5.4986119827634826e-05, "loss": 0.4056, "step": 251500 }, { "epoch": 0.15, "learning_rate": 5.4984019862074266e-05, "loss": 0.3836, "step": 252000 }, { "epoch": 0.15, "learning_rate": 5.49819198965137e-05, "loss": 0.3884, "step": 252500 }, { "epoch": 0.15, "learning_rate": 5.4979819930953126e-05, "loss": 0.3852, "step": 253000 }, { "epoch": 0.15, "learning_rate": 5.497771996539257e-05, "loss": 0.3754, "step": 253500 }, { "epoch": 0.15, "learning_rate": 5.4975619999832e-05, "loss": 0.3869, "step": 254000 }, { "epoch": 0.15, "learning_rate": 5.497352423420256e-05, "loss": 0.3813, "step": 254500 }, { "epoch": 0.15, "learning_rate": 5.4971424268641994e-05, "loss": 0.3857, "step": 255000 }, { "epoch": 0.15, "learning_rate": 5.496932850301255e-05, "loss": 0.3887, "step": 255500 }, { "epoch": 0.15, "learning_rate": 5.496722853745199e-05, "loss": 0.3822, "step": 256000 }, { "epoch": 0.15, "learning_rate": 5.496512857189142e-05, "loss": 0.3857, "step": 256500 }, { "epoch": 0.15, "learning_rate": 5.4963028606330855e-05, "loss": 0.3863, "step": 257000 }, { "epoch": 0.15, "learning_rate": 5.496092864077029e-05, "loss": 0.3857, "step": 257500 }, { "epoch": 0.15, "learning_rate": 5.495882867520972e-05, "loss": 0.3786, "step": 258000 }, { "epoch": 0.15, "learning_rate": 5.495672870964916e-05, "loss": 0.3852, "step": 258500 }, { "epoch": 0.16, "learning_rate": 5.4954628744088595e-05, "loss": 0.3803, "step": 259000 }, { "epoch": 0.16, "learning_rate": 5.495252877852803e-05, "loss": 0.3777, "step": 259500 }, { "epoch": 0.16, "learning_rate": 5.495043301289858e-05, "loss": 0.3912, "step": 260000 }, { "epoch": 0.16, "learning_rate": 5.494833304733802e-05, "loss": 0.3779, "step": 260500 }, { "epoch": 0.16, "learning_rate": 5.4946233081777456e-05, "loss": 0.3847, "step": 261000 }, { "epoch": 0.16, "learning_rate": 5.494413311621689e-05, "loss": 0.3845, "step": 261500 }, { "epoch": 0.16, "learning_rate": 5.494203735058745e-05, "loss": 0.3814, "step": 262000 }, { "epoch": 0.16, "learning_rate": 5.4939941584958003e-05, "loss": 0.3919, "step": 262500 }, { "epoch": 0.16, "learning_rate": 5.4937841619397444e-05, "loss": 0.3812, "step": 263000 }, { "epoch": 0.16, "learning_rate": 5.493574165383688e-05, "loss": 0.3784, "step": 263500 }, { "epoch": 0.16, "learning_rate": 5.493364168827631e-05, "loss": 0.3857, "step": 264000 }, { "epoch": 0.16, "learning_rate": 5.493154172271575e-05, "loss": 0.3831, "step": 264500 }, { "epoch": 0.16, "learning_rate": 5.492944175715518e-05, "loss": 0.3902, "step": 265000 }, { "epoch": 0.16, "learning_rate": 5.492734179159462e-05, "loss": 0.3797, "step": 265500 }, { "epoch": 0.16, "learning_rate": 5.492524182603405e-05, "loss": 0.3822, "step": 266000 }, { "epoch": 0.16, "learning_rate": 5.4923141860473485e-05, "loss": 0.3855, "step": 266500 }, { "epoch": 0.16, "learning_rate": 5.4921041894912925e-05, "loss": 0.3745, "step": 267000 }, { "epoch": 0.16, "learning_rate": 5.491894192935236e-05, "loss": 0.3789, "step": 267500 }, { "epoch": 0.16, "learning_rate": 5.491684196379179e-05, "loss": 0.3772, "step": 268000 }, { "epoch": 0.16, "learning_rate": 5.4914746198162345e-05, "loss": 0.3891, "step": 268500 }, { "epoch": 0.16, "learning_rate": 5.4912654632464026e-05, "loss": 0.3847, "step": 269000 }, { "epoch": 0.16, "learning_rate": 5.491055466690346e-05, "loss": 0.3832, "step": 269500 }, { "epoch": 0.16, "learning_rate": 5.49084547013429e-05, "loss": 0.3845, "step": 270000 }, { "epoch": 0.16, "learning_rate": 5.490635473578233e-05, "loss": 0.3781, "step": 270500 }, { "epoch": 0.16, "learning_rate": 5.490425477022177e-05, "loss": 0.3854, "step": 271000 }, { "epoch": 0.16, "learning_rate": 5.490215480466121e-05, "loss": 0.3885, "step": 271500 }, { "epoch": 0.16, "learning_rate": 5.490005483910063e-05, "loss": 0.3785, "step": 272000 }, { "epoch": 0.16, "learning_rate": 5.4897954873540074e-05, "loss": 0.3795, "step": 272500 }, { "epoch": 0.16, "learning_rate": 5.489585490797951e-05, "loss": 0.3781, "step": 273000 }, { "epoch": 0.16, "learning_rate": 5.489375914235007e-05, "loss": 0.3885, "step": 273500 }, { "epoch": 0.16, "learning_rate": 5.48916591767895e-05, "loss": 0.3703, "step": 274000 }, { "epoch": 0.16, "learning_rate": 5.4889559211228934e-05, "loss": 0.3871, "step": 274500 }, { "epoch": 0.16, "learning_rate": 5.488745924566837e-05, "loss": 0.3716, "step": 275000 }, { "epoch": 0.17, "learning_rate": 5.48853592801078e-05, "loss": 0.3856, "step": 275500 }, { "epoch": 0.17, "learning_rate": 5.488325931454724e-05, "loss": 0.3807, "step": 276000 }, { "epoch": 0.17, "learning_rate": 5.4881159348986675e-05, "loss": 0.3749, "step": 276500 }, { "epoch": 0.17, "learning_rate": 5.487906358335723e-05, "loss": 0.3797, "step": 277000 }, { "epoch": 0.17, "learning_rate": 5.487696361779667e-05, "loss": 0.3742, "step": 277500 }, { "epoch": 0.17, "learning_rate": 5.48748636522361e-05, "loss": 0.3765, "step": 278000 }, { "epoch": 0.17, "learning_rate": 5.4872763686675536e-05, "loss": 0.3789, "step": 278500 }, { "epoch": 0.17, "learning_rate": 5.487066792104609e-05, "loss": 0.3689, "step": 279000 }, { "epoch": 0.17, "learning_rate": 5.486857215541665e-05, "loss": 0.3921, "step": 279500 }, { "epoch": 0.17, "learning_rate": 5.486647218985609e-05, "loss": 0.3782, "step": 280000 }, { "epoch": 0.17, "learning_rate": 5.486437222429552e-05, "loss": 0.381, "step": 280500 }, { "epoch": 0.17, "learning_rate": 5.486227225873496e-05, "loss": 0.3824, "step": 281000 }, { "epoch": 0.17, "learning_rate": 5.486017229317439e-05, "loss": 0.3788, "step": 281500 }, { "epoch": 0.17, "learning_rate": 5.4858072327613824e-05, "loss": 0.3762, "step": 282000 }, { "epoch": 0.17, "learning_rate": 5.485597236205326e-05, "loss": 0.3735, "step": 282500 }, { "epoch": 0.17, "learning_rate": 5.48538723964927e-05, "loss": 0.3736, "step": 283000 }, { "epoch": 0.17, "learning_rate": 5.485177243093213e-05, "loss": 0.3799, "step": 283500 }, { "epoch": 0.17, "learning_rate": 5.4849672465371564e-05, "loss": 0.376, "step": 284000 }, { "epoch": 0.17, "learning_rate": 5.4847576699742125e-05, "loss": 0.3807, "step": 284500 }, { "epoch": 0.17, "learning_rate": 5.484547673418156e-05, "loss": 0.3782, "step": 285000 }, { "epoch": 0.17, "learning_rate": 5.484337676862099e-05, "loss": 0.371, "step": 285500 }, { "epoch": 0.17, "learning_rate": 5.484127680306043e-05, "loss": 0.3756, "step": 286000 }, { "epoch": 0.17, "learning_rate": 5.4839176837499865e-05, "loss": 0.3862, "step": 286500 }, { "epoch": 0.17, "learning_rate": 5.48370768719393e-05, "loss": 0.3722, "step": 287000 }, { "epoch": 0.17, "learning_rate": 5.483498110630985e-05, "loss": 0.3739, "step": 287500 }, { "epoch": 0.17, "learning_rate": 5.483288114074929e-05, "loss": 0.3694, "step": 288000 }, { "epoch": 0.17, "learning_rate": 5.4830781175188726e-05, "loss": 0.3744, "step": 288500 }, { "epoch": 0.17, "learning_rate": 5.482868120962816e-05, "loss": 0.3743, "step": 289000 }, { "epoch": 0.17, "learning_rate": 5.48265812440676e-05, "loss": 0.3812, "step": 289500 }, { "epoch": 0.17, "learning_rate": 5.482448127850703e-05, "loss": 0.3712, "step": 290000 }, { "epoch": 0.17, "learning_rate": 5.482238551287759e-05, "loss": 0.3719, "step": 290500 }, { "epoch": 0.17, "learning_rate": 5.482028974724814e-05, "loss": 0.393, "step": 291000 }, { "epoch": 0.17, "learning_rate": 5.481818978168758e-05, "loss": 0.3833, "step": 291500 }, { "epoch": 0.18, "learning_rate": 5.4816089816127014e-05, "loss": 0.3784, "step": 292000 }, { "epoch": 0.18, "learning_rate": 5.481398985056645e-05, "loss": 0.3692, "step": 292500 }, { "epoch": 0.18, "learning_rate": 5.481188988500589e-05, "loss": 0.3795, "step": 293000 }, { "epoch": 0.18, "learning_rate": 5.480978991944532e-05, "loss": 0.3779, "step": 293500 }, { "epoch": 0.18, "learning_rate": 5.4807689953884755e-05, "loss": 0.3659, "step": 294000 }, { "epoch": 0.18, "learning_rate": 5.4805589988324195e-05, "loss": 0.3801, "step": 294500 }, { "epoch": 0.18, "learning_rate": 5.480349002276363e-05, "loss": 0.3774, "step": 295000 }, { "epoch": 0.18, "learning_rate": 5.480139005720306e-05, "loss": 0.3727, "step": 295500 }, { "epoch": 0.18, "learning_rate": 5.47992900916425e-05, "loss": 0.3762, "step": 296000 }, { "epoch": 0.18, "learning_rate": 5.479719012608193e-05, "loss": 0.3696, "step": 296500 }, { "epoch": 0.18, "learning_rate": 5.479509016052136e-05, "loss": 0.375, "step": 297000 }, { "epoch": 0.18, "learning_rate": 5.47929901949608e-05, "loss": 0.3706, "step": 297500 }, { "epoch": 0.18, "learning_rate": 5.4790890229400236e-05, "loss": 0.3694, "step": 298000 }, { "epoch": 0.18, "learning_rate": 5.478879026383967e-05, "loss": 0.3695, "step": 298500 }, { "epoch": 0.18, "learning_rate": 5.478669029827911e-05, "loss": 0.3763, "step": 299000 }, { "epoch": 0.18, "learning_rate": 5.478459033271854e-05, "loss": 0.3744, "step": 299500 }, { "epoch": 0.18, "learning_rate": 5.4782494567089097e-05, "loss": 0.3787, "step": 300000 }, { "epoch": 0.18, "eval_loss": 0.3329967260360718, "eval_runtime": 1465.3298, "eval_samples_per_second": 359.455, "eval_steps_per_second": 59.909, "step": 300000 }, { "epoch": 0.18, "learning_rate": 5.478039880145966e-05, "loss": 0.3786, "step": 300500 }, { "epoch": 0.18, "learning_rate": 5.477829883589909e-05, "loss": 0.3774, "step": 301000 }, { "epoch": 0.18, "learning_rate": 5.4776198870338524e-05, "loss": 0.3757, "step": 301500 }, { "epoch": 0.18, "learning_rate": 5.4774103104709084e-05, "loss": 0.3754, "step": 302000 }, { "epoch": 0.18, "learning_rate": 5.477200313914852e-05, "loss": 0.3731, "step": 302500 }, { "epoch": 0.18, "learning_rate": 5.476990317358796e-05, "loss": 0.3757, "step": 303000 }, { "epoch": 0.18, "learning_rate": 5.4767803208027385e-05, "loss": 0.3702, "step": 303500 }, { "epoch": 0.18, "learning_rate": 5.476570324246682e-05, "loss": 0.3719, "step": 304000 }, { "epoch": 0.18, "learning_rate": 5.476360747683738e-05, "loss": 0.3744, "step": 304500 }, { "epoch": 0.18, "learning_rate": 5.476150751127682e-05, "loss": 0.3691, "step": 305000 }, { "epoch": 0.18, "learning_rate": 5.475940754571625e-05, "loss": 0.366, "step": 305500 }, { "epoch": 0.18, "learning_rate": 5.475730758015568e-05, "loss": 0.3794, "step": 306000 }, { "epoch": 0.18, "learning_rate": 5.475520761459512e-05, "loss": 0.3715, "step": 306500 }, { "epoch": 0.18, "learning_rate": 5.475310764903455e-05, "loss": 0.3741, "step": 307000 }, { "epoch": 0.18, "learning_rate": 5.4751007683473986e-05, "loss": 0.3776, "step": 307500 }, { "epoch": 0.18, "learning_rate": 5.4748907717913426e-05, "loss": 0.3714, "step": 308000 }, { "epoch": 0.18, "learning_rate": 5.474680775235286e-05, "loss": 0.3667, "step": 308500 }, { "epoch": 0.19, "learning_rate": 5.474471198672341e-05, "loss": 0.3753, "step": 309000 }, { "epoch": 0.19, "learning_rate": 5.474261202116285e-05, "loss": 0.3742, "step": 309500 }, { "epoch": 0.19, "learning_rate": 5.4740516255533414e-05, "loss": 0.3634, "step": 310000 }, { "epoch": 0.19, "learning_rate": 5.473841628997285e-05, "loss": 0.377, "step": 310500 }, { "epoch": 0.19, "learning_rate": 5.4736316324412274e-05, "loss": 0.3744, "step": 311000 }, { "epoch": 0.19, "learning_rate": 5.4734216358851714e-05, "loss": 0.3654, "step": 311500 }, { "epoch": 0.19, "learning_rate": 5.473211639329115e-05, "loss": 0.3715, "step": 312000 }, { "epoch": 0.19, "learning_rate": 5.473001642773058e-05, "loss": 0.3699, "step": 312500 }, { "epoch": 0.19, "learning_rate": 5.472791646217002e-05, "loss": 0.3721, "step": 313000 }, { "epoch": 0.19, "learning_rate": 5.4725816496609455e-05, "loss": 0.3746, "step": 313500 }, { "epoch": 0.19, "learning_rate": 5.472371653104889e-05, "loss": 0.3657, "step": 314000 }, { "epoch": 0.19, "learning_rate": 5.472162076541944e-05, "loss": 0.3635, "step": 314500 }, { "epoch": 0.19, "learning_rate": 5.471952079985888e-05, "loss": 0.3697, "step": 315000 }, { "epoch": 0.19, "learning_rate": 5.4717420834298315e-05, "loss": 0.3674, "step": 315500 }, { "epoch": 0.19, "learning_rate": 5.471532086873775e-05, "loss": 0.3707, "step": 316000 }, { "epoch": 0.19, "learning_rate": 5.471322510310831e-05, "loss": 0.3764, "step": 316500 }, { "epoch": 0.19, "learning_rate": 5.471112513754774e-05, "loss": 0.375, "step": 317000 }, { "epoch": 0.19, "learning_rate": 5.4709025171987176e-05, "loss": 0.3674, "step": 317500 }, { "epoch": 0.19, "learning_rate": 5.4706925206426616e-05, "loss": 0.3647, "step": 318000 }, { "epoch": 0.19, "learning_rate": 5.470482944079717e-05, "loss": 0.3663, "step": 318500 }, { "epoch": 0.19, "learning_rate": 5.4702729475236603e-05, "loss": 0.3693, "step": 319000 }, { "epoch": 0.19, "learning_rate": 5.470062950967604e-05, "loss": 0.3781, "step": 319500 }, { "epoch": 0.19, "learning_rate": 5.469852954411548e-05, "loss": 0.3702, "step": 320000 }, { "epoch": 0.19, "learning_rate": 5.469642957855491e-05, "loss": 0.3675, "step": 320500 }, { "epoch": 0.19, "learning_rate": 5.4694329612994344e-05, "loss": 0.3659, "step": 321000 }, { "epoch": 0.19, "learning_rate": 5.4692229647433784e-05, "loss": 0.3669, "step": 321500 }, { "epoch": 0.19, "learning_rate": 5.469013388180434e-05, "loss": 0.3713, "step": 322000 }, { "epoch": 0.19, "learning_rate": 5.468803391624377e-05, "loss": 0.3712, "step": 322500 }, { "epoch": 0.19, "learning_rate": 5.4685933950683205e-05, "loss": 0.3718, "step": 323000 }, { "epoch": 0.19, "learning_rate": 5.4683833985122645e-05, "loss": 0.3672, "step": 323500 }, { "epoch": 0.19, "learning_rate": 5.468173401956208e-05, "loss": 0.3652, "step": 324000 }, { "epoch": 0.19, "learning_rate": 5.467963825393263e-05, "loss": 0.3686, "step": 324500 }, { "epoch": 0.19, "learning_rate": 5.467753828837207e-05, "loss": 0.3673, "step": 325000 }, { "epoch": 0.2, "learning_rate": 5.4675438322811506e-05, "loss": 0.3822, "step": 325500 }, { "epoch": 0.2, "learning_rate": 5.467333835725094e-05, "loss": 0.3672, "step": 326000 }, { "epoch": 0.2, "learning_rate": 5.467123839169038e-05, "loss": 0.3695, "step": 326500 }, { "epoch": 0.2, "learning_rate": 5.466913842612981e-05, "loss": 0.3709, "step": 327000 }, { "epoch": 0.2, "learning_rate": 5.4667038460569246e-05, "loss": 0.3718, "step": 327500 }, { "epoch": 0.2, "learning_rate": 5.4664938495008687e-05, "loss": 0.3711, "step": 328000 }, { "epoch": 0.2, "learning_rate": 5.4662846929310354e-05, "loss": 0.3711, "step": 328500 }, { "epoch": 0.2, "learning_rate": 5.4660751163680914e-05, "loss": 0.3679, "step": 329000 }, { "epoch": 0.2, "learning_rate": 5.4658651198120354e-05, "loss": 0.3683, "step": 329500 }, { "epoch": 0.2, "learning_rate": 5.465655123255978e-05, "loss": 0.3712, "step": 330000 }, { "epoch": 0.2, "learning_rate": 5.465445126699922e-05, "loss": 0.3713, "step": 330500 }, { "epoch": 0.2, "learning_rate": 5.4652351301438654e-05, "loss": 0.3659, "step": 331000 }, { "epoch": 0.2, "learning_rate": 5.465025133587809e-05, "loss": 0.3661, "step": 331500 }, { "epoch": 0.2, "learning_rate": 5.464815137031753e-05, "loss": 0.369, "step": 332000 }, { "epoch": 0.2, "learning_rate": 5.464605560468808e-05, "loss": 0.3666, "step": 332500 }, { "epoch": 0.2, "learning_rate": 5.4643955639127515e-05, "loss": 0.3719, "step": 333000 }, { "epoch": 0.2, "learning_rate": 5.464185567356695e-05, "loss": 0.3717, "step": 333500 }, { "epoch": 0.2, "learning_rate": 5.463975570800639e-05, "loss": 0.3639, "step": 334000 }, { "epoch": 0.2, "learning_rate": 5.463765574244582e-05, "loss": 0.3648, "step": 334500 }, { "epoch": 0.2, "learning_rate": 5.4635555776885256e-05, "loss": 0.3643, "step": 335000 }, { "epoch": 0.2, "learning_rate": 5.4633455811324696e-05, "loss": 0.363, "step": 335500 }, { "epoch": 0.2, "learning_rate": 5.463135584576413e-05, "loss": 0.3619, "step": 336000 }, { "epoch": 0.2, "learning_rate": 5.462925588020356e-05, "loss": 0.3591, "step": 336500 }, { "epoch": 0.2, "learning_rate": 5.4627155914643e-05, "loss": 0.3638, "step": 337000 }, { "epoch": 0.2, "learning_rate": 5.462506014901356e-05, "loss": 0.3715, "step": 337500 }, { "epoch": 0.2, "learning_rate": 5.462296018345299e-05, "loss": 0.3618, "step": 338000 }, { "epoch": 0.2, "learning_rate": 5.462086021789243e-05, "loss": 0.3635, "step": 338500 }, { "epoch": 0.2, "learning_rate": 5.4618760252331864e-05, "loss": 0.3717, "step": 339000 }, { "epoch": 0.2, "learning_rate": 5.461666448670242e-05, "loss": 0.3628, "step": 339500 }, { "epoch": 0.2, "learning_rate": 5.461456452114185e-05, "loss": 0.3569, "step": 340000 }, { "epoch": 0.2, "learning_rate": 5.461246455558129e-05, "loss": 0.3649, "step": 340500 }, { "epoch": 0.2, "learning_rate": 5.4610364590020725e-05, "loss": 0.371, "step": 341000 }, { "epoch": 0.2, "learning_rate": 5.460826462446016e-05, "loss": 0.3657, "step": 341500 }, { "epoch": 0.21, "learning_rate": 5.46061646588996e-05, "loss": 0.3606, "step": 342000 }, { "epoch": 0.21, "learning_rate": 5.460406889327015e-05, "loss": 0.366, "step": 342500 }, { "epoch": 0.21, "learning_rate": 5.4601968927709585e-05, "loss": 0.362, "step": 343000 }, { "epoch": 0.21, "learning_rate": 5.459987316208014e-05, "loss": 0.3658, "step": 343500 }, { "epoch": 0.21, "learning_rate": 5.459777319651958e-05, "loss": 0.3565, "step": 344000 }, { "epoch": 0.21, "learning_rate": 5.459567323095901e-05, "loss": 0.3699, "step": 344500 }, { "epoch": 0.21, "learning_rate": 5.4593577465329566e-05, "loss": 0.3666, "step": 345000 }, { "epoch": 0.21, "learning_rate": 5.4591477499769e-05, "loss": 0.3637, "step": 345500 }, { "epoch": 0.21, "learning_rate": 5.458937753420844e-05, "loss": 0.3631, "step": 346000 }, { "epoch": 0.21, "learning_rate": 5.458727756864787e-05, "loss": 0.3739, "step": 346500 }, { "epoch": 0.21, "learning_rate": 5.458517760308731e-05, "loss": 0.3715, "step": 347000 }, { "epoch": 0.21, "learning_rate": 5.458307763752675e-05, "loss": 0.3645, "step": 347500 }, { "epoch": 0.21, "learning_rate": 5.458097767196618e-05, "loss": 0.3679, "step": 348000 }, { "epoch": 0.21, "learning_rate": 5.4578877706405614e-05, "loss": 0.3615, "step": 348500 }, { "epoch": 0.21, "learning_rate": 5.4576777740845054e-05, "loss": 0.3662, "step": 349000 }, { "epoch": 0.21, "learning_rate": 5.457467777528448e-05, "loss": 0.3636, "step": 349500 }, { "epoch": 0.21, "learning_rate": 5.4572577809723914e-05, "loss": 0.3647, "step": 350000 }, { "epoch": 0.21, "learning_rate": 5.4570477844163355e-05, "loss": 0.3608, "step": 350500 }, { "epoch": 0.21, "learning_rate": 5.4568382078533915e-05, "loss": 0.3698, "step": 351000 }, { "epoch": 0.21, "learning_rate": 5.456628211297335e-05, "loss": 0.3604, "step": 351500 }, { "epoch": 0.21, "learning_rate": 5.456418214741278e-05, "loss": 0.361, "step": 352000 }, { "epoch": 0.21, "learning_rate": 5.4562082181852215e-05, "loss": 0.365, "step": 352500 }, { "epoch": 0.21, "learning_rate": 5.455998221629165e-05, "loss": 0.3619, "step": 353000 }, { "epoch": 0.21, "learning_rate": 5.455788225073109e-05, "loss": 0.3649, "step": 353500 }, { "epoch": 0.21, "learning_rate": 5.455578228517052e-05, "loss": 0.3712, "step": 354000 }, { "epoch": 0.21, "learning_rate": 5.4553686519541076e-05, "loss": 0.3674, "step": 354500 }, { "epoch": 0.21, "learning_rate": 5.455158655398051e-05, "loss": 0.3584, "step": 355000 }, { "epoch": 0.21, "learning_rate": 5.454948658841995e-05, "loss": 0.3614, "step": 355500 }, { "epoch": 0.21, "learning_rate": 5.454738662285938e-05, "loss": 0.3642, "step": 356000 }, { "epoch": 0.21, "learning_rate": 5.454528665729882e-05, "loss": 0.369, "step": 356500 }, { "epoch": 0.21, "learning_rate": 5.454318669173826e-05, "loss": 0.3577, "step": 357000 }, { "epoch": 0.21, "learning_rate": 5.454108672617769e-05, "loss": 0.35, "step": 357500 }, { "epoch": 0.21, "learning_rate": 5.4538986760617124e-05, "loss": 0.3592, "step": 358000 }, { "epoch": 0.21, "learning_rate": 5.4536886795056564e-05, "loss": 0.3635, "step": 358500 }, { "epoch": 0.22, "learning_rate": 5.453479102942712e-05, "loss": 0.3661, "step": 359000 }, { "epoch": 0.22, "learning_rate": 5.453269106386655e-05, "loss": 0.3712, "step": 359500 }, { "epoch": 0.22, "learning_rate": 5.4530591098305985e-05, "loss": 0.3597, "step": 360000 }, { "epoch": 0.22, "learning_rate": 5.4528491132745425e-05, "loss": 0.3683, "step": 360500 }, { "epoch": 0.22, "learning_rate": 5.452639116718486e-05, "loss": 0.3638, "step": 361000 }, { "epoch": 0.22, "learning_rate": 5.452429120162429e-05, "loss": 0.3637, "step": 361500 }, { "epoch": 0.22, "learning_rate": 5.452219123606373e-05, "loss": 0.3693, "step": 362000 }, { "epoch": 0.22, "learning_rate": 5.452009127050316e-05, "loss": 0.3613, "step": 362500 }, { "epoch": 0.22, "learning_rate": 5.45179913049426e-05, "loss": 0.3637, "step": 363000 }, { "epoch": 0.22, "learning_rate": 5.451589553931316e-05, "loss": 0.3619, "step": 363500 }, { "epoch": 0.22, "learning_rate": 5.451379557375259e-05, "loss": 0.3588, "step": 364000 }, { "epoch": 0.22, "learning_rate": 5.4511695608192026e-05, "loss": 0.3617, "step": 364500 }, { "epoch": 0.22, "learning_rate": 5.450959564263146e-05, "loss": 0.3627, "step": 365000 }, { "epoch": 0.22, "learning_rate": 5.450749567707089e-05, "loss": 0.3573, "step": 365500 }, { "epoch": 0.22, "learning_rate": 5.4505395711510327e-05, "loss": 0.3626, "step": 366000 }, { "epoch": 0.22, "learning_rate": 5.450329994588089e-05, "loss": 0.3587, "step": 366500 }, { "epoch": 0.22, "learning_rate": 5.450119998032032e-05, "loss": 0.3673, "step": 367000 }, { "epoch": 0.22, "learning_rate": 5.4499100014759754e-05, "loss": 0.3589, "step": 367500 }, { "epoch": 0.22, "learning_rate": 5.449700004919919e-05, "loss": 0.3619, "step": 368000 }, { "epoch": 0.22, "learning_rate": 5.449490008363863e-05, "loss": 0.3616, "step": 368500 }, { "epoch": 0.22, "learning_rate": 5.449280851794031e-05, "loss": 0.3582, "step": 369000 }, { "epoch": 0.22, "learning_rate": 5.449070855237974e-05, "loss": 0.3641, "step": 369500 }, { "epoch": 0.22, "learning_rate": 5.4488608586819175e-05, "loss": 0.3615, "step": 370000 }, { "epoch": 0.22, "learning_rate": 5.448651282118973e-05, "loss": 0.3647, "step": 370500 }, { "epoch": 0.22, "learning_rate": 5.448441285562917e-05, "loss": 0.356, "step": 371000 }, { "epoch": 0.22, "learning_rate": 5.44823128900686e-05, "loss": 0.354, "step": 371500 }, { "epoch": 0.22, "learning_rate": 5.4480212924508036e-05, "loss": 0.3575, "step": 372000 }, { "epoch": 0.22, "learning_rate": 5.4478112958947476e-05, "loss": 0.3749, "step": 372500 }, { "epoch": 0.22, "learning_rate": 5.447601719331803e-05, "loss": 0.3622, "step": 373000 }, { "epoch": 0.22, "learning_rate": 5.447391722775746e-05, "loss": 0.3557, "step": 373500 }, { "epoch": 0.22, "learning_rate": 5.4471817262196896e-05, "loss": 0.3646, "step": 374000 }, { "epoch": 0.22, "learning_rate": 5.4469717296636337e-05, "loss": 0.3649, "step": 374500 }, { "epoch": 0.22, "learning_rate": 5.446761733107577e-05, "loss": 0.356, "step": 375000 }, { "epoch": 0.23, "learning_rate": 5.4465517365515203e-05, "loss": 0.3658, "step": 375500 }, { "epoch": 0.23, "learning_rate": 5.4463417399954644e-05, "loss": 0.3608, "step": 376000 }, { "epoch": 0.23, "learning_rate": 5.446131743439407e-05, "loss": 0.3575, "step": 376500 }, { "epoch": 0.23, "learning_rate": 5.445922166876463e-05, "loss": 0.3704, "step": 377000 }, { "epoch": 0.23, "learning_rate": 5.445712170320407e-05, "loss": 0.3624, "step": 377500 }, { "epoch": 0.23, "learning_rate": 5.4455021737643504e-05, "loss": 0.3571, "step": 378000 }, { "epoch": 0.23, "learning_rate": 5.445292177208294e-05, "loss": 0.36, "step": 378500 }, { "epoch": 0.23, "learning_rate": 5.445082180652237e-05, "loss": 0.3559, "step": 379000 }, { "epoch": 0.23, "learning_rate": 5.4448721840961805e-05, "loss": 0.3652, "step": 379500 }, { "epoch": 0.23, "learning_rate": 5.4446626075332365e-05, "loss": 0.3612, "step": 380000 }, { "epoch": 0.23, "learning_rate": 5.44445261097718e-05, "loss": 0.3486, "step": 380500 }, { "epoch": 0.23, "learning_rate": 5.444243034414235e-05, "loss": 0.3596, "step": 381000 }, { "epoch": 0.23, "learning_rate": 5.444033037858179e-05, "loss": 0.3626, "step": 381500 }, { "epoch": 0.23, "learning_rate": 5.4438234612952346e-05, "loss": 0.3498, "step": 382000 }, { "epoch": 0.23, "learning_rate": 5.443613464739178e-05, "loss": 0.3715, "step": 382500 }, { "epoch": 0.23, "learning_rate": 5.443403468183122e-05, "loss": 0.3568, "step": 383000 }, { "epoch": 0.23, "learning_rate": 5.443193471627065e-05, "loss": 0.3632, "step": 383500 }, { "epoch": 0.23, "learning_rate": 5.442983475071009e-05, "loss": 0.3594, "step": 384000 }, { "epoch": 0.23, "learning_rate": 5.442773478514953e-05, "loss": 0.3655, "step": 384500 }, { "epoch": 0.23, "learning_rate": 5.442563481958896e-05, "loss": 0.3589, "step": 385000 }, { "epoch": 0.23, "learning_rate": 5.4423534854028394e-05, "loss": 0.3531, "step": 385500 }, { "epoch": 0.23, "learning_rate": 5.442143488846783e-05, "loss": 0.3503, "step": 386000 }, { "epoch": 0.23, "learning_rate": 5.441933492290726e-05, "loss": 0.3562, "step": 386500 }, { "epoch": 0.23, "learning_rate": 5.441723915727782e-05, "loss": 0.3564, "step": 387000 }, { "epoch": 0.23, "learning_rate": 5.4415139191717254e-05, "loss": 0.3556, "step": 387500 }, { "epoch": 0.23, "learning_rate": 5.4413039226156695e-05, "loss": 0.3569, "step": 388000 }, { "epoch": 0.23, "learning_rate": 5.441093926059612e-05, "loss": 0.3555, "step": 388500 }, { "epoch": 0.23, "learning_rate": 5.4408839295035555e-05, "loss": 0.3543, "step": 389000 }, { "epoch": 0.23, "learning_rate": 5.4406739329474995e-05, "loss": 0.3523, "step": 389500 }, { "epoch": 0.23, "learning_rate": 5.440463936391443e-05, "loss": 0.3565, "step": 390000 }, { "epoch": 0.23, "learning_rate": 5.440253939835387e-05, "loss": 0.3564, "step": 390500 }, { "epoch": 0.23, "learning_rate": 5.440044363272442e-05, "loss": 0.355, "step": 391000 }, { "epoch": 0.23, "learning_rate": 5.4398343667163856e-05, "loss": 0.358, "step": 391500 }, { "epoch": 0.24, "learning_rate": 5.439624370160329e-05, "loss": 0.3632, "step": 392000 }, { "epoch": 0.24, "learning_rate": 5.439414373604273e-05, "loss": 0.3554, "step": 392500 }, { "epoch": 0.24, "learning_rate": 5.439204377048216e-05, "loss": 0.3567, "step": 393000 }, { "epoch": 0.24, "learning_rate": 5.4389943804921596e-05, "loss": 0.3581, "step": 393500 }, { "epoch": 0.24, "learning_rate": 5.438784383936104e-05, "loss": 0.3623, "step": 394000 }, { "epoch": 0.24, "learning_rate": 5.438574387380047e-05, "loss": 0.357, "step": 394500 }, { "epoch": 0.24, "learning_rate": 5.4383643908239904e-05, "loss": 0.3534, "step": 395000 }, { "epoch": 0.24, "learning_rate": 5.4381543942679344e-05, "loss": 0.3556, "step": 395500 }, { "epoch": 0.24, "learning_rate": 5.437944397711878e-05, "loss": 0.3583, "step": 396000 }, { "epoch": 0.24, "learning_rate": 5.4377344011558204e-05, "loss": 0.3553, "step": 396500 }, { "epoch": 0.24, "learning_rate": 5.4375248245928764e-05, "loss": 0.3625, "step": 397000 }, { "epoch": 0.24, "learning_rate": 5.4373148280368205e-05, "loss": 0.3607, "step": 397500 }, { "epoch": 0.24, "learning_rate": 5.437104831480764e-05, "loss": 0.3545, "step": 398000 }, { "epoch": 0.24, "learning_rate": 5.436894834924707e-05, "loss": 0.3546, "step": 398500 }, { "epoch": 0.24, "learning_rate": 5.4366848383686505e-05, "loss": 0.3554, "step": 399000 }, { "epoch": 0.24, "learning_rate": 5.4364752618057065e-05, "loss": 0.3625, "step": 399500 }, { "epoch": 0.24, "learning_rate": 5.436265685242762e-05, "loss": 0.364, "step": 400000 }, { "epoch": 0.24, "eval_loss": 0.31898367404937744, "eval_runtime": 1466.0508, "eval_samples_per_second": 359.278, "eval_steps_per_second": 59.88, "step": 400000 }, { "epoch": 0.24, "learning_rate": 5.436055688686705e-05, "loss": 0.3497, "step": 400500 }, { "epoch": 0.24, "learning_rate": 5.435845692130649e-05, "loss": 0.3553, "step": 401000 }, { "epoch": 0.24, "learning_rate": 5.4356356955745926e-05, "loss": 0.3582, "step": 401500 }, { "epoch": 0.24, "learning_rate": 5.435425699018536e-05, "loss": 0.3496, "step": 402000 }, { "epoch": 0.24, "learning_rate": 5.43521570246248e-05, "loss": 0.3493, "step": 402500 }, { "epoch": 0.24, "learning_rate": 5.435005705906423e-05, "loss": 0.3627, "step": 403000 }, { "epoch": 0.24, "learning_rate": 5.434795709350366e-05, "loss": 0.3577, "step": 403500 }, { "epoch": 0.24, "learning_rate": 5.43458571279431e-05, "loss": 0.3596, "step": 404000 }, { "epoch": 0.24, "learning_rate": 5.4343757162382534e-05, "loss": 0.3509, "step": 404500 }, { "epoch": 0.24, "learning_rate": 5.434165719682197e-05, "loss": 0.3561, "step": 405000 }, { "epoch": 0.24, "learning_rate": 5.433956143119253e-05, "loss": 0.3629, "step": 405500 }, { "epoch": 0.24, "learning_rate": 5.433746146563196e-05, "loss": 0.3584, "step": 406000 }, { "epoch": 0.24, "learning_rate": 5.4335361500071394e-05, "loss": 0.3595, "step": 406500 }, { "epoch": 0.24, "learning_rate": 5.4333265734441955e-05, "loss": 0.3509, "step": 407000 }, { "epoch": 0.24, "learning_rate": 5.4331165768881395e-05, "loss": 0.3509, "step": 407500 }, { "epoch": 0.24, "learning_rate": 5.432906580332083e-05, "loss": 0.3563, "step": 408000 }, { "epoch": 0.24, "learning_rate": 5.4326965837760255e-05, "loss": 0.3517, "step": 408500 }, { "epoch": 0.25, "learning_rate": 5.4324870072130815e-05, "loss": 0.3583, "step": 409000 }, { "epoch": 0.25, "learning_rate": 5.4322770106570256e-05, "loss": 0.3623, "step": 409500 }, { "epoch": 0.25, "learning_rate": 5.432067014100969e-05, "loss": 0.356, "step": 410000 }, { "epoch": 0.25, "learning_rate": 5.431857017544912e-05, "loss": 0.3522, "step": 410500 }, { "epoch": 0.25, "learning_rate": 5.4316470209888556e-05, "loss": 0.3565, "step": 411000 }, { "epoch": 0.25, "learning_rate": 5.431437024432799e-05, "loss": 0.3629, "step": 411500 }, { "epoch": 0.25, "learning_rate": 5.431227027876742e-05, "loss": 0.3601, "step": 412000 }, { "epoch": 0.25, "learning_rate": 5.431017031320686e-05, "loss": 0.3565, "step": 412500 }, { "epoch": 0.25, "learning_rate": 5.4308070347646297e-05, "loss": 0.3532, "step": 413000 }, { "epoch": 0.25, "learning_rate": 5.430597038208573e-05, "loss": 0.356, "step": 413500 }, { "epoch": 0.25, "learning_rate": 5.430387041652517e-05, "loss": 0.3574, "step": 414000 }, { "epoch": 0.25, "learning_rate": 5.4301770450964604e-05, "loss": 0.3574, "step": 414500 }, { "epoch": 0.25, "learning_rate": 5.429967048540404e-05, "loss": 0.3529, "step": 415000 }, { "epoch": 0.25, "learning_rate": 5.42975747197746e-05, "loss": 0.3621, "step": 415500 }, { "epoch": 0.25, "learning_rate": 5.429547475421403e-05, "loss": 0.348, "step": 416000 }, { "epoch": 0.25, "learning_rate": 5.4293374788653464e-05, "loss": 0.3517, "step": 416500 }, { "epoch": 0.25, "learning_rate": 5.4291274823092905e-05, "loss": 0.3523, "step": 417000 }, { "epoch": 0.25, "learning_rate": 5.428917485753234e-05, "loss": 0.3594, "step": 417500 }, { "epoch": 0.25, "learning_rate": 5.428707489197177e-05, "loss": 0.3508, "step": 418000 }, { "epoch": 0.25, "learning_rate": 5.4284974926411205e-05, "loss": 0.3483, "step": 418500 }, { "epoch": 0.25, "learning_rate": 5.428287496085064e-05, "loss": 0.3598, "step": 419000 }, { "epoch": 0.25, "learning_rate": 5.428077499529007e-05, "loss": 0.3486, "step": 419500 }, { "epoch": 0.25, "learning_rate": 5.427867922966063e-05, "loss": 0.3598, "step": 420000 }, { "epoch": 0.25, "learning_rate": 5.427657926410007e-05, "loss": 0.3594, "step": 420500 }, { "epoch": 0.25, "learning_rate": 5.42744792985395e-05, "loss": 0.358, "step": 421000 }, { "epoch": 0.25, "learning_rate": 5.427237933297893e-05, "loss": 0.3543, "step": 421500 }, { "epoch": 0.25, "learning_rate": 5.427028356734949e-05, "loss": 0.3528, "step": 422000 }, { "epoch": 0.25, "learning_rate": 5.4268187801720053e-05, "loss": 0.3497, "step": 422500 }, { "epoch": 0.25, "learning_rate": 5.426608783615949e-05, "loss": 0.3595, "step": 423000 }, { "epoch": 0.25, "learning_rate": 5.426398787059892e-05, "loss": 0.3567, "step": 423500 }, { "epoch": 0.25, "learning_rate": 5.426188790503836e-05, "loss": 0.3518, "step": 424000 }, { "epoch": 0.25, "learning_rate": 5.4259787939477794e-05, "loss": 0.35, "step": 424500 }, { "epoch": 0.25, "learning_rate": 5.425768797391723e-05, "loss": 0.3449, "step": 425000 }, { "epoch": 0.26, "learning_rate": 5.425558800835667e-05, "loss": 0.3556, "step": 425500 }, { "epoch": 0.26, "learning_rate": 5.4253488042796094e-05, "loss": 0.3536, "step": 426000 }, { "epoch": 0.26, "learning_rate": 5.4251392277166655e-05, "loss": 0.3491, "step": 426500 }, { "epoch": 0.26, "learning_rate": 5.424929231160609e-05, "loss": 0.3608, "step": 427000 }, { "epoch": 0.26, "learning_rate": 5.424719654597664e-05, "loss": 0.3584, "step": 427500 }, { "epoch": 0.26, "learning_rate": 5.424509658041608e-05, "loss": 0.3547, "step": 428000 }, { "epoch": 0.26, "learning_rate": 5.4242996614855515e-05, "loss": 0.3474, "step": 428500 }, { "epoch": 0.26, "learning_rate": 5.424089664929495e-05, "loss": 0.3491, "step": 429000 }, { "epoch": 0.26, "learning_rate": 5.423879668373439e-05, "loss": 0.3552, "step": 429500 }, { "epoch": 0.26, "learning_rate": 5.423669671817382e-05, "loss": 0.3483, "step": 430000 }, { "epoch": 0.26, "learning_rate": 5.4234596752613256e-05, "loss": 0.3523, "step": 430500 }, { "epoch": 0.26, "learning_rate": 5.423249678705269e-05, "loss": 0.3489, "step": 431000 }, { "epoch": 0.26, "learning_rate": 5.423039682149212e-05, "loss": 0.3491, "step": 431500 }, { "epoch": 0.26, "learning_rate": 5.422830105586268e-05, "loss": 0.3508, "step": 432000 }, { "epoch": 0.26, "learning_rate": 5.4226201090302124e-05, "loss": 0.348, "step": 432500 }, { "epoch": 0.26, "learning_rate": 5.422410532467268e-05, "loss": 0.3478, "step": 433000 }, { "epoch": 0.26, "learning_rate": 5.422200535911211e-05, "loss": 0.3565, "step": 433500 }, { "epoch": 0.26, "learning_rate": 5.4219905393551544e-05, "loss": 0.3568, "step": 434000 }, { "epoch": 0.26, "learning_rate": 5.4217805427990984e-05, "loss": 0.3507, "step": 434500 }, { "epoch": 0.26, "learning_rate": 5.421570546243042e-05, "loss": 0.3543, "step": 435000 }, { "epoch": 0.26, "learning_rate": 5.421360969680097e-05, "loss": 0.3492, "step": 435500 }, { "epoch": 0.26, "learning_rate": 5.421150973124041e-05, "loss": 0.3478, "step": 436000 }, { "epoch": 0.26, "learning_rate": 5.4209409765679845e-05, "loss": 0.3625, "step": 436500 }, { "epoch": 0.26, "learning_rate": 5.420730980011928e-05, "loss": 0.3557, "step": 437000 }, { "epoch": 0.26, "learning_rate": 5.420521403448983e-05, "loss": 0.3535, "step": 437500 }, { "epoch": 0.26, "learning_rate": 5.420311406892927e-05, "loss": 0.3502, "step": 438000 }, { "epoch": 0.26, "learning_rate": 5.4201014103368706e-05, "loss": 0.3534, "step": 438500 }, { "epoch": 0.26, "learning_rate": 5.419891413780814e-05, "loss": 0.3572, "step": 439000 }, { "epoch": 0.26, "learning_rate": 5.419681417224758e-05, "loss": 0.3513, "step": 439500 }, { "epoch": 0.26, "learning_rate": 5.419471840661813e-05, "loss": 0.3471, "step": 440000 }, { "epoch": 0.26, "learning_rate": 5.4192618441057567e-05, "loss": 0.3487, "step": 440500 }, { "epoch": 0.26, "learning_rate": 5.4190518475497e-05, "loss": 0.3612, "step": 441000 }, { "epoch": 0.26, "learning_rate": 5.418841850993644e-05, "loss": 0.3537, "step": 441500 }, { "epoch": 0.26, "learning_rate": 5.4186318544375874e-05, "loss": 0.3503, "step": 442000 }, { "epoch": 0.27, "learning_rate": 5.418421857881531e-05, "loss": 0.3592, "step": 442500 }, { "epoch": 0.27, "learning_rate": 5.418211861325474e-05, "loss": 0.3494, "step": 443000 }, { "epoch": 0.27, "learning_rate": 5.4180018647694174e-05, "loss": 0.3446, "step": 443500 }, { "epoch": 0.27, "learning_rate": 5.4177918682133614e-05, "loss": 0.3485, "step": 444000 }, { "epoch": 0.27, "learning_rate": 5.4175822916504175e-05, "loss": 0.3562, "step": 444500 }, { "epoch": 0.27, "learning_rate": 5.41737229509436e-05, "loss": 0.3433, "step": 445000 }, { "epoch": 0.27, "learning_rate": 5.4171622985383035e-05, "loss": 0.3502, "step": 445500 }, { "epoch": 0.27, "learning_rate": 5.4169523019822475e-05, "loss": 0.3526, "step": 446000 }, { "epoch": 0.27, "learning_rate": 5.4167427254193035e-05, "loss": 0.3463, "step": 446500 }, { "epoch": 0.27, "learning_rate": 5.416532728863247e-05, "loss": 0.3475, "step": 447000 }, { "epoch": 0.27, "learning_rate": 5.4163227323071896e-05, "loss": 0.3464, "step": 447500 }, { "epoch": 0.27, "learning_rate": 5.4161131557442456e-05, "loss": 0.3536, "step": 448000 }, { "epoch": 0.27, "learning_rate": 5.4159031591881896e-05, "loss": 0.3517, "step": 448500 }, { "epoch": 0.27, "learning_rate": 5.415693162632133e-05, "loss": 0.3493, "step": 449000 }, { "epoch": 0.27, "learning_rate": 5.415483166076076e-05, "loss": 0.3502, "step": 449500 }, { "epoch": 0.27, "learning_rate": 5.4152731695200196e-05, "loss": 0.345, "step": 450000 }, { "epoch": 0.27, "learning_rate": 5.415063172963963e-05, "loss": 0.3545, "step": 450500 }, { "epoch": 0.27, "learning_rate": 5.414853176407907e-05, "loss": 0.3547, "step": 451000 }, { "epoch": 0.27, "learning_rate": 5.4146431798518504e-05, "loss": 0.3523, "step": 451500 }, { "epoch": 0.27, "learning_rate": 5.414433183295794e-05, "loss": 0.3562, "step": 452000 }, { "epoch": 0.27, "learning_rate": 5.414223606732849e-05, "loss": 0.3474, "step": 452500 }, { "epoch": 0.27, "learning_rate": 5.414013610176793e-05, "loss": 0.3541, "step": 453000 }, { "epoch": 0.27, "learning_rate": 5.413804033613849e-05, "loss": 0.349, "step": 453500 }, { "epoch": 0.27, "learning_rate": 5.4135940370577925e-05, "loss": 0.3521, "step": 454000 }, { "epoch": 0.27, "learning_rate": 5.413384040501735e-05, "loss": 0.3545, "step": 454500 }, { "epoch": 0.27, "learning_rate": 5.413174043945679e-05, "loss": 0.3503, "step": 455000 }, { "epoch": 0.27, "learning_rate": 5.4129640473896225e-05, "loss": 0.3522, "step": 455500 }, { "epoch": 0.27, "learning_rate": 5.412754050833566e-05, "loss": 0.3523, "step": 456000 }, { "epoch": 0.27, "learning_rate": 5.41254405427751e-05, "loss": 0.3512, "step": 456500 }, { "epoch": 0.27, "learning_rate": 5.412334057721453e-05, "loss": 0.3523, "step": 457000 }, { "epoch": 0.27, "learning_rate": 5.4121244811585086e-05, "loss": 0.3544, "step": 457500 }, { "epoch": 0.27, "learning_rate": 5.4119144846024526e-05, "loss": 0.3506, "step": 458000 }, { "epoch": 0.27, "learning_rate": 5.4117049080395086e-05, "loss": 0.3506, "step": 458500 }, { "epoch": 0.28, "learning_rate": 5.411494911483451e-05, "loss": 0.3468, "step": 459000 }, { "epoch": 0.28, "learning_rate": 5.4112849149273947e-05, "loss": 0.3465, "step": 459500 }, { "epoch": 0.28, "learning_rate": 5.411074918371339e-05, "loss": 0.3462, "step": 460000 }, { "epoch": 0.28, "learning_rate": 5.410864921815282e-05, "loss": 0.3436, "step": 460500 }, { "epoch": 0.28, "learning_rate": 5.4106549252592254e-05, "loss": 0.3466, "step": 461000 }, { "epoch": 0.28, "learning_rate": 5.4104449287031694e-05, "loss": 0.3527, "step": 461500 }, { "epoch": 0.28, "learning_rate": 5.410234932147113e-05, "loss": 0.3473, "step": 462000 }, { "epoch": 0.28, "learning_rate": 5.410025355584168e-05, "loss": 0.3535, "step": 462500 }, { "epoch": 0.28, "learning_rate": 5.4098153590281114e-05, "loss": 0.3463, "step": 463000 }, { "epoch": 0.28, "learning_rate": 5.4096053624720555e-05, "loss": 0.3507, "step": 463500 }, { "epoch": 0.28, "learning_rate": 5.409395365915999e-05, "loss": 0.3453, "step": 464000 }, { "epoch": 0.28, "learning_rate": 5.409185369359942e-05, "loss": 0.3498, "step": 464500 }, { "epoch": 0.28, "learning_rate": 5.408975372803886e-05, "loss": 0.3447, "step": 465000 }, { "epoch": 0.28, "learning_rate": 5.4087657962409415e-05, "loss": 0.3464, "step": 465500 }, { "epoch": 0.28, "learning_rate": 5.408555799684885e-05, "loss": 0.3474, "step": 466000 }, { "epoch": 0.28, "learning_rate": 5.408345803128829e-05, "loss": 0.348, "step": 466500 }, { "epoch": 0.28, "learning_rate": 5.408136226565884e-05, "loss": 0.349, "step": 467000 }, { "epoch": 0.28, "learning_rate": 5.4079262300098276e-05, "loss": 0.344, "step": 467500 }, { "epoch": 0.28, "learning_rate": 5.407716233453771e-05, "loss": 0.3418, "step": 468000 }, { "epoch": 0.28, "learning_rate": 5.407506236897715e-05, "loss": 0.3586, "step": 468500 }, { "epoch": 0.28, "learning_rate": 5.407296240341658e-05, "loss": 0.3443, "step": 469000 }, { "epoch": 0.28, "learning_rate": 5.407086243785602e-05, "loss": 0.35, "step": 469500 }, { "epoch": 0.28, "learning_rate": 5.406876247229546e-05, "loss": 0.345, "step": 470000 }, { "epoch": 0.28, "learning_rate": 5.406666250673489e-05, "loss": 0.3478, "step": 470500 }, { "epoch": 0.28, "learning_rate": 5.4064562541174324e-05, "loss": 0.3472, "step": 471000 }, { "epoch": 0.28, "learning_rate": 5.4062462575613764e-05, "loss": 0.347, "step": 471500 }, { "epoch": 0.28, "learning_rate": 5.406036261005319e-05, "loss": 0.3518, "step": 472000 }, { "epoch": 0.28, "learning_rate": 5.4058262644492624e-05, "loss": 0.3423, "step": 472500 }, { "epoch": 0.28, "learning_rate": 5.4056166878863185e-05, "loss": 0.352, "step": 473000 }, { "epoch": 0.28, "learning_rate": 5.4054066913302625e-05, "loss": 0.3452, "step": 473500 }, { "epoch": 0.28, "learning_rate": 5.405196694774206e-05, "loss": 0.3427, "step": 474000 }, { "epoch": 0.28, "learning_rate": 5.404986698218149e-05, "loss": 0.3433, "step": 474500 }, { "epoch": 0.28, "learning_rate": 5.4047767016620925e-05, "loss": 0.3472, "step": 475000 }, { "epoch": 0.29, "learning_rate": 5.404566705106036e-05, "loss": 0.3435, "step": 475500 }, { "epoch": 0.29, "learning_rate": 5.404357128543092e-05, "loss": 0.35, "step": 476000 }, { "epoch": 0.29, "learning_rate": 5.404147131987035e-05, "loss": 0.3495, "step": 476500 }, { "epoch": 0.29, "learning_rate": 5.4039371354309786e-05, "loss": 0.3529, "step": 477000 }, { "epoch": 0.29, "learning_rate": 5.403727138874922e-05, "loss": 0.3486, "step": 477500 }, { "epoch": 0.29, "learning_rate": 5.403517142318866e-05, "loss": 0.3381, "step": 478000 }, { "epoch": 0.29, "learning_rate": 5.403307145762809e-05, "loss": 0.3462, "step": 478500 }, { "epoch": 0.29, "learning_rate": 5.4030971492067527e-05, "loss": 0.3507, "step": 479000 }, { "epoch": 0.29, "learning_rate": 5.402887152650697e-05, "loss": 0.3463, "step": 479500 }, { "epoch": 0.29, "learning_rate": 5.40267715609464e-05, "loss": 0.3469, "step": 480000 }, { "epoch": 0.29, "learning_rate": 5.4024675795316954e-05, "loss": 0.3508, "step": 480500 }, { "epoch": 0.29, "learning_rate": 5.4022580029687514e-05, "loss": 0.3426, "step": 481000 }, { "epoch": 0.29, "learning_rate": 5.402048006412695e-05, "loss": 0.3445, "step": 481500 }, { "epoch": 0.29, "learning_rate": 5.401838009856638e-05, "loss": 0.3404, "step": 482000 }, { "epoch": 0.29, "learning_rate": 5.4016280133005815e-05, "loss": 0.3503, "step": 482500 }, { "epoch": 0.29, "learning_rate": 5.4014184367376375e-05, "loss": 0.3468, "step": 483000 }, { "epoch": 0.29, "learning_rate": 5.4012084401815815e-05, "loss": 0.3455, "step": 483500 }, { "epoch": 0.29, "learning_rate": 5.400998443625524e-05, "loss": 0.3447, "step": 484000 }, { "epoch": 0.29, "learning_rate": 5.4007884470694675e-05, "loss": 0.3442, "step": 484500 }, { "epoch": 0.29, "learning_rate": 5.4005788705065236e-05, "loss": 0.3501, "step": 485000 }, { "epoch": 0.29, "learning_rate": 5.4003688739504676e-05, "loss": 0.3419, "step": 485500 }, { "epoch": 0.29, "learning_rate": 5.40015887739441e-05, "loss": 0.3401, "step": 486000 }, { "epoch": 0.29, "learning_rate": 5.3999488808383536e-05, "loss": 0.3468, "step": 486500 }, { "epoch": 0.29, "learning_rate": 5.3997388842822976e-05, "loss": 0.346, "step": 487000 }, { "epoch": 0.29, "learning_rate": 5.399528887726241e-05, "loss": 0.3479, "step": 487500 }, { "epoch": 0.29, "learning_rate": 5.399318891170184e-05, "loss": 0.3379, "step": 488000 }, { "epoch": 0.29, "learning_rate": 5.3991088946141283e-05, "loss": 0.3453, "step": 488500 }, { "epoch": 0.29, "learning_rate": 5.398899318051184e-05, "loss": 0.348, "step": 489000 }, { "epoch": 0.29, "learning_rate": 5.398689321495127e-05, "loss": 0.3451, "step": 489500 }, { "epoch": 0.29, "learning_rate": 5.398479324939071e-05, "loss": 0.3372, "step": 490000 }, { "epoch": 0.29, "learning_rate": 5.3982693283830144e-05, "loss": 0.3503, "step": 490500 }, { "epoch": 0.29, "learning_rate": 5.39805975182007e-05, "loss": 0.3513, "step": 491000 }, { "epoch": 0.29, "learning_rate": 5.397850175257126e-05, "loss": 0.3357, "step": 491500 }, { "epoch": 0.29, "learning_rate": 5.397640178701069e-05, "loss": 0.3456, "step": 492000 }, { "epoch": 0.3, "learning_rate": 5.397430182145013e-05, "loss": 0.3508, "step": 492500 }, { "epoch": 0.3, "learning_rate": 5.3972201855889565e-05, "loss": 0.3477, "step": 493000 }, { "epoch": 0.3, "learning_rate": 5.397010189032899e-05, "loss": 0.3526, "step": 493500 }, { "epoch": 0.3, "learning_rate": 5.396800192476843e-05, "loss": 0.3409, "step": 494000 }, { "epoch": 0.3, "learning_rate": 5.3965901959207866e-05, "loss": 0.3465, "step": 494500 }, { "epoch": 0.3, "learning_rate": 5.3963801993647306e-05, "loss": 0.341, "step": 495000 }, { "epoch": 0.3, "learning_rate": 5.396170622801786e-05, "loss": 0.3488, "step": 495500 }, { "epoch": 0.3, "learning_rate": 5.395960626245729e-05, "loss": 0.3548, "step": 496000 }, { "epoch": 0.3, "learning_rate": 5.3957506296896726e-05, "loss": 0.3409, "step": 496500 }, { "epoch": 0.3, "learning_rate": 5.3955406331336167e-05, "loss": 0.3457, "step": 497000 }, { "epoch": 0.3, "learning_rate": 5.39533063657756e-05, "loss": 0.3428, "step": 497500 }, { "epoch": 0.3, "learning_rate": 5.3951206400215033e-05, "loss": 0.3547, "step": 498000 }, { "epoch": 0.3, "learning_rate": 5.3949106434654474e-05, "loss": 0.344, "step": 498500 }, { "epoch": 0.3, "learning_rate": 5.394700646909391e-05, "loss": 0.3415, "step": 499000 }, { "epoch": 0.3, "learning_rate": 5.394491070346446e-05, "loss": 0.3391, "step": 499500 }, { "epoch": 0.3, "learning_rate": 5.3942810737903894e-05, "loss": 0.3393, "step": 500000 }, { "epoch": 0.3, "eval_loss": 0.31016305088996887, "eval_runtime": 1464.0628, "eval_samples_per_second": 359.766, "eval_steps_per_second": 59.961, "step": 500000 }, { "epoch": 0.3, "learning_rate": 5.3940710772343334e-05, "loss": 0.3476, "step": 500500 }, { "epoch": 0.3, "learning_rate": 5.393861080678277e-05, "loss": 0.344, "step": 501000 }, { "epoch": 0.3, "learning_rate": 5.393651504115332e-05, "loss": 0.3406, "step": 501500 }, { "epoch": 0.3, "learning_rate": 5.393441507559276e-05, "loss": 0.3436, "step": 502000 }, { "epoch": 0.3, "learning_rate": 5.3932315110032195e-05, "loss": 0.3441, "step": 502500 }, { "epoch": 0.3, "learning_rate": 5.393021514447163e-05, "loss": 0.3473, "step": 503000 }, { "epoch": 0.3, "learning_rate": 5.392811517891107e-05, "loss": 0.3426, "step": 503500 }, { "epoch": 0.3, "learning_rate": 5.392602361321274e-05, "loss": 0.3406, "step": 504000 }, { "epoch": 0.3, "learning_rate": 5.392392364765218e-05, "loss": 0.3442, "step": 504500 }, { "epoch": 0.3, "learning_rate": 5.3921823682091616e-05, "loss": 0.3489, "step": 505000 }, { "epoch": 0.3, "learning_rate": 5.391972791646217e-05, "loss": 0.3446, "step": 505500 }, { "epoch": 0.3, "learning_rate": 5.39176279509016e-05, "loss": 0.3443, "step": 506000 }, { "epoch": 0.3, "learning_rate": 5.3915527985341043e-05, "loss": 0.3462, "step": 506500 }, { "epoch": 0.3, "learning_rate": 5.391342801978048e-05, "loss": 0.3441, "step": 507000 }, { "epoch": 0.3, "learning_rate": 5.3911328054219904e-05, "loss": 0.3474, "step": 507500 }, { "epoch": 0.3, "learning_rate": 5.3909228088659344e-05, "loss": 0.3399, "step": 508000 }, { "epoch": 0.3, "learning_rate": 5.390712812309878e-05, "loss": 0.3452, "step": 508500 }, { "epoch": 0.31, "learning_rate": 5.390502815753822e-05, "loss": 0.3486, "step": 509000 }, { "epoch": 0.31, "learning_rate": 5.390293239190878e-05, "loss": 0.3441, "step": 509500 }, { "epoch": 0.31, "learning_rate": 5.3900832426348205e-05, "loss": 0.3524, "step": 510000 }, { "epoch": 0.31, "learning_rate": 5.389873246078764e-05, "loss": 0.347, "step": 510500 }, { "epoch": 0.31, "learning_rate": 5.389663249522708e-05, "loss": 0.3465, "step": 511000 }, { "epoch": 0.31, "learning_rate": 5.389453252966651e-05, "loss": 0.3455, "step": 511500 }, { "epoch": 0.31, "learning_rate": 5.3892432564105945e-05, "loss": 0.3445, "step": 512000 }, { "epoch": 0.31, "learning_rate": 5.3890332598545385e-05, "loss": 0.3451, "step": 512500 }, { "epoch": 0.31, "learning_rate": 5.388823263298482e-05, "loss": 0.3411, "step": 513000 }, { "epoch": 0.31, "learning_rate": 5.388613686735537e-05, "loss": 0.3462, "step": 513500 }, { "epoch": 0.31, "learning_rate": 5.388404110172593e-05, "loss": 0.3414, "step": 514000 }, { "epoch": 0.31, "learning_rate": 5.3881941136165366e-05, "loss": 0.3442, "step": 514500 }, { "epoch": 0.31, "learning_rate": 5.38798411706048e-05, "loss": 0.3457, "step": 515000 }, { "epoch": 0.31, "learning_rate": 5.387774120504423e-05, "loss": 0.3445, "step": 515500 }, { "epoch": 0.31, "learning_rate": 5.3875641239483673e-05, "loss": 0.347, "step": 516000 }, { "epoch": 0.31, "learning_rate": 5.387354127392311e-05, "loss": 0.3434, "step": 516500 }, { "epoch": 0.31, "learning_rate": 5.387144130836254e-05, "loss": 0.3344, "step": 517000 }, { "epoch": 0.31, "learning_rate": 5.386934134280198e-05, "loss": 0.3484, "step": 517500 }, { "epoch": 0.31, "learning_rate": 5.3867241377241414e-05, "loss": 0.3452, "step": 518000 }, { "epoch": 0.31, "learning_rate": 5.386514561161197e-05, "loss": 0.3443, "step": 518500 }, { "epoch": 0.31, "learning_rate": 5.38630456460514e-05, "loss": 0.3428, "step": 519000 }, { "epoch": 0.31, "learning_rate": 5.386094568049084e-05, "loss": 0.3349, "step": 519500 }, { "epoch": 0.31, "learning_rate": 5.3858845714930275e-05, "loss": 0.3314, "step": 520000 }, { "epoch": 0.31, "learning_rate": 5.385674574936971e-05, "loss": 0.3411, "step": 520500 }, { "epoch": 0.31, "learning_rate": 5.385464578380915e-05, "loss": 0.3522, "step": 521000 }, { "epoch": 0.31, "learning_rate": 5.38525500181797e-05, "loss": 0.3409, "step": 521500 }, { "epoch": 0.31, "learning_rate": 5.3850454252550256e-05, "loss": 0.3401, "step": 522000 }, { "epoch": 0.31, "learning_rate": 5.384835428698969e-05, "loss": 0.3385, "step": 522500 }, { "epoch": 0.31, "learning_rate": 5.384625432142913e-05, "loss": 0.3392, "step": 523000 }, { "epoch": 0.31, "learning_rate": 5.384415435586856e-05, "loss": 0.3432, "step": 523500 }, { "epoch": 0.31, "learning_rate": 5.3842054390307996e-05, "loss": 0.3456, "step": 524000 }, { "epoch": 0.31, "learning_rate": 5.3839954424747436e-05, "loss": 0.3407, "step": 524500 }, { "epoch": 0.31, "learning_rate": 5.383785445918687e-05, "loss": 0.3427, "step": 525000 }, { "epoch": 0.32, "learning_rate": 5.3835758693557424e-05, "loss": 0.3549, "step": 525500 }, { "epoch": 0.32, "learning_rate": 5.383365872799686e-05, "loss": 0.346, "step": 526000 }, { "epoch": 0.32, "learning_rate": 5.38315587624363e-05, "loss": 0.3425, "step": 526500 }, { "epoch": 0.32, "learning_rate": 5.382945879687573e-05, "loss": 0.3371, "step": 527000 }, { "epoch": 0.32, "learning_rate": 5.3827358831315164e-05, "loss": 0.3431, "step": 527500 }, { "epoch": 0.32, "learning_rate": 5.3825258865754604e-05, "loss": 0.3384, "step": 528000 }, { "epoch": 0.32, "learning_rate": 5.382315890019404e-05, "loss": 0.3465, "step": 528500 }, { "epoch": 0.32, "learning_rate": 5.382105893463347e-05, "loss": 0.348, "step": 529000 }, { "epoch": 0.32, "learning_rate": 5.381895896907291e-05, "loss": 0.3429, "step": 529500 }, { "epoch": 0.32, "learning_rate": 5.381685900351234e-05, "loss": 0.3412, "step": 530000 }, { "epoch": 0.32, "learning_rate": 5.381475903795177e-05, "loss": 0.3397, "step": 530500 }, { "epoch": 0.32, "learning_rate": 5.381265907239121e-05, "loss": 0.3356, "step": 531000 }, { "epoch": 0.32, "learning_rate": 5.3810559106830645e-05, "loss": 0.3397, "step": 531500 }, { "epoch": 0.32, "learning_rate": 5.380845914127008e-05, "loss": 0.3378, "step": 532000 }, { "epoch": 0.32, "learning_rate": 5.380636337564064e-05, "loss": 0.3428, "step": 532500 }, { "epoch": 0.32, "learning_rate": 5.38042676100112e-05, "loss": 0.346, "step": 533000 }, { "epoch": 0.32, "learning_rate": 5.380217184438175e-05, "loss": 0.3395, "step": 533500 }, { "epoch": 0.32, "learning_rate": 5.3800071878821187e-05, "loss": 0.3407, "step": 534000 }, { "epoch": 0.32, "learning_rate": 5.379797191326062e-05, "loss": 0.3414, "step": 534500 }, { "epoch": 0.32, "learning_rate": 5.379587194770006e-05, "loss": 0.3386, "step": 535000 }, { "epoch": 0.32, "learning_rate": 5.3793771982139494e-05, "loss": 0.3458, "step": 535500 }, { "epoch": 0.32, "learning_rate": 5.379167201657893e-05, "loss": 0.3365, "step": 536000 }, { "epoch": 0.32, "learning_rate": 5.378957205101837e-05, "loss": 0.3437, "step": 536500 }, { "epoch": 0.32, "learning_rate": 5.3787472085457794e-05, "loss": 0.3361, "step": 537000 }, { "epoch": 0.32, "learning_rate": 5.378537211989723e-05, "loss": 0.3445, "step": 537500 }, { "epoch": 0.32, "learning_rate": 5.378327215433667e-05, "loss": 0.3364, "step": 538000 }, { "epoch": 0.32, "learning_rate": 5.37811721887761e-05, "loss": 0.3447, "step": 538500 }, { "epoch": 0.32, "learning_rate": 5.377907642314666e-05, "loss": 0.3427, "step": 539000 }, { "epoch": 0.32, "learning_rate": 5.3776976457586095e-05, "loss": 0.3441, "step": 539500 }, { "epoch": 0.32, "learning_rate": 5.377487649202553e-05, "loss": 0.3394, "step": 540000 }, { "epoch": 0.32, "learning_rate": 5.377277652646496e-05, "loss": 0.338, "step": 540500 }, { "epoch": 0.32, "learning_rate": 5.37706765609044e-05, "loss": 0.3483, "step": 541000 }, { "epoch": 0.32, "learning_rate": 5.376858079527496e-05, "loss": 0.3408, "step": 541500 }, { "epoch": 0.32, "learning_rate": 5.376648082971439e-05, "loss": 0.3306, "step": 542000 }, { "epoch": 0.33, "learning_rate": 5.376438086415382e-05, "loss": 0.3398, "step": 542500 }, { "epoch": 0.33, "learning_rate": 5.376228089859326e-05, "loss": 0.3455, "step": 543000 }, { "epoch": 0.33, "learning_rate": 5.3760180933032696e-05, "loss": 0.3403, "step": 543500 }, { "epoch": 0.33, "learning_rate": 5.375808096747213e-05, "loss": 0.3473, "step": 544000 }, { "epoch": 0.33, "learning_rate": 5.375598100191157e-05, "loss": 0.3398, "step": 544500 }, { "epoch": 0.33, "learning_rate": 5.3753881036351004e-05, "loss": 0.3404, "step": 545000 }, { "epoch": 0.33, "learning_rate": 5.375178527072156e-05, "loss": 0.3386, "step": 545500 }, { "epoch": 0.33, "learning_rate": 5.374968530516099e-05, "loss": 0.3413, "step": 546000 }, { "epoch": 0.33, "learning_rate": 5.374758533960043e-05, "loss": 0.3399, "step": 546500 }, { "epoch": 0.33, "learning_rate": 5.3745485374039864e-05, "loss": 0.3379, "step": 547000 }, { "epoch": 0.33, "learning_rate": 5.374338960841042e-05, "loss": 0.3456, "step": 547500 }, { "epoch": 0.33, "learning_rate": 5.374129384278098e-05, "loss": 0.3391, "step": 548000 }, { "epoch": 0.33, "learning_rate": 5.373919387722042e-05, "loss": 0.3468, "step": 548500 }, { "epoch": 0.33, "learning_rate": 5.3737093911659845e-05, "loss": 0.3407, "step": 549000 }, { "epoch": 0.33, "learning_rate": 5.373499394609928e-05, "loss": 0.3426, "step": 549500 }, { "epoch": 0.33, "learning_rate": 5.373289398053872e-05, "loss": 0.3468, "step": 550000 }, { "epoch": 0.33, "learning_rate": 5.373079401497815e-05, "loss": 0.3456, "step": 550500 }, { "epoch": 0.33, "learning_rate": 5.3728694049417586e-05, "loss": 0.3349, "step": 551000 }, { "epoch": 0.33, "learning_rate": 5.3726594083857026e-05, "loss": 0.3405, "step": 551500 }, { "epoch": 0.33, "learning_rate": 5.372449411829646e-05, "loss": 0.3367, "step": 552000 }, { "epoch": 0.33, "learning_rate": 5.372239415273589e-05, "loss": 0.3322, "step": 552500 }, { "epoch": 0.33, "learning_rate": 5.372029418717533e-05, "loss": 0.3339, "step": 553000 }, { "epoch": 0.33, "learning_rate": 5.3718194221614767e-05, "loss": 0.3393, "step": 553500 }, { "epoch": 0.33, "learning_rate": 5.371609845598532e-05, "loss": 0.3393, "step": 554000 }, { "epoch": 0.33, "learning_rate": 5.371399849042476e-05, "loss": 0.3424, "step": 554500 }, { "epoch": 0.33, "learning_rate": 5.3711902724795314e-05, "loss": 0.3378, "step": 555000 }, { "epoch": 0.33, "learning_rate": 5.3709806959165874e-05, "loss": 0.3321, "step": 555500 }, { "epoch": 0.33, "learning_rate": 5.37077069936053e-05, "loss": 0.338, "step": 556000 }, { "epoch": 0.33, "learning_rate": 5.3705607028044734e-05, "loss": 0.3405, "step": 556500 }, { "epoch": 0.33, "learning_rate": 5.3703507062484175e-05, "loss": 0.3359, "step": 557000 }, { "epoch": 0.33, "learning_rate": 5.370140709692361e-05, "loss": 0.3328, "step": 557500 }, { "epoch": 0.33, "learning_rate": 5.369930713136304e-05, "loss": 0.3401, "step": 558000 }, { "epoch": 0.33, "learning_rate": 5.369720716580248e-05, "loss": 0.3353, "step": 558500 }, { "epoch": 0.34, "learning_rate": 5.3695107200241915e-05, "loss": 0.3417, "step": 559000 }, { "epoch": 0.34, "learning_rate": 5.369300723468135e-05, "loss": 0.342, "step": 559500 }, { "epoch": 0.34, "learning_rate": 5.369090726912079e-05, "loss": 0.3388, "step": 560000 }, { "epoch": 0.34, "learning_rate": 5.368881150349134e-05, "loss": 0.3374, "step": 560500 }, { "epoch": 0.34, "learning_rate": 5.3686711537930776e-05, "loss": 0.3384, "step": 561000 }, { "epoch": 0.34, "learning_rate": 5.3684611572370216e-05, "loss": 0.3416, "step": 561500 }, { "epoch": 0.34, "learning_rate": 5.368251160680965e-05, "loss": 0.3421, "step": 562000 }, { "epoch": 0.34, "learning_rate": 5.368041164124908e-05, "loss": 0.3452, "step": 562500 }, { "epoch": 0.34, "learning_rate": 5.3678311675688523e-05, "loss": 0.3362, "step": 563000 }, { "epoch": 0.34, "learning_rate": 5.367621171012796e-05, "loss": 0.3416, "step": 563500 }, { "epoch": 0.34, "learning_rate": 5.3674111744567384e-05, "loss": 0.3382, "step": 564000 }, { "epoch": 0.34, "learning_rate": 5.3672011779006824e-05, "loss": 0.3376, "step": 564500 }, { "epoch": 0.34, "learning_rate": 5.366991181344626e-05, "loss": 0.3418, "step": 565000 }, { "epoch": 0.34, "learning_rate": 5.366781604781682e-05, "loss": 0.3372, "step": 565500 }, { "epoch": 0.34, "learning_rate": 5.366571608225625e-05, "loss": 0.3395, "step": 566000 }, { "epoch": 0.34, "learning_rate": 5.3663616116695685e-05, "loss": 0.3324, "step": 566500 }, { "epoch": 0.34, "learning_rate": 5.3661520351066245e-05, "loss": 0.3376, "step": 567000 }, { "epoch": 0.34, "learning_rate": 5.365942038550568e-05, "loss": 0.3368, "step": 567500 }, { "epoch": 0.34, "learning_rate": 5.365732041994511e-05, "loss": 0.3353, "step": 568000 }, { "epoch": 0.34, "learning_rate": 5.3655220454384545e-05, "loss": 0.3351, "step": 568500 }, { "epoch": 0.34, "learning_rate": 5.3653124688755106e-05, "loss": 0.3382, "step": 569000 }, { "epoch": 0.34, "learning_rate": 5.365102472319454e-05, "loss": 0.3459, "step": 569500 }, { "epoch": 0.34, "learning_rate": 5.364892475763398e-05, "loss": 0.3476, "step": 570000 }, { "epoch": 0.34, "learning_rate": 5.364682479207341e-05, "loss": 0.3425, "step": 570500 }, { "epoch": 0.34, "learning_rate": 5.364472482651284e-05, "loss": 0.3465, "step": 571000 }, { "epoch": 0.34, "learning_rate": 5.364262486095228e-05, "loss": 0.3382, "step": 571500 }, { "epoch": 0.34, "learning_rate": 5.364052489539171e-05, "loss": 0.3376, "step": 572000 }, { "epoch": 0.34, "learning_rate": 5.3638429129762273e-05, "loss": 0.3391, "step": 572500 }, { "epoch": 0.34, "learning_rate": 5.363632916420171e-05, "loss": 0.3417, "step": 573000 }, { "epoch": 0.34, "learning_rate": 5.363422919864114e-05, "loss": 0.3398, "step": 573500 }, { "epoch": 0.34, "learning_rate": 5.3632129233080574e-05, "loss": 0.3443, "step": 574000 }, { "epoch": 0.34, "learning_rate": 5.363002926752001e-05, "loss": 0.3443, "step": 574500 }, { "epoch": 0.34, "learning_rate": 5.362792930195945e-05, "loss": 0.3383, "step": 575000 }, { "epoch": 0.35, "learning_rate": 5.362583353633001e-05, "loss": 0.3344, "step": 575500 }, { "epoch": 0.35, "learning_rate": 5.3623733570769435e-05, "loss": 0.3373, "step": 576000 }, { "epoch": 0.35, "learning_rate": 5.3621633605208875e-05, "loss": 0.3413, "step": 576500 }, { "epoch": 0.35, "learning_rate": 5.361953363964831e-05, "loss": 0.3378, "step": 577000 }, { "epoch": 0.35, "learning_rate": 5.361743367408774e-05, "loss": 0.3397, "step": 577500 }, { "epoch": 0.35, "learning_rate": 5.361533370852718e-05, "loss": 0.3262, "step": 578000 }, { "epoch": 0.35, "learning_rate": 5.3613237942897736e-05, "loss": 0.331, "step": 578500 }, { "epoch": 0.35, "learning_rate": 5.361113797733717e-05, "loss": 0.34, "step": 579000 }, { "epoch": 0.35, "learning_rate": 5.36090380117766e-05, "loss": 0.3359, "step": 579500 }, { "epoch": 0.35, "learning_rate": 5.360693804621604e-05, "loss": 0.3371, "step": 580000 }, { "epoch": 0.35, "learning_rate": 5.3604838080655476e-05, "loss": 0.3355, "step": 580500 }, { "epoch": 0.35, "learning_rate": 5.360273811509491e-05, "loss": 0.3384, "step": 581000 }, { "epoch": 0.35, "learning_rate": 5.360064234946546e-05, "loss": 0.3433, "step": 581500 }, { "epoch": 0.35, "learning_rate": 5.3598542383904903e-05, "loss": 0.3334, "step": 582000 }, { "epoch": 0.35, "learning_rate": 5.359644241834434e-05, "loss": 0.3467, "step": 582500 }, { "epoch": 0.35, "learning_rate": 5.359434245278377e-05, "loss": 0.3259, "step": 583000 }, { "epoch": 0.35, "learning_rate": 5.359224248722321e-05, "loss": 0.341, "step": 583500 }, { "epoch": 0.35, "learning_rate": 5.3590142521662644e-05, "loss": 0.3343, "step": 584000 }, { "epoch": 0.35, "learning_rate": 5.358804255610208e-05, "loss": 0.3329, "step": 584500 }, { "epoch": 0.35, "learning_rate": 5.358594259054152e-05, "loss": 0.3418, "step": 585000 }, { "epoch": 0.35, "learning_rate": 5.358384262498095e-05, "loss": 0.3393, "step": 585500 }, { "epoch": 0.35, "learning_rate": 5.3581746859351505e-05, "loss": 0.336, "step": 586000 }, { "epoch": 0.35, "learning_rate": 5.3579646893790945e-05, "loss": 0.3365, "step": 586500 }, { "epoch": 0.35, "learning_rate": 5.357754692823038e-05, "loss": 0.3343, "step": 587000 }, { "epoch": 0.35, "learning_rate": 5.357544696266981e-05, "loss": 0.344, "step": 587500 }, { "epoch": 0.35, "learning_rate": 5.3573351197040365e-05, "loss": 0.3414, "step": 588000 }, { "epoch": 0.35, "learning_rate": 5.3571251231479806e-05, "loss": 0.3369, "step": 588500 }, { "epoch": 0.35, "learning_rate": 5.356915126591924e-05, "loss": 0.3313, "step": 589000 }, { "epoch": 0.35, "learning_rate": 5.356705130035867e-05, "loss": 0.3492, "step": 589500 }, { "epoch": 0.35, "learning_rate": 5.356495133479811e-05, "loss": 0.3327, "step": 590000 }, { "epoch": 0.35, "learning_rate": 5.3562851369237546e-05, "loss": 0.3395, "step": 590500 }, { "epoch": 0.35, "learning_rate": 5.356075140367697e-05, "loss": 0.3431, "step": 591000 }, { "epoch": 0.35, "learning_rate": 5.355865563804753e-05, "loss": 0.336, "step": 591500 }, { "epoch": 0.35, "learning_rate": 5.3556555672486974e-05, "loss": 0.3447, "step": 592000 }, { "epoch": 0.36, "learning_rate": 5.355445570692641e-05, "loss": 0.336, "step": 592500 }, { "epoch": 0.36, "learning_rate": 5.355235574136584e-05, "loss": 0.3355, "step": 593000 }, { "epoch": 0.36, "learning_rate": 5.3550255775805274e-05, "loss": 0.3286, "step": 593500 }, { "epoch": 0.36, "learning_rate": 5.354815581024471e-05, "loss": 0.3431, "step": 594000 }, { "epoch": 0.36, "learning_rate": 5.354605584468415e-05, "loss": 0.3408, "step": 594500 }, { "epoch": 0.36, "learning_rate": 5.354396007905471e-05, "loss": 0.333, "step": 595000 }, { "epoch": 0.36, "learning_rate": 5.3541860113494135e-05, "loss": 0.3283, "step": 595500 }, { "epoch": 0.36, "learning_rate": 5.353976014793357e-05, "loss": 0.3428, "step": 596000 }, { "epoch": 0.36, "learning_rate": 5.353766018237301e-05, "loss": 0.3404, "step": 596500 }, { "epoch": 0.36, "learning_rate": 5.353556021681244e-05, "loss": 0.3362, "step": 597000 }, { "epoch": 0.36, "learning_rate": 5.3533464451183e-05, "loss": 0.3443, "step": 597500 }, { "epoch": 0.36, "learning_rate": 5.353136448562243e-05, "loss": 0.3321, "step": 598000 }, { "epoch": 0.36, "learning_rate": 5.352926452006187e-05, "loss": 0.3367, "step": 598500 }, { "epoch": 0.36, "learning_rate": 5.35271645545013e-05, "loss": 0.3361, "step": 599000 }, { "epoch": 0.36, "learning_rate": 5.3525064588940736e-05, "loss": 0.3367, "step": 599500 }, { "epoch": 0.36, "learning_rate": 5.3522968823311296e-05, "loss": 0.3344, "step": 600000 }, { "epoch": 0.36, "eval_loss": 0.3018554449081421, "eval_runtime": 1471.2095, "eval_samples_per_second": 358.018, "eval_steps_per_second": 59.67, "step": 600000 }, { "epoch": 0.36, "learning_rate": 5.352086885775073e-05, "loss": 0.3345, "step": 600500 }, { "epoch": 0.36, "learning_rate": 5.351876889219016e-05, "loss": 0.3411, "step": 601000 }, { "epoch": 0.36, "learning_rate": 5.3516673126560724e-05, "loss": 0.33, "step": 601500 }, { "epoch": 0.36, "learning_rate": 5.3514573161000164e-05, "loss": 0.3389, "step": 602000 }, { "epoch": 0.36, "learning_rate": 5.35124731954396e-05, "loss": 0.3292, "step": 602500 }, { "epoch": 0.36, "learning_rate": 5.3510373229879024e-05, "loss": 0.3317, "step": 603000 }, { "epoch": 0.36, "learning_rate": 5.3508277464249584e-05, "loss": 0.3332, "step": 603500 }, { "epoch": 0.36, "learning_rate": 5.3506177498689025e-05, "loss": 0.3353, "step": 604000 }, { "epoch": 0.36, "learning_rate": 5.350407753312846e-05, "loss": 0.3349, "step": 604500 }, { "epoch": 0.36, "learning_rate": 5.350198176749901e-05, "loss": 0.3346, "step": 605000 }, { "epoch": 0.36, "learning_rate": 5.3499881801938445e-05, "loss": 0.3395, "step": 605500 }, { "epoch": 0.36, "learning_rate": 5.3497781836377885e-05, "loss": 0.3384, "step": 606000 }, { "epoch": 0.36, "learning_rate": 5.349568187081732e-05, "loss": 0.3355, "step": 606500 }, { "epoch": 0.36, "learning_rate": 5.349358190525676e-05, "loss": 0.3329, "step": 607000 }, { "epoch": 0.36, "learning_rate": 5.3491481939696186e-05, "loss": 0.3383, "step": 607500 }, { "epoch": 0.36, "learning_rate": 5.348938197413562e-05, "loss": 0.334, "step": 608000 }, { "epoch": 0.36, "learning_rate": 5.348728200857506e-05, "loss": 0.3349, "step": 608500 }, { "epoch": 0.37, "learning_rate": 5.348518204301449e-05, "loss": 0.3348, "step": 609000 }, { "epoch": 0.37, "learning_rate": 5.3483082077453926e-05, "loss": 0.3373, "step": 609500 }, { "epoch": 0.37, "learning_rate": 5.3480982111893367e-05, "loss": 0.3386, "step": 610000 }, { "epoch": 0.37, "learning_rate": 5.34788821463328e-05, "loss": 0.3317, "step": 610500 }, { "epoch": 0.37, "learning_rate": 5.3476782180772233e-05, "loss": 0.338, "step": 611000 }, { "epoch": 0.37, "learning_rate": 5.3474682215211674e-05, "loss": 0.3368, "step": 611500 }, { "epoch": 0.37, "learning_rate": 5.347258644958223e-05, "loss": 0.3364, "step": 612000 }, { "epoch": 0.37, "learning_rate": 5.347048648402166e-05, "loss": 0.3381, "step": 612500 }, { "epoch": 0.37, "learning_rate": 5.3468386518461094e-05, "loss": 0.3405, "step": 613000 }, { "epoch": 0.37, "learning_rate": 5.3466290752831655e-05, "loss": 0.3431, "step": 613500 }, { "epoch": 0.37, "learning_rate": 5.346419078727109e-05, "loss": 0.3371, "step": 614000 }, { "epoch": 0.37, "learning_rate": 5.346209082171052e-05, "loss": 0.3281, "step": 614500 }, { "epoch": 0.37, "learning_rate": 5.345999085614996e-05, "loss": 0.3402, "step": 615000 }, { "epoch": 0.37, "learning_rate": 5.3457890890589395e-05, "loss": 0.3384, "step": 615500 }, { "epoch": 0.37, "learning_rate": 5.345579092502883e-05, "loss": 0.3394, "step": 616000 }, { "epoch": 0.37, "learning_rate": 5.345369095946827e-05, "loss": 0.3293, "step": 616500 }, { "epoch": 0.37, "learning_rate": 5.34515909939077e-05, "loss": 0.3409, "step": 617000 }, { "epoch": 0.37, "learning_rate": 5.3449491028347136e-05, "loss": 0.3431, "step": 617500 }, { "epoch": 0.37, "learning_rate": 5.344739106278657e-05, "loss": 0.3363, "step": 618000 }, { "epoch": 0.37, "learning_rate": 5.3445291097226e-05, "loss": 0.3309, "step": 618500 }, { "epoch": 0.37, "learning_rate": 5.3443191131665436e-05, "loss": 0.3252, "step": 619000 }, { "epoch": 0.37, "learning_rate": 5.3441095366035997e-05, "loss": 0.3312, "step": 619500 }, { "epoch": 0.37, "learning_rate": 5.343899540047544e-05, "loss": 0.3406, "step": 620000 }, { "epoch": 0.37, "learning_rate": 5.3436895434914863e-05, "loss": 0.3358, "step": 620500 }, { "epoch": 0.37, "learning_rate": 5.34347954693543e-05, "loss": 0.3334, "step": 621000 }, { "epoch": 0.37, "learning_rate": 5.343269970372486e-05, "loss": 0.3307, "step": 621500 }, { "epoch": 0.37, "learning_rate": 5.34305997381643e-05, "loss": 0.3293, "step": 622000 }, { "epoch": 0.37, "learning_rate": 5.3428499772603724e-05, "loss": 0.3379, "step": 622500 }, { "epoch": 0.37, "learning_rate": 5.3426404006974285e-05, "loss": 0.3325, "step": 623000 }, { "epoch": 0.37, "learning_rate": 5.342430824134484e-05, "loss": 0.337, "step": 623500 }, { "epoch": 0.37, "learning_rate": 5.342220827578428e-05, "loss": 0.3376, "step": 624000 }, { "epoch": 0.37, "learning_rate": 5.342010831022371e-05, "loss": 0.3374, "step": 624500 }, { "epoch": 0.37, "learning_rate": 5.3418008344663145e-05, "loss": 0.3396, "step": 625000 }, { "epoch": 0.38, "learning_rate": 5.3415908379102585e-05, "loss": 0.3309, "step": 625500 }, { "epoch": 0.38, "learning_rate": 5.341381261347314e-05, "loss": 0.3394, "step": 626000 }, { "epoch": 0.38, "learning_rate": 5.341171264791257e-05, "loss": 0.3352, "step": 626500 }, { "epoch": 0.38, "learning_rate": 5.3409612682352006e-05, "loss": 0.3335, "step": 627000 }, { "epoch": 0.38, "learning_rate": 5.3407512716791446e-05, "loss": 0.3283, "step": 627500 }, { "epoch": 0.38, "learning_rate": 5.340541275123088e-05, "loss": 0.3292, "step": 628000 }, { "epoch": 0.38, "learning_rate": 5.340331278567031e-05, "loss": 0.3258, "step": 628500 }, { "epoch": 0.38, "learning_rate": 5.340121282010975e-05, "loss": 0.3297, "step": 629000 }, { "epoch": 0.38, "learning_rate": 5.339911285454919e-05, "loss": 0.3314, "step": 629500 }, { "epoch": 0.38, "learning_rate": 5.339701288898862e-05, "loss": 0.3289, "step": 630000 }, { "epoch": 0.38, "learning_rate": 5.339491712335918e-05, "loss": 0.337, "step": 630500 }, { "epoch": 0.38, "learning_rate": 5.3392817157798614e-05, "loss": 0.3302, "step": 631000 }, { "epoch": 0.38, "learning_rate": 5.339071719223805e-05, "loss": 0.336, "step": 631500 }, { "epoch": 0.38, "learning_rate": 5.338861722667748e-05, "loss": 0.3332, "step": 632000 }, { "epoch": 0.38, "learning_rate": 5.3386517261116914e-05, "loss": 0.3272, "step": 632500 }, { "epoch": 0.38, "learning_rate": 5.338441729555635e-05, "loss": 0.3353, "step": 633000 }, { "epoch": 0.38, "learning_rate": 5.338231732999579e-05, "loss": 0.3297, "step": 633500 }, { "epoch": 0.38, "learning_rate": 5.338021736443522e-05, "loss": 0.335, "step": 634000 }, { "epoch": 0.38, "learning_rate": 5.3378117398874655e-05, "loss": 0.3331, "step": 634500 }, { "epoch": 0.38, "learning_rate": 5.3376017433314095e-05, "loss": 0.3415, "step": 635000 }, { "epoch": 0.38, "learning_rate": 5.337392166768465e-05, "loss": 0.3307, "step": 635500 }, { "epoch": 0.38, "learning_rate": 5.337182170212408e-05, "loss": 0.3378, "step": 636000 }, { "epoch": 0.38, "learning_rate": 5.3369721736563516e-05, "loss": 0.3404, "step": 636500 }, { "epoch": 0.38, "learning_rate": 5.3367621771002956e-05, "loss": 0.3393, "step": 637000 }, { "epoch": 0.38, "learning_rate": 5.336552180544239e-05, "loss": 0.3296, "step": 637500 }, { "epoch": 0.38, "learning_rate": 5.336342603981294e-05, "loss": 0.3383, "step": 638000 }, { "epoch": 0.38, "learning_rate": 5.3361330274183503e-05, "loss": 0.3328, "step": 638500 }, { "epoch": 0.38, "learning_rate": 5.3359230308622944e-05, "loss": 0.3279, "step": 639000 }, { "epoch": 0.38, "learning_rate": 5.335713034306237e-05, "loss": 0.3275, "step": 639500 }, { "epoch": 0.38, "learning_rate": 5.3355030377501804e-05, "loss": 0.3291, "step": 640000 }, { "epoch": 0.38, "learning_rate": 5.3352930411941244e-05, "loss": 0.333, "step": 640500 }, { "epoch": 0.38, "learning_rate": 5.3350834646311804e-05, "loss": 0.33, "step": 641000 }, { "epoch": 0.38, "learning_rate": 5.334873468075123e-05, "loss": 0.3325, "step": 641500 }, { "epoch": 0.38, "learning_rate": 5.3346634715190665e-05, "loss": 0.3306, "step": 642000 }, { "epoch": 0.39, "learning_rate": 5.3344534749630105e-05, "loss": 0.3379, "step": 642500 }, { "epoch": 0.39, "learning_rate": 5.334243478406954e-05, "loss": 0.3378, "step": 643000 }, { "epoch": 0.39, "learning_rate": 5.33403390184401e-05, "loss": 0.3299, "step": 643500 }, { "epoch": 0.39, "learning_rate": 5.333824325281065e-05, "loss": 0.3298, "step": 644000 }, { "epoch": 0.39, "learning_rate": 5.333614328725009e-05, "loss": 0.3349, "step": 644500 }, { "epoch": 0.39, "learning_rate": 5.3334043321689526e-05, "loss": 0.3281, "step": 645000 }, { "epoch": 0.39, "learning_rate": 5.333194335612896e-05, "loss": 0.3355, "step": 645500 }, { "epoch": 0.39, "learning_rate": 5.33298433905684e-05, "loss": 0.3338, "step": 646000 }, { "epoch": 0.39, "learning_rate": 5.3327743425007826e-05, "loss": 0.3355, "step": 646500 }, { "epoch": 0.39, "learning_rate": 5.332564345944726e-05, "loss": 0.3365, "step": 647000 }, { "epoch": 0.39, "learning_rate": 5.33235434938867e-05, "loss": 0.3314, "step": 647500 }, { "epoch": 0.39, "learning_rate": 5.332144352832613e-05, "loss": 0.3373, "step": 648000 }, { "epoch": 0.39, "learning_rate": 5.331934356276557e-05, "loss": 0.333, "step": 648500 }, { "epoch": 0.39, "learning_rate": 5.331724359720501e-05, "loss": 0.3287, "step": 649000 }, { "epoch": 0.39, "learning_rate": 5.331514363164444e-05, "loss": 0.3282, "step": 649500 }, { "epoch": 0.39, "learning_rate": 5.3313047866014994e-05, "loss": 0.3305, "step": 650000 }, { "epoch": 0.39, "learning_rate": 5.331094790045443e-05, "loss": 0.3437, "step": 650500 }, { "epoch": 0.39, "learning_rate": 5.330884793489387e-05, "loss": 0.3306, "step": 651000 }, { "epoch": 0.39, "learning_rate": 5.33067479693333e-05, "loss": 0.3242, "step": 651500 }, { "epoch": 0.39, "learning_rate": 5.3304652203703855e-05, "loss": 0.3332, "step": 652000 }, { "epoch": 0.39, "learning_rate": 5.3302552238143295e-05, "loss": 0.3338, "step": 652500 }, { "epoch": 0.39, "learning_rate": 5.330045227258273e-05, "loss": 0.3315, "step": 653000 }, { "epoch": 0.39, "learning_rate": 5.329835230702216e-05, "loss": 0.3349, "step": 653500 }, { "epoch": 0.39, "learning_rate": 5.32962523414616e-05, "loss": 0.3296, "step": 654000 }, { "epoch": 0.39, "learning_rate": 5.3294152375901036e-05, "loss": 0.3336, "step": 654500 }, { "epoch": 0.39, "learning_rate": 5.329205241034047e-05, "loss": 0.3356, "step": 655000 }, { "epoch": 0.39, "learning_rate": 5.328995244477991e-05, "loss": 0.3296, "step": 655500 }, { "epoch": 0.39, "learning_rate": 5.328785247921934e-05, "loss": 0.3317, "step": 656000 }, { "epoch": 0.39, "learning_rate": 5.3285752513658776e-05, "loss": 0.3324, "step": 656500 }, { "epoch": 0.39, "learning_rate": 5.328365254809821e-05, "loss": 0.3304, "step": 657000 }, { "epoch": 0.39, "learning_rate": 5.328155258253764e-05, "loss": 0.3304, "step": 657500 }, { "epoch": 0.39, "learning_rate": 5.3279461016839324e-05, "loss": 0.3276, "step": 658000 }, { "epoch": 0.39, "learning_rate": 5.327736105127876e-05, "loss": 0.3247, "step": 658500 }, { "epoch": 0.4, "learning_rate": 5.327526108571819e-05, "loss": 0.3327, "step": 659000 }, { "epoch": 0.4, "learning_rate": 5.327316112015763e-05, "loss": 0.3378, "step": 659500 }, { "epoch": 0.4, "learning_rate": 5.3271061154597064e-05, "loss": 0.3342, "step": 660000 }, { "epoch": 0.4, "learning_rate": 5.3268961189036505e-05, "loss": 0.3361, "step": 660500 }, { "epoch": 0.4, "learning_rate": 5.326686542340706e-05, "loss": 0.3297, "step": 661000 }, { "epoch": 0.4, "learning_rate": 5.326476545784649e-05, "loss": 0.3375, "step": 661500 }, { "epoch": 0.4, "learning_rate": 5.3262665492285925e-05, "loss": 0.332, "step": 662000 }, { "epoch": 0.4, "learning_rate": 5.3260565526725365e-05, "loss": 0.3308, "step": 662500 }, { "epoch": 0.4, "learning_rate": 5.32584655611648e-05, "loss": 0.3258, "step": 663000 }, { "epoch": 0.4, "learning_rate": 5.325636559560423e-05, "loss": 0.3351, "step": 663500 }, { "epoch": 0.4, "learning_rate": 5.3254265630043666e-05, "loss": 0.326, "step": 664000 }, { "epoch": 0.4, "learning_rate": 5.32521656644831e-05, "loss": 0.3252, "step": 664500 }, { "epoch": 0.4, "learning_rate": 5.325006989885366e-05, "loss": 0.3341, "step": 665000 }, { "epoch": 0.4, "learning_rate": 5.324796993329309e-05, "loss": 0.3363, "step": 665500 }, { "epoch": 0.4, "learning_rate": 5.324586996773253e-05, "loss": 0.3406, "step": 666000 }, { "epoch": 0.4, "learning_rate": 5.324377000217196e-05, "loss": 0.3266, "step": 666500 }, { "epoch": 0.4, "learning_rate": 5.324167423654252e-05, "loss": 0.3271, "step": 667000 }, { "epoch": 0.4, "learning_rate": 5.3239578470913074e-05, "loss": 0.3371, "step": 667500 }, { "epoch": 0.4, "learning_rate": 5.3237478505352514e-05, "loss": 0.3361, "step": 668000 }, { "epoch": 0.4, "learning_rate": 5.323537853979195e-05, "loss": 0.3404, "step": 668500 }, { "epoch": 0.4, "learning_rate": 5.323327857423138e-05, "loss": 0.3238, "step": 669000 }, { "epoch": 0.4, "learning_rate": 5.323117860867082e-05, "loss": 0.3314, "step": 669500 }, { "epoch": 0.4, "learning_rate": 5.3229078643110255e-05, "loss": 0.3328, "step": 670000 }, { "epoch": 0.4, "learning_rate": 5.322697867754969e-05, "loss": 0.333, "step": 670500 }, { "epoch": 0.4, "learning_rate": 5.322487871198912e-05, "loss": 0.335, "step": 671000 }, { "epoch": 0.4, "learning_rate": 5.3222778746428555e-05, "loss": 0.3266, "step": 671500 }, { "epoch": 0.4, "learning_rate": 5.3220682980799115e-05, "loss": 0.329, "step": 672000 }, { "epoch": 0.4, "learning_rate": 5.321858301523855e-05, "loss": 0.3301, "step": 672500 }, { "epoch": 0.4, "learning_rate": 5.321648304967799e-05, "loss": 0.3256, "step": 673000 }, { "epoch": 0.4, "learning_rate": 5.3214383084117416e-05, "loss": 0.3267, "step": 673500 }, { "epoch": 0.4, "learning_rate": 5.3212287318487976e-05, "loss": 0.3315, "step": 674000 }, { "epoch": 0.4, "learning_rate": 5.3210187352927416e-05, "loss": 0.3283, "step": 674500 }, { "epoch": 0.4, "learning_rate": 5.320809158729797e-05, "loss": 0.3326, "step": 675000 }, { "epoch": 0.4, "learning_rate": 5.32059916217374e-05, "loss": 0.3291, "step": 675500 }, { "epoch": 0.41, "learning_rate": 5.320389165617684e-05, "loss": 0.3332, "step": 676000 }, { "epoch": 0.41, "learning_rate": 5.320179169061628e-05, "loss": 0.3316, "step": 676500 }, { "epoch": 0.41, "learning_rate": 5.319969172505571e-05, "loss": 0.3366, "step": 677000 }, { "epoch": 0.41, "learning_rate": 5.3197591759495144e-05, "loss": 0.3398, "step": 677500 }, { "epoch": 0.41, "learning_rate": 5.319549179393458e-05, "loss": 0.3257, "step": 678000 }, { "epoch": 0.41, "learning_rate": 5.319339182837401e-05, "loss": 0.335, "step": 678500 }, { "epoch": 0.41, "learning_rate": 5.3191291862813444e-05, "loss": 0.3217, "step": 679000 }, { "epoch": 0.41, "learning_rate": 5.3189196097184005e-05, "loss": 0.3294, "step": 679500 }, { "epoch": 0.41, "learning_rate": 5.3187096131623445e-05, "loss": 0.3327, "step": 680000 }, { "epoch": 0.41, "learning_rate": 5.318499616606287e-05, "loss": 0.3303, "step": 680500 }, { "epoch": 0.41, "learning_rate": 5.318289620050231e-05, "loss": 0.3216, "step": 681000 }, { "epoch": 0.41, "learning_rate": 5.3180796234941745e-05, "loss": 0.326, "step": 681500 }, { "epoch": 0.41, "learning_rate": 5.317869626938118e-05, "loss": 0.3292, "step": 682000 }, { "epoch": 0.41, "learning_rate": 5.317659630382062e-05, "loss": 0.3343, "step": 682500 }, { "epoch": 0.41, "learning_rate": 5.317450053819117e-05, "loss": 0.3236, "step": 683000 }, { "epoch": 0.41, "learning_rate": 5.3172400572630606e-05, "loss": 0.3307, "step": 683500 }, { "epoch": 0.41, "learning_rate": 5.317030060707004e-05, "loss": 0.3311, "step": 684000 }, { "epoch": 0.41, "learning_rate": 5.316820064150948e-05, "loss": 0.3242, "step": 684500 }, { "epoch": 0.41, "learning_rate": 5.316610487588004e-05, "loss": 0.3297, "step": 685000 }, { "epoch": 0.41, "learning_rate": 5.316400491031947e-05, "loss": 0.3259, "step": 685500 }, { "epoch": 0.41, "learning_rate": 5.31619049447589e-05, "loss": 0.3293, "step": 686000 }, { "epoch": 0.41, "learning_rate": 5.315980497919834e-05, "loss": 0.3326, "step": 686500 }, { "epoch": 0.41, "learning_rate": 5.3157705013637774e-05, "loss": 0.3336, "step": 687000 }, { "epoch": 0.41, "learning_rate": 5.3155609248008334e-05, "loss": 0.3361, "step": 687500 }, { "epoch": 0.41, "learning_rate": 5.315350928244777e-05, "loss": 0.3315, "step": 688000 }, { "epoch": 0.41, "learning_rate": 5.315141351681833e-05, "loss": 0.3355, "step": 688500 }, { "epoch": 0.41, "learning_rate": 5.314931355125776e-05, "loss": 0.3379, "step": 689000 }, { "epoch": 0.41, "learning_rate": 5.3147213585697195e-05, "loss": 0.3265, "step": 689500 }, { "epoch": 0.41, "learning_rate": 5.314511362013663e-05, "loss": 0.3354, "step": 690000 }, { "epoch": 0.41, "learning_rate": 5.314301365457606e-05, "loss": 0.3246, "step": 690500 }, { "epoch": 0.41, "learning_rate": 5.3140913689015495e-05, "loss": 0.3309, "step": 691000 }, { "epoch": 0.41, "learning_rate": 5.3138813723454936e-05, "loss": 0.3353, "step": 691500 }, { "epoch": 0.41, "learning_rate": 5.313671375789437e-05, "loss": 0.3314, "step": 692000 }, { "epoch": 0.42, "learning_rate": 5.31346137923338e-05, "loss": 0.3287, "step": 692500 }, { "epoch": 0.42, "learning_rate": 5.3132518026704356e-05, "loss": 0.3408, "step": 693000 }, { "epoch": 0.42, "learning_rate": 5.3130418061143796e-05, "loss": 0.3333, "step": 693500 }, { "epoch": 0.42, "learning_rate": 5.312831809558323e-05, "loss": 0.326, "step": 694000 }, { "epoch": 0.42, "learning_rate": 5.312621813002266e-05, "loss": 0.328, "step": 694500 }, { "epoch": 0.42, "learning_rate": 5.3124118164462103e-05, "loss": 0.3326, "step": 695000 }, { "epoch": 0.42, "learning_rate": 5.312201819890154e-05, "loss": 0.325, "step": 695500 }, { "epoch": 0.42, "learning_rate": 5.311992243327209e-05, "loss": 0.3272, "step": 696000 }, { "epoch": 0.42, "learning_rate": 5.311782246771153e-05, "loss": 0.3303, "step": 696500 }, { "epoch": 0.42, "learning_rate": 5.3115722502150964e-05, "loss": 0.3323, "step": 697000 }, { "epoch": 0.42, "learning_rate": 5.31136225365904e-05, "loss": 0.3288, "step": 697500 }, { "epoch": 0.42, "learning_rate": 5.311152257102984e-05, "loss": 0.3301, "step": 698000 }, { "epoch": 0.42, "learning_rate": 5.310942260546927e-05, "loss": 0.3222, "step": 698500 }, { "epoch": 0.42, "learning_rate": 5.3107322639908705e-05, "loss": 0.3273, "step": 699000 }, { "epoch": 0.42, "learning_rate": 5.310522687427926e-05, "loss": 0.3328, "step": 699500 }, { "epoch": 0.42, "learning_rate": 5.310313110864981e-05, "loss": 0.3345, "step": 700000 }, { "epoch": 0.42, "eval_loss": 0.29597923159599304, "eval_runtime": 1469.5457, "eval_samples_per_second": 358.424, "eval_steps_per_second": 59.738, "step": 700000 }, { "epoch": 0.42, "learning_rate": 5.310103114308925e-05, "loss": 0.3313, "step": 700500 }, { "epoch": 0.42, "learning_rate": 5.3098931177528686e-05, "loss": 0.336, "step": 701000 }, { "epoch": 0.42, "learning_rate": 5.309683121196812e-05, "loss": 0.3305, "step": 701500 }, { "epoch": 0.42, "learning_rate": 5.309473124640756e-05, "loss": 0.3345, "step": 702000 }, { "epoch": 0.42, "learning_rate": 5.309263548077811e-05, "loss": 0.3288, "step": 702500 }, { "epoch": 0.42, "learning_rate": 5.3090535515217546e-05, "loss": 0.3291, "step": 703000 }, { "epoch": 0.42, "learning_rate": 5.3088435549656987e-05, "loss": 0.3327, "step": 703500 }, { "epoch": 0.42, "learning_rate": 5.308633558409642e-05, "loss": 0.328, "step": 704000 }, { "epoch": 0.42, "learning_rate": 5.3084235618535854e-05, "loss": 0.3292, "step": 704500 }, { "epoch": 0.42, "learning_rate": 5.3082135652975294e-05, "loss": 0.3257, "step": 705000 }, { "epoch": 0.42, "learning_rate": 5.308003568741473e-05, "loss": 0.335, "step": 705500 }, { "epoch": 0.42, "learning_rate": 5.307793572185416e-05, "loss": 0.3255, "step": 706000 }, { "epoch": 0.42, "learning_rate": 5.30758357562936e-05, "loss": 0.3195, "step": 706500 }, { "epoch": 0.42, "learning_rate": 5.3073739990664154e-05, "loss": 0.3236, "step": 707000 }, { "epoch": 0.42, "learning_rate": 5.307164002510359e-05, "loss": 0.3232, "step": 707500 }, { "epoch": 0.42, "learning_rate": 5.306954005954302e-05, "loss": 0.3292, "step": 708000 }, { "epoch": 0.42, "learning_rate": 5.306744009398246e-05, "loss": 0.3243, "step": 708500 }, { "epoch": 0.43, "learning_rate": 5.3065340128421895e-05, "loss": 0.3323, "step": 709000 }, { "epoch": 0.43, "learning_rate": 5.306324436279245e-05, "loss": 0.3312, "step": 709500 }, { "epoch": 0.43, "learning_rate": 5.306114439723188e-05, "loss": 0.3272, "step": 710000 }, { "epoch": 0.43, "learning_rate": 5.305904443167132e-05, "loss": 0.3238, "step": 710500 }, { "epoch": 0.43, "learning_rate": 5.3056944466110756e-05, "loss": 0.3236, "step": 711000 }, { "epoch": 0.43, "learning_rate": 5.305484450055019e-05, "loss": 0.3285, "step": 711500 }, { "epoch": 0.43, "learning_rate": 5.305274453498963e-05, "loss": 0.3244, "step": 712000 }, { "epoch": 0.43, "learning_rate": 5.3050644569429056e-05, "loss": 0.3238, "step": 712500 }, { "epoch": 0.43, "learning_rate": 5.3048544603868496e-05, "loss": 0.3308, "step": 713000 }, { "epoch": 0.43, "learning_rate": 5.304644463830793e-05, "loss": 0.3269, "step": 713500 }, { "epoch": 0.43, "learning_rate": 5.304434887267849e-05, "loss": 0.3323, "step": 714000 }, { "epoch": 0.43, "learning_rate": 5.304224890711792e-05, "loss": 0.3229, "step": 714500 }, { "epoch": 0.43, "learning_rate": 5.304015314148848e-05, "loss": 0.3265, "step": 715000 }, { "epoch": 0.43, "learning_rate": 5.303805317592792e-05, "loss": 0.325, "step": 715500 }, { "epoch": 0.43, "learning_rate": 5.303595321036735e-05, "loss": 0.3275, "step": 716000 }, { "epoch": 0.43, "learning_rate": 5.3033853244806784e-05, "loss": 0.3336, "step": 716500 }, { "epoch": 0.43, "learning_rate": 5.303175327924622e-05, "loss": 0.3361, "step": 717000 }, { "epoch": 0.43, "learning_rate": 5.302965751361678e-05, "loss": 0.3331, "step": 717500 }, { "epoch": 0.43, "learning_rate": 5.302755754805621e-05, "loss": 0.3347, "step": 718000 }, { "epoch": 0.43, "learning_rate": 5.3025461782426765e-05, "loss": 0.3362, "step": 718500 }, { "epoch": 0.43, "learning_rate": 5.3023361816866206e-05, "loss": 0.3261, "step": 719000 }, { "epoch": 0.43, "learning_rate": 5.302126185130564e-05, "loss": 0.3369, "step": 719500 }, { "epoch": 0.43, "learning_rate": 5.301916188574507e-05, "loss": 0.33, "step": 720000 }, { "epoch": 0.43, "learning_rate": 5.301706192018451e-05, "loss": 0.3248, "step": 720500 }, { "epoch": 0.43, "learning_rate": 5.3014961954623946e-05, "loss": 0.3338, "step": 721000 }, { "epoch": 0.43, "learning_rate": 5.301286198906338e-05, "loss": 0.3355, "step": 721500 }, { "epoch": 0.43, "learning_rate": 5.301076202350281e-05, "loss": 0.326, "step": 722000 }, { "epoch": 0.43, "learning_rate": 5.3008662057942246e-05, "loss": 0.3249, "step": 722500 }, { "epoch": 0.43, "learning_rate": 5.300656209238168e-05, "loss": 0.3276, "step": 723000 }, { "epoch": 0.43, "learning_rate": 5.300446212682112e-05, "loss": 0.3284, "step": 723500 }, { "epoch": 0.43, "learning_rate": 5.3002362161260554e-05, "loss": 0.3316, "step": 724000 }, { "epoch": 0.43, "learning_rate": 5.3000270595562234e-05, "loss": 0.3282, "step": 724500 }, { "epoch": 0.43, "learning_rate": 5.299817063000167e-05, "loss": 0.3303, "step": 725000 }, { "epoch": 0.43, "learning_rate": 5.299607066444111e-05, "loss": 0.3337, "step": 725500 }, { "epoch": 0.44, "learning_rate": 5.299397489881166e-05, "loss": 0.3235, "step": 726000 }, { "epoch": 0.44, "learning_rate": 5.2991874933251095e-05, "loss": 0.3282, "step": 726500 }, { "epoch": 0.44, "learning_rate": 5.298977496769053e-05, "loss": 0.3264, "step": 727000 }, { "epoch": 0.44, "learning_rate": 5.298767500212997e-05, "loss": 0.3348, "step": 727500 }, { "epoch": 0.44, "learning_rate": 5.29855750365694e-05, "loss": 0.3255, "step": 728000 }, { "epoch": 0.44, "learning_rate": 5.2983475071008835e-05, "loss": 0.3303, "step": 728500 }, { "epoch": 0.44, "learning_rate": 5.298137510544827e-05, "loss": 0.3232, "step": 729000 }, { "epoch": 0.44, "learning_rate": 5.29792751398877e-05, "loss": 0.3225, "step": 729500 }, { "epoch": 0.44, "learning_rate": 5.297717937425826e-05, "loss": 0.3305, "step": 730000 }, { "epoch": 0.44, "learning_rate": 5.2975079408697696e-05, "loss": 0.3245, "step": 730500 }, { "epoch": 0.44, "learning_rate": 5.2972979443137136e-05, "loss": 0.3263, "step": 731000 }, { "epoch": 0.44, "learning_rate": 5.297087947757656e-05, "loss": 0.327, "step": 731500 }, { "epoch": 0.44, "learning_rate": 5.2968779512016e-05, "loss": 0.3261, "step": 732000 }, { "epoch": 0.44, "learning_rate": 5.296667954645544e-05, "loss": 0.3301, "step": 732500 }, { "epoch": 0.44, "learning_rate": 5.296457958089487e-05, "loss": 0.3223, "step": 733000 }, { "epoch": 0.44, "learning_rate": 5.296247961533431e-05, "loss": 0.3382, "step": 733500 }, { "epoch": 0.44, "learning_rate": 5.2960379649773744e-05, "loss": 0.3252, "step": 734000 }, { "epoch": 0.44, "learning_rate": 5.29582838841443e-05, "loss": 0.331, "step": 734500 }, { "epoch": 0.44, "learning_rate": 5.295618391858373e-05, "loss": 0.3215, "step": 735000 }, { "epoch": 0.44, "learning_rate": 5.295408815295429e-05, "loss": 0.3293, "step": 735500 }, { "epoch": 0.44, "learning_rate": 5.2951988187393725e-05, "loss": 0.3329, "step": 736000 }, { "epoch": 0.44, "learning_rate": 5.294988822183316e-05, "loss": 0.3253, "step": 736500 }, { "epoch": 0.44, "learning_rate": 5.294778825627259e-05, "loss": 0.3258, "step": 737000 }, { "epoch": 0.44, "learning_rate": 5.294568829071203e-05, "loss": 0.3254, "step": 737500 }, { "epoch": 0.44, "learning_rate": 5.2943588325151465e-05, "loss": 0.334, "step": 738000 }, { "epoch": 0.44, "learning_rate": 5.29414883595909e-05, "loss": 0.3301, "step": 738500 }, { "epoch": 0.44, "learning_rate": 5.293938839403034e-05, "loss": 0.327, "step": 739000 }, { "epoch": 0.44, "learning_rate": 5.293729262840089e-05, "loss": 0.3295, "step": 739500 }, { "epoch": 0.44, "learning_rate": 5.2935192662840326e-05, "loss": 0.3198, "step": 740000 }, { "epoch": 0.44, "learning_rate": 5.2933096897210886e-05, "loss": 0.331, "step": 740500 }, { "epoch": 0.44, "learning_rate": 5.293100113158144e-05, "loss": 0.3222, "step": 741000 }, { "epoch": 0.44, "learning_rate": 5.292890116602088e-05, "loss": 0.3216, "step": 741500 }, { "epoch": 0.44, "learning_rate": 5.2926805400391434e-05, "loss": 0.3259, "step": 742000 }, { "epoch": 0.45, "learning_rate": 5.292470543483087e-05, "loss": 0.3311, "step": 742500 }, { "epoch": 0.45, "learning_rate": 5.29226054692703e-05, "loss": 0.3258, "step": 743000 }, { "epoch": 0.45, "learning_rate": 5.292050550370974e-05, "loss": 0.3283, "step": 743500 }, { "epoch": 0.45, "learning_rate": 5.2918405538149174e-05, "loss": 0.3328, "step": 744000 }, { "epoch": 0.45, "learning_rate": 5.291630977251973e-05, "loss": 0.3297, "step": 744500 }, { "epoch": 0.45, "learning_rate": 5.291420980695917e-05, "loss": 0.326, "step": 745000 }, { "epoch": 0.45, "learning_rate": 5.29121098413986e-05, "loss": 0.3237, "step": 745500 }, { "epoch": 0.45, "learning_rate": 5.2910009875838035e-05, "loss": 0.3216, "step": 746000 }, { "epoch": 0.45, "learning_rate": 5.2907909910277475e-05, "loss": 0.3296, "step": 746500 }, { "epoch": 0.45, "learning_rate": 5.290580994471691e-05, "loss": 0.3271, "step": 747000 }, { "epoch": 0.45, "learning_rate": 5.290370997915634e-05, "loss": 0.3305, "step": 747500 }, { "epoch": 0.45, "learning_rate": 5.2901610013595776e-05, "loss": 0.3307, "step": 748000 }, { "epoch": 0.45, "learning_rate": 5.289951004803521e-05, "loss": 0.3245, "step": 748500 }, { "epoch": 0.45, "learning_rate": 5.289741008247464e-05, "loss": 0.3245, "step": 749000 }, { "epoch": 0.45, "learning_rate": 5.289531011691408e-05, "loss": 0.333, "step": 749500 }, { "epoch": 0.45, "learning_rate": 5.2893210151353516e-05, "loss": 0.3245, "step": 750000 }, { "epoch": 0.45, "learning_rate": 5.289111018579295e-05, "loss": 0.3305, "step": 750500 }, { "epoch": 0.45, "learning_rate": 5.288901022023239e-05, "loss": 0.3293, "step": 751000 }, { "epoch": 0.45, "learning_rate": 5.2886910254671824e-05, "loss": 0.3316, "step": 751500 }, { "epoch": 0.45, "learning_rate": 5.288481028911126e-05, "loss": 0.3315, "step": 752000 }, { "epoch": 0.45, "learning_rate": 5.288271452348181e-05, "loss": 0.336, "step": 752500 }, { "epoch": 0.45, "learning_rate": 5.288061875785237e-05, "loss": 0.3277, "step": 753000 }, { "epoch": 0.45, "learning_rate": 5.2878518792291804e-05, "loss": 0.3366, "step": 753500 }, { "epoch": 0.45, "learning_rate": 5.287641882673124e-05, "loss": 0.3277, "step": 754000 }, { "epoch": 0.45, "learning_rate": 5.287431886117068e-05, "loss": 0.3304, "step": 754500 }, { "epoch": 0.45, "learning_rate": 5.287221889561011e-05, "loss": 0.3248, "step": 755000 }, { "epoch": 0.45, "learning_rate": 5.2870118930049545e-05, "loss": 0.3234, "step": 755500 }, { "epoch": 0.45, "learning_rate": 5.2868018964488985e-05, "loss": 0.3198, "step": 756000 }, { "epoch": 0.45, "learning_rate": 5.286591899892842e-05, "loss": 0.3313, "step": 756500 }, { "epoch": 0.45, "learning_rate": 5.286382323329897e-05, "loss": 0.3204, "step": 757000 }, { "epoch": 0.45, "learning_rate": 5.2861723267738406e-05, "loss": 0.322, "step": 757500 }, { "epoch": 0.45, "learning_rate": 5.2859623302177846e-05, "loss": 0.3185, "step": 758000 }, { "epoch": 0.45, "learning_rate": 5.285752333661728e-05, "loss": 0.328, "step": 758500 }, { "epoch": 0.46, "learning_rate": 5.285542757098783e-05, "loss": 0.331, "step": 759000 }, { "epoch": 0.46, "learning_rate": 5.2853331805358393e-05, "loss": 0.3228, "step": 759500 }, { "epoch": 0.46, "learning_rate": 5.285123183979783e-05, "loss": 0.3271, "step": 760000 }, { "epoch": 0.46, "learning_rate": 5.284913187423726e-05, "loss": 0.3274, "step": 760500 }, { "epoch": 0.46, "learning_rate": 5.2847031908676694e-05, "loss": 0.3265, "step": 761000 }, { "epoch": 0.46, "learning_rate": 5.2844931943116134e-05, "loss": 0.3193, "step": 761500 }, { "epoch": 0.46, "learning_rate": 5.284283197755557e-05, "loss": 0.3278, "step": 762000 }, { "epoch": 0.46, "learning_rate": 5.2840732011995e-05, "loss": 0.3272, "step": 762500 }, { "epoch": 0.46, "learning_rate": 5.283863204643444e-05, "loss": 0.323, "step": 763000 }, { "epoch": 0.46, "learning_rate": 5.2836532080873875e-05, "loss": 0.329, "step": 763500 }, { "epoch": 0.46, "learning_rate": 5.283443631524443e-05, "loss": 0.3248, "step": 764000 }, { "epoch": 0.46, "learning_rate": 5.283233634968386e-05, "loss": 0.3248, "step": 764500 }, { "epoch": 0.46, "learning_rate": 5.28302363841233e-05, "loss": 0.3242, "step": 765000 }, { "epoch": 0.46, "learning_rate": 5.2828136418562735e-05, "loss": 0.3271, "step": 765500 }, { "epoch": 0.46, "learning_rate": 5.282604065293329e-05, "loss": 0.322, "step": 766000 }, { "epoch": 0.46, "learning_rate": 5.282394068737272e-05, "loss": 0.3235, "step": 766500 }, { "epoch": 0.46, "learning_rate": 5.282184492174328e-05, "loss": 0.3258, "step": 767000 }, { "epoch": 0.46, "learning_rate": 5.2819744956182716e-05, "loss": 0.3233, "step": 767500 }, { "epoch": 0.46, "learning_rate": 5.281764499062215e-05, "loss": 0.3274, "step": 768000 }, { "epoch": 0.46, "learning_rate": 5.281554502506159e-05, "loss": 0.3273, "step": 768500 }, { "epoch": 0.46, "learning_rate": 5.281344925943215e-05, "loss": 0.3311, "step": 769000 }, { "epoch": 0.46, "learning_rate": 5.281134929387158e-05, "loss": 0.3256, "step": 769500 }, { "epoch": 0.46, "learning_rate": 5.280925352824214e-05, "loss": 0.3238, "step": 770000 }, { "epoch": 0.46, "learning_rate": 5.280715356268157e-05, "loss": 0.3227, "step": 770500 }, { "epoch": 0.46, "learning_rate": 5.280505359712101e-05, "loss": 0.3307, "step": 771000 }, { "epoch": 0.46, "learning_rate": 5.2802953631560444e-05, "loss": 0.323, "step": 771500 }, { "epoch": 0.46, "learning_rate": 5.280085366599987e-05, "loss": 0.3309, "step": 772000 }, { "epoch": 0.46, "learning_rate": 5.279875370043931e-05, "loss": 0.3226, "step": 772500 }, { "epoch": 0.46, "learning_rate": 5.2796653734878745e-05, "loss": 0.3269, "step": 773000 }, { "epoch": 0.46, "learning_rate": 5.279455376931818e-05, "loss": 0.3299, "step": 773500 }, { "epoch": 0.46, "learning_rate": 5.279245380375762e-05, "loss": 0.3248, "step": 774000 }, { "epoch": 0.46, "learning_rate": 5.279035383819705e-05, "loss": 0.3275, "step": 774500 }, { "epoch": 0.46, "learning_rate": 5.2788253872636485e-05, "loss": 0.3262, "step": 775000 }, { "epoch": 0.46, "learning_rate": 5.2786153907075926e-05, "loss": 0.3221, "step": 775500 }, { "epoch": 0.47, "learning_rate": 5.278405394151536e-05, "loss": 0.3238, "step": 776000 }, { "epoch": 0.47, "learning_rate": 5.278195817588591e-05, "loss": 0.3312, "step": 776500 }, { "epoch": 0.47, "learning_rate": 5.277985821032535e-05, "loss": 0.3299, "step": 777000 }, { "epoch": 0.47, "learning_rate": 5.2777762444695906e-05, "loss": 0.3187, "step": 777500 }, { "epoch": 0.47, "learning_rate": 5.277566247913534e-05, "loss": 0.3143, "step": 778000 }, { "epoch": 0.47, "learning_rate": 5.2773562513574773e-05, "loss": 0.3281, "step": 778500 }, { "epoch": 0.47, "learning_rate": 5.2771462548014214e-05, "loss": 0.3228, "step": 779000 }, { "epoch": 0.47, "learning_rate": 5.276936258245365e-05, "loss": 0.3253, "step": 779500 }, { "epoch": 0.47, "learning_rate": 5.276726261689308e-05, "loss": 0.324, "step": 780000 }, { "epoch": 0.47, "learning_rate": 5.276516265133252e-05, "loss": 0.3268, "step": 780500 }, { "epoch": 0.47, "learning_rate": 5.2763066885703074e-05, "loss": 0.3154, "step": 781000 }, { "epoch": 0.47, "learning_rate": 5.276096692014251e-05, "loss": 0.3221, "step": 781500 }, { "epoch": 0.47, "learning_rate": 5.275886695458194e-05, "loss": 0.3193, "step": 782000 }, { "epoch": 0.47, "learning_rate": 5.275676698902138e-05, "loss": 0.3239, "step": 782500 }, { "epoch": 0.47, "learning_rate": 5.2754667023460815e-05, "loss": 0.3247, "step": 783000 }, { "epoch": 0.47, "learning_rate": 5.275256705790025e-05, "loss": 0.3242, "step": 783500 }, { "epoch": 0.47, "learning_rate": 5.275046709233969e-05, "loss": 0.3275, "step": 784000 }, { "epoch": 0.47, "learning_rate": 5.2748367126779115e-05, "loss": 0.33, "step": 784500 }, { "epoch": 0.47, "learning_rate": 5.2746271361149676e-05, "loss": 0.3237, "step": 785000 }, { "epoch": 0.47, "learning_rate": 5.2744171395589116e-05, "loss": 0.3287, "step": 785500 }, { "epoch": 0.47, "learning_rate": 5.274207143002855e-05, "loss": 0.3216, "step": 786000 }, { "epoch": 0.47, "learning_rate": 5.27399756643991e-05, "loss": 0.324, "step": 786500 }, { "epoch": 0.47, "learning_rate": 5.2737875698838536e-05, "loss": 0.3276, "step": 787000 }, { "epoch": 0.47, "learning_rate": 5.273577573327798e-05, "loss": 0.3208, "step": 787500 }, { "epoch": 0.47, "learning_rate": 5.273367576771741e-05, "loss": 0.3243, "step": 788000 }, { "epoch": 0.47, "learning_rate": 5.2731575802156844e-05, "loss": 0.3286, "step": 788500 }, { "epoch": 0.47, "learning_rate": 5.2729475836596284e-05, "loss": 0.3257, "step": 789000 }, { "epoch": 0.47, "learning_rate": 5.272737587103571e-05, "loss": 0.3169, "step": 789500 }, { "epoch": 0.47, "learning_rate": 5.272528010540627e-05, "loss": 0.3241, "step": 790000 }, { "epoch": 0.47, "learning_rate": 5.272318013984571e-05, "loss": 0.3186, "step": 790500 }, { "epoch": 0.47, "learning_rate": 5.2721080174285145e-05, "loss": 0.3265, "step": 791000 }, { "epoch": 0.47, "learning_rate": 5.271898020872458e-05, "loss": 0.3267, "step": 791500 }, { "epoch": 0.47, "learning_rate": 5.271688024316401e-05, "loss": 0.329, "step": 792000 }, { "epoch": 0.48, "learning_rate": 5.2714780277603445e-05, "loss": 0.3283, "step": 792500 }, { "epoch": 0.48, "learning_rate": 5.271268031204288e-05, "loss": 0.3274, "step": 793000 }, { "epoch": 0.48, "learning_rate": 5.271058034648232e-05, "loss": 0.3288, "step": 793500 }, { "epoch": 0.48, "learning_rate": 5.270848038092175e-05, "loss": 0.3315, "step": 794000 }, { "epoch": 0.48, "learning_rate": 5.2706384615292306e-05, "loss": 0.323, "step": 794500 }, { "epoch": 0.48, "learning_rate": 5.270428464973174e-05, "loss": 0.3158, "step": 795000 }, { "epoch": 0.48, "learning_rate": 5.270218468417118e-05, "loss": 0.3256, "step": 795500 }, { "epoch": 0.48, "learning_rate": 5.270008471861061e-05, "loss": 0.3223, "step": 796000 }, { "epoch": 0.48, "learning_rate": 5.2697984753050046e-05, "loss": 0.3255, "step": 796500 }, { "epoch": 0.48, "learning_rate": 5.2695884787489487e-05, "loss": 0.3241, "step": 797000 }, { "epoch": 0.48, "learning_rate": 5.269378482192892e-05, "loss": 0.3237, "step": 797500 }, { "epoch": 0.48, "learning_rate": 5.2691684856368353e-05, "loss": 0.3238, "step": 798000 }, { "epoch": 0.48, "learning_rate": 5.2689589090738914e-05, "loss": 0.3155, "step": 798500 }, { "epoch": 0.48, "learning_rate": 5.268749332510947e-05, "loss": 0.3214, "step": 799000 }, { "epoch": 0.48, "learning_rate": 5.26853933595489e-05, "loss": 0.3271, "step": 799500 }, { "epoch": 0.48, "learning_rate": 5.2683293393988334e-05, "loss": 0.3183, "step": 800000 }, { "epoch": 0.48, "eval_loss": 0.2911185920238495, "eval_runtime": 1475.0852, "eval_samples_per_second": 357.078, "eval_steps_per_second": 59.513, "step": 800000 }, { "epoch": 0.48, "learning_rate": 5.2681193428427775e-05, "loss": 0.3193, "step": 800500 }, { "epoch": 0.48, "learning_rate": 5.267909346286721e-05, "loss": 0.3228, "step": 801000 }, { "epoch": 0.48, "learning_rate": 5.267699769723776e-05, "loss": 0.326, "step": 801500 }, { "epoch": 0.48, "learning_rate": 5.2674897731677195e-05, "loss": 0.3274, "step": 802000 }, { "epoch": 0.48, "learning_rate": 5.2672797766116635e-05, "loss": 0.319, "step": 802500 }, { "epoch": 0.48, "learning_rate": 5.267069780055607e-05, "loss": 0.3261, "step": 803000 }, { "epoch": 0.48, "learning_rate": 5.266860203492662e-05, "loss": 0.3203, "step": 803500 }, { "epoch": 0.48, "learning_rate": 5.266650206936606e-05, "loss": 0.3198, "step": 804000 }, { "epoch": 0.48, "learning_rate": 5.2664402103805496e-05, "loss": 0.3241, "step": 804500 }, { "epoch": 0.48, "learning_rate": 5.266230213824493e-05, "loss": 0.3182, "step": 805000 }, { "epoch": 0.48, "learning_rate": 5.266020217268437e-05, "loss": 0.3303, "step": 805500 }, { "epoch": 0.48, "learning_rate": 5.26581022071238e-05, "loss": 0.3145, "step": 806000 }, { "epoch": 0.48, "learning_rate": 5.2656002241563237e-05, "loss": 0.3257, "step": 806500 }, { "epoch": 0.48, "learning_rate": 5.265390227600268e-05, "loss": 0.3196, "step": 807000 }, { "epoch": 0.48, "learning_rate": 5.265180231044211e-05, "loss": 0.3237, "step": 807500 }, { "epoch": 0.48, "learning_rate": 5.2649702344881544e-05, "loss": 0.3217, "step": 808000 }, { "epoch": 0.48, "learning_rate": 5.26476065792521e-05, "loss": 0.3257, "step": 808500 }, { "epoch": 0.49, "learning_rate": 5.264550661369154e-05, "loss": 0.3278, "step": 809000 }, { "epoch": 0.49, "learning_rate": 5.264340664813097e-05, "loss": 0.3225, "step": 809500 }, { "epoch": 0.49, "learning_rate": 5.2641306682570404e-05, "loss": 0.3194, "step": 810000 }, { "epoch": 0.49, "learning_rate": 5.2639206717009845e-05, "loss": 0.3305, "step": 810500 }, { "epoch": 0.49, "learning_rate": 5.26371109513804e-05, "loss": 0.3257, "step": 811000 }, { "epoch": 0.49, "learning_rate": 5.263501098581983e-05, "loss": 0.3257, "step": 811500 }, { "epoch": 0.49, "learning_rate": 5.2632911020259265e-05, "loss": 0.326, "step": 812000 }, { "epoch": 0.49, "learning_rate": 5.2630815254629826e-05, "loss": 0.3236, "step": 812500 }, { "epoch": 0.49, "learning_rate": 5.262871528906926e-05, "loss": 0.3247, "step": 813000 }, { "epoch": 0.49, "learning_rate": 5.262661952343981e-05, "loss": 0.3185, "step": 813500 }, { "epoch": 0.49, "learning_rate": 5.2624519557879246e-05, "loss": 0.3239, "step": 814000 }, { "epoch": 0.49, "learning_rate": 5.2622419592318686e-05, "loss": 0.3231, "step": 814500 }, { "epoch": 0.49, "learning_rate": 5.262031962675812e-05, "loss": 0.3233, "step": 815000 }, { "epoch": 0.49, "learning_rate": 5.261821966119755e-05, "loss": 0.3222, "step": 815500 }, { "epoch": 0.49, "learning_rate": 5.2616119695636993e-05, "loss": 0.32, "step": 816000 }, { "epoch": 0.49, "learning_rate": 5.261401973007643e-05, "loss": 0.3304, "step": 816500 }, { "epoch": 0.49, "learning_rate": 5.261191976451586e-05, "loss": 0.3195, "step": 817000 }, { "epoch": 0.49, "learning_rate": 5.26098197989553e-05, "loss": 0.3212, "step": 817500 }, { "epoch": 0.49, "learning_rate": 5.2607724033325854e-05, "loss": 0.324, "step": 818000 }, { "epoch": 0.49, "learning_rate": 5.260562406776529e-05, "loss": 0.3186, "step": 818500 }, { "epoch": 0.49, "learning_rate": 5.260352830213584e-05, "loss": 0.3231, "step": 819000 }, { "epoch": 0.49, "learning_rate": 5.260142833657528e-05, "loss": 0.3154, "step": 819500 }, { "epoch": 0.49, "learning_rate": 5.2599328371014715e-05, "loss": 0.3173, "step": 820000 }, { "epoch": 0.49, "learning_rate": 5.259722840545415e-05, "loss": 0.3272, "step": 820500 }, { "epoch": 0.49, "learning_rate": 5.259512843989359e-05, "loss": 0.3194, "step": 821000 }, { "epoch": 0.49, "learning_rate": 5.259302847433302e-05, "loss": 0.3153, "step": 821500 }, { "epoch": 0.49, "learning_rate": 5.2590928508772455e-05, "loss": 0.323, "step": 822000 }, { "epoch": 0.49, "learning_rate": 5.2588828543211896e-05, "loss": 0.3209, "step": 822500 }, { "epoch": 0.49, "learning_rate": 5.258673277758245e-05, "loss": 0.3313, "step": 823000 }, { "epoch": 0.49, "learning_rate": 5.258463281202188e-05, "loss": 0.322, "step": 823500 }, { "epoch": 0.49, "learning_rate": 5.2582532846461316e-05, "loss": 0.3218, "step": 824000 }, { "epoch": 0.49, "learning_rate": 5.2580432880900756e-05, "loss": 0.3189, "step": 824500 }, { "epoch": 0.49, "learning_rate": 5.257833291534019e-05, "loss": 0.3211, "step": 825000 }, { "epoch": 0.49, "learning_rate": 5.257623294977962e-05, "loss": 0.3239, "step": 825500 }, { "epoch": 0.5, "learning_rate": 5.257413298421906e-05, "loss": 0.3363, "step": 826000 }, { "epoch": 0.5, "learning_rate": 5.257203301865849e-05, "loss": 0.3193, "step": 826500 }, { "epoch": 0.5, "learning_rate": 5.2569933053097924e-05, "loss": 0.3219, "step": 827000 }, { "epoch": 0.5, "learning_rate": 5.2567833087537364e-05, "loss": 0.3188, "step": 827500 }, { "epoch": 0.5, "learning_rate": 5.25657331219768e-05, "loss": 0.3207, "step": 828000 }, { "epoch": 0.5, "learning_rate": 5.256363315641623e-05, "loss": 0.3271, "step": 828500 }, { "epoch": 0.5, "learning_rate": 5.256153739078679e-05, "loss": 0.3245, "step": 829000 }, { "epoch": 0.5, "learning_rate": 5.255944162515735e-05, "loss": 0.3193, "step": 829500 }, { "epoch": 0.5, "learning_rate": 5.2557341659596785e-05, "loss": 0.3198, "step": 830000 }, { "epoch": 0.5, "learning_rate": 5.255524169403621e-05, "loss": 0.3173, "step": 830500 }, { "epoch": 0.5, "learning_rate": 5.255314172847565e-05, "loss": 0.3202, "step": 831000 }, { "epoch": 0.5, "learning_rate": 5.2551041762915085e-05, "loss": 0.3216, "step": 831500 }, { "epoch": 0.5, "learning_rate": 5.254894179735452e-05, "loss": 0.318, "step": 832000 }, { "epoch": 0.5, "learning_rate": 5.254684183179396e-05, "loss": 0.3207, "step": 832500 }, { "epoch": 0.5, "learning_rate": 5.254474186623339e-05, "loss": 0.32, "step": 833000 }, { "epoch": 0.5, "learning_rate": 5.2542646100603946e-05, "loss": 0.3261, "step": 833500 }, { "epoch": 0.5, "learning_rate": 5.254054613504338e-05, "loss": 0.3271, "step": 834000 }, { "epoch": 0.5, "learning_rate": 5.253844616948282e-05, "loss": 0.3223, "step": 834500 }, { "epoch": 0.5, "learning_rate": 5.253634620392225e-05, "loss": 0.3255, "step": 835000 }, { "epoch": 0.5, "learning_rate": 5.253424623836169e-05, "loss": 0.3175, "step": 835500 }, { "epoch": 0.5, "learning_rate": 5.253214627280113e-05, "loss": 0.3251, "step": 836000 }, { "epoch": 0.5, "learning_rate": 5.253004630724056e-05, "loss": 0.3253, "step": 836500 }, { "epoch": 0.5, "learning_rate": 5.2527946341679994e-05, "loss": 0.3216, "step": 837000 }, { "epoch": 0.5, "learning_rate": 5.2525846376119434e-05, "loss": 0.3166, "step": 837500 }, { "epoch": 0.5, "learning_rate": 5.252375061048999e-05, "loss": 0.3242, "step": 838000 }, { "epoch": 0.5, "learning_rate": 5.252165064492942e-05, "loss": 0.327, "step": 838500 }, { "epoch": 0.5, "learning_rate": 5.2519554879299975e-05, "loss": 0.3241, "step": 839000 }, { "epoch": 0.5, "learning_rate": 5.2517454913739415e-05, "loss": 0.3284, "step": 839500 }, { "epoch": 0.5, "learning_rate": 5.251535494817885e-05, "loss": 0.3199, "step": 840000 }, { "epoch": 0.5, "learning_rate": 5.251325498261828e-05, "loss": 0.3219, "step": 840500 }, { "epoch": 0.5, "learning_rate": 5.251115501705772e-05, "loss": 0.3276, "step": 841000 }, { "epoch": 0.5, "learning_rate": 5.2509055051497156e-05, "loss": 0.3207, "step": 841500 }, { "epoch": 0.5, "learning_rate": 5.250695508593659e-05, "loss": 0.3241, "step": 842000 }, { "epoch": 0.51, "learning_rate": 5.250485512037603e-05, "loss": 0.3277, "step": 842500 }, { "epoch": 0.51, "learning_rate": 5.250275515481546e-05, "loss": 0.3295, "step": 843000 }, { "epoch": 0.51, "learning_rate": 5.2500659389186016e-05, "loss": 0.3209, "step": 843500 }, { "epoch": 0.51, "learning_rate": 5.249856362355657e-05, "loss": 0.3282, "step": 844000 }, { "epoch": 0.51, "learning_rate": 5.249646365799601e-05, "loss": 0.3262, "step": 844500 }, { "epoch": 0.51, "learning_rate": 5.2494363692435444e-05, "loss": 0.3214, "step": 845000 }, { "epoch": 0.51, "learning_rate": 5.249226372687488e-05, "loss": 0.3266, "step": 845500 }, { "epoch": 0.51, "learning_rate": 5.249016376131432e-05, "loss": 0.3222, "step": 846000 }, { "epoch": 0.51, "learning_rate": 5.248806799568487e-05, "loss": 0.3235, "step": 846500 }, { "epoch": 0.51, "learning_rate": 5.2485968030124304e-05, "loss": 0.3266, "step": 847000 }, { "epoch": 0.51, "learning_rate": 5.248386806456374e-05, "loss": 0.3253, "step": 847500 }, { "epoch": 0.51, "learning_rate": 5.248176809900318e-05, "loss": 0.3305, "step": 848000 }, { "epoch": 0.51, "learning_rate": 5.247966813344261e-05, "loss": 0.3212, "step": 848500 }, { "epoch": 0.51, "learning_rate": 5.2477568167882045e-05, "loss": 0.3206, "step": 849000 }, { "epoch": 0.51, "learning_rate": 5.2475468202321485e-05, "loss": 0.3138, "step": 849500 }, { "epoch": 0.51, "learning_rate": 5.247336823676092e-05, "loss": 0.3256, "step": 850000 }, { "epoch": 0.51, "learning_rate": 5.247127247113147e-05, "loss": 0.3231, "step": 850500 }, { "epoch": 0.51, "learning_rate": 5.2469176705502026e-05, "loss": 0.3209, "step": 851000 }, { "epoch": 0.51, "learning_rate": 5.2467076739941466e-05, "loss": 0.33, "step": 851500 }, { "epoch": 0.51, "learning_rate": 5.24649767743809e-05, "loss": 0.3226, "step": 852000 }, { "epoch": 0.51, "learning_rate": 5.246287680882033e-05, "loss": 0.3185, "step": 852500 }, { "epoch": 0.51, "learning_rate": 5.246077684325977e-05, "loss": 0.3155, "step": 853000 }, { "epoch": 0.51, "learning_rate": 5.245867687769921e-05, "loss": 0.3231, "step": 853500 }, { "epoch": 0.51, "learning_rate": 5.245657691213864e-05, "loss": 0.3219, "step": 854000 }, { "epoch": 0.51, "learning_rate": 5.245447694657808e-05, "loss": 0.3242, "step": 854500 }, { "epoch": 0.51, "learning_rate": 5.245237698101751e-05, "loss": 0.3194, "step": 855000 }, { "epoch": 0.51, "learning_rate": 5.245027701545694e-05, "loss": 0.3206, "step": 855500 }, { "epoch": 0.51, "learning_rate": 5.244817704989638e-05, "loss": 0.3172, "step": 856000 }, { "epoch": 0.51, "learning_rate": 5.244608128426694e-05, "loss": 0.3131, "step": 856500 }, { "epoch": 0.51, "learning_rate": 5.2443981318706375e-05, "loss": 0.3163, "step": 857000 }, { "epoch": 0.51, "learning_rate": 5.244188135314581e-05, "loss": 0.3212, "step": 857500 }, { "epoch": 0.51, "learning_rate": 5.243978138758524e-05, "loss": 0.32, "step": 858000 }, { "epoch": 0.51, "learning_rate": 5.2437681422024675e-05, "loss": 0.3149, "step": 858500 }, { "epoch": 0.52, "learning_rate": 5.2435581456464115e-05, "loss": 0.3208, "step": 859000 }, { "epoch": 0.52, "learning_rate": 5.2433485690834675e-05, "loss": 0.319, "step": 859500 }, { "epoch": 0.52, "learning_rate": 5.24313857252741e-05, "loss": 0.3204, "step": 860000 }, { "epoch": 0.52, "learning_rate": 5.242928995964466e-05, "loss": 0.3247, "step": 860500 }, { "epoch": 0.52, "learning_rate": 5.2427189994084096e-05, "loss": 0.3161, "step": 861000 }, { "epoch": 0.52, "learning_rate": 5.2425090028523536e-05, "loss": 0.3186, "step": 861500 }, { "epoch": 0.52, "learning_rate": 5.242299006296297e-05, "loss": 0.3158, "step": 862000 }, { "epoch": 0.52, "learning_rate": 5.2420890097402396e-05, "loss": 0.3239, "step": 862500 }, { "epoch": 0.52, "learning_rate": 5.2418790131841837e-05, "loss": 0.3235, "step": 863000 }, { "epoch": 0.52, "learning_rate": 5.24166943662124e-05, "loss": 0.3205, "step": 863500 }, { "epoch": 0.52, "learning_rate": 5.241459440065183e-05, "loss": 0.3247, "step": 864000 }, { "epoch": 0.52, "learning_rate": 5.2412494435091264e-05, "loss": 0.3225, "step": 864500 }, { "epoch": 0.52, "learning_rate": 5.24103944695307e-05, "loss": 0.3165, "step": 865000 }, { "epoch": 0.52, "learning_rate": 5.240829450397013e-05, "loss": 0.3205, "step": 865500 }, { "epoch": 0.52, "learning_rate": 5.240619453840957e-05, "loss": 0.3207, "step": 866000 }, { "epoch": 0.52, "learning_rate": 5.2404094572849004e-05, "loss": 0.325, "step": 866500 }, { "epoch": 0.52, "learning_rate": 5.240199460728844e-05, "loss": 0.3127, "step": 867000 }, { "epoch": 0.52, "learning_rate": 5.239989464172788e-05, "loss": 0.3184, "step": 867500 }, { "epoch": 0.52, "learning_rate": 5.239779887609843e-05, "loss": 0.3203, "step": 868000 }, { "epoch": 0.52, "learning_rate": 5.2395698910537865e-05, "loss": 0.3219, "step": 868500 }, { "epoch": 0.52, "learning_rate": 5.23935989449773e-05, "loss": 0.3139, "step": 869000 }, { "epoch": 0.52, "learning_rate": 5.239149897941674e-05, "loss": 0.3155, "step": 869500 }, { "epoch": 0.52, "learning_rate": 5.238939901385617e-05, "loss": 0.3219, "step": 870000 }, { "epoch": 0.52, "learning_rate": 5.2387303248226726e-05, "loss": 0.3226, "step": 870500 }, { "epoch": 0.52, "learning_rate": 5.238520328266616e-05, "loss": 0.3251, "step": 871000 }, { "epoch": 0.52, "learning_rate": 5.238310751703672e-05, "loss": 0.3197, "step": 871500 }, { "epoch": 0.52, "learning_rate": 5.238100755147615e-05, "loss": 0.32, "step": 872000 }, { "epoch": 0.52, "learning_rate": 5.237890758591559e-05, "loss": 0.317, "step": 872500 }, { "epoch": 0.52, "learning_rate": 5.237680762035503e-05, "loss": 0.3206, "step": 873000 }, { "epoch": 0.52, "learning_rate": 5.237470765479446e-05, "loss": 0.3251, "step": 873500 }, { "epoch": 0.52, "learning_rate": 5.2372607689233894e-05, "loss": 0.323, "step": 874000 }, { "epoch": 0.52, "learning_rate": 5.2370507723673334e-05, "loss": 0.3244, "step": 874500 }, { "epoch": 0.52, "learning_rate": 5.236840775811277e-05, "loss": 0.3184, "step": 875000 }, { "epoch": 0.52, "learning_rate": 5.23663077925522e-05, "loss": 0.318, "step": 875500 }, { "epoch": 0.53, "learning_rate": 5.2364212026922755e-05, "loss": 0.3223, "step": 876000 }, { "epoch": 0.53, "learning_rate": 5.2362112061362195e-05, "loss": 0.3206, "step": 876500 }, { "epoch": 0.53, "learning_rate": 5.236001209580163e-05, "loss": 0.314, "step": 877000 }, { "epoch": 0.53, "learning_rate": 5.235791213024106e-05, "loss": 0.3226, "step": 877500 }, { "epoch": 0.53, "learning_rate": 5.23558121646805e-05, "loss": 0.3239, "step": 878000 }, { "epoch": 0.53, "learning_rate": 5.2353720598982176e-05, "loss": 0.3267, "step": 878500 }, { "epoch": 0.53, "learning_rate": 5.235162063342161e-05, "loss": 0.3231, "step": 879000 }, { "epoch": 0.53, "learning_rate": 5.234952066786104e-05, "loss": 0.3279, "step": 879500 }, { "epoch": 0.53, "learning_rate": 5.234742070230048e-05, "loss": 0.3226, "step": 880000 }, { "epoch": 0.53, "learning_rate": 5.2345320736739916e-05, "loss": 0.3159, "step": 880500 }, { "epoch": 0.53, "learning_rate": 5.2343224971110477e-05, "loss": 0.3184, "step": 881000 }, { "epoch": 0.53, "learning_rate": 5.23411250055499e-05, "loss": 0.3219, "step": 881500 }, { "epoch": 0.53, "learning_rate": 5.2339025039989344e-05, "loss": 0.32, "step": 882000 }, { "epoch": 0.53, "learning_rate": 5.2336929274359904e-05, "loss": 0.3156, "step": 882500 }, { "epoch": 0.53, "learning_rate": 5.233482930879934e-05, "loss": 0.3199, "step": 883000 }, { "epoch": 0.53, "learning_rate": 5.233272934323877e-05, "loss": 0.3205, "step": 883500 }, { "epoch": 0.53, "learning_rate": 5.2330629377678204e-05, "loss": 0.318, "step": 884000 }, { "epoch": 0.53, "learning_rate": 5.232852941211764e-05, "loss": 0.3181, "step": 884500 }, { "epoch": 0.53, "learning_rate": 5.232642944655707e-05, "loss": 0.3189, "step": 885000 }, { "epoch": 0.53, "learning_rate": 5.232433368092763e-05, "loss": 0.3145, "step": 885500 }, { "epoch": 0.53, "learning_rate": 5.2322233715367065e-05, "loss": 0.3129, "step": 886000 }, { "epoch": 0.53, "learning_rate": 5.23201337498065e-05, "loss": 0.3222, "step": 886500 }, { "epoch": 0.53, "learning_rate": 5.231803378424594e-05, "loss": 0.3188, "step": 887000 }, { "epoch": 0.53, "learning_rate": 5.231593381868537e-05, "loss": 0.3249, "step": 887500 }, { "epoch": 0.53, "learning_rate": 5.2313833853124806e-05, "loss": 0.3163, "step": 888000 }, { "epoch": 0.53, "learning_rate": 5.2311733887564246e-05, "loss": 0.3156, "step": 888500 }, { "epoch": 0.53, "learning_rate": 5.230963392200368e-05, "loss": 0.316, "step": 889000 }, { "epoch": 0.53, "learning_rate": 5.230753395644311e-05, "loss": 0.3189, "step": 889500 }, { "epoch": 0.53, "learning_rate": 5.230543399088255e-05, "loss": 0.3152, "step": 890000 }, { "epoch": 0.53, "learning_rate": 5.2303338225253107e-05, "loss": 0.3225, "step": 890500 }, { "epoch": 0.53, "learning_rate": 5.230123825969254e-05, "loss": 0.3169, "step": 891000 }, { "epoch": 0.53, "learning_rate": 5.2299138294131973e-05, "loss": 0.3194, "step": 891500 }, { "epoch": 0.53, "learning_rate": 5.2297038328571414e-05, "loss": 0.3181, "step": 892000 }, { "epoch": 0.54, "learning_rate": 5.229493836301085e-05, "loss": 0.3202, "step": 892500 }, { "epoch": 0.54, "learning_rate": 5.229283839745028e-05, "loss": 0.3192, "step": 893000 }, { "epoch": 0.54, "learning_rate": 5.229073843188972e-05, "loss": 0.3131, "step": 893500 }, { "epoch": 0.54, "learning_rate": 5.228863846632915e-05, "loss": 0.3185, "step": 894000 }, { "epoch": 0.54, "learning_rate": 5.228653850076858e-05, "loss": 0.3267, "step": 894500 }, { "epoch": 0.54, "learning_rate": 5.228443853520802e-05, "loss": 0.322, "step": 895000 }, { "epoch": 0.54, "learning_rate": 5.2282338569647455e-05, "loss": 0.3216, "step": 895500 }, { "epoch": 0.54, "learning_rate": 5.2280242804018015e-05, "loss": 0.3121, "step": 896000 }, { "epoch": 0.54, "learning_rate": 5.227814283845745e-05, "loss": 0.3185, "step": 896500 }, { "epoch": 0.54, "learning_rate": 5.227604287289688e-05, "loss": 0.3203, "step": 897000 }, { "epoch": 0.54, "learning_rate": 5.2273942907336315e-05, "loss": 0.3218, "step": 897500 }, { "epoch": 0.54, "learning_rate": 5.2271842941775756e-05, "loss": 0.3215, "step": 898000 }, { "epoch": 0.54, "learning_rate": 5.2269747176146316e-05, "loss": 0.3233, "step": 898500 }, { "epoch": 0.54, "learning_rate": 5.226765141051687e-05, "loss": 0.3237, "step": 899000 }, { "epoch": 0.54, "learning_rate": 5.226555564488742e-05, "loss": 0.3287, "step": 899500 }, { "epoch": 0.54, "learning_rate": 5.2263455679326857e-05, "loss": 0.3241, "step": 900000 }, { "epoch": 0.54, "eval_loss": 0.2881280183792114, "eval_runtime": 1457.6063, "eval_samples_per_second": 361.36, "eval_steps_per_second": 60.227, "step": 900000 }, { "epoch": 0.54, "learning_rate": 5.226135571376629e-05, "loss": 0.3225, "step": 900500 }, { "epoch": 0.54, "learning_rate": 5.225925574820573e-05, "loss": 0.3167, "step": 901000 }, { "epoch": 0.54, "learning_rate": 5.2257155782645164e-05, "loss": 0.3186, "step": 901500 }, { "epoch": 0.54, "learning_rate": 5.22550558170846e-05, "loss": 0.3203, "step": 902000 }, { "epoch": 0.54, "learning_rate": 5.225295585152404e-05, "loss": 0.3126, "step": 902500 }, { "epoch": 0.54, "learning_rate": 5.225085588596347e-05, "loss": 0.3093, "step": 903000 }, { "epoch": 0.54, "learning_rate": 5.2248755920402904e-05, "loss": 0.3177, "step": 903500 }, { "epoch": 0.54, "learning_rate": 5.2246660154773465e-05, "loss": 0.3165, "step": 904000 }, { "epoch": 0.54, "learning_rate": 5.22445601892129e-05, "loss": 0.3156, "step": 904500 }, { "epoch": 0.54, "learning_rate": 5.224246022365233e-05, "loss": 0.3164, "step": 905000 }, { "epoch": 0.54, "learning_rate": 5.2240364458022885e-05, "loss": 0.3179, "step": 905500 }, { "epoch": 0.54, "learning_rate": 5.2238264492462325e-05, "loss": 0.3209, "step": 906000 }, { "epoch": 0.54, "learning_rate": 5.223616452690176e-05, "loss": 0.3196, "step": 906500 }, { "epoch": 0.54, "learning_rate": 5.223406456134119e-05, "loss": 0.3225, "step": 907000 }, { "epoch": 0.54, "learning_rate": 5.223196459578063e-05, "loss": 0.3176, "step": 907500 }, { "epoch": 0.54, "learning_rate": 5.2229864630220066e-05, "loss": 0.3192, "step": 908000 }, { "epoch": 0.54, "learning_rate": 5.222776466465949e-05, "loss": 0.315, "step": 908500 }, { "epoch": 0.54, "learning_rate": 5.222566469909893e-05, "loss": 0.3209, "step": 909000 }, { "epoch": 0.55, "learning_rate": 5.2223564733538366e-05, "loss": 0.3115, "step": 909500 }, { "epoch": 0.55, "learning_rate": 5.222146476797781e-05, "loss": 0.3159, "step": 910000 }, { "epoch": 0.55, "learning_rate": 5.221936480241724e-05, "loss": 0.3172, "step": 910500 }, { "epoch": 0.55, "learning_rate": 5.2217264836856674e-05, "loss": 0.3207, "step": 911000 }, { "epoch": 0.55, "learning_rate": 5.221516907122723e-05, "loss": 0.3204, "step": 911500 }, { "epoch": 0.55, "learning_rate": 5.221307330559779e-05, "loss": 0.3136, "step": 912000 }, { "epoch": 0.55, "learning_rate": 5.221097334003723e-05, "loss": 0.3188, "step": 912500 }, { "epoch": 0.55, "learning_rate": 5.2208873374476654e-05, "loss": 0.3172, "step": 913000 }, { "epoch": 0.55, "learning_rate": 5.220677340891609e-05, "loss": 0.3236, "step": 913500 }, { "epoch": 0.55, "learning_rate": 5.220467344335553e-05, "loss": 0.3202, "step": 914000 }, { "epoch": 0.55, "learning_rate": 5.220257347779496e-05, "loss": 0.32, "step": 914500 }, { "epoch": 0.55, "learning_rate": 5.2200473512234395e-05, "loss": 0.3214, "step": 915000 }, { "epoch": 0.55, "learning_rate": 5.2198373546673835e-05, "loss": 0.3159, "step": 915500 }, { "epoch": 0.55, "learning_rate": 5.219627358111327e-05, "loss": 0.3108, "step": 916000 }, { "epoch": 0.55, "learning_rate": 5.21941736155527e-05, "loss": 0.3189, "step": 916500 }, { "epoch": 0.55, "learning_rate": 5.219207364999214e-05, "loss": 0.3143, "step": 917000 }, { "epoch": 0.55, "learning_rate": 5.2189973684431576e-05, "loss": 0.322, "step": 917500 }, { "epoch": 0.55, "learning_rate": 5.218787791880213e-05, "loss": 0.3211, "step": 918000 }, { "epoch": 0.55, "learning_rate": 5.218577795324157e-05, "loss": 0.3136, "step": 918500 }, { "epoch": 0.55, "learning_rate": 5.218368218761212e-05, "loss": 0.3213, "step": 919000 }, { "epoch": 0.55, "learning_rate": 5.218158222205156e-05, "loss": 0.317, "step": 919500 }, { "epoch": 0.55, "learning_rate": 5.217948225649099e-05, "loss": 0.3207, "step": 920000 }, { "epoch": 0.55, "learning_rate": 5.217738229093043e-05, "loss": 0.3203, "step": 920500 }, { "epoch": 0.55, "learning_rate": 5.2175290725232104e-05, "loss": 0.3193, "step": 921000 }, { "epoch": 0.55, "learning_rate": 5.2173190759671544e-05, "loss": 0.3176, "step": 921500 }, { "epoch": 0.55, "learning_rate": 5.217109079411098e-05, "loss": 0.3093, "step": 922000 }, { "epoch": 0.55, "learning_rate": 5.216899082855041e-05, "loss": 0.3196, "step": 922500 }, { "epoch": 0.55, "learning_rate": 5.2166890862989845e-05, "loss": 0.3148, "step": 923000 }, { "epoch": 0.55, "learning_rate": 5.216479089742928e-05, "loss": 0.3262, "step": 923500 }, { "epoch": 0.55, "learning_rate": 5.216269093186872e-05, "loss": 0.3146, "step": 924000 }, { "epoch": 0.55, "learning_rate": 5.216059096630815e-05, "loss": 0.3134, "step": 924500 }, { "epoch": 0.55, "learning_rate": 5.2158491000747585e-05, "loss": 0.3196, "step": 925000 }, { "epoch": 0.55, "learning_rate": 5.215639523511814e-05, "loss": 0.3192, "step": 925500 }, { "epoch": 0.56, "learning_rate": 5.215429526955758e-05, "loss": 0.3251, "step": 926000 }, { "epoch": 0.56, "learning_rate": 5.215219530399701e-05, "loss": 0.313, "step": 926500 }, { "epoch": 0.56, "learning_rate": 5.2150095338436446e-05, "loss": 0.3165, "step": 927000 }, { "epoch": 0.56, "learning_rate": 5.2147999572807e-05, "loss": 0.3179, "step": 927500 }, { "epoch": 0.56, "learning_rate": 5.214589960724644e-05, "loss": 0.321, "step": 928000 }, { "epoch": 0.56, "learning_rate": 5.214379964168587e-05, "loss": 0.3255, "step": 928500 }, { "epoch": 0.56, "learning_rate": 5.214169967612531e-05, "loss": 0.3183, "step": 929000 }, { "epoch": 0.56, "learning_rate": 5.213959971056475e-05, "loss": 0.3181, "step": 929500 }, { "epoch": 0.56, "learning_rate": 5.213749974500418e-05, "loss": 0.3193, "step": 930000 }, { "epoch": 0.56, "learning_rate": 5.2135399779443614e-05, "loss": 0.3176, "step": 930500 }, { "epoch": 0.56, "learning_rate": 5.2133299813883054e-05, "loss": 0.3234, "step": 931000 }, { "epoch": 0.56, "learning_rate": 5.213119984832249e-05, "loss": 0.3171, "step": 931500 }, { "epoch": 0.56, "learning_rate": 5.212909988276192e-05, "loss": 0.3203, "step": 932000 }, { "epoch": 0.56, "learning_rate": 5.2127008317063595e-05, "loss": 0.328, "step": 932500 }, { "epoch": 0.56, "learning_rate": 5.2124908351503035e-05, "loss": 0.3223, "step": 933000 }, { "epoch": 0.56, "learning_rate": 5.212280838594247e-05, "loss": 0.3091, "step": 933500 }, { "epoch": 0.56, "learning_rate": 5.21207084203819e-05, "loss": 0.3178, "step": 934000 }, { "epoch": 0.56, "learning_rate": 5.211860845482134e-05, "loss": 0.3188, "step": 934500 }, { "epoch": 0.56, "learning_rate": 5.2116508489260776e-05, "loss": 0.3149, "step": 935000 }, { "epoch": 0.56, "learning_rate": 5.211440852370021e-05, "loss": 0.3193, "step": 935500 }, { "epoch": 0.56, "learning_rate": 5.211230855813965e-05, "loss": 0.3173, "step": 936000 }, { "epoch": 0.56, "learning_rate": 5.211020859257908e-05, "loss": 0.3103, "step": 936500 }, { "epoch": 0.56, "learning_rate": 5.2108112826949636e-05, "loss": 0.3221, "step": 937000 }, { "epoch": 0.56, "learning_rate": 5.210601286138907e-05, "loss": 0.3249, "step": 937500 }, { "epoch": 0.56, "learning_rate": 5.210391289582851e-05, "loss": 0.3149, "step": 938000 }, { "epoch": 0.56, "learning_rate": 5.2101812930267944e-05, "loss": 0.3195, "step": 938500 }, { "epoch": 0.56, "learning_rate": 5.2099721364569624e-05, "loss": 0.3157, "step": 939000 }, { "epoch": 0.56, "learning_rate": 5.209762139900905e-05, "loss": 0.3134, "step": 939500 }, { "epoch": 0.56, "learning_rate": 5.209552143344849e-05, "loss": 0.323, "step": 940000 }, { "epoch": 0.56, "learning_rate": 5.2093421467887924e-05, "loss": 0.3121, "step": 940500 }, { "epoch": 0.56, "learning_rate": 5.209132150232736e-05, "loss": 0.3184, "step": 941000 }, { "epoch": 0.56, "learning_rate": 5.208922573669792e-05, "loss": 0.3199, "step": 941500 }, { "epoch": 0.56, "learning_rate": 5.208712577113735e-05, "loss": 0.3276, "step": 942000 }, { "epoch": 0.57, "learning_rate": 5.2085025805576785e-05, "loss": 0.3163, "step": 942500 }, { "epoch": 0.57, "learning_rate": 5.208292584001622e-05, "loss": 0.315, "step": 943000 }, { "epoch": 0.57, "learning_rate": 5.208082587445566e-05, "loss": 0.3161, "step": 943500 }, { "epoch": 0.57, "learning_rate": 5.207872590889509e-05, "loss": 0.3254, "step": 944000 }, { "epoch": 0.57, "learning_rate": 5.2076625943334526e-05, "loss": 0.3158, "step": 944500 }, { "epoch": 0.57, "learning_rate": 5.2074525977773966e-05, "loss": 0.3167, "step": 945000 }, { "epoch": 0.57, "learning_rate": 5.20724260122134e-05, "loss": 0.3213, "step": 945500 }, { "epoch": 0.57, "learning_rate": 5.207032604665283e-05, "loss": 0.3212, "step": 946000 }, { "epoch": 0.57, "learning_rate": 5.206822608109227e-05, "loss": 0.3164, "step": 946500 }, { "epoch": 0.57, "learning_rate": 5.206613031546283e-05, "loss": 0.3205, "step": 947000 }, { "epoch": 0.57, "learning_rate": 5.206403034990226e-05, "loss": 0.3217, "step": 947500 }, { "epoch": 0.57, "learning_rate": 5.2061934584272814e-05, "loss": 0.3198, "step": 948000 }, { "epoch": 0.57, "learning_rate": 5.2059834618712254e-05, "loss": 0.3113, "step": 948500 }, { "epoch": 0.57, "learning_rate": 5.205773465315169e-05, "loss": 0.3134, "step": 949000 }, { "epoch": 0.57, "learning_rate": 5.205563468759112e-05, "loss": 0.3171, "step": 949500 }, { "epoch": 0.57, "learning_rate": 5.2053538921961674e-05, "loss": 0.3171, "step": 950000 }, { "epoch": 0.57, "learning_rate": 5.2051438956401115e-05, "loss": 0.3122, "step": 950500 }, { "epoch": 0.57, "learning_rate": 5.204933899084055e-05, "loss": 0.3179, "step": 951000 }, { "epoch": 0.57, "learning_rate": 5.204723902527998e-05, "loss": 0.3206, "step": 951500 }, { "epoch": 0.57, "learning_rate": 5.204513905971942e-05, "loss": 0.3144, "step": 952000 }, { "epoch": 0.57, "learning_rate": 5.2043039094158855e-05, "loss": 0.3161, "step": 952500 }, { "epoch": 0.57, "learning_rate": 5.204093912859829e-05, "loss": 0.3208, "step": 953000 }, { "epoch": 0.57, "learning_rate": 5.203884336296885e-05, "loss": 0.321, "step": 953500 }, { "epoch": 0.57, "learning_rate": 5.203674339740828e-05, "loss": 0.3122, "step": 954000 }, { "epoch": 0.57, "learning_rate": 5.2034643431847716e-05, "loss": 0.3143, "step": 954500 }, { "epoch": 0.57, "learning_rate": 5.2032543466287156e-05, "loss": 0.3162, "step": 955000 }, { "epoch": 0.57, "learning_rate": 5.203044350072659e-05, "loss": 0.3071, "step": 955500 }, { "epoch": 0.57, "learning_rate": 5.202834773509714e-05, "loss": 0.3168, "step": 956000 }, { "epoch": 0.57, "learning_rate": 5.202624776953658e-05, "loss": 0.3183, "step": 956500 }, { "epoch": 0.57, "learning_rate": 5.202414780397602e-05, "loss": 0.3192, "step": 957000 }, { "epoch": 0.57, "learning_rate": 5.202204783841545e-05, "loss": 0.3114, "step": 957500 }, { "epoch": 0.57, "learning_rate": 5.2019947872854884e-05, "loss": 0.313, "step": 958000 }, { "epoch": 0.57, "learning_rate": 5.2017847907294324e-05, "loss": 0.3179, "step": 958500 }, { "epoch": 0.57, "learning_rate": 5.201575214166488e-05, "loss": 0.315, "step": 959000 }, { "epoch": 0.58, "learning_rate": 5.201365217610431e-05, "loss": 0.3243, "step": 959500 }, { "epoch": 0.58, "learning_rate": 5.2011552210543745e-05, "loss": 0.3177, "step": 960000 }, { "epoch": 0.58, "learning_rate": 5.2009452244983185e-05, "loss": 0.3108, "step": 960500 }, { "epoch": 0.58, "learning_rate": 5.200735227942262e-05, "loss": 0.3222, "step": 961000 }, { "epoch": 0.58, "learning_rate": 5.200525231386205e-05, "loss": 0.3141, "step": 961500 }, { "epoch": 0.58, "learning_rate": 5.2003152348301485e-05, "loss": 0.3237, "step": 962000 }, { "epoch": 0.58, "learning_rate": 5.200105238274092e-05, "loss": 0.3198, "step": 962500 }, { "epoch": 0.58, "learning_rate": 5.199895241718036e-05, "loss": 0.3106, "step": 963000 }, { "epoch": 0.58, "learning_rate": 5.199685665155092e-05, "loss": 0.3217, "step": 963500 }, { "epoch": 0.58, "learning_rate": 5.1994756685990346e-05, "loss": 0.3202, "step": 964000 }, { "epoch": 0.58, "learning_rate": 5.199265672042978e-05, "loss": 0.3145, "step": 964500 }, { "epoch": 0.58, "learning_rate": 5.199055675486922e-05, "loss": 0.3106, "step": 965000 }, { "epoch": 0.58, "learning_rate": 5.198845678930865e-05, "loss": 0.3159, "step": 965500 }, { "epoch": 0.58, "learning_rate": 5.1986365223610334e-05, "loss": 0.3174, "step": 966000 }, { "epoch": 0.58, "learning_rate": 5.198426525804977e-05, "loss": 0.3155, "step": 966500 }, { "epoch": 0.58, "learning_rate": 5.19821652924892e-05, "loss": 0.3246, "step": 967000 }, { "epoch": 0.58, "learning_rate": 5.198006952685976e-05, "loss": 0.3127, "step": 967500 }, { "epoch": 0.58, "learning_rate": 5.1977969561299194e-05, "loss": 0.3173, "step": 968000 }, { "epoch": 0.58, "learning_rate": 5.197586959573863e-05, "loss": 0.3133, "step": 968500 }, { "epoch": 0.58, "learning_rate": 5.197376963017807e-05, "loss": 0.3183, "step": 969000 }, { "epoch": 0.58, "learning_rate": 5.19716696646175e-05, "loss": 0.3213, "step": 969500 }, { "epoch": 0.58, "learning_rate": 5.1969569699056935e-05, "loss": 0.3175, "step": 970000 }, { "epoch": 0.58, "learning_rate": 5.1967469733496375e-05, "loss": 0.3268, "step": 970500 }, { "epoch": 0.58, "learning_rate": 5.19653697679358e-05, "loss": 0.3177, "step": 971000 }, { "epoch": 0.58, "learning_rate": 5.1963269802375235e-05, "loss": 0.3098, "step": 971500 }, { "epoch": 0.58, "learning_rate": 5.1961174036745796e-05, "loss": 0.3135, "step": 972000 }, { "epoch": 0.58, "learning_rate": 5.1959074071185236e-05, "loss": 0.3217, "step": 972500 }, { "epoch": 0.58, "learning_rate": 5.195697410562467e-05, "loss": 0.3145, "step": 973000 }, { "epoch": 0.58, "learning_rate": 5.1954874140064096e-05, "loss": 0.3152, "step": 973500 }, { "epoch": 0.58, "learning_rate": 5.1952774174503536e-05, "loss": 0.3128, "step": 974000 }, { "epoch": 0.58, "learning_rate": 5.1950678408874097e-05, "loss": 0.3215, "step": 974500 }, { "epoch": 0.58, "learning_rate": 5.194857844331353e-05, "loss": 0.3141, "step": 975000 }, { "epoch": 0.58, "learning_rate": 5.194647847775297e-05, "loss": 0.3213, "step": 975500 }, { "epoch": 0.59, "learning_rate": 5.19443785121924e-05, "loss": 0.311, "step": 976000 }, { "epoch": 0.59, "learning_rate": 5.194227854663183e-05, "loss": 0.3137, "step": 976500 }, { "epoch": 0.59, "learning_rate": 5.194017858107127e-05, "loss": 0.3218, "step": 977000 }, { "epoch": 0.59, "learning_rate": 5.1938078615510704e-05, "loss": 0.3156, "step": 977500 }, { "epoch": 0.59, "learning_rate": 5.193597864995014e-05, "loss": 0.3214, "step": 978000 }, { "epoch": 0.59, "learning_rate": 5.193387868438958e-05, "loss": 0.3106, "step": 978500 }, { "epoch": 0.59, "learning_rate": 5.193178291876013e-05, "loss": 0.3125, "step": 979000 }, { "epoch": 0.59, "learning_rate": 5.1929682953199565e-05, "loss": 0.3204, "step": 979500 }, { "epoch": 0.59, "learning_rate": 5.1927582987639e-05, "loss": 0.3127, "step": 980000 }, { "epoch": 0.59, "learning_rate": 5.192548302207844e-05, "loss": 0.3123, "step": 980500 }, { "epoch": 0.59, "learning_rate": 5.192338725644899e-05, "loss": 0.3205, "step": 981000 }, { "epoch": 0.59, "learning_rate": 5.1921287290888426e-05, "loss": 0.3155, "step": 981500 }, { "epoch": 0.59, "learning_rate": 5.1919187325327866e-05, "loss": 0.3169, "step": 982000 }, { "epoch": 0.59, "learning_rate": 5.19170873597673e-05, "loss": 0.3176, "step": 982500 }, { "epoch": 0.59, "learning_rate": 5.191499159413785e-05, "loss": 0.3186, "step": 983000 }, { "epoch": 0.59, "learning_rate": 5.1912891628577286e-05, "loss": 0.3134, "step": 983500 }, { "epoch": 0.59, "learning_rate": 5.191079586294785e-05, "loss": 0.3211, "step": 984000 }, { "epoch": 0.59, "learning_rate": 5.190869589738729e-05, "loss": 0.3154, "step": 984500 }, { "epoch": 0.59, "learning_rate": 5.190659593182672e-05, "loss": 0.3178, "step": 985000 }, { "epoch": 0.59, "learning_rate": 5.190449596626615e-05, "loss": 0.3165, "step": 985500 }, { "epoch": 0.59, "learning_rate": 5.190239600070559e-05, "loss": 0.3192, "step": 986000 }, { "epoch": 0.59, "learning_rate": 5.190030023507615e-05, "loss": 0.321, "step": 986500 }, { "epoch": 0.59, "learning_rate": 5.189820026951558e-05, "loss": 0.32, "step": 987000 }, { "epoch": 0.59, "learning_rate": 5.1896100303955015e-05, "loss": 0.3086, "step": 987500 }, { "epoch": 0.59, "learning_rate": 5.189400033839445e-05, "loss": 0.3134, "step": 988000 }, { "epoch": 0.59, "learning_rate": 5.189190037283388e-05, "loss": 0.3164, "step": 988500 }, { "epoch": 0.59, "learning_rate": 5.188980040727332e-05, "loss": 0.3171, "step": 989000 }, { "epoch": 0.59, "learning_rate": 5.1887700441712755e-05, "loss": 0.3129, "step": 989500 }, { "epoch": 0.59, "learning_rate": 5.188560047615219e-05, "loss": 0.3158, "step": 990000 }, { "epoch": 0.59, "learning_rate": 5.188350051059163e-05, "loss": 0.3164, "step": 990500 }, { "epoch": 0.59, "learning_rate": 5.188140054503106e-05, "loss": 0.3218, "step": 991000 }, { "epoch": 0.59, "learning_rate": 5.1879300579470496e-05, "loss": 0.3109, "step": 991500 }, { "epoch": 0.59, "learning_rate": 5.1877200613909936e-05, "loss": 0.314, "step": 992000 }, { "epoch": 0.6, "learning_rate": 5.187510484828049e-05, "loss": 0.3153, "step": 992500 }, { "epoch": 0.6, "learning_rate": 5.187300908265104e-05, "loss": 0.3181, "step": 993000 }, { "epoch": 0.6, "learning_rate": 5.187090911709048e-05, "loss": 0.3145, "step": 993500 }, { "epoch": 0.6, "learning_rate": 5.186880915152991e-05, "loss": 0.3111, "step": 994000 }, { "epoch": 0.6, "learning_rate": 5.186670918596935e-05, "loss": 0.3148, "step": 994500 }, { "epoch": 0.6, "learning_rate": 5.1864609220408784e-05, "loss": 0.3115, "step": 995000 }, { "epoch": 0.6, "learning_rate": 5.1862517654710464e-05, "loss": 0.3162, "step": 995500 }, { "epoch": 0.6, "learning_rate": 5.18604176891499e-05, "loss": 0.3192, "step": 996000 }, { "epoch": 0.6, "learning_rate": 5.185831772358934e-05, "loss": 0.3137, "step": 996500 }, { "epoch": 0.6, "learning_rate": 5.185621775802877e-05, "loss": 0.3114, "step": 997000 }, { "epoch": 0.6, "learning_rate": 5.18541177924682e-05, "loss": 0.3118, "step": 997500 }, { "epoch": 0.6, "learning_rate": 5.185201782690764e-05, "loss": 0.3152, "step": 998000 }, { "epoch": 0.6, "learning_rate": 5.184991786134707e-05, "loss": 0.3165, "step": 998500 }, { "epoch": 0.6, "learning_rate": 5.1847817895786505e-05, "loss": 0.3139, "step": 999000 }, { "epoch": 0.6, "learning_rate": 5.184572213015706e-05, "loss": 0.3118, "step": 999500 }, { "epoch": 0.6, "learning_rate": 5.18436221645965e-05, "loss": 0.3179, "step": 1000000 }, { "epoch": 0.6, "eval_loss": 0.2849758267402649, "eval_runtime": 1458.9307, "eval_samples_per_second": 361.032, "eval_steps_per_second": 60.172, "step": 1000000 }, { "epoch": 0.6, "learning_rate": 5.184152219903593e-05, "loss": 0.3121, "step": 1000500 }, { "epoch": 0.6, "learning_rate": 5.1839422233475366e-05, "loss": 0.3218, "step": 1001000 }, { "epoch": 0.6, "learning_rate": 5.1837322267914806e-05, "loss": 0.3122, "step": 1001500 }, { "epoch": 0.6, "learning_rate": 5.183522230235424e-05, "loss": 0.316, "step": 1002000 }, { "epoch": 0.6, "learning_rate": 5.183312653672479e-05, "loss": 0.3163, "step": 1002500 }, { "epoch": 0.6, "learning_rate": 5.1831026571164233e-05, "loss": 0.3145, "step": 1003000 }, { "epoch": 0.6, "learning_rate": 5.182892660560367e-05, "loss": 0.3097, "step": 1003500 }, { "epoch": 0.6, "learning_rate": 5.18268266400431e-05, "loss": 0.3137, "step": 1004000 }, { "epoch": 0.6, "learning_rate": 5.1824730874413654e-05, "loss": 0.3163, "step": 1004500 }, { "epoch": 0.6, "learning_rate": 5.1822630908853094e-05, "loss": 0.3151, "step": 1005000 }, { "epoch": 0.6, "learning_rate": 5.182053094329253e-05, "loss": 0.3147, "step": 1005500 }, { "epoch": 0.6, "learning_rate": 5.181843097773196e-05, "loss": 0.3123, "step": 1006000 }, { "epoch": 0.6, "learning_rate": 5.181633521210252e-05, "loss": 0.3127, "step": 1006500 }, { "epoch": 0.6, "learning_rate": 5.1814235246541955e-05, "loss": 0.3151, "step": 1007000 }, { "epoch": 0.6, "learning_rate": 5.181213528098139e-05, "loss": 0.3167, "step": 1007500 }, { "epoch": 0.6, "learning_rate": 5.181003531542082e-05, "loss": 0.3149, "step": 1008000 }, { "epoch": 0.6, "learning_rate": 5.180793534986026e-05, "loss": 0.3133, "step": 1008500 }, { "epoch": 0.6, "learning_rate": 5.1805835384299696e-05, "loss": 0.3243, "step": 1009000 }, { "epoch": 0.61, "learning_rate": 5.180373541873913e-05, "loss": 0.3156, "step": 1009500 }, { "epoch": 0.61, "learning_rate": 5.180163965310969e-05, "loss": 0.3135, "step": 1010000 }, { "epoch": 0.61, "learning_rate": 5.179953968754912e-05, "loss": 0.3114, "step": 1010500 }, { "epoch": 0.61, "learning_rate": 5.1797439721988556e-05, "loss": 0.3175, "step": 1011000 }, { "epoch": 0.61, "learning_rate": 5.1795339756427996e-05, "loss": 0.3087, "step": 1011500 }, { "epoch": 0.61, "learning_rate": 5.179323979086743e-05, "loss": 0.3127, "step": 1012000 }, { "epoch": 0.61, "learning_rate": 5.1791144025237984e-05, "loss": 0.3209, "step": 1012500 }, { "epoch": 0.61, "learning_rate": 5.178904405967742e-05, "loss": 0.3122, "step": 1013000 }, { "epoch": 0.61, "learning_rate": 5.178694409411686e-05, "loss": 0.3198, "step": 1013500 }, { "epoch": 0.61, "learning_rate": 5.178484412855629e-05, "loss": 0.3149, "step": 1014000 }, { "epoch": 0.61, "learning_rate": 5.1782744162995724e-05, "loss": 0.3099, "step": 1014500 }, { "epoch": 0.61, "learning_rate": 5.178064839736628e-05, "loss": 0.3154, "step": 1015000 }, { "epoch": 0.61, "learning_rate": 5.177854843180572e-05, "loss": 0.3121, "step": 1015500 }, { "epoch": 0.61, "learning_rate": 5.177645266617628e-05, "loss": 0.313, "step": 1016000 }, { "epoch": 0.61, "learning_rate": 5.1774352700615705e-05, "loss": 0.3139, "step": 1016500 }, { "epoch": 0.61, "learning_rate": 5.1772252735055145e-05, "loss": 0.3102, "step": 1017000 }, { "epoch": 0.61, "learning_rate": 5.177015276949458e-05, "loss": 0.3121, "step": 1017500 }, { "epoch": 0.61, "learning_rate": 5.176805280393401e-05, "loss": 0.3102, "step": 1018000 }, { "epoch": 0.61, "learning_rate": 5.176595283837345e-05, "loss": 0.3169, "step": 1018500 }, { "epoch": 0.61, "learning_rate": 5.1763852872812886e-05, "loss": 0.3161, "step": 1019000 }, { "epoch": 0.61, "learning_rate": 5.176175290725232e-05, "loss": 0.3148, "step": 1019500 }, { "epoch": 0.61, "learning_rate": 5.175965294169176e-05, "loss": 0.3205, "step": 1020000 }, { "epoch": 0.61, "learning_rate": 5.175755717606231e-05, "loss": 0.3111, "step": 1020500 }, { "epoch": 0.61, "learning_rate": 5.1755457210501747e-05, "loss": 0.3156, "step": 1021000 }, { "epoch": 0.61, "learning_rate": 5.175335724494118e-05, "loss": 0.3183, "step": 1021500 }, { "epoch": 0.61, "learning_rate": 5.175125727938062e-05, "loss": 0.3149, "step": 1022000 }, { "epoch": 0.61, "learning_rate": 5.1749161513751174e-05, "loss": 0.3101, "step": 1022500 }, { "epoch": 0.61, "learning_rate": 5.1747065748121734e-05, "loss": 0.3195, "step": 1023000 }, { "epoch": 0.61, "learning_rate": 5.174496578256116e-05, "loss": 0.3147, "step": 1023500 }, { "epoch": 0.61, "learning_rate": 5.17428658170006e-05, "loss": 0.3173, "step": 1024000 }, { "epoch": 0.61, "learning_rate": 5.1740765851440035e-05, "loss": 0.3065, "step": 1024500 }, { "epoch": 0.61, "learning_rate": 5.173866588587947e-05, "loss": 0.319, "step": 1025000 }, { "epoch": 0.61, "learning_rate": 5.173656592031891e-05, "loss": 0.3045, "step": 1025500 }, { "epoch": 0.62, "learning_rate": 5.173447015468946e-05, "loss": 0.3135, "step": 1026000 }, { "epoch": 0.62, "learning_rate": 5.1732370189128895e-05, "loss": 0.3166, "step": 1026500 }, { "epoch": 0.62, "learning_rate": 5.173027022356833e-05, "loss": 0.3074, "step": 1027000 }, { "epoch": 0.62, "learning_rate": 5.172817025800777e-05, "loss": 0.3127, "step": 1027500 }, { "epoch": 0.62, "learning_rate": 5.17260702924472e-05, "loss": 0.3207, "step": 1028000 }, { "epoch": 0.62, "learning_rate": 5.1723970326886636e-05, "loss": 0.3115, "step": 1028500 }, { "epoch": 0.62, "learning_rate": 5.1721870361326076e-05, "loss": 0.3144, "step": 1029000 }, { "epoch": 0.62, "learning_rate": 5.171977039576551e-05, "loss": 0.323, "step": 1029500 }, { "epoch": 0.62, "learning_rate": 5.171767463013606e-05, "loss": 0.3136, "step": 1030000 }, { "epoch": 0.62, "learning_rate": 5.17155746645755e-05, "loss": 0.3096, "step": 1030500 }, { "epoch": 0.62, "learning_rate": 5.171347469901494e-05, "loss": 0.3238, "step": 1031000 }, { "epoch": 0.62, "learning_rate": 5.171137473345437e-05, "loss": 0.312, "step": 1031500 }, { "epoch": 0.62, "learning_rate": 5.1709278967824924e-05, "loss": 0.3137, "step": 1032000 }, { "epoch": 0.62, "learning_rate": 5.1707179002264364e-05, "loss": 0.3109, "step": 1032500 }, { "epoch": 0.62, "learning_rate": 5.17050790367038e-05, "loss": 0.3175, "step": 1033000 }, { "epoch": 0.62, "learning_rate": 5.170297907114323e-05, "loss": 0.318, "step": 1033500 }, { "epoch": 0.62, "learning_rate": 5.170087910558267e-05, "loss": 0.3165, "step": 1034000 }, { "epoch": 0.62, "learning_rate": 5.1698779140022105e-05, "loss": 0.309, "step": 1034500 }, { "epoch": 0.62, "learning_rate": 5.169667917446154e-05, "loss": 0.3158, "step": 1035000 }, { "epoch": 0.62, "learning_rate": 5.169457920890098e-05, "loss": 0.3115, "step": 1035500 }, { "epoch": 0.62, "learning_rate": 5.1692479243340405e-05, "loss": 0.3124, "step": 1036000 }, { "epoch": 0.62, "learning_rate": 5.1690383477710965e-05, "loss": 0.3179, "step": 1036500 }, { "epoch": 0.62, "learning_rate": 5.168828771208152e-05, "loss": 0.3165, "step": 1037000 }, { "epoch": 0.62, "learning_rate": 5.168618774652095e-05, "loss": 0.3139, "step": 1037500 }, { "epoch": 0.62, "learning_rate": 5.168408778096039e-05, "loss": 0.3198, "step": 1038000 }, { "epoch": 0.62, "learning_rate": 5.1681987815399826e-05, "loss": 0.3141, "step": 1038500 }, { "epoch": 0.62, "learning_rate": 5.167989204977038e-05, "loss": 0.3062, "step": 1039000 }, { "epoch": 0.62, "learning_rate": 5.167779208420982e-05, "loss": 0.3174, "step": 1039500 }, { "epoch": 0.62, "learning_rate": 5.1675692118649253e-05, "loss": 0.3094, "step": 1040000 }, { "epoch": 0.62, "learning_rate": 5.167359215308869e-05, "loss": 0.321, "step": 1040500 }, { "epoch": 0.62, "learning_rate": 5.167149218752813e-05, "loss": 0.3086, "step": 1041000 }, { "epoch": 0.62, "learning_rate": 5.166939222196756e-05, "loss": 0.3156, "step": 1041500 }, { "epoch": 0.62, "learning_rate": 5.1667292256406994e-05, "loss": 0.3035, "step": 1042000 }, { "epoch": 0.63, "learning_rate": 5.1665192290846434e-05, "loss": 0.3144, "step": 1042500 }, { "epoch": 0.63, "learning_rate": 5.166309232528587e-05, "loss": 0.3157, "step": 1043000 }, { "epoch": 0.63, "learning_rate": 5.1660992359725294e-05, "loss": 0.3184, "step": 1043500 }, { "epoch": 0.63, "learning_rate": 5.1658892394164735e-05, "loss": 0.3147, "step": 1044000 }, { "epoch": 0.63, "learning_rate": 5.165679242860417e-05, "loss": 0.3066, "step": 1044500 }, { "epoch": 0.63, "learning_rate": 5.165469666297473e-05, "loss": 0.3092, "step": 1045000 }, { "epoch": 0.63, "learning_rate": 5.165259669741416e-05, "loss": 0.3107, "step": 1045500 }, { "epoch": 0.63, "learning_rate": 5.1650496731853595e-05, "loss": 0.3141, "step": 1046000 }, { "epoch": 0.63, "learning_rate": 5.164839676629303e-05, "loss": 0.3151, "step": 1046500 }, { "epoch": 0.63, "learning_rate": 5.164629680073247e-05, "loss": 0.3114, "step": 1047000 }, { "epoch": 0.63, "learning_rate": 5.164420103510303e-05, "loss": 0.3158, "step": 1047500 }, { "epoch": 0.63, "learning_rate": 5.1642101069542456e-05, "loss": 0.3151, "step": 1048000 }, { "epoch": 0.63, "learning_rate": 5.164000110398189e-05, "loss": 0.3056, "step": 1048500 }, { "epoch": 0.63, "learning_rate": 5.163790113842133e-05, "loss": 0.3151, "step": 1049000 }, { "epoch": 0.63, "learning_rate": 5.163580117286076e-05, "loss": 0.3057, "step": 1049500 }, { "epoch": 0.63, "learning_rate": 5.16337012073002e-05, "loss": 0.3133, "step": 1050000 }, { "epoch": 0.63, "learning_rate": 5.163160124173964e-05, "loss": 0.3158, "step": 1050500 }, { "epoch": 0.63, "learning_rate": 5.162950127617907e-05, "loss": 0.3141, "step": 1051000 }, { "epoch": 0.63, "learning_rate": 5.1627405510549624e-05, "loss": 0.314, "step": 1051500 }, { "epoch": 0.63, "learning_rate": 5.162530554498906e-05, "loss": 0.3164, "step": 1052000 }, { "epoch": 0.63, "learning_rate": 5.16232055794285e-05, "loss": 0.3042, "step": 1052500 }, { "epoch": 0.63, "learning_rate": 5.162110981379905e-05, "loss": 0.3185, "step": 1053000 }, { "epoch": 0.63, "learning_rate": 5.1619009848238485e-05, "loss": 0.3116, "step": 1053500 }, { "epoch": 0.63, "learning_rate": 5.1616914082609045e-05, "loss": 0.3116, "step": 1054000 }, { "epoch": 0.63, "learning_rate": 5.1614814117048485e-05, "loss": 0.3131, "step": 1054500 }, { "epoch": 0.63, "learning_rate": 5.161271415148792e-05, "loss": 0.314, "step": 1055000 }, { "epoch": 0.63, "learning_rate": 5.1610614185927345e-05, "loss": 0.3151, "step": 1055500 }, { "epoch": 0.63, "learning_rate": 5.1608514220366786e-05, "loss": 0.3162, "step": 1056000 }, { "epoch": 0.63, "learning_rate": 5.160641425480622e-05, "loss": 0.3154, "step": 1056500 }, { "epoch": 0.63, "learning_rate": 5.160431428924565e-05, "loss": 0.3061, "step": 1057000 }, { "epoch": 0.63, "learning_rate": 5.160221432368509e-05, "loss": 0.3085, "step": 1057500 }, { "epoch": 0.63, "learning_rate": 5.1600114358124526e-05, "loss": 0.3113, "step": 1058000 }, { "epoch": 0.63, "learning_rate": 5.159801439256396e-05, "loss": 0.3115, "step": 1058500 }, { "epoch": 0.63, "learning_rate": 5.15959144270034e-05, "loss": 0.3075, "step": 1059000 }, { "epoch": 0.64, "learning_rate": 5.1593814461442833e-05, "loss": 0.3109, "step": 1059500 }, { "epoch": 0.64, "learning_rate": 5.159171449588227e-05, "loss": 0.3151, "step": 1060000 }, { "epoch": 0.64, "learning_rate": 5.158961873025282e-05, "loss": 0.3147, "step": 1060500 }, { "epoch": 0.64, "learning_rate": 5.158751876469226e-05, "loss": 0.3101, "step": 1061000 }, { "epoch": 0.64, "learning_rate": 5.1585422999062814e-05, "loss": 0.3085, "step": 1061500 }, { "epoch": 0.64, "learning_rate": 5.158332303350225e-05, "loss": 0.3131, "step": 1062000 }, { "epoch": 0.64, "learning_rate": 5.158122306794169e-05, "loss": 0.3121, "step": 1062500 }, { "epoch": 0.64, "learning_rate": 5.157912310238112e-05, "loss": 0.3101, "step": 1063000 }, { "epoch": 0.64, "learning_rate": 5.1577027336751675e-05, "loss": 0.3198, "step": 1063500 }, { "epoch": 0.64, "learning_rate": 5.157492737119111e-05, "loss": 0.3068, "step": 1064000 }, { "epoch": 0.64, "learning_rate": 5.157282740563055e-05, "loss": 0.3135, "step": 1064500 }, { "epoch": 0.64, "learning_rate": 5.157072744006998e-05, "loss": 0.3133, "step": 1065000 }, { "epoch": 0.64, "learning_rate": 5.1568627474509416e-05, "loss": 0.3125, "step": 1065500 }, { "epoch": 0.64, "learning_rate": 5.1566527508948856e-05, "loss": 0.3112, "step": 1066000 }, { "epoch": 0.64, "learning_rate": 5.156442754338829e-05, "loss": 0.3089, "step": 1066500 }, { "epoch": 0.64, "learning_rate": 5.156232757782772e-05, "loss": 0.3158, "step": 1067000 }, { "epoch": 0.64, "learning_rate": 5.156022761226716e-05, "loss": 0.3141, "step": 1067500 }, { "epoch": 0.64, "learning_rate": 5.155812764670659e-05, "loss": 0.3152, "step": 1068000 }, { "epoch": 0.64, "learning_rate": 5.155603188107715e-05, "loss": 0.3103, "step": 1068500 }, { "epoch": 0.64, "learning_rate": 5.1553931915516584e-05, "loss": 0.3163, "step": 1069000 }, { "epoch": 0.64, "learning_rate": 5.1551831949956024e-05, "loss": 0.3097, "step": 1069500 }, { "epoch": 0.64, "learning_rate": 5.154973198439546e-05, "loss": 0.3117, "step": 1070000 }, { "epoch": 0.64, "learning_rate": 5.154763201883489e-05, "loss": 0.3134, "step": 1070500 }, { "epoch": 0.64, "learning_rate": 5.1545532053274324e-05, "loss": 0.3144, "step": 1071000 }, { "epoch": 0.64, "learning_rate": 5.154343208771376e-05, "loss": 0.3142, "step": 1071500 }, { "epoch": 0.64, "learning_rate": 5.15413321221532e-05, "loss": 0.3121, "step": 1072000 }, { "epoch": 0.64, "learning_rate": 5.153923215659263e-05, "loss": 0.3127, "step": 1072500 }, { "epoch": 0.64, "learning_rate": 5.1537136390963185e-05, "loss": 0.313, "step": 1073000 }, { "epoch": 0.64, "learning_rate": 5.153503642540262e-05, "loss": 0.3164, "step": 1073500 }, { "epoch": 0.64, "learning_rate": 5.153293645984206e-05, "loss": 0.3088, "step": 1074000 }, { "epoch": 0.64, "learning_rate": 5.153083649428149e-05, "loss": 0.3118, "step": 1074500 }, { "epoch": 0.64, "learning_rate": 5.1528736528720925e-05, "loss": 0.3074, "step": 1075000 }, { "epoch": 0.64, "learning_rate": 5.1526636563160366e-05, "loss": 0.3107, "step": 1075500 }, { "epoch": 0.65, "learning_rate": 5.152454079753092e-05, "loss": 0.3087, "step": 1076000 }, { "epoch": 0.65, "learning_rate": 5.152244083197035e-05, "loss": 0.3128, "step": 1076500 }, { "epoch": 0.65, "learning_rate": 5.1520340866409786e-05, "loss": 0.3179, "step": 1077000 }, { "epoch": 0.65, "learning_rate": 5.1518240900849226e-05, "loss": 0.3137, "step": 1077500 }, { "epoch": 0.65, "learning_rate": 5.151614093528866e-05, "loss": 0.3038, "step": 1078000 }, { "epoch": 0.65, "learning_rate": 5.151404936959034e-05, "loss": 0.3199, "step": 1078500 }, { "epoch": 0.65, "learning_rate": 5.1511949404029774e-05, "loss": 0.3126, "step": 1079000 }, { "epoch": 0.65, "learning_rate": 5.1509849438469214e-05, "loss": 0.3148, "step": 1079500 }, { "epoch": 0.65, "learning_rate": 5.150774947290864e-05, "loss": 0.3172, "step": 1080000 }, { "epoch": 0.65, "learning_rate": 5.15056537072792e-05, "loss": 0.3156, "step": 1080500 }, { "epoch": 0.65, "learning_rate": 5.1503553741718635e-05, "loss": 0.3042, "step": 1081000 }, { "epoch": 0.65, "learning_rate": 5.1501453776158075e-05, "loss": 0.3101, "step": 1081500 }, { "epoch": 0.65, "learning_rate": 5.149935381059751e-05, "loss": 0.3195, "step": 1082000 }, { "epoch": 0.65, "learning_rate": 5.1497253845036935e-05, "loss": 0.3213, "step": 1082500 }, { "epoch": 0.65, "learning_rate": 5.1495153879476375e-05, "loss": 0.3081, "step": 1083000 }, { "epoch": 0.65, "learning_rate": 5.149305391391581e-05, "loss": 0.3123, "step": 1083500 }, { "epoch": 0.65, "learning_rate": 5.149095394835524e-05, "loss": 0.312, "step": 1084000 }, { "epoch": 0.65, "learning_rate": 5.148885398279468e-05, "loss": 0.319, "step": 1084500 }, { "epoch": 0.65, "learning_rate": 5.1486754017234116e-05, "loss": 0.3142, "step": 1085000 }, { "epoch": 0.65, "learning_rate": 5.148465405167355e-05, "loss": 0.312, "step": 1085500 }, { "epoch": 0.65, "learning_rate": 5.148255408611299e-05, "loss": 0.3125, "step": 1086000 }, { "epoch": 0.65, "learning_rate": 5.148045412055242e-05, "loss": 0.3194, "step": 1086500 }, { "epoch": 0.65, "learning_rate": 5.1478358354922977e-05, "loss": 0.3063, "step": 1087000 }, { "epoch": 0.65, "learning_rate": 5.147625838936242e-05, "loss": 0.3133, "step": 1087500 }, { "epoch": 0.65, "learning_rate": 5.147415842380185e-05, "loss": 0.3079, "step": 1088000 }, { "epoch": 0.65, "learning_rate": 5.1472058458241284e-05, "loss": 0.3063, "step": 1088500 }, { "epoch": 0.65, "learning_rate": 5.146996269261184e-05, "loss": 0.3076, "step": 1089000 }, { "epoch": 0.65, "learning_rate": 5.146786272705128e-05, "loss": 0.3117, "step": 1089500 }, { "epoch": 0.65, "learning_rate": 5.146576276149071e-05, "loss": 0.3105, "step": 1090000 }, { "epoch": 0.65, "learning_rate": 5.1463662795930144e-05, "loss": 0.3145, "step": 1090500 }, { "epoch": 0.65, "learning_rate": 5.1461562830369585e-05, "loss": 0.3157, "step": 1091000 }, { "epoch": 0.65, "learning_rate": 5.145946706474014e-05, "loss": 0.3201, "step": 1091500 }, { "epoch": 0.65, "learning_rate": 5.145736709917957e-05, "loss": 0.3155, "step": 1092000 }, { "epoch": 0.65, "learning_rate": 5.1455271333550125e-05, "loss": 0.3092, "step": 1092500 }, { "epoch": 0.66, "learning_rate": 5.1453171367989565e-05, "loss": 0.3105, "step": 1093000 }, { "epoch": 0.66, "learning_rate": 5.1451071402429e-05, "loss": 0.3106, "step": 1093500 }, { "epoch": 0.66, "learning_rate": 5.144897143686843e-05, "loss": 0.32, "step": 1094000 }, { "epoch": 0.66, "learning_rate": 5.144687147130787e-05, "loss": 0.3106, "step": 1094500 }, { "epoch": 0.66, "learning_rate": 5.1444771505747306e-05, "loss": 0.313, "step": 1095000 }, { "epoch": 0.66, "learning_rate": 5.144267154018674e-05, "loss": 0.3027, "step": 1095500 }, { "epoch": 0.66, "learning_rate": 5.144057157462618e-05, "loss": 0.3123, "step": 1096000 }, { "epoch": 0.66, "learning_rate": 5.143847160906561e-05, "loss": 0.311, "step": 1096500 }, { "epoch": 0.66, "learning_rate": 5.143637584343617e-05, "loss": 0.3055, "step": 1097000 }, { "epoch": 0.66, "learning_rate": 5.14342758778756e-05, "loss": 0.3142, "step": 1097500 }, { "epoch": 0.66, "learning_rate": 5.1432180112246154e-05, "loss": 0.3105, "step": 1098000 }, { "epoch": 0.66, "learning_rate": 5.1430084346616714e-05, "loss": 0.3139, "step": 1098500 }, { "epoch": 0.66, "learning_rate": 5.142798438105615e-05, "loss": 0.3176, "step": 1099000 }, { "epoch": 0.66, "learning_rate": 5.142588441549558e-05, "loss": 0.3127, "step": 1099500 }, { "epoch": 0.66, "learning_rate": 5.142378444993502e-05, "loss": 0.312, "step": 1100000 }, { "epoch": 0.66, "eval_loss": 0.28178706765174866, "eval_runtime": 1457.3807, "eval_samples_per_second": 361.416, "eval_steps_per_second": 60.236, "step": 1100000 }, { "epoch": 0.66, "learning_rate": 5.1421684484374455e-05, "loss": 0.3156, "step": 1100500 }, { "epoch": 0.66, "learning_rate": 5.141958451881389e-05, "loss": 0.3103, "step": 1101000 }, { "epoch": 0.66, "learning_rate": 5.141748455325333e-05, "loss": 0.3054, "step": 1101500 }, { "epoch": 0.66, "learning_rate": 5.141538458769276e-05, "loss": 0.3063, "step": 1102000 }, { "epoch": 0.66, "learning_rate": 5.1413284622132195e-05, "loss": 0.3144, "step": 1102500 }, { "epoch": 0.66, "learning_rate": 5.1411184656571636e-05, "loss": 0.313, "step": 1103000 }, { "epoch": 0.66, "learning_rate": 5.140908469101107e-05, "loss": 0.3056, "step": 1103500 }, { "epoch": 0.66, "learning_rate": 5.140698892538162e-05, "loss": 0.3184, "step": 1104000 }, { "epoch": 0.66, "learning_rate": 5.1404888959821056e-05, "loss": 0.307, "step": 1104500 }, { "epoch": 0.66, "learning_rate": 5.1402788994260496e-05, "loss": 0.3122, "step": 1105000 }, { "epoch": 0.66, "learning_rate": 5.140068902869993e-05, "loss": 0.3075, "step": 1105500 }, { "epoch": 0.66, "learning_rate": 5.139858906313936e-05, "loss": 0.3173, "step": 1106000 }, { "epoch": 0.66, "learning_rate": 5.1396489097578804e-05, "loss": 0.3109, "step": 1106500 }, { "epoch": 0.66, "learning_rate": 5.139438913201823e-05, "loss": 0.309, "step": 1107000 }, { "epoch": 0.66, "learning_rate": 5.139228916645767e-05, "loss": 0.3131, "step": 1107500 }, { "epoch": 0.66, "learning_rate": 5.139019340082823e-05, "loss": 0.31, "step": 1108000 }, { "epoch": 0.66, "learning_rate": 5.1388093435267664e-05, "loss": 0.3117, "step": 1108500 }, { "epoch": 0.66, "learning_rate": 5.13859934697071e-05, "loss": 0.3172, "step": 1109000 }, { "epoch": 0.67, "learning_rate": 5.138389350414653e-05, "loss": 0.306, "step": 1109500 }, { "epoch": 0.67, "learning_rate": 5.138179773851709e-05, "loss": 0.3107, "step": 1110000 }, { "epoch": 0.67, "learning_rate": 5.1379697772956525e-05, "loss": 0.3065, "step": 1110500 }, { "epoch": 0.67, "learning_rate": 5.137759780739596e-05, "loss": 0.3185, "step": 1111000 }, { "epoch": 0.67, "learning_rate": 5.137549784183539e-05, "loss": 0.3121, "step": 1111500 }, { "epoch": 0.67, "learning_rate": 5.1373397876274825e-05, "loss": 0.3128, "step": 1112000 }, { "epoch": 0.67, "learning_rate": 5.137129791071426e-05, "loss": 0.3061, "step": 1112500 }, { "epoch": 0.67, "learning_rate": 5.13691979451537e-05, "loss": 0.3191, "step": 1113000 }, { "epoch": 0.67, "learning_rate": 5.136709797959313e-05, "loss": 0.3084, "step": 1113500 }, { "epoch": 0.67, "learning_rate": 5.1365002213963686e-05, "loss": 0.3117, "step": 1114000 }, { "epoch": 0.67, "learning_rate": 5.1362906448334246e-05, "loss": 0.3077, "step": 1114500 }, { "epoch": 0.67, "learning_rate": 5.136080648277369e-05, "loss": 0.3097, "step": 1115000 }, { "epoch": 0.67, "learning_rate": 5.135870651721312e-05, "loss": 0.3114, "step": 1115500 }, { "epoch": 0.67, "learning_rate": 5.1356606551652554e-05, "loss": 0.3119, "step": 1116000 }, { "epoch": 0.67, "learning_rate": 5.135450658609199e-05, "loss": 0.3144, "step": 1116500 }, { "epoch": 0.67, "learning_rate": 5.135240662053142e-05, "loss": 0.3177, "step": 1117000 }, { "epoch": 0.67, "learning_rate": 5.1350306654970854e-05, "loss": 0.3086, "step": 1117500 }, { "epoch": 0.67, "learning_rate": 5.1348206689410294e-05, "loss": 0.3158, "step": 1118000 }, { "epoch": 0.67, "learning_rate": 5.134610672384973e-05, "loss": 0.31, "step": 1118500 }, { "epoch": 0.67, "learning_rate": 5.134401095822028e-05, "loss": 0.3098, "step": 1119000 }, { "epoch": 0.67, "learning_rate": 5.1341910992659715e-05, "loss": 0.3103, "step": 1119500 }, { "epoch": 0.67, "learning_rate": 5.1339815227030275e-05, "loss": 0.3075, "step": 1120000 }, { "epoch": 0.67, "learning_rate": 5.1337715261469715e-05, "loss": 0.3166, "step": 1120500 }, { "epoch": 0.67, "learning_rate": 5.133561949584027e-05, "loss": 0.3116, "step": 1121000 }, { "epoch": 0.67, "learning_rate": 5.13335195302797e-05, "loss": 0.3125, "step": 1121500 }, { "epoch": 0.67, "learning_rate": 5.133141956471914e-05, "loss": 0.3135, "step": 1122000 }, { "epoch": 0.67, "learning_rate": 5.1329319599158576e-05, "loss": 0.3099, "step": 1122500 }, { "epoch": 0.67, "learning_rate": 5.132721963359801e-05, "loss": 0.3127, "step": 1123000 }, { "epoch": 0.67, "learning_rate": 5.132511966803744e-05, "loss": 0.3086, "step": 1123500 }, { "epoch": 0.67, "learning_rate": 5.1323019702476876e-05, "loss": 0.3163, "step": 1124000 }, { "epoch": 0.67, "learning_rate": 5.132091973691631e-05, "loss": 0.315, "step": 1124500 }, { "epoch": 0.67, "learning_rate": 5.131881977135575e-05, "loss": 0.3104, "step": 1125000 }, { "epoch": 0.67, "learning_rate": 5.1316719805795184e-05, "loss": 0.3093, "step": 1125500 }, { "epoch": 0.68, "learning_rate": 5.131461984023462e-05, "loss": 0.3106, "step": 1126000 }, { "epoch": 0.68, "learning_rate": 5.131251987467406e-05, "loss": 0.3104, "step": 1126500 }, { "epoch": 0.68, "learning_rate": 5.131042410904461e-05, "loss": 0.3086, "step": 1127000 }, { "epoch": 0.68, "learning_rate": 5.1308324143484044e-05, "loss": 0.3109, "step": 1127500 }, { "epoch": 0.68, "learning_rate": 5.1306228377854605e-05, "loss": 0.3108, "step": 1128000 }, { "epoch": 0.68, "learning_rate": 5.130412841229404e-05, "loss": 0.3085, "step": 1128500 }, { "epoch": 0.68, "learning_rate": 5.130202844673347e-05, "loss": 0.317, "step": 1129000 }, { "epoch": 0.68, "learning_rate": 5.1299928481172905e-05, "loss": 0.3117, "step": 1129500 }, { "epoch": 0.68, "learning_rate": 5.1297828515612345e-05, "loss": 0.308, "step": 1130000 }, { "epoch": 0.68, "learning_rate": 5.129572855005178e-05, "loss": 0.3097, "step": 1130500 }, { "epoch": 0.68, "learning_rate": 5.129363278442233e-05, "loss": 0.3237, "step": 1131000 }, { "epoch": 0.68, "learning_rate": 5.1291532818861766e-05, "loss": 0.3097, "step": 1131500 }, { "epoch": 0.68, "learning_rate": 5.1289432853301206e-05, "loss": 0.311, "step": 1132000 }, { "epoch": 0.68, "learning_rate": 5.128733288774064e-05, "loss": 0.3076, "step": 1132500 }, { "epoch": 0.68, "learning_rate": 5.128523292218007e-05, "loss": 0.3133, "step": 1133000 }, { "epoch": 0.68, "learning_rate": 5.128313295661951e-05, "loss": 0.3097, "step": 1133500 }, { "epoch": 0.68, "learning_rate": 5.1281032991058947e-05, "loss": 0.3105, "step": 1134000 }, { "epoch": 0.68, "learning_rate": 5.127893302549838e-05, "loss": 0.3122, "step": 1134500 }, { "epoch": 0.68, "learning_rate": 5.127683305993782e-05, "loss": 0.3134, "step": 1135000 }, { "epoch": 0.68, "learning_rate": 5.1274733094377254e-05, "loss": 0.3077, "step": 1135500 }, { "epoch": 0.68, "learning_rate": 5.127263312881668e-05, "loss": 0.3069, "step": 1136000 }, { "epoch": 0.68, "learning_rate": 5.127053736318724e-05, "loss": 0.3087, "step": 1136500 }, { "epoch": 0.68, "learning_rate": 5.126843739762668e-05, "loss": 0.3053, "step": 1137000 }, { "epoch": 0.68, "learning_rate": 5.1266337432066114e-05, "loss": 0.3078, "step": 1137500 }, { "epoch": 0.68, "learning_rate": 5.126423746650555e-05, "loss": 0.3156, "step": 1138000 }, { "epoch": 0.68, "learning_rate": 5.126214170087611e-05, "loss": 0.3083, "step": 1138500 }, { "epoch": 0.68, "learning_rate": 5.126004173531554e-05, "loss": 0.3085, "step": 1139000 }, { "epoch": 0.68, "learning_rate": 5.1257941769754975e-05, "loss": 0.3128, "step": 1139500 }, { "epoch": 0.68, "learning_rate": 5.1255841804194415e-05, "loss": 0.3062, "step": 1140000 }, { "epoch": 0.68, "learning_rate": 5.125374183863385e-05, "loss": 0.3112, "step": 1140500 }, { "epoch": 0.68, "learning_rate": 5.1251641873073276e-05, "loss": 0.3126, "step": 1141000 }, { "epoch": 0.68, "learning_rate": 5.1249546107443836e-05, "loss": 0.3049, "step": 1141500 }, { "epoch": 0.68, "learning_rate": 5.1247446141883276e-05, "loss": 0.3117, "step": 1142000 }, { "epoch": 0.68, "learning_rate": 5.124534617632271e-05, "loss": 0.3092, "step": 1142500 }, { "epoch": 0.69, "learning_rate": 5.124324621076214e-05, "loss": 0.3056, "step": 1143000 }, { "epoch": 0.69, "learning_rate": 5.1241146245201577e-05, "loss": 0.3137, "step": 1143500 }, { "epoch": 0.69, "learning_rate": 5.123904627964101e-05, "loss": 0.3082, "step": 1144000 }, { "epoch": 0.69, "learning_rate": 5.1236946314080443e-05, "loss": 0.3105, "step": 1144500 }, { "epoch": 0.69, "learning_rate": 5.1234846348519884e-05, "loss": 0.3033, "step": 1145000 }, { "epoch": 0.69, "learning_rate": 5.123274638295932e-05, "loss": 0.3069, "step": 1145500 }, { "epoch": 0.69, "learning_rate": 5.123064641739876e-05, "loss": 0.3128, "step": 1146000 }, { "epoch": 0.69, "learning_rate": 5.122855065176931e-05, "loss": 0.3118, "step": 1146500 }, { "epoch": 0.69, "learning_rate": 5.1226450686208744e-05, "loss": 0.3111, "step": 1147000 }, { "epoch": 0.69, "learning_rate": 5.1224354920579305e-05, "loss": 0.3118, "step": 1147500 }, { "epoch": 0.69, "learning_rate": 5.122225495501873e-05, "loss": 0.3078, "step": 1148000 }, { "epoch": 0.69, "learning_rate": 5.122015498945817e-05, "loss": 0.3228, "step": 1148500 }, { "epoch": 0.69, "learning_rate": 5.1218055023897605e-05, "loss": 0.3066, "step": 1149000 }, { "epoch": 0.69, "learning_rate": 5.121595505833704e-05, "loss": 0.3039, "step": 1149500 }, { "epoch": 0.69, "learning_rate": 5.121385509277648e-05, "loss": 0.3124, "step": 1150000 }, { "epoch": 0.69, "learning_rate": 5.121175512721591e-05, "loss": 0.3138, "step": 1150500 }, { "epoch": 0.69, "learning_rate": 5.1209655161655346e-05, "loss": 0.3072, "step": 1151000 }, { "epoch": 0.69, "learning_rate": 5.12075593960259e-05, "loss": 0.3107, "step": 1151500 }, { "epoch": 0.69, "learning_rate": 5.120545943046534e-05, "loss": 0.3102, "step": 1152000 }, { "epoch": 0.69, "learning_rate": 5.120335946490477e-05, "loss": 0.3142, "step": 1152500 }, { "epoch": 0.69, "learning_rate": 5.120125949934421e-05, "loss": 0.3053, "step": 1153000 }, { "epoch": 0.69, "learning_rate": 5.119915953378365e-05, "loss": 0.3114, "step": 1153500 }, { "epoch": 0.69, "learning_rate": 5.11970637681542e-05, "loss": 0.3053, "step": 1154000 }, { "epoch": 0.69, "learning_rate": 5.1194963802593634e-05, "loss": 0.3085, "step": 1154500 }, { "epoch": 0.69, "learning_rate": 5.1192868036964194e-05, "loss": 0.3102, "step": 1155000 }, { "epoch": 0.69, "learning_rate": 5.119076807140363e-05, "loss": 0.3066, "step": 1155500 }, { "epoch": 0.69, "learning_rate": 5.118866810584306e-05, "loss": 0.3167, "step": 1156000 }, { "epoch": 0.69, "learning_rate": 5.1186568140282494e-05, "loss": 0.3137, "step": 1156500 }, { "epoch": 0.69, "learning_rate": 5.1184468174721935e-05, "loss": 0.3124, "step": 1157000 }, { "epoch": 0.69, "learning_rate": 5.118236820916137e-05, "loss": 0.3081, "step": 1157500 }, { "epoch": 0.69, "learning_rate": 5.11802682436008e-05, "loss": 0.3065, "step": 1158000 }, { "epoch": 0.69, "learning_rate": 5.117816827804024e-05, "loss": 0.3135, "step": 1158500 }, { "epoch": 0.69, "learning_rate": 5.1176068312479675e-05, "loss": 0.3104, "step": 1159000 }, { "epoch": 0.7, "learning_rate": 5.117396834691911e-05, "loss": 0.3116, "step": 1159500 }, { "epoch": 0.7, "learning_rate": 5.117186838135855e-05, "loss": 0.3018, "step": 1160000 }, { "epoch": 0.7, "learning_rate": 5.116976841579798e-05, "loss": 0.3103, "step": 1160500 }, { "epoch": 0.7, "learning_rate": 5.1167672650168536e-05, "loss": 0.3085, "step": 1161000 }, { "epoch": 0.7, "learning_rate": 5.1165572684607976e-05, "loss": 0.3061, "step": 1161500 }, { "epoch": 0.7, "learning_rate": 5.116347271904741e-05, "loss": 0.3103, "step": 1162000 }, { "epoch": 0.7, "learning_rate": 5.116137275348684e-05, "loss": 0.3013, "step": 1162500 }, { "epoch": 0.7, "learning_rate": 5.115928118778852e-05, "loss": 0.3129, "step": 1163000 }, { "epoch": 0.7, "learning_rate": 5.115718122222795e-05, "loss": 0.3103, "step": 1163500 }, { "epoch": 0.7, "learning_rate": 5.115508125666739e-05, "loss": 0.3079, "step": 1164000 }, { "epoch": 0.7, "learning_rate": 5.1152981291106824e-05, "loss": 0.309, "step": 1164500 }, { "epoch": 0.7, "learning_rate": 5.115088132554626e-05, "loss": 0.3125, "step": 1165000 }, { "epoch": 0.7, "learning_rate": 5.11487813599857e-05, "loss": 0.3099, "step": 1165500 }, { "epoch": 0.7, "learning_rate": 5.114668559435625e-05, "loss": 0.3082, "step": 1166000 }, { "epoch": 0.7, "learning_rate": 5.1144585628795685e-05, "loss": 0.3172, "step": 1166500 }, { "epoch": 0.7, "learning_rate": 5.1142485663235125e-05, "loss": 0.3144, "step": 1167000 }, { "epoch": 0.7, "learning_rate": 5.114038569767456e-05, "loss": 0.3042, "step": 1167500 }, { "epoch": 0.7, "learning_rate": 5.113828573211399e-05, "loss": 0.3104, "step": 1168000 }, { "epoch": 0.7, "learning_rate": 5.1136189966484546e-05, "loss": 0.3148, "step": 1168500 }, { "epoch": 0.7, "learning_rate": 5.1134094200855106e-05, "loss": 0.3128, "step": 1169000 }, { "epoch": 0.7, "learning_rate": 5.113199423529454e-05, "loss": 0.3103, "step": 1169500 }, { "epoch": 0.7, "learning_rate": 5.112989426973397e-05, "loss": 0.3151, "step": 1170000 }, { "epoch": 0.7, "learning_rate": 5.1127794304173406e-05, "loss": 0.3063, "step": 1170500 }, { "epoch": 0.7, "learning_rate": 5.1125694338612846e-05, "loss": 0.3032, "step": 1171000 }, { "epoch": 0.7, "learning_rate": 5.112359437305228e-05, "loss": 0.3051, "step": 1171500 }, { "epoch": 0.7, "learning_rate": 5.1121494407491713e-05, "loss": 0.31, "step": 1172000 }, { "epoch": 0.7, "learning_rate": 5.1119394441931154e-05, "loss": 0.3002, "step": 1172500 }, { "epoch": 0.7, "learning_rate": 5.111729447637059e-05, "loss": 0.3129, "step": 1173000 }, { "epoch": 0.7, "learning_rate": 5.111519451081002e-05, "loss": 0.3099, "step": 1173500 }, { "epoch": 0.7, "learning_rate": 5.111309454524946e-05, "loss": 0.3209, "step": 1174000 }, { "epoch": 0.7, "learning_rate": 5.1110994579688894e-05, "loss": 0.3118, "step": 1174500 }, { "epoch": 0.7, "learning_rate": 5.110889461412833e-05, "loss": 0.3078, "step": 1175000 }, { "epoch": 0.7, "learning_rate": 5.110679464856776e-05, "loss": 0.3048, "step": 1175500 }, { "epoch": 0.71, "learning_rate": 5.110469888293832e-05, "loss": 0.3091, "step": 1176000 }, { "epoch": 0.71, "learning_rate": 5.1102598917377755e-05, "loss": 0.3122, "step": 1176500 }, { "epoch": 0.71, "learning_rate": 5.110050315174831e-05, "loss": 0.315, "step": 1177000 }, { "epoch": 0.71, "learning_rate": 5.109840318618775e-05, "loss": 0.3084, "step": 1177500 }, { "epoch": 0.71, "learning_rate": 5.109630322062718e-05, "loss": 0.309, "step": 1178000 }, { "epoch": 0.71, "learning_rate": 5.1094203255066616e-05, "loss": 0.3133, "step": 1178500 }, { "epoch": 0.71, "learning_rate": 5.1092103289506056e-05, "loss": 0.3147, "step": 1179000 }, { "epoch": 0.71, "learning_rate": 5.109000332394549e-05, "loss": 0.3059, "step": 1179500 }, { "epoch": 0.71, "learning_rate": 5.1087903358384916e-05, "loss": 0.305, "step": 1180000 }, { "epoch": 0.71, "learning_rate": 5.1085803392824356e-05, "loss": 0.3112, "step": 1180500 }, { "epoch": 0.71, "learning_rate": 5.108370342726379e-05, "loss": 0.3084, "step": 1181000 }, { "epoch": 0.71, "learning_rate": 5.108160346170322e-05, "loss": 0.3086, "step": 1181500 }, { "epoch": 0.71, "learning_rate": 5.1079507696073784e-05, "loss": 0.3112, "step": 1182000 }, { "epoch": 0.71, "learning_rate": 5.1077411930444344e-05, "loss": 0.3045, "step": 1182500 }, { "epoch": 0.71, "learning_rate": 5.107531196488378e-05, "loss": 0.3078, "step": 1183000 }, { "epoch": 0.71, "learning_rate": 5.107321199932321e-05, "loss": 0.3076, "step": 1183500 }, { "epoch": 0.71, "learning_rate": 5.107111203376265e-05, "loss": 0.3031, "step": 1184000 }, { "epoch": 0.71, "learning_rate": 5.106901206820208e-05, "loss": 0.3088, "step": 1184500 }, { "epoch": 0.71, "learning_rate": 5.106691210264151e-05, "loss": 0.3087, "step": 1185000 }, { "epoch": 0.71, "learning_rate": 5.106481633701207e-05, "loss": 0.3077, "step": 1185500 }, { "epoch": 0.71, "learning_rate": 5.106271637145151e-05, "loss": 0.3093, "step": 1186000 }, { "epoch": 0.71, "learning_rate": 5.1060620605822065e-05, "loss": 0.3085, "step": 1186500 }, { "epoch": 0.71, "learning_rate": 5.10585206402615e-05, "loss": 0.3093, "step": 1187000 }, { "epoch": 0.71, "learning_rate": 5.105642067470093e-05, "loss": 0.3145, "step": 1187500 }, { "epoch": 0.71, "learning_rate": 5.105432070914037e-05, "loss": 0.3095, "step": 1188000 }, { "epoch": 0.71, "learning_rate": 5.1052220743579806e-05, "loss": 0.3068, "step": 1188500 }, { "epoch": 0.71, "learning_rate": 5.105012077801924e-05, "loss": 0.3135, "step": 1189000 }, { "epoch": 0.71, "learning_rate": 5.104802081245867e-05, "loss": 0.3069, "step": 1189500 }, { "epoch": 0.71, "learning_rate": 5.1045920846898106e-05, "loss": 0.314, "step": 1190000 }, { "epoch": 0.71, "learning_rate": 5.104382088133755e-05, "loss": 0.3107, "step": 1190500 }, { "epoch": 0.71, "learning_rate": 5.104172091577698e-05, "loss": 0.3139, "step": 1191000 }, { "epoch": 0.71, "learning_rate": 5.1039620950216414e-05, "loss": 0.3019, "step": 1191500 }, { "epoch": 0.71, "learning_rate": 5.1037520984655854e-05, "loss": 0.3086, "step": 1192000 }, { "epoch": 0.71, "learning_rate": 5.103542101909529e-05, "loss": 0.3084, "step": 1192500 }, { "epoch": 0.72, "learning_rate": 5.103332525346584e-05, "loss": 0.3104, "step": 1193000 }, { "epoch": 0.72, "learning_rate": 5.10312294878364e-05, "loss": 0.3116, "step": 1193500 }, { "epoch": 0.72, "learning_rate": 5.102912952227583e-05, "loss": 0.3084, "step": 1194000 }, { "epoch": 0.72, "learning_rate": 5.102702955671527e-05, "loss": 0.3058, "step": 1194500 }, { "epoch": 0.72, "learning_rate": 5.10249295911547e-05, "loss": 0.314, "step": 1195000 }, { "epoch": 0.72, "learning_rate": 5.102283382552526e-05, "loss": 0.3076, "step": 1195500 }, { "epoch": 0.72, "learning_rate": 5.1020733859964695e-05, "loss": 0.3134, "step": 1196000 }, { "epoch": 0.72, "learning_rate": 5.101863389440413e-05, "loss": 0.3096, "step": 1196500 }, { "epoch": 0.72, "learning_rate": 5.101653392884356e-05, "loss": 0.308, "step": 1197000 }, { "epoch": 0.72, "learning_rate": 5.1014433963283e-05, "loss": 0.2989, "step": 1197500 }, { "epoch": 0.72, "learning_rate": 5.1012333997722436e-05, "loss": 0.3078, "step": 1198000 }, { "epoch": 0.72, "learning_rate": 5.1010238232092996e-05, "loss": 0.3097, "step": 1198500 }, { "epoch": 0.72, "learning_rate": 5.100813826653242e-05, "loss": 0.3084, "step": 1199000 }, { "epoch": 0.72, "learning_rate": 5.100603830097186e-05, "loss": 0.3068, "step": 1199500 }, { "epoch": 0.72, "learning_rate": 5.10039383354113e-05, "loss": 0.3037, "step": 1200000 }, { "epoch": 0.72, "eval_loss": 0.2797383666038513, "eval_runtime": 1458.5506, "eval_samples_per_second": 361.126, "eval_steps_per_second": 60.188, "step": 1200000 }, { "epoch": 0.72, "learning_rate": 5.100184256978186e-05, "loss": 0.3071, "step": 1200500 }, { "epoch": 0.72, "learning_rate": 5.099974260422129e-05, "loss": 0.3062, "step": 1201000 }, { "epoch": 0.72, "learning_rate": 5.0997646838591844e-05, "loss": 0.3068, "step": 1201500 }, { "epoch": 0.72, "learning_rate": 5.0995546873031284e-05, "loss": 0.3068, "step": 1202000 }, { "epoch": 0.72, "learning_rate": 5.099344690747072e-05, "loss": 0.3054, "step": 1202500 }, { "epoch": 0.72, "learning_rate": 5.099134694191015e-05, "loss": 0.3091, "step": 1203000 }, { "epoch": 0.72, "learning_rate": 5.0989246976349585e-05, "loss": 0.3104, "step": 1203500 }, { "epoch": 0.72, "learning_rate": 5.098714701078902e-05, "loss": 0.3087, "step": 1204000 }, { "epoch": 0.72, "learning_rate": 5.098504704522846e-05, "loss": 0.3065, "step": 1204500 }, { "epoch": 0.72, "learning_rate": 5.098294707966789e-05, "loss": 0.3012, "step": 1205000 }, { "epoch": 0.72, "learning_rate": 5.0980847114107325e-05, "loss": 0.3059, "step": 1205500 }, { "epoch": 0.72, "learning_rate": 5.0978747148546766e-05, "loss": 0.3122, "step": 1206000 }, { "epoch": 0.72, "learning_rate": 5.09766471829862e-05, "loss": 0.3141, "step": 1206500 }, { "epoch": 0.72, "learning_rate": 5.097454721742563e-05, "loss": 0.3088, "step": 1207000 }, { "epoch": 0.72, "learning_rate": 5.097244725186507e-05, "loss": 0.3075, "step": 1207500 }, { "epoch": 0.72, "learning_rate": 5.0970347286304506e-05, "loss": 0.3066, "step": 1208000 }, { "epoch": 0.72, "learning_rate": 5.096824732074394e-05, "loss": 0.2994, "step": 1208500 }, { "epoch": 0.72, "learning_rate": 5.096614735518337e-05, "loss": 0.3003, "step": 1209000 }, { "epoch": 0.73, "learning_rate": 5.0964047389622807e-05, "loss": 0.3065, "step": 1209500 }, { "epoch": 0.73, "learning_rate": 5.096194742406224e-05, "loss": 0.3155, "step": 1210000 }, { "epoch": 0.73, "learning_rate": 5.09598516584328e-05, "loss": 0.3091, "step": 1210500 }, { "epoch": 0.73, "learning_rate": 5.095775169287224e-05, "loss": 0.3033, "step": 1211000 }, { "epoch": 0.73, "learning_rate": 5.095565172731167e-05, "loss": 0.308, "step": 1211500 }, { "epoch": 0.73, "learning_rate": 5.095355176175111e-05, "loss": 0.3097, "step": 1212000 }, { "epoch": 0.73, "learning_rate": 5.095145599612167e-05, "loss": 0.3065, "step": 1212500 }, { "epoch": 0.73, "learning_rate": 5.09493560305611e-05, "loss": 0.304, "step": 1213000 }, { "epoch": 0.73, "learning_rate": 5.0947256065000535e-05, "loss": 0.3065, "step": 1213500 }, { "epoch": 0.73, "learning_rate": 5.094515609943997e-05, "loss": 0.3025, "step": 1214000 }, { "epoch": 0.73, "learning_rate": 5.09430561338794e-05, "loss": 0.3065, "step": 1214500 }, { "epoch": 0.73, "learning_rate": 5.0940956168318835e-05, "loss": 0.3109, "step": 1215000 }, { "epoch": 0.73, "learning_rate": 5.0938856202758275e-05, "loss": 0.3112, "step": 1215500 }, { "epoch": 0.73, "learning_rate": 5.0936760437128836e-05, "loss": 0.3071, "step": 1216000 }, { "epoch": 0.73, "learning_rate": 5.093466047156826e-05, "loss": 0.3004, "step": 1216500 }, { "epoch": 0.73, "learning_rate": 5.093256470593882e-05, "loss": 0.312, "step": 1217000 }, { "epoch": 0.73, "learning_rate": 5.0930464740378256e-05, "loss": 0.3049, "step": 1217500 }, { "epoch": 0.73, "learning_rate": 5.0928364774817696e-05, "loss": 0.3033, "step": 1218000 }, { "epoch": 0.73, "learning_rate": 5.092626480925713e-05, "loss": 0.3089, "step": 1218500 }, { "epoch": 0.73, "learning_rate": 5.092416484369656e-05, "loss": 0.3145, "step": 1219000 }, { "epoch": 0.73, "learning_rate": 5.0922064878136e-05, "loss": 0.3156, "step": 1219500 }, { "epoch": 0.73, "learning_rate": 5.091996911250656e-05, "loss": 0.315, "step": 1220000 }, { "epoch": 0.73, "learning_rate": 5.091786914694599e-05, "loss": 0.309, "step": 1220500 }, { "epoch": 0.73, "learning_rate": 5.0915769181385424e-05, "loss": 0.3014, "step": 1221000 }, { "epoch": 0.73, "learning_rate": 5.091366921582486e-05, "loss": 0.3049, "step": 1221500 }, { "epoch": 0.73, "learning_rate": 5.091156925026429e-05, "loss": 0.3108, "step": 1222000 }, { "epoch": 0.73, "learning_rate": 5.090946928470373e-05, "loss": 0.303, "step": 1222500 }, { "epoch": 0.73, "learning_rate": 5.0907369319143165e-05, "loss": 0.3097, "step": 1223000 }, { "epoch": 0.73, "learning_rate": 5.090527355351372e-05, "loss": 0.3059, "step": 1223500 }, { "epoch": 0.73, "learning_rate": 5.090317358795315e-05, "loss": 0.3095, "step": 1224000 }, { "epoch": 0.73, "learning_rate": 5.090107782232371e-05, "loss": 0.3118, "step": 1224500 }, { "epoch": 0.73, "learning_rate": 5.089897785676315e-05, "loss": 0.3038, "step": 1225000 }, { "epoch": 0.73, "learning_rate": 5.0896877891202586e-05, "loss": 0.3139, "step": 1225500 }, { "epoch": 0.74, "learning_rate": 5.089477792564202e-05, "loss": 0.3037, "step": 1226000 }, { "epoch": 0.74, "learning_rate": 5.089267796008145e-05, "loss": 0.305, "step": 1226500 }, { "epoch": 0.74, "learning_rate": 5.0890577994520886e-05, "loss": 0.3096, "step": 1227000 }, { "epoch": 0.74, "learning_rate": 5.0888478028960326e-05, "loss": 0.3095, "step": 1227500 }, { "epoch": 0.74, "learning_rate": 5.088637806339976e-05, "loss": 0.3084, "step": 1228000 }, { "epoch": 0.74, "learning_rate": 5.088427809783919e-05, "loss": 0.3117, "step": 1228500 }, { "epoch": 0.74, "learning_rate": 5.088218233220975e-05, "loss": 0.3076, "step": 1229000 }, { "epoch": 0.74, "learning_rate": 5.088008236664919e-05, "loss": 0.3042, "step": 1229500 }, { "epoch": 0.74, "learning_rate": 5.087798240108862e-05, "loss": 0.3043, "step": 1230000 }, { "epoch": 0.74, "learning_rate": 5.0875882435528054e-05, "loss": 0.3102, "step": 1230500 }, { "epoch": 0.74, "learning_rate": 5.0873782469967494e-05, "loss": 0.3092, "step": 1231000 }, { "epoch": 0.74, "learning_rate": 5.087168250440693e-05, "loss": 0.3038, "step": 1231500 }, { "epoch": 0.74, "learning_rate": 5.086958253884636e-05, "loss": 0.3042, "step": 1232000 }, { "epoch": 0.74, "learning_rate": 5.08674825732858e-05, "loss": 0.3073, "step": 1232500 }, { "epoch": 0.74, "learning_rate": 5.0865382607725235e-05, "loss": 0.3067, "step": 1233000 }, { "epoch": 0.74, "learning_rate": 5.086328264216467e-05, "loss": 0.3048, "step": 1233500 }, { "epoch": 0.74, "learning_rate": 5.08611826766041e-05, "loss": 0.3049, "step": 1234000 }, { "epoch": 0.74, "learning_rate": 5.0859082711043535e-05, "loss": 0.3006, "step": 1234500 }, { "epoch": 0.74, "learning_rate": 5.0856986945414096e-05, "loss": 0.3041, "step": 1235000 }, { "epoch": 0.74, "learning_rate": 5.085488697985353e-05, "loss": 0.3099, "step": 1235500 }, { "epoch": 0.74, "learning_rate": 5.085278701429296e-05, "loss": 0.3002, "step": 1236000 }, { "epoch": 0.74, "learning_rate": 5.0850687048732396e-05, "loss": 0.3018, "step": 1236500 }, { "epoch": 0.74, "learning_rate": 5.0848587083171836e-05, "loss": 0.3062, "step": 1237000 }, { "epoch": 0.74, "learning_rate": 5.084648711761127e-05, "loss": 0.3059, "step": 1237500 }, { "epoch": 0.74, "learning_rate": 5.08443871520507e-05, "loss": 0.3135, "step": 1238000 }, { "epoch": 0.74, "learning_rate": 5.084228718649014e-05, "loss": 0.2999, "step": 1238500 }, { "epoch": 0.74, "learning_rate": 5.08401914208607e-05, "loss": 0.3073, "step": 1239000 }, { "epoch": 0.74, "learning_rate": 5.083809145530013e-05, "loss": 0.3109, "step": 1239500 }, { "epoch": 0.74, "learning_rate": 5.083599568967069e-05, "loss": 0.3086, "step": 1240000 }, { "epoch": 0.74, "learning_rate": 5.0833899924041244e-05, "loss": 0.3141, "step": 1240500 }, { "epoch": 0.74, "learning_rate": 5.083179995848068e-05, "loss": 0.3066, "step": 1241000 }, { "epoch": 0.74, "learning_rate": 5.082969999292012e-05, "loss": 0.3004, "step": 1241500 }, { "epoch": 0.74, "learning_rate": 5.082760002735955e-05, "loss": 0.3075, "step": 1242000 }, { "epoch": 0.74, "learning_rate": 5.0825500061798985e-05, "loss": 0.307, "step": 1242500 }, { "epoch": 0.75, "learning_rate": 5.0823400096238425e-05, "loss": 0.3096, "step": 1243000 }, { "epoch": 0.75, "learning_rate": 5.082130013067785e-05, "loss": 0.3095, "step": 1243500 }, { "epoch": 0.75, "learning_rate": 5.081920016511729e-05, "loss": 0.3122, "step": 1244000 }, { "epoch": 0.75, "learning_rate": 5.081710439948785e-05, "loss": 0.3042, "step": 1244500 }, { "epoch": 0.75, "learning_rate": 5.0815004433927286e-05, "loss": 0.309, "step": 1245000 }, { "epoch": 0.75, "learning_rate": 5.081290446836671e-05, "loss": 0.3074, "step": 1245500 }, { "epoch": 0.75, "learning_rate": 5.081080450280615e-05, "loss": 0.3036, "step": 1246000 }, { "epoch": 0.75, "learning_rate": 5.0808704537245586e-05, "loss": 0.3067, "step": 1246500 }, { "epoch": 0.75, "learning_rate": 5.080660877161615e-05, "loss": 0.3151, "step": 1247000 }, { "epoch": 0.75, "learning_rate": 5.080450880605558e-05, "loss": 0.3089, "step": 1247500 }, { "epoch": 0.75, "learning_rate": 5.0802408840495014e-05, "loss": 0.3111, "step": 1248000 }, { "epoch": 0.75, "learning_rate": 5.080030887493445e-05, "loss": 0.305, "step": 1248500 }, { "epoch": 0.75, "learning_rate": 5.079821310930501e-05, "loss": 0.3057, "step": 1249000 }, { "epoch": 0.75, "learning_rate": 5.079611314374444e-05, "loss": 0.3085, "step": 1249500 }, { "epoch": 0.75, "learning_rate": 5.0794017378115e-05, "loss": 0.3051, "step": 1250000 }, { "epoch": 0.75, "learning_rate": 5.0791917412554435e-05, "loss": 0.3058, "step": 1250500 }, { "epoch": 0.75, "learning_rate": 5.078981744699387e-05, "loss": 0.3027, "step": 1251000 }, { "epoch": 0.75, "learning_rate": 5.078771748143331e-05, "loss": 0.3122, "step": 1251500 }, { "epoch": 0.75, "learning_rate": 5.078561751587274e-05, "loss": 0.3189, "step": 1252000 }, { "epoch": 0.75, "learning_rate": 5.0783517550312175e-05, "loss": 0.3067, "step": 1252500 }, { "epoch": 0.75, "learning_rate": 5.078141758475161e-05, "loss": 0.3064, "step": 1253000 }, { "epoch": 0.75, "learning_rate": 5.077931761919104e-05, "loss": 0.3051, "step": 1253500 }, { "epoch": 0.75, "learning_rate": 5.0777217653630476e-05, "loss": 0.3096, "step": 1254000 }, { "epoch": 0.75, "learning_rate": 5.0775117688069916e-05, "loss": 0.3073, "step": 1254500 }, { "epoch": 0.75, "learning_rate": 5.0773021922440476e-05, "loss": 0.3052, "step": 1255000 }, { "epoch": 0.75, "learning_rate": 5.07709219568799e-05, "loss": 0.3092, "step": 1255500 }, { "epoch": 0.75, "learning_rate": 5.0768821991319336e-05, "loss": 0.3083, "step": 1256000 }, { "epoch": 0.75, "learning_rate": 5.0766722025758777e-05, "loss": 0.3073, "step": 1256500 }, { "epoch": 0.75, "learning_rate": 5.076462626012934e-05, "loss": 0.3062, "step": 1257000 }, { "epoch": 0.75, "learning_rate": 5.0762526294568764e-05, "loss": 0.3118, "step": 1257500 }, { "epoch": 0.75, "learning_rate": 5.0760426329008204e-05, "loss": 0.3038, "step": 1258000 }, { "epoch": 0.75, "learning_rate": 5.075832636344764e-05, "loss": 0.3043, "step": 1258500 }, { "epoch": 0.75, "learning_rate": 5.075622639788707e-05, "loss": 0.3058, "step": 1259000 }, { "epoch": 0.76, "learning_rate": 5.075412643232651e-05, "loss": 0.3022, "step": 1259500 }, { "epoch": 0.76, "learning_rate": 5.0752026466765944e-05, "loss": 0.3155, "step": 1260000 }, { "epoch": 0.76, "learning_rate": 5.074992650120538e-05, "loss": 0.3085, "step": 1260500 }, { "epoch": 0.76, "learning_rate": 5.074783073557593e-05, "loss": 0.3102, "step": 1261000 }, { "epoch": 0.76, "learning_rate": 5.074573077001537e-05, "loss": 0.2997, "step": 1261500 }, { "epoch": 0.76, "learning_rate": 5.0743630804454805e-05, "loss": 0.3024, "step": 1262000 }, { "epoch": 0.76, "learning_rate": 5.074153083889424e-05, "loss": 0.3075, "step": 1262500 }, { "epoch": 0.76, "learning_rate": 5.073943087333368e-05, "loss": 0.3153, "step": 1263000 }, { "epoch": 0.76, "learning_rate": 5.073733090777311e-05, "loss": 0.3095, "step": 1263500 }, { "epoch": 0.76, "learning_rate": 5.0735230942212546e-05, "loss": 0.3004, "step": 1264000 }, { "epoch": 0.76, "learning_rate": 5.0733135176583106e-05, "loss": 0.3087, "step": 1264500 }, { "epoch": 0.76, "learning_rate": 5.073103521102254e-05, "loss": 0.302, "step": 1265000 }, { "epoch": 0.76, "learning_rate": 5.072893524546197e-05, "loss": 0.3073, "step": 1265500 }, { "epoch": 0.76, "learning_rate": 5.072683527990141e-05, "loss": 0.2994, "step": 1266000 }, { "epoch": 0.76, "learning_rate": 5.072473951427197e-05, "loss": 0.3083, "step": 1266500 }, { "epoch": 0.76, "learning_rate": 5.072264374864252e-05, "loss": 0.3105, "step": 1267000 }, { "epoch": 0.76, "learning_rate": 5.0720543783081954e-05, "loss": 0.3051, "step": 1267500 }, { "epoch": 0.76, "learning_rate": 5.071844381752139e-05, "loss": 0.3062, "step": 1268000 }, { "epoch": 0.76, "learning_rate": 5.071634805189195e-05, "loss": 0.307, "step": 1268500 }, { "epoch": 0.76, "learning_rate": 5.071424808633139e-05, "loss": 0.3117, "step": 1269000 }, { "epoch": 0.76, "learning_rate": 5.0712148120770815e-05, "loss": 0.3058, "step": 1269500 }, { "epoch": 0.76, "learning_rate": 5.071004815521025e-05, "loss": 0.3137, "step": 1270000 }, { "epoch": 0.76, "learning_rate": 5.070794818964969e-05, "loss": 0.3005, "step": 1270500 }, { "epoch": 0.76, "learning_rate": 5.070584822408912e-05, "loss": 0.3064, "step": 1271000 }, { "epoch": 0.76, "learning_rate": 5.070374825852856e-05, "loss": 0.3022, "step": 1271500 }, { "epoch": 0.76, "learning_rate": 5.0701648292967995e-05, "loss": 0.3023, "step": 1272000 }, { "epoch": 0.76, "learning_rate": 5.069954832740743e-05, "loss": 0.3125, "step": 1272500 }, { "epoch": 0.76, "learning_rate": 5.069744836184687e-05, "loss": 0.3056, "step": 1273000 }, { "epoch": 0.76, "learning_rate": 5.06953483962863e-05, "loss": 0.3044, "step": 1273500 }, { "epoch": 0.76, "learning_rate": 5.0693248430725736e-05, "loss": 0.307, "step": 1274000 }, { "epoch": 0.76, "learning_rate": 5.0691148465165176e-05, "loss": 0.3079, "step": 1274500 }, { "epoch": 0.76, "learning_rate": 5.06890484996046e-05, "loss": 0.3096, "step": 1275000 }, { "epoch": 0.76, "learning_rate": 5.0686948534044036e-05, "loss": 0.3052, "step": 1275500 }, { "epoch": 0.77, "learning_rate": 5.06848527684146e-05, "loss": 0.3088, "step": 1276000 }, { "epoch": 0.77, "learning_rate": 5.068275280285404e-05, "loss": 0.3001, "step": 1276500 }, { "epoch": 0.77, "learning_rate": 5.068065283729347e-05, "loss": 0.3082, "step": 1277000 }, { "epoch": 0.77, "learning_rate": 5.06785528717329e-05, "loss": 0.3118, "step": 1277500 }, { "epoch": 0.77, "learning_rate": 5.067645290617234e-05, "loss": 0.3077, "step": 1278000 }, { "epoch": 0.77, "learning_rate": 5.067435294061177e-05, "loss": 0.3084, "step": 1278500 }, { "epoch": 0.77, "learning_rate": 5.0672252975051204e-05, "loss": 0.3013, "step": 1279000 }, { "epoch": 0.77, "learning_rate": 5.0670153009490645e-05, "loss": 0.3038, "step": 1279500 }, { "epoch": 0.77, "learning_rate": 5.066805304393008e-05, "loss": 0.3115, "step": 1280000 }, { "epoch": 0.77, "learning_rate": 5.066595727830063e-05, "loss": 0.3071, "step": 1280500 }, { "epoch": 0.77, "learning_rate": 5.066385731274007e-05, "loss": 0.3038, "step": 1281000 }, { "epoch": 0.77, "learning_rate": 5.0661757347179505e-05, "loss": 0.306, "step": 1281500 }, { "epoch": 0.77, "learning_rate": 5.065965738161894e-05, "loss": 0.3056, "step": 1282000 }, { "epoch": 0.77, "learning_rate": 5.065756161598949e-05, "loss": 0.3059, "step": 1282500 }, { "epoch": 0.77, "learning_rate": 5.065546585036005e-05, "loss": 0.3022, "step": 1283000 }, { "epoch": 0.77, "learning_rate": 5.065336588479949e-05, "loss": 0.3048, "step": 1283500 }, { "epoch": 0.77, "learning_rate": 5.0651265919238926e-05, "loss": 0.3103, "step": 1284000 }, { "epoch": 0.77, "learning_rate": 5.064916595367835e-05, "loss": 0.3044, "step": 1284500 }, { "epoch": 0.77, "learning_rate": 5.064706598811779e-05, "loss": 0.3088, "step": 1285000 }, { "epoch": 0.77, "learning_rate": 5.064496602255723e-05, "loss": 0.3109, "step": 1285500 }, { "epoch": 0.77, "learning_rate": 5.064287025692779e-05, "loss": 0.3011, "step": 1286000 }, { "epoch": 0.77, "learning_rate": 5.064077029136722e-05, "loss": 0.31, "step": 1286500 }, { "epoch": 0.77, "learning_rate": 5.0638670325806654e-05, "loss": 0.3041, "step": 1287000 }, { "epoch": 0.77, "learning_rate": 5.063657036024609e-05, "loss": 0.3038, "step": 1287500 }, { "epoch": 0.77, "learning_rate": 5.063447039468553e-05, "loss": 0.3021, "step": 1288000 }, { "epoch": 0.77, "learning_rate": 5.063237042912496e-05, "loss": 0.302, "step": 1288500 }, { "epoch": 0.77, "learning_rate": 5.0630270463564395e-05, "loss": 0.3117, "step": 1289000 }, { "epoch": 0.77, "learning_rate": 5.0628170498003835e-05, "loss": 0.305, "step": 1289500 }, { "epoch": 0.77, "learning_rate": 5.062607053244327e-05, "loss": 0.3042, "step": 1290000 }, { "epoch": 0.77, "learning_rate": 5.06239705668827e-05, "loss": 0.3092, "step": 1290500 }, { "epoch": 0.77, "learning_rate": 5.062187060132214e-05, "loss": 0.3024, "step": 1291000 }, { "epoch": 0.77, "learning_rate": 5.0619774835692696e-05, "loss": 0.3139, "step": 1291500 }, { "epoch": 0.77, "learning_rate": 5.061767487013213e-05, "loss": 0.3102, "step": 1292000 }, { "epoch": 0.77, "learning_rate": 5.061557490457156e-05, "loss": 0.31, "step": 1292500 }, { "epoch": 0.78, "learning_rate": 5.0613474939011e-05, "loss": 0.3002, "step": 1293000 }, { "epoch": 0.78, "learning_rate": 5.0611374973450436e-05, "loss": 0.3041, "step": 1293500 }, { "epoch": 0.78, "learning_rate": 5.060927500788987e-05, "loss": 0.3007, "step": 1294000 }, { "epoch": 0.78, "learning_rate": 5.060717504232931e-05, "loss": 0.3011, "step": 1294500 }, { "epoch": 0.78, "learning_rate": 5.0605079276699864e-05, "loss": 0.306, "step": 1295000 }, { "epoch": 0.78, "learning_rate": 5.060298351107042e-05, "loss": 0.3082, "step": 1295500 }, { "epoch": 0.78, "learning_rate": 5.060088774544098e-05, "loss": 0.3014, "step": 1296000 }, { "epoch": 0.78, "learning_rate": 5.0598787779880404e-05, "loss": 0.3029, "step": 1296500 }, { "epoch": 0.78, "learning_rate": 5.0596687814319844e-05, "loss": 0.299, "step": 1297000 }, { "epoch": 0.78, "learning_rate": 5.059458784875928e-05, "loss": 0.3022, "step": 1297500 }, { "epoch": 0.78, "learning_rate": 5.059248788319871e-05, "loss": 0.3054, "step": 1298000 }, { "epoch": 0.78, "learning_rate": 5.059038791763815e-05, "loss": 0.3084, "step": 1298500 }, { "epoch": 0.78, "learning_rate": 5.0588287952077585e-05, "loss": 0.3097, "step": 1299000 }, { "epoch": 0.78, "learning_rate": 5.058618798651702e-05, "loss": 0.3032, "step": 1299500 }, { "epoch": 0.78, "learning_rate": 5.058408802095646e-05, "loss": 0.3023, "step": 1300000 }, { "epoch": 0.78, "eval_loss": 0.2759383022785187, "eval_runtime": 1465.2871, "eval_samples_per_second": 359.465, "eval_steps_per_second": 59.911, "step": 1300000 }, { "epoch": 0.78, "learning_rate": 5.058199225532701e-05, "loss": 0.3049, "step": 1300500 }, { "epoch": 0.78, "learning_rate": 5.0579892289766446e-05, "loss": 0.31, "step": 1301000 }, { "epoch": 0.78, "learning_rate": 5.057779232420588e-05, "loss": 0.3104, "step": 1301500 }, { "epoch": 0.78, "learning_rate": 5.057569235864532e-05, "loss": 0.3053, "step": 1302000 }, { "epoch": 0.78, "learning_rate": 5.057359239308475e-05, "loss": 0.3023, "step": 1302500 }, { "epoch": 0.78, "learning_rate": 5.0571492427524186e-05, "loss": 0.3058, "step": 1303000 }, { "epoch": 0.78, "learning_rate": 5.0569392461963627e-05, "loss": 0.3112, "step": 1303500 }, { "epoch": 0.78, "learning_rate": 5.056729249640306e-05, "loss": 0.2983, "step": 1304000 }, { "epoch": 0.78, "learning_rate": 5.0565192530842493e-05, "loss": 0.3077, "step": 1304500 }, { "epoch": 0.78, "learning_rate": 5.0563096765213054e-05, "loss": 0.3005, "step": 1305000 }, { "epoch": 0.78, "learning_rate": 5.056099679965249e-05, "loss": 0.3078, "step": 1305500 }, { "epoch": 0.78, "learning_rate": 5.055889683409192e-05, "loss": 0.3099, "step": 1306000 }, { "epoch": 0.78, "learning_rate": 5.0556801068462474e-05, "loss": 0.2995, "step": 1306500 }, { "epoch": 0.78, "learning_rate": 5.0554701102901915e-05, "loss": 0.3043, "step": 1307000 }, { "epoch": 0.78, "learning_rate": 5.055260113734135e-05, "loss": 0.3114, "step": 1307500 }, { "epoch": 0.78, "learning_rate": 5.055050117178078e-05, "loss": 0.2994, "step": 1308000 }, { "epoch": 0.78, "learning_rate": 5.054840120622022e-05, "loss": 0.3025, "step": 1308500 }, { "epoch": 0.78, "learning_rate": 5.054630124065965e-05, "loss": 0.3044, "step": 1309000 }, { "epoch": 0.79, "learning_rate": 5.054420547503021e-05, "loss": 0.3037, "step": 1309500 }, { "epoch": 0.79, "learning_rate": 5.054210550946964e-05, "loss": 0.2991, "step": 1310000 }, { "epoch": 0.79, "learning_rate": 5.054000554390908e-05, "loss": 0.303, "step": 1310500 }, { "epoch": 0.79, "learning_rate": 5.0537909778279636e-05, "loss": 0.3036, "step": 1311000 }, { "epoch": 0.79, "learning_rate": 5.053580981271907e-05, "loss": 0.3063, "step": 1311500 }, { "epoch": 0.79, "learning_rate": 5.053370984715851e-05, "loss": 0.3034, "step": 1312000 }, { "epoch": 0.79, "learning_rate": 5.053160988159794e-05, "loss": 0.2998, "step": 1312500 }, { "epoch": 0.79, "learning_rate": 5.0529509916037377e-05, "loss": 0.3008, "step": 1313000 }, { "epoch": 0.79, "learning_rate": 5.052740995047682e-05, "loss": 0.307, "step": 1313500 }, { "epoch": 0.79, "learning_rate": 5.0525309984916244e-05, "loss": 0.304, "step": 1314000 }, { "epoch": 0.79, "learning_rate": 5.052321001935568e-05, "loss": 0.3031, "step": 1314500 }, { "epoch": 0.79, "learning_rate": 5.052111005379512e-05, "loss": 0.2981, "step": 1315000 }, { "epoch": 0.79, "learning_rate": 5.051901428816568e-05, "loss": 0.3046, "step": 1315500 }, { "epoch": 0.79, "learning_rate": 5.051691432260511e-05, "loss": 0.3044, "step": 1316000 }, { "epoch": 0.79, "learning_rate": 5.0514818556975665e-05, "loss": 0.3109, "step": 1316500 }, { "epoch": 0.79, "learning_rate": 5.0512718591415105e-05, "loss": 0.3001, "step": 1317000 }, { "epoch": 0.79, "learning_rate": 5.051061862585454e-05, "loss": 0.3049, "step": 1317500 }, { "epoch": 0.79, "learning_rate": 5.050851866029397e-05, "loss": 0.3087, "step": 1318000 }, { "epoch": 0.79, "learning_rate": 5.0506418694733405e-05, "loss": 0.2983, "step": 1318500 }, { "epoch": 0.79, "learning_rate": 5.050431872917284e-05, "loss": 0.3086, "step": 1319000 }, { "epoch": 0.79, "learning_rate": 5.050221876361227e-05, "loss": 0.3097, "step": 1319500 }, { "epoch": 0.79, "learning_rate": 5.050011879805171e-05, "loss": 0.3005, "step": 1320000 }, { "epoch": 0.79, "learning_rate": 5.0498018832491146e-05, "loss": 0.3075, "step": 1320500 }, { "epoch": 0.79, "learning_rate": 5.049591886693058e-05, "loss": 0.303, "step": 1321000 }, { "epoch": 0.79, "learning_rate": 5.049381890137002e-05, "loss": 0.2986, "step": 1321500 }, { "epoch": 0.79, "learning_rate": 5.049171893580945e-05, "loss": 0.3038, "step": 1322000 }, { "epoch": 0.79, "learning_rate": 5.0489618970248886e-05, "loss": 0.3017, "step": 1322500 }, { "epoch": 0.79, "learning_rate": 5.048751900468833e-05, "loss": 0.3043, "step": 1323000 }, { "epoch": 0.79, "learning_rate": 5.048541903912776e-05, "loss": 0.306, "step": 1323500 }, { "epoch": 0.79, "learning_rate": 5.0483323273498314e-05, "loss": 0.3093, "step": 1324000 }, { "epoch": 0.79, "learning_rate": 5.048122330793775e-05, "loss": 0.3047, "step": 1324500 }, { "epoch": 0.79, "learning_rate": 5.047912334237719e-05, "loss": 0.3066, "step": 1325000 }, { "epoch": 0.79, "learning_rate": 5.047702337681662e-05, "loss": 0.3104, "step": 1325500 }, { "epoch": 0.79, "learning_rate": 5.0474927611187174e-05, "loss": 0.3046, "step": 1326000 }, { "epoch": 0.8, "learning_rate": 5.0472827645626615e-05, "loss": 0.2993, "step": 1326500 }, { "epoch": 0.8, "learning_rate": 5.047072768006605e-05, "loss": 0.3079, "step": 1327000 }, { "epoch": 0.8, "learning_rate": 5.046862771450548e-05, "loss": 0.3145, "step": 1327500 }, { "epoch": 0.8, "learning_rate": 5.046652774894492e-05, "loss": 0.3098, "step": 1328000 }, { "epoch": 0.8, "learning_rate": 5.0464431983315475e-05, "loss": 0.3057, "step": 1328500 }, { "epoch": 0.8, "learning_rate": 5.046233201775491e-05, "loss": 0.309, "step": 1329000 }, { "epoch": 0.8, "learning_rate": 5.046023205219434e-05, "loss": 0.3114, "step": 1329500 }, { "epoch": 0.8, "learning_rate": 5.045813208663378e-05, "loss": 0.3034, "step": 1330000 }, { "epoch": 0.8, "learning_rate": 5.0456032121073216e-05, "loss": 0.3077, "step": 1330500 }, { "epoch": 0.8, "learning_rate": 5.045393215551265e-05, "loss": 0.3058, "step": 1331000 }, { "epoch": 0.8, "learning_rate": 5.045183218995208e-05, "loss": 0.304, "step": 1331500 }, { "epoch": 0.8, "learning_rate": 5.0449732224391516e-05, "loss": 0.3049, "step": 1332000 }, { "epoch": 0.8, "learning_rate": 5.044763645876208e-05, "loss": 0.3054, "step": 1332500 }, { "epoch": 0.8, "learning_rate": 5.044553649320151e-05, "loss": 0.3048, "step": 1333000 }, { "epoch": 0.8, "learning_rate": 5.044343652764095e-05, "loss": 0.3052, "step": 1333500 }, { "epoch": 0.8, "learning_rate": 5.044133656208038e-05, "loss": 0.3097, "step": 1334000 }, { "epoch": 0.8, "learning_rate": 5.043924079645094e-05, "loss": 0.3062, "step": 1334500 }, { "epoch": 0.8, "learning_rate": 5.043714083089038e-05, "loss": 0.3103, "step": 1335000 }, { "epoch": 0.8, "learning_rate": 5.043504086532981e-05, "loss": 0.3032, "step": 1335500 }, { "epoch": 0.8, "learning_rate": 5.043294089976924e-05, "loss": 0.311, "step": 1336000 }, { "epoch": 0.8, "learning_rate": 5.043084093420868e-05, "loss": 0.305, "step": 1336500 }, { "epoch": 0.8, "learning_rate": 5.042874516857924e-05, "loss": 0.3084, "step": 1337000 }, { "epoch": 0.8, "learning_rate": 5.042664520301867e-05, "loss": 0.2982, "step": 1337500 }, { "epoch": 0.8, "learning_rate": 5.0424545237458105e-05, "loss": 0.3082, "step": 1338000 }, { "epoch": 0.8, "learning_rate": 5.042244527189754e-05, "loss": 0.303, "step": 1338500 }, { "epoch": 0.8, "learning_rate": 5.04203495062681e-05, "loss": 0.3045, "step": 1339000 }, { "epoch": 0.8, "learning_rate": 5.041824954070753e-05, "loss": 0.2937, "step": 1339500 }, { "epoch": 0.8, "learning_rate": 5.0416153775078086e-05, "loss": 0.3098, "step": 1340000 }, { "epoch": 0.8, "learning_rate": 5.0414053809517526e-05, "loss": 0.3043, "step": 1340500 }, { "epoch": 0.8, "learning_rate": 5.041195384395696e-05, "loss": 0.3033, "step": 1341000 }, { "epoch": 0.8, "learning_rate": 5.040985387839639e-05, "loss": 0.3006, "step": 1341500 }, { "epoch": 0.8, "learning_rate": 5.0407753912835834e-05, "loss": 0.309, "step": 1342000 }, { "epoch": 0.8, "learning_rate": 5.040565394727527e-05, "loss": 0.3073, "step": 1342500 }, { "epoch": 0.81, "learning_rate": 5.04035539817147e-05, "loss": 0.307, "step": 1343000 }, { "epoch": 0.81, "learning_rate": 5.0401454016154134e-05, "loss": 0.3077, "step": 1343500 }, { "epoch": 0.81, "learning_rate": 5.039935405059357e-05, "loss": 0.3004, "step": 1344000 }, { "epoch": 0.81, "learning_rate": 5.0397254085033e-05, "loss": 0.3079, "step": 1344500 }, { "epoch": 0.81, "learning_rate": 5.039515831940356e-05, "loss": 0.2992, "step": 1345000 }, { "epoch": 0.81, "learning_rate": 5.0393058353842995e-05, "loss": 0.302, "step": 1345500 }, { "epoch": 0.81, "learning_rate": 5.0390962588213555e-05, "loss": 0.3052, "step": 1346000 }, { "epoch": 0.81, "learning_rate": 5.038886262265299e-05, "loss": 0.3105, "step": 1346500 }, { "epoch": 0.81, "learning_rate": 5.038676265709242e-05, "loss": 0.306, "step": 1347000 }, { "epoch": 0.81, "learning_rate": 5.038466269153186e-05, "loss": 0.3016, "step": 1347500 }, { "epoch": 0.81, "learning_rate": 5.038256272597129e-05, "loss": 0.307, "step": 1348000 }, { "epoch": 0.81, "learning_rate": 5.038046696034185e-05, "loss": 0.3078, "step": 1348500 }, { "epoch": 0.81, "learning_rate": 5.037836699478129e-05, "loss": 0.3048, "step": 1349000 }, { "epoch": 0.81, "learning_rate": 5.037626702922072e-05, "loss": 0.3055, "step": 1349500 }, { "epoch": 0.81, "learning_rate": 5.0374167063660156e-05, "loss": 0.3076, "step": 1350000 }, { "epoch": 0.81, "learning_rate": 5.037206709809959e-05, "loss": 0.3041, "step": 1350500 }, { "epoch": 0.81, "learning_rate": 5.036996713253902e-05, "loss": 0.3071, "step": 1351000 }, { "epoch": 0.81, "learning_rate": 5.036786716697846e-05, "loss": 0.3024, "step": 1351500 }, { "epoch": 0.81, "learning_rate": 5.03657672014179e-05, "loss": 0.2996, "step": 1352000 }, { "epoch": 0.81, "learning_rate": 5.036366723585733e-05, "loss": 0.3012, "step": 1352500 }, { "epoch": 0.81, "learning_rate": 5.0361571470227884e-05, "loss": 0.3062, "step": 1353000 }, { "epoch": 0.81, "learning_rate": 5.035947150466732e-05, "loss": 0.3078, "step": 1353500 }, { "epoch": 0.81, "learning_rate": 5.035737153910676e-05, "loss": 0.2967, "step": 1354000 }, { "epoch": 0.81, "learning_rate": 5.035527577347732e-05, "loss": 0.3052, "step": 1354500 }, { "epoch": 0.81, "learning_rate": 5.0353175807916745e-05, "loss": 0.2992, "step": 1355000 }, { "epoch": 0.81, "learning_rate": 5.0351080042287305e-05, "loss": 0.3068, "step": 1355500 }, { "epoch": 0.81, "learning_rate": 5.0348980076726745e-05, "loss": 0.3027, "step": 1356000 }, { "epoch": 0.81, "learning_rate": 5.034688011116618e-05, "loss": 0.3026, "step": 1356500 }, { "epoch": 0.81, "learning_rate": 5.034478014560561e-05, "loss": 0.3034, "step": 1357000 }, { "epoch": 0.81, "learning_rate": 5.0342680180045046e-05, "loss": 0.2998, "step": 1357500 }, { "epoch": 0.81, "learning_rate": 5.034058021448448e-05, "loss": 0.3109, "step": 1358000 }, { "epoch": 0.81, "learning_rate": 5.033848024892391e-05, "loss": 0.3031, "step": 1358500 }, { "epoch": 0.81, "learning_rate": 5.033638028336335e-05, "loss": 0.2982, "step": 1359000 }, { "epoch": 0.82, "learning_rate": 5.0334280317802786e-05, "loss": 0.3013, "step": 1359500 }, { "epoch": 0.82, "learning_rate": 5.033218455217334e-05, "loss": 0.3116, "step": 1360000 }, { "epoch": 0.82, "learning_rate": 5.033008458661277e-05, "loss": 0.3096, "step": 1360500 }, { "epoch": 0.82, "learning_rate": 5.0327984621052214e-05, "loss": 0.299, "step": 1361000 }, { "epoch": 0.82, "learning_rate": 5.032588465549165e-05, "loss": 0.3054, "step": 1361500 }, { "epoch": 0.82, "learning_rate": 5.032378468993108e-05, "loss": 0.3015, "step": 1362000 }, { "epoch": 0.82, "learning_rate": 5.032168472437052e-05, "loss": 0.3066, "step": 1362500 }, { "epoch": 0.82, "learning_rate": 5.0319584758809954e-05, "loss": 0.3028, "step": 1363000 }, { "epoch": 0.82, "learning_rate": 5.0317484793249394e-05, "loss": 0.306, "step": 1363500 }, { "epoch": 0.82, "learning_rate": 5.031538902761995e-05, "loss": 0.3102, "step": 1364000 }, { "epoch": 0.82, "learning_rate": 5.031328906205938e-05, "loss": 0.3093, "step": 1364500 }, { "epoch": 0.82, "learning_rate": 5.0311189096498815e-05, "loss": 0.303, "step": 1365000 }, { "epoch": 0.82, "learning_rate": 5.0309089130938255e-05, "loss": 0.3084, "step": 1365500 }, { "epoch": 0.82, "learning_rate": 5.030698916537769e-05, "loss": 0.2974, "step": 1366000 }, { "epoch": 0.82, "learning_rate": 5.030489339974824e-05, "loss": 0.3004, "step": 1366500 }, { "epoch": 0.82, "learning_rate": 5.0302793434187676e-05, "loss": 0.3035, "step": 1367000 }, { "epoch": 0.82, "learning_rate": 5.0300693468627116e-05, "loss": 0.3005, "step": 1367500 }, { "epoch": 0.82, "learning_rate": 5.029859350306655e-05, "loss": 0.3008, "step": 1368000 }, { "epoch": 0.82, "learning_rate": 5.02964977374371e-05, "loss": 0.3037, "step": 1368500 }, { "epoch": 0.82, "learning_rate": 5.0294397771876536e-05, "loss": 0.3061, "step": 1369000 }, { "epoch": 0.82, "learning_rate": 5.029229780631598e-05, "loss": 0.3054, "step": 1369500 }, { "epoch": 0.82, "learning_rate": 5.029019784075541e-05, "loss": 0.3011, "step": 1370000 }, { "epoch": 0.82, "learning_rate": 5.028809787519485e-05, "loss": 0.3031, "step": 1370500 }, { "epoch": 0.82, "learning_rate": 5.0286002109565404e-05, "loss": 0.3025, "step": 1371000 }, { "epoch": 0.82, "learning_rate": 5.0283906343935964e-05, "loss": 0.2983, "step": 1371500 }, { "epoch": 0.82, "learning_rate": 5.028180637837539e-05, "loss": 0.3057, "step": 1372000 }, { "epoch": 0.82, "learning_rate": 5.0279706412814824e-05, "loss": 0.309, "step": 1372500 }, { "epoch": 0.82, "learning_rate": 5.0277606447254265e-05, "loss": 0.3126, "step": 1373000 }, { "epoch": 0.82, "learning_rate": 5.02755064816937e-05, "loss": 0.3077, "step": 1373500 }, { "epoch": 0.82, "learning_rate": 5.027341071606426e-05, "loss": 0.2997, "step": 1374000 }, { "epoch": 0.82, "learning_rate": 5.0271310750503685e-05, "loss": 0.3055, "step": 1374500 }, { "epoch": 0.82, "learning_rate": 5.0269214984874245e-05, "loss": 0.3148, "step": 1375000 }, { "epoch": 0.82, "learning_rate": 5.0267115019313686e-05, "loss": 0.3027, "step": 1375500 }, { "epoch": 0.82, "learning_rate": 5.026501505375312e-05, "loss": 0.3017, "step": 1376000 }, { "epoch": 0.83, "learning_rate": 5.026291508819255e-05, "loss": 0.3066, "step": 1376500 }, { "epoch": 0.83, "learning_rate": 5.0260815122631986e-05, "loss": 0.2999, "step": 1377000 }, { "epoch": 0.83, "learning_rate": 5.025871515707142e-05, "loss": 0.3067, "step": 1377500 }, { "epoch": 0.83, "learning_rate": 5.025661519151086e-05, "loss": 0.305, "step": 1378000 }, { "epoch": 0.83, "learning_rate": 5.025451522595029e-05, "loss": 0.295, "step": 1378500 }, { "epoch": 0.83, "learning_rate": 5.025241526038973e-05, "loss": 0.3043, "step": 1379000 }, { "epoch": 0.83, "learning_rate": 5.025031949476028e-05, "loss": 0.2964, "step": 1379500 }, { "epoch": 0.83, "learning_rate": 5.024821952919972e-05, "loss": 0.3034, "step": 1380000 }, { "epoch": 0.83, "learning_rate": 5.024612376357028e-05, "loss": 0.3033, "step": 1380500 }, { "epoch": 0.83, "learning_rate": 5.0244023798009714e-05, "loss": 0.3096, "step": 1381000 }, { "epoch": 0.83, "learning_rate": 5.024192383244914e-05, "loss": 0.3023, "step": 1381500 }, { "epoch": 0.83, "learning_rate": 5.023982386688858e-05, "loss": 0.3099, "step": 1382000 }, { "epoch": 0.83, "learning_rate": 5.0237723901328015e-05, "loss": 0.3063, "step": 1382500 }, { "epoch": 0.83, "learning_rate": 5.0235623935767455e-05, "loss": 0.3067, "step": 1383000 }, { "epoch": 0.83, "learning_rate": 5.023352397020689e-05, "loss": 0.3127, "step": 1383500 }, { "epoch": 0.83, "learning_rate": 5.023142400464632e-05, "loss": 0.3057, "step": 1384000 }, { "epoch": 0.83, "learning_rate": 5.022932403908576e-05, "loss": 0.3044, "step": 1384500 }, { "epoch": 0.83, "learning_rate": 5.0227224073525196e-05, "loss": 0.3054, "step": 1385000 }, { "epoch": 0.83, "learning_rate": 5.022512830789575e-05, "loss": 0.3058, "step": 1385500 }, { "epoch": 0.83, "learning_rate": 5.022302834233518e-05, "loss": 0.301, "step": 1386000 }, { "epoch": 0.83, "learning_rate": 5.022092837677462e-05, "loss": 0.3075, "step": 1386500 }, { "epoch": 0.83, "learning_rate": 5.0218828411214056e-05, "loss": 0.3, "step": 1387000 }, { "epoch": 0.83, "learning_rate": 5.021672844565349e-05, "loss": 0.3023, "step": 1387500 }, { "epoch": 0.83, "learning_rate": 5.021462848009293e-05, "loss": 0.306, "step": 1388000 }, { "epoch": 0.83, "learning_rate": 5.0212528514532363e-05, "loss": 0.3022, "step": 1388500 }, { "epoch": 0.83, "learning_rate": 5.02104285489718e-05, "loss": 0.3045, "step": 1389000 }, { "epoch": 0.83, "learning_rate": 5.020833278334235e-05, "loss": 0.3002, "step": 1389500 }, { "epoch": 0.83, "learning_rate": 5.020623281778179e-05, "loss": 0.2982, "step": 1390000 }, { "epoch": 0.83, "learning_rate": 5.0204132852221224e-05, "loss": 0.301, "step": 1390500 }, { "epoch": 0.83, "learning_rate": 5.020203288666066e-05, "loss": 0.3046, "step": 1391000 }, { "epoch": 0.83, "learning_rate": 5.019994132096233e-05, "loss": 0.3048, "step": 1391500 }, { "epoch": 0.83, "learning_rate": 5.019784135540177e-05, "loss": 0.3056, "step": 1392000 }, { "epoch": 0.83, "learning_rate": 5.0195741389841205e-05, "loss": 0.2999, "step": 1392500 }, { "epoch": 0.84, "learning_rate": 5.019364142428064e-05, "loss": 0.3064, "step": 1393000 }, { "epoch": 0.84, "learning_rate": 5.019154145872008e-05, "loss": 0.3112, "step": 1393500 }, { "epoch": 0.84, "learning_rate": 5.018944569309063e-05, "loss": 0.3055, "step": 1394000 }, { "epoch": 0.84, "learning_rate": 5.0187345727530066e-05, "loss": 0.2989, "step": 1394500 }, { "epoch": 0.84, "learning_rate": 5.01852457619695e-05, "loss": 0.3059, "step": 1395000 }, { "epoch": 0.84, "learning_rate": 5.018314579640894e-05, "loss": 0.3059, "step": 1395500 }, { "epoch": 0.84, "learning_rate": 5.018105003077949e-05, "loss": 0.2952, "step": 1396000 }, { "epoch": 0.84, "learning_rate": 5.0178950065218926e-05, "loss": 0.3012, "step": 1396500 }, { "epoch": 0.84, "learning_rate": 5.017685429958949e-05, "loss": 0.3031, "step": 1397000 }, { "epoch": 0.84, "learning_rate": 5.017475853396004e-05, "loss": 0.3037, "step": 1397500 }, { "epoch": 0.84, "learning_rate": 5.017265856839948e-05, "loss": 0.2945, "step": 1398000 }, { "epoch": 0.84, "learning_rate": 5.0170558602838914e-05, "loss": 0.2971, "step": 1398500 }, { "epoch": 0.84, "learning_rate": 5.016845863727835e-05, "loss": 0.2977, "step": 1399000 }, { "epoch": 0.84, "learning_rate": 5.016635867171779e-05, "loss": 0.2943, "step": 1399500 }, { "epoch": 0.84, "learning_rate": 5.016426290608834e-05, "loss": 0.3009, "step": 1400000 }, { "epoch": 0.84, "eval_loss": 0.27523356676101685, "eval_runtime": 1475.9951, "eval_samples_per_second": 356.858, "eval_steps_per_second": 59.476, "step": 1400000 }, { "epoch": 0.84, "learning_rate": 5.0162162940527775e-05, "loss": 0.3036, "step": 1400500 }, { "epoch": 0.84, "learning_rate": 5.016006297496721e-05, "loss": 0.3034, "step": 1401000 }, { "epoch": 0.84, "learning_rate": 5.015796300940665e-05, "loss": 0.3032, "step": 1401500 }, { "epoch": 0.84, "learning_rate": 5.015586304384608e-05, "loss": 0.3007, "step": 1402000 }, { "epoch": 0.84, "learning_rate": 5.0153763078285515e-05, "loss": 0.2955, "step": 1402500 }, { "epoch": 0.84, "learning_rate": 5.015166311272495e-05, "loss": 0.299, "step": 1403000 }, { "epoch": 0.84, "learning_rate": 5.014956314716438e-05, "loss": 0.3004, "step": 1403500 }, { "epoch": 0.84, "learning_rate": 5.014746318160382e-05, "loss": 0.2987, "step": 1404000 }, { "epoch": 0.84, "learning_rate": 5.014536741597438e-05, "loss": 0.3003, "step": 1404500 }, { "epoch": 0.84, "learning_rate": 5.0143271650344936e-05, "loss": 0.31, "step": 1405000 }, { "epoch": 0.84, "learning_rate": 5.014117168478437e-05, "loss": 0.3024, "step": 1405500 }, { "epoch": 0.84, "learning_rate": 5.0139071719223803e-05, "loss": 0.3055, "step": 1406000 }, { "epoch": 0.84, "learning_rate": 5.0136971753663244e-05, "loss": 0.3036, "step": 1406500 }, { "epoch": 0.84, "learning_rate": 5.013487178810268e-05, "loss": 0.3069, "step": 1407000 }, { "epoch": 0.84, "learning_rate": 5.0132771822542104e-05, "loss": 0.3099, "step": 1407500 }, { "epoch": 0.84, "learning_rate": 5.0130671856981544e-05, "loss": 0.3007, "step": 1408000 }, { "epoch": 0.84, "learning_rate": 5.012857189142098e-05, "loss": 0.3031, "step": 1408500 }, { "epoch": 0.84, "learning_rate": 5.012647192586041e-05, "loss": 0.3013, "step": 1409000 }, { "epoch": 0.85, "learning_rate": 5.012437196029985e-05, "loss": 0.298, "step": 1409500 }, { "epoch": 0.85, "learning_rate": 5.0122271994739285e-05, "loss": 0.3012, "step": 1410000 }, { "epoch": 0.85, "learning_rate": 5.012017202917872e-05, "loss": 0.3077, "step": 1410500 }, { "epoch": 0.85, "learning_rate": 5.011807206361816e-05, "loss": 0.3046, "step": 1411000 }, { "epoch": 0.85, "learning_rate": 5.011597209805759e-05, "loss": 0.3038, "step": 1411500 }, { "epoch": 0.85, "learning_rate": 5.0113872132497025e-05, "loss": 0.3036, "step": 1412000 }, { "epoch": 0.85, "learning_rate": 5.0111772166936465e-05, "loss": 0.3045, "step": 1412500 }, { "epoch": 0.85, "learning_rate": 5.010967640130702e-05, "loss": 0.3045, "step": 1413000 }, { "epoch": 0.85, "learning_rate": 5.010757643574645e-05, "loss": 0.3011, "step": 1413500 }, { "epoch": 0.85, "learning_rate": 5.010547647018589e-05, "loss": 0.3038, "step": 1414000 }, { "epoch": 0.85, "learning_rate": 5.0103380704556446e-05, "loss": 0.3098, "step": 1414500 }, { "epoch": 0.85, "learning_rate": 5.010128073899588e-05, "loss": 0.2999, "step": 1415000 }, { "epoch": 0.85, "learning_rate": 5.009918077343531e-05, "loss": 0.3049, "step": 1415500 }, { "epoch": 0.85, "learning_rate": 5.0097080807874753e-05, "loss": 0.2986, "step": 1416000 }, { "epoch": 0.85, "learning_rate": 5.009498084231419e-05, "loss": 0.302, "step": 1416500 }, { "epoch": 0.85, "learning_rate": 5.009288087675362e-05, "loss": 0.3072, "step": 1417000 }, { "epoch": 0.85, "learning_rate": 5.009078091119306e-05, "loss": 0.3013, "step": 1417500 }, { "epoch": 0.85, "learning_rate": 5.008868094563249e-05, "loss": 0.3026, "step": 1418000 }, { "epoch": 0.85, "learning_rate": 5.008658098007192e-05, "loss": 0.2991, "step": 1418500 }, { "epoch": 0.85, "learning_rate": 5.008448521444248e-05, "loss": 0.3057, "step": 1419000 }, { "epoch": 0.85, "learning_rate": 5.008238524888192e-05, "loss": 0.3054, "step": 1419500 }, { "epoch": 0.85, "learning_rate": 5.0080285283321355e-05, "loss": 0.3013, "step": 1420000 }, { "epoch": 0.85, "learning_rate": 5.007818531776079e-05, "loss": 0.299, "step": 1420500 }, { "epoch": 0.85, "learning_rate": 5.007608535220022e-05, "loss": 0.3045, "step": 1421000 }, { "epoch": 0.85, "learning_rate": 5.007398958657078e-05, "loss": 0.3032, "step": 1421500 }, { "epoch": 0.85, "learning_rate": 5.0071889621010216e-05, "loss": 0.3066, "step": 1422000 }, { "epoch": 0.85, "learning_rate": 5.006978965544965e-05, "loss": 0.2975, "step": 1422500 }, { "epoch": 0.85, "learning_rate": 5.006769388982021e-05, "loss": 0.3058, "step": 1423000 }, { "epoch": 0.85, "learning_rate": 5.006559392425964e-05, "loss": 0.3032, "step": 1423500 }, { "epoch": 0.85, "learning_rate": 5.0063493958699076e-05, "loss": 0.3023, "step": 1424000 }, { "epoch": 0.85, "learning_rate": 5.0061393993138517e-05, "loss": 0.3036, "step": 1424500 }, { "epoch": 0.85, "learning_rate": 5.005929402757794e-05, "loss": 0.3081, "step": 1425000 }, { "epoch": 0.85, "learning_rate": 5.005719406201738e-05, "loss": 0.2976, "step": 1425500 }, { "epoch": 0.85, "learning_rate": 5.005509409645682e-05, "loss": 0.3035, "step": 1426000 }, { "epoch": 0.86, "learning_rate": 5.005299413089625e-05, "loss": 0.3037, "step": 1426500 }, { "epoch": 0.86, "learning_rate": 5.0050894165335684e-05, "loss": 0.3042, "step": 1427000 }, { "epoch": 0.86, "learning_rate": 5.0048798399706244e-05, "loss": 0.3017, "step": 1427500 }, { "epoch": 0.86, "learning_rate": 5.004669843414568e-05, "loss": 0.3074, "step": 1428000 }, { "epoch": 0.86, "learning_rate": 5.004459846858511e-05, "loss": 0.3005, "step": 1428500 }, { "epoch": 0.86, "learning_rate": 5.004249850302455e-05, "loss": 0.3021, "step": 1429000 }, { "epoch": 0.86, "learning_rate": 5.004040273739511e-05, "loss": 0.3073, "step": 1429500 }, { "epoch": 0.86, "learning_rate": 5.003830277183454e-05, "loss": 0.3014, "step": 1430000 }, { "epoch": 0.86, "learning_rate": 5.003620280627397e-05, "loss": 0.2955, "step": 1430500 }, { "epoch": 0.86, "learning_rate": 5.003410284071341e-05, "loss": 0.3037, "step": 1431000 }, { "epoch": 0.86, "learning_rate": 5.0032002875152845e-05, "loss": 0.3043, "step": 1431500 }, { "epoch": 0.86, "learning_rate": 5.0029907109523406e-05, "loss": 0.3015, "step": 1432000 }, { "epoch": 0.86, "learning_rate": 5.002780714396283e-05, "loss": 0.311, "step": 1432500 }, { "epoch": 0.86, "learning_rate": 5.002570717840227e-05, "loss": 0.3029, "step": 1433000 }, { "epoch": 0.86, "learning_rate": 5.0023607212841706e-05, "loss": 0.3036, "step": 1433500 }, { "epoch": 0.86, "learning_rate": 5.0021511447212267e-05, "loss": 0.2964, "step": 1434000 }, { "epoch": 0.86, "learning_rate": 5.00194114816517e-05, "loss": 0.2977, "step": 1434500 }, { "epoch": 0.86, "learning_rate": 5.0017311516091133e-05, "loss": 0.2998, "step": 1435000 }, { "epoch": 0.86, "learning_rate": 5.001521155053057e-05, "loss": 0.3067, "step": 1435500 }, { "epoch": 0.86, "learning_rate": 5.001311578490113e-05, "loss": 0.3025, "step": 1436000 }, { "epoch": 0.86, "learning_rate": 5.001101581934057e-05, "loss": 0.3046, "step": 1436500 }, { "epoch": 0.86, "learning_rate": 5.0008915853779994e-05, "loss": 0.3071, "step": 1437000 }, { "epoch": 0.86, "learning_rate": 5.000681588821943e-05, "loss": 0.2998, "step": 1437500 }, { "epoch": 0.86, "learning_rate": 5.000471592265887e-05, "loss": 0.2985, "step": 1438000 }, { "epoch": 0.86, "learning_rate": 5.00026159570983e-05, "loss": 0.3029, "step": 1438500 }, { "epoch": 0.86, "learning_rate": 5.0000515991537735e-05, "loss": 0.3, "step": 1439000 }, { "epoch": 0.86, "learning_rate": 4.999842022590829e-05, "loss": 0.3051, "step": 1439500 }, { "epoch": 0.86, "learning_rate": 4.999632026034773e-05, "loss": 0.3009, "step": 1440000 }, { "epoch": 0.86, "learning_rate": 4.999422029478716e-05, "loss": 0.307, "step": 1440500 }, { "epoch": 0.86, "learning_rate": 4.999212452915772e-05, "loss": 0.2938, "step": 1441000 }, { "epoch": 0.86, "learning_rate": 4.999002456359716e-05, "loss": 0.3019, "step": 1441500 }, { "epoch": 0.86, "learning_rate": 4.998792459803659e-05, "loss": 0.3044, "step": 1442000 }, { "epoch": 0.86, "learning_rate": 4.998582463247602e-05, "loss": 0.3072, "step": 1442500 }, { "epoch": 0.87, "learning_rate": 4.998372466691546e-05, "loss": 0.3023, "step": 1443000 }, { "epoch": 0.87, "learning_rate": 4.9981624701354897e-05, "loss": 0.3017, "step": 1443500 }, { "epoch": 0.87, "learning_rate": 4.997952473579433e-05, "loss": 0.3041, "step": 1444000 }, { "epoch": 0.87, "learning_rate": 4.9977428970164884e-05, "loss": 0.3117, "step": 1444500 }, { "epoch": 0.87, "learning_rate": 4.9975329004604324e-05, "loss": 0.2993, "step": 1445000 }, { "epoch": 0.87, "learning_rate": 4.997322903904376e-05, "loss": 0.3017, "step": 1445500 }, { "epoch": 0.87, "learning_rate": 4.997112907348319e-05, "loss": 0.302, "step": 1446000 }, { "epoch": 0.87, "learning_rate": 4.996902910792263e-05, "loss": 0.3038, "step": 1446500 }, { "epoch": 0.87, "learning_rate": 4.9966929142362064e-05, "loss": 0.2988, "step": 1447000 }, { "epoch": 0.87, "learning_rate": 4.99648291768015e-05, "loss": 0.3063, "step": 1447500 }, { "epoch": 0.87, "learning_rate": 4.996272921124094e-05, "loss": 0.3035, "step": 1448000 }, { "epoch": 0.87, "learning_rate": 4.996062924568037e-05, "loss": 0.3099, "step": 1448500 }, { "epoch": 0.87, "learning_rate": 4.9958529280119805e-05, "loss": 0.3012, "step": 1449000 }, { "epoch": 0.87, "learning_rate": 4.995642931455924e-05, "loss": 0.3, "step": 1449500 }, { "epoch": 0.87, "learning_rate": 4.995432934899867e-05, "loss": 0.3, "step": 1450000 }, { "epoch": 0.87, "learning_rate": 4.995222938343811e-05, "loss": 0.2999, "step": 1450500 }, { "epoch": 0.87, "learning_rate": 4.995013361780867e-05, "loss": 0.3069, "step": 1451000 }, { "epoch": 0.87, "learning_rate": 4.9948037852179226e-05, "loss": 0.3033, "step": 1451500 }, { "epoch": 0.87, "learning_rate": 4.994593788661866e-05, "loss": 0.2922, "step": 1452000 }, { "epoch": 0.87, "learning_rate": 4.994383792105809e-05, "loss": 0.3014, "step": 1452500 }, { "epoch": 0.87, "learning_rate": 4.994173795549753e-05, "loss": 0.2987, "step": 1453000 }, { "epoch": 0.87, "learning_rate": 4.993964218986809e-05, "loss": 0.3102, "step": 1453500 }, { "epoch": 0.87, "learning_rate": 4.993754222430752e-05, "loss": 0.3072, "step": 1454000 }, { "epoch": 0.87, "learning_rate": 4.9935442258746954e-05, "loss": 0.2953, "step": 1454500 }, { "epoch": 0.87, "learning_rate": 4.9933342293186394e-05, "loss": 0.3036, "step": 1455000 }, { "epoch": 0.87, "learning_rate": 4.993124232762583e-05, "loss": 0.3079, "step": 1455500 }, { "epoch": 0.87, "learning_rate": 4.992914236206526e-05, "loss": 0.3103, "step": 1456000 }, { "epoch": 0.87, "learning_rate": 4.99270423965047e-05, "loss": 0.297, "step": 1456500 }, { "epoch": 0.87, "learning_rate": 4.992494243094413e-05, "loss": 0.3062, "step": 1457000 }, { "epoch": 0.87, "learning_rate": 4.992284246538357e-05, "loss": 0.2995, "step": 1457500 }, { "epoch": 0.87, "learning_rate": 4.9920742499823e-05, "loss": 0.2985, "step": 1458000 }, { "epoch": 0.87, "learning_rate": 4.9918642534262435e-05, "loss": 0.3071, "step": 1458500 }, { "epoch": 0.87, "learning_rate": 4.991654676863299e-05, "loss": 0.3013, "step": 1459000 }, { "epoch": 0.88, "learning_rate": 4.991444680307243e-05, "loss": 0.2939, "step": 1459500 }, { "epoch": 0.88, "learning_rate": 4.991234683751186e-05, "loss": 0.3017, "step": 1460000 }, { "epoch": 0.88, "learning_rate": 4.9910246871951296e-05, "loss": 0.3048, "step": 1460500 }, { "epoch": 0.88, "learning_rate": 4.9908146906390736e-05, "loss": 0.3051, "step": 1461000 }, { "epoch": 0.88, "learning_rate": 4.990604694083017e-05, "loss": 0.2996, "step": 1461500 }, { "epoch": 0.88, "learning_rate": 4.990395117520072e-05, "loss": 0.3063, "step": 1462000 }, { "epoch": 0.88, "learning_rate": 4.9901851209640156e-05, "loss": 0.3084, "step": 1462500 }, { "epoch": 0.88, "learning_rate": 4.98997512440796e-05, "loss": 0.2981, "step": 1463000 }, { "epoch": 0.88, "learning_rate": 4.989765127851903e-05, "loss": 0.3074, "step": 1463500 }, { "epoch": 0.88, "learning_rate": 4.9895551312958464e-05, "loss": 0.296, "step": 1464000 }, { "epoch": 0.88, "learning_rate": 4.9893455547329024e-05, "loss": 0.3052, "step": 1464500 }, { "epoch": 0.88, "learning_rate": 4.9891359781699584e-05, "loss": 0.3043, "step": 1465000 }, { "epoch": 0.88, "learning_rate": 4.988926401607014e-05, "loss": 0.3008, "step": 1465500 }, { "epoch": 0.88, "learning_rate": 4.988716405050957e-05, "loss": 0.3047, "step": 1466000 }, { "epoch": 0.88, "learning_rate": 4.9885064084949005e-05, "loss": 0.2979, "step": 1466500 }, { "epoch": 0.88, "learning_rate": 4.9882964119388445e-05, "loss": 0.3006, "step": 1467000 }, { "epoch": 0.88, "learning_rate": 4.988086415382788e-05, "loss": 0.2926, "step": 1467500 }, { "epoch": 0.88, "learning_rate": 4.987876418826731e-05, "loss": 0.3028, "step": 1468000 }, { "epoch": 0.88, "learning_rate": 4.987666422270675e-05, "loss": 0.3054, "step": 1468500 }, { "epoch": 0.88, "learning_rate": 4.987456425714618e-05, "loss": 0.3064, "step": 1469000 }, { "epoch": 0.88, "learning_rate": 4.987246429158561e-05, "loss": 0.3043, "step": 1469500 }, { "epoch": 0.88, "learning_rate": 4.987036432602505e-05, "loss": 0.3, "step": 1470000 }, { "epoch": 0.88, "learning_rate": 4.986827276032673e-05, "loss": 0.3031, "step": 1470500 }, { "epoch": 0.88, "learning_rate": 4.9866172794766166e-05, "loss": 0.303, "step": 1471000 }, { "epoch": 0.88, "learning_rate": 4.98640728292056e-05, "loss": 0.3001, "step": 1471500 }, { "epoch": 0.88, "learning_rate": 4.986197286364504e-05, "loss": 0.3063, "step": 1472000 }, { "epoch": 0.88, "learning_rate": 4.9859872898084474e-05, "loss": 0.299, "step": 1472500 }, { "epoch": 0.88, "learning_rate": 4.985777293252391e-05, "loss": 0.2965, "step": 1473000 }, { "epoch": 0.88, "learning_rate": 4.985567296696334e-05, "loss": 0.295, "step": 1473500 }, { "epoch": 0.88, "learning_rate": 4.9853573001402774e-05, "loss": 0.3027, "step": 1474000 }, { "epoch": 0.88, "learning_rate": 4.9851477235773334e-05, "loss": 0.3008, "step": 1474500 }, { "epoch": 0.88, "learning_rate": 4.984937727021277e-05, "loss": 0.3042, "step": 1475000 }, { "epoch": 0.88, "learning_rate": 4.984727730465221e-05, "loss": 0.3044, "step": 1475500 }, { "epoch": 0.88, "learning_rate": 4.9845177339091635e-05, "loss": 0.3049, "step": 1476000 }, { "epoch": 0.89, "learning_rate": 4.984307737353107e-05, "loss": 0.3021, "step": 1476500 }, { "epoch": 0.89, "learning_rate": 4.984097740797051e-05, "loss": 0.3056, "step": 1477000 }, { "epoch": 0.89, "learning_rate": 4.983888164234107e-05, "loss": 0.3031, "step": 1477500 }, { "epoch": 0.89, "learning_rate": 4.98367816767805e-05, "loss": 0.302, "step": 1478000 }, { "epoch": 0.89, "learning_rate": 4.9834681711219936e-05, "loss": 0.3021, "step": 1478500 }, { "epoch": 0.89, "learning_rate": 4.983258174565937e-05, "loss": 0.2994, "step": 1479000 }, { "epoch": 0.89, "learning_rate": 4.98304817800988e-05, "loss": 0.3064, "step": 1479500 }, { "epoch": 0.89, "learning_rate": 4.982838181453824e-05, "loss": 0.3018, "step": 1480000 }, { "epoch": 0.89, "learning_rate": 4.9826281848977676e-05, "loss": 0.2981, "step": 1480500 }, { "epoch": 0.89, "learning_rate": 4.982418188341711e-05, "loss": 0.2993, "step": 1481000 }, { "epoch": 0.89, "learning_rate": 4.982208191785655e-05, "loss": 0.2998, "step": 1481500 }, { "epoch": 0.89, "learning_rate": 4.9819986152227104e-05, "loss": 0.3003, "step": 1482000 }, { "epoch": 0.89, "learning_rate": 4.981788618666654e-05, "loss": 0.3035, "step": 1482500 }, { "epoch": 0.89, "learning_rate": 4.981578622110597e-05, "loss": 0.2985, "step": 1483000 }, { "epoch": 0.89, "learning_rate": 4.981368625554541e-05, "loss": 0.2966, "step": 1483500 }, { "epoch": 0.89, "learning_rate": 4.9811586289984844e-05, "loss": 0.3019, "step": 1484000 }, { "epoch": 0.89, "learning_rate": 4.980948632442428e-05, "loss": 0.3093, "step": 1484500 }, { "epoch": 0.89, "learning_rate": 4.980739055879483e-05, "loss": 0.3055, "step": 1485000 }, { "epoch": 0.89, "learning_rate": 4.980529059323427e-05, "loss": 0.3001, "step": 1485500 }, { "epoch": 0.89, "learning_rate": 4.9803190627673705e-05, "loss": 0.2994, "step": 1486000 }, { "epoch": 0.89, "learning_rate": 4.980109066211314e-05, "loss": 0.3049, "step": 1486500 }, { "epoch": 0.89, "learning_rate": 4.97989948964837e-05, "loss": 0.3004, "step": 1487000 }, { "epoch": 0.89, "learning_rate": 4.979689493092313e-05, "loss": 0.2984, "step": 1487500 }, { "epoch": 0.89, "learning_rate": 4.9794794965362566e-05, "loss": 0.3039, "step": 1488000 }, { "epoch": 0.89, "learning_rate": 4.979269919973312e-05, "loss": 0.2971, "step": 1488500 }, { "epoch": 0.89, "learning_rate": 4.979059923417256e-05, "loss": 0.3023, "step": 1489000 }, { "epoch": 0.89, "learning_rate": 4.978849926861199e-05, "loss": 0.2982, "step": 1489500 }, { "epoch": 0.89, "learning_rate": 4.9786399303051426e-05, "loss": 0.3038, "step": 1490000 }, { "epoch": 0.89, "learning_rate": 4.978430353742198e-05, "loss": 0.3039, "step": 1490500 }, { "epoch": 0.89, "learning_rate": 4.978220357186142e-05, "loss": 0.2985, "step": 1491000 }, { "epoch": 0.89, "learning_rate": 4.9780103606300854e-05, "loss": 0.2964, "step": 1491500 }, { "epoch": 0.89, "learning_rate": 4.977800364074029e-05, "loss": 0.2978, "step": 1492000 }, { "epoch": 0.89, "learning_rate": 4.977590367517973e-05, "loss": 0.301, "step": 1492500 }, { "epoch": 0.9, "learning_rate": 4.977380370961916e-05, "loss": 0.2979, "step": 1493000 }, { "epoch": 0.9, "learning_rate": 4.9771703744058594e-05, "loss": 0.3028, "step": 1493500 }, { "epoch": 0.9, "learning_rate": 4.9769607978429155e-05, "loss": 0.3014, "step": 1494000 }, { "epoch": 0.9, "learning_rate": 4.976750801286859e-05, "loss": 0.3035, "step": 1494500 }, { "epoch": 0.9, "learning_rate": 4.976540804730802e-05, "loss": 0.2989, "step": 1495000 }, { "epoch": 0.9, "learning_rate": 4.976330808174746e-05, "loss": 0.2937, "step": 1495500 }, { "epoch": 0.9, "learning_rate": 4.9761208116186895e-05, "loss": 0.3013, "step": 1496000 }, { "epoch": 0.9, "learning_rate": 4.975910815062633e-05, "loss": 0.3042, "step": 1496500 }, { "epoch": 0.9, "learning_rate": 4.975700818506577e-05, "loss": 0.3034, "step": 1497000 }, { "epoch": 0.9, "learning_rate": 4.97549082195052e-05, "loss": 0.3052, "step": 1497500 }, { "epoch": 0.9, "learning_rate": 4.975280825394463e-05, "loss": 0.3041, "step": 1498000 }, { "epoch": 0.9, "learning_rate": 4.975070828838407e-05, "loss": 0.3017, "step": 1498500 }, { "epoch": 0.9, "learning_rate": 4.97486083228235e-05, "loss": 0.3055, "step": 1499000 }, { "epoch": 0.9, "learning_rate": 4.9746508357262936e-05, "loss": 0.301, "step": 1499500 }, { "epoch": 0.9, "learning_rate": 4.9744412591633497e-05, "loss": 0.2999, "step": 1500000 }, { "epoch": 0.9, "eval_loss": 0.27191513776779175, "eval_runtime": 1482.9586, "eval_samples_per_second": 355.182, "eval_steps_per_second": 59.197, "step": 1500000 }, { "epoch": 0.9, "learning_rate": 4.974231262607293e-05, "loss": 0.2994, "step": 1500500 }, { "epoch": 0.9, "learning_rate": 4.9740212660512363e-05, "loss": 0.3043, "step": 1501000 }, { "epoch": 0.9, "learning_rate": 4.9738112694951804e-05, "loss": 0.2953, "step": 1501500 }, { "epoch": 0.9, "learning_rate": 4.9736016929322364e-05, "loss": 0.2937, "step": 1502000 }, { "epoch": 0.9, "learning_rate": 4.97339169637618e-05, "loss": 0.3004, "step": 1502500 }, { "epoch": 0.9, "learning_rate": 4.9731816998201224e-05, "loss": 0.3003, "step": 1503000 }, { "epoch": 0.9, "learning_rate": 4.9729717032640664e-05, "loss": 0.3061, "step": 1503500 }, { "epoch": 0.9, "learning_rate": 4.97276170670801e-05, "loss": 0.2934, "step": 1504000 }, { "epoch": 0.9, "learning_rate": 4.972551710151953e-05, "loss": 0.294, "step": 1504500 }, { "epoch": 0.9, "learning_rate": 4.972341713595897e-05, "loss": 0.304, "step": 1505000 }, { "epoch": 0.9, "learning_rate": 4.9721317170398405e-05, "loss": 0.3031, "step": 1505500 }, { "epoch": 0.9, "learning_rate": 4.971921720483784e-05, "loss": 0.2981, "step": 1506000 }, { "epoch": 0.9, "learning_rate": 4.971711723927728e-05, "loss": 0.3003, "step": 1506500 }, { "epoch": 0.9, "learning_rate": 4.971502147364783e-05, "loss": 0.3007, "step": 1507000 }, { "epoch": 0.9, "learning_rate": 4.9712921508087266e-05, "loss": 0.3108, "step": 1507500 }, { "epoch": 0.9, "learning_rate": 4.97108215425267e-05, "loss": 0.3025, "step": 1508000 }, { "epoch": 0.9, "learning_rate": 4.970872157696614e-05, "loss": 0.304, "step": 1508500 }, { "epoch": 0.9, "learning_rate": 4.970662581133669e-05, "loss": 0.3011, "step": 1509000 }, { "epoch": 0.91, "learning_rate": 4.9704525845776126e-05, "loss": 0.3056, "step": 1509500 }, { "epoch": 0.91, "learning_rate": 4.970242588021557e-05, "loss": 0.3041, "step": 1510000 }, { "epoch": 0.91, "learning_rate": 4.9700325914655e-05, "loss": 0.2992, "step": 1510500 }, { "epoch": 0.91, "learning_rate": 4.9698225949094434e-05, "loss": 0.2953, "step": 1511000 }, { "epoch": 0.91, "learning_rate": 4.969613018346499e-05, "loss": 0.3017, "step": 1511500 }, { "epoch": 0.91, "learning_rate": 4.969403021790443e-05, "loss": 0.3007, "step": 1512000 }, { "epoch": 0.91, "learning_rate": 4.969193025234386e-05, "loss": 0.2922, "step": 1512500 }, { "epoch": 0.91, "learning_rate": 4.9689830286783294e-05, "loss": 0.3074, "step": 1513000 }, { "epoch": 0.91, "learning_rate": 4.9687730321222735e-05, "loss": 0.2972, "step": 1513500 }, { "epoch": 0.91, "learning_rate": 4.968563455559329e-05, "loss": 0.2963, "step": 1514000 }, { "epoch": 0.91, "learning_rate": 4.968353878996385e-05, "loss": 0.2946, "step": 1514500 }, { "epoch": 0.91, "learning_rate": 4.9681438824403275e-05, "loss": 0.2989, "step": 1515000 }, { "epoch": 0.91, "learning_rate": 4.9679338858842715e-05, "loss": 0.3001, "step": 1515500 }, { "epoch": 0.91, "learning_rate": 4.967723889328215e-05, "loss": 0.2959, "step": 1516000 }, { "epoch": 0.91, "learning_rate": 4.967513892772158e-05, "loss": 0.2973, "step": 1516500 }, { "epoch": 0.91, "learning_rate": 4.9673043162092136e-05, "loss": 0.297, "step": 1517000 }, { "epoch": 0.91, "learning_rate": 4.9670943196531576e-05, "loss": 0.3087, "step": 1517500 }, { "epoch": 0.91, "learning_rate": 4.966884323097101e-05, "loss": 0.2972, "step": 1518000 }, { "epoch": 0.91, "learning_rate": 4.966674326541044e-05, "loss": 0.2957, "step": 1518500 }, { "epoch": 0.91, "learning_rate": 4.966464329984988e-05, "loss": 0.3022, "step": 1519000 }, { "epoch": 0.91, "learning_rate": 4.966254753422044e-05, "loss": 0.2954, "step": 1519500 }, { "epoch": 0.91, "learning_rate": 4.966044756865987e-05, "loss": 0.3013, "step": 1520000 }, { "epoch": 0.91, "learning_rate": 4.9658347603099304e-05, "loss": 0.3001, "step": 1520500 }, { "epoch": 0.91, "learning_rate": 4.9656251837469864e-05, "loss": 0.3034, "step": 1521000 }, { "epoch": 0.91, "learning_rate": 4.9654156071840425e-05, "loss": 0.2999, "step": 1521500 }, { "epoch": 0.91, "learning_rate": 4.965205610627986e-05, "loss": 0.3043, "step": 1522000 }, { "epoch": 0.91, "learning_rate": 4.964995614071929e-05, "loss": 0.3046, "step": 1522500 }, { "epoch": 0.91, "learning_rate": 4.964785617515873e-05, "loss": 0.2998, "step": 1523000 }, { "epoch": 0.91, "learning_rate": 4.9645756209598165e-05, "loss": 0.3031, "step": 1523500 }, { "epoch": 0.91, "learning_rate": 4.96436562440376e-05, "loss": 0.3015, "step": 1524000 }, { "epoch": 0.91, "learning_rate": 4.964155627847703e-05, "loss": 0.2973, "step": 1524500 }, { "epoch": 0.91, "learning_rate": 4.9639456312916466e-05, "loss": 0.2998, "step": 1525000 }, { "epoch": 0.91, "learning_rate": 4.96373563473559e-05, "loss": 0.2945, "step": 1525500 }, { "epoch": 0.91, "learning_rate": 4.963526058172646e-05, "loss": 0.3053, "step": 1526000 }, { "epoch": 0.92, "learning_rate": 4.963316061616589e-05, "loss": 0.2998, "step": 1526500 }, { "epoch": 0.92, "learning_rate": 4.9631060650605326e-05, "loss": 0.3016, "step": 1527000 }, { "epoch": 0.92, "learning_rate": 4.962896068504476e-05, "loss": 0.3006, "step": 1527500 }, { "epoch": 0.92, "learning_rate": 4.96268607194842e-05, "loss": 0.2979, "step": 1528000 }, { "epoch": 0.92, "learning_rate": 4.962476495385476e-05, "loss": 0.3087, "step": 1528500 }, { "epoch": 0.92, "learning_rate": 4.962266498829419e-05, "loss": 0.3059, "step": 1529000 }, { "epoch": 0.92, "learning_rate": 4.962056502273363e-05, "loss": 0.3014, "step": 1529500 }, { "epoch": 0.92, "learning_rate": 4.961846505717306e-05, "loss": 0.3004, "step": 1530000 }, { "epoch": 0.92, "learning_rate": 4.9616365091612494e-05, "loss": 0.2988, "step": 1530500 }, { "epoch": 0.92, "learning_rate": 4.9614265126051934e-05, "loss": 0.3017, "step": 1531000 }, { "epoch": 0.92, "learning_rate": 4.961216516049137e-05, "loss": 0.2965, "step": 1531500 }, { "epoch": 0.92, "learning_rate": 4.96100651949308e-05, "loss": 0.2993, "step": 1532000 }, { "epoch": 0.92, "learning_rate": 4.960796522937024e-05, "loss": 0.2949, "step": 1532500 }, { "epoch": 0.92, "learning_rate": 4.9605865263809675e-05, "loss": 0.2942, "step": 1533000 }, { "epoch": 0.92, "learning_rate": 4.960376529824911e-05, "loss": 0.2951, "step": 1533500 }, { "epoch": 0.92, "learning_rate": 4.960166533268855e-05, "loss": 0.3009, "step": 1534000 }, { "epoch": 0.92, "learning_rate": 4.9599565367127975e-05, "loss": 0.2999, "step": 1534500 }, { "epoch": 0.92, "learning_rate": 4.959746540156741e-05, "loss": 0.298, "step": 1535000 }, { "epoch": 0.92, "learning_rate": 4.959536543600685e-05, "loss": 0.304, "step": 1535500 }, { "epoch": 0.92, "learning_rate": 4.959326547044628e-05, "loss": 0.3067, "step": 1536000 }, { "epoch": 0.92, "learning_rate": 4.959116970481684e-05, "loss": 0.2972, "step": 1536500 }, { "epoch": 0.92, "learning_rate": 4.958906973925627e-05, "loss": 0.2991, "step": 1537000 }, { "epoch": 0.92, "learning_rate": 4.958696977369571e-05, "loss": 0.303, "step": 1537500 }, { "epoch": 0.92, "learning_rate": 4.958486980813514e-05, "loss": 0.2975, "step": 1538000 }, { "epoch": 0.92, "learning_rate": 4.958276984257458e-05, "loss": 0.3035, "step": 1538500 }, { "epoch": 0.92, "learning_rate": 4.958067407694514e-05, "loss": 0.2969, "step": 1539000 }, { "epoch": 0.92, "learning_rate": 4.957857411138457e-05, "loss": 0.2942, "step": 1539500 }, { "epoch": 0.92, "learning_rate": 4.9576474145824004e-05, "loss": 0.3011, "step": 1540000 }, { "epoch": 0.92, "learning_rate": 4.9574374180263444e-05, "loss": 0.2956, "step": 1540500 }, { "epoch": 0.92, "learning_rate": 4.957227421470288e-05, "loss": 0.2902, "step": 1541000 }, { "epoch": 0.92, "learning_rate": 4.957017424914231e-05, "loss": 0.302, "step": 1541500 }, { "epoch": 0.92, "learning_rate": 4.9568078483512865e-05, "loss": 0.2991, "step": 1542000 }, { "epoch": 0.92, "learning_rate": 4.9565978517952305e-05, "loss": 0.3002, "step": 1542500 }, { "epoch": 0.93, "learning_rate": 4.956387855239174e-05, "loss": 0.306, "step": 1543000 }, { "epoch": 0.93, "learning_rate": 4.956177858683117e-05, "loss": 0.3029, "step": 1543500 }, { "epoch": 0.93, "learning_rate": 4.9559682821201725e-05, "loss": 0.2939, "step": 1544000 }, { "epoch": 0.93, "learning_rate": 4.9557582855641166e-05, "loss": 0.3036, "step": 1544500 }, { "epoch": 0.93, "learning_rate": 4.95554828900806e-05, "loss": 0.2922, "step": 1545000 }, { "epoch": 0.93, "learning_rate": 4.955338292452003e-05, "loss": 0.3015, "step": 1545500 }, { "epoch": 0.93, "learning_rate": 4.955128715889059e-05, "loss": 0.3016, "step": 1546000 }, { "epoch": 0.93, "learning_rate": 4.9549187193330026e-05, "loss": 0.2986, "step": 1546500 }, { "epoch": 0.93, "learning_rate": 4.954708722776946e-05, "loss": 0.3051, "step": 1547000 }, { "epoch": 0.93, "learning_rate": 4.95449872622089e-05, "loss": 0.3021, "step": 1547500 }, { "epoch": 0.93, "learning_rate": 4.9542887296648334e-05, "loss": 0.2959, "step": 1548000 }, { "epoch": 0.93, "learning_rate": 4.954078733108777e-05, "loss": 0.3007, "step": 1548500 }, { "epoch": 0.93, "learning_rate": 4.953868736552721e-05, "loss": 0.3014, "step": 1549000 }, { "epoch": 0.93, "learning_rate": 4.953658739996664e-05, "loss": 0.3044, "step": 1549500 }, { "epoch": 0.93, "learning_rate": 4.9534487434406074e-05, "loss": 0.3001, "step": 1550000 }, { "epoch": 0.93, "learning_rate": 4.9532387468845514e-05, "loss": 0.2939, "step": 1550500 }, { "epoch": 0.93, "learning_rate": 4.953028750328495e-05, "loss": 0.2957, "step": 1551000 }, { "epoch": 0.93, "learning_rate": 4.952818753772438e-05, "loss": 0.2982, "step": 1551500 }, { "epoch": 0.93, "learning_rate": 4.9526087572163815e-05, "loss": 0.3029, "step": 1552000 }, { "epoch": 0.93, "learning_rate": 4.9523991806534375e-05, "loss": 0.2956, "step": 1552500 }, { "epoch": 0.93, "learning_rate": 4.952189184097381e-05, "loss": 0.2975, "step": 1553000 }, { "epoch": 0.93, "learning_rate": 4.951979187541324e-05, "loss": 0.2989, "step": 1553500 }, { "epoch": 0.93, "learning_rate": 4.951769190985268e-05, "loss": 0.2931, "step": 1554000 }, { "epoch": 0.93, "learning_rate": 4.951559194429211e-05, "loss": 0.2971, "step": 1554500 }, { "epoch": 0.93, "learning_rate": 4.951349197873155e-05, "loss": 0.2966, "step": 1555000 }, { "epoch": 0.93, "learning_rate": 4.951139621310211e-05, "loss": 0.2973, "step": 1555500 }, { "epoch": 0.93, "learning_rate": 4.950929624754154e-05, "loss": 0.3028, "step": 1556000 }, { "epoch": 0.93, "learning_rate": 4.9507196281980976e-05, "loss": 0.2985, "step": 1556500 }, { "epoch": 0.93, "learning_rate": 4.950509631642041e-05, "loss": 0.301, "step": 1557000 }, { "epoch": 0.93, "learning_rate": 4.9503004750722084e-05, "loss": 0.3007, "step": 1557500 }, { "epoch": 0.93, "learning_rate": 4.9500908985092644e-05, "loss": 0.2974, "step": 1558000 }, { "epoch": 0.93, "learning_rate": 4.94988132194632e-05, "loss": 0.31, "step": 1558500 }, { "epoch": 0.93, "learning_rate": 4.949671325390264e-05, "loss": 0.3036, "step": 1559000 }, { "epoch": 0.93, "learning_rate": 4.949461328834207e-05, "loss": 0.2915, "step": 1559500 }, { "epoch": 0.94, "learning_rate": 4.949251332278151e-05, "loss": 0.29, "step": 1560000 }, { "epoch": 0.94, "learning_rate": 4.9490413357220945e-05, "loss": 0.3002, "step": 1560500 }, { "epoch": 0.94, "learning_rate": 4.948831339166037e-05, "loss": 0.3036, "step": 1561000 }, { "epoch": 0.94, "learning_rate": 4.948621342609981e-05, "loss": 0.3002, "step": 1561500 }, { "epoch": 0.94, "learning_rate": 4.9484113460539245e-05, "loss": 0.2982, "step": 1562000 }, { "epoch": 0.94, "learning_rate": 4.948201349497868e-05, "loss": 0.3015, "step": 1562500 }, { "epoch": 0.94, "learning_rate": 4.947991772934923e-05, "loss": 0.2916, "step": 1563000 }, { "epoch": 0.94, "learning_rate": 4.947781776378867e-05, "loss": 0.3059, "step": 1563500 }, { "epoch": 0.94, "learning_rate": 4.9475717798228106e-05, "loss": 0.2971, "step": 1564000 }, { "epoch": 0.94, "learning_rate": 4.947361783266754e-05, "loss": 0.2987, "step": 1564500 }, { "epoch": 0.94, "learning_rate": 4.947151786710698e-05, "loss": 0.2963, "step": 1565000 }, { "epoch": 0.94, "learning_rate": 4.946941790154641e-05, "loss": 0.2953, "step": 1565500 }, { "epoch": 0.94, "learning_rate": 4.946731793598585e-05, "loss": 0.2982, "step": 1566000 }, { "epoch": 0.94, "learning_rate": 4.946521797042529e-05, "loss": 0.2944, "step": 1566500 }, { "epoch": 0.94, "learning_rate": 4.946311800486472e-05, "loss": 0.2985, "step": 1567000 }, { "epoch": 0.94, "learning_rate": 4.9461018039304154e-05, "loss": 0.3026, "step": 1567500 }, { "epoch": 0.94, "learning_rate": 4.9458922273674714e-05, "loss": 0.2942, "step": 1568000 }, { "epoch": 0.94, "learning_rate": 4.945682230811415e-05, "loss": 0.2947, "step": 1568500 }, { "epoch": 0.94, "learning_rate": 4.945472234255358e-05, "loss": 0.3051, "step": 1569000 }, { "epoch": 0.94, "learning_rate": 4.945262237699302e-05, "loss": 0.3024, "step": 1569500 }, { "epoch": 0.94, "learning_rate": 4.9450522411432455e-05, "loss": 0.2948, "step": 1570000 }, { "epoch": 0.94, "learning_rate": 4.944842244587189e-05, "loss": 0.2982, "step": 1570500 }, { "epoch": 0.94, "learning_rate": 4.944632248031132e-05, "loss": 0.2954, "step": 1571000 }, { "epoch": 0.94, "learning_rate": 4.9444222514750755e-05, "loss": 0.2972, "step": 1571500 }, { "epoch": 0.94, "learning_rate": 4.9442130949052436e-05, "loss": 0.3044, "step": 1572000 }, { "epoch": 0.94, "learning_rate": 4.944003098349187e-05, "loss": 0.3003, "step": 1572500 }, { "epoch": 0.94, "learning_rate": 4.94379310179313e-05, "loss": 0.3011, "step": 1573000 }, { "epoch": 0.94, "learning_rate": 4.943583105237074e-05, "loss": 0.3004, "step": 1573500 }, { "epoch": 0.94, "learning_rate": 4.9433731086810176e-05, "loss": 0.2951, "step": 1574000 }, { "epoch": 0.94, "learning_rate": 4.943163112124961e-05, "loss": 0.2963, "step": 1574500 }, { "epoch": 0.94, "learning_rate": 4.942953115568905e-05, "loss": 0.2996, "step": 1575000 }, { "epoch": 0.94, "learning_rate": 4.942743119012848e-05, "loss": 0.3071, "step": 1575500 }, { "epoch": 0.94, "learning_rate": 4.942533122456792e-05, "loss": 0.2966, "step": 1576000 }, { "epoch": 0.95, "learning_rate": 4.942323125900735e-05, "loss": 0.299, "step": 1576500 }, { "epoch": 0.95, "learning_rate": 4.9421131293446784e-05, "loss": 0.2978, "step": 1577000 }, { "epoch": 0.95, "learning_rate": 4.9419031327886224e-05, "loss": 0.2973, "step": 1577500 }, { "epoch": 0.95, "learning_rate": 4.941693136232566e-05, "loss": 0.2979, "step": 1578000 }, { "epoch": 0.95, "learning_rate": 4.941483139676509e-05, "loss": 0.3091, "step": 1578500 }, { "epoch": 0.95, "learning_rate": 4.9412735631135644e-05, "loss": 0.2997, "step": 1579000 }, { "epoch": 0.95, "learning_rate": 4.9410635665575085e-05, "loss": 0.3027, "step": 1579500 }, { "epoch": 0.95, "learning_rate": 4.940853570001452e-05, "loss": 0.2982, "step": 1580000 }, { "epoch": 0.95, "learning_rate": 4.940643573445395e-05, "loss": 0.2944, "step": 1580500 }, { "epoch": 0.95, "learning_rate": 4.940433576889339e-05, "loss": 0.3047, "step": 1581000 }, { "epoch": 0.95, "learning_rate": 4.9402240003263945e-05, "loss": 0.2988, "step": 1581500 }, { "epoch": 0.95, "learning_rate": 4.940014003770338e-05, "loss": 0.2963, "step": 1582000 }, { "epoch": 0.95, "learning_rate": 4.939804007214281e-05, "loss": 0.3026, "step": 1582500 }, { "epoch": 0.95, "learning_rate": 4.939594010658225e-05, "loss": 0.2969, "step": 1583000 }, { "epoch": 0.95, "learning_rate": 4.9393840141021686e-05, "loss": 0.2982, "step": 1583500 }, { "epoch": 0.95, "learning_rate": 4.939174017546112e-05, "loss": 0.2943, "step": 1584000 }, { "epoch": 0.95, "learning_rate": 4.938964440983168e-05, "loss": 0.2994, "step": 1584500 }, { "epoch": 0.95, "learning_rate": 4.938754444427111e-05, "loss": 0.3038, "step": 1585000 }, { "epoch": 0.95, "learning_rate": 4.938544447871055e-05, "loss": 0.2947, "step": 1585500 }, { "epoch": 0.95, "learning_rate": 4.938334451314999e-05, "loss": 0.3003, "step": 1586000 }, { "epoch": 0.95, "learning_rate": 4.938124874752054e-05, "loss": 0.2979, "step": 1586500 }, { "epoch": 0.95, "learning_rate": 4.9379148781959974e-05, "loss": 0.2988, "step": 1587000 }, { "epoch": 0.95, "learning_rate": 4.937704881639941e-05, "loss": 0.3005, "step": 1587500 }, { "epoch": 0.95, "learning_rate": 4.937494885083885e-05, "loss": 0.3005, "step": 1588000 }, { "epoch": 0.95, "learning_rate": 4.93728530852094e-05, "loss": 0.3002, "step": 1588500 }, { "epoch": 0.95, "learning_rate": 4.9370753119648835e-05, "loss": 0.3002, "step": 1589000 }, { "epoch": 0.95, "learning_rate": 4.936865315408827e-05, "loss": 0.2996, "step": 1589500 }, { "epoch": 0.95, "learning_rate": 4.936655318852771e-05, "loss": 0.2956, "step": 1590000 }, { "epoch": 0.95, "learning_rate": 4.936445742289826e-05, "loss": 0.299, "step": 1590500 }, { "epoch": 0.95, "learning_rate": 4.9362357457337695e-05, "loss": 0.295, "step": 1591000 }, { "epoch": 0.95, "learning_rate": 4.9360257491777136e-05, "loss": 0.2938, "step": 1591500 }, { "epoch": 0.95, "learning_rate": 4.935815752621657e-05, "loss": 0.2974, "step": 1592000 }, { "epoch": 0.95, "learning_rate": 4.935606176058712e-05, "loss": 0.3032, "step": 1592500 }, { "epoch": 0.96, "learning_rate": 4.9353961795026556e-05, "loss": 0.2985, "step": 1593000 }, { "epoch": 0.96, "learning_rate": 4.9351861829465996e-05, "loss": 0.2978, "step": 1593500 }, { "epoch": 0.96, "learning_rate": 4.934976186390543e-05, "loss": 0.3021, "step": 1594000 }, { "epoch": 0.96, "learning_rate": 4.934766189834486e-05, "loss": 0.2982, "step": 1594500 }, { "epoch": 0.96, "learning_rate": 4.934556613271542e-05, "loss": 0.3052, "step": 1595000 }, { "epoch": 0.96, "learning_rate": 4.934346616715486e-05, "loss": 0.301, "step": 1595500 }, { "epoch": 0.96, "learning_rate": 4.934136620159429e-05, "loss": 0.2959, "step": 1596000 }, { "epoch": 0.96, "learning_rate": 4.9339266236033724e-05, "loss": 0.2963, "step": 1596500 }, { "epoch": 0.96, "learning_rate": 4.9337166270473164e-05, "loss": 0.2971, "step": 1597000 }, { "epoch": 0.96, "learning_rate": 4.93350663049126e-05, "loss": 0.3002, "step": 1597500 }, { "epoch": 0.96, "learning_rate": 4.933296633935203e-05, "loss": 0.2981, "step": 1598000 }, { "epoch": 0.96, "learning_rate": 4.933086637379147e-05, "loss": 0.2938, "step": 1598500 }, { "epoch": 0.96, "learning_rate": 4.9328766408230905e-05, "loss": 0.2943, "step": 1599000 }, { "epoch": 0.96, "learning_rate": 4.932666644267034e-05, "loss": 0.3056, "step": 1599500 }, { "epoch": 0.96, "learning_rate": 4.93245706770409e-05, "loss": 0.3022, "step": 1600000 }, { "epoch": 0.96, "eval_loss": 0.2697148025035858, "eval_runtime": 1482.5603, "eval_samples_per_second": 355.277, "eval_steps_per_second": 59.213, "step": 1600000 }, { "epoch": 0.96, "learning_rate": 4.932247071148033e-05, "loss": 0.2978, "step": 1600500 }, { "epoch": 0.96, "learning_rate": 4.9320370745919766e-05, "loss": 0.2972, "step": 1601000 }, { "epoch": 0.96, "learning_rate": 4.9318270780359206e-05, "loss": 0.2977, "step": 1601500 }, { "epoch": 0.96, "learning_rate": 4.931617081479864e-05, "loss": 0.2996, "step": 1602000 }, { "epoch": 0.96, "learning_rate": 4.931407504916919e-05, "loss": 0.2948, "step": 1602500 }, { "epoch": 0.96, "learning_rate": 4.9311975083608626e-05, "loss": 0.3011, "step": 1603000 }, { "epoch": 0.96, "learning_rate": 4.930987511804807e-05, "loss": 0.2919, "step": 1603500 }, { "epoch": 0.96, "learning_rate": 4.93077751524875e-05, "loss": 0.2936, "step": 1604000 }, { "epoch": 0.96, "learning_rate": 4.9305679386858054e-05, "loss": 0.3047, "step": 1604500 }, { "epoch": 0.96, "learning_rate": 4.930357942129749e-05, "loss": 0.3004, "step": 1605000 }, { "epoch": 0.96, "learning_rate": 4.930147945573693e-05, "loss": 0.2986, "step": 1605500 }, { "epoch": 0.96, "learning_rate": 4.929937949017636e-05, "loss": 0.2959, "step": 1606000 }, { "epoch": 0.96, "learning_rate": 4.92972795246158e-05, "loss": 0.3009, "step": 1606500 }, { "epoch": 0.96, "learning_rate": 4.9295183758986355e-05, "loss": 0.2992, "step": 1607000 }, { "epoch": 0.96, "learning_rate": 4.929308379342579e-05, "loss": 0.2957, "step": 1607500 }, { "epoch": 0.96, "learning_rate": 4.929098382786522e-05, "loss": 0.3111, "step": 1608000 }, { "epoch": 0.96, "learning_rate": 4.928888386230466e-05, "loss": 0.3089, "step": 1608500 }, { "epoch": 0.96, "learning_rate": 4.9286783896744095e-05, "loss": 0.3041, "step": 1609000 }, { "epoch": 0.96, "learning_rate": 4.928468393118353e-05, "loss": 0.2996, "step": 1609500 }, { "epoch": 0.97, "learning_rate": 4.928258396562296e-05, "loss": 0.2961, "step": 1610000 }, { "epoch": 0.97, "learning_rate": 4.928048819999352e-05, "loss": 0.3085, "step": 1610500 }, { "epoch": 0.97, "learning_rate": 4.9278388234432956e-05, "loss": 0.3009, "step": 1611000 }, { "epoch": 0.97, "learning_rate": 4.927628826887239e-05, "loss": 0.303, "step": 1611500 }, { "epoch": 0.97, "learning_rate": 4.927418830331183e-05, "loss": 0.298, "step": 1612000 }, { "epoch": 0.97, "learning_rate": 4.9272088337751256e-05, "loss": 0.3013, "step": 1612500 }, { "epoch": 0.97, "learning_rate": 4.926999257212182e-05, "loss": 0.3055, "step": 1613000 }, { "epoch": 0.97, "learning_rate": 4.926789680649237e-05, "loss": 0.2985, "step": 1613500 }, { "epoch": 0.97, "learning_rate": 4.926579684093181e-05, "loss": 0.2947, "step": 1614000 }, { "epoch": 0.97, "learning_rate": 4.9263696875371244e-05, "loss": 0.2913, "step": 1614500 }, { "epoch": 0.97, "learning_rate": 4.926159690981068e-05, "loss": 0.3012, "step": 1615000 }, { "epoch": 0.97, "learning_rate": 4.925949694425012e-05, "loss": 0.3001, "step": 1615500 }, { "epoch": 0.97, "learning_rate": 4.925739697868955e-05, "loss": 0.3081, "step": 1616000 }, { "epoch": 0.97, "learning_rate": 4.9255297013128985e-05, "loss": 0.302, "step": 1616500 }, { "epoch": 0.97, "learning_rate": 4.925319704756842e-05, "loss": 0.3045, "step": 1617000 }, { "epoch": 0.97, "learning_rate": 4.925109708200785e-05, "loss": 0.2932, "step": 1617500 }, { "epoch": 0.97, "learning_rate": 4.924900131637841e-05, "loss": 0.2986, "step": 1618000 }, { "epoch": 0.97, "learning_rate": 4.9246901350817845e-05, "loss": 0.2967, "step": 1618500 }, { "epoch": 0.97, "learning_rate": 4.9244801385257286e-05, "loss": 0.2948, "step": 1619000 }, { "epoch": 0.97, "learning_rate": 4.924270141969671e-05, "loss": 0.3024, "step": 1619500 }, { "epoch": 0.97, "learning_rate": 4.924060565406727e-05, "loss": 0.2957, "step": 1620000 }, { "epoch": 0.97, "learning_rate": 4.923850568850671e-05, "loss": 0.2995, "step": 1620500 }, { "epoch": 0.97, "learning_rate": 4.9236405722946146e-05, "loss": 0.3053, "step": 1621000 }, { "epoch": 0.97, "learning_rate": 4.923430575738558e-05, "loss": 0.2956, "step": 1621500 }, { "epoch": 0.97, "learning_rate": 4.923220579182501e-05, "loss": 0.2968, "step": 1622000 }, { "epoch": 0.97, "learning_rate": 4.923010582626445e-05, "loss": 0.2901, "step": 1622500 }, { "epoch": 0.97, "learning_rate": 4.922800586070388e-05, "loss": 0.294, "step": 1623000 }, { "epoch": 0.97, "learning_rate": 4.922590589514332e-05, "loss": 0.3018, "step": 1623500 }, { "epoch": 0.97, "learning_rate": 4.922381012951388e-05, "loss": 0.3015, "step": 1624000 }, { "epoch": 0.97, "learning_rate": 4.922171016395331e-05, "loss": 0.2926, "step": 1624500 }, { "epoch": 0.97, "learning_rate": 4.921961019839274e-05, "loss": 0.2998, "step": 1625000 }, { "epoch": 0.97, "learning_rate": 4.921751023283218e-05, "loss": 0.2963, "step": 1625500 }, { "epoch": 0.97, "learning_rate": 4.9215410267271615e-05, "loss": 0.2957, "step": 1626000 }, { "epoch": 0.98, "learning_rate": 4.921331450164217e-05, "loss": 0.2967, "step": 1626500 }, { "epoch": 0.98, "learning_rate": 4.921121453608161e-05, "loss": 0.3032, "step": 1627000 }, { "epoch": 0.98, "learning_rate": 4.920911457052104e-05, "loss": 0.2946, "step": 1627500 }, { "epoch": 0.98, "learning_rate": 4.9207014604960475e-05, "loss": 0.2957, "step": 1628000 }, { "epoch": 0.98, "learning_rate": 4.9204914639399915e-05, "loss": 0.2957, "step": 1628500 }, { "epoch": 0.98, "learning_rate": 4.920281887377047e-05, "loss": 0.2932, "step": 1629000 }, { "epoch": 0.98, "learning_rate": 4.92007189082099e-05, "loss": 0.3002, "step": 1629500 }, { "epoch": 0.98, "learning_rate": 4.9198618942649336e-05, "loss": 0.2938, "step": 1630000 }, { "epoch": 0.98, "learning_rate": 4.9196518977088776e-05, "loss": 0.2999, "step": 1630500 }, { "epoch": 0.98, "learning_rate": 4.9194423211459337e-05, "loss": 0.3004, "step": 1631000 }, { "epoch": 0.98, "learning_rate": 4.919232324589876e-05, "loss": 0.2958, "step": 1631500 }, { "epoch": 0.98, "learning_rate": 4.91902232803382e-05, "loss": 0.3017, "step": 1632000 }, { "epoch": 0.98, "learning_rate": 4.918812331477764e-05, "loss": 0.3011, "step": 1632500 }, { "epoch": 0.98, "learning_rate": 4.918602334921707e-05, "loss": 0.2989, "step": 1633000 }, { "epoch": 0.98, "learning_rate": 4.918392758358763e-05, "loss": 0.3007, "step": 1633500 }, { "epoch": 0.98, "learning_rate": 4.9181827618027064e-05, "loss": 0.299, "step": 1634000 }, { "epoch": 0.98, "learning_rate": 4.91797276524665e-05, "loss": 0.3009, "step": 1634500 }, { "epoch": 0.98, "learning_rate": 4.917762768690593e-05, "loss": 0.2998, "step": 1635000 }, { "epoch": 0.98, "learning_rate": 4.917552772134537e-05, "loss": 0.3031, "step": 1635500 }, { "epoch": 0.98, "learning_rate": 4.9173427755784805e-05, "loss": 0.3001, "step": 1636000 }, { "epoch": 0.98, "learning_rate": 4.917133199015536e-05, "loss": 0.2963, "step": 1636500 }, { "epoch": 0.98, "learning_rate": 4.916923202459479e-05, "loss": 0.3005, "step": 1637000 }, { "epoch": 0.98, "learning_rate": 4.916713205903423e-05, "loss": 0.3029, "step": 1637500 }, { "epoch": 0.98, "learning_rate": 4.916503629340479e-05, "loss": 0.3016, "step": 1638000 }, { "epoch": 0.98, "learning_rate": 4.916293632784422e-05, "loss": 0.293, "step": 1638500 }, { "epoch": 0.98, "learning_rate": 4.916083636228365e-05, "loss": 0.3065, "step": 1639000 }, { "epoch": 0.98, "learning_rate": 4.915873639672309e-05, "loss": 0.297, "step": 1639500 }, { "epoch": 0.98, "learning_rate": 4.9156636431162526e-05, "loss": 0.2976, "step": 1640000 }, { "epoch": 0.98, "learning_rate": 4.915453646560196e-05, "loss": 0.3002, "step": 1640500 }, { "epoch": 0.98, "learning_rate": 4.91524365000414e-05, "loss": 0.2949, "step": 1641000 }, { "epoch": 0.98, "learning_rate": 4.9150336534480833e-05, "loss": 0.3025, "step": 1641500 }, { "epoch": 0.98, "learning_rate": 4.914823656892027e-05, "loss": 0.2973, "step": 1642000 }, { "epoch": 0.98, "learning_rate": 4.914613660335971e-05, "loss": 0.3023, "step": 1642500 }, { "epoch": 0.99, "learning_rate": 4.914404083773026e-05, "loss": 0.3012, "step": 1643000 }, { "epoch": 0.99, "learning_rate": 4.9141940872169694e-05, "loss": 0.3056, "step": 1643500 }, { "epoch": 0.99, "learning_rate": 4.9139840906609134e-05, "loss": 0.2954, "step": 1644000 }, { "epoch": 0.99, "learning_rate": 4.913774094104857e-05, "loss": 0.3032, "step": 1644500 }, { "epoch": 0.99, "learning_rate": 4.9135640975488e-05, "loss": 0.295, "step": 1645000 }, { "epoch": 0.99, "learning_rate": 4.9133545209858555e-05, "loss": 0.296, "step": 1645500 }, { "epoch": 0.99, "learning_rate": 4.9131445244297995e-05, "loss": 0.2927, "step": 1646000 }, { "epoch": 0.99, "learning_rate": 4.912934527873743e-05, "loss": 0.2982, "step": 1646500 }, { "epoch": 0.99, "learning_rate": 4.912724531317686e-05, "loss": 0.3019, "step": 1647000 }, { "epoch": 0.99, "learning_rate": 4.91251453476163e-05, "loss": 0.3025, "step": 1647500 }, { "epoch": 0.99, "learning_rate": 4.9123045382055736e-05, "loss": 0.2915, "step": 1648000 }, { "epoch": 0.99, "learning_rate": 4.912094961642629e-05, "loss": 0.3009, "step": 1648500 }, { "epoch": 0.99, "learning_rate": 4.911884965086572e-05, "loss": 0.2985, "step": 1649000 }, { "epoch": 0.99, "learning_rate": 4.911674968530516e-05, "loss": 0.2944, "step": 1649500 }, { "epoch": 0.99, "learning_rate": 4.9114649719744596e-05, "loss": 0.2906, "step": 1650000 }, { "epoch": 0.99, "learning_rate": 4.911254975418403e-05, "loss": 0.2943, "step": 1650500 }, { "epoch": 0.99, "learning_rate": 4.911044978862347e-05, "loss": 0.2981, "step": 1651000 }, { "epoch": 0.99, "learning_rate": 4.9108354022994024e-05, "loss": 0.3027, "step": 1651500 }, { "epoch": 0.99, "learning_rate": 4.910625405743346e-05, "loss": 0.2953, "step": 1652000 }, { "epoch": 0.99, "learning_rate": 4.91041540918729e-05, "loss": 0.2906, "step": 1652500 }, { "epoch": 0.99, "learning_rate": 4.910205412631233e-05, "loss": 0.303, "step": 1653000 }, { "epoch": 0.99, "learning_rate": 4.9099958360682884e-05, "loss": 0.3028, "step": 1653500 }, { "epoch": 0.99, "learning_rate": 4.909785839512232e-05, "loss": 0.2968, "step": 1654000 }, { "epoch": 0.99, "learning_rate": 4.909575842956176e-05, "loss": 0.298, "step": 1654500 }, { "epoch": 0.99, "learning_rate": 4.909365846400119e-05, "loss": 0.2917, "step": 1655000 }, { "epoch": 0.99, "learning_rate": 4.9091558498440625e-05, "loss": 0.296, "step": 1655500 }, { "epoch": 0.99, "learning_rate": 4.908945853288006e-05, "loss": 0.2998, "step": 1656000 }, { "epoch": 0.99, "learning_rate": 4.908735856731949e-05, "loss": 0.3012, "step": 1656500 }, { "epoch": 0.99, "learning_rate": 4.9085258601758925e-05, "loss": 0.2969, "step": 1657000 }, { "epoch": 0.99, "learning_rate": 4.9083158636198366e-05, "loss": 0.2931, "step": 1657500 }, { "epoch": 0.99, "learning_rate": 4.9081062870568926e-05, "loss": 0.2988, "step": 1658000 }, { "epoch": 0.99, "learning_rate": 4.907896290500835e-05, "loss": 0.2922, "step": 1658500 }, { "epoch": 0.99, "learning_rate": 4.907686293944779e-05, "loss": 0.2938, "step": 1659000 }, { "epoch": 0.99, "learning_rate": 4.907476717381835e-05, "loss": 0.2949, "step": 1659500 }, { "epoch": 1.0, "learning_rate": 4.907266720825779e-05, "loss": 0.2919, "step": 1660000 }, { "epoch": 1.0, "learning_rate": 4.907056724269722e-05, "loss": 0.2964, "step": 1660500 }, { "epoch": 1.0, "learning_rate": 4.9068467277136654e-05, "loss": 0.2956, "step": 1661000 }, { "epoch": 1.0, "learning_rate": 4.906636731157609e-05, "loss": 0.2944, "step": 1661500 }, { "epoch": 1.0, "learning_rate": 4.906427154594665e-05, "loss": 0.2978, "step": 1662000 }, { "epoch": 1.0, "learning_rate": 4.906217158038608e-05, "loss": 0.2967, "step": 1662500 }, { "epoch": 1.0, "learning_rate": 4.9060071614825514e-05, "loss": 0.3023, "step": 1663000 }, { "epoch": 1.0, "learning_rate": 4.905797164926495e-05, "loss": 0.2982, "step": 1663500 }, { "epoch": 1.0, "learning_rate": 4.905587168370438e-05, "loss": 0.2964, "step": 1664000 }, { "epoch": 1.0, "learning_rate": 4.905377171814382e-05, "loss": 0.2917, "step": 1664500 }, { "epoch": 1.0, "learning_rate": 4.9051671752583255e-05, "loss": 0.2998, "step": 1665000 }, { "epoch": 1.0, "learning_rate": 4.904957178702269e-05, "loss": 0.3, "step": 1665500 }, { "epoch": 1.0, "learning_rate": 4.904747182146213e-05, "loss": 0.2938, "step": 1666000 }, { "epoch": 1.0, "learning_rate": 4.904538025576381e-05, "loss": 0.2945, "step": 1666500 }, { "epoch": 1.0, "learning_rate": 4.904328029020324e-05, "loss": 0.2929, "step": 1667000 }, { "epoch": 1.0, "learning_rate": 4.9041180324642676e-05, "loss": 0.2956, "step": 1667500 }, { "epoch": 1.0, "learning_rate": 4.903908035908211e-05, "loss": 0.2947, "step": 1668000 }, { "epoch": 1.0, "learning_rate": 4.903698039352154e-05, "loss": 0.2913, "step": 1668500 }, { "epoch": 1.0, "learning_rate": 4.9034880427960976e-05, "loss": 0.3005, "step": 1669000 }, { "epoch": 1.0, "learning_rate": 4.903278046240042e-05, "loss": 0.2915, "step": 1669500 }, { "epoch": 1.0, "learning_rate": 4.903068049683985e-05, "loss": 0.2947, "step": 1670000 }, { "epoch": 1.0, "learning_rate": 4.9028584731210404e-05, "loss": 0.2988, "step": 1670500 }, { "epoch": 1.0, "learning_rate": 4.902648476564984e-05, "loss": 0.2954, "step": 1671000 }, { "epoch": 1.0, "learning_rate": 4.90243890000204e-05, "loss": 0.2867, "step": 1671500 }, { "epoch": 1.0, "learning_rate": 4.902228903445984e-05, "loss": 0.2885, "step": 1672000 }, { "epoch": 1.0, "learning_rate": 4.9020189068899264e-05, "loss": 0.2915, "step": 1672500 }, { "epoch": 1.0, "learning_rate": 4.9018093303269825e-05, "loss": 0.296, "step": 1673000 }, { "epoch": 1.0, "learning_rate": 4.9015993337709265e-05, "loss": 0.2975, "step": 1673500 }, { "epoch": 1.0, "learning_rate": 4.90138933721487e-05, "loss": 0.2937, "step": 1674000 }, { "epoch": 1.0, "learning_rate": 4.901179340658813e-05, "loss": 0.2974, "step": 1674500 }, { "epoch": 1.0, "learning_rate": 4.9009693441027565e-05, "loss": 0.2887, "step": 1675000 }, { "epoch": 1.0, "learning_rate": 4.9007593475467e-05, "loss": 0.2961, "step": 1675500 }, { "epoch": 1.0, "learning_rate": 4.900549350990643e-05, "loss": 0.2905, "step": 1676000 }, { "epoch": 1.01, "learning_rate": 4.900339354434587e-05, "loss": 0.2968, "step": 1676500 }, { "epoch": 1.01, "learning_rate": 4.9001293578785306e-05, "loss": 0.294, "step": 1677000 }, { "epoch": 1.01, "learning_rate": 4.899919361322474e-05, "loss": 0.2988, "step": 1677500 }, { "epoch": 1.01, "learning_rate": 4.899709364766418e-05, "loss": 0.2972, "step": 1678000 }, { "epoch": 1.01, "learning_rate": 4.899499368210361e-05, "loss": 0.2991, "step": 1678500 }, { "epoch": 1.01, "learning_rate": 4.899289371654305e-05, "loss": 0.2902, "step": 1679000 }, { "epoch": 1.01, "learning_rate": 4.899079375098249e-05, "loss": 0.2917, "step": 1679500 }, { "epoch": 1.01, "learning_rate": 4.898869798535304e-05, "loss": 0.2915, "step": 1680000 }, { "epoch": 1.01, "learning_rate": 4.8986598019792474e-05, "loss": 0.297, "step": 1680500 }, { "epoch": 1.01, "learning_rate": 4.8984498054231914e-05, "loss": 0.3001, "step": 1681000 }, { "epoch": 1.01, "learning_rate": 4.898239808867135e-05, "loss": 0.2879, "step": 1681500 }, { "epoch": 1.01, "learning_rate": 4.89803023230419e-05, "loss": 0.2876, "step": 1682000 }, { "epoch": 1.01, "learning_rate": 4.8978202357481335e-05, "loss": 0.2916, "step": 1682500 }, { "epoch": 1.01, "learning_rate": 4.8976102391920775e-05, "loss": 0.2905, "step": 1683000 }, { "epoch": 1.01, "learning_rate": 4.897400242636021e-05, "loss": 0.2972, "step": 1683500 }, { "epoch": 1.01, "learning_rate": 4.897190246079964e-05, "loss": 0.2881, "step": 1684000 }, { "epoch": 1.01, "learning_rate": 4.8969806695170195e-05, "loss": 0.2873, "step": 1684500 }, { "epoch": 1.01, "learning_rate": 4.8967710929540756e-05, "loss": 0.2965, "step": 1685000 }, { "epoch": 1.01, "learning_rate": 4.896561096398019e-05, "loss": 0.2947, "step": 1685500 }, { "epoch": 1.01, "learning_rate": 4.896351099841962e-05, "loss": 0.2911, "step": 1686000 }, { "epoch": 1.01, "learning_rate": 4.896141103285906e-05, "loss": 0.294, "step": 1686500 }, { "epoch": 1.01, "learning_rate": 4.8959311067298496e-05, "loss": 0.2914, "step": 1687000 }, { "epoch": 1.01, "learning_rate": 4.895721110173793e-05, "loss": 0.2881, "step": 1687500 }, { "epoch": 1.01, "learning_rate": 4.895511113617737e-05, "loss": 0.2885, "step": 1688000 }, { "epoch": 1.01, "learning_rate": 4.8953015370547924e-05, "loss": 0.2952, "step": 1688500 }, { "epoch": 1.01, "learning_rate": 4.895091540498736e-05, "loss": 0.2916, "step": 1689000 }, { "epoch": 1.01, "learning_rate": 4.894881543942679e-05, "loss": 0.2976, "step": 1689500 }, { "epoch": 1.01, "learning_rate": 4.8946719673797344e-05, "loss": 0.2952, "step": 1690000 }, { "epoch": 1.01, "learning_rate": 4.8944619708236784e-05, "loss": 0.2926, "step": 1690500 }, { "epoch": 1.01, "learning_rate": 4.894251974267622e-05, "loss": 0.2988, "step": 1691000 }, { "epoch": 1.01, "learning_rate": 4.894041977711565e-05, "loss": 0.2884, "step": 1691500 }, { "epoch": 1.01, "learning_rate": 4.893831981155509e-05, "loss": 0.2865, "step": 1692000 }, { "epoch": 1.01, "learning_rate": 4.8936219845994525e-05, "loss": 0.3013, "step": 1692500 }, { "epoch": 1.02, "learning_rate": 4.893411988043396e-05, "loss": 0.2949, "step": 1693000 }, { "epoch": 1.02, "learning_rate": 4.89320199148734e-05, "loss": 0.2962, "step": 1693500 }, { "epoch": 1.02, "learning_rate": 4.892991994931283e-05, "loss": 0.2936, "step": 1694000 }, { "epoch": 1.02, "learning_rate": 4.8927819983752266e-05, "loss": 0.2868, "step": 1694500 }, { "epoch": 1.02, "learning_rate": 4.89257200181917e-05, "loss": 0.2968, "step": 1695000 }, { "epoch": 1.02, "learning_rate": 4.892362005263113e-05, "loss": 0.2944, "step": 1695500 }, { "epoch": 1.02, "learning_rate": 4.892152428700169e-05, "loss": 0.2918, "step": 1696000 }, { "epoch": 1.02, "learning_rate": 4.891942432144113e-05, "loss": 0.2929, "step": 1696500 }, { "epoch": 1.02, "learning_rate": 4.8917324355880567e-05, "loss": 0.2926, "step": 1697000 }, { "epoch": 1.02, "learning_rate": 4.891522439031999e-05, "loss": 0.2933, "step": 1697500 }, { "epoch": 1.02, "learning_rate": 4.8913124424759433e-05, "loss": 0.2908, "step": 1698000 }, { "epoch": 1.02, "learning_rate": 4.891102445919887e-05, "loss": 0.2895, "step": 1698500 }, { "epoch": 1.02, "learning_rate": 4.890892869356943e-05, "loss": 0.2952, "step": 1699000 }, { "epoch": 1.02, "learning_rate": 4.8906828728008854e-05, "loss": 0.2948, "step": 1699500 }, { "epoch": 1.02, "learning_rate": 4.8904728762448294e-05, "loss": 0.291, "step": 1700000 }, { "epoch": 1.02, "eval_loss": 0.2680596709251404, "eval_runtime": 1472.3776, "eval_samples_per_second": 357.734, "eval_steps_per_second": 59.623, "step": 1700000 }, { "epoch": 1.02, "learning_rate": 4.890262879688773e-05, "loss": 0.2962, "step": 1700500 }, { "epoch": 1.02, "learning_rate": 4.890053303125829e-05, "loss": 0.2911, "step": 1701000 }, { "epoch": 1.02, "learning_rate": 4.889843306569772e-05, "loss": 0.2921, "step": 1701500 }, { "epoch": 1.02, "learning_rate": 4.8896333100137155e-05, "loss": 0.2926, "step": 1702000 }, { "epoch": 1.02, "learning_rate": 4.889423313457659e-05, "loss": 0.2897, "step": 1702500 }, { "epoch": 1.02, "learning_rate": 4.889213316901603e-05, "loss": 0.2941, "step": 1703000 }, { "epoch": 1.02, "learning_rate": 4.889003740338659e-05, "loss": 0.2917, "step": 1703500 }, { "epoch": 1.02, "learning_rate": 4.888794163775714e-05, "loss": 0.3002, "step": 1704000 }, { "epoch": 1.02, "learning_rate": 4.8885841672196576e-05, "loss": 0.2994, "step": 1704500 }, { "epoch": 1.02, "learning_rate": 4.888374170663601e-05, "loss": 0.2854, "step": 1705000 }, { "epoch": 1.02, "learning_rate": 4.888164174107545e-05, "loss": 0.2867, "step": 1705500 }, { "epoch": 1.02, "learning_rate": 4.887954177551488e-05, "loss": 0.294, "step": 1706000 }, { "epoch": 1.02, "learning_rate": 4.8877441809954317e-05, "loss": 0.2942, "step": 1706500 }, { "epoch": 1.02, "learning_rate": 4.887534184439375e-05, "loss": 0.2905, "step": 1707000 }, { "epoch": 1.02, "learning_rate": 4.887324607876431e-05, "loss": 0.3044, "step": 1707500 }, { "epoch": 1.02, "learning_rate": 4.8871146113203744e-05, "loss": 0.2933, "step": 1708000 }, { "epoch": 1.02, "learning_rate": 4.886904614764318e-05, "loss": 0.2881, "step": 1708500 }, { "epoch": 1.02, "learning_rate": 4.886694618208261e-05, "loss": 0.286, "step": 1709000 }, { "epoch": 1.02, "learning_rate": 4.8864846216522044e-05, "loss": 0.2946, "step": 1709500 }, { "epoch": 1.03, "learning_rate": 4.8862746250961484e-05, "loss": 0.2991, "step": 1710000 }, { "epoch": 1.03, "learning_rate": 4.886064628540092e-05, "loss": 0.2971, "step": 1710500 }, { "epoch": 1.03, "learning_rate": 4.885854631984035e-05, "loss": 0.2879, "step": 1711000 }, { "epoch": 1.03, "learning_rate": 4.885644635427979e-05, "loss": 0.2939, "step": 1711500 }, { "epoch": 1.03, "learning_rate": 4.8854346388719225e-05, "loss": 0.2958, "step": 1712000 }, { "epoch": 1.03, "learning_rate": 4.885225062308978e-05, "loss": 0.2923, "step": 1712500 }, { "epoch": 1.03, "learning_rate": 4.885015065752921e-05, "loss": 0.2843, "step": 1713000 }, { "epoch": 1.03, "learning_rate": 4.884805069196865e-05, "loss": 0.2968, "step": 1713500 }, { "epoch": 1.03, "learning_rate": 4.8845950726408086e-05, "loss": 0.2897, "step": 1714000 }, { "epoch": 1.03, "learning_rate": 4.884385076084752e-05, "loss": 0.2886, "step": 1714500 }, { "epoch": 1.03, "learning_rate": 4.884175079528696e-05, "loss": 0.2948, "step": 1715000 }, { "epoch": 1.03, "learning_rate": 4.883965082972639e-05, "loss": 0.2937, "step": 1715500 }, { "epoch": 1.03, "learning_rate": 4.8837550864165826e-05, "loss": 0.2944, "step": 1716000 }, { "epoch": 1.03, "learning_rate": 4.883545509853638e-05, "loss": 0.2958, "step": 1716500 }, { "epoch": 1.03, "learning_rate": 4.883335513297582e-05, "loss": 0.2861, "step": 1717000 }, { "epoch": 1.03, "learning_rate": 4.8831259367346374e-05, "loss": 0.2906, "step": 1717500 }, { "epoch": 1.03, "learning_rate": 4.882915940178581e-05, "loss": 0.2949, "step": 1718000 }, { "epoch": 1.03, "learning_rate": 4.882705943622525e-05, "loss": 0.2948, "step": 1718500 }, { "epoch": 1.03, "learning_rate": 4.88249636705958e-05, "loss": 0.2892, "step": 1719000 }, { "epoch": 1.03, "learning_rate": 4.8822863705035235e-05, "loss": 0.2954, "step": 1719500 }, { "epoch": 1.03, "learning_rate": 4.882076373947467e-05, "loss": 0.2963, "step": 1720000 }, { "epoch": 1.03, "learning_rate": 4.881866377391411e-05, "loss": 0.2949, "step": 1720500 }, { "epoch": 1.03, "learning_rate": 4.881656380835354e-05, "loss": 0.2891, "step": 1721000 }, { "epoch": 1.03, "learning_rate": 4.8814463842792975e-05, "loss": 0.2951, "step": 1721500 }, { "epoch": 1.03, "learning_rate": 4.8812363877232415e-05, "loss": 0.2863, "step": 1722000 }, { "epoch": 1.03, "learning_rate": 4.881026391167185e-05, "loss": 0.2938, "step": 1722500 }, { "epoch": 1.03, "learning_rate": 4.88081681460424e-05, "loss": 0.2885, "step": 1723000 }, { "epoch": 1.03, "learning_rate": 4.8806068180481836e-05, "loss": 0.2917, "step": 1723500 }, { "epoch": 1.03, "learning_rate": 4.8803968214921276e-05, "loss": 0.2912, "step": 1724000 }, { "epoch": 1.03, "learning_rate": 4.880187244929183e-05, "loss": 0.2881, "step": 1724500 }, { "epoch": 1.03, "learning_rate": 4.879977248373126e-05, "loss": 0.2989, "step": 1725000 }, { "epoch": 1.03, "learning_rate": 4.8797672518170703e-05, "loss": 0.2927, "step": 1725500 }, { "epoch": 1.03, "learning_rate": 4.879557255261014e-05, "loss": 0.2915, "step": 1726000 }, { "epoch": 1.04, "learning_rate": 4.879347258704957e-05, "loss": 0.2928, "step": 1726500 }, { "epoch": 1.04, "learning_rate": 4.879137262148901e-05, "loss": 0.2934, "step": 1727000 }, { "epoch": 1.04, "learning_rate": 4.8789272655928444e-05, "loss": 0.291, "step": 1727500 }, { "epoch": 1.04, "learning_rate": 4.8787176890299e-05, "loss": 0.2886, "step": 1728000 }, { "epoch": 1.04, "learning_rate": 4.878507692473843e-05, "loss": 0.2956, "step": 1728500 }, { "epoch": 1.04, "learning_rate": 4.878297695917787e-05, "loss": 0.2973, "step": 1729000 }, { "epoch": 1.04, "learning_rate": 4.8780876993617305e-05, "loss": 0.2876, "step": 1729500 }, { "epoch": 1.04, "learning_rate": 4.877877702805674e-05, "loss": 0.2976, "step": 1730000 }, { "epoch": 1.04, "learning_rate": 4.877667706249618e-05, "loss": 0.3002, "step": 1730500 }, { "epoch": 1.04, "learning_rate": 4.877457709693561e-05, "loss": 0.2952, "step": 1731000 }, { "epoch": 1.04, "learning_rate": 4.8772477131375045e-05, "loss": 0.2929, "step": 1731500 }, { "epoch": 1.04, "learning_rate": 4.8770381365745606e-05, "loss": 0.3017, "step": 1732000 }, { "epoch": 1.04, "learning_rate": 4.876828560011616e-05, "loss": 0.2963, "step": 1732500 }, { "epoch": 1.04, "learning_rate": 4.876618563455559e-05, "loss": 0.288, "step": 1733000 }, { "epoch": 1.04, "learning_rate": 4.8764085668995026e-05, "loss": 0.2884, "step": 1733500 }, { "epoch": 1.04, "learning_rate": 4.8761985703434466e-05, "loss": 0.2878, "step": 1734000 }, { "epoch": 1.04, "learning_rate": 4.87598857378739e-05, "loss": 0.2892, "step": 1734500 }, { "epoch": 1.04, "learning_rate": 4.875778577231333e-05, "loss": 0.2899, "step": 1735000 }, { "epoch": 1.04, "learning_rate": 4.875569000668389e-05, "loss": 0.2919, "step": 1735500 }, { "epoch": 1.04, "learning_rate": 4.875359004112333e-05, "loss": 0.2903, "step": 1736000 }, { "epoch": 1.04, "learning_rate": 4.875149007556276e-05, "loss": 0.2921, "step": 1736500 }, { "epoch": 1.04, "learning_rate": 4.8749390110002194e-05, "loss": 0.2943, "step": 1737000 }, { "epoch": 1.04, "learning_rate": 4.8747290144441634e-05, "loss": 0.2981, "step": 1737500 }, { "epoch": 1.04, "learning_rate": 4.874519017888107e-05, "loss": 0.2886, "step": 1738000 }, { "epoch": 1.04, "learning_rate": 4.87430902133205e-05, "loss": 0.2884, "step": 1738500 }, { "epoch": 1.04, "learning_rate": 4.8740990247759935e-05, "loss": 0.2957, "step": 1739000 }, { "epoch": 1.04, "learning_rate": 4.8738894482130495e-05, "loss": 0.2977, "step": 1739500 }, { "epoch": 1.04, "learning_rate": 4.873679451656993e-05, "loss": 0.2954, "step": 1740000 }, { "epoch": 1.04, "learning_rate": 4.873469455100937e-05, "loss": 0.2957, "step": 1740500 }, { "epoch": 1.04, "learning_rate": 4.8732594585448795e-05, "loss": 0.2913, "step": 1741000 }, { "epoch": 1.04, "learning_rate": 4.873049461988823e-05, "loss": 0.2916, "step": 1741500 }, { "epoch": 1.04, "learning_rate": 4.872839465432767e-05, "loss": 0.2899, "step": 1742000 }, { "epoch": 1.04, "learning_rate": 4.872629888869823e-05, "loss": 0.2983, "step": 1742500 }, { "epoch": 1.04, "learning_rate": 4.872419892313766e-05, "loss": 0.2913, "step": 1743000 }, { "epoch": 1.05, "learning_rate": 4.872209895757709e-05, "loss": 0.2936, "step": 1743500 }, { "epoch": 1.05, "learning_rate": 4.871999899201653e-05, "loss": 0.2871, "step": 1744000 }, { "epoch": 1.05, "learning_rate": 4.871789902645596e-05, "loss": 0.2949, "step": 1744500 }, { "epoch": 1.05, "learning_rate": 4.87157990608954e-05, "loss": 0.2942, "step": 1745000 }, { "epoch": 1.05, "learning_rate": 4.871369909533484e-05, "loss": 0.2929, "step": 1745500 }, { "epoch": 1.05, "learning_rate": 4.871159912977427e-05, "loss": 0.2897, "step": 1746000 }, { "epoch": 1.05, "learning_rate": 4.8709499164213704e-05, "loss": 0.2879, "step": 1746500 }, { "epoch": 1.05, "learning_rate": 4.8707403398584264e-05, "loss": 0.2925, "step": 1747000 }, { "epoch": 1.05, "learning_rate": 4.87053034330237e-05, "loss": 0.3006, "step": 1747500 }, { "epoch": 1.05, "learning_rate": 4.870320346746313e-05, "loss": 0.2998, "step": 1748000 }, { "epoch": 1.05, "learning_rate": 4.870110350190257e-05, "loss": 0.2948, "step": 1748500 }, { "epoch": 1.05, "learning_rate": 4.8699007736273125e-05, "loss": 0.2967, "step": 1749000 }, { "epoch": 1.05, "learning_rate": 4.869690777071256e-05, "loss": 0.2893, "step": 1749500 }, { "epoch": 1.05, "learning_rate": 4.869480780515199e-05, "loss": 0.2912, "step": 1750000 }, { "epoch": 1.05, "learning_rate": 4.869270783959143e-05, "loss": 0.3013, "step": 1750500 }, { "epoch": 1.05, "learning_rate": 4.8690607874030866e-05, "loss": 0.297, "step": 1751000 }, { "epoch": 1.05, "learning_rate": 4.868851210840142e-05, "loss": 0.2927, "step": 1751500 }, { "epoch": 1.05, "learning_rate": 4.868641214284085e-05, "loss": 0.2994, "step": 1752000 }, { "epoch": 1.05, "learning_rate": 4.868431217728029e-05, "loss": 0.2884, "step": 1752500 }, { "epoch": 1.05, "learning_rate": 4.8682216411650846e-05, "loss": 0.2922, "step": 1753000 }, { "epoch": 1.05, "learning_rate": 4.868011644609028e-05, "loss": 0.2902, "step": 1753500 }, { "epoch": 1.05, "learning_rate": 4.867801648052972e-05, "loss": 0.2869, "step": 1754000 }, { "epoch": 1.05, "learning_rate": 4.8675916514969154e-05, "loss": 0.294, "step": 1754500 }, { "epoch": 1.05, "learning_rate": 4.867381654940859e-05, "loss": 0.2945, "step": 1755000 }, { "epoch": 1.05, "learning_rate": 4.867171658384803e-05, "loss": 0.2914, "step": 1755500 }, { "epoch": 1.05, "learning_rate": 4.866961661828746e-05, "loss": 0.2873, "step": 1756000 }, { "epoch": 1.05, "learning_rate": 4.8667516652726894e-05, "loss": 0.2918, "step": 1756500 }, { "epoch": 1.05, "learning_rate": 4.866542088709745e-05, "loss": 0.2904, "step": 1757000 }, { "epoch": 1.05, "learning_rate": 4.866332092153689e-05, "loss": 0.2918, "step": 1757500 }, { "epoch": 1.05, "learning_rate": 4.866122095597632e-05, "loss": 0.2918, "step": 1758000 }, { "epoch": 1.05, "learning_rate": 4.8659120990415755e-05, "loss": 0.2861, "step": 1758500 }, { "epoch": 1.05, "learning_rate": 4.8657021024855195e-05, "loss": 0.2943, "step": 1759000 }, { "epoch": 1.05, "learning_rate": 4.865492105929463e-05, "loss": 0.2942, "step": 1759500 }, { "epoch": 1.06, "learning_rate": 4.865282109373406e-05, "loss": 0.2948, "step": 1760000 }, { "epoch": 1.06, "learning_rate": 4.86507211281735e-05, "loss": 0.295, "step": 1760500 }, { "epoch": 1.06, "learning_rate": 4.8648625362544056e-05, "loss": 0.2944, "step": 1761000 }, { "epoch": 1.06, "learning_rate": 4.864652959691461e-05, "loss": 0.2904, "step": 1761500 }, { "epoch": 1.06, "learning_rate": 4.864442963135404e-05, "loss": 0.2954, "step": 1762000 }, { "epoch": 1.06, "learning_rate": 4.864232966579348e-05, "loss": 0.2929, "step": 1762500 }, { "epoch": 1.06, "learning_rate": 4.864022970023292e-05, "loss": 0.2925, "step": 1763000 }, { "epoch": 1.06, "learning_rate": 4.863812973467235e-05, "loss": 0.3009, "step": 1763500 }, { "epoch": 1.06, "learning_rate": 4.863602976911179e-05, "loss": 0.2959, "step": 1764000 }, { "epoch": 1.06, "learning_rate": 4.8633929803551224e-05, "loss": 0.2972, "step": 1764500 }, { "epoch": 1.06, "learning_rate": 4.863182983799066e-05, "loss": 0.2951, "step": 1765000 }, { "epoch": 1.06, "learning_rate": 4.862972987243009e-05, "loss": 0.3022, "step": 1765500 }, { "epoch": 1.06, "learning_rate": 4.862763410680065e-05, "loss": 0.2943, "step": 1766000 }, { "epoch": 1.06, "learning_rate": 4.8625534141240085e-05, "loss": 0.2935, "step": 1766500 }, { "epoch": 1.06, "learning_rate": 4.862343837561064e-05, "loss": 0.2901, "step": 1767000 }, { "epoch": 1.06, "learning_rate": 4.862133841005007e-05, "loss": 0.2905, "step": 1767500 }, { "epoch": 1.06, "learning_rate": 4.861923844448951e-05, "loss": 0.2942, "step": 1768000 }, { "epoch": 1.06, "learning_rate": 4.8617138478928945e-05, "loss": 0.2962, "step": 1768500 }, { "epoch": 1.06, "learning_rate": 4.861503851336838e-05, "loss": 0.289, "step": 1769000 }, { "epoch": 1.06, "learning_rate": 4.861293854780782e-05, "loss": 0.2954, "step": 1769500 }, { "epoch": 1.06, "learning_rate": 4.861083858224725e-05, "loss": 0.2922, "step": 1770000 }, { "epoch": 1.06, "learning_rate": 4.8608738616686686e-05, "loss": 0.2962, "step": 1770500 }, { "epoch": 1.06, "learning_rate": 4.860663865112612e-05, "loss": 0.2899, "step": 1771000 }, { "epoch": 1.06, "learning_rate": 4.86045470854278e-05, "loss": 0.2898, "step": 1771500 }, { "epoch": 1.06, "learning_rate": 4.860244711986723e-05, "loss": 0.3012, "step": 1772000 }, { "epoch": 1.06, "learning_rate": 4.860034715430667e-05, "loss": 0.2882, "step": 1772500 }, { "epoch": 1.06, "learning_rate": 4.859824718874611e-05, "loss": 0.2922, "step": 1773000 }, { "epoch": 1.06, "learning_rate": 4.859614722318554e-05, "loss": 0.2914, "step": 1773500 }, { "epoch": 1.06, "learning_rate": 4.8594047257624974e-05, "loss": 0.2972, "step": 1774000 }, { "epoch": 1.06, "learning_rate": 4.859195149199553e-05, "loss": 0.2929, "step": 1774500 }, { "epoch": 1.06, "learning_rate": 4.858985152643497e-05, "loss": 0.2929, "step": 1775000 }, { "epoch": 1.06, "learning_rate": 4.85877515608744e-05, "loss": 0.2955, "step": 1775500 }, { "epoch": 1.06, "learning_rate": 4.8585651595313835e-05, "loss": 0.2907, "step": 1776000 }, { "epoch": 1.07, "learning_rate": 4.8583551629753275e-05, "loss": 0.2912, "step": 1776500 }, { "epoch": 1.07, "learning_rate": 4.858145166419271e-05, "loss": 0.2998, "step": 1777000 }, { "epoch": 1.07, "learning_rate": 4.857935169863214e-05, "loss": 0.2914, "step": 1777500 }, { "epoch": 1.07, "learning_rate": 4.8577251733071575e-05, "loss": 0.2944, "step": 1778000 }, { "epoch": 1.07, "learning_rate": 4.857515176751101e-05, "loss": 0.2868, "step": 1778500 }, { "epoch": 1.07, "learning_rate": 4.857305180195045e-05, "loss": 0.2983, "step": 1779000 }, { "epoch": 1.07, "learning_rate": 4.857095603632101e-05, "loss": 0.2902, "step": 1779500 }, { "epoch": 1.07, "learning_rate": 4.8568856070760436e-05, "loss": 0.2977, "step": 1780000 }, { "epoch": 1.07, "learning_rate": 4.8566760305130996e-05, "loss": 0.2906, "step": 1780500 }, { "epoch": 1.07, "learning_rate": 4.856466033957043e-05, "loss": 0.2946, "step": 1781000 }, { "epoch": 1.07, "learning_rate": 4.856256037400987e-05, "loss": 0.2864, "step": 1781500 }, { "epoch": 1.07, "learning_rate": 4.85604604084493e-05, "loss": 0.2961, "step": 1782000 }, { "epoch": 1.07, "learning_rate": 4.855836044288873e-05, "loss": 0.2993, "step": 1782500 }, { "epoch": 1.07, "learning_rate": 4.855626047732817e-05, "loss": 0.2912, "step": 1783000 }, { "epoch": 1.07, "learning_rate": 4.8554160511767604e-05, "loss": 0.2932, "step": 1783500 }, { "epoch": 1.07, "learning_rate": 4.8552060546207044e-05, "loss": 0.2932, "step": 1784000 }, { "epoch": 1.07, "learning_rate": 4.854996058064648e-05, "loss": 0.2901, "step": 1784500 }, { "epoch": 1.07, "learning_rate": 4.854786061508591e-05, "loss": 0.2931, "step": 1785000 }, { "epoch": 1.07, "learning_rate": 4.854576064952535e-05, "loss": 0.304, "step": 1785500 }, { "epoch": 1.07, "learning_rate": 4.8543664883895905e-05, "loss": 0.2942, "step": 1786000 }, { "epoch": 1.07, "learning_rate": 4.854156491833534e-05, "loss": 0.2911, "step": 1786500 }, { "epoch": 1.07, "learning_rate": 4.853946495277477e-05, "loss": 0.2896, "step": 1787000 }, { "epoch": 1.07, "learning_rate": 4.853736498721421e-05, "loss": 0.2907, "step": 1787500 }, { "epoch": 1.07, "learning_rate": 4.8535269221584765e-05, "loss": 0.2904, "step": 1788000 }, { "epoch": 1.07, "learning_rate": 4.85331692560242e-05, "loss": 0.2919, "step": 1788500 }, { "epoch": 1.07, "learning_rate": 4.853106929046363e-05, "loss": 0.2901, "step": 1789000 }, { "epoch": 1.07, "learning_rate": 4.8528973524834186e-05, "loss": 0.2938, "step": 1789500 }, { "epoch": 1.07, "learning_rate": 4.8526873559273626e-05, "loss": 0.289, "step": 1790000 }, { "epoch": 1.07, "learning_rate": 4.852477359371306e-05, "loss": 0.2957, "step": 1790500 }, { "epoch": 1.07, "learning_rate": 4.85226736281525e-05, "loss": 0.2988, "step": 1791000 }, { "epoch": 1.07, "learning_rate": 4.852057786252306e-05, "loss": 0.2868, "step": 1791500 }, { "epoch": 1.07, "learning_rate": 4.851847789696249e-05, "loss": 0.2947, "step": 1792000 }, { "epoch": 1.07, "learning_rate": 4.851637793140192e-05, "loss": 0.289, "step": 1792500 }, { "epoch": 1.07, "learning_rate": 4.851427796584136e-05, "loss": 0.2933, "step": 1793000 }, { "epoch": 1.08, "learning_rate": 4.8512178000280794e-05, "loss": 0.2872, "step": 1793500 }, { "epoch": 1.08, "learning_rate": 4.851007803472023e-05, "loss": 0.2964, "step": 1794000 }, { "epoch": 1.08, "learning_rate": 4.850797806915967e-05, "loss": 0.2929, "step": 1794500 }, { "epoch": 1.08, "learning_rate": 4.85058781035991e-05, "loss": 0.2898, "step": 1795000 }, { "epoch": 1.08, "learning_rate": 4.8503778138038535e-05, "loss": 0.2871, "step": 1795500 }, { "epoch": 1.08, "learning_rate": 4.8501678172477975e-05, "loss": 0.2933, "step": 1796000 }, { "epoch": 1.08, "learning_rate": 4.849957820691741e-05, "loss": 0.2925, "step": 1796500 }, { "epoch": 1.08, "learning_rate": 4.849747824135684e-05, "loss": 0.2971, "step": 1797000 }, { "epoch": 1.08, "learning_rate": 4.8495382475727395e-05, "loss": 0.2926, "step": 1797500 }, { "epoch": 1.08, "learning_rate": 4.8493282510166836e-05, "loss": 0.2901, "step": 1798000 }, { "epoch": 1.08, "learning_rate": 4.849118674453739e-05, "loss": 0.2884, "step": 1798500 }, { "epoch": 1.08, "learning_rate": 4.848908677897682e-05, "loss": 0.297, "step": 1799000 }, { "epoch": 1.08, "learning_rate": 4.848698681341626e-05, "loss": 0.294, "step": 1799500 }, { "epoch": 1.08, "learning_rate": 4.8484886847855696e-05, "loss": 0.2912, "step": 1800000 }, { "epoch": 1.08, "eval_loss": 0.2663831114768982, "eval_runtime": 1462.6548, "eval_samples_per_second": 360.112, "eval_steps_per_second": 60.019, "step": 1800000 }, { "epoch": 1.08, "learning_rate": 4.848278688229513e-05, "loss": 0.2876, "step": 1800500 }, { "epoch": 1.08, "learning_rate": 4.848068691673457e-05, "loss": 0.2896, "step": 1801000 }, { "epoch": 1.08, "learning_rate": 4.8478586951174004e-05, "loss": 0.2962, "step": 1801500 }, { "epoch": 1.08, "learning_rate": 4.847648698561343e-05, "loss": 0.2966, "step": 1802000 }, { "epoch": 1.08, "learning_rate": 4.847439121998399e-05, "loss": 0.2965, "step": 1802500 }, { "epoch": 1.08, "learning_rate": 4.847229125442343e-05, "loss": 0.295, "step": 1803000 }, { "epoch": 1.08, "learning_rate": 4.8470191288862864e-05, "loss": 0.2863, "step": 1803500 }, { "epoch": 1.08, "learning_rate": 4.84680913233023e-05, "loss": 0.296, "step": 1804000 }, { "epoch": 1.08, "learning_rate": 4.846599135774173e-05, "loss": 0.2989, "step": 1804500 }, { "epoch": 1.08, "learning_rate": 4.8463891392181165e-05, "loss": 0.2899, "step": 1805000 }, { "epoch": 1.08, "learning_rate": 4.84617914266206e-05, "loss": 0.2842, "step": 1805500 }, { "epoch": 1.08, "learning_rate": 4.845969146106004e-05, "loss": 0.2935, "step": 1806000 }, { "epoch": 1.08, "learning_rate": 4.84575956954306e-05, "loss": 0.2924, "step": 1806500 }, { "epoch": 1.08, "learning_rate": 4.8455495729870025e-05, "loss": 0.2933, "step": 1807000 }, { "epoch": 1.08, "learning_rate": 4.8453395764309466e-05, "loss": 0.2915, "step": 1807500 }, { "epoch": 1.08, "learning_rate": 4.84512957987489e-05, "loss": 0.2895, "step": 1808000 }, { "epoch": 1.08, "learning_rate": 4.844919583318833e-05, "loss": 0.3027, "step": 1808500 }, { "epoch": 1.08, "learning_rate": 4.844709586762777e-05, "loss": 0.293, "step": 1809000 }, { "epoch": 1.08, "learning_rate": 4.8445000101998326e-05, "loss": 0.2968, "step": 1809500 }, { "epoch": 1.09, "learning_rate": 4.844290013643776e-05, "loss": 0.2948, "step": 1810000 }, { "epoch": 1.09, "learning_rate": 4.844080017087719e-05, "loss": 0.2878, "step": 1810500 }, { "epoch": 1.09, "learning_rate": 4.8438700205316633e-05, "loss": 0.2899, "step": 1811000 }, { "epoch": 1.09, "learning_rate": 4.843660023975607e-05, "loss": 0.2888, "step": 1811500 }, { "epoch": 1.09, "learning_rate": 4.84345002741955e-05, "loss": 0.2958, "step": 1812000 }, { "epoch": 1.09, "learning_rate": 4.843240030863494e-05, "loss": 0.2845, "step": 1812500 }, { "epoch": 1.09, "learning_rate": 4.8430304543005494e-05, "loss": 0.2833, "step": 1813000 }, { "epoch": 1.09, "learning_rate": 4.842820457744493e-05, "loss": 0.2898, "step": 1813500 }, { "epoch": 1.09, "learning_rate": 4.842610461188436e-05, "loss": 0.2924, "step": 1814000 }, { "epoch": 1.09, "learning_rate": 4.84240046463238e-05, "loss": 0.2845, "step": 1814500 }, { "epoch": 1.09, "learning_rate": 4.8421904680763235e-05, "loss": 0.2982, "step": 1815000 }, { "epoch": 1.09, "learning_rate": 4.841980471520267e-05, "loss": 0.2967, "step": 1815500 }, { "epoch": 1.09, "learning_rate": 4.841770474964211e-05, "loss": 0.2914, "step": 1816000 }, { "epoch": 1.09, "learning_rate": 4.841560478408154e-05, "loss": 0.2921, "step": 1816500 }, { "epoch": 1.09, "learning_rate": 4.8413504818520975e-05, "loss": 0.2926, "step": 1817000 }, { "epoch": 1.09, "learning_rate": 4.841140485296041e-05, "loss": 0.2849, "step": 1817500 }, { "epoch": 1.09, "learning_rate": 4.840930908733097e-05, "loss": 0.2957, "step": 1818000 }, { "epoch": 1.09, "learning_rate": 4.84072091217704e-05, "loss": 0.2906, "step": 1818500 }, { "epoch": 1.09, "learning_rate": 4.840510915620984e-05, "loss": 0.2978, "step": 1819000 }, { "epoch": 1.09, "learning_rate": 4.840300919064927e-05, "loss": 0.2884, "step": 1819500 }, { "epoch": 1.09, "learning_rate": 4.84009092250887e-05, "loss": 0.2934, "step": 1820000 }, { "epoch": 1.09, "learning_rate": 4.8398817659390384e-05, "loss": 0.2962, "step": 1820500 }, { "epoch": 1.09, "learning_rate": 4.839671769382982e-05, "loss": 0.2936, "step": 1821000 }, { "epoch": 1.09, "learning_rate": 4.839461772826926e-05, "loss": 0.2869, "step": 1821500 }, { "epoch": 1.09, "learning_rate": 4.839251776270869e-05, "loss": 0.295, "step": 1822000 }, { "epoch": 1.09, "learning_rate": 4.8390417797148124e-05, "loss": 0.2895, "step": 1822500 }, { "epoch": 1.09, "learning_rate": 4.8388322031518685e-05, "loss": 0.2953, "step": 1823000 }, { "epoch": 1.09, "learning_rate": 4.838622206595812e-05, "loss": 0.2942, "step": 1823500 }, { "epoch": 1.09, "learning_rate": 4.838412210039755e-05, "loss": 0.297, "step": 1824000 }, { "epoch": 1.09, "learning_rate": 4.838202213483699e-05, "loss": 0.2921, "step": 1824500 }, { "epoch": 1.09, "learning_rate": 4.8379922169276425e-05, "loss": 0.2962, "step": 1825000 }, { "epoch": 1.09, "learning_rate": 4.837782220371586e-05, "loss": 0.2963, "step": 1825500 }, { "epoch": 1.09, "learning_rate": 4.83757222381553e-05, "loss": 0.2963, "step": 1826000 }, { "epoch": 1.1, "learning_rate": 4.8373622272594726e-05, "loss": 0.2994, "step": 1826500 }, { "epoch": 1.1, "learning_rate": 4.8371526506965286e-05, "loss": 0.292, "step": 1827000 }, { "epoch": 1.1, "learning_rate": 4.836942654140472e-05, "loss": 0.2941, "step": 1827500 }, { "epoch": 1.1, "learning_rate": 4.836733077577527e-05, "loss": 0.2847, "step": 1828000 }, { "epoch": 1.1, "learning_rate": 4.836523081021471e-05, "loss": 0.2932, "step": 1828500 }, { "epoch": 1.1, "learning_rate": 4.8363130844654147e-05, "loss": 0.2896, "step": 1829000 }, { "epoch": 1.1, "learning_rate": 4.836103087909358e-05, "loss": 0.2946, "step": 1829500 }, { "epoch": 1.1, "learning_rate": 4.835893091353302e-05, "loss": 0.2908, "step": 1830000 }, { "epoch": 1.1, "learning_rate": 4.8356830947972454e-05, "loss": 0.2899, "step": 1830500 }, { "epoch": 1.1, "learning_rate": 4.8354730982411894e-05, "loss": 0.2913, "step": 1831000 }, { "epoch": 1.1, "learning_rate": 4.835263101685132e-05, "loss": 0.2872, "step": 1831500 }, { "epoch": 1.1, "learning_rate": 4.8350531051290754e-05, "loss": 0.2876, "step": 1832000 }, { "epoch": 1.1, "learning_rate": 4.8348435285661314e-05, "loss": 0.2886, "step": 1832500 }, { "epoch": 1.1, "learning_rate": 4.8346335320100755e-05, "loss": 0.295, "step": 1833000 }, { "epoch": 1.1, "learning_rate": 4.834423535454019e-05, "loss": 0.2938, "step": 1833500 }, { "epoch": 1.1, "learning_rate": 4.8342135388979615e-05, "loss": 0.2917, "step": 1834000 }, { "epoch": 1.1, "learning_rate": 4.8340035423419055e-05, "loss": 0.2874, "step": 1834500 }, { "epoch": 1.1, "learning_rate": 4.8337939657789615e-05, "loss": 0.2908, "step": 1835000 }, { "epoch": 1.1, "learning_rate": 4.833583969222905e-05, "loss": 0.2879, "step": 1835500 }, { "epoch": 1.1, "learning_rate": 4.8333739726668476e-05, "loss": 0.2939, "step": 1836000 }, { "epoch": 1.1, "learning_rate": 4.8331639761107916e-05, "loss": 0.2894, "step": 1836500 }, { "epoch": 1.1, "learning_rate": 4.832953979554735e-05, "loss": 0.2821, "step": 1837000 }, { "epoch": 1.1, "learning_rate": 4.832744402991791e-05, "loss": 0.2976, "step": 1837500 }, { "epoch": 1.1, "learning_rate": 4.832534406435735e-05, "loss": 0.2933, "step": 1838000 }, { "epoch": 1.1, "learning_rate": 4.8323244098796777e-05, "loss": 0.29, "step": 1838500 }, { "epoch": 1.1, "learning_rate": 4.832114413323621e-05, "loss": 0.2868, "step": 1839000 }, { "epoch": 1.1, "learning_rate": 4.831904416767565e-05, "loss": 0.2958, "step": 1839500 }, { "epoch": 1.1, "learning_rate": 4.831694840204621e-05, "loss": 0.2947, "step": 1840000 }, { "epoch": 1.1, "learning_rate": 4.8314848436485644e-05, "loss": 0.2907, "step": 1840500 }, { "epoch": 1.1, "learning_rate": 4.831274847092507e-05, "loss": 0.2897, "step": 1841000 }, { "epoch": 1.1, "learning_rate": 4.831064850536451e-05, "loss": 0.287, "step": 1841500 }, { "epoch": 1.1, "learning_rate": 4.8308556939666185e-05, "loss": 0.2939, "step": 1842000 }, { "epoch": 1.1, "learning_rate": 4.8306456974105625e-05, "loss": 0.2918, "step": 1842500 }, { "epoch": 1.1, "learning_rate": 4.830435700854506e-05, "loss": 0.2906, "step": 1843000 }, { "epoch": 1.11, "learning_rate": 4.83022570429845e-05, "loss": 0.2886, "step": 1843500 }, { "epoch": 1.11, "learning_rate": 4.830015707742393e-05, "loss": 0.2856, "step": 1844000 }, { "epoch": 1.11, "learning_rate": 4.8298057111863366e-05, "loss": 0.2908, "step": 1844500 }, { "epoch": 1.11, "learning_rate": 4.829596134623392e-05, "loss": 0.296, "step": 1845000 }, { "epoch": 1.11, "learning_rate": 4.829386138067336e-05, "loss": 0.2926, "step": 1845500 }, { "epoch": 1.11, "learning_rate": 4.829176561504391e-05, "loss": 0.2876, "step": 1846000 }, { "epoch": 1.11, "learning_rate": 4.8289665649483346e-05, "loss": 0.2938, "step": 1846500 }, { "epoch": 1.11, "learning_rate": 4.828756568392278e-05, "loss": 0.2951, "step": 1847000 }, { "epoch": 1.11, "learning_rate": 4.828546571836222e-05, "loss": 0.2888, "step": 1847500 }, { "epoch": 1.11, "learning_rate": 4.8283365752801653e-05, "loss": 0.2955, "step": 1848000 }, { "epoch": 1.11, "learning_rate": 4.828126578724109e-05, "loss": 0.2967, "step": 1848500 }, { "epoch": 1.11, "learning_rate": 4.827916582168053e-05, "loss": 0.2928, "step": 1849000 }, { "epoch": 1.11, "learning_rate": 4.827706585611996e-05, "loss": 0.2899, "step": 1849500 }, { "epoch": 1.11, "learning_rate": 4.8274965890559394e-05, "loss": 0.2882, "step": 1850000 }, { "epoch": 1.11, "learning_rate": 4.827286592499883e-05, "loss": 0.2918, "step": 1850500 }, { "epoch": 1.11, "learning_rate": 4.827076595943826e-05, "loss": 0.2891, "step": 1851000 }, { "epoch": 1.11, "learning_rate": 4.82686659938777e-05, "loss": 0.2936, "step": 1851500 }, { "epoch": 1.11, "learning_rate": 4.826657022824826e-05, "loss": 0.2904, "step": 1852000 }, { "epoch": 1.11, "learning_rate": 4.8264470262687695e-05, "loss": 0.2887, "step": 1852500 }, { "epoch": 1.11, "learning_rate": 4.826237029712712e-05, "loss": 0.2902, "step": 1853000 }, { "epoch": 1.11, "learning_rate": 4.82602787314288e-05, "loss": 0.298, "step": 1853500 }, { "epoch": 1.11, "learning_rate": 4.8258178765868236e-05, "loss": 0.2901, "step": 1854000 }, { "epoch": 1.11, "learning_rate": 4.8256078800307676e-05, "loss": 0.2887, "step": 1854500 }, { "epoch": 1.11, "learning_rate": 4.825397883474711e-05, "loss": 0.2859, "step": 1855000 }, { "epoch": 1.11, "learning_rate": 4.825187886918654e-05, "loss": 0.2927, "step": 1855500 }, { "epoch": 1.11, "learning_rate": 4.824977890362598e-05, "loss": 0.2903, "step": 1856000 }, { "epoch": 1.11, "learning_rate": 4.8247678938065417e-05, "loss": 0.291, "step": 1856500 }, { "epoch": 1.11, "learning_rate": 4.824557897250485e-05, "loss": 0.2937, "step": 1857000 }, { "epoch": 1.11, "learning_rate": 4.8243479006944283e-05, "loss": 0.2912, "step": 1857500 }, { "epoch": 1.11, "learning_rate": 4.8241383241314844e-05, "loss": 0.2912, "step": 1858000 }, { "epoch": 1.11, "learning_rate": 4.823928327575428e-05, "loss": 0.2948, "step": 1858500 }, { "epoch": 1.11, "learning_rate": 4.823718331019372e-05, "loss": 0.2895, "step": 1859000 }, { "epoch": 1.11, "learning_rate": 4.823508334463315e-05, "loss": 0.2951, "step": 1859500 }, { "epoch": 1.12, "learning_rate": 4.823298337907258e-05, "loss": 0.2877, "step": 1860000 }, { "epoch": 1.12, "learning_rate": 4.823088341351202e-05, "loss": 0.3011, "step": 1860500 }, { "epoch": 1.12, "learning_rate": 4.822878344795145e-05, "loss": 0.294, "step": 1861000 }, { "epoch": 1.12, "learning_rate": 4.8226683482390885e-05, "loss": 0.2903, "step": 1861500 }, { "epoch": 1.12, "learning_rate": 4.8224583516830325e-05, "loss": 0.2853, "step": 1862000 }, { "epoch": 1.12, "learning_rate": 4.822248355126976e-05, "loss": 0.2848, "step": 1862500 }, { "epoch": 1.12, "learning_rate": 4.822039198557144e-05, "loss": 0.3018, "step": 1863000 }, { "epoch": 1.12, "learning_rate": 4.821829202001087e-05, "loss": 0.2903, "step": 1863500 }, { "epoch": 1.12, "learning_rate": 4.8216192054450306e-05, "loss": 0.2939, "step": 1864000 }, { "epoch": 1.12, "learning_rate": 4.8214092088889746e-05, "loss": 0.2956, "step": 1864500 }, { "epoch": 1.12, "learning_rate": 4.821199212332917e-05, "loss": 0.2987, "step": 1865000 }, { "epoch": 1.12, "learning_rate": 4.820989215776861e-05, "loss": 0.2906, "step": 1865500 }, { "epoch": 1.12, "learning_rate": 4.8207792192208046e-05, "loss": 0.2951, "step": 1866000 }, { "epoch": 1.12, "learning_rate": 4.820569222664748e-05, "loss": 0.2859, "step": 1866500 }, { "epoch": 1.12, "learning_rate": 4.8203596461018034e-05, "loss": 0.2912, "step": 1867000 }, { "epoch": 1.12, "learning_rate": 4.8201496495457474e-05, "loss": 0.291, "step": 1867500 }, { "epoch": 1.12, "learning_rate": 4.819939652989691e-05, "loss": 0.2904, "step": 1868000 }, { "epoch": 1.12, "learning_rate": 4.819730496419859e-05, "loss": 0.296, "step": 1868500 }, { "epoch": 1.12, "learning_rate": 4.819520499863802e-05, "loss": 0.2926, "step": 1869000 }, { "epoch": 1.12, "learning_rate": 4.8193105033077455e-05, "loss": 0.2901, "step": 1869500 }, { "epoch": 1.12, "learning_rate": 4.8191005067516895e-05, "loss": 0.2865, "step": 1870000 }, { "epoch": 1.12, "learning_rate": 4.818890510195633e-05, "loss": 0.2957, "step": 1870500 }, { "epoch": 1.12, "learning_rate": 4.818680513639576e-05, "loss": 0.2971, "step": 1871000 }, { "epoch": 1.12, "learning_rate": 4.81847051708352e-05, "loss": 0.297, "step": 1871500 }, { "epoch": 1.12, "learning_rate": 4.818260520527463e-05, "loss": 0.2948, "step": 1872000 }, { "epoch": 1.12, "learning_rate": 4.818050523971407e-05, "loss": 0.2886, "step": 1872500 }, { "epoch": 1.12, "learning_rate": 4.81784052741535e-05, "loss": 0.2878, "step": 1873000 }, { "epoch": 1.12, "learning_rate": 4.8176305308592936e-05, "loss": 0.2874, "step": 1873500 }, { "epoch": 1.12, "learning_rate": 4.8174205343032376e-05, "loss": 0.2889, "step": 1874000 }, { "epoch": 1.12, "learning_rate": 4.817210957740293e-05, "loss": 0.2848, "step": 1874500 }, { "epoch": 1.12, "learning_rate": 4.817001381177349e-05, "loss": 0.2919, "step": 1875000 }, { "epoch": 1.12, "learning_rate": 4.8167913846212923e-05, "loss": 0.2922, "step": 1875500 }, { "epoch": 1.12, "learning_rate": 4.816581388065236e-05, "loss": 0.2906, "step": 1876000 }, { "epoch": 1.13, "learning_rate": 4.816371391509179e-05, "loss": 0.2887, "step": 1876500 }, { "epoch": 1.13, "learning_rate": 4.8161613949531224e-05, "loss": 0.292, "step": 1877000 }, { "epoch": 1.13, "learning_rate": 4.815951398397066e-05, "loss": 0.2857, "step": 1877500 }, { "epoch": 1.13, "learning_rate": 4.81574140184101e-05, "loss": 0.2939, "step": 1878000 }, { "epoch": 1.13, "learning_rate": 4.815531405284953e-05, "loss": 0.2915, "step": 1878500 }, { "epoch": 1.13, "learning_rate": 4.8153214087288964e-05, "loss": 0.2877, "step": 1879000 }, { "epoch": 1.13, "learning_rate": 4.8151118321659525e-05, "loss": 0.2913, "step": 1879500 }, { "epoch": 1.13, "learning_rate": 4.814901835609896e-05, "loss": 0.2908, "step": 1880000 }, { "epoch": 1.13, "learning_rate": 4.814691839053839e-05, "loss": 0.2893, "step": 1880500 }, { "epoch": 1.13, "learning_rate": 4.814481842497783e-05, "loss": 0.289, "step": 1881000 }, { "epoch": 1.13, "learning_rate": 4.8142722659348386e-05, "loss": 0.2941, "step": 1881500 }, { "epoch": 1.13, "learning_rate": 4.8140626893718946e-05, "loss": 0.2914, "step": 1882000 }, { "epoch": 1.13, "learning_rate": 4.813852692815838e-05, "loss": 0.2972, "step": 1882500 }, { "epoch": 1.13, "learning_rate": 4.813642696259781e-05, "loss": 0.2925, "step": 1883000 }, { "epoch": 1.13, "learning_rate": 4.813432699703725e-05, "loss": 0.2901, "step": 1883500 }, { "epoch": 1.13, "learning_rate": 4.813222703147668e-05, "loss": 0.2917, "step": 1884000 }, { "epoch": 1.13, "learning_rate": 4.813013126584724e-05, "loss": 0.2884, "step": 1884500 }, { "epoch": 1.13, "learning_rate": 4.8128031300286674e-05, "loss": 0.2908, "step": 1885000 }, { "epoch": 1.13, "learning_rate": 4.8125931334726114e-05, "loss": 0.2857, "step": 1885500 }, { "epoch": 1.13, "learning_rate": 4.812383136916554e-05, "loss": 0.292, "step": 1886000 }, { "epoch": 1.13, "learning_rate": 4.812173140360498e-05, "loss": 0.2937, "step": 1886500 }, { "epoch": 1.13, "learning_rate": 4.8119631438044414e-05, "loss": 0.2846, "step": 1887000 }, { "epoch": 1.13, "learning_rate": 4.811753147248385e-05, "loss": 0.2917, "step": 1887500 }, { "epoch": 1.13, "learning_rate": 4.811543150692329e-05, "loss": 0.2872, "step": 1888000 }, { "epoch": 1.13, "learning_rate": 4.811333154136272e-05, "loss": 0.2871, "step": 1888500 }, { "epoch": 1.13, "learning_rate": 4.8111235775733275e-05, "loss": 0.3003, "step": 1889000 }, { "epoch": 1.13, "learning_rate": 4.810913581017271e-05, "loss": 0.2832, "step": 1889500 }, { "epoch": 1.13, "learning_rate": 4.810703584461215e-05, "loss": 0.2912, "step": 1890000 }, { "epoch": 1.13, "learning_rate": 4.810493587905158e-05, "loss": 0.2878, "step": 1890500 }, { "epoch": 1.13, "learning_rate": 4.8102840113422136e-05, "loss": 0.293, "step": 1891000 }, { "epoch": 1.13, "learning_rate": 4.810074014786157e-05, "loss": 0.2896, "step": 1891500 }, { "epoch": 1.13, "learning_rate": 4.809864018230101e-05, "loss": 0.2859, "step": 1892000 }, { "epoch": 1.13, "learning_rate": 4.809654021674044e-05, "loss": 0.2912, "step": 1892500 }, { "epoch": 1.13, "learning_rate": 4.8094444451111e-05, "loss": 0.2978, "step": 1893000 }, { "epoch": 1.14, "learning_rate": 4.8092344485550437e-05, "loss": 0.2934, "step": 1893500 }, { "epoch": 1.14, "learning_rate": 4.809024451998987e-05, "loss": 0.2919, "step": 1894000 }, { "epoch": 1.14, "learning_rate": 4.808814875436043e-05, "loss": 0.291, "step": 1894500 }, { "epoch": 1.14, "learning_rate": 4.8086048788799864e-05, "loss": 0.2879, "step": 1895000 }, { "epoch": 1.14, "learning_rate": 4.8083948823239304e-05, "loss": 0.2979, "step": 1895500 }, { "epoch": 1.14, "learning_rate": 4.808184885767873e-05, "loss": 0.2911, "step": 1896000 }, { "epoch": 1.14, "learning_rate": 4.8079748892118164e-05, "loss": 0.2892, "step": 1896500 }, { "epoch": 1.14, "learning_rate": 4.8077648926557604e-05, "loss": 0.2882, "step": 1897000 }, { "epoch": 1.14, "learning_rate": 4.807554896099704e-05, "loss": 0.2929, "step": 1897500 }, { "epoch": 1.14, "learning_rate": 4.807344899543647e-05, "loss": 0.2846, "step": 1898000 }, { "epoch": 1.14, "learning_rate": 4.807134902987591e-05, "loss": 0.2893, "step": 1898500 }, { "epoch": 1.14, "learning_rate": 4.8069257464177585e-05, "loss": 0.2964, "step": 1899000 }, { "epoch": 1.14, "learning_rate": 4.8067157498617025e-05, "loss": 0.2886, "step": 1899500 }, { "epoch": 1.14, "learning_rate": 4.806505753305646e-05, "loss": 0.2952, "step": 1900000 }, { "epoch": 1.14, "eval_loss": 0.26594987511634827, "eval_runtime": 1483.6613, "eval_samples_per_second": 355.014, "eval_steps_per_second": 59.169, "step": 1900000 }, { "epoch": 1.14, "learning_rate": 4.806295756749589e-05, "loss": 0.2848, "step": 1900500 }, { "epoch": 1.14, "learning_rate": 4.8060857601935326e-05, "loss": 0.2939, "step": 1901000 }, { "epoch": 1.14, "learning_rate": 4.805875763637476e-05, "loss": 0.2955, "step": 1901500 }, { "epoch": 1.14, "learning_rate": 4.80566576708142e-05, "loss": 0.2916, "step": 1902000 }, { "epoch": 1.14, "learning_rate": 4.805456190518476e-05, "loss": 0.2908, "step": 1902500 }, { "epoch": 1.14, "learning_rate": 4.8052461939624187e-05, "loss": 0.2886, "step": 1903000 }, { "epoch": 1.14, "learning_rate": 4.805036197406362e-05, "loss": 0.2832, "step": 1903500 }, { "epoch": 1.14, "learning_rate": 4.804826200850306e-05, "loss": 0.2933, "step": 1904000 }, { "epoch": 1.14, "learning_rate": 4.8046162042942494e-05, "loss": 0.2939, "step": 1904500 }, { "epoch": 1.14, "learning_rate": 4.804406207738193e-05, "loss": 0.2937, "step": 1905000 }, { "epoch": 1.14, "learning_rate": 4.804196211182137e-05, "loss": 0.2932, "step": 1905500 }, { "epoch": 1.14, "learning_rate": 4.80398621462608e-05, "loss": 0.282, "step": 1906000 }, { "epoch": 1.14, "learning_rate": 4.8037766380631354e-05, "loss": 0.2887, "step": 1906500 }, { "epoch": 1.14, "learning_rate": 4.803566641507079e-05, "loss": 0.2924, "step": 1907000 }, { "epoch": 1.14, "learning_rate": 4.803356644951023e-05, "loss": 0.2897, "step": 1907500 }, { "epoch": 1.14, "learning_rate": 4.803146648394966e-05, "loss": 0.285, "step": 1908000 }, { "epoch": 1.14, "learning_rate": 4.80293665183891e-05, "loss": 0.2909, "step": 1908500 }, { "epoch": 1.14, "learning_rate": 4.8027270752759655e-05, "loss": 0.2946, "step": 1909000 }, { "epoch": 1.14, "learning_rate": 4.802517078719909e-05, "loss": 0.2875, "step": 1909500 }, { "epoch": 1.15, "learning_rate": 4.802307082163852e-05, "loss": 0.2864, "step": 1910000 }, { "epoch": 1.15, "learning_rate": 4.802097085607796e-05, "loss": 0.2903, "step": 1910500 }, { "epoch": 1.15, "learning_rate": 4.8018875090448516e-05, "loss": 0.2899, "step": 1911000 }, { "epoch": 1.15, "learning_rate": 4.801677512488795e-05, "loss": 0.2847, "step": 1911500 }, { "epoch": 1.15, "learning_rate": 4.801467515932738e-05, "loss": 0.2845, "step": 1912000 }, { "epoch": 1.15, "learning_rate": 4.801257519376682e-05, "loss": 0.2868, "step": 1912500 }, { "epoch": 1.15, "learning_rate": 4.801047522820626e-05, "loss": 0.2921, "step": 1913000 }, { "epoch": 1.15, "learning_rate": 4.800837946257681e-05, "loss": 0.2813, "step": 1913500 }, { "epoch": 1.15, "learning_rate": 4.8006279497016244e-05, "loss": 0.2881, "step": 1914000 }, { "epoch": 1.15, "learning_rate": 4.8004179531455684e-05, "loss": 0.2876, "step": 1914500 }, { "epoch": 1.15, "learning_rate": 4.800207956589512e-05, "loss": 0.2882, "step": 1915000 }, { "epoch": 1.15, "learning_rate": 4.799998380026567e-05, "loss": 0.293, "step": 1915500 }, { "epoch": 1.15, "learning_rate": 4.799788383470511e-05, "loss": 0.2901, "step": 1916000 }, { "epoch": 1.15, "learning_rate": 4.7995783869144545e-05, "loss": 0.295, "step": 1916500 }, { "epoch": 1.15, "learning_rate": 4.799368390358398e-05, "loss": 0.2903, "step": 1917000 }, { "epoch": 1.15, "learning_rate": 4.799158813795453e-05, "loss": 0.294, "step": 1917500 }, { "epoch": 1.15, "learning_rate": 4.798948817239397e-05, "loss": 0.2882, "step": 1918000 }, { "epoch": 1.15, "learning_rate": 4.7987388206833406e-05, "loss": 0.2909, "step": 1918500 }, { "epoch": 1.15, "learning_rate": 4.798528824127284e-05, "loss": 0.2927, "step": 1919000 }, { "epoch": 1.15, "learning_rate": 4.798318827571228e-05, "loss": 0.2935, "step": 1919500 }, { "epoch": 1.15, "learning_rate": 4.798108831015171e-05, "loss": 0.2842, "step": 1920000 }, { "epoch": 1.15, "learning_rate": 4.7978992544522266e-05, "loss": 0.2879, "step": 1920500 }, { "epoch": 1.15, "learning_rate": 4.7976896778892827e-05, "loss": 0.297, "step": 1921000 }, { "epoch": 1.15, "learning_rate": 4.797479681333227e-05, "loss": 0.2915, "step": 1921500 }, { "epoch": 1.15, "learning_rate": 4.7972696847771694e-05, "loss": 0.2868, "step": 1922000 }, { "epoch": 1.15, "learning_rate": 4.797059688221113e-05, "loss": 0.2894, "step": 1922500 }, { "epoch": 1.15, "learning_rate": 4.796849691665057e-05, "loss": 0.2898, "step": 1923000 }, { "epoch": 1.15, "learning_rate": 4.796639695109e-05, "loss": 0.2954, "step": 1923500 }, { "epoch": 1.15, "learning_rate": 4.7964296985529434e-05, "loss": 0.2914, "step": 1924000 }, { "epoch": 1.15, "learning_rate": 4.7962197019968874e-05, "loss": 0.2888, "step": 1924500 }, { "epoch": 1.15, "learning_rate": 4.796010125433943e-05, "loss": 0.2993, "step": 1925000 }, { "epoch": 1.15, "learning_rate": 4.795800128877886e-05, "loss": 0.2964, "step": 1925500 }, { "epoch": 1.15, "learning_rate": 4.7955901323218295e-05, "loss": 0.288, "step": 1926000 }, { "epoch": 1.16, "learning_rate": 4.7953801357657735e-05, "loss": 0.2901, "step": 1926500 }, { "epoch": 1.16, "learning_rate": 4.795170139209717e-05, "loss": 0.2949, "step": 1927000 }, { "epoch": 1.16, "learning_rate": 4.794960562646772e-05, "loss": 0.2873, "step": 1927500 }, { "epoch": 1.16, "learning_rate": 4.794750566090716e-05, "loss": 0.2868, "step": 1928000 }, { "epoch": 1.16, "learning_rate": 4.7945405695346596e-05, "loss": 0.2978, "step": 1928500 }, { "epoch": 1.16, "learning_rate": 4.794330572978603e-05, "loss": 0.2914, "step": 1929000 }, { "epoch": 1.16, "learning_rate": 4.794120576422547e-05, "loss": 0.2915, "step": 1929500 }, { "epoch": 1.16, "learning_rate": 4.79391057986649e-05, "loss": 0.2893, "step": 1930000 }, { "epoch": 1.16, "learning_rate": 4.7937010033035457e-05, "loss": 0.2881, "step": 1930500 }, { "epoch": 1.16, "learning_rate": 4.793491006747489e-05, "loss": 0.2995, "step": 1931000 }, { "epoch": 1.16, "learning_rate": 4.793281010191433e-05, "loss": 0.2902, "step": 1931500 }, { "epoch": 1.16, "learning_rate": 4.7930710136353764e-05, "loss": 0.2875, "step": 1932000 }, { "epoch": 1.16, "learning_rate": 4.79286101707932e-05, "loss": 0.2948, "step": 1932500 }, { "epoch": 1.16, "learning_rate": 4.792651020523264e-05, "loss": 0.2933, "step": 1933000 }, { "epoch": 1.16, "learning_rate": 4.792441023967207e-05, "loss": 0.287, "step": 1933500 }, { "epoch": 1.16, "learning_rate": 4.7922318673973745e-05, "loss": 0.2907, "step": 1934000 }, { "epoch": 1.16, "learning_rate": 4.792021870841318e-05, "loss": 0.2916, "step": 1934500 }, { "epoch": 1.16, "learning_rate": 4.791812294278374e-05, "loss": 0.2932, "step": 1935000 }, { "epoch": 1.16, "learning_rate": 4.791602297722318e-05, "loss": 0.2888, "step": 1935500 }, { "epoch": 1.16, "learning_rate": 4.791392301166261e-05, "loss": 0.293, "step": 1936000 }, { "epoch": 1.16, "learning_rate": 4.791182304610204e-05, "loss": 0.2938, "step": 1936500 }, { "epoch": 1.16, "learning_rate": 4.790972308054148e-05, "loss": 0.2851, "step": 1937000 }, { "epoch": 1.16, "learning_rate": 4.790762311498091e-05, "loss": 0.2944, "step": 1937500 }, { "epoch": 1.16, "learning_rate": 4.7905523149420346e-05, "loss": 0.2939, "step": 1938000 }, { "epoch": 1.16, "learning_rate": 4.7903423183859786e-05, "loss": 0.2913, "step": 1938500 }, { "epoch": 1.16, "learning_rate": 4.790132321829922e-05, "loss": 0.2898, "step": 1939000 }, { "epoch": 1.16, "learning_rate": 4.789922325273865e-05, "loss": 0.2872, "step": 1939500 }, { "epoch": 1.16, "learning_rate": 4.789712328717809e-05, "loss": 0.29, "step": 1940000 }, { "epoch": 1.16, "learning_rate": 4.789502332161753e-05, "loss": 0.2954, "step": 1940500 }, { "epoch": 1.16, "learning_rate": 4.789292335605696e-05, "loss": 0.2929, "step": 1941000 }, { "epoch": 1.16, "learning_rate": 4.7890831790358634e-05, "loss": 0.2874, "step": 1941500 }, { "epoch": 1.16, "learning_rate": 4.7888731824798074e-05, "loss": 0.2902, "step": 1942000 }, { "epoch": 1.16, "learning_rate": 4.788663185923751e-05, "loss": 0.2912, "step": 1942500 }, { "epoch": 1.16, "learning_rate": 4.788453189367694e-05, "loss": 0.2841, "step": 1943000 }, { "epoch": 1.17, "learning_rate": 4.788243192811638e-05, "loss": 0.2897, "step": 1943500 }, { "epoch": 1.17, "learning_rate": 4.7880331962555815e-05, "loss": 0.2909, "step": 1944000 }, { "epoch": 1.17, "learning_rate": 4.787823199699525e-05, "loss": 0.2887, "step": 1944500 }, { "epoch": 1.17, "learning_rate": 4.787613203143469e-05, "loss": 0.2839, "step": 1945000 }, { "epoch": 1.17, "learning_rate": 4.787403206587412e-05, "loss": 0.2914, "step": 1945500 }, { "epoch": 1.17, "learning_rate": 4.7871936300244675e-05, "loss": 0.2869, "step": 1946000 }, { "epoch": 1.17, "learning_rate": 4.786983633468411e-05, "loss": 0.3009, "step": 1946500 }, { "epoch": 1.17, "learning_rate": 4.786773636912355e-05, "loss": 0.2904, "step": 1947000 }, { "epoch": 1.17, "learning_rate": 4.786563640356298e-05, "loss": 0.2857, "step": 1947500 }, { "epoch": 1.17, "learning_rate": 4.7863536438002416e-05, "loss": 0.2883, "step": 1948000 }, { "epoch": 1.17, "learning_rate": 4.786144067237297e-05, "loss": 0.2964, "step": 1948500 }, { "epoch": 1.17, "learning_rate": 4.785934070681241e-05, "loss": 0.2915, "step": 1949000 }, { "epoch": 1.17, "learning_rate": 4.785724074125184e-05, "loss": 0.2928, "step": 1949500 }, { "epoch": 1.17, "learning_rate": 4.785514077569128e-05, "loss": 0.2848, "step": 1950000 }, { "epoch": 1.17, "learning_rate": 4.785304081013072e-05, "loss": 0.2933, "step": 1950500 }, { "epoch": 1.17, "learning_rate": 4.785094504450127e-05, "loss": 0.2879, "step": 1951000 }, { "epoch": 1.17, "learning_rate": 4.7848845078940704e-05, "loss": 0.2892, "step": 1951500 }, { "epoch": 1.17, "learning_rate": 4.7846745113380144e-05, "loss": 0.2859, "step": 1952000 }, { "epoch": 1.17, "learning_rate": 4.784464514781958e-05, "loss": 0.2904, "step": 1952500 }, { "epoch": 1.17, "learning_rate": 4.784254938219013e-05, "loss": 0.2933, "step": 1953000 }, { "epoch": 1.17, "learning_rate": 4.7840449416629565e-05, "loss": 0.2869, "step": 1953500 }, { "epoch": 1.17, "learning_rate": 4.783835365100012e-05, "loss": 0.2998, "step": 1954000 }, { "epoch": 1.17, "learning_rate": 4.783625368543956e-05, "loss": 0.2903, "step": 1954500 }, { "epoch": 1.17, "learning_rate": 4.783415371987899e-05, "loss": 0.288, "step": 1955000 }, { "epoch": 1.17, "learning_rate": 4.7832053754318426e-05, "loss": 0.2878, "step": 1955500 }, { "epoch": 1.17, "learning_rate": 4.7829953788757866e-05, "loss": 0.2873, "step": 1956000 }, { "epoch": 1.17, "learning_rate": 4.78278538231973e-05, "loss": 0.2895, "step": 1956500 }, { "epoch": 1.17, "learning_rate": 4.782575385763673e-05, "loss": 0.2873, "step": 1957000 }, { "epoch": 1.17, "learning_rate": 4.782365389207617e-05, "loss": 0.2911, "step": 1957500 }, { "epoch": 1.17, "learning_rate": 4.7821558126446726e-05, "loss": 0.2911, "step": 1958000 }, { "epoch": 1.17, "learning_rate": 4.781945816088616e-05, "loss": 0.291, "step": 1958500 }, { "epoch": 1.17, "learning_rate": 4.78173581953256e-05, "loss": 0.2869, "step": 1959000 }, { "epoch": 1.17, "learning_rate": 4.7815258229765034e-05, "loss": 0.2819, "step": 1959500 }, { "epoch": 1.18, "learning_rate": 4.781315826420447e-05, "loss": 0.2919, "step": 1960000 }, { "epoch": 1.18, "learning_rate": 4.781106249857502e-05, "loss": 0.2926, "step": 1960500 }, { "epoch": 1.18, "learning_rate": 4.780896253301446e-05, "loss": 0.2943, "step": 1961000 }, { "epoch": 1.18, "learning_rate": 4.7806862567453894e-05, "loss": 0.2923, "step": 1961500 }, { "epoch": 1.18, "learning_rate": 4.780476260189333e-05, "loss": 0.2833, "step": 1962000 }, { "epoch": 1.18, "learning_rate": 4.780266683626388e-05, "loss": 0.2924, "step": 1962500 }, { "epoch": 1.18, "learning_rate": 4.780056687070332e-05, "loss": 0.2937, "step": 1963000 }, { "epoch": 1.18, "learning_rate": 4.7798475305005e-05, "loss": 0.2916, "step": 1963500 }, { "epoch": 1.18, "learning_rate": 4.7796375339444436e-05, "loss": 0.2946, "step": 1964000 }, { "epoch": 1.18, "learning_rate": 4.779427537388387e-05, "loss": 0.2905, "step": 1964500 }, { "epoch": 1.18, "learning_rate": 4.77921754083233e-05, "loss": 0.2922, "step": 1965000 }, { "epoch": 1.18, "learning_rate": 4.7790075442762736e-05, "loss": 0.3025, "step": 1965500 }, { "epoch": 1.18, "learning_rate": 4.778797547720217e-05, "loss": 0.2844, "step": 1966000 }, { "epoch": 1.18, "learning_rate": 4.778587551164161e-05, "loss": 0.29, "step": 1966500 }, { "epoch": 1.18, "learning_rate": 4.778377554608104e-05, "loss": 0.2884, "step": 1967000 }, { "epoch": 1.18, "learning_rate": 4.7781675580520477e-05, "loss": 0.2874, "step": 1967500 }, { "epoch": 1.18, "learning_rate": 4.777957561495992e-05, "loss": 0.2931, "step": 1968000 }, { "epoch": 1.18, "learning_rate": 4.777747564939935e-05, "loss": 0.2915, "step": 1968500 }, { "epoch": 1.18, "learning_rate": 4.7775375683838784e-05, "loss": 0.2935, "step": 1969000 }, { "epoch": 1.18, "learning_rate": 4.7773275718278224e-05, "loss": 0.2952, "step": 1969500 }, { "epoch": 1.18, "learning_rate": 4.777117995264878e-05, "loss": 0.2925, "step": 1970000 }, { "epoch": 1.18, "learning_rate": 4.776907998708821e-05, "loss": 0.292, "step": 1970500 }, { "epoch": 1.18, "learning_rate": 4.7766980021527644e-05, "loss": 0.2931, "step": 1971000 }, { "epoch": 1.18, "learning_rate": 4.7764880055967085e-05, "loss": 0.293, "step": 1971500 }, { "epoch": 1.18, "learning_rate": 4.776278429033764e-05, "loss": 0.2887, "step": 1972000 }, { "epoch": 1.18, "learning_rate": 4.776068432477707e-05, "loss": 0.2844, "step": 1972500 }, { "epoch": 1.18, "learning_rate": 4.7758588559147625e-05, "loss": 0.2961, "step": 1973000 }, { "epoch": 1.18, "learning_rate": 4.7756488593587066e-05, "loss": 0.2865, "step": 1973500 }, { "epoch": 1.18, "learning_rate": 4.77543886280265e-05, "loss": 0.2859, "step": 1974000 }, { "epoch": 1.18, "learning_rate": 4.775228866246593e-05, "loss": 0.289, "step": 1974500 }, { "epoch": 1.18, "learning_rate": 4.775018869690537e-05, "loss": 0.2809, "step": 1975000 }, { "epoch": 1.18, "learning_rate": 4.7748088731344806e-05, "loss": 0.2916, "step": 1975500 }, { "epoch": 1.18, "learning_rate": 4.774599296571536e-05, "loss": 0.2862, "step": 1976000 }, { "epoch": 1.18, "learning_rate": 4.774389300015479e-05, "loss": 0.2963, "step": 1976500 }, { "epoch": 1.19, "learning_rate": 4.7741797234525353e-05, "loss": 0.2855, "step": 1977000 }, { "epoch": 1.19, "learning_rate": 4.773969726896479e-05, "loss": 0.2866, "step": 1977500 }, { "epoch": 1.19, "learning_rate": 4.773759730340422e-05, "loss": 0.2881, "step": 1978000 }, { "epoch": 1.19, "learning_rate": 4.773549733784366e-05, "loss": 0.2927, "step": 1978500 }, { "epoch": 1.19, "learning_rate": 4.7733397372283094e-05, "loss": 0.2899, "step": 1979000 }, { "epoch": 1.19, "learning_rate": 4.773129740672253e-05, "loss": 0.2893, "step": 1979500 }, { "epoch": 1.19, "learning_rate": 4.772919744116197e-05, "loss": 0.2968, "step": 1980000 }, { "epoch": 1.19, "learning_rate": 4.77270974756014e-05, "loss": 0.2933, "step": 1980500 }, { "epoch": 1.19, "learning_rate": 4.7724997510040835e-05, "loss": 0.2895, "step": 1981000 }, { "epoch": 1.19, "learning_rate": 4.7722897544480275e-05, "loss": 0.2892, "step": 1981500 }, { "epoch": 1.19, "learning_rate": 4.772079757891971e-05, "loss": 0.295, "step": 1982000 }, { "epoch": 1.19, "learning_rate": 4.7718697613359135e-05, "loss": 0.2958, "step": 1982500 }, { "epoch": 1.19, "learning_rate": 4.7716597647798575e-05, "loss": 0.2907, "step": 1983000 }, { "epoch": 1.19, "learning_rate": 4.7714501882169136e-05, "loss": 0.2881, "step": 1983500 }, { "epoch": 1.19, "learning_rate": 4.771240191660857e-05, "loss": 0.2877, "step": 1984000 }, { "epoch": 1.19, "learning_rate": 4.7710301951047996e-05, "loss": 0.2954, "step": 1984500 }, { "epoch": 1.19, "learning_rate": 4.7708201985487436e-05, "loss": 0.2914, "step": 1985000 }, { "epoch": 1.19, "learning_rate": 4.7706106219857996e-05, "loss": 0.2862, "step": 1985500 }, { "epoch": 1.19, "learning_rate": 4.770400625429743e-05, "loss": 0.2925, "step": 1986000 }, { "epoch": 1.19, "learning_rate": 4.770190628873687e-05, "loss": 0.2883, "step": 1986500 }, { "epoch": 1.19, "learning_rate": 4.76998063231763e-05, "loss": 0.2803, "step": 1987000 }, { "epoch": 1.19, "learning_rate": 4.769770635761573e-05, "loss": 0.2854, "step": 1987500 }, { "epoch": 1.19, "learning_rate": 4.769561059198629e-05, "loss": 0.2887, "step": 1988000 }, { "epoch": 1.19, "learning_rate": 4.769351062642573e-05, "loss": 0.2936, "step": 1988500 }, { "epoch": 1.19, "learning_rate": 4.7691410660865164e-05, "loss": 0.2926, "step": 1989000 }, { "epoch": 1.19, "learning_rate": 4.768931069530459e-05, "loss": 0.2889, "step": 1989500 }, { "epoch": 1.19, "learning_rate": 4.768721072974403e-05, "loss": 0.2886, "step": 1990000 }, { "epoch": 1.19, "learning_rate": 4.7685110764183465e-05, "loss": 0.2883, "step": 1990500 }, { "epoch": 1.19, "learning_rate": 4.7683014998554025e-05, "loss": 0.2866, "step": 1991000 }, { "epoch": 1.19, "learning_rate": 4.768091503299346e-05, "loss": 0.2932, "step": 1991500 }, { "epoch": 1.19, "learning_rate": 4.767881506743289e-05, "loss": 0.2905, "step": 1992000 }, { "epoch": 1.19, "learning_rate": 4.7676715101872325e-05, "loss": 0.2874, "step": 1992500 }, { "epoch": 1.19, "learning_rate": 4.767461513631176e-05, "loss": 0.2861, "step": 1993000 }, { "epoch": 1.2, "learning_rate": 4.7672519370682326e-05, "loss": 0.2925, "step": 1993500 }, { "epoch": 1.2, "learning_rate": 4.767042360505288e-05, "loss": 0.2865, "step": 1994000 }, { "epoch": 1.2, "learning_rate": 4.766832363949231e-05, "loss": 0.2851, "step": 1994500 }, { "epoch": 1.2, "learning_rate": 4.7666223673931746e-05, "loss": 0.2911, "step": 1995000 }, { "epoch": 1.2, "learning_rate": 4.766412370837119e-05, "loss": 0.2849, "step": 1995500 }, { "epoch": 1.2, "learning_rate": 4.766202794274174e-05, "loss": 0.287, "step": 1996000 }, { "epoch": 1.2, "learning_rate": 4.7659927977181174e-05, "loss": 0.2934, "step": 1996500 }, { "epoch": 1.2, "learning_rate": 4.765782801162061e-05, "loss": 0.2885, "step": 1997000 }, { "epoch": 1.2, "learning_rate": 4.765572804606005e-05, "loss": 0.2812, "step": 1997500 }, { "epoch": 1.2, "learning_rate": 4.765362808049948e-05, "loss": 0.291, "step": 1998000 }, { "epoch": 1.2, "learning_rate": 4.7651528114938914e-05, "loss": 0.2923, "step": 1998500 }, { "epoch": 1.2, "learning_rate": 4.764942814937835e-05, "loss": 0.2855, "step": 1999000 }, { "epoch": 1.2, "learning_rate": 4.764732818381778e-05, "loss": 0.2872, "step": 1999500 }, { "epoch": 1.2, "learning_rate": 4.764522821825722e-05, "loss": 0.2839, "step": 2000000 }, { "epoch": 1.2, "eval_loss": 0.2663571536540985, "eval_runtime": 1466.8387, "eval_samples_per_second": 359.085, "eval_steps_per_second": 59.848, "step": 2000000 }, { "epoch": 1.2, "learning_rate": 4.764313245262778e-05, "loss": 0.2949, "step": 2000500 }, { "epoch": 1.2, "learning_rate": 4.7641032487067215e-05, "loss": 0.2929, "step": 2001000 }, { "epoch": 1.2, "learning_rate": 4.763893252150664e-05, "loss": 0.2848, "step": 2001500 }, { "epoch": 1.2, "learning_rate": 4.763683255594608e-05, "loss": 0.288, "step": 2002000 }, { "epoch": 1.2, "learning_rate": 4.7634732590385516e-05, "loss": 0.2866, "step": 2002500 }, { "epoch": 1.2, "learning_rate": 4.7632636824756076e-05, "loss": 0.2901, "step": 2003000 }, { "epoch": 1.2, "learning_rate": 4.763053685919551e-05, "loss": 0.2909, "step": 2003500 }, { "epoch": 1.2, "learning_rate": 4.762844109356606e-05, "loss": 0.2872, "step": 2004000 }, { "epoch": 1.2, "learning_rate": 4.76263411280055e-05, "loss": 0.2933, "step": 2004500 }, { "epoch": 1.2, "learning_rate": 4.762424116244494e-05, "loss": 0.2938, "step": 2005000 }, { "epoch": 1.2, "learning_rate": 4.762214119688437e-05, "loss": 0.2884, "step": 2005500 }, { "epoch": 1.2, "learning_rate": 4.762004543125493e-05, "loss": 0.2893, "step": 2006000 }, { "epoch": 1.2, "learning_rate": 4.7617945465694364e-05, "loss": 0.2924, "step": 2006500 }, { "epoch": 1.2, "learning_rate": 4.76158455001338e-05, "loss": 0.2868, "step": 2007000 }, { "epoch": 1.2, "learning_rate": 4.761374553457324e-05, "loss": 0.2847, "step": 2007500 }, { "epoch": 1.2, "learning_rate": 4.761164556901267e-05, "loss": 0.2918, "step": 2008000 }, { "epoch": 1.2, "learning_rate": 4.76095456034521e-05, "loss": 0.2901, "step": 2008500 }, { "epoch": 1.2, "learning_rate": 4.760744983782266e-05, "loss": 0.2897, "step": 2009000 }, { "epoch": 1.2, "learning_rate": 4.76053498722621e-05, "loss": 0.2879, "step": 2009500 }, { "epoch": 1.21, "learning_rate": 4.760324990670153e-05, "loss": 0.2895, "step": 2010000 }, { "epoch": 1.21, "learning_rate": 4.7601149941140965e-05, "loss": 0.2908, "step": 2010500 }, { "epoch": 1.21, "learning_rate": 4.75990499755804e-05, "loss": 0.2882, "step": 2011000 }, { "epoch": 1.21, "learning_rate": 4.759695001001983e-05, "loss": 0.2843, "step": 2011500 }, { "epoch": 1.21, "learning_rate": 4.7594850044459266e-05, "loss": 0.2917, "step": 2012000 }, { "epoch": 1.21, "learning_rate": 4.7592750078898706e-05, "loss": 0.2897, "step": 2012500 }, { "epoch": 1.21, "learning_rate": 4.759065011333814e-05, "loss": 0.2903, "step": 2013000 }, { "epoch": 1.21, "learning_rate": 4.758855014777757e-05, "loss": 0.2857, "step": 2013500 }, { "epoch": 1.21, "learning_rate": 4.758645438214813e-05, "loss": 0.2941, "step": 2014000 }, { "epoch": 1.21, "learning_rate": 4.758435441658757e-05, "loss": 0.2988, "step": 2014500 }, { "epoch": 1.21, "learning_rate": 4.7582254451027e-05, "loss": 0.2929, "step": 2015000 }, { "epoch": 1.21, "learning_rate": 4.758015448546644e-05, "loss": 0.2876, "step": 2015500 }, { "epoch": 1.21, "learning_rate": 4.7578054519905874e-05, "loss": 0.2863, "step": 2016000 }, { "epoch": 1.21, "learning_rate": 4.757595875427643e-05, "loss": 0.2929, "step": 2016500 }, { "epoch": 1.21, "learning_rate": 4.757385878871586e-05, "loss": 0.2873, "step": 2017000 }, { "epoch": 1.21, "learning_rate": 4.75717588231553e-05, "loss": 0.2966, "step": 2017500 }, { "epoch": 1.21, "learning_rate": 4.7569658857594735e-05, "loss": 0.2847, "step": 2018000 }, { "epoch": 1.21, "learning_rate": 4.756755889203417e-05, "loss": 0.2926, "step": 2018500 }, { "epoch": 1.21, "learning_rate": 4.756546312640472e-05, "loss": 0.2893, "step": 2019000 }, { "epoch": 1.21, "learning_rate": 4.756336316084416e-05, "loss": 0.2936, "step": 2019500 }, { "epoch": 1.21, "learning_rate": 4.7561263195283595e-05, "loss": 0.2902, "step": 2020000 }, { "epoch": 1.21, "learning_rate": 4.755916322972303e-05, "loss": 0.2885, "step": 2020500 }, { "epoch": 1.21, "learning_rate": 4.755706746409359e-05, "loss": 0.2917, "step": 2021000 }, { "epoch": 1.21, "learning_rate": 4.755496749853302e-05, "loss": 0.292, "step": 2021500 }, { "epoch": 1.21, "learning_rate": 4.7552867532972456e-05, "loss": 0.2853, "step": 2022000 }, { "epoch": 1.21, "learning_rate": 4.7550771767343016e-05, "loss": 0.292, "step": 2022500 }, { "epoch": 1.21, "learning_rate": 4.754867180178245e-05, "loss": 0.2933, "step": 2023000 }, { "epoch": 1.21, "learning_rate": 4.754657183622188e-05, "loss": 0.2953, "step": 2023500 }, { "epoch": 1.21, "learning_rate": 4.754447187066132e-05, "loss": 0.2967, "step": 2024000 }, { "epoch": 1.21, "learning_rate": 4.754237190510076e-05, "loss": 0.2924, "step": 2024500 }, { "epoch": 1.21, "learning_rate": 4.754027193954019e-05, "loss": 0.2944, "step": 2025000 }, { "epoch": 1.21, "learning_rate": 4.7538171973979624e-05, "loss": 0.2894, "step": 2025500 }, { "epoch": 1.21, "learning_rate": 4.7536072008419064e-05, "loss": 0.2885, "step": 2026000 }, { "epoch": 1.21, "learning_rate": 4.75339720428585e-05, "loss": 0.2927, "step": 2026500 }, { "epoch": 1.22, "learning_rate": 4.753187207729793e-05, "loss": 0.2981, "step": 2027000 }, { "epoch": 1.22, "learning_rate": 4.752977211173737e-05, "loss": 0.285, "step": 2027500 }, { "epoch": 1.22, "learning_rate": 4.7527672146176805e-05, "loss": 0.2874, "step": 2028000 }, { "epoch": 1.22, "learning_rate": 4.752557218061623e-05, "loss": 0.2878, "step": 2028500 }, { "epoch": 1.22, "learning_rate": 4.752347221505567e-05, "loss": 0.2867, "step": 2029000 }, { "epoch": 1.22, "learning_rate": 4.7521372249495105e-05, "loss": 0.2823, "step": 2029500 }, { "epoch": 1.22, "learning_rate": 4.7519276483865666e-05, "loss": 0.2955, "step": 2030000 }, { "epoch": 1.22, "learning_rate": 4.75171765183051e-05, "loss": 0.2963, "step": 2030500 }, { "epoch": 1.22, "learning_rate": 4.751507655274453e-05, "loss": 0.2935, "step": 2031000 }, { "epoch": 1.22, "learning_rate": 4.751298078711509e-05, "loss": 0.2988, "step": 2031500 }, { "epoch": 1.22, "learning_rate": 4.7510880821554526e-05, "loss": 0.2833, "step": 2032000 }, { "epoch": 1.22, "learning_rate": 4.7508780855993966e-05, "loss": 0.2845, "step": 2032500 }, { "epoch": 1.22, "learning_rate": 4.750668089043339e-05, "loss": 0.2861, "step": 2033000 }, { "epoch": 1.22, "learning_rate": 4.750458092487283e-05, "loss": 0.283, "step": 2033500 }, { "epoch": 1.22, "learning_rate": 4.750248095931227e-05, "loss": 0.2891, "step": 2034000 }, { "epoch": 1.22, "learning_rate": 4.750038519368283e-05, "loss": 0.29, "step": 2034500 }, { "epoch": 1.22, "learning_rate": 4.749828522812226e-05, "loss": 0.2865, "step": 2035000 }, { "epoch": 1.22, "learning_rate": 4.749618526256169e-05, "loss": 0.2877, "step": 2035500 }, { "epoch": 1.22, "learning_rate": 4.749408529700113e-05, "loss": 0.289, "step": 2036000 }, { "epoch": 1.22, "learning_rate": 4.749198533144056e-05, "loss": 0.2925, "step": 2036500 }, { "epoch": 1.22, "learning_rate": 4.7489885365879995e-05, "loss": 0.2957, "step": 2037000 }, { "epoch": 1.22, "learning_rate": 4.7487785400319435e-05, "loss": 0.2851, "step": 2037500 }, { "epoch": 1.22, "learning_rate": 4.748568543475887e-05, "loss": 0.2887, "step": 2038000 }, { "epoch": 1.22, "learning_rate": 4.74835854691983e-05, "loss": 0.2883, "step": 2038500 }, { "epoch": 1.22, "learning_rate": 4.748149390349998e-05, "loss": 0.2902, "step": 2039000 }, { "epoch": 1.22, "learning_rate": 4.747939393793942e-05, "loss": 0.2929, "step": 2039500 }, { "epoch": 1.22, "learning_rate": 4.7477293972378856e-05, "loss": 0.2865, "step": 2040000 }, { "epoch": 1.22, "learning_rate": 4.747519400681828e-05, "loss": 0.2915, "step": 2040500 }, { "epoch": 1.22, "learning_rate": 4.747309404125772e-05, "loss": 0.2858, "step": 2041000 }, { "epoch": 1.22, "learning_rate": 4.7470994075697156e-05, "loss": 0.2921, "step": 2041500 }, { "epoch": 1.22, "learning_rate": 4.746889411013659e-05, "loss": 0.2885, "step": 2042000 }, { "epoch": 1.22, "learning_rate": 4.746679414457603e-05, "loss": 0.2908, "step": 2042500 }, { "epoch": 1.22, "learning_rate": 4.746469417901546e-05, "loss": 0.291, "step": 2043000 }, { "epoch": 1.23, "learning_rate": 4.746259841338602e-05, "loss": 0.2931, "step": 2043500 }, { "epoch": 1.23, "learning_rate": 4.746049844782545e-05, "loss": 0.2967, "step": 2044000 }, { "epoch": 1.23, "learning_rate": 4.745839848226489e-05, "loss": 0.2864, "step": 2044500 }, { "epoch": 1.23, "learning_rate": 4.7456298516704324e-05, "loss": 0.2896, "step": 2045000 }, { "epoch": 1.23, "learning_rate": 4.745420275107488e-05, "loss": 0.2899, "step": 2045500 }, { "epoch": 1.23, "learning_rate": 4.745210278551432e-05, "loss": 0.2894, "step": 2046000 }, { "epoch": 1.23, "learning_rate": 4.745000281995375e-05, "loss": 0.2905, "step": 2046500 }, { "epoch": 1.23, "learning_rate": 4.7447902854393185e-05, "loss": 0.2849, "step": 2047000 }, { "epoch": 1.23, "learning_rate": 4.7445802888832625e-05, "loss": 0.2878, "step": 2047500 }, { "epoch": 1.23, "learning_rate": 4.744370292327206e-05, "loss": 0.2885, "step": 2048000 }, { "epoch": 1.23, "learning_rate": 4.744160295771149e-05, "loss": 0.2827, "step": 2048500 }, { "epoch": 1.23, "learning_rate": 4.7439507192082046e-05, "loss": 0.2891, "step": 2049000 }, { "epoch": 1.23, "learning_rate": 4.7437407226521486e-05, "loss": 0.2826, "step": 2049500 }, { "epoch": 1.23, "learning_rate": 4.743531146089204e-05, "loss": 0.287, "step": 2050000 }, { "epoch": 1.23, "learning_rate": 4.743321149533147e-05, "loss": 0.296, "step": 2050500 }, { "epoch": 1.23, "learning_rate": 4.7431111529770906e-05, "loss": 0.291, "step": 2051000 }, { "epoch": 1.23, "learning_rate": 4.7429011564210346e-05, "loss": 0.2902, "step": 2051500 }, { "epoch": 1.23, "learning_rate": 4.742691159864978e-05, "loss": 0.2896, "step": 2052000 }, { "epoch": 1.23, "learning_rate": 4.742481163308922e-05, "loss": 0.2848, "step": 2052500 }, { "epoch": 1.23, "learning_rate": 4.7422711667528654e-05, "loss": 0.2904, "step": 2053000 }, { "epoch": 1.23, "learning_rate": 4.742061170196809e-05, "loss": 0.2874, "step": 2053500 }, { "epoch": 1.23, "learning_rate": 4.741851593633864e-05, "loss": 0.2971, "step": 2054000 }, { "epoch": 1.23, "learning_rate": 4.7416420170709194e-05, "loss": 0.2912, "step": 2054500 }, { "epoch": 1.23, "learning_rate": 4.7414320205148634e-05, "loss": 0.2942, "step": 2055000 }, { "epoch": 1.23, "learning_rate": 4.741222023958807e-05, "loss": 0.2954, "step": 2055500 }, { "epoch": 1.23, "learning_rate": 4.74101202740275e-05, "loss": 0.2937, "step": 2056000 }, { "epoch": 1.23, "learning_rate": 4.740802030846694e-05, "loss": 0.2837, "step": 2056500 }, { "epoch": 1.23, "learning_rate": 4.7405920342906375e-05, "loss": 0.2897, "step": 2057000 }, { "epoch": 1.23, "learning_rate": 4.740382037734581e-05, "loss": 0.2859, "step": 2057500 }, { "epoch": 1.23, "learning_rate": 4.740172461171636e-05, "loss": 0.29, "step": 2058000 }, { "epoch": 1.23, "learning_rate": 4.73996246461558e-05, "loss": 0.2898, "step": 2058500 }, { "epoch": 1.23, "learning_rate": 4.739752888052636e-05, "loss": 0.2893, "step": 2059000 }, { "epoch": 1.23, "learning_rate": 4.739542891496579e-05, "loss": 0.2889, "step": 2059500 }, { "epoch": 1.24, "learning_rate": 4.739332894940523e-05, "loss": 0.2964, "step": 2060000 }, { "epoch": 1.24, "learning_rate": 4.739122898384466e-05, "loss": 0.2908, "step": 2060500 }, { "epoch": 1.24, "learning_rate": 4.7389129018284097e-05, "loss": 0.2828, "step": 2061000 }, { "epoch": 1.24, "learning_rate": 4.738702905272354e-05, "loss": 0.286, "step": 2061500 }, { "epoch": 1.24, "learning_rate": 4.738492908716297e-05, "loss": 0.286, "step": 2062000 }, { "epoch": 1.24, "learning_rate": 4.7382829121602404e-05, "loss": 0.2839, "step": 2062500 }, { "epoch": 1.24, "learning_rate": 4.738073335597296e-05, "loss": 0.2901, "step": 2063000 }, { "epoch": 1.24, "learning_rate": 4.73786333904124e-05, "loss": 0.2936, "step": 2063500 }, { "epoch": 1.24, "learning_rate": 4.737653342485183e-05, "loss": 0.2871, "step": 2064000 }, { "epoch": 1.24, "learning_rate": 4.7374433459291264e-05, "loss": 0.291, "step": 2064500 }, { "epoch": 1.24, "learning_rate": 4.7372333493730705e-05, "loss": 0.2962, "step": 2065000 }, { "epoch": 1.24, "learning_rate": 4.737023352817014e-05, "loss": 0.2929, "step": 2065500 }, { "epoch": 1.24, "learning_rate": 4.736813356260957e-05, "loss": 0.2923, "step": 2066000 }, { "epoch": 1.24, "learning_rate": 4.736603359704901e-05, "loss": 0.2874, "step": 2066500 }, { "epoch": 1.24, "learning_rate": 4.736393363148844e-05, "loss": 0.2895, "step": 2067000 }, { "epoch": 1.24, "learning_rate": 4.736183366592788e-05, "loss": 0.2912, "step": 2067500 }, { "epoch": 1.24, "learning_rate": 4.735973370036731e-05, "loss": 0.2893, "step": 2068000 }, { "epoch": 1.24, "learning_rate": 4.7357633734806746e-05, "loss": 0.2886, "step": 2068500 }, { "epoch": 1.24, "learning_rate": 4.7355537969177306e-05, "loss": 0.2868, "step": 2069000 }, { "epoch": 1.24, "learning_rate": 4.735343800361674e-05, "loss": 0.2899, "step": 2069500 }, { "epoch": 1.24, "learning_rate": 4.735133803805617e-05, "loss": 0.2926, "step": 2070000 }, { "epoch": 1.24, "learning_rate": 4.7349238072495606e-05, "loss": 0.2847, "step": 2070500 }, { "epoch": 1.24, "learning_rate": 4.734713810693505e-05, "loss": 0.2948, "step": 2071000 }, { "epoch": 1.24, "learning_rate": 4.734504234130561e-05, "loss": 0.2889, "step": 2071500 }, { "epoch": 1.24, "learning_rate": 4.7342942375745034e-05, "loss": 0.2858, "step": 2072000 }, { "epoch": 1.24, "learning_rate": 4.734084241018447e-05, "loss": 0.2901, "step": 2072500 }, { "epoch": 1.24, "learning_rate": 4.733874244462391e-05, "loss": 0.2873, "step": 2073000 }, { "epoch": 1.24, "learning_rate": 4.733664247906334e-05, "loss": 0.2866, "step": 2073500 }, { "epoch": 1.24, "learning_rate": 4.73345467134339e-05, "loss": 0.2844, "step": 2074000 }, { "epoch": 1.24, "learning_rate": 4.7332446747873335e-05, "loss": 0.2854, "step": 2074500 }, { "epoch": 1.24, "learning_rate": 4.7330350982243895e-05, "loss": 0.2919, "step": 2075000 }, { "epoch": 1.24, "learning_rate": 4.732825101668333e-05, "loss": 0.2856, "step": 2075500 }, { "epoch": 1.24, "learning_rate": 4.732615105112276e-05, "loss": 0.2967, "step": 2076000 }, { "epoch": 1.24, "learning_rate": 4.73240510855622e-05, "loss": 0.2859, "step": 2076500 }, { "epoch": 1.25, "learning_rate": 4.732195112000163e-05, "loss": 0.287, "step": 2077000 }, { "epoch": 1.25, "learning_rate": 4.731985115444106e-05, "loss": 0.2951, "step": 2077500 }, { "epoch": 1.25, "learning_rate": 4.73177511888805e-05, "loss": 0.2888, "step": 2078000 }, { "epoch": 1.25, "learning_rate": 4.7315651223319936e-05, "loss": 0.29, "step": 2078500 }, { "epoch": 1.25, "learning_rate": 4.731355125775937e-05, "loss": 0.2904, "step": 2079000 }, { "epoch": 1.25, "learning_rate": 4.731145549212992e-05, "loss": 0.293, "step": 2079500 }, { "epoch": 1.25, "learning_rate": 4.730935552656936e-05, "loss": 0.2866, "step": 2080000 }, { "epoch": 1.25, "learning_rate": 4.73072555610088e-05, "loss": 0.2847, "step": 2080500 }, { "epoch": 1.25, "learning_rate": 4.730515979537936e-05, "loss": 0.2854, "step": 2081000 }, { "epoch": 1.25, "learning_rate": 4.730305982981879e-05, "loss": 0.2918, "step": 2081500 }, { "epoch": 1.25, "learning_rate": 4.7300959864258224e-05, "loss": 0.2852, "step": 2082000 }, { "epoch": 1.25, "learning_rate": 4.729885989869766e-05, "loss": 0.2858, "step": 2082500 }, { "epoch": 1.25, "learning_rate": 4.72967599331371e-05, "loss": 0.2881, "step": 2083000 }, { "epoch": 1.25, "learning_rate": 4.729465996757653e-05, "loss": 0.2913, "step": 2083500 }, { "epoch": 1.25, "learning_rate": 4.7292560002015965e-05, "loss": 0.2846, "step": 2084000 }, { "epoch": 1.25, "learning_rate": 4.729046423638652e-05, "loss": 0.2848, "step": 2084500 }, { "epoch": 1.25, "learning_rate": 4.728836427082596e-05, "loss": 0.2851, "step": 2085000 }, { "epoch": 1.25, "learning_rate": 4.728626430526539e-05, "loss": 0.2829, "step": 2085500 }, { "epoch": 1.25, "learning_rate": 4.7284164339704825e-05, "loss": 0.2904, "step": 2086000 }, { "epoch": 1.25, "learning_rate": 4.7282064374144266e-05, "loss": 0.2836, "step": 2086500 }, { "epoch": 1.25, "learning_rate": 4.72799644085837e-05, "loss": 0.2887, "step": 2087000 }, { "epoch": 1.25, "learning_rate": 4.727786444302313e-05, "loss": 0.2851, "step": 2087500 }, { "epoch": 1.25, "learning_rate": 4.7275768677393686e-05, "loss": 0.285, "step": 2088000 }, { "epoch": 1.25, "learning_rate": 4.7273668711833126e-05, "loss": 0.2879, "step": 2088500 }, { "epoch": 1.25, "learning_rate": 4.727156874627256e-05, "loss": 0.2941, "step": 2089000 }, { "epoch": 1.25, "learning_rate": 4.726947298064311e-05, "loss": 0.2894, "step": 2089500 }, { "epoch": 1.25, "learning_rate": 4.7267373015082554e-05, "loss": 0.289, "step": 2090000 }, { "epoch": 1.25, "learning_rate": 4.726527304952199e-05, "loss": 0.2852, "step": 2090500 }, { "epoch": 1.25, "learning_rate": 4.726317308396142e-05, "loss": 0.287, "step": 2091000 }, { "epoch": 1.25, "learning_rate": 4.726107311840086e-05, "loss": 0.2898, "step": 2091500 }, { "epoch": 1.25, "learning_rate": 4.7258973152840294e-05, "loss": 0.2872, "step": 2092000 }, { "epoch": 1.25, "learning_rate": 4.725687318727973e-05, "loss": 0.2911, "step": 2092500 }, { "epoch": 1.25, "learning_rate": 4.725477322171917e-05, "loss": 0.2889, "step": 2093000 }, { "epoch": 1.26, "learning_rate": 4.72526732561586e-05, "loss": 0.2893, "step": 2093500 }, { "epoch": 1.26, "learning_rate": 4.725057329059803e-05, "loss": 0.2866, "step": 2094000 }, { "epoch": 1.26, "learning_rate": 4.724847332503747e-05, "loss": 0.284, "step": 2094500 }, { "epoch": 1.26, "learning_rate": 4.72463733594769e-05, "loss": 0.2836, "step": 2095000 }, { "epoch": 1.26, "learning_rate": 4.724427759384746e-05, "loss": 0.2921, "step": 2095500 }, { "epoch": 1.26, "learning_rate": 4.7242177628286895e-05, "loss": 0.2875, "step": 2096000 }, { "epoch": 1.26, "learning_rate": 4.724007766272633e-05, "loss": 0.289, "step": 2096500 }, { "epoch": 1.26, "learning_rate": 4.723797769716576e-05, "loss": 0.2909, "step": 2097000 }, { "epoch": 1.26, "learning_rate": 4.7235877731605196e-05, "loss": 0.2933, "step": 2097500 }, { "epoch": 1.26, "learning_rate": 4.7233777766044636e-05, "loss": 0.2882, "step": 2098000 }, { "epoch": 1.26, "learning_rate": 4.723167780048407e-05, "loss": 0.2866, "step": 2098500 }, { "epoch": 1.26, "learning_rate": 4.722957783492351e-05, "loss": 0.2836, "step": 2099000 }, { "epoch": 1.26, "learning_rate": 4.722747786936294e-05, "loss": 0.2872, "step": 2099500 }, { "epoch": 1.26, "learning_rate": 4.72253821037335e-05, "loss": 0.2839, "step": 2100000 }, { "epoch": 1.26, "eval_loss": 0.262479692697525, "eval_runtime": 1461.3082, "eval_samples_per_second": 360.444, "eval_steps_per_second": 60.074, "step": 2100000 }, { "epoch": 1.26, "learning_rate": 4.722328213817293e-05, "loss": 0.2908, "step": 2100500 }, { "epoch": 1.26, "learning_rate": 4.722118217261237e-05, "loss": 0.2864, "step": 2101000 }, { "epoch": 1.26, "learning_rate": 4.7219082207051804e-05, "loss": 0.2912, "step": 2101500 }, { "epoch": 1.26, "learning_rate": 4.721698224149124e-05, "loss": 0.2847, "step": 2102000 }, { "epoch": 1.26, "learning_rate": 4.721488647586179e-05, "loss": 0.2832, "step": 2102500 }, { "epoch": 1.26, "learning_rate": 4.721278651030123e-05, "loss": 0.2911, "step": 2103000 }, { "epoch": 1.26, "learning_rate": 4.7210686544740665e-05, "loss": 0.2834, "step": 2103500 }, { "epoch": 1.26, "learning_rate": 4.72085865791801e-05, "loss": 0.2924, "step": 2104000 }, { "epoch": 1.26, "learning_rate": 4.720648661361954e-05, "loss": 0.285, "step": 2104500 }, { "epoch": 1.26, "learning_rate": 4.720439084799009e-05, "loss": 0.2949, "step": 2105000 }, { "epoch": 1.26, "learning_rate": 4.720229508236065e-05, "loss": 0.2913, "step": 2105500 }, { "epoch": 1.26, "learning_rate": 4.7200199316731206e-05, "loss": 0.2911, "step": 2106000 }, { "epoch": 1.26, "learning_rate": 4.719809935117064e-05, "loss": 0.2898, "step": 2106500 }, { "epoch": 1.26, "learning_rate": 4.719599938561008e-05, "loss": 0.2804, "step": 2107000 }, { "epoch": 1.26, "learning_rate": 4.719389942004951e-05, "loss": 0.2855, "step": 2107500 }, { "epoch": 1.26, "learning_rate": 4.719180365442007e-05, "loss": 0.2901, "step": 2108000 }, { "epoch": 1.26, "learning_rate": 4.71897036888595e-05, "loss": 0.2806, "step": 2108500 }, { "epoch": 1.26, "learning_rate": 4.718760372329894e-05, "loss": 0.2884, "step": 2109000 }, { "epoch": 1.26, "learning_rate": 4.7185503757738374e-05, "loss": 0.2821, "step": 2109500 }, { "epoch": 1.27, "learning_rate": 4.718340379217781e-05, "loss": 0.2918, "step": 2110000 }, { "epoch": 1.27, "learning_rate": 4.718130382661725e-05, "loss": 0.2849, "step": 2110500 }, { "epoch": 1.27, "learning_rate": 4.7179203861056674e-05, "loss": 0.2892, "step": 2111000 }, { "epoch": 1.27, "learning_rate": 4.7177108095427235e-05, "loss": 0.2847, "step": 2111500 }, { "epoch": 1.27, "learning_rate": 4.7175008129866675e-05, "loss": 0.2837, "step": 2112000 }, { "epoch": 1.27, "learning_rate": 4.717290816430611e-05, "loss": 0.2866, "step": 2112500 }, { "epoch": 1.27, "learning_rate": 4.717080819874554e-05, "loss": 0.2896, "step": 2113000 }, { "epoch": 1.27, "learning_rate": 4.7168708233184975e-05, "loss": 0.2888, "step": 2113500 }, { "epoch": 1.27, "learning_rate": 4.716660826762441e-05, "loss": 0.2881, "step": 2114000 }, { "epoch": 1.27, "learning_rate": 4.716450830206384e-05, "loss": 0.2846, "step": 2114500 }, { "epoch": 1.27, "learning_rate": 4.716240833650328e-05, "loss": 0.2886, "step": 2115000 }, { "epoch": 1.27, "learning_rate": 4.7160312570873836e-05, "loss": 0.2848, "step": 2115500 }, { "epoch": 1.27, "learning_rate": 4.715821260531327e-05, "loss": 0.2855, "step": 2116000 }, { "epoch": 1.27, "learning_rate": 4.71561126397527e-05, "loss": 0.2971, "step": 2116500 }, { "epoch": 1.27, "learning_rate": 4.715401267419214e-05, "loss": 0.294, "step": 2117000 }, { "epoch": 1.27, "learning_rate": 4.7151912708631576e-05, "loss": 0.288, "step": 2117500 }, { "epoch": 1.27, "learning_rate": 4.714981274307101e-05, "loss": 0.2898, "step": 2118000 }, { "epoch": 1.27, "learning_rate": 4.714771697744157e-05, "loss": 0.2862, "step": 2118500 }, { "epoch": 1.27, "learning_rate": 4.7145617011881004e-05, "loss": 0.2874, "step": 2119000 }, { "epoch": 1.27, "learning_rate": 4.714351704632044e-05, "loss": 0.2944, "step": 2119500 }, { "epoch": 1.27, "learning_rate": 4.714141708075988e-05, "loss": 0.2836, "step": 2120000 }, { "epoch": 1.27, "learning_rate": 4.713931711519931e-05, "loss": 0.2885, "step": 2120500 }, { "epoch": 1.27, "learning_rate": 4.7137217149638744e-05, "loss": 0.2917, "step": 2121000 }, { "epoch": 1.27, "learning_rate": 4.7135117184078185e-05, "loss": 0.289, "step": 2121500 }, { "epoch": 1.27, "learning_rate": 4.713301721851762e-05, "loss": 0.2902, "step": 2122000 }, { "epoch": 1.27, "learning_rate": 4.713092145288817e-05, "loss": 0.2909, "step": 2122500 }, { "epoch": 1.27, "learning_rate": 4.7128821487327605e-05, "loss": 0.2906, "step": 2123000 }, { "epoch": 1.27, "learning_rate": 4.7126721521767045e-05, "loss": 0.2889, "step": 2123500 }, { "epoch": 1.27, "learning_rate": 4.71246257561376e-05, "loss": 0.2861, "step": 2124000 }, { "epoch": 1.27, "learning_rate": 4.712252579057703e-05, "loss": 0.2828, "step": 2124500 }, { "epoch": 1.27, "learning_rate": 4.7120425825016466e-05, "loss": 0.2916, "step": 2125000 }, { "epoch": 1.27, "learning_rate": 4.7118325859455906e-05, "loss": 0.2867, "step": 2125500 }, { "epoch": 1.27, "learning_rate": 4.711622589389534e-05, "loss": 0.2899, "step": 2126000 }, { "epoch": 1.27, "learning_rate": 4.711412592833477e-05, "loss": 0.2875, "step": 2126500 }, { "epoch": 1.28, "learning_rate": 4.711202596277421e-05, "loss": 0.2869, "step": 2127000 }, { "epoch": 1.28, "learning_rate": 4.710992599721365e-05, "loss": 0.2861, "step": 2127500 }, { "epoch": 1.28, "learning_rate": 4.710782603165308e-05, "loss": 0.2868, "step": 2128000 }, { "epoch": 1.28, "learning_rate": 4.710573026602364e-05, "loss": 0.2866, "step": 2128500 }, { "epoch": 1.28, "learning_rate": 4.7103630300463074e-05, "loss": 0.289, "step": 2129000 }, { "epoch": 1.28, "learning_rate": 4.710153033490251e-05, "loss": 0.2875, "step": 2129500 }, { "epoch": 1.28, "learning_rate": 4.709943456927306e-05, "loss": 0.2911, "step": 2130000 }, { "epoch": 1.28, "learning_rate": 4.70973346037125e-05, "loss": 0.2801, "step": 2130500 }, { "epoch": 1.28, "learning_rate": 4.7095234638151935e-05, "loss": 0.2883, "step": 2131000 }, { "epoch": 1.28, "learning_rate": 4.709313467259137e-05, "loss": 0.2891, "step": 2131500 }, { "epoch": 1.28, "learning_rate": 4.709103470703081e-05, "loss": 0.2955, "step": 2132000 }, { "epoch": 1.28, "learning_rate": 4.708893474147024e-05, "loss": 0.2832, "step": 2132500 }, { "epoch": 1.28, "learning_rate": 4.708683477590967e-05, "loss": 0.2883, "step": 2133000 }, { "epoch": 1.28, "learning_rate": 4.708473481034911e-05, "loss": 0.2943, "step": 2133500 }, { "epoch": 1.28, "learning_rate": 4.708263904471967e-05, "loss": 0.2813, "step": 2134000 }, { "epoch": 1.28, "learning_rate": 4.70805390791591e-05, "loss": 0.2854, "step": 2134500 }, { "epoch": 1.28, "learning_rate": 4.7078439113598536e-05, "loss": 0.2904, "step": 2135000 }, { "epoch": 1.28, "learning_rate": 4.7076343347969096e-05, "loss": 0.2897, "step": 2135500 }, { "epoch": 1.28, "learning_rate": 4.707424338240853e-05, "loss": 0.2773, "step": 2136000 }, { "epoch": 1.28, "learning_rate": 4.707214341684796e-05, "loss": 0.2891, "step": 2136500 }, { "epoch": 1.28, "learning_rate": 4.7070043451287403e-05, "loss": 0.2836, "step": 2137000 }, { "epoch": 1.28, "learning_rate": 4.706794348572684e-05, "loss": 0.2904, "step": 2137500 }, { "epoch": 1.28, "learning_rate": 4.706584772009739e-05, "loss": 0.2911, "step": 2138000 }, { "epoch": 1.28, "learning_rate": 4.7063751954467944e-05, "loss": 0.2893, "step": 2138500 }, { "epoch": 1.28, "learning_rate": 4.706165198890738e-05, "loss": 0.2797, "step": 2139000 }, { "epoch": 1.28, "learning_rate": 4.705955202334682e-05, "loss": 0.2877, "step": 2139500 }, { "epoch": 1.28, "learning_rate": 4.705745205778625e-05, "loss": 0.2791, "step": 2140000 }, { "epoch": 1.28, "learning_rate": 4.7055352092225685e-05, "loss": 0.287, "step": 2140500 }, { "epoch": 1.28, "learning_rate": 4.7053256326596245e-05, "loss": 0.2973, "step": 2141000 }, { "epoch": 1.28, "learning_rate": 4.705115636103568e-05, "loss": 0.2873, "step": 2141500 }, { "epoch": 1.28, "learning_rate": 4.704905639547511e-05, "loss": 0.2845, "step": 2142000 }, { "epoch": 1.28, "learning_rate": 4.704695642991455e-05, "loss": 0.2879, "step": 2142500 }, { "epoch": 1.28, "learning_rate": 4.7044856464353986e-05, "loss": 0.2862, "step": 2143000 }, { "epoch": 1.29, "learning_rate": 4.704275649879342e-05, "loss": 0.2849, "step": 2143500 }, { "epoch": 1.29, "learning_rate": 4.704065653323286e-05, "loss": 0.2904, "step": 2144000 }, { "epoch": 1.29, "learning_rate": 4.703855656767229e-05, "loss": 0.2819, "step": 2144500 }, { "epoch": 1.29, "learning_rate": 4.7036465001973967e-05, "loss": 0.2878, "step": 2145000 }, { "epoch": 1.29, "learning_rate": 4.70343650364134e-05, "loss": 0.289, "step": 2145500 }, { "epoch": 1.29, "learning_rate": 4.7032265070852833e-05, "loss": 0.2783, "step": 2146000 }, { "epoch": 1.29, "learning_rate": 4.7030165105292274e-05, "loss": 0.2877, "step": 2146500 }, { "epoch": 1.29, "learning_rate": 4.702806513973171e-05, "loss": 0.2893, "step": 2147000 }, { "epoch": 1.29, "learning_rate": 4.702596517417114e-05, "loss": 0.287, "step": 2147500 }, { "epoch": 1.29, "learning_rate": 4.702386520861058e-05, "loss": 0.2878, "step": 2148000 }, { "epoch": 1.29, "learning_rate": 4.7021765243050014e-05, "loss": 0.2893, "step": 2148500 }, { "epoch": 1.29, "learning_rate": 4.701966527748945e-05, "loss": 0.2839, "step": 2149000 }, { "epoch": 1.29, "learning_rate": 4.701756531192889e-05, "loss": 0.2882, "step": 2149500 }, { "epoch": 1.29, "learning_rate": 4.7015465346368315e-05, "loss": 0.2944, "step": 2150000 }, { "epoch": 1.29, "learning_rate": 4.7013365380807755e-05, "loss": 0.2943, "step": 2150500 }, { "epoch": 1.29, "learning_rate": 4.701126541524719e-05, "loss": 0.2859, "step": 2151000 }, { "epoch": 1.29, "learning_rate": 4.700916964961775e-05, "loss": 0.2849, "step": 2151500 }, { "epoch": 1.29, "learning_rate": 4.7007069684057175e-05, "loss": 0.2791, "step": 2152000 }, { "epoch": 1.29, "learning_rate": 4.7004969718496616e-05, "loss": 0.2904, "step": 2152500 }, { "epoch": 1.29, "learning_rate": 4.700286975293605e-05, "loss": 0.2898, "step": 2153000 }, { "epoch": 1.29, "learning_rate": 4.700077398730661e-05, "loss": 0.288, "step": 2153500 }, { "epoch": 1.29, "learning_rate": 4.699867402174604e-05, "loss": 0.2838, "step": 2154000 }, { "epoch": 1.29, "learning_rate": 4.6996574056185476e-05, "loss": 0.292, "step": 2154500 }, { "epoch": 1.29, "learning_rate": 4.699447409062491e-05, "loss": 0.2885, "step": 2155000 }, { "epoch": 1.29, "learning_rate": 4.699237412506434e-05, "loss": 0.291, "step": 2155500 }, { "epoch": 1.29, "learning_rate": 4.6990274159503784e-05, "loss": 0.2857, "step": 2156000 }, { "epoch": 1.29, "learning_rate": 4.698817419394322e-05, "loss": 0.2889, "step": 2156500 }, { "epoch": 1.29, "learning_rate": 4.698607422838265e-05, "loss": 0.2889, "step": 2157000 }, { "epoch": 1.29, "learning_rate": 4.698397426282209e-05, "loss": 0.2904, "step": 2157500 }, { "epoch": 1.29, "learning_rate": 4.6981874297261524e-05, "loss": 0.2878, "step": 2158000 }, { "epoch": 1.29, "learning_rate": 4.697977853163208e-05, "loss": 0.2938, "step": 2158500 }, { "epoch": 1.29, "learning_rate": 4.697768276600264e-05, "loss": 0.2882, "step": 2159000 }, { "epoch": 1.29, "learning_rate": 4.697558280044207e-05, "loss": 0.2907, "step": 2159500 }, { "epoch": 1.3, "learning_rate": 4.697348703481263e-05, "loss": 0.2798, "step": 2160000 }, { "epoch": 1.3, "learning_rate": 4.6971387069252065e-05, "loss": 0.29, "step": 2160500 }, { "epoch": 1.3, "learning_rate": 4.69692871036915e-05, "loss": 0.2849, "step": 2161000 }, { "epoch": 1.3, "learning_rate": 4.696718713813093e-05, "loss": 0.2869, "step": 2161500 }, { "epoch": 1.3, "learning_rate": 4.6965087172570366e-05, "loss": 0.2899, "step": 2162000 }, { "epoch": 1.3, "learning_rate": 4.69629872070098e-05, "loss": 0.2891, "step": 2162500 }, { "epoch": 1.3, "learning_rate": 4.696088724144924e-05, "loss": 0.2866, "step": 2163000 }, { "epoch": 1.3, "learning_rate": 4.695878727588867e-05, "loss": 0.2857, "step": 2163500 }, { "epoch": 1.3, "learning_rate": 4.6956687310328106e-05, "loss": 0.2832, "step": 2164000 }, { "epoch": 1.3, "learning_rate": 4.6954587344767547e-05, "loss": 0.2829, "step": 2164500 }, { "epoch": 1.3, "learning_rate": 4.695248737920698e-05, "loss": 0.2863, "step": 2165000 }, { "epoch": 1.3, "learning_rate": 4.695038741364642e-05, "loss": 0.2805, "step": 2165500 }, { "epoch": 1.3, "learning_rate": 4.6948287448085854e-05, "loss": 0.2908, "step": 2166000 }, { "epoch": 1.3, "learning_rate": 4.694618748252529e-05, "loss": 0.2913, "step": 2166500 }, { "epoch": 1.3, "learning_rate": 4.694408751696472e-05, "loss": 0.2914, "step": 2167000 }, { "epoch": 1.3, "learning_rate": 4.6941987551404154e-05, "loss": 0.2919, "step": 2167500 }, { "epoch": 1.3, "learning_rate": 4.6939891785774714e-05, "loss": 0.2861, "step": 2168000 }, { "epoch": 1.3, "learning_rate": 4.693779182021415e-05, "loss": 0.2881, "step": 2168500 }, { "epoch": 1.3, "learning_rate": 4.693569185465359e-05, "loss": 0.2876, "step": 2169000 }, { "epoch": 1.3, "learning_rate": 4.6933591889093015e-05, "loss": 0.2931, "step": 2169500 }, { "epoch": 1.3, "learning_rate": 4.6931496123463575e-05, "loss": 0.2832, "step": 2170000 }, { "epoch": 1.3, "learning_rate": 4.692939615790301e-05, "loss": 0.2791, "step": 2170500 }, { "epoch": 1.3, "learning_rate": 4.692730039227357e-05, "loss": 0.2899, "step": 2171000 }, { "epoch": 1.3, "learning_rate": 4.6925200426713e-05, "loss": 0.2843, "step": 2171500 }, { "epoch": 1.3, "learning_rate": 4.6923100461152436e-05, "loss": 0.2871, "step": 2172000 }, { "epoch": 1.3, "learning_rate": 4.6921000495591876e-05, "loss": 0.2867, "step": 2172500 }, { "epoch": 1.3, "learning_rate": 4.691890053003131e-05, "loss": 0.2929, "step": 2173000 }, { "epoch": 1.3, "learning_rate": 4.691680476440186e-05, "loss": 0.2908, "step": 2173500 }, { "epoch": 1.3, "learning_rate": 4.6914704798841297e-05, "loss": 0.2841, "step": 2174000 }, { "epoch": 1.3, "learning_rate": 4.691260483328074e-05, "loss": 0.285, "step": 2174500 }, { "epoch": 1.3, "learning_rate": 4.691050486772017e-05, "loss": 0.2882, "step": 2175000 }, { "epoch": 1.3, "learning_rate": 4.6908404902159604e-05, "loss": 0.2805, "step": 2175500 }, { "epoch": 1.3, "learning_rate": 4.690630913653016e-05, "loss": 0.2871, "step": 2176000 }, { "epoch": 1.3, "learning_rate": 4.69042091709696e-05, "loss": 0.2842, "step": 2176500 }, { "epoch": 1.31, "learning_rate": 4.690210920540903e-05, "loss": 0.2854, "step": 2177000 }, { "epoch": 1.31, "learning_rate": 4.6900009239848464e-05, "loss": 0.285, "step": 2177500 }, { "epoch": 1.31, "learning_rate": 4.6897909274287905e-05, "loss": 0.2884, "step": 2178000 }, { "epoch": 1.31, "learning_rate": 4.689580930872734e-05, "loss": 0.2939, "step": 2178500 }, { "epoch": 1.31, "learning_rate": 4.689370934316677e-05, "loss": 0.2871, "step": 2179000 }, { "epoch": 1.31, "learning_rate": 4.6891609377606205e-05, "loss": 0.2868, "step": 2179500 }, { "epoch": 1.31, "learning_rate": 4.688950941204564e-05, "loss": 0.2894, "step": 2180000 }, { "epoch": 1.31, "learning_rate": 4.68874136464162e-05, "loss": 0.2856, "step": 2180500 }, { "epoch": 1.31, "learning_rate": 4.688531368085564e-05, "loss": 0.2914, "step": 2181000 }, { "epoch": 1.31, "learning_rate": 4.6883213715295066e-05, "loss": 0.2878, "step": 2181500 }, { "epoch": 1.31, "learning_rate": 4.68811137497345e-05, "loss": 0.2818, "step": 2182000 }, { "epoch": 1.31, "learning_rate": 4.687901798410506e-05, "loss": 0.2889, "step": 2182500 }, { "epoch": 1.31, "learning_rate": 4.68769180185445e-05, "loss": 0.28, "step": 2183000 }, { "epoch": 1.31, "learning_rate": 4.687481805298393e-05, "loss": 0.284, "step": 2183500 }, { "epoch": 1.31, "learning_rate": 4.687271808742336e-05, "loss": 0.2839, "step": 2184000 }, { "epoch": 1.31, "learning_rate": 4.68706181218628e-05, "loss": 0.2885, "step": 2184500 }, { "epoch": 1.31, "learning_rate": 4.686852235623336e-05, "loss": 0.284, "step": 2185000 }, { "epoch": 1.31, "learning_rate": 4.6866422390672794e-05, "loss": 0.2915, "step": 2185500 }, { "epoch": 1.31, "learning_rate": 4.686432662504335e-05, "loss": 0.2922, "step": 2186000 }, { "epoch": 1.31, "learning_rate": 4.686222665948279e-05, "loss": 0.2864, "step": 2186500 }, { "epoch": 1.31, "learning_rate": 4.686012669392222e-05, "loss": 0.2813, "step": 2187000 }, { "epoch": 1.31, "learning_rate": 4.6858026728361655e-05, "loss": 0.2876, "step": 2187500 }, { "epoch": 1.31, "learning_rate": 4.6855926762801095e-05, "loss": 0.289, "step": 2188000 }, { "epoch": 1.31, "learning_rate": 4.685382679724052e-05, "loss": 0.2907, "step": 2188500 }, { "epoch": 1.31, "learning_rate": 4.6851726831679955e-05, "loss": 0.2884, "step": 2189000 }, { "epoch": 1.31, "learning_rate": 4.6849626866119395e-05, "loss": 0.2888, "step": 2189500 }, { "epoch": 1.31, "learning_rate": 4.684752690055883e-05, "loss": 0.2793, "step": 2190000 }, { "epoch": 1.31, "learning_rate": 4.684542693499826e-05, "loss": 0.2879, "step": 2190500 }, { "epoch": 1.31, "learning_rate": 4.6843331169368816e-05, "loss": 0.2867, "step": 2191000 }, { "epoch": 1.31, "learning_rate": 4.6841231203808256e-05, "loss": 0.2901, "step": 2191500 }, { "epoch": 1.31, "learning_rate": 4.6839135438178816e-05, "loss": 0.2861, "step": 2192000 }, { "epoch": 1.31, "learning_rate": 4.683703547261825e-05, "loss": 0.2883, "step": 2192500 }, { "epoch": 1.31, "learning_rate": 4.6834935507057683e-05, "loss": 0.2894, "step": 2193000 }, { "epoch": 1.32, "learning_rate": 4.683283554149712e-05, "loss": 0.283, "step": 2193500 }, { "epoch": 1.32, "learning_rate": 4.683073557593655e-05, "loss": 0.2807, "step": 2194000 }, { "epoch": 1.32, "learning_rate": 4.682863561037599e-05, "loss": 0.2842, "step": 2194500 }, { "epoch": 1.32, "learning_rate": 4.6826535644815424e-05, "loss": 0.2891, "step": 2195000 }, { "epoch": 1.32, "learning_rate": 4.6824439879185984e-05, "loss": 0.2808, "step": 2195500 }, { "epoch": 1.32, "learning_rate": 4.682233991362541e-05, "loss": 0.2885, "step": 2196000 }, { "epoch": 1.32, "learning_rate": 4.682023994806485e-05, "loss": 0.2823, "step": 2196500 }, { "epoch": 1.32, "learning_rate": 4.6818139982504285e-05, "loss": 0.2843, "step": 2197000 }, { "epoch": 1.32, "learning_rate": 4.681604001694372e-05, "loss": 0.2857, "step": 2197500 }, { "epoch": 1.32, "learning_rate": 4.681394005138316e-05, "loss": 0.2894, "step": 2198000 }, { "epoch": 1.32, "learning_rate": 4.681184008582259e-05, "loss": 0.2869, "step": 2198500 }, { "epoch": 1.32, "learning_rate": 4.6809740120262025e-05, "loss": 0.2891, "step": 2199000 }, { "epoch": 1.32, "learning_rate": 4.680764435463258e-05, "loss": 0.2893, "step": 2199500 }, { "epoch": 1.32, "learning_rate": 4.680554438907202e-05, "loss": 0.2824, "step": 2200000 }, { "epoch": 1.32, "eval_loss": 0.2632770538330078, "eval_runtime": 1460.696, "eval_samples_per_second": 360.595, "eval_steps_per_second": 60.099, "step": 2200000 }, { "epoch": 1.32, "learning_rate": 4.680344442351145e-05, "loss": 0.2824, "step": 2200500 }, { "epoch": 1.32, "learning_rate": 4.6801344457950886e-05, "loss": 0.2899, "step": 2201000 }, { "epoch": 1.32, "learning_rate": 4.6799244492390326e-05, "loss": 0.2864, "step": 2201500 }, { "epoch": 1.32, "learning_rate": 4.679714452682976e-05, "loss": 0.2858, "step": 2202000 }, { "epoch": 1.32, "learning_rate": 4.679504876120031e-05, "loss": 0.2952, "step": 2202500 }, { "epoch": 1.32, "learning_rate": 4.6792948795639754e-05, "loss": 0.2869, "step": 2203000 }, { "epoch": 1.32, "learning_rate": 4.679084883007919e-05, "loss": 0.2858, "step": 2203500 }, { "epoch": 1.32, "learning_rate": 4.678874886451862e-05, "loss": 0.2866, "step": 2204000 }, { "epoch": 1.32, "learning_rate": 4.678664889895806e-05, "loss": 0.2821, "step": 2204500 }, { "epoch": 1.32, "learning_rate": 4.6784548933397494e-05, "loss": 0.2868, "step": 2205000 }, { "epoch": 1.32, "learning_rate": 4.678245316776805e-05, "loss": 0.2869, "step": 2205500 }, { "epoch": 1.32, "learning_rate": 4.678035320220748e-05, "loss": 0.2836, "step": 2206000 }, { "epoch": 1.32, "learning_rate": 4.677825323664692e-05, "loss": 0.2822, "step": 2206500 }, { "epoch": 1.32, "learning_rate": 4.6776153271086355e-05, "loss": 0.2875, "step": 2207000 }, { "epoch": 1.32, "learning_rate": 4.677405330552579e-05, "loss": 0.2837, "step": 2207500 }, { "epoch": 1.32, "learning_rate": 4.677195753989634e-05, "loss": 0.2886, "step": 2208000 }, { "epoch": 1.32, "learning_rate": 4.676985757433578e-05, "loss": 0.2872, "step": 2208500 }, { "epoch": 1.32, "learning_rate": 4.6767757608775216e-05, "loss": 0.288, "step": 2209000 }, { "epoch": 1.32, "learning_rate": 4.676565764321465e-05, "loss": 0.2841, "step": 2209500 }, { "epoch": 1.32, "learning_rate": 4.676356187758521e-05, "loss": 0.2839, "step": 2210000 }, { "epoch": 1.33, "learning_rate": 4.676146191202464e-05, "loss": 0.2901, "step": 2210500 }, { "epoch": 1.33, "learning_rate": 4.6759361946464076e-05, "loss": 0.2872, "step": 2211000 }, { "epoch": 1.33, "learning_rate": 4.675726618083463e-05, "loss": 0.2876, "step": 2211500 }, { "epoch": 1.33, "learning_rate": 4.675516621527407e-05, "loss": 0.2839, "step": 2212000 }, { "epoch": 1.33, "learning_rate": 4.6753066249713504e-05, "loss": 0.2809, "step": 2212500 }, { "epoch": 1.33, "learning_rate": 4.675096628415294e-05, "loss": 0.2882, "step": 2213000 }, { "epoch": 1.33, "learning_rate": 4.674887051852349e-05, "loss": 0.2893, "step": 2213500 }, { "epoch": 1.33, "learning_rate": 4.674677055296293e-05, "loss": 0.284, "step": 2214000 }, { "epoch": 1.33, "learning_rate": 4.6744670587402364e-05, "loss": 0.2827, "step": 2214500 }, { "epoch": 1.33, "learning_rate": 4.67425706218418e-05, "loss": 0.2789, "step": 2215000 }, { "epoch": 1.33, "learning_rate": 4.674047485621236e-05, "loss": 0.2828, "step": 2215500 }, { "epoch": 1.33, "learning_rate": 4.673837489065179e-05, "loss": 0.295, "step": 2216000 }, { "epoch": 1.33, "learning_rate": 4.6736274925091225e-05, "loss": 0.286, "step": 2216500 }, { "epoch": 1.33, "learning_rate": 4.6734174959530665e-05, "loss": 0.2811, "step": 2217000 }, { "epoch": 1.33, "learning_rate": 4.67320749939701e-05, "loss": 0.2857, "step": 2217500 }, { "epoch": 1.33, "learning_rate": 4.672997502840953e-05, "loss": 0.2894, "step": 2218000 }, { "epoch": 1.33, "learning_rate": 4.672787506284897e-05, "loss": 0.2875, "step": 2218500 }, { "epoch": 1.33, "learning_rate": 4.6725775097288406e-05, "loss": 0.3011, "step": 2219000 }, { "epoch": 1.33, "learning_rate": 4.672367513172784e-05, "loss": 0.2843, "step": 2219500 }, { "epoch": 1.33, "learning_rate": 4.672157516616728e-05, "loss": 0.2854, "step": 2220000 }, { "epoch": 1.33, "learning_rate": 4.671947940053783e-05, "loss": 0.2866, "step": 2220500 }, { "epoch": 1.33, "learning_rate": 4.671737943497727e-05, "loss": 0.2823, "step": 2221000 }, { "epoch": 1.33, "learning_rate": 4.67152794694167e-05, "loss": 0.2864, "step": 2221500 }, { "epoch": 1.33, "learning_rate": 4.671317950385614e-05, "loss": 0.2804, "step": 2222000 }, { "epoch": 1.33, "learning_rate": 4.6711083738226694e-05, "loss": 0.2936, "step": 2222500 }, { "epoch": 1.33, "learning_rate": 4.670898797259725e-05, "loss": 0.294, "step": 2223000 }, { "epoch": 1.33, "learning_rate": 4.670688800703668e-05, "loss": 0.2821, "step": 2223500 }, { "epoch": 1.33, "learning_rate": 4.670478804147612e-05, "loss": 0.2894, "step": 2224000 }, { "epoch": 1.33, "learning_rate": 4.6702688075915555e-05, "loss": 0.2815, "step": 2224500 }, { "epoch": 1.33, "learning_rate": 4.670059231028611e-05, "loss": 0.2942, "step": 2225000 }, { "epoch": 1.33, "learning_rate": 4.669849234472554e-05, "loss": 0.2829, "step": 2225500 }, { "epoch": 1.33, "learning_rate": 4.669639237916498e-05, "loss": 0.2918, "step": 2226000 }, { "epoch": 1.33, "learning_rate": 4.6694292413604415e-05, "loss": 0.2866, "step": 2226500 }, { "epoch": 1.34, "learning_rate": 4.669219244804385e-05, "loss": 0.2866, "step": 2227000 }, { "epoch": 1.34, "learning_rate": 4.669009248248329e-05, "loss": 0.2854, "step": 2227500 }, { "epoch": 1.34, "learning_rate": 4.668799251692272e-05, "loss": 0.2875, "step": 2228000 }, { "epoch": 1.34, "learning_rate": 4.6685892551362156e-05, "loss": 0.2906, "step": 2228500 }, { "epoch": 1.34, "learning_rate": 4.6683792585801596e-05, "loss": 0.2832, "step": 2229000 }, { "epoch": 1.34, "learning_rate": 4.668169682017215e-05, "loss": 0.2883, "step": 2229500 }, { "epoch": 1.34, "learning_rate": 4.667959685461158e-05, "loss": 0.2916, "step": 2230000 }, { "epoch": 1.34, "learning_rate": 4.6677496889051024e-05, "loss": 0.2894, "step": 2230500 }, { "epoch": 1.34, "learning_rate": 4.667539692349046e-05, "loss": 0.291, "step": 2231000 }, { "epoch": 1.34, "learning_rate": 4.667330115786101e-05, "loss": 0.2923, "step": 2231500 }, { "epoch": 1.34, "learning_rate": 4.6671201192300444e-05, "loss": 0.2926, "step": 2232000 }, { "epoch": 1.34, "learning_rate": 4.6669101226739884e-05, "loss": 0.2838, "step": 2232500 }, { "epoch": 1.34, "learning_rate": 4.666700126117932e-05, "loss": 0.2804, "step": 2233000 }, { "epoch": 1.34, "learning_rate": 4.666490129561875e-05, "loss": 0.2866, "step": 2233500 }, { "epoch": 1.34, "learning_rate": 4.666280133005819e-05, "loss": 0.2848, "step": 2234000 }, { "epoch": 1.34, "learning_rate": 4.666070136449762e-05, "loss": 0.2854, "step": 2234500 }, { "epoch": 1.34, "learning_rate": 4.665860139893705e-05, "loss": 0.2884, "step": 2235000 }, { "epoch": 1.34, "learning_rate": 4.665650143337649e-05, "loss": 0.2891, "step": 2235500 }, { "epoch": 1.34, "learning_rate": 4.665440566774705e-05, "loss": 0.2853, "step": 2236000 }, { "epoch": 1.34, "learning_rate": 4.6652305702186486e-05, "loss": 0.2897, "step": 2236500 }, { "epoch": 1.34, "learning_rate": 4.665020993655704e-05, "loss": 0.2897, "step": 2237000 }, { "epoch": 1.34, "learning_rate": 4.664810997099648e-05, "loss": 0.2887, "step": 2237500 }, { "epoch": 1.34, "learning_rate": 4.664601000543591e-05, "loss": 0.2897, "step": 2238000 }, { "epoch": 1.34, "learning_rate": 4.6643910039875346e-05, "loss": 0.2804, "step": 2238500 }, { "epoch": 1.34, "learning_rate": 4.66418142742459e-05, "loss": 0.2865, "step": 2239000 }, { "epoch": 1.34, "learning_rate": 4.663971430868534e-05, "loss": 0.2848, "step": 2239500 }, { "epoch": 1.34, "learning_rate": 4.6637614343124774e-05, "loss": 0.2876, "step": 2240000 }, { "epoch": 1.34, "learning_rate": 4.663551437756421e-05, "loss": 0.2885, "step": 2240500 }, { "epoch": 1.34, "learning_rate": 4.663341441200365e-05, "loss": 0.2889, "step": 2241000 }, { "epoch": 1.34, "learning_rate": 4.663131444644308e-05, "loss": 0.2816, "step": 2241500 }, { "epoch": 1.34, "learning_rate": 4.662921448088251e-05, "loss": 0.2867, "step": 2242000 }, { "epoch": 1.34, "learning_rate": 4.662711451532195e-05, "loss": 0.2921, "step": 2242500 }, { "epoch": 1.34, "learning_rate": 4.662501454976138e-05, "loss": 0.2783, "step": 2243000 }, { "epoch": 1.35, "learning_rate": 4.662291878413194e-05, "loss": 0.2893, "step": 2243500 }, { "epoch": 1.35, "learning_rate": 4.6620818818571375e-05, "loss": 0.284, "step": 2244000 }, { "epoch": 1.35, "learning_rate": 4.661871885301081e-05, "loss": 0.2879, "step": 2244500 }, { "epoch": 1.35, "learning_rate": 4.661661888745024e-05, "loss": 0.2902, "step": 2245000 }, { "epoch": 1.35, "learning_rate": 4.661451892188968e-05, "loss": 0.2885, "step": 2245500 }, { "epoch": 1.35, "learning_rate": 4.6612418956329116e-05, "loss": 0.2843, "step": 2246000 }, { "epoch": 1.35, "learning_rate": 4.661032319069967e-05, "loss": 0.2895, "step": 2246500 }, { "epoch": 1.35, "learning_rate": 4.66082232251391e-05, "loss": 0.2866, "step": 2247000 }, { "epoch": 1.35, "learning_rate": 4.660612325957854e-05, "loss": 0.2873, "step": 2247500 }, { "epoch": 1.35, "learning_rate": 4.6604023294017976e-05, "loss": 0.2885, "step": 2248000 }, { "epoch": 1.35, "learning_rate": 4.660192332845741e-05, "loss": 0.287, "step": 2248500 }, { "epoch": 1.35, "learning_rate": 4.659982336289685e-05, "loss": 0.287, "step": 2249000 }, { "epoch": 1.35, "learning_rate": 4.6597727597267404e-05, "loss": 0.2821, "step": 2249500 }, { "epoch": 1.35, "learning_rate": 4.659562763170684e-05, "loss": 0.289, "step": 2250000 }, { "epoch": 1.35, "learning_rate": 4.65935318660774e-05, "loss": 0.2876, "step": 2250500 }, { "epoch": 1.35, "learning_rate": 4.659143190051683e-05, "loss": 0.2949, "step": 2251000 }, { "epoch": 1.35, "learning_rate": 4.6589331934956264e-05, "loss": 0.286, "step": 2251500 }, { "epoch": 1.35, "learning_rate": 4.65872319693957e-05, "loss": 0.2762, "step": 2252000 }, { "epoch": 1.35, "learning_rate": 4.658513200383514e-05, "loss": 0.2817, "step": 2252500 }, { "epoch": 1.35, "learning_rate": 4.658303203827457e-05, "loss": 0.2899, "step": 2253000 }, { "epoch": 1.35, "learning_rate": 4.6580932072714005e-05, "loss": 0.2844, "step": 2253500 }, { "epoch": 1.35, "learning_rate": 4.6578832107153445e-05, "loss": 0.2844, "step": 2254000 }, { "epoch": 1.35, "learning_rate": 4.6576736341524e-05, "loss": 0.2827, "step": 2254500 }, { "epoch": 1.35, "learning_rate": 4.657463637596343e-05, "loss": 0.2857, "step": 2255000 }, { "epoch": 1.35, "learning_rate": 4.6572536410402866e-05, "loss": 0.2854, "step": 2255500 }, { "epoch": 1.35, "learning_rate": 4.6570436444842306e-05, "loss": 0.2837, "step": 2256000 }, { "epoch": 1.35, "learning_rate": 4.656833647928174e-05, "loss": 0.2869, "step": 2256500 }, { "epoch": 1.35, "learning_rate": 4.656623651372117e-05, "loss": 0.2895, "step": 2257000 }, { "epoch": 1.35, "learning_rate": 4.6564140748091726e-05, "loss": 0.2836, "step": 2257500 }, { "epoch": 1.35, "learning_rate": 4.6562040782531167e-05, "loss": 0.2859, "step": 2258000 }, { "epoch": 1.35, "learning_rate": 4.65599408169706e-05, "loss": 0.2922, "step": 2258500 }, { "epoch": 1.35, "learning_rate": 4.6557840851410033e-05, "loss": 0.2836, "step": 2259000 }, { "epoch": 1.35, "learning_rate": 4.6555745085780594e-05, "loss": 0.2854, "step": 2259500 }, { "epoch": 1.35, "learning_rate": 4.655364512022003e-05, "loss": 0.2872, "step": 2260000 }, { "epoch": 1.36, "learning_rate": 4.655154515465946e-05, "loss": 0.2932, "step": 2260500 }, { "epoch": 1.36, "learning_rate": 4.65494451890989e-05, "loss": 0.284, "step": 2261000 }, { "epoch": 1.36, "learning_rate": 4.6547349423469455e-05, "loss": 0.2908, "step": 2261500 }, { "epoch": 1.36, "learning_rate": 4.654524945790889e-05, "loss": 0.2812, "step": 2262000 }, { "epoch": 1.36, "learning_rate": 4.654314949234832e-05, "loss": 0.2848, "step": 2262500 }, { "epoch": 1.36, "learning_rate": 4.654104952678776e-05, "loss": 0.2851, "step": 2263000 }, { "epoch": 1.36, "learning_rate": 4.6538949561227195e-05, "loss": 0.2903, "step": 2263500 }, { "epoch": 1.36, "learning_rate": 4.653684959566663e-05, "loss": 0.2924, "step": 2264000 }, { "epoch": 1.36, "learning_rate": 4.653475383003718e-05, "loss": 0.2843, "step": 2264500 }, { "epoch": 1.36, "learning_rate": 4.653265386447662e-05, "loss": 0.2838, "step": 2265000 }, { "epoch": 1.36, "learning_rate": 4.6530553898916056e-05, "loss": 0.2883, "step": 2265500 }, { "epoch": 1.36, "learning_rate": 4.652845393335549e-05, "loss": 0.2883, "step": 2266000 }, { "epoch": 1.36, "learning_rate": 4.652635396779493e-05, "loss": 0.2817, "step": 2266500 }, { "epoch": 1.36, "learning_rate": 4.652425820216548e-05, "loss": 0.279, "step": 2267000 }, { "epoch": 1.36, "learning_rate": 4.6522162436536044e-05, "loss": 0.2911, "step": 2267500 }, { "epoch": 1.36, "learning_rate": 4.652006247097547e-05, "loss": 0.2831, "step": 2268000 }, { "epoch": 1.36, "learning_rate": 4.651796250541491e-05, "loss": 0.2865, "step": 2268500 }, { "epoch": 1.36, "learning_rate": 4.6515862539854344e-05, "loss": 0.2847, "step": 2269000 }, { "epoch": 1.36, "learning_rate": 4.651376257429378e-05, "loss": 0.2933, "step": 2269500 }, { "epoch": 1.36, "learning_rate": 4.651166260873322e-05, "loss": 0.2885, "step": 2270000 }, { "epoch": 1.36, "learning_rate": 4.650956264317265e-05, "loss": 0.2876, "step": 2270500 }, { "epoch": 1.36, "learning_rate": 4.6507462677612085e-05, "loss": 0.2855, "step": 2271000 }, { "epoch": 1.36, "learning_rate": 4.6505362712051525e-05, "loss": 0.2848, "step": 2271500 }, { "epoch": 1.36, "learning_rate": 4.650326694642208e-05, "loss": 0.2815, "step": 2272000 }, { "epoch": 1.36, "learning_rate": 4.650116698086151e-05, "loss": 0.2879, "step": 2272500 }, { "epoch": 1.36, "learning_rate": 4.6499067015300945e-05, "loss": 0.2923, "step": 2273000 }, { "epoch": 1.36, "learning_rate": 4.6496967049740385e-05, "loss": 0.2917, "step": 2273500 }, { "epoch": 1.36, "learning_rate": 4.649486708417982e-05, "loss": 0.285, "step": 2274000 }, { "epoch": 1.36, "learning_rate": 4.649277131855037e-05, "loss": 0.2872, "step": 2274500 }, { "epoch": 1.36, "learning_rate": 4.649067135298981e-05, "loss": 0.2832, "step": 2275000 }, { "epoch": 1.36, "learning_rate": 4.6488571387429246e-05, "loss": 0.2862, "step": 2275500 }, { "epoch": 1.36, "learning_rate": 4.648647142186868e-05, "loss": 0.2897, "step": 2276000 }, { "epoch": 1.36, "learning_rate": 4.648437145630812e-05, "loss": 0.2849, "step": 2276500 }, { "epoch": 1.37, "learning_rate": 4.648227149074755e-05, "loss": 0.2854, "step": 2277000 }, { "epoch": 1.37, "learning_rate": 4.648017152518699e-05, "loss": 0.2765, "step": 2277500 }, { "epoch": 1.37, "learning_rate": 4.647807575955754e-05, "loss": 0.2852, "step": 2278000 }, { "epoch": 1.37, "learning_rate": 4.647597579399698e-05, "loss": 0.2821, "step": 2278500 }, { "epoch": 1.37, "learning_rate": 4.6473875828436414e-05, "loss": 0.291, "step": 2279000 }, { "epoch": 1.37, "learning_rate": 4.647177586287585e-05, "loss": 0.2835, "step": 2279500 }, { "epoch": 1.37, "learning_rate": 4.64696800972464e-05, "loss": 0.2875, "step": 2280000 }, { "epoch": 1.37, "learning_rate": 4.646758013168584e-05, "loss": 0.2871, "step": 2280500 }, { "epoch": 1.37, "learning_rate": 4.6465480166125275e-05, "loss": 0.289, "step": 2281000 }, { "epoch": 1.37, "learning_rate": 4.646338020056471e-05, "loss": 0.2845, "step": 2281500 }, { "epoch": 1.37, "learning_rate": 4.646128023500415e-05, "loss": 0.2844, "step": 2282000 }, { "epoch": 1.37, "learning_rate": 4.645918026944358e-05, "loss": 0.2818, "step": 2282500 }, { "epoch": 1.37, "learning_rate": 4.6457080303883015e-05, "loss": 0.2834, "step": 2283000 }, { "epoch": 1.37, "learning_rate": 4.645498033832245e-05, "loss": 0.2851, "step": 2283500 }, { "epoch": 1.37, "learning_rate": 4.645288037276188e-05, "loss": 0.2825, "step": 2284000 }, { "epoch": 1.37, "learning_rate": 4.645078460713244e-05, "loss": 0.2887, "step": 2284500 }, { "epoch": 1.37, "learning_rate": 4.644868464157188e-05, "loss": 0.2847, "step": 2285000 }, { "epoch": 1.37, "learning_rate": 4.644658467601131e-05, "loss": 0.2803, "step": 2285500 }, { "epoch": 1.37, "learning_rate": 4.644448471045074e-05, "loss": 0.2863, "step": 2286000 }, { "epoch": 1.37, "learning_rate": 4.644238474489018e-05, "loss": 0.2861, "step": 2286500 }, { "epoch": 1.37, "learning_rate": 4.6440288979260744e-05, "loss": 0.2878, "step": 2287000 }, { "epoch": 1.37, "learning_rate": 4.643818901370018e-05, "loss": 0.2807, "step": 2287500 }, { "epoch": 1.37, "learning_rate": 4.6436089048139604e-05, "loss": 0.2832, "step": 2288000 }, { "epoch": 1.37, "learning_rate": 4.6433989082579044e-05, "loss": 0.2938, "step": 2288500 }, { "epoch": 1.37, "learning_rate": 4.643188911701848e-05, "loss": 0.2837, "step": 2289000 }, { "epoch": 1.37, "learning_rate": 4.642979335138904e-05, "loss": 0.2829, "step": 2289500 }, { "epoch": 1.37, "learning_rate": 4.642769338582848e-05, "loss": 0.2808, "step": 2290000 }, { "epoch": 1.37, "learning_rate": 4.6425593420267905e-05, "loss": 0.2889, "step": 2290500 }, { "epoch": 1.37, "learning_rate": 4.642349345470734e-05, "loss": 0.2851, "step": 2291000 }, { "epoch": 1.37, "learning_rate": 4.64213976890779e-05, "loss": 0.2883, "step": 2291500 }, { "epoch": 1.37, "learning_rate": 4.641929772351734e-05, "loss": 0.2879, "step": 2292000 }, { "epoch": 1.37, "learning_rate": 4.6417197757956765e-05, "loss": 0.2811, "step": 2292500 }, { "epoch": 1.37, "learning_rate": 4.6415101992327326e-05, "loss": 0.2818, "step": 2293000 }, { "epoch": 1.38, "learning_rate": 4.641300202676676e-05, "loss": 0.2918, "step": 2293500 }, { "epoch": 1.38, "learning_rate": 4.64109020612062e-05, "loss": 0.2771, "step": 2294000 }, { "epoch": 1.38, "learning_rate": 4.640880209564563e-05, "loss": 0.2822, "step": 2294500 }, { "epoch": 1.38, "learning_rate": 4.640670213008506e-05, "loss": 0.2883, "step": 2295000 }, { "epoch": 1.38, "learning_rate": 4.64046021645245e-05, "loss": 0.2827, "step": 2295500 }, { "epoch": 1.38, "learning_rate": 4.640250219896393e-05, "loss": 0.2857, "step": 2296000 }, { "epoch": 1.38, "learning_rate": 4.6400402233403374e-05, "loss": 0.2881, "step": 2296500 }, { "epoch": 1.38, "learning_rate": 4.639831066770505e-05, "loss": 0.2809, "step": 2297000 }, { "epoch": 1.38, "learning_rate": 4.639621070214449e-05, "loss": 0.29, "step": 2297500 }, { "epoch": 1.38, "learning_rate": 4.639411073658392e-05, "loss": 0.2888, "step": 2298000 }, { "epoch": 1.38, "learning_rate": 4.6392010771023354e-05, "loss": 0.2847, "step": 2298500 }, { "epoch": 1.38, "learning_rate": 4.6389910805462795e-05, "loss": 0.286, "step": 2299000 }, { "epoch": 1.38, "learning_rate": 4.638781083990223e-05, "loss": 0.2851, "step": 2299500 }, { "epoch": 1.38, "learning_rate": 4.638571507427278e-05, "loss": 0.2843, "step": 2300000 }, { "epoch": 1.38, "eval_loss": 0.2611614763736725, "eval_runtime": 1456.651, "eval_samples_per_second": 361.597, "eval_steps_per_second": 60.266, "step": 2300000 }, { "epoch": 1.38, "learning_rate": 4.6383615108712215e-05, "loss": 0.2845, "step": 2300500 }, { "epoch": 1.38, "learning_rate": 4.6381515143151655e-05, "loss": 0.2904, "step": 2301000 }, { "epoch": 1.38, "learning_rate": 4.637941517759109e-05, "loss": 0.2838, "step": 2301500 }, { "epoch": 1.38, "learning_rate": 4.637731521203052e-05, "loss": 0.2844, "step": 2302000 }, { "epoch": 1.38, "learning_rate": 4.6375215246469956e-05, "loss": 0.2844, "step": 2302500 }, { "epoch": 1.38, "learning_rate": 4.637311528090939e-05, "loss": 0.2884, "step": 2303000 }, { "epoch": 1.38, "learning_rate": 4.637101531534883e-05, "loss": 0.2901, "step": 2303500 }, { "epoch": 1.38, "learning_rate": 4.636891534978826e-05, "loss": 0.2839, "step": 2304000 }, { "epoch": 1.38, "learning_rate": 4.6366819584158817e-05, "loss": 0.2859, "step": 2304500 }, { "epoch": 1.38, "learning_rate": 4.636471961859825e-05, "loss": 0.2822, "step": 2305000 }, { "epoch": 1.38, "learning_rate": 4.636261965303769e-05, "loss": 0.2847, "step": 2305500 }, { "epoch": 1.38, "learning_rate": 4.6360519687477124e-05, "loss": 0.2823, "step": 2306000 }, { "epoch": 1.38, "learning_rate": 4.635841972191656e-05, "loss": 0.2829, "step": 2306500 }, { "epoch": 1.38, "learning_rate": 4.635632395628711e-05, "loss": 0.2815, "step": 2307000 }, { "epoch": 1.38, "learning_rate": 4.635422819065767e-05, "loss": 0.2904, "step": 2307500 }, { "epoch": 1.38, "learning_rate": 4.635212822509711e-05, "loss": 0.2864, "step": 2308000 }, { "epoch": 1.38, "learning_rate": 4.6350028259536545e-05, "loss": 0.2873, "step": 2308500 }, { "epoch": 1.38, "learning_rate": 4.634792829397598e-05, "loss": 0.2875, "step": 2309000 }, { "epoch": 1.38, "learning_rate": 4.634582832841541e-05, "loss": 0.2874, "step": 2309500 }, { "epoch": 1.38, "learning_rate": 4.6343728362854845e-05, "loss": 0.281, "step": 2310000 }, { "epoch": 1.39, "learning_rate": 4.6341628397294285e-05, "loss": 0.2879, "step": 2310500 }, { "epoch": 1.39, "learning_rate": 4.633952843173372e-05, "loss": 0.2767, "step": 2311000 }, { "epoch": 1.39, "learning_rate": 4.633743266610427e-05, "loss": 0.2891, "step": 2311500 }, { "epoch": 1.39, "learning_rate": 4.633533690047483e-05, "loss": 0.2848, "step": 2312000 }, { "epoch": 1.39, "learning_rate": 4.6333236934914266e-05, "loss": 0.2898, "step": 2312500 }, { "epoch": 1.39, "learning_rate": 4.6331136969353706e-05, "loss": 0.2843, "step": 2313000 }, { "epoch": 1.39, "learning_rate": 4.632903700379314e-05, "loss": 0.2812, "step": 2313500 }, { "epoch": 1.39, "learning_rate": 4.6326937038232567e-05, "loss": 0.2906, "step": 2314000 }, { "epoch": 1.39, "learning_rate": 4.632483707267201e-05, "loss": 0.2873, "step": 2314500 }, { "epoch": 1.39, "learning_rate": 4.632273710711144e-05, "loss": 0.2927, "step": 2315000 }, { "epoch": 1.39, "learning_rate": 4.6320637141550874e-05, "loss": 0.2861, "step": 2315500 }, { "epoch": 1.39, "learning_rate": 4.6318541375921434e-05, "loss": 0.2853, "step": 2316000 }, { "epoch": 1.39, "learning_rate": 4.631644141036087e-05, "loss": 0.2916, "step": 2316500 }, { "epoch": 1.39, "learning_rate": 4.63143414448003e-05, "loss": 0.2894, "step": 2317000 }, { "epoch": 1.39, "learning_rate": 4.631224147923974e-05, "loss": 0.2811, "step": 2317500 }, { "epoch": 1.39, "learning_rate": 4.6310141513679175e-05, "loss": 0.2854, "step": 2318000 }, { "epoch": 1.39, "learning_rate": 4.6308049947980855e-05, "loss": 0.2836, "step": 2318500 }, { "epoch": 1.39, "learning_rate": 4.630594998242029e-05, "loss": 0.2859, "step": 2319000 }, { "epoch": 1.39, "learning_rate": 4.630385001685972e-05, "loss": 0.283, "step": 2319500 }, { "epoch": 1.39, "learning_rate": 4.630175005129916e-05, "loss": 0.2866, "step": 2320000 }, { "epoch": 1.39, "learning_rate": 4.6299650085738596e-05, "loss": 0.2918, "step": 2320500 }, { "epoch": 1.39, "learning_rate": 4.629755012017803e-05, "loss": 0.287, "step": 2321000 }, { "epoch": 1.39, "learning_rate": 4.629545015461746e-05, "loss": 0.2896, "step": 2321500 }, { "epoch": 1.39, "learning_rate": 4.6293350189056896e-05, "loss": 0.2815, "step": 2322000 }, { "epoch": 1.39, "learning_rate": 4.629125022349633e-05, "loss": 0.2782, "step": 2322500 }, { "epoch": 1.39, "learning_rate": 4.628915025793577e-05, "loss": 0.2884, "step": 2323000 }, { "epoch": 1.39, "learning_rate": 4.62870502923752e-05, "loss": 0.2874, "step": 2323500 }, { "epoch": 1.39, "learning_rate": 4.628495032681464e-05, "loss": 0.2874, "step": 2324000 }, { "epoch": 1.39, "learning_rate": 4.62828545611852e-05, "loss": 0.2824, "step": 2324500 }, { "epoch": 1.39, "learning_rate": 4.628075459562463e-05, "loss": 0.2884, "step": 2325000 }, { "epoch": 1.39, "learning_rate": 4.6278654630064064e-05, "loss": 0.2822, "step": 2325500 }, { "epoch": 1.39, "learning_rate": 4.6276554664503504e-05, "loss": 0.2829, "step": 2326000 }, { "epoch": 1.39, "learning_rate": 4.627445889887406e-05, "loss": 0.2918, "step": 2326500 }, { "epoch": 1.4, "learning_rate": 4.627235893331349e-05, "loss": 0.2791, "step": 2327000 }, { "epoch": 1.4, "learning_rate": 4.627026316768405e-05, "loss": 0.2839, "step": 2327500 }, { "epoch": 1.4, "learning_rate": 4.6268163202123485e-05, "loss": 0.2877, "step": 2328000 }, { "epoch": 1.4, "learning_rate": 4.626606323656292e-05, "loss": 0.2947, "step": 2328500 }, { "epoch": 1.4, "learning_rate": 4.626396327100235e-05, "loss": 0.288, "step": 2329000 }, { "epoch": 1.4, "learning_rate": 4.6261863305441785e-05, "loss": 0.2848, "step": 2329500 }, { "epoch": 1.4, "learning_rate": 4.6259763339881226e-05, "loss": 0.2896, "step": 2330000 }, { "epoch": 1.4, "learning_rate": 4.625766337432066e-05, "loss": 0.285, "step": 2330500 }, { "epoch": 1.4, "learning_rate": 4.625556340876009e-05, "loss": 0.2822, "step": 2331000 }, { "epoch": 1.4, "learning_rate": 4.625346764313065e-05, "loss": 0.2934, "step": 2331500 }, { "epoch": 1.4, "learning_rate": 4.6251367677570086e-05, "loss": 0.2837, "step": 2332000 }, { "epoch": 1.4, "learning_rate": 4.624926771200952e-05, "loss": 0.2882, "step": 2332500 }, { "epoch": 1.4, "learning_rate": 4.624716774644896e-05, "loss": 0.2858, "step": 2333000 }, { "epoch": 1.4, "learning_rate": 4.6245067780888394e-05, "loss": 0.2895, "step": 2333500 }, { "epoch": 1.4, "learning_rate": 4.624297201525895e-05, "loss": 0.2882, "step": 2334000 }, { "epoch": 1.4, "learning_rate": 4.624087624962951e-05, "loss": 0.2873, "step": 2334500 }, { "epoch": 1.4, "learning_rate": 4.623877628406894e-05, "loss": 0.2885, "step": 2335000 }, { "epoch": 1.4, "learning_rate": 4.6236676318508374e-05, "loss": 0.2782, "step": 2335500 }, { "epoch": 1.4, "learning_rate": 4.623457635294781e-05, "loss": 0.2883, "step": 2336000 }, { "epoch": 1.4, "learning_rate": 4.623247638738724e-05, "loss": 0.2806, "step": 2336500 }, { "epoch": 1.4, "learning_rate": 4.623037642182668e-05, "loss": 0.2793, "step": 2337000 }, { "epoch": 1.4, "learning_rate": 4.622828065619724e-05, "loss": 0.2873, "step": 2337500 }, { "epoch": 1.4, "learning_rate": 4.622618069063667e-05, "loss": 0.2833, "step": 2338000 }, { "epoch": 1.4, "learning_rate": 4.622408072507611e-05, "loss": 0.2928, "step": 2338500 }, { "epoch": 1.4, "learning_rate": 4.622198075951554e-05, "loss": 0.2862, "step": 2339000 }, { "epoch": 1.4, "learning_rate": 4.6219880793954976e-05, "loss": 0.2921, "step": 2339500 }, { "epoch": 1.4, "learning_rate": 4.6217785028325536e-05, "loss": 0.2856, "step": 2340000 }, { "epoch": 1.4, "learning_rate": 4.621568506276497e-05, "loss": 0.2841, "step": 2340500 }, { "epoch": 1.4, "learning_rate": 4.62135850972044e-05, "loss": 0.2869, "step": 2341000 }, { "epoch": 1.4, "learning_rate": 4.6211485131643837e-05, "loss": 0.2857, "step": 2341500 }, { "epoch": 1.4, "learning_rate": 4.620938516608328e-05, "loss": 0.2881, "step": 2342000 }, { "epoch": 1.4, "learning_rate": 4.620728520052271e-05, "loss": 0.2892, "step": 2342500 }, { "epoch": 1.4, "learning_rate": 4.6205185234962144e-05, "loss": 0.2912, "step": 2343000 }, { "epoch": 1.41, "learning_rate": 4.62030894693327e-05, "loss": 0.2876, "step": 2343500 }, { "epoch": 1.41, "learning_rate": 4.620098950377214e-05, "loss": 0.2948, "step": 2344000 }, { "epoch": 1.41, "learning_rate": 4.619888953821157e-05, "loss": 0.2884, "step": 2344500 }, { "epoch": 1.41, "learning_rate": 4.6196789572651004e-05, "loss": 0.2801, "step": 2345000 }, { "epoch": 1.41, "learning_rate": 4.6194689607090445e-05, "loss": 0.2788, "step": 2345500 }, { "epoch": 1.41, "learning_rate": 4.619258964152988e-05, "loss": 0.2799, "step": 2346000 }, { "epoch": 1.41, "learning_rate": 4.619048967596931e-05, "loss": 0.2914, "step": 2346500 }, { "epoch": 1.41, "learning_rate": 4.618838971040875e-05, "loss": 0.2855, "step": 2347000 }, { "epoch": 1.41, "learning_rate": 4.6186293944779305e-05, "loss": 0.2855, "step": 2347500 }, { "epoch": 1.41, "learning_rate": 4.618419397921874e-05, "loss": 0.285, "step": 2348000 }, { "epoch": 1.41, "learning_rate": 4.618209821358929e-05, "loss": 0.2859, "step": 2348500 }, { "epoch": 1.41, "learning_rate": 4.617999824802873e-05, "loss": 0.2816, "step": 2349000 }, { "epoch": 1.41, "learning_rate": 4.6177898282468166e-05, "loss": 0.2851, "step": 2349500 }, { "epoch": 1.41, "learning_rate": 4.61757983169076e-05, "loss": 0.2836, "step": 2350000 }, { "epoch": 1.41, "learning_rate": 4.617369835134704e-05, "loss": 0.2831, "step": 2350500 }, { "epoch": 1.41, "learning_rate": 4.617159838578647e-05, "loss": 0.2869, "step": 2351000 }, { "epoch": 1.41, "learning_rate": 4.616950262015703e-05, "loss": 0.2806, "step": 2351500 }, { "epoch": 1.41, "learning_rate": 4.616740265459646e-05, "loss": 0.286, "step": 2352000 }, { "epoch": 1.41, "learning_rate": 4.61653026890359e-05, "loss": 0.2877, "step": 2352500 }, { "epoch": 1.41, "learning_rate": 4.6163202723475334e-05, "loss": 0.2884, "step": 2353000 }, { "epoch": 1.41, "learning_rate": 4.616110695784589e-05, "loss": 0.2923, "step": 2353500 }, { "epoch": 1.41, "learning_rate": 4.615900699228533e-05, "loss": 0.2825, "step": 2354000 }, { "epoch": 1.41, "learning_rate": 4.615690702672476e-05, "loss": 0.28, "step": 2354500 }, { "epoch": 1.41, "learning_rate": 4.6154807061164195e-05, "loss": 0.2838, "step": 2355000 }, { "epoch": 1.41, "learning_rate": 4.6152707095603635e-05, "loss": 0.2806, "step": 2355500 }, { "epoch": 1.41, "learning_rate": 4.615060713004307e-05, "loss": 0.2841, "step": 2356000 }, { "epoch": 1.41, "learning_rate": 4.61485071644825e-05, "loss": 0.2827, "step": 2356500 }, { "epoch": 1.41, "learning_rate": 4.614640719892194e-05, "loss": 0.2934, "step": 2357000 }, { "epoch": 1.41, "learning_rate": 4.6144307233361376e-05, "loss": 0.2816, "step": 2357500 }, { "epoch": 1.41, "learning_rate": 4.614221146773193e-05, "loss": 0.2767, "step": 2358000 }, { "epoch": 1.41, "learning_rate": 4.614011150217136e-05, "loss": 0.2828, "step": 2358500 }, { "epoch": 1.41, "learning_rate": 4.61380115366108e-05, "loss": 0.2779, "step": 2359000 }, { "epoch": 1.41, "learning_rate": 4.6135911571050236e-05, "loss": 0.2802, "step": 2359500 }, { "epoch": 1.41, "learning_rate": 4.613381580542079e-05, "loss": 0.2935, "step": 2360000 }, { "epoch": 1.42, "learning_rate": 4.613171583986023e-05, "loss": 0.2857, "step": 2360500 }, { "epoch": 1.42, "learning_rate": 4.6129620074230784e-05, "loss": 0.2837, "step": 2361000 }, { "epoch": 1.42, "learning_rate": 4.612752010867022e-05, "loss": 0.2837, "step": 2361500 }, { "epoch": 1.42, "learning_rate": 4.612542014310965e-05, "loss": 0.2857, "step": 2362000 }, { "epoch": 1.42, "learning_rate": 4.612332017754909e-05, "loss": 0.289, "step": 2362500 }, { "epoch": 1.42, "learning_rate": 4.6121220211988524e-05, "loss": 0.2887, "step": 2363000 }, { "epoch": 1.42, "learning_rate": 4.611912024642796e-05, "loss": 0.2816, "step": 2363500 }, { "epoch": 1.42, "learning_rate": 4.61170202808674e-05, "loss": 0.2816, "step": 2364000 }, { "epoch": 1.42, "learning_rate": 4.611492451523795e-05, "loss": 0.2821, "step": 2364500 }, { "epoch": 1.42, "learning_rate": 4.6112824549677385e-05, "loss": 0.2882, "step": 2365000 }, { "epoch": 1.42, "learning_rate": 4.611072458411682e-05, "loss": 0.2842, "step": 2365500 }, { "epoch": 1.42, "learning_rate": 4.610862461855626e-05, "loss": 0.2853, "step": 2366000 }, { "epoch": 1.42, "learning_rate": 4.610652885292681e-05, "loss": 0.2847, "step": 2366500 }, { "epoch": 1.42, "learning_rate": 4.6104428887366246e-05, "loss": 0.2839, "step": 2367000 }, { "epoch": 1.42, "learning_rate": 4.6102328921805686e-05, "loss": 0.2851, "step": 2367500 }, { "epoch": 1.42, "learning_rate": 4.610022895624512e-05, "loss": 0.2848, "step": 2368000 }, { "epoch": 1.42, "learning_rate": 4.609812899068455e-05, "loss": 0.2863, "step": 2368500 }, { "epoch": 1.42, "learning_rate": 4.609602902512399e-05, "loss": 0.2834, "step": 2369000 }, { "epoch": 1.42, "learning_rate": 4.609392905956342e-05, "loss": 0.2824, "step": 2369500 }, { "epoch": 1.42, "learning_rate": 4.609182909400285e-05, "loss": 0.2858, "step": 2370000 }, { "epoch": 1.42, "learning_rate": 4.6089729128442293e-05, "loss": 0.2808, "step": 2370500 }, { "epoch": 1.42, "learning_rate": 4.608762916288173e-05, "loss": 0.2827, "step": 2371000 }, { "epoch": 1.42, "learning_rate": 4.608552919732116e-05, "loss": 0.2857, "step": 2371500 }, { "epoch": 1.42, "learning_rate": 4.60834292317606e-05, "loss": 0.2837, "step": 2372000 }, { "epoch": 1.42, "learning_rate": 4.6081329266200034e-05, "loss": 0.2909, "step": 2372500 }, { "epoch": 1.42, "learning_rate": 4.607923350057059e-05, "loss": 0.2893, "step": 2373000 }, { "epoch": 1.42, "learning_rate": 4.607713353501002e-05, "loss": 0.2903, "step": 2373500 }, { "epoch": 1.42, "learning_rate": 4.607503356944946e-05, "loss": 0.2891, "step": 2374000 }, { "epoch": 1.42, "learning_rate": 4.6072933603888895e-05, "loss": 0.2876, "step": 2374500 }, { "epoch": 1.42, "learning_rate": 4.607083363832833e-05, "loss": 0.2911, "step": 2375000 }, { "epoch": 1.42, "learning_rate": 4.606873787269889e-05, "loss": 0.2861, "step": 2375500 }, { "epoch": 1.42, "learning_rate": 4.606664210706945e-05, "loss": 0.2844, "step": 2376000 }, { "epoch": 1.42, "learning_rate": 4.606454214150888e-05, "loss": 0.2797, "step": 2376500 }, { "epoch": 1.43, "learning_rate": 4.606244217594831e-05, "loss": 0.2898, "step": 2377000 }, { "epoch": 1.43, "learning_rate": 4.606034221038775e-05, "loss": 0.2854, "step": 2377500 }, { "epoch": 1.43, "learning_rate": 4.605824224482718e-05, "loss": 0.2811, "step": 2378000 }, { "epoch": 1.43, "learning_rate": 4.6056142279266616e-05, "loss": 0.2809, "step": 2378500 }, { "epoch": 1.43, "learning_rate": 4.6054042313706057e-05, "loss": 0.2853, "step": 2379000 }, { "epoch": 1.43, "learning_rate": 4.605194654807661e-05, "loss": 0.2864, "step": 2379500 }, { "epoch": 1.43, "learning_rate": 4.6049846582516044e-05, "loss": 0.2823, "step": 2380000 }, { "epoch": 1.43, "learning_rate": 4.604774661695548e-05, "loss": 0.2829, "step": 2380500 }, { "epoch": 1.43, "learning_rate": 4.604564665139492e-05, "loss": 0.2875, "step": 2381000 }, { "epoch": 1.43, "learning_rate": 4.604355088576547e-05, "loss": 0.2838, "step": 2381500 }, { "epoch": 1.43, "learning_rate": 4.6041450920204904e-05, "loss": 0.2765, "step": 2382000 }, { "epoch": 1.43, "learning_rate": 4.6039350954644345e-05, "loss": 0.2828, "step": 2382500 }, { "epoch": 1.43, "learning_rate": 4.603725098908378e-05, "loss": 0.2801, "step": 2383000 }, { "epoch": 1.43, "learning_rate": 4.603515102352321e-05, "loss": 0.2823, "step": 2383500 }, { "epoch": 1.43, "learning_rate": 4.603305105796265e-05, "loss": 0.283, "step": 2384000 }, { "epoch": 1.43, "learning_rate": 4.6030951092402085e-05, "loss": 0.287, "step": 2384500 }, { "epoch": 1.43, "learning_rate": 4.602885112684152e-05, "loss": 0.2816, "step": 2385000 }, { "epoch": 1.43, "learning_rate": 4.602675536121207e-05, "loss": 0.2832, "step": 2385500 }, { "epoch": 1.43, "learning_rate": 4.602465539565151e-05, "loss": 0.2833, "step": 2386000 }, { "epoch": 1.43, "learning_rate": 4.6022555430090946e-05, "loss": 0.282, "step": 2386500 }, { "epoch": 1.43, "learning_rate": 4.602045546453038e-05, "loss": 0.281, "step": 2387000 }, { "epoch": 1.43, "learning_rate": 4.601835549896982e-05, "loss": 0.2844, "step": 2387500 }, { "epoch": 1.43, "learning_rate": 4.601625553340925e-05, "loss": 0.2844, "step": 2388000 }, { "epoch": 1.43, "learning_rate": 4.6014155567848686e-05, "loss": 0.2823, "step": 2388500 }, { "epoch": 1.43, "learning_rate": 4.601205980221924e-05, "loss": 0.2844, "step": 2389000 }, { "epoch": 1.43, "learning_rate": 4.600995983665868e-05, "loss": 0.2874, "step": 2389500 }, { "epoch": 1.43, "learning_rate": 4.6007859871098114e-05, "loss": 0.2835, "step": 2390000 }, { "epoch": 1.43, "learning_rate": 4.600575990553755e-05, "loss": 0.2821, "step": 2390500 }, { "epoch": 1.43, "learning_rate": 4.600366413990811e-05, "loss": 0.2867, "step": 2391000 }, { "epoch": 1.43, "learning_rate": 4.600156417434754e-05, "loss": 0.2831, "step": 2391500 }, { "epoch": 1.43, "learning_rate": 4.5999464208786974e-05, "loss": 0.2949, "step": 2392000 }, { "epoch": 1.43, "learning_rate": 4.5997364243226415e-05, "loss": 0.282, "step": 2392500 }, { "epoch": 1.43, "learning_rate": 4.599526427766585e-05, "loss": 0.2828, "step": 2393000 }, { "epoch": 1.44, "learning_rate": 4.59931685120364e-05, "loss": 0.2863, "step": 2393500 }, { "epoch": 1.44, "learning_rate": 4.5991068546475835e-05, "loss": 0.2847, "step": 2394000 }, { "epoch": 1.44, "learning_rate": 4.5988968580915275e-05, "loss": 0.2841, "step": 2394500 }, { "epoch": 1.44, "learning_rate": 4.598687281528583e-05, "loss": 0.2807, "step": 2395000 }, { "epoch": 1.44, "learning_rate": 4.598477284972526e-05, "loss": 0.2868, "step": 2395500 }, { "epoch": 1.44, "learning_rate": 4.5982672884164696e-05, "loss": 0.2854, "step": 2396000 }, { "epoch": 1.44, "learning_rate": 4.5980572918604136e-05, "loss": 0.2852, "step": 2396500 }, { "epoch": 1.44, "learning_rate": 4.597847295304357e-05, "loss": 0.2883, "step": 2397000 }, { "epoch": 1.44, "learning_rate": 4.5976372987483e-05, "loss": 0.2798, "step": 2397500 }, { "epoch": 1.44, "learning_rate": 4.597427302192244e-05, "loss": 0.2846, "step": 2398000 }, { "epoch": 1.44, "learning_rate": 4.597217305636188e-05, "loss": 0.2831, "step": 2398500 }, { "epoch": 1.44, "learning_rate": 4.597007309080131e-05, "loss": 0.2828, "step": 2399000 }, { "epoch": 1.44, "learning_rate": 4.596797732517187e-05, "loss": 0.2843, "step": 2399500 }, { "epoch": 1.44, "learning_rate": 4.5965877359611304e-05, "loss": 0.2828, "step": 2400000 }, { "epoch": 1.44, "eval_loss": 0.25953343510627747, "eval_runtime": 1454.488, "eval_samples_per_second": 362.134, "eval_steps_per_second": 60.356, "step": 2400000 }, { "epoch": 1.44, "learning_rate": 4.596377739405074e-05, "loss": 0.2922, "step": 2400500 }, { "epoch": 1.44, "learning_rate": 4.596167742849018e-05, "loss": 0.2815, "step": 2401000 }, { "epoch": 1.44, "learning_rate": 4.5959577462929604e-05, "loss": 0.2913, "step": 2401500 }, { "epoch": 1.44, "learning_rate": 4.595747749736904e-05, "loss": 0.2798, "step": 2402000 }, { "epoch": 1.44, "learning_rate": 4.59553817317396e-05, "loss": 0.2844, "step": 2402500 }, { "epoch": 1.44, "learning_rate": 4.595328176617904e-05, "loss": 0.2889, "step": 2403000 }, { "epoch": 1.44, "learning_rate": 4.595118180061847e-05, "loss": 0.2885, "step": 2403500 }, { "epoch": 1.44, "learning_rate": 4.59490818350579e-05, "loss": 0.28, "step": 2404000 }, { "epoch": 1.44, "learning_rate": 4.594698186949734e-05, "loss": 0.2818, "step": 2404500 }, { "epoch": 1.44, "learning_rate": 4.594489030379902e-05, "loss": 0.2852, "step": 2405000 }, { "epoch": 1.44, "learning_rate": 4.594279033823845e-05, "loss": 0.2865, "step": 2405500 }, { "epoch": 1.44, "learning_rate": 4.5940690372677886e-05, "loss": 0.2898, "step": 2406000 }, { "epoch": 1.44, "learning_rate": 4.593859460704844e-05, "loss": 0.2842, "step": 2406500 }, { "epoch": 1.44, "learning_rate": 4.593649464148788e-05, "loss": 0.2899, "step": 2407000 }, { "epoch": 1.44, "learning_rate": 4.5934394675927313e-05, "loss": 0.2846, "step": 2407500 }, { "epoch": 1.44, "learning_rate": 4.593229471036675e-05, "loss": 0.2849, "step": 2408000 }, { "epoch": 1.44, "learning_rate": 4.593019474480619e-05, "loss": 0.2775, "step": 2408500 }, { "epoch": 1.44, "learning_rate": 4.592809477924562e-05, "loss": 0.2764, "step": 2409000 }, { "epoch": 1.44, "learning_rate": 4.5925994813685054e-05, "loss": 0.2892, "step": 2409500 }, { "epoch": 1.44, "learning_rate": 4.5923894848124494e-05, "loss": 0.2902, "step": 2410000 }, { "epoch": 1.45, "learning_rate": 4.592179488256393e-05, "loss": 0.2904, "step": 2410500 }, { "epoch": 1.45, "learning_rate": 4.5919694917003354e-05, "loss": 0.2801, "step": 2411000 }, { "epoch": 1.45, "learning_rate": 4.5917594951442795e-05, "loss": 0.283, "step": 2411500 }, { "epoch": 1.45, "learning_rate": 4.591549498588223e-05, "loss": 0.2839, "step": 2412000 }, { "epoch": 1.45, "learning_rate": 4.591339502032166e-05, "loss": 0.2875, "step": 2412500 }, { "epoch": 1.45, "learning_rate": 4.591129925469222e-05, "loss": 0.2838, "step": 2413000 }, { "epoch": 1.45, "learning_rate": 4.5909199289131655e-05, "loss": 0.2883, "step": 2413500 }, { "epoch": 1.45, "learning_rate": 4.590709932357109e-05, "loss": 0.2816, "step": 2414000 }, { "epoch": 1.45, "learning_rate": 4.590499935801053e-05, "loss": 0.2859, "step": 2414500 }, { "epoch": 1.45, "learning_rate": 4.590289939244996e-05, "loss": 0.2832, "step": 2415000 }, { "epoch": 1.45, "learning_rate": 4.5900799426889396e-05, "loss": 0.2823, "step": 2415500 }, { "epoch": 1.45, "learning_rate": 4.589870366125995e-05, "loss": 0.2868, "step": 2416000 }, { "epoch": 1.45, "learning_rate": 4.589660369569939e-05, "loss": 0.2816, "step": 2416500 }, { "epoch": 1.45, "learning_rate": 4.589450373013882e-05, "loss": 0.293, "step": 2417000 }, { "epoch": 1.45, "learning_rate": 4.589240376457826e-05, "loss": 0.2865, "step": 2417500 }, { "epoch": 1.45, "learning_rate": 4.58903037990177e-05, "loss": 0.2871, "step": 2418000 }, { "epoch": 1.45, "learning_rate": 4.588820803338825e-05, "loss": 0.2848, "step": 2418500 }, { "epoch": 1.45, "learning_rate": 4.5886108067827684e-05, "loss": 0.2826, "step": 2419000 }, { "epoch": 1.45, "learning_rate": 4.588400810226712e-05, "loss": 0.2858, "step": 2419500 }, { "epoch": 1.45, "learning_rate": 4.588190813670656e-05, "loss": 0.288, "step": 2420000 }, { "epoch": 1.45, "learning_rate": 4.587980817114599e-05, "loss": 0.2815, "step": 2420500 }, { "epoch": 1.45, "learning_rate": 4.5877712405516545e-05, "loss": 0.282, "step": 2421000 }, { "epoch": 1.45, "learning_rate": 4.5875612439955985e-05, "loss": 0.2764, "step": 2421500 }, { "epoch": 1.45, "learning_rate": 4.587351247439542e-05, "loss": 0.2842, "step": 2422000 }, { "epoch": 1.45, "learning_rate": 4.587141250883485e-05, "loss": 0.2829, "step": 2422500 }, { "epoch": 1.45, "learning_rate": 4.5869316743205406e-05, "loss": 0.2769, "step": 2423000 }, { "epoch": 1.45, "learning_rate": 4.5867220977575966e-05, "loss": 0.2825, "step": 2423500 }, { "epoch": 1.45, "learning_rate": 4.5865121012015406e-05, "loss": 0.2801, "step": 2424000 }, { "epoch": 1.45, "learning_rate": 4.586302104645484e-05, "loss": 0.2799, "step": 2424500 }, { "epoch": 1.45, "learning_rate": 4.586092108089427e-05, "loss": 0.2866, "step": 2425000 }, { "epoch": 1.45, "learning_rate": 4.5858821115333706e-05, "loss": 0.2829, "step": 2425500 }, { "epoch": 1.45, "learning_rate": 4.585672534970427e-05, "loss": 0.2851, "step": 2426000 }, { "epoch": 1.45, "learning_rate": 4.58546253841437e-05, "loss": 0.2798, "step": 2426500 }, { "epoch": 1.46, "learning_rate": 4.585252541858314e-05, "loss": 0.2853, "step": 2427000 }, { "epoch": 1.46, "learning_rate": 4.585042545302257e-05, "loss": 0.2847, "step": 2427500 }, { "epoch": 1.46, "learning_rate": 4.5848325487462e-05, "loss": 0.2927, "step": 2428000 }, { "epoch": 1.46, "learning_rate": 4.584622552190144e-05, "loss": 0.2825, "step": 2428500 }, { "epoch": 1.46, "learning_rate": 4.5844125556340874e-05, "loss": 0.2835, "step": 2429000 }, { "epoch": 1.46, "learning_rate": 4.584202559078031e-05, "loss": 0.2863, "step": 2429500 }, { "epoch": 1.46, "learning_rate": 4.583992982515086e-05, "loss": 0.2812, "step": 2430000 }, { "epoch": 1.46, "learning_rate": 4.58378298595903e-05, "loss": 0.284, "step": 2430500 }, { "epoch": 1.46, "learning_rate": 4.5835729894029735e-05, "loss": 0.2849, "step": 2431000 }, { "epoch": 1.46, "learning_rate": 4.583362992846917e-05, "loss": 0.2826, "step": 2431500 }, { "epoch": 1.46, "learning_rate": 4.583153416283973e-05, "loss": 0.2837, "step": 2432000 }, { "epoch": 1.46, "learning_rate": 4.582943419727916e-05, "loss": 0.2784, "step": 2432500 }, { "epoch": 1.46, "learning_rate": 4.5827334231718596e-05, "loss": 0.2904, "step": 2433000 }, { "epoch": 1.46, "learning_rate": 4.5825238466089156e-05, "loss": 0.2806, "step": 2433500 }, { "epoch": 1.46, "learning_rate": 4.5823138500528596e-05, "loss": 0.279, "step": 2434000 }, { "epoch": 1.46, "learning_rate": 4.582103853496803e-05, "loss": 0.2834, "step": 2434500 }, { "epoch": 1.46, "learning_rate": 4.5818938569407457e-05, "loss": 0.2829, "step": 2435000 }, { "epoch": 1.46, "learning_rate": 4.58168386038469e-05, "loss": 0.2803, "step": 2435500 }, { "epoch": 1.46, "learning_rate": 4.581473863828633e-05, "loss": 0.2809, "step": 2436000 }, { "epoch": 1.46, "learning_rate": 4.5812638672725764e-05, "loss": 0.2891, "step": 2436500 }, { "epoch": 1.46, "learning_rate": 4.5810538707165204e-05, "loss": 0.2895, "step": 2437000 }, { "epoch": 1.46, "learning_rate": 4.580844294153576e-05, "loss": 0.2821, "step": 2437500 }, { "epoch": 1.46, "learning_rate": 4.580634297597519e-05, "loss": 0.2811, "step": 2438000 }, { "epoch": 1.46, "learning_rate": 4.5804243010414624e-05, "loss": 0.2772, "step": 2438500 }, { "epoch": 1.46, "learning_rate": 4.5802143044854065e-05, "loss": 0.285, "step": 2439000 }, { "epoch": 1.46, "learning_rate": 4.58000430792935e-05, "loss": 0.2851, "step": 2439500 }, { "epoch": 1.46, "learning_rate": 4.579794731366405e-05, "loss": 0.2779, "step": 2440000 }, { "epoch": 1.46, "learning_rate": 4.579584734810349e-05, "loss": 0.289, "step": 2440500 }, { "epoch": 1.46, "learning_rate": 4.579375158247405e-05, "loss": 0.2884, "step": 2441000 }, { "epoch": 1.46, "learning_rate": 4.5791651616913486e-05, "loss": 0.2833, "step": 2441500 }, { "epoch": 1.46, "learning_rate": 4.578955165135291e-05, "loss": 0.2818, "step": 2442000 }, { "epoch": 1.46, "learning_rate": 4.578745168579235e-05, "loss": 0.2785, "step": 2442500 }, { "epoch": 1.46, "learning_rate": 4.578535592016291e-05, "loss": 0.2832, "step": 2443000 }, { "epoch": 1.46, "learning_rate": 4.5783255954602346e-05, "loss": 0.2808, "step": 2443500 }, { "epoch": 1.47, "learning_rate": 4.578115598904178e-05, "loss": 0.2777, "step": 2444000 }, { "epoch": 1.47, "learning_rate": 4.577905602348121e-05, "loss": 0.2805, "step": 2444500 }, { "epoch": 1.47, "learning_rate": 4.577695605792065e-05, "loss": 0.286, "step": 2445000 }, { "epoch": 1.47, "learning_rate": 4.577485609236008e-05, "loss": 0.2903, "step": 2445500 }, { "epoch": 1.47, "learning_rate": 4.577275612679952e-05, "loss": 0.2854, "step": 2446000 }, { "epoch": 1.47, "learning_rate": 4.5770656161238954e-05, "loss": 0.2834, "step": 2446500 }, { "epoch": 1.47, "learning_rate": 4.576856039560951e-05, "loss": 0.2814, "step": 2447000 }, { "epoch": 1.47, "learning_rate": 4.576646043004895e-05, "loss": 0.2853, "step": 2447500 }, { "epoch": 1.47, "learning_rate": 4.576436046448838e-05, "loss": 0.2834, "step": 2448000 }, { "epoch": 1.47, "learning_rate": 4.5762260498927815e-05, "loss": 0.2816, "step": 2448500 }, { "epoch": 1.47, "learning_rate": 4.5760160533367255e-05, "loss": 0.2821, "step": 2449000 }, { "epoch": 1.47, "learning_rate": 4.575806476773781e-05, "loss": 0.2827, "step": 2449500 }, { "epoch": 1.47, "learning_rate": 4.575596480217724e-05, "loss": 0.2868, "step": 2450000 }, { "epoch": 1.47, "learning_rate": 4.5753864836616675e-05, "loss": 0.2805, "step": 2450500 }, { "epoch": 1.47, "learning_rate": 4.5751764871056116e-05, "loss": 0.2806, "step": 2451000 }, { "epoch": 1.47, "learning_rate": 4.574966910542667e-05, "loss": 0.2866, "step": 2451500 }, { "epoch": 1.47, "learning_rate": 4.57475691398661e-05, "loss": 0.2918, "step": 2452000 }, { "epoch": 1.47, "learning_rate": 4.5745469174305536e-05, "loss": 0.2836, "step": 2452500 }, { "epoch": 1.47, "learning_rate": 4.5743369208744976e-05, "loss": 0.2846, "step": 2453000 }, { "epoch": 1.47, "learning_rate": 4.574126924318441e-05, "loss": 0.2804, "step": 2453500 }, { "epoch": 1.47, "learning_rate": 4.573916927762384e-05, "loss": 0.2867, "step": 2454000 }, { "epoch": 1.47, "learning_rate": 4.5737073511994404e-05, "loss": 0.2857, "step": 2454500 }, { "epoch": 1.47, "learning_rate": 4.573497354643384e-05, "loss": 0.282, "step": 2455000 }, { "epoch": 1.47, "learning_rate": 4.573287358087327e-05, "loss": 0.2884, "step": 2455500 }, { "epoch": 1.47, "learning_rate": 4.573077361531271e-05, "loss": 0.2871, "step": 2456000 }, { "epoch": 1.47, "learning_rate": 4.5728677849683264e-05, "loss": 0.2794, "step": 2456500 }, { "epoch": 1.47, "learning_rate": 4.57265778841227e-05, "loss": 0.2807, "step": 2457000 }, { "epoch": 1.47, "learning_rate": 4.572447791856213e-05, "loss": 0.2849, "step": 2457500 }, { "epoch": 1.47, "learning_rate": 4.572237795300157e-05, "loss": 0.2805, "step": 2458000 }, { "epoch": 1.47, "learning_rate": 4.5720277987441005e-05, "loss": 0.2881, "step": 2458500 }, { "epoch": 1.47, "learning_rate": 4.571817802188044e-05, "loss": 0.2846, "step": 2459000 }, { "epoch": 1.47, "learning_rate": 4.571608225625099e-05, "loss": 0.2854, "step": 2459500 }, { "epoch": 1.47, "learning_rate": 4.571398229069043e-05, "loss": 0.2847, "step": 2460000 }, { "epoch": 1.48, "learning_rate": 4.5711882325129866e-05, "loss": 0.2867, "step": 2460500 }, { "epoch": 1.48, "learning_rate": 4.57097823595693e-05, "loss": 0.2873, "step": 2461000 }, { "epoch": 1.48, "learning_rate": 4.570768659393986e-05, "loss": 0.286, "step": 2461500 }, { "epoch": 1.48, "learning_rate": 4.570558662837929e-05, "loss": 0.2843, "step": 2462000 }, { "epoch": 1.48, "learning_rate": 4.5703486662818726e-05, "loss": 0.2757, "step": 2462500 }, { "epoch": 1.48, "learning_rate": 4.570139089718929e-05, "loss": 0.2852, "step": 2463000 }, { "epoch": 1.48, "learning_rate": 4.569929093162872e-05, "loss": 0.2835, "step": 2463500 }, { "epoch": 1.48, "learning_rate": 4.5697190966068154e-05, "loss": 0.2897, "step": 2464000 }, { "epoch": 1.48, "learning_rate": 4.569509100050759e-05, "loss": 0.2866, "step": 2464500 }, { "epoch": 1.48, "learning_rate": 4.569299103494703e-05, "loss": 0.2799, "step": 2465000 }, { "epoch": 1.48, "learning_rate": 4.569089106938646e-05, "loss": 0.2839, "step": 2465500 }, { "epoch": 1.48, "learning_rate": 4.5688791103825894e-05, "loss": 0.2831, "step": 2466000 }, { "epoch": 1.48, "learning_rate": 4.5686691138265335e-05, "loss": 0.277, "step": 2466500 }, { "epoch": 1.48, "learning_rate": 4.568459957256701e-05, "loss": 0.2856, "step": 2467000 }, { "epoch": 1.48, "learning_rate": 4.568249960700645e-05, "loss": 0.2817, "step": 2467500 }, { "epoch": 1.48, "learning_rate": 4.5680399641445875e-05, "loss": 0.2848, "step": 2468000 }, { "epoch": 1.48, "learning_rate": 4.5678299675885315e-05, "loss": 0.2857, "step": 2468500 }, { "epoch": 1.48, "learning_rate": 4.567619971032475e-05, "loss": 0.2831, "step": 2469000 }, { "epoch": 1.48, "learning_rate": 4.567410394469531e-05, "loss": 0.2853, "step": 2469500 }, { "epoch": 1.48, "learning_rate": 4.567200397913474e-05, "loss": 0.2858, "step": 2470000 }, { "epoch": 1.48, "learning_rate": 4.5669904013574176e-05, "loss": 0.292, "step": 2470500 }, { "epoch": 1.48, "learning_rate": 4.566780404801361e-05, "loss": 0.2835, "step": 2471000 }, { "epoch": 1.48, "learning_rate": 4.566570408245304e-05, "loss": 0.2795, "step": 2471500 }, { "epoch": 1.48, "learning_rate": 4.566360411689248e-05, "loss": 0.2847, "step": 2472000 }, { "epoch": 1.48, "learning_rate": 4.566150415133192e-05, "loss": 0.2856, "step": 2472500 }, { "epoch": 1.48, "learning_rate": 4.565940418577135e-05, "loss": 0.2866, "step": 2473000 }, { "epoch": 1.48, "learning_rate": 4.5657308420141904e-05, "loss": 0.2905, "step": 2473500 }, { "epoch": 1.48, "learning_rate": 4.5655212654512464e-05, "loss": 0.2831, "step": 2474000 }, { "epoch": 1.48, "learning_rate": 4.5653112688951904e-05, "loss": 0.2891, "step": 2474500 }, { "epoch": 1.48, "learning_rate": 4.565101272339134e-05, "loss": 0.2785, "step": 2475000 }, { "epoch": 1.48, "learning_rate": 4.564891275783077e-05, "loss": 0.2834, "step": 2475500 }, { "epoch": 1.48, "learning_rate": 4.5646812792270205e-05, "loss": 0.2867, "step": 2476000 }, { "epoch": 1.48, "learning_rate": 4.564471282670964e-05, "loss": 0.2829, "step": 2476500 }, { "epoch": 1.49, "learning_rate": 4.56426170610802e-05, "loss": 0.2774, "step": 2477000 }, { "epoch": 1.49, "learning_rate": 4.564051709551963e-05, "loss": 0.284, "step": 2477500 }, { "epoch": 1.49, "learning_rate": 4.563842132989019e-05, "loss": 0.2803, "step": 2478000 }, { "epoch": 1.49, "learning_rate": 4.5636321364329626e-05, "loss": 0.2852, "step": 2478500 }, { "epoch": 1.49, "learning_rate": 4.563422139876906e-05, "loss": 0.279, "step": 2479000 }, { "epoch": 1.49, "learning_rate": 4.56321214332085e-05, "loss": 0.2849, "step": 2479500 }, { "epoch": 1.49, "learning_rate": 4.5630021467647926e-05, "loss": 0.2847, "step": 2480000 }, { "epoch": 1.49, "learning_rate": 4.5627925702018487e-05, "loss": 0.285, "step": 2480500 }, { "epoch": 1.49, "learning_rate": 4.562582573645792e-05, "loss": 0.2862, "step": 2481000 }, { "epoch": 1.49, "learning_rate": 4.562372577089736e-05, "loss": 0.2826, "step": 2481500 }, { "epoch": 1.49, "learning_rate": 4.5621625805336794e-05, "loss": 0.2759, "step": 2482000 }, { "epoch": 1.49, "learning_rate": 4.561953003970735e-05, "loss": 0.2824, "step": 2482500 }, { "epoch": 1.49, "learning_rate": 4.561743007414679e-05, "loss": 0.282, "step": 2483000 }, { "epoch": 1.49, "learning_rate": 4.561533010858622e-05, "loss": 0.2796, "step": 2483500 }, { "epoch": 1.49, "learning_rate": 4.5613230143025654e-05, "loss": 0.2882, "step": 2484000 }, { "epoch": 1.49, "learning_rate": 4.5611130177465095e-05, "loss": 0.2784, "step": 2484500 }, { "epoch": 1.49, "learning_rate": 4.560903021190452e-05, "loss": 0.285, "step": 2485000 }, { "epoch": 1.49, "learning_rate": 4.5606930246343955e-05, "loss": 0.283, "step": 2485500 }, { "epoch": 1.49, "learning_rate": 4.5604830280783395e-05, "loss": 0.2833, "step": 2486000 }, { "epoch": 1.49, "learning_rate": 4.560273031522283e-05, "loss": 0.2858, "step": 2486500 }, { "epoch": 1.49, "learning_rate": 4.560063034966226e-05, "loss": 0.287, "step": 2487000 }, { "epoch": 1.49, "learning_rate": 4.55985303841017e-05, "loss": 0.2856, "step": 2487500 }, { "epoch": 1.49, "learning_rate": 4.5596430418541136e-05, "loss": 0.2834, "step": 2488000 }, { "epoch": 1.49, "learning_rate": 4.559433045298057e-05, "loss": 0.2846, "step": 2488500 }, { "epoch": 1.49, "learning_rate": 4.559223048742001e-05, "loss": 0.2863, "step": 2489000 }, { "epoch": 1.49, "learning_rate": 4.559013052185944e-05, "loss": 0.2884, "step": 2489500 }, { "epoch": 1.49, "learning_rate": 4.5588030556298876e-05, "loss": 0.2796, "step": 2490000 }, { "epoch": 1.49, "learning_rate": 4.558593059073831e-05, "loss": 0.2848, "step": 2490500 }, { "epoch": 1.49, "learning_rate": 4.558383062517774e-05, "loss": 0.2774, "step": 2491000 }, { "epoch": 1.49, "learning_rate": 4.5581734859548304e-05, "loss": 0.2857, "step": 2491500 }, { "epoch": 1.49, "learning_rate": 4.5579634893987744e-05, "loss": 0.2827, "step": 2492000 }, { "epoch": 1.49, "learning_rate": 4.557753492842717e-05, "loss": 0.2894, "step": 2492500 }, { "epoch": 1.49, "learning_rate": 4.5575434962866604e-05, "loss": 0.284, "step": 2493000 }, { "epoch": 1.49, "learning_rate": 4.5573339197237164e-05, "loss": 0.2861, "step": 2493500 }, { "epoch": 1.5, "learning_rate": 4.5571239231676605e-05, "loss": 0.285, "step": 2494000 }, { "epoch": 1.5, "learning_rate": 4.556913926611604e-05, "loss": 0.2833, "step": 2494500 }, { "epoch": 1.5, "learning_rate": 4.5567039300555465e-05, "loss": 0.2791, "step": 2495000 }, { "epoch": 1.5, "learning_rate": 4.5564939334994905e-05, "loss": 0.2768, "step": 2495500 }, { "epoch": 1.5, "learning_rate": 4.5562843569365465e-05, "loss": 0.2814, "step": 2496000 }, { "epoch": 1.5, "learning_rate": 4.55607436038049e-05, "loss": 0.2862, "step": 2496500 }, { "epoch": 1.5, "learning_rate": 4.555864363824433e-05, "loss": 0.2858, "step": 2497000 }, { "epoch": 1.5, "learning_rate": 4.5556543672683766e-05, "loss": 0.2858, "step": 2497500 }, { "epoch": 1.5, "learning_rate": 4.55544437071232e-05, "loss": 0.2856, "step": 2498000 }, { "epoch": 1.5, "learning_rate": 4.555234374156264e-05, "loss": 0.2813, "step": 2498500 }, { "epoch": 1.5, "learning_rate": 4.55502479759332e-05, "loss": 0.2793, "step": 2499000 }, { "epoch": 1.5, "learning_rate": 4.554814801037263e-05, "loss": 0.285, "step": 2499500 }, { "epoch": 1.5, "learning_rate": 4.554604804481206e-05, "loss": 0.2819, "step": 2500000 }, { "epoch": 1.5, "eval_loss": 0.25834259390830994, "eval_runtime": 1455.0809, "eval_samples_per_second": 361.987, "eval_steps_per_second": 60.331, "step": 2500000 }, { "epoch": 1.5, "learning_rate": 4.55439480792515e-05, "loss": 0.2824, "step": 2500500 }, { "epoch": 1.5, "learning_rate": 4.554185231362206e-05, "loss": 0.2816, "step": 2501000 }, { "epoch": 1.5, "learning_rate": 4.5539752348061494e-05, "loss": 0.2804, "step": 2501500 }, { "epoch": 1.5, "learning_rate": 4.553765238250093e-05, "loss": 0.2838, "step": 2502000 }, { "epoch": 1.5, "learning_rate": 4.553555241694036e-05, "loss": 0.2893, "step": 2502500 }, { "epoch": 1.5, "learning_rate": 4.553345665131092e-05, "loss": 0.282, "step": 2503000 }, { "epoch": 1.5, "learning_rate": 4.5531356685750355e-05, "loss": 0.2839, "step": 2503500 }, { "epoch": 1.5, "learning_rate": 4.552925672018979e-05, "loss": 0.2764, "step": 2504000 }, { "epoch": 1.5, "learning_rate": 4.552715675462922e-05, "loss": 0.2795, "step": 2504500 }, { "epoch": 1.5, "learning_rate": 4.5525056789068655e-05, "loss": 0.2839, "step": 2505000 }, { "epoch": 1.5, "learning_rate": 4.5522965223370335e-05, "loss": 0.2822, "step": 2505500 }, { "epoch": 1.5, "learning_rate": 4.552086525780977e-05, "loss": 0.2784, "step": 2506000 }, { "epoch": 1.5, "learning_rate": 4.551876529224921e-05, "loss": 0.2866, "step": 2506500 }, { "epoch": 1.5, "learning_rate": 4.551666952661976e-05, "loss": 0.2866, "step": 2507000 }, { "epoch": 1.5, "learning_rate": 4.5514569561059196e-05, "loss": 0.2854, "step": 2507500 }, { "epoch": 1.5, "learning_rate": 4.551246959549863e-05, "loss": 0.2844, "step": 2508000 }, { "epoch": 1.5, "learning_rate": 4.551036962993807e-05, "loss": 0.2799, "step": 2508500 }, { "epoch": 1.5, "learning_rate": 4.55082696643775e-05, "loss": 0.2861, "step": 2509000 }, { "epoch": 1.5, "learning_rate": 4.550616969881694e-05, "loss": 0.2864, "step": 2509500 }, { "epoch": 1.5, "learning_rate": 4.550406973325638e-05, "loss": 0.2832, "step": 2510000 }, { "epoch": 1.51, "learning_rate": 4.550196976769581e-05, "loss": 0.2822, "step": 2510500 }, { "epoch": 1.51, "learning_rate": 4.5499869802135244e-05, "loss": 0.2877, "step": 2511000 }, { "epoch": 1.51, "learning_rate": 4.5497769836574684e-05, "loss": 0.2845, "step": 2511500 }, { "epoch": 1.51, "learning_rate": 4.549566987101411e-05, "loss": 0.2859, "step": 2512000 }, { "epoch": 1.51, "learning_rate": 4.549356990545355e-05, "loss": 0.2827, "step": 2512500 }, { "epoch": 1.51, "learning_rate": 4.549147413982411e-05, "loss": 0.2883, "step": 2513000 }, { "epoch": 1.51, "learning_rate": 4.5489374174263545e-05, "loss": 0.2761, "step": 2513500 }, { "epoch": 1.51, "learning_rate": 4.548727420870297e-05, "loss": 0.2802, "step": 2514000 }, { "epoch": 1.51, "learning_rate": 4.548517424314241e-05, "loss": 0.2886, "step": 2514500 }, { "epoch": 1.51, "learning_rate": 4.548307847751297e-05, "loss": 0.2815, "step": 2515000 }, { "epoch": 1.51, "learning_rate": 4.5480978511952406e-05, "loss": 0.2783, "step": 2515500 }, { "epoch": 1.51, "learning_rate": 4.547887854639184e-05, "loss": 0.2826, "step": 2516000 }, { "epoch": 1.51, "learning_rate": 4.547677858083127e-05, "loss": 0.2795, "step": 2516500 }, { "epoch": 1.51, "learning_rate": 4.5474678615270706e-05, "loss": 0.2779, "step": 2517000 }, { "epoch": 1.51, "learning_rate": 4.5472582849641266e-05, "loss": 0.2889, "step": 2517500 }, { "epoch": 1.51, "learning_rate": 4.54704828840807e-05, "loss": 0.2783, "step": 2518000 }, { "epoch": 1.51, "learning_rate": 4.546838291852014e-05, "loss": 0.2809, "step": 2518500 }, { "epoch": 1.51, "learning_rate": 4.546628295295957e-05, "loss": 0.2916, "step": 2519000 }, { "epoch": 1.51, "learning_rate": 4.546418718733013e-05, "loss": 0.2833, "step": 2519500 }, { "epoch": 1.51, "learning_rate": 4.546208722176957e-05, "loss": 0.2811, "step": 2520000 }, { "epoch": 1.51, "learning_rate": 4.5459987256209e-05, "loss": 0.2788, "step": 2520500 }, { "epoch": 1.51, "learning_rate": 4.5457891490579554e-05, "loss": 0.2837, "step": 2521000 }, { "epoch": 1.51, "learning_rate": 4.545579152501899e-05, "loss": 0.2814, "step": 2521500 }, { "epoch": 1.51, "learning_rate": 4.545369155945843e-05, "loss": 0.2819, "step": 2522000 }, { "epoch": 1.51, "learning_rate": 4.545159159389786e-05, "loss": 0.2854, "step": 2522500 }, { "epoch": 1.51, "learning_rate": 4.5449491628337295e-05, "loss": 0.2875, "step": 2523000 }, { "epoch": 1.51, "learning_rate": 4.544739586270785e-05, "loss": 0.2902, "step": 2523500 }, { "epoch": 1.51, "learning_rate": 4.544529589714729e-05, "loss": 0.2849, "step": 2524000 }, { "epoch": 1.51, "learning_rate": 4.544319593158672e-05, "loss": 0.282, "step": 2524500 }, { "epoch": 1.51, "learning_rate": 4.5441095966026156e-05, "loss": 0.28, "step": 2525000 }, { "epoch": 1.51, "learning_rate": 4.5438996000465596e-05, "loss": 0.2792, "step": 2525500 }, { "epoch": 1.51, "learning_rate": 4.543689603490502e-05, "loss": 0.2837, "step": 2526000 }, { "epoch": 1.51, "learning_rate": 4.543479606934446e-05, "loss": 0.2832, "step": 2526500 }, { "epoch": 1.52, "learning_rate": 4.5432696103783896e-05, "loss": 0.2818, "step": 2527000 }, { "epoch": 1.52, "learning_rate": 4.543059613822333e-05, "loss": 0.2817, "step": 2527500 }, { "epoch": 1.52, "learning_rate": 4.542850037259389e-05, "loss": 0.2833, "step": 2528000 }, { "epoch": 1.52, "learning_rate": 4.5426400407033324e-05, "loss": 0.2839, "step": 2528500 }, { "epoch": 1.52, "learning_rate": 4.5424304641403884e-05, "loss": 0.2792, "step": 2529000 }, { "epoch": 1.52, "learning_rate": 4.542220467584332e-05, "loss": 0.2787, "step": 2529500 }, { "epoch": 1.52, "learning_rate": 4.542010471028275e-05, "loss": 0.2809, "step": 2530000 }, { "epoch": 1.52, "learning_rate": 4.541800474472219e-05, "loss": 0.2808, "step": 2530500 }, { "epoch": 1.52, "learning_rate": 4.541590477916162e-05, "loss": 0.2854, "step": 2531000 }, { "epoch": 1.52, "learning_rate": 4.541380481360105e-05, "loss": 0.2863, "step": 2531500 }, { "epoch": 1.52, "learning_rate": 4.541170484804049e-05, "loss": 0.2796, "step": 2532000 }, { "epoch": 1.52, "learning_rate": 4.5409604882479925e-05, "loss": 0.281, "step": 2532500 }, { "epoch": 1.52, "learning_rate": 4.5407509116850485e-05, "loss": 0.2841, "step": 2533000 }, { "epoch": 1.52, "learning_rate": 4.540540915128992e-05, "loss": 0.283, "step": 2533500 }, { "epoch": 1.52, "learning_rate": 4.540330918572935e-05, "loss": 0.2792, "step": 2534000 }, { "epoch": 1.52, "learning_rate": 4.5401209220168786e-05, "loss": 0.2834, "step": 2534500 }, { "epoch": 1.52, "learning_rate": 4.5399109254608226e-05, "loss": 0.2824, "step": 2535000 }, { "epoch": 1.52, "learning_rate": 4.539701348897878e-05, "loss": 0.2873, "step": 2535500 }, { "epoch": 1.52, "learning_rate": 4.539491352341821e-05, "loss": 0.2801, "step": 2536000 }, { "epoch": 1.52, "learning_rate": 4.5392813557857646e-05, "loss": 0.2824, "step": 2536500 }, { "epoch": 1.52, "learning_rate": 4.5390713592297087e-05, "loss": 0.2801, "step": 2537000 }, { "epoch": 1.52, "learning_rate": 4.538861782666765e-05, "loss": 0.2799, "step": 2537500 }, { "epoch": 1.52, "learning_rate": 4.5386517861107074e-05, "loss": 0.2916, "step": 2538000 }, { "epoch": 1.52, "learning_rate": 4.538441789554651e-05, "loss": 0.2841, "step": 2538500 }, { "epoch": 1.52, "learning_rate": 4.538231792998595e-05, "loss": 0.2859, "step": 2539000 }, { "epoch": 1.52, "learning_rate": 4.538022216435651e-05, "loss": 0.2803, "step": 2539500 }, { "epoch": 1.52, "learning_rate": 4.537812219879594e-05, "loss": 0.2832, "step": 2540000 }, { "epoch": 1.52, "learning_rate": 4.5376022233235375e-05, "loss": 0.2852, "step": 2540500 }, { "epoch": 1.52, "learning_rate": 4.537392226767481e-05, "loss": 0.2808, "step": 2541000 }, { "epoch": 1.52, "learning_rate": 4.537182230211424e-05, "loss": 0.2844, "step": 2541500 }, { "epoch": 1.52, "learning_rate": 4.53697265364848e-05, "loss": 0.2819, "step": 2542000 }, { "epoch": 1.52, "learning_rate": 4.536762657092424e-05, "loss": 0.2808, "step": 2542500 }, { "epoch": 1.52, "learning_rate": 4.536552660536367e-05, "loss": 0.2913, "step": 2543000 }, { "epoch": 1.52, "learning_rate": 4.53634266398031e-05, "loss": 0.2793, "step": 2543500 }, { "epoch": 1.53, "learning_rate": 4.536133087417366e-05, "loss": 0.2878, "step": 2544000 }, { "epoch": 1.53, "learning_rate": 4.53592309086131e-05, "loss": 0.2845, "step": 2544500 }, { "epoch": 1.53, "learning_rate": 4.535713094305253e-05, "loss": 0.2821, "step": 2545000 }, { "epoch": 1.53, "learning_rate": 4.535503097749196e-05, "loss": 0.2812, "step": 2545500 }, { "epoch": 1.53, "learning_rate": 4.535293521186252e-05, "loss": 0.2862, "step": 2546000 }, { "epoch": 1.53, "learning_rate": 4.5350835246301964e-05, "loss": 0.2767, "step": 2546500 }, { "epoch": 1.53, "learning_rate": 4.53487352807414e-05, "loss": 0.2867, "step": 2547000 }, { "epoch": 1.53, "learning_rate": 4.534663531518083e-05, "loss": 0.2842, "step": 2547500 }, { "epoch": 1.53, "learning_rate": 4.5344535349620264e-05, "loss": 0.2861, "step": 2548000 }, { "epoch": 1.53, "learning_rate": 4.5342439583990824e-05, "loss": 0.283, "step": 2548500 }, { "epoch": 1.53, "learning_rate": 4.534033961843026e-05, "loss": 0.2832, "step": 2549000 }, { "epoch": 1.53, "learning_rate": 4.53382396528697e-05, "loss": 0.2953, "step": 2549500 }, { "epoch": 1.53, "learning_rate": 4.5336139687309125e-05, "loss": 0.28, "step": 2550000 }, { "epoch": 1.53, "learning_rate": 4.5334043921679685e-05, "loss": 0.284, "step": 2550500 }, { "epoch": 1.53, "learning_rate": 4.533194815605024e-05, "loss": 0.2846, "step": 2551000 }, { "epoch": 1.53, "learning_rate": 4.532984819048967e-05, "loss": 0.2892, "step": 2551500 }, { "epoch": 1.53, "learning_rate": 4.532774822492911e-05, "loss": 0.2874, "step": 2552000 }, { "epoch": 1.53, "learning_rate": 4.5325648259368546e-05, "loss": 0.2847, "step": 2552500 }, { "epoch": 1.53, "learning_rate": 4.532354829380798e-05, "loss": 0.2877, "step": 2553000 }, { "epoch": 1.53, "learning_rate": 4.532145252817854e-05, "loss": 0.2825, "step": 2553500 }, { "epoch": 1.53, "learning_rate": 4.531935256261797e-05, "loss": 0.2771, "step": 2554000 }, { "epoch": 1.53, "learning_rate": 4.5317252597057406e-05, "loss": 0.2841, "step": 2554500 }, { "epoch": 1.53, "learning_rate": 4.531515263149685e-05, "loss": 0.2859, "step": 2555000 }, { "epoch": 1.53, "learning_rate": 4.531305266593628e-05, "loss": 0.2873, "step": 2555500 }, { "epoch": 1.53, "learning_rate": 4.5310952700375714e-05, "loss": 0.2851, "step": 2556000 }, { "epoch": 1.53, "learning_rate": 4.5308852734815154e-05, "loss": 0.2793, "step": 2556500 }, { "epoch": 1.53, "learning_rate": 4.530675276925458e-05, "loss": 0.2806, "step": 2557000 }, { "epoch": 1.53, "learning_rate": 4.5304652803694014e-05, "loss": 0.281, "step": 2557500 }, { "epoch": 1.53, "learning_rate": 4.5302557038064574e-05, "loss": 0.2827, "step": 2558000 }, { "epoch": 1.53, "learning_rate": 4.5300457072504015e-05, "loss": 0.2797, "step": 2558500 }, { "epoch": 1.53, "learning_rate": 4.529835710694345e-05, "loss": 0.2804, "step": 2559000 }, { "epoch": 1.53, "learning_rate": 4.5296257141382875e-05, "loss": 0.288, "step": 2559500 }, { "epoch": 1.53, "learning_rate": 4.5294161375753435e-05, "loss": 0.2805, "step": 2560000 }, { "epoch": 1.54, "learning_rate": 4.5292061410192875e-05, "loss": 0.2851, "step": 2560500 }, { "epoch": 1.54, "learning_rate": 4.528996144463231e-05, "loss": 0.2781, "step": 2561000 }, { "epoch": 1.54, "learning_rate": 4.528786147907174e-05, "loss": 0.2751, "step": 2561500 }, { "epoch": 1.54, "learning_rate": 4.52857657134423e-05, "loss": 0.2913, "step": 2562000 }, { "epoch": 1.54, "learning_rate": 4.5283665747881736e-05, "loss": 0.2792, "step": 2562500 }, { "epoch": 1.54, "learning_rate": 4.528156578232117e-05, "loss": 0.2877, "step": 2563000 }, { "epoch": 1.54, "learning_rate": 4.527946581676061e-05, "loss": 0.2831, "step": 2563500 }, { "epoch": 1.54, "learning_rate": 4.527736585120004e-05, "loss": 0.2833, "step": 2564000 }, { "epoch": 1.54, "learning_rate": 4.52752700855706e-05, "loss": 0.2877, "step": 2564500 }, { "epoch": 1.54, "learning_rate": 4.527317012001003e-05, "loss": 0.2819, "step": 2565000 }, { "epoch": 1.54, "learning_rate": 4.5271074354380584e-05, "loss": 0.2841, "step": 2565500 }, { "epoch": 1.54, "learning_rate": 4.5268974388820024e-05, "loss": 0.2861, "step": 2566000 }, { "epoch": 1.54, "learning_rate": 4.526687442325946e-05, "loss": 0.2811, "step": 2566500 }, { "epoch": 1.54, "learning_rate": 4.526477445769889e-05, "loss": 0.2844, "step": 2567000 }, { "epoch": 1.54, "learning_rate": 4.526267449213833e-05, "loss": 0.2829, "step": 2567500 }, { "epoch": 1.54, "learning_rate": 4.5260578726508885e-05, "loss": 0.2843, "step": 2568000 }, { "epoch": 1.54, "learning_rate": 4.525847876094832e-05, "loss": 0.2853, "step": 2568500 }, { "epoch": 1.54, "learning_rate": 4.525637879538776e-05, "loss": 0.2842, "step": 2569000 }, { "epoch": 1.54, "learning_rate": 4.525427882982719e-05, "loss": 0.2821, "step": 2569500 }, { "epoch": 1.54, "learning_rate": 4.5252178864266625e-05, "loss": 0.2807, "step": 2570000 }, { "epoch": 1.54, "learning_rate": 4.5250078898706066e-05, "loss": 0.2811, "step": 2570500 }, { "epoch": 1.54, "learning_rate": 4.52479789331455e-05, "loss": 0.2804, "step": 2571000 }, { "epoch": 1.54, "learning_rate": 4.5245878967584926e-05, "loss": 0.2816, "step": 2571500 }, { "epoch": 1.54, "learning_rate": 4.5243783201955486e-05, "loss": 0.2822, "step": 2572000 }, { "epoch": 1.54, "learning_rate": 4.5241683236394926e-05, "loss": 0.2829, "step": 2572500 }, { "epoch": 1.54, "learning_rate": 4.523958327083436e-05, "loss": 0.2862, "step": 2573000 }, { "epoch": 1.54, "learning_rate": 4.523748330527379e-05, "loss": 0.2899, "step": 2573500 }, { "epoch": 1.54, "learning_rate": 4.523538753964435e-05, "loss": 0.2894, "step": 2574000 }, { "epoch": 1.54, "learning_rate": 4.523328757408379e-05, "loss": 0.2815, "step": 2574500 }, { "epoch": 1.54, "learning_rate": 4.523118760852322e-05, "loss": 0.2808, "step": 2575000 }, { "epoch": 1.54, "learning_rate": 4.5229091842893774e-05, "loss": 0.2777, "step": 2575500 }, { "epoch": 1.54, "learning_rate": 4.5226991877333214e-05, "loss": 0.2803, "step": 2576000 }, { "epoch": 1.54, "learning_rate": 4.522489191177265e-05, "loss": 0.2729, "step": 2576500 }, { "epoch": 1.55, "learning_rate": 4.522279194621208e-05, "loss": 0.2839, "step": 2577000 }, { "epoch": 1.55, "learning_rate": 4.522069198065152e-05, "loss": 0.2824, "step": 2577500 }, { "epoch": 1.55, "learning_rate": 4.5218592015090955e-05, "loss": 0.2779, "step": 2578000 }, { "epoch": 1.55, "learning_rate": 4.521649204953038e-05, "loss": 0.2794, "step": 2578500 }, { "epoch": 1.55, "learning_rate": 4.521439208396982e-05, "loss": 0.2786, "step": 2579000 }, { "epoch": 1.55, "learning_rate": 4.5212292118409255e-05, "loss": 0.2845, "step": 2579500 }, { "epoch": 1.55, "learning_rate": 4.521019215284869e-05, "loss": 0.2877, "step": 2580000 }, { "epoch": 1.55, "learning_rate": 4.520809218728813e-05, "loss": 0.2876, "step": 2580500 }, { "epoch": 1.55, "learning_rate": 4.520599222172756e-05, "loss": 0.2812, "step": 2581000 }, { "epoch": 1.55, "learning_rate": 4.5203896456098116e-05, "loss": 0.2804, "step": 2581500 }, { "epoch": 1.55, "learning_rate": 4.520179649053755e-05, "loss": 0.2818, "step": 2582000 }, { "epoch": 1.55, "learning_rate": 4.519969652497699e-05, "loss": 0.2755, "step": 2582500 }, { "epoch": 1.55, "learning_rate": 4.519759655941642e-05, "loss": 0.2856, "step": 2583000 }, { "epoch": 1.55, "learning_rate": 4.519550079378698e-05, "loss": 0.2825, "step": 2583500 }, { "epoch": 1.55, "learning_rate": 4.519340082822642e-05, "loss": 0.2837, "step": 2584000 }, { "epoch": 1.55, "learning_rate": 4.519130086266585e-05, "loss": 0.2862, "step": 2584500 }, { "epoch": 1.55, "learning_rate": 4.5189200897105284e-05, "loss": 0.2814, "step": 2585000 }, { "epoch": 1.55, "learning_rate": 4.5187100931544724e-05, "loss": 0.2862, "step": 2585500 }, { "epoch": 1.55, "learning_rate": 4.518500516591528e-05, "loss": 0.2818, "step": 2586000 }, { "epoch": 1.55, "learning_rate": 4.518290520035471e-05, "loss": 0.2834, "step": 2586500 }, { "epoch": 1.55, "learning_rate": 4.5180805234794145e-05, "loss": 0.2815, "step": 2587000 }, { "epoch": 1.55, "learning_rate": 4.5178705269233585e-05, "loss": 0.2779, "step": 2587500 }, { "epoch": 1.55, "learning_rate": 4.517660950360414e-05, "loss": 0.2843, "step": 2588000 }, { "epoch": 1.55, "learning_rate": 4.517450953804357e-05, "loss": 0.2852, "step": 2588500 }, { "epoch": 1.55, "learning_rate": 4.517240957248301e-05, "loss": 0.2798, "step": 2589000 }, { "epoch": 1.55, "learning_rate": 4.5170309606922446e-05, "loss": 0.2775, "step": 2589500 }, { "epoch": 1.55, "learning_rate": 4.5168213841293006e-05, "loss": 0.2822, "step": 2590000 }, { "epoch": 1.55, "learning_rate": 4.516611387573243e-05, "loss": 0.2728, "step": 2590500 }, { "epoch": 1.55, "learning_rate": 4.516401811010299e-05, "loss": 0.2775, "step": 2591000 }, { "epoch": 1.55, "learning_rate": 4.516191814454243e-05, "loss": 0.2808, "step": 2591500 }, { "epoch": 1.55, "learning_rate": 4.515981817898187e-05, "loss": 0.282, "step": 2592000 }, { "epoch": 1.55, "learning_rate": 4.51577182134213e-05, "loss": 0.2872, "step": 2592500 }, { "epoch": 1.55, "learning_rate": 4.5155618247860734e-05, "loss": 0.2821, "step": 2593000 }, { "epoch": 1.55, "learning_rate": 4.515351828230017e-05, "loss": 0.2843, "step": 2593500 }, { "epoch": 1.56, "learning_rate": 4.51514183167396e-05, "loss": 0.2846, "step": 2594000 }, { "epoch": 1.56, "learning_rate": 4.514931835117904e-05, "loss": 0.2881, "step": 2594500 }, { "epoch": 1.56, "learning_rate": 4.514722678548072e-05, "loss": 0.2817, "step": 2595000 }, { "epoch": 1.56, "learning_rate": 4.5145126819920155e-05, "loss": 0.2739, "step": 2595500 }, { "epoch": 1.56, "learning_rate": 4.514302685435959e-05, "loss": 0.279, "step": 2596000 }, { "epoch": 1.56, "learning_rate": 4.514092688879903e-05, "loss": 0.2853, "step": 2596500 }, { "epoch": 1.56, "learning_rate": 4.513882692323846e-05, "loss": 0.2845, "step": 2597000 }, { "epoch": 1.56, "learning_rate": 4.513672695767789e-05, "loss": 0.2819, "step": 2597500 }, { "epoch": 1.56, "learning_rate": 4.513462699211733e-05, "loss": 0.2827, "step": 2598000 }, { "epoch": 1.56, "learning_rate": 4.513252702655676e-05, "loss": 0.2797, "step": 2598500 }, { "epoch": 1.56, "learning_rate": 4.5130427060996196e-05, "loss": 0.2782, "step": 2599000 }, { "epoch": 1.56, "learning_rate": 4.5128327095435636e-05, "loss": 0.279, "step": 2599500 }, { "epoch": 1.56, "learning_rate": 4.512623132980619e-05, "loss": 0.2919, "step": 2600000 }, { "epoch": 1.56, "eval_loss": 0.2566536068916321, "eval_runtime": 1454.846, "eval_samples_per_second": 362.045, "eval_steps_per_second": 60.341, "step": 2600000 }, { "epoch": 1.56, "learning_rate": 4.512413136424562e-05, "loss": 0.2818, "step": 2600500 }, { "epoch": 1.56, "learning_rate": 4.5122031398685056e-05, "loss": 0.2757, "step": 2601000 }, { "epoch": 1.56, "learning_rate": 4.51199314331245e-05, "loss": 0.2833, "step": 2601500 }, { "epoch": 1.56, "learning_rate": 4.511783146756393e-05, "loss": 0.2843, "step": 2602000 }, { "epoch": 1.56, "learning_rate": 4.5115735701934484e-05, "loss": 0.2802, "step": 2602500 }, { "epoch": 1.56, "learning_rate": 4.5113639936305044e-05, "loss": 0.2799, "step": 2603000 }, { "epoch": 1.56, "learning_rate": 4.51115441706756e-05, "loss": 0.2837, "step": 2603500 }, { "epoch": 1.56, "learning_rate": 4.510944420511504e-05, "loss": 0.2857, "step": 2604000 }, { "epoch": 1.56, "learning_rate": 4.510734423955447e-05, "loss": 0.2738, "step": 2604500 }, { "epoch": 1.56, "learning_rate": 4.5105244273993905e-05, "loss": 0.2834, "step": 2605000 }, { "epoch": 1.56, "learning_rate": 4.5103144308433345e-05, "loss": 0.2826, "step": 2605500 }, { "epoch": 1.56, "learning_rate": 4.510104434287278e-05, "loss": 0.2805, "step": 2606000 }, { "epoch": 1.56, "learning_rate": 4.509894437731221e-05, "loss": 0.283, "step": 2606500 }, { "epoch": 1.56, "learning_rate": 4.5096844411751645e-05, "loss": 0.2861, "step": 2607000 }, { "epoch": 1.56, "learning_rate": 4.509474444619108e-05, "loss": 0.2827, "step": 2607500 }, { "epoch": 1.56, "learning_rate": 4.509264448063051e-05, "loss": 0.2751, "step": 2608000 }, { "epoch": 1.56, "learning_rate": 4.509054451506995e-05, "loss": 0.2777, "step": 2608500 }, { "epoch": 1.56, "learning_rate": 4.5088444549509386e-05, "loss": 0.2762, "step": 2609000 }, { "epoch": 1.56, "learning_rate": 4.508634458394882e-05, "loss": 0.2847, "step": 2609500 }, { "epoch": 1.56, "learning_rate": 4.50842530182505e-05, "loss": 0.2885, "step": 2610000 }, { "epoch": 1.57, "learning_rate": 4.508215305268994e-05, "loss": 0.2763, "step": 2610500 }, { "epoch": 1.57, "learning_rate": 4.5080053087129374e-05, "loss": 0.2848, "step": 2611000 }, { "epoch": 1.57, "learning_rate": 4.507795312156881e-05, "loss": 0.2858, "step": 2611500 }, { "epoch": 1.57, "learning_rate": 4.507585315600824e-05, "loss": 0.2845, "step": 2612000 }, { "epoch": 1.57, "learning_rate": 4.5073753190447674e-05, "loss": 0.281, "step": 2612500 }, { "epoch": 1.57, "learning_rate": 4.507165322488711e-05, "loss": 0.2792, "step": 2613000 }, { "epoch": 1.57, "learning_rate": 4.506955325932655e-05, "loss": 0.2839, "step": 2613500 }, { "epoch": 1.57, "learning_rate": 4.506745749369711e-05, "loss": 0.2879, "step": 2614000 }, { "epoch": 1.57, "learning_rate": 4.5065357528136535e-05, "loss": 0.2818, "step": 2614500 }, { "epoch": 1.57, "learning_rate": 4.506325756257597e-05, "loss": 0.2789, "step": 2615000 }, { "epoch": 1.57, "learning_rate": 4.506115759701541e-05, "loss": 0.2857, "step": 2615500 }, { "epoch": 1.57, "learning_rate": 4.505906183138597e-05, "loss": 0.2842, "step": 2616000 }, { "epoch": 1.57, "learning_rate": 4.5056961865825395e-05, "loss": 0.2778, "step": 2616500 }, { "epoch": 1.57, "learning_rate": 4.5054861900264836e-05, "loss": 0.2824, "step": 2617000 }, { "epoch": 1.57, "learning_rate": 4.505276193470427e-05, "loss": 0.2853, "step": 2617500 }, { "epoch": 1.57, "learning_rate": 4.50506619691437e-05, "loss": 0.2819, "step": 2618000 }, { "epoch": 1.57, "learning_rate": 4.504856620351426e-05, "loss": 0.2784, "step": 2618500 }, { "epoch": 1.57, "learning_rate": 4.5046466237953696e-05, "loss": 0.2816, "step": 2619000 }, { "epoch": 1.57, "learning_rate": 4.504437047232426e-05, "loss": 0.2865, "step": 2619500 }, { "epoch": 1.57, "learning_rate": 4.504227050676369e-05, "loss": 0.2799, "step": 2620000 }, { "epoch": 1.57, "learning_rate": 4.5040170541203124e-05, "loss": 0.2899, "step": 2620500 }, { "epoch": 1.57, "learning_rate": 4.5038070575642564e-05, "loss": 0.29, "step": 2621000 }, { "epoch": 1.57, "learning_rate": 4.503597061008199e-05, "loss": 0.2822, "step": 2621500 }, { "epoch": 1.57, "learning_rate": 4.5033870644521424e-05, "loss": 0.2847, "step": 2622000 }, { "epoch": 1.57, "learning_rate": 4.5031774878891984e-05, "loss": 0.2803, "step": 2622500 }, { "epoch": 1.57, "learning_rate": 4.5029674913331425e-05, "loss": 0.2813, "step": 2623000 }, { "epoch": 1.57, "learning_rate": 4.502757494777086e-05, "loss": 0.2921, "step": 2623500 }, { "epoch": 1.57, "learning_rate": 4.502547498221029e-05, "loss": 0.2786, "step": 2624000 }, { "epoch": 1.57, "learning_rate": 4.5023375016649725e-05, "loss": 0.2859, "step": 2624500 }, { "epoch": 1.57, "learning_rate": 4.502127505108916e-05, "loss": 0.2906, "step": 2625000 }, { "epoch": 1.57, "learning_rate": 4.50191750855286e-05, "loss": 0.2841, "step": 2625500 }, { "epoch": 1.57, "learning_rate": 4.501707511996803e-05, "loss": 0.283, "step": 2626000 }, { "epoch": 1.57, "learning_rate": 4.5014975154407466e-05, "loss": 0.2784, "step": 2626500 }, { "epoch": 1.57, "learning_rate": 4.501287938877802e-05, "loss": 0.287, "step": 2627000 }, { "epoch": 1.58, "learning_rate": 4.501077942321746e-05, "loss": 0.2844, "step": 2627500 }, { "epoch": 1.58, "learning_rate": 4.500867945765689e-05, "loss": 0.2833, "step": 2628000 }, { "epoch": 1.58, "learning_rate": 4.5006583692027446e-05, "loss": 0.2871, "step": 2628500 }, { "epoch": 1.58, "learning_rate": 4.500448372646688e-05, "loss": 0.2809, "step": 2629000 }, { "epoch": 1.58, "learning_rate": 4.500238376090632e-05, "loss": 0.2823, "step": 2629500 }, { "epoch": 1.58, "learning_rate": 4.5000283795345754e-05, "loss": 0.2788, "step": 2630000 }, { "epoch": 1.58, "learning_rate": 4.499818382978519e-05, "loss": 0.2816, "step": 2630500 }, { "epoch": 1.58, "learning_rate": 4.499608386422463e-05, "loss": 0.2857, "step": 2631000 }, { "epoch": 1.58, "learning_rate": 4.499398389866406e-05, "loss": 0.281, "step": 2631500 }, { "epoch": 1.58, "learning_rate": 4.4991883933103494e-05, "loss": 0.2769, "step": 2632000 }, { "epoch": 1.58, "learning_rate": 4.4989783967542934e-05, "loss": 0.2802, "step": 2632500 }, { "epoch": 1.58, "learning_rate": 4.498768820191349e-05, "loss": 0.2815, "step": 2633000 }, { "epoch": 1.58, "learning_rate": 4.498558823635292e-05, "loss": 0.2838, "step": 2633500 }, { "epoch": 1.58, "learning_rate": 4.498348827079236e-05, "loss": 0.2838, "step": 2634000 }, { "epoch": 1.58, "learning_rate": 4.4981388305231795e-05, "loss": 0.2785, "step": 2634500 }, { "epoch": 1.58, "learning_rate": 4.497929253960235e-05, "loss": 0.2816, "step": 2635000 }, { "epoch": 1.58, "learning_rate": 4.497719257404178e-05, "loss": 0.275, "step": 2635500 }, { "epoch": 1.58, "learning_rate": 4.497509260848122e-05, "loss": 0.2752, "step": 2636000 }, { "epoch": 1.58, "learning_rate": 4.4972992642920656e-05, "loss": 0.281, "step": 2636500 }, { "epoch": 1.58, "learning_rate": 4.497089687729121e-05, "loss": 0.2849, "step": 2637000 }, { "epoch": 1.58, "learning_rate": 4.496879691173064e-05, "loss": 0.2876, "step": 2637500 }, { "epoch": 1.58, "learning_rate": 4.496669694617008e-05, "loss": 0.2769, "step": 2638000 }, { "epoch": 1.58, "learning_rate": 4.496459698060952e-05, "loss": 0.2823, "step": 2638500 }, { "epoch": 1.58, "learning_rate": 4.496249701504895e-05, "loss": 0.2796, "step": 2639000 }, { "epoch": 1.58, "learning_rate": 4.496039704948839e-05, "loss": 0.2819, "step": 2639500 }, { "epoch": 1.58, "learning_rate": 4.4958297083927824e-05, "loss": 0.2766, "step": 2640000 }, { "epoch": 1.58, "learning_rate": 4.495619711836726e-05, "loss": 0.2762, "step": 2640500 }, { "epoch": 1.58, "learning_rate": 4.495410135273782e-05, "loss": 0.28, "step": 2641000 }, { "epoch": 1.58, "learning_rate": 4.495200138717725e-05, "loss": 0.2815, "step": 2641500 }, { "epoch": 1.58, "learning_rate": 4.4949901421616685e-05, "loss": 0.2771, "step": 2642000 }, { "epoch": 1.58, "learning_rate": 4.4947801456056125e-05, "loss": 0.287, "step": 2642500 }, { "epoch": 1.58, "learning_rate": 4.494570569042668e-05, "loss": 0.2828, "step": 2643000 }, { "epoch": 1.58, "learning_rate": 4.494360572486611e-05, "loss": 0.2893, "step": 2643500 }, { "epoch": 1.59, "learning_rate": 4.4941505759305545e-05, "loss": 0.2774, "step": 2644000 }, { "epoch": 1.59, "learning_rate": 4.4939405793744986e-05, "loss": 0.2769, "step": 2644500 }, { "epoch": 1.59, "learning_rate": 4.493730582818442e-05, "loss": 0.2754, "step": 2645000 }, { "epoch": 1.59, "learning_rate": 4.493521006255497e-05, "loss": 0.2783, "step": 2645500 }, { "epoch": 1.59, "learning_rate": 4.4933110096994406e-05, "loss": 0.2806, "step": 2646000 }, { "epoch": 1.59, "learning_rate": 4.4931010131433846e-05, "loss": 0.2879, "step": 2646500 }, { "epoch": 1.59, "learning_rate": 4.492891016587328e-05, "loss": 0.2866, "step": 2647000 }, { "epoch": 1.59, "learning_rate": 4.492681440024383e-05, "loss": 0.2735, "step": 2647500 }, { "epoch": 1.59, "learning_rate": 4.4924714434683274e-05, "loss": 0.284, "step": 2648000 }, { "epoch": 1.59, "learning_rate": 4.492261866905383e-05, "loss": 0.2761, "step": 2648500 }, { "epoch": 1.59, "learning_rate": 4.492051870349326e-05, "loss": 0.2821, "step": 2649000 }, { "epoch": 1.59, "learning_rate": 4.4918418737932694e-05, "loss": 0.2799, "step": 2649500 }, { "epoch": 1.59, "learning_rate": 4.4916318772372134e-05, "loss": 0.2838, "step": 2650000 }, { "epoch": 1.59, "learning_rate": 4.491421880681157e-05, "loss": 0.2741, "step": 2650500 }, { "epoch": 1.59, "learning_rate": 4.491212304118212e-05, "loss": 0.28, "step": 2651000 }, { "epoch": 1.59, "learning_rate": 4.4910023075621555e-05, "loss": 0.2777, "step": 2651500 }, { "epoch": 1.59, "learning_rate": 4.4907923110060995e-05, "loss": 0.28, "step": 2652000 }, { "epoch": 1.59, "learning_rate": 4.490582314450043e-05, "loss": 0.2861, "step": 2652500 }, { "epoch": 1.59, "learning_rate": 4.490372317893986e-05, "loss": 0.2814, "step": 2653000 }, { "epoch": 1.59, "learning_rate": 4.49016232133793e-05, "loss": 0.2834, "step": 2653500 }, { "epoch": 1.59, "learning_rate": 4.4899523247818736e-05, "loss": 0.2841, "step": 2654000 }, { "epoch": 1.59, "learning_rate": 4.489742748218929e-05, "loss": 0.289, "step": 2654500 }, { "epoch": 1.59, "learning_rate": 4.489532751662873e-05, "loss": 0.283, "step": 2655000 }, { "epoch": 1.59, "learning_rate": 4.489322755106816e-05, "loss": 0.2783, "step": 2655500 }, { "epoch": 1.59, "learning_rate": 4.4891127585507596e-05, "loss": 0.2827, "step": 2656000 }, { "epoch": 1.59, "learning_rate": 4.488903181987815e-05, "loss": 0.2794, "step": 2656500 }, { "epoch": 1.59, "learning_rate": 4.488693185431759e-05, "loss": 0.2789, "step": 2657000 }, { "epoch": 1.59, "learning_rate": 4.4884831888757024e-05, "loss": 0.2789, "step": 2657500 }, { "epoch": 1.59, "learning_rate": 4.488273192319646e-05, "loss": 0.2796, "step": 2658000 }, { "epoch": 1.59, "learning_rate": 4.48806319576359e-05, "loss": 0.284, "step": 2658500 }, { "epoch": 1.59, "learning_rate": 4.487853199207533e-05, "loss": 0.2816, "step": 2659000 }, { "epoch": 1.59, "learning_rate": 4.4876432026514764e-05, "loss": 0.2812, "step": 2659500 }, { "epoch": 1.59, "learning_rate": 4.4874332060954204e-05, "loss": 0.2778, "step": 2660000 }, { "epoch": 1.6, "learning_rate": 4.487223629532476e-05, "loss": 0.2851, "step": 2660500 }, { "epoch": 1.6, "learning_rate": 4.487013632976419e-05, "loss": 0.2798, "step": 2661000 }, { "epoch": 1.6, "learning_rate": 4.486803636420363e-05, "loss": 0.2797, "step": 2661500 }, { "epoch": 1.6, "learning_rate": 4.4865936398643065e-05, "loss": 0.2801, "step": 2662000 }, { "epoch": 1.6, "learning_rate": 4.486383643308249e-05, "loss": 0.2837, "step": 2662500 }, { "epoch": 1.6, "learning_rate": 4.486173646752193e-05, "loss": 0.2776, "step": 2663000 }, { "epoch": 1.6, "learning_rate": 4.485964070189249e-05, "loss": 0.2807, "step": 2663500 }, { "epoch": 1.6, "learning_rate": 4.4857540736331926e-05, "loss": 0.2858, "step": 2664000 }, { "epoch": 1.6, "learning_rate": 4.485544077077136e-05, "loss": 0.28, "step": 2664500 }, { "epoch": 1.6, "learning_rate": 4.485334500514191e-05, "loss": 0.2843, "step": 2665000 }, { "epoch": 1.6, "learning_rate": 4.485124503958135e-05, "loss": 0.2775, "step": 2665500 }, { "epoch": 1.6, "learning_rate": 4.4849145074020787e-05, "loss": 0.2815, "step": 2666000 }, { "epoch": 1.6, "learning_rate": 4.484704510846022e-05, "loss": 0.2803, "step": 2666500 }, { "epoch": 1.6, "learning_rate": 4.484494514289966e-05, "loss": 0.2819, "step": 2667000 }, { "epoch": 1.6, "learning_rate": 4.484284517733909e-05, "loss": 0.2831, "step": 2667500 }, { "epoch": 1.6, "learning_rate": 4.484074521177853e-05, "loss": 0.28, "step": 2668000 }, { "epoch": 1.6, "learning_rate": 4.483864524621796e-05, "loss": 0.2778, "step": 2668500 }, { "epoch": 1.6, "learning_rate": 4.4836545280657394e-05, "loss": 0.2833, "step": 2669000 }, { "epoch": 1.6, "learning_rate": 4.4834449515027954e-05, "loss": 0.2825, "step": 2669500 }, { "epoch": 1.6, "learning_rate": 4.483234954946739e-05, "loss": 0.2806, "step": 2670000 }, { "epoch": 1.6, "learning_rate": 4.483024958390682e-05, "loss": 0.275, "step": 2670500 }, { "epoch": 1.6, "learning_rate": 4.4828149618346255e-05, "loss": 0.2844, "step": 2671000 }, { "epoch": 1.6, "learning_rate": 4.4826053852716815e-05, "loss": 0.2881, "step": 2671500 }, { "epoch": 1.6, "learning_rate": 4.4823953887156255e-05, "loss": 0.277, "step": 2672000 }, { "epoch": 1.6, "learning_rate": 4.482185392159568e-05, "loss": 0.2823, "step": 2672500 }, { "epoch": 1.6, "learning_rate": 4.4819753956035116e-05, "loss": 0.2789, "step": 2673000 }, { "epoch": 1.6, "learning_rate": 4.4817658190405676e-05, "loss": 0.282, "step": 2673500 }, { "epoch": 1.6, "learning_rate": 4.4815558224845116e-05, "loss": 0.2866, "step": 2674000 }, { "epoch": 1.6, "learning_rate": 4.481345825928454e-05, "loss": 0.2857, "step": 2674500 }, { "epoch": 1.6, "learning_rate": 4.481135829372398e-05, "loss": 0.2773, "step": 2675000 }, { "epoch": 1.6, "learning_rate": 4.4809258328163417e-05, "loss": 0.2822, "step": 2675500 }, { "epoch": 1.6, "learning_rate": 4.480716256253398e-05, "loss": 0.2883, "step": 2676000 }, { "epoch": 1.6, "learning_rate": 4.480506259697341e-05, "loss": 0.2763, "step": 2676500 }, { "epoch": 1.6, "learning_rate": 4.4802962631412844e-05, "loss": 0.2789, "step": 2677000 }, { "epoch": 1.61, "learning_rate": 4.4800866865783404e-05, "loss": 0.2846, "step": 2677500 }, { "epoch": 1.61, "learning_rate": 4.479876690022284e-05, "loss": 0.2809, "step": 2678000 }, { "epoch": 1.61, "learning_rate": 4.479666693466227e-05, "loss": 0.2837, "step": 2678500 }, { "epoch": 1.61, "learning_rate": 4.479456696910171e-05, "loss": 0.2787, "step": 2679000 }, { "epoch": 1.61, "learning_rate": 4.479246700354114e-05, "loss": 0.2794, "step": 2679500 }, { "epoch": 1.61, "learning_rate": 4.479036703798057e-05, "loss": 0.2781, "step": 2680000 }, { "epoch": 1.61, "learning_rate": 4.478826707242001e-05, "loss": 0.2827, "step": 2680500 }, { "epoch": 1.61, "learning_rate": 4.4786167106859445e-05, "loss": 0.2812, "step": 2681000 }, { "epoch": 1.61, "learning_rate": 4.478406714129888e-05, "loss": 0.2795, "step": 2681500 }, { "epoch": 1.61, "learning_rate": 4.478196717573832e-05, "loss": 0.2772, "step": 2682000 }, { "epoch": 1.61, "learning_rate": 4.477986721017775e-05, "loss": 0.2776, "step": 2682500 }, { "epoch": 1.61, "learning_rate": 4.4777767244617186e-05, "loss": 0.2845, "step": 2683000 }, { "epoch": 1.61, "learning_rate": 4.4775671478987746e-05, "loss": 0.2793, "step": 2683500 }, { "epoch": 1.61, "learning_rate": 4.477357151342718e-05, "loss": 0.2808, "step": 2684000 }, { "epoch": 1.61, "learning_rate": 4.477147154786661e-05, "loss": 0.2815, "step": 2684500 }, { "epoch": 1.61, "learning_rate": 4.476937158230605e-05, "loss": 0.2739, "step": 2685000 }, { "epoch": 1.61, "learning_rate": 4.476727161674549e-05, "loss": 0.2773, "step": 2685500 }, { "epoch": 1.61, "learning_rate": 4.476518005104717e-05, "loss": 0.2849, "step": 2686000 }, { "epoch": 1.61, "learning_rate": 4.4763080085486594e-05, "loss": 0.2795, "step": 2686500 }, { "epoch": 1.61, "learning_rate": 4.476098011992603e-05, "loss": 0.2808, "step": 2687000 }, { "epoch": 1.61, "learning_rate": 4.475888015436547e-05, "loss": 0.272, "step": 2687500 }, { "epoch": 1.61, "learning_rate": 4.47567801888049e-05, "loss": 0.2848, "step": 2688000 }, { "epoch": 1.61, "learning_rate": 4.4754680223244335e-05, "loss": 0.2835, "step": 2688500 }, { "epoch": 1.61, "learning_rate": 4.4752580257683775e-05, "loss": 0.2842, "step": 2689000 }, { "epoch": 1.61, "learning_rate": 4.475048029212321e-05, "loss": 0.2841, "step": 2689500 }, { "epoch": 1.61, "learning_rate": 4.474838452649376e-05, "loss": 0.2817, "step": 2690000 }, { "epoch": 1.61, "learning_rate": 4.47462845609332e-05, "loss": 0.2738, "step": 2690500 }, { "epoch": 1.61, "learning_rate": 4.4744184595372635e-05, "loss": 0.2826, "step": 2691000 }, { "epoch": 1.61, "learning_rate": 4.474208462981207e-05, "loss": 0.2798, "step": 2691500 }, { "epoch": 1.61, "learning_rate": 4.473998466425151e-05, "loss": 0.2869, "step": 2692000 }, { "epoch": 1.61, "learning_rate": 4.473788469869094e-05, "loss": 0.2848, "step": 2692500 }, { "epoch": 1.61, "learning_rate": 4.4735784733130376e-05, "loss": 0.2795, "step": 2693000 }, { "epoch": 1.61, "learning_rate": 4.473368896750093e-05, "loss": 0.2799, "step": 2693500 }, { "epoch": 1.62, "learning_rate": 4.473158900194037e-05, "loss": 0.2794, "step": 2694000 }, { "epoch": 1.62, "learning_rate": 4.47294890363798e-05, "loss": 0.2847, "step": 2694500 }, { "epoch": 1.62, "learning_rate": 4.472738907081924e-05, "loss": 0.2831, "step": 2695000 }, { "epoch": 1.62, "learning_rate": 4.472529330518979e-05, "loss": 0.2798, "step": 2695500 }, { "epoch": 1.62, "learning_rate": 4.472319333962923e-05, "loss": 0.2859, "step": 2696000 }, { "epoch": 1.62, "learning_rate": 4.4721093374068664e-05, "loss": 0.2812, "step": 2696500 }, { "epoch": 1.62, "learning_rate": 4.47189934085081e-05, "loss": 0.274, "step": 2697000 }, { "epoch": 1.62, "learning_rate": 4.471689764287866e-05, "loss": 0.2826, "step": 2697500 }, { "epoch": 1.62, "learning_rate": 4.471479767731809e-05, "loss": 0.2809, "step": 2698000 }, { "epoch": 1.62, "learning_rate": 4.4712697711757525e-05, "loss": 0.2798, "step": 2698500 }, { "epoch": 1.62, "learning_rate": 4.4710597746196965e-05, "loss": 0.2786, "step": 2699000 }, { "epoch": 1.62, "learning_rate": 4.470850198056752e-05, "loss": 0.2875, "step": 2699500 }, { "epoch": 1.62, "learning_rate": 4.470640201500695e-05, "loss": 0.283, "step": 2700000 }, { "epoch": 1.62, "eval_loss": 0.256526380777359, "eval_runtime": 1451.6039, "eval_samples_per_second": 362.854, "eval_steps_per_second": 60.476, "step": 2700000 }, { "epoch": 1.62, "learning_rate": 4.4704302049446386e-05, "loss": 0.2824, "step": 2700500 }, { "epoch": 1.62, "learning_rate": 4.4702202083885826e-05, "loss": 0.2799, "step": 2701000 }, { "epoch": 1.62, "learning_rate": 4.470010211832526e-05, "loss": 0.2754, "step": 2701500 }, { "epoch": 1.62, "learning_rate": 4.469800635269581e-05, "loss": 0.2795, "step": 2702000 }, { "epoch": 1.62, "learning_rate": 4.4695906387135246e-05, "loss": 0.284, "step": 2702500 }, { "epoch": 1.62, "learning_rate": 4.4693806421574686e-05, "loss": 0.2861, "step": 2703000 }, { "epoch": 1.62, "learning_rate": 4.469170645601412e-05, "loss": 0.2786, "step": 2703500 }, { "epoch": 1.62, "learning_rate": 4.4689606490453553e-05, "loss": 0.2771, "step": 2704000 }, { "epoch": 1.62, "learning_rate": 4.4687506524892994e-05, "loss": 0.2736, "step": 2704500 }, { "epoch": 1.62, "learning_rate": 4.468541075926355e-05, "loss": 0.2764, "step": 2705000 }, { "epoch": 1.62, "learning_rate": 4.468331079370298e-05, "loss": 0.2805, "step": 2705500 }, { "epoch": 1.62, "learning_rate": 4.468121082814242e-05, "loss": 0.2772, "step": 2706000 }, { "epoch": 1.62, "learning_rate": 4.4679110862581854e-05, "loss": 0.2841, "step": 2706500 }, { "epoch": 1.62, "learning_rate": 4.467701089702129e-05, "loss": 0.2868, "step": 2707000 }, { "epoch": 1.62, "learning_rate": 4.467491093146073e-05, "loss": 0.2871, "step": 2707500 }, { "epoch": 1.62, "learning_rate": 4.467281516583128e-05, "loss": 0.2875, "step": 2708000 }, { "epoch": 1.62, "learning_rate": 4.4670715200270715e-05, "loss": 0.2704, "step": 2708500 }, { "epoch": 1.62, "learning_rate": 4.466861523471015e-05, "loss": 0.2794, "step": 2709000 }, { "epoch": 1.62, "learning_rate": 4.466651526914959e-05, "loss": 0.2812, "step": 2709500 }, { "epoch": 1.62, "learning_rate": 4.466441950352014e-05, "loss": 0.2796, "step": 2710000 }, { "epoch": 1.63, "learning_rate": 4.4662319537959576e-05, "loss": 0.2798, "step": 2710500 }, { "epoch": 1.63, "learning_rate": 4.466021957239901e-05, "loss": 0.2783, "step": 2711000 }, { "epoch": 1.63, "learning_rate": 4.465811960683845e-05, "loss": 0.2778, "step": 2711500 }, { "epoch": 1.63, "learning_rate": 4.465601964127788e-05, "loss": 0.2819, "step": 2712000 }, { "epoch": 1.63, "learning_rate": 4.4653923875648437e-05, "loss": 0.2797, "step": 2712500 }, { "epoch": 1.63, "learning_rate": 4.465182391008788e-05, "loss": 0.2787, "step": 2713000 }, { "epoch": 1.63, "learning_rate": 4.464972394452731e-05, "loss": 0.2802, "step": 2713500 }, { "epoch": 1.63, "learning_rate": 4.4647623978966744e-05, "loss": 0.282, "step": 2714000 }, { "epoch": 1.63, "learning_rate": 4.46455282133373e-05, "loss": 0.2801, "step": 2714500 }, { "epoch": 1.63, "learning_rate": 4.464342824777674e-05, "loss": 0.275, "step": 2715000 }, { "epoch": 1.63, "learning_rate": 4.464132828221617e-05, "loss": 0.2815, "step": 2715500 }, { "epoch": 1.63, "learning_rate": 4.4639228316655604e-05, "loss": 0.2852, "step": 2716000 }, { "epoch": 1.63, "learning_rate": 4.463713255102616e-05, "loss": 0.2869, "step": 2716500 }, { "epoch": 1.63, "learning_rate": 4.46350325854656e-05, "loss": 0.2859, "step": 2717000 }, { "epoch": 1.63, "learning_rate": 4.463293681983615e-05, "loss": 0.2783, "step": 2717500 }, { "epoch": 1.63, "learning_rate": 4.4630836854275585e-05, "loss": 0.288, "step": 2718000 }, { "epoch": 1.63, "learning_rate": 4.4628736888715026e-05, "loss": 0.282, "step": 2718500 }, { "epoch": 1.63, "learning_rate": 4.462663692315446e-05, "loss": 0.286, "step": 2719000 }, { "epoch": 1.63, "learning_rate": 4.462453695759389e-05, "loss": 0.2753, "step": 2719500 }, { "epoch": 1.63, "learning_rate": 4.462243699203333e-05, "loss": 0.2816, "step": 2720000 }, { "epoch": 1.63, "learning_rate": 4.4620341226403886e-05, "loss": 0.2807, "step": 2720500 }, { "epoch": 1.63, "learning_rate": 4.461824126084332e-05, "loss": 0.2771, "step": 2721000 }, { "epoch": 1.63, "learning_rate": 4.461614129528275e-05, "loss": 0.2742, "step": 2721500 }, { "epoch": 1.63, "learning_rate": 4.4614041329722193e-05, "loss": 0.2821, "step": 2722000 }, { "epoch": 1.63, "learning_rate": 4.461194136416163e-05, "loss": 0.2818, "step": 2722500 }, { "epoch": 1.63, "learning_rate": 4.460984139860106e-05, "loss": 0.2748, "step": 2723000 }, { "epoch": 1.63, "learning_rate": 4.46077414330405e-05, "loss": 0.2762, "step": 2723500 }, { "epoch": 1.63, "learning_rate": 4.4605641467479934e-05, "loss": 0.2761, "step": 2724000 }, { "epoch": 1.63, "learning_rate": 4.460354570185049e-05, "loss": 0.2792, "step": 2724500 }, { "epoch": 1.63, "learning_rate": 4.460144573628992e-05, "loss": 0.2841, "step": 2725000 }, { "epoch": 1.63, "learning_rate": 4.459934577072936e-05, "loss": 0.2839, "step": 2725500 }, { "epoch": 1.63, "learning_rate": 4.4597245805168795e-05, "loss": 0.2827, "step": 2726000 }, { "epoch": 1.63, "learning_rate": 4.4595145839608235e-05, "loss": 0.2786, "step": 2726500 }, { "epoch": 1.63, "learning_rate": 4.459305007397879e-05, "loss": 0.2869, "step": 2727000 }, { "epoch": 1.64, "learning_rate": 4.459095010841822e-05, "loss": 0.2733, "step": 2727500 }, { "epoch": 1.64, "learning_rate": 4.4588854342788776e-05, "loss": 0.2829, "step": 2728000 }, { "epoch": 1.64, "learning_rate": 4.458675437722821e-05, "loss": 0.2831, "step": 2728500 }, { "epoch": 1.64, "learning_rate": 4.458465441166765e-05, "loss": 0.2757, "step": 2729000 }, { "epoch": 1.64, "learning_rate": 4.458255444610708e-05, "loss": 0.2812, "step": 2729500 }, { "epoch": 1.64, "learning_rate": 4.4580454480546516e-05, "loss": 0.2837, "step": 2730000 }, { "epoch": 1.64, "learning_rate": 4.4578354514985956e-05, "loss": 0.2822, "step": 2730500 }, { "epoch": 1.64, "learning_rate": 4.457625454942539e-05, "loss": 0.2762, "step": 2731000 }, { "epoch": 1.64, "learning_rate": 4.457415458386482e-05, "loss": 0.2826, "step": 2731500 }, { "epoch": 1.64, "learning_rate": 4.4572054618304264e-05, "loss": 0.2792, "step": 2732000 }, { "epoch": 1.64, "learning_rate": 4.456996305260594e-05, "loss": 0.2799, "step": 2732500 }, { "epoch": 1.64, "learning_rate": 4.456786308704537e-05, "loss": 0.281, "step": 2733000 }, { "epoch": 1.64, "learning_rate": 4.4565763121484804e-05, "loss": 0.2732, "step": 2733500 }, { "epoch": 1.64, "learning_rate": 4.4563663155924244e-05, "loss": 0.2802, "step": 2734000 }, { "epoch": 1.64, "learning_rate": 4.456156319036368e-05, "loss": 0.2802, "step": 2734500 }, { "epoch": 1.64, "learning_rate": 4.455946742473423e-05, "loss": 0.2843, "step": 2735000 }, { "epoch": 1.64, "learning_rate": 4.4557367459173665e-05, "loss": 0.2826, "step": 2735500 }, { "epoch": 1.64, "learning_rate": 4.4555267493613105e-05, "loss": 0.2855, "step": 2736000 }, { "epoch": 1.64, "learning_rate": 4.455316752805254e-05, "loss": 0.2861, "step": 2736500 }, { "epoch": 1.64, "learning_rate": 4.455106756249197e-05, "loss": 0.2782, "step": 2737000 }, { "epoch": 1.64, "learning_rate": 4.4548971796862526e-05, "loss": 0.2839, "step": 2737500 }, { "epoch": 1.64, "learning_rate": 4.4546871831301966e-05, "loss": 0.2778, "step": 2738000 }, { "epoch": 1.64, "learning_rate": 4.45447718657414e-05, "loss": 0.2818, "step": 2738500 }, { "epoch": 1.64, "learning_rate": 4.454267190018084e-05, "loss": 0.2782, "step": 2739000 }, { "epoch": 1.64, "learning_rate": 4.454057193462027e-05, "loss": 0.2853, "step": 2739500 }, { "epoch": 1.64, "learning_rate": 4.4538471969059706e-05, "loss": 0.2768, "step": 2740000 }, { "epoch": 1.64, "learning_rate": 4.453637200349915e-05, "loss": 0.2799, "step": 2740500 }, { "epoch": 1.64, "learning_rate": 4.453427203793858e-05, "loss": 0.2854, "step": 2741000 }, { "epoch": 1.64, "learning_rate": 4.4532172072378014e-05, "loss": 0.2767, "step": 2741500 }, { "epoch": 1.64, "learning_rate": 4.453007210681745e-05, "loss": 0.2795, "step": 2742000 }, { "epoch": 1.64, "learning_rate": 4.452797634118801e-05, "loss": 0.2804, "step": 2742500 }, { "epoch": 1.64, "learning_rate": 4.452587637562744e-05, "loss": 0.2898, "step": 2743000 }, { "epoch": 1.64, "learning_rate": 4.4523776410066874e-05, "loss": 0.2859, "step": 2743500 }, { "epoch": 1.65, "learning_rate": 4.4521676444506315e-05, "loss": 0.2771, "step": 2744000 }, { "epoch": 1.65, "learning_rate": 4.451958067887687e-05, "loss": 0.2857, "step": 2744500 }, { "epoch": 1.65, "learning_rate": 4.45174807133163e-05, "loss": 0.2753, "step": 2745000 }, { "epoch": 1.65, "learning_rate": 4.4515380747755735e-05, "loss": 0.2741, "step": 2745500 }, { "epoch": 1.65, "learning_rate": 4.4513284982126295e-05, "loss": 0.2789, "step": 2746000 }, { "epoch": 1.65, "learning_rate": 4.451118501656573e-05, "loss": 0.2886, "step": 2746500 }, { "epoch": 1.65, "learning_rate": 4.450908505100516e-05, "loss": 0.2838, "step": 2747000 }, { "epoch": 1.65, "learning_rate": 4.45069850854446e-05, "loss": 0.2789, "step": 2747500 }, { "epoch": 1.65, "learning_rate": 4.4504885119884036e-05, "loss": 0.2788, "step": 2748000 }, { "epoch": 1.65, "learning_rate": 4.450278515432347e-05, "loss": 0.2783, "step": 2748500 }, { "epoch": 1.65, "learning_rate": 4.450068518876291e-05, "loss": 0.2854, "step": 2749000 }, { "epoch": 1.65, "learning_rate": 4.4498585223202336e-05, "loss": 0.2852, "step": 2749500 }, { "epoch": 1.65, "learning_rate": 4.44964894575729e-05, "loss": 0.2812, "step": 2750000 }, { "epoch": 1.65, "learning_rate": 4.449438949201233e-05, "loss": 0.2719, "step": 2750500 }, { "epoch": 1.65, "learning_rate": 4.449228952645177e-05, "loss": 0.2801, "step": 2751000 }, { "epoch": 1.65, "learning_rate": 4.44901895608912e-05, "loss": 0.2843, "step": 2751500 }, { "epoch": 1.65, "learning_rate": 4.448809379526176e-05, "loss": 0.284, "step": 2752000 }, { "epoch": 1.65, "learning_rate": 4.448599802963231e-05, "loss": 0.2829, "step": 2752500 }, { "epoch": 1.65, "learning_rate": 4.448389806407175e-05, "loss": 0.2786, "step": 2753000 }, { "epoch": 1.65, "learning_rate": 4.4481798098511185e-05, "loss": 0.2806, "step": 2753500 }, { "epoch": 1.65, "learning_rate": 4.447969813295062e-05, "loss": 0.2826, "step": 2754000 }, { "epoch": 1.65, "learning_rate": 4.447759816739006e-05, "loss": 0.2853, "step": 2754500 }, { "epoch": 1.65, "learning_rate": 4.447549820182949e-05, "loss": 0.2767, "step": 2755000 }, { "epoch": 1.65, "learning_rate": 4.4473398236268925e-05, "loss": 0.2812, "step": 2755500 }, { "epoch": 1.65, "learning_rate": 4.4471298270708366e-05, "loss": 0.2804, "step": 2756000 }, { "epoch": 1.65, "learning_rate": 4.446920250507892e-05, "loss": 0.2843, "step": 2756500 }, { "epoch": 1.65, "learning_rate": 4.446710253951835e-05, "loss": 0.2752, "step": 2757000 }, { "epoch": 1.65, "learning_rate": 4.4465002573957786e-05, "loss": 0.2751, "step": 2757500 }, { "epoch": 1.65, "learning_rate": 4.4462902608397226e-05, "loss": 0.2762, "step": 2758000 }, { "epoch": 1.65, "learning_rate": 4.446080264283666e-05, "loss": 0.2781, "step": 2758500 }, { "epoch": 1.65, "learning_rate": 4.4458706877207213e-05, "loss": 0.2764, "step": 2759000 }, { "epoch": 1.65, "learning_rate": 4.445660691164665e-05, "loss": 0.2742, "step": 2759500 }, { "epoch": 1.65, "learning_rate": 4.445450694608609e-05, "loss": 0.2768, "step": 2760000 }, { "epoch": 1.66, "learning_rate": 4.445240698052552e-05, "loss": 0.2761, "step": 2760500 }, { "epoch": 1.66, "learning_rate": 4.4450311214896074e-05, "loss": 0.2808, "step": 2761000 }, { "epoch": 1.66, "learning_rate": 4.4448211249335514e-05, "loss": 0.2819, "step": 2761500 }, { "epoch": 1.66, "learning_rate": 4.444611128377495e-05, "loss": 0.2855, "step": 2762000 }, { "epoch": 1.66, "learning_rate": 4.444401131821438e-05, "loss": 0.2778, "step": 2762500 }, { "epoch": 1.66, "learning_rate": 4.4441915552584935e-05, "loss": 0.2774, "step": 2763000 }, { "epoch": 1.66, "learning_rate": 4.4439815587024375e-05, "loss": 0.2753, "step": 2763500 }, { "epoch": 1.66, "learning_rate": 4.443771562146381e-05, "loss": 0.2769, "step": 2764000 }, { "epoch": 1.66, "learning_rate": 4.443561565590324e-05, "loss": 0.2863, "step": 2764500 }, { "epoch": 1.66, "learning_rate": 4.4433519890273796e-05, "loss": 0.2837, "step": 2765000 }, { "epoch": 1.66, "learning_rate": 4.4431419924713236e-05, "loss": 0.2869, "step": 2765500 }, { "epoch": 1.66, "learning_rate": 4.442931995915267e-05, "loss": 0.2786, "step": 2766000 }, { "epoch": 1.66, "learning_rate": 4.44272199935921e-05, "loss": 0.2863, "step": 2766500 }, { "epoch": 1.66, "learning_rate": 4.442512422796266e-05, "loss": 0.2766, "step": 2767000 }, { "epoch": 1.66, "learning_rate": 4.4423024262402097e-05, "loss": 0.2817, "step": 2767500 }, { "epoch": 1.66, "learning_rate": 4.442092429684153e-05, "loss": 0.2796, "step": 2768000 }, { "epoch": 1.66, "learning_rate": 4.441882433128097e-05, "loss": 0.282, "step": 2768500 }, { "epoch": 1.66, "learning_rate": 4.4416728565651524e-05, "loss": 0.2735, "step": 2769000 }, { "epoch": 1.66, "learning_rate": 4.441462860009096e-05, "loss": 0.2812, "step": 2769500 }, { "epoch": 1.66, "learning_rate": 4.441252863453039e-05, "loss": 0.2851, "step": 2770000 }, { "epoch": 1.66, "learning_rate": 4.441042866896983e-05, "loss": 0.2757, "step": 2770500 }, { "epoch": 1.66, "learning_rate": 4.4408328703409264e-05, "loss": 0.2835, "step": 2771000 }, { "epoch": 1.66, "learning_rate": 4.440623293777982e-05, "loss": 0.2787, "step": 2771500 }, { "epoch": 1.66, "learning_rate": 4.440413297221925e-05, "loss": 0.2847, "step": 2772000 }, { "epoch": 1.66, "learning_rate": 4.440203720658981e-05, "loss": 0.2779, "step": 2772500 }, { "epoch": 1.66, "learning_rate": 4.4399937241029245e-05, "loss": 0.2795, "step": 2773000 }, { "epoch": 1.66, "learning_rate": 4.439783727546868e-05, "loss": 0.2803, "step": 2773500 }, { "epoch": 1.66, "learning_rate": 4.439573730990812e-05, "loss": 0.28, "step": 2774000 }, { "epoch": 1.66, "learning_rate": 4.439363734434755e-05, "loss": 0.2823, "step": 2774500 }, { "epoch": 1.66, "learning_rate": 4.4391537378786986e-05, "loss": 0.2811, "step": 2775000 }, { "epoch": 1.66, "learning_rate": 4.4389437413226426e-05, "loss": 0.2779, "step": 2775500 }, { "epoch": 1.66, "learning_rate": 4.438733744766586e-05, "loss": 0.2815, "step": 2776000 }, { "epoch": 1.66, "learning_rate": 4.438523748210529e-05, "loss": 0.282, "step": 2776500 }, { "epoch": 1.66, "learning_rate": 4.438314171647585e-05, "loss": 0.2789, "step": 2777000 }, { "epoch": 1.67, "learning_rate": 4.438104175091529e-05, "loss": 0.2787, "step": 2777500 }, { "epoch": 1.67, "learning_rate": 4.437894178535472e-05, "loss": 0.277, "step": 2778000 }, { "epoch": 1.67, "learning_rate": 4.4376841819794154e-05, "loss": 0.2903, "step": 2778500 }, { "epoch": 1.67, "learning_rate": 4.437474605416471e-05, "loss": 0.2795, "step": 2779000 }, { "epoch": 1.67, "learning_rate": 4.437264608860415e-05, "loss": 0.2789, "step": 2779500 }, { "epoch": 1.67, "learning_rate": 4.43705503229747e-05, "loss": 0.2846, "step": 2780000 }, { "epoch": 1.67, "learning_rate": 4.4368450357414135e-05, "loss": 0.2842, "step": 2780500 }, { "epoch": 1.67, "learning_rate": 4.4366350391853575e-05, "loss": 0.2749, "step": 2781000 }, { "epoch": 1.67, "learning_rate": 4.436425042629301e-05, "loss": 0.2719, "step": 2781500 }, { "epoch": 1.67, "learning_rate": 4.436215046073244e-05, "loss": 0.2757, "step": 2782000 }, { "epoch": 1.67, "learning_rate": 4.436005049517188e-05, "loss": 0.28, "step": 2782500 }, { "epoch": 1.67, "learning_rate": 4.4357950529611315e-05, "loss": 0.2728, "step": 2783000 }, { "epoch": 1.67, "learning_rate": 4.435585056405075e-05, "loss": 0.2822, "step": 2783500 }, { "epoch": 1.67, "learning_rate": 4.435375899835243e-05, "loss": 0.2818, "step": 2784000 }, { "epoch": 1.67, "learning_rate": 4.4351659032791856e-05, "loss": 0.2847, "step": 2784500 }, { "epoch": 1.67, "learning_rate": 4.4349559067231296e-05, "loss": 0.2761, "step": 2785000 }, { "epoch": 1.67, "learning_rate": 4.434745910167073e-05, "loss": 0.2875, "step": 2785500 }, { "epoch": 1.67, "learning_rate": 4.434535913611016e-05, "loss": 0.2765, "step": 2786000 }, { "epoch": 1.67, "learning_rate": 4.4343259170549603e-05, "loss": 0.2761, "step": 2786500 }, { "epoch": 1.67, "learning_rate": 4.434115920498904e-05, "loss": 0.277, "step": 2787000 }, { "epoch": 1.67, "learning_rate": 4.433906343935959e-05, "loss": 0.2865, "step": 2787500 }, { "epoch": 1.67, "learning_rate": 4.433696347379903e-05, "loss": 0.2783, "step": 2788000 }, { "epoch": 1.67, "learning_rate": 4.4334863508238464e-05, "loss": 0.2771, "step": 2788500 }, { "epoch": 1.67, "learning_rate": 4.43327635426779e-05, "loss": 0.2736, "step": 2789000 }, { "epoch": 1.67, "learning_rate": 4.433066357711734e-05, "loss": 0.2789, "step": 2789500 }, { "epoch": 1.67, "learning_rate": 4.432856781148789e-05, "loss": 0.2802, "step": 2790000 }, { "epoch": 1.67, "learning_rate": 4.4326467845927325e-05, "loss": 0.2804, "step": 2790500 }, { "epoch": 1.67, "learning_rate": 4.432436788036676e-05, "loss": 0.2812, "step": 2791000 }, { "epoch": 1.67, "learning_rate": 4.43222679148062e-05, "loss": 0.2754, "step": 2791500 }, { "epoch": 1.67, "learning_rate": 4.432016794924563e-05, "loss": 0.2816, "step": 2792000 }, { "epoch": 1.67, "learning_rate": 4.4318067983685066e-05, "loss": 0.2821, "step": 2792500 }, { "epoch": 1.67, "learning_rate": 4.4315968018124506e-05, "loss": 0.2753, "step": 2793000 }, { "epoch": 1.67, "learning_rate": 4.431386805256394e-05, "loss": 0.2833, "step": 2793500 }, { "epoch": 1.68, "learning_rate": 4.431176808700337e-05, "loss": 0.2812, "step": 2794000 }, { "epoch": 1.68, "learning_rate": 4.4309672321373926e-05, "loss": 0.2848, "step": 2794500 }, { "epoch": 1.68, "learning_rate": 4.4307576555744487e-05, "loss": 0.2793, "step": 2795000 }, { "epoch": 1.68, "learning_rate": 4.430547659018392e-05, "loss": 0.2814, "step": 2795500 }, { "epoch": 1.68, "learning_rate": 4.4303376624623354e-05, "loss": 0.2824, "step": 2796000 }, { "epoch": 1.68, "learning_rate": 4.4301276659062794e-05, "loss": 0.2798, "step": 2796500 }, { "epoch": 1.68, "learning_rate": 4.429917669350223e-05, "loss": 0.2767, "step": 2797000 }, { "epoch": 1.68, "learning_rate": 4.429707672794166e-05, "loss": 0.28, "step": 2797500 }, { "epoch": 1.68, "learning_rate": 4.42949767623811e-05, "loss": 0.2779, "step": 2798000 }, { "epoch": 1.68, "learning_rate": 4.4292876796820534e-05, "loss": 0.2769, "step": 2798500 }, { "epoch": 1.68, "learning_rate": 4.429077683125997e-05, "loss": 0.2803, "step": 2799000 }, { "epoch": 1.68, "learning_rate": 4.428868526556164e-05, "loss": 0.2792, "step": 2799500 }, { "epoch": 1.68, "learning_rate": 4.4286585300001075e-05, "loss": 0.279, "step": 2800000 }, { "epoch": 1.68, "eval_loss": 0.2554187774658203, "eval_runtime": 1450.9376, "eval_samples_per_second": 363.02, "eval_steps_per_second": 60.504, "step": 2800000 }, { "epoch": 1.68, "learning_rate": 4.4284485334440515e-05, "loss": 0.283, "step": 2800500 }, { "epoch": 1.68, "learning_rate": 4.428238536887995e-05, "loss": 0.2802, "step": 2801000 }, { "epoch": 1.68, "learning_rate": 4.428028540331938e-05, "loss": 0.2917, "step": 2801500 }, { "epoch": 1.68, "learning_rate": 4.427818543775882e-05, "loss": 0.2772, "step": 2802000 }, { "epoch": 1.68, "learning_rate": 4.4276085472198256e-05, "loss": 0.2806, "step": 2802500 }, { "epoch": 1.68, "learning_rate": 4.427398550663769e-05, "loss": 0.2764, "step": 2803000 }, { "epoch": 1.68, "learning_rate": 4.427188974100825e-05, "loss": 0.2809, "step": 2803500 }, { "epoch": 1.68, "learning_rate": 4.426978977544768e-05, "loss": 0.2768, "step": 2804000 }, { "epoch": 1.68, "learning_rate": 4.4267689809887117e-05, "loss": 0.2745, "step": 2804500 }, { "epoch": 1.68, "learning_rate": 4.426558984432656e-05, "loss": 0.2766, "step": 2805000 }, { "epoch": 1.68, "learning_rate": 4.426349407869711e-05, "loss": 0.2839, "step": 2805500 }, { "epoch": 1.68, "learning_rate": 4.4261394113136544e-05, "loss": 0.2775, "step": 2806000 }, { "epoch": 1.68, "learning_rate": 4.425929414757598e-05, "loss": 0.2715, "step": 2806500 }, { "epoch": 1.68, "learning_rate": 4.425719418201542e-05, "loss": 0.2786, "step": 2807000 }, { "epoch": 1.68, "learning_rate": 4.425509841638597e-05, "loss": 0.2741, "step": 2807500 }, { "epoch": 1.68, "learning_rate": 4.4252998450825405e-05, "loss": 0.2827, "step": 2808000 }, { "epoch": 1.68, "learning_rate": 4.4250906885127085e-05, "loss": 0.2805, "step": 2808500 }, { "epoch": 1.68, "learning_rate": 4.424880691956652e-05, "loss": 0.2823, "step": 2809000 }, { "epoch": 1.68, "learning_rate": 4.424670695400596e-05, "loss": 0.2834, "step": 2809500 }, { "epoch": 1.68, "learning_rate": 4.424460698844539e-05, "loss": 0.2774, "step": 2810000 }, { "epoch": 1.69, "learning_rate": 4.424250702288482e-05, "loss": 0.2803, "step": 2810500 }, { "epoch": 1.69, "learning_rate": 4.424040705732426e-05, "loss": 0.2803, "step": 2811000 }, { "epoch": 1.69, "learning_rate": 4.423830709176369e-05, "loss": 0.2773, "step": 2811500 }, { "epoch": 1.69, "learning_rate": 4.4236207126203126e-05, "loss": 0.2911, "step": 2812000 }, { "epoch": 1.69, "learning_rate": 4.4234107160642566e-05, "loss": 0.2739, "step": 2812500 }, { "epoch": 1.69, "learning_rate": 4.4232007195082e-05, "loss": 0.2773, "step": 2813000 }, { "epoch": 1.69, "learning_rate": 4.422990722952143e-05, "loss": 0.2764, "step": 2813500 }, { "epoch": 1.69, "learning_rate": 4.4227807263960873e-05, "loss": 0.2735, "step": 2814000 }, { "epoch": 1.69, "learning_rate": 4.422570729840031e-05, "loss": 0.2757, "step": 2814500 }, { "epoch": 1.69, "learning_rate": 4.422360733283974e-05, "loss": 0.2816, "step": 2815000 }, { "epoch": 1.69, "learning_rate": 4.422150736727918e-05, "loss": 0.2784, "step": 2815500 }, { "epoch": 1.69, "learning_rate": 4.421940740171861e-05, "loss": 0.2763, "step": 2816000 }, { "epoch": 1.69, "learning_rate": 4.421731163608917e-05, "loss": 0.2804, "step": 2816500 }, { "epoch": 1.69, "learning_rate": 4.421521167052861e-05, "loss": 0.2796, "step": 2817000 }, { "epoch": 1.69, "learning_rate": 4.421311590489916e-05, "loss": 0.2749, "step": 2817500 }, { "epoch": 1.69, "learning_rate": 4.4211015939338595e-05, "loss": 0.2774, "step": 2818000 }, { "epoch": 1.69, "learning_rate": 4.420891597377803e-05, "loss": 0.2802, "step": 2818500 }, { "epoch": 1.69, "learning_rate": 4.420681600821747e-05, "loss": 0.2786, "step": 2819000 }, { "epoch": 1.69, "learning_rate": 4.42047160426569e-05, "loss": 0.2817, "step": 2819500 }, { "epoch": 1.69, "learning_rate": 4.4202616077096335e-05, "loss": 0.2807, "step": 2820000 }, { "epoch": 1.69, "learning_rate": 4.4200516111535776e-05, "loss": 0.2785, "step": 2820500 }, { "epoch": 1.69, "learning_rate": 4.41984161459752e-05, "loss": 0.2805, "step": 2821000 }, { "epoch": 1.69, "learning_rate": 4.4196316180414636e-05, "loss": 0.2751, "step": 2821500 }, { "epoch": 1.69, "learning_rate": 4.4194220414785196e-05, "loss": 0.2757, "step": 2822000 }, { "epoch": 1.69, "learning_rate": 4.4192120449224636e-05, "loss": 0.2799, "step": 2822500 }, { "epoch": 1.69, "learning_rate": 4.419002048366406e-05, "loss": 0.273, "step": 2823000 }, { "epoch": 1.69, "learning_rate": 4.41879205181035e-05, "loss": 0.2777, "step": 2823500 }, { "epoch": 1.69, "learning_rate": 4.4185824752474064e-05, "loss": 0.2766, "step": 2824000 }, { "epoch": 1.69, "learning_rate": 4.41837247869135e-05, "loss": 0.2819, "step": 2824500 }, { "epoch": 1.69, "learning_rate": 4.418162482135293e-05, "loss": 0.2764, "step": 2825000 }, { "epoch": 1.69, "learning_rate": 4.4179524855792364e-05, "loss": 0.2792, "step": 2825500 }, { "epoch": 1.69, "learning_rate": 4.41774248902318e-05, "loss": 0.2757, "step": 2826000 }, { "epoch": 1.69, "learning_rate": 4.417533332453348e-05, "loss": 0.2827, "step": 2826500 }, { "epoch": 1.69, "learning_rate": 4.417323755890403e-05, "loss": 0.2913, "step": 2827000 }, { "epoch": 1.7, "learning_rate": 4.4171137593343465e-05, "loss": 0.2844, "step": 2827500 }, { "epoch": 1.7, "learning_rate": 4.41690376277829e-05, "loss": 0.2821, "step": 2828000 }, { "epoch": 1.7, "learning_rate": 4.416693766222234e-05, "loss": 0.2833, "step": 2828500 }, { "epoch": 1.7, "learning_rate": 4.416483769666177e-05, "loss": 0.2759, "step": 2829000 }, { "epoch": 1.7, "learning_rate": 4.416273773110121e-05, "loss": 0.2817, "step": 2829500 }, { "epoch": 1.7, "learning_rate": 4.4160637765540646e-05, "loss": 0.2809, "step": 2830000 }, { "epoch": 1.7, "learning_rate": 4.415853779998008e-05, "loss": 0.283, "step": 2830500 }, { "epoch": 1.7, "learning_rate": 4.415643783441952e-05, "loss": 0.2737, "step": 2831000 }, { "epoch": 1.7, "learning_rate": 4.415433786885895e-05, "loss": 0.2773, "step": 2831500 }, { "epoch": 1.7, "learning_rate": 4.4152237903298386e-05, "loss": 0.2825, "step": 2832000 }, { "epoch": 1.7, "learning_rate": 4.415014213766894e-05, "loss": 0.2803, "step": 2832500 }, { "epoch": 1.7, "learning_rate": 4.414804217210838e-05, "loss": 0.2794, "step": 2833000 }, { "epoch": 1.7, "learning_rate": 4.4145942206547814e-05, "loss": 0.2811, "step": 2833500 }, { "epoch": 1.7, "learning_rate": 4.414384224098725e-05, "loss": 0.2856, "step": 2834000 }, { "epoch": 1.7, "learning_rate": 4.414174227542669e-05, "loss": 0.2805, "step": 2834500 }, { "epoch": 1.7, "learning_rate": 4.4139642309866114e-05, "loss": 0.2784, "step": 2835000 }, { "epoch": 1.7, "learning_rate": 4.413754234430555e-05, "loss": 0.2762, "step": 2835500 }, { "epoch": 1.7, "learning_rate": 4.413544237874499e-05, "loss": 0.2734, "step": 2836000 }, { "epoch": 1.7, "learning_rate": 4.413334661311555e-05, "loss": 0.2807, "step": 2836500 }, { "epoch": 1.7, "learning_rate": 4.413124664755498e-05, "loss": 0.2807, "step": 2837000 }, { "epoch": 1.7, "learning_rate": 4.4129146681994415e-05, "loss": 0.2741, "step": 2837500 }, { "epoch": 1.7, "learning_rate": 4.412704671643385e-05, "loss": 0.2786, "step": 2838000 }, { "epoch": 1.7, "learning_rate": 4.412494675087328e-05, "loss": 0.2822, "step": 2838500 }, { "epoch": 1.7, "learning_rate": 4.412285098524384e-05, "loss": 0.2817, "step": 2839000 }, { "epoch": 1.7, "learning_rate": 4.412075101968328e-05, "loss": 0.2734, "step": 2839500 }, { "epoch": 1.7, "learning_rate": 4.411865105412271e-05, "loss": 0.2775, "step": 2840000 }, { "epoch": 1.7, "learning_rate": 4.411655528849327e-05, "loss": 0.2856, "step": 2840500 }, { "epoch": 1.7, "learning_rate": 4.41144553229327e-05, "loss": 0.2747, "step": 2841000 }, { "epoch": 1.7, "learning_rate": 4.411235535737214e-05, "loss": 0.2736, "step": 2841500 }, { "epoch": 1.7, "learning_rate": 4.411025539181158e-05, "loss": 0.2706, "step": 2842000 }, { "epoch": 1.7, "learning_rate": 4.4108155426251003e-05, "loss": 0.2801, "step": 2842500 }, { "epoch": 1.7, "learning_rate": 4.4106055460690444e-05, "loss": 0.2866, "step": 2843000 }, { "epoch": 1.7, "learning_rate": 4.410395549512988e-05, "loss": 0.2763, "step": 2843500 }, { "epoch": 1.71, "learning_rate": 4.410185552956931e-05, "loss": 0.2789, "step": 2844000 }, { "epoch": 1.71, "learning_rate": 4.409975976393987e-05, "loss": 0.2784, "step": 2844500 }, { "epoch": 1.71, "learning_rate": 4.4097659798379304e-05, "loss": 0.2817, "step": 2845000 }, { "epoch": 1.71, "learning_rate": 4.409555983281874e-05, "loss": 0.283, "step": 2845500 }, { "epoch": 1.71, "learning_rate": 4.409345986725818e-05, "loss": 0.2789, "step": 2846000 }, { "epoch": 1.71, "learning_rate": 4.409136410162874e-05, "loss": 0.2825, "step": 2846500 }, { "epoch": 1.71, "learning_rate": 4.4089264136068165e-05, "loss": 0.2849, "step": 2847000 }, { "epoch": 1.71, "learning_rate": 4.4087168370438726e-05, "loss": 0.2799, "step": 2847500 }, { "epoch": 1.71, "learning_rate": 4.408506840487816e-05, "loss": 0.2795, "step": 2848000 }, { "epoch": 1.71, "learning_rate": 4.40829684393176e-05, "loss": 0.2819, "step": 2848500 }, { "epoch": 1.71, "learning_rate": 4.408086847375703e-05, "loss": 0.2812, "step": 2849000 }, { "epoch": 1.71, "learning_rate": 4.407876850819646e-05, "loss": 0.2764, "step": 2849500 }, { "epoch": 1.71, "learning_rate": 4.40766685426359e-05, "loss": 0.2822, "step": 2850000 }, { "epoch": 1.71, "learning_rate": 4.407456857707533e-05, "loss": 0.2789, "step": 2850500 }, { "epoch": 1.71, "learning_rate": 4.4072468611514767e-05, "loss": 0.277, "step": 2851000 }, { "epoch": 1.71, "learning_rate": 4.407037284588533e-05, "loss": 0.2823, "step": 2851500 }, { "epoch": 1.71, "learning_rate": 4.406827288032476e-05, "loss": 0.281, "step": 2852000 }, { "epoch": 1.71, "learning_rate": 4.4066172914764194e-05, "loss": 0.2783, "step": 2852500 }, { "epoch": 1.71, "learning_rate": 4.4064077149134754e-05, "loss": 0.2814, "step": 2853000 }, { "epoch": 1.71, "learning_rate": 4.4061977183574194e-05, "loss": 0.2834, "step": 2853500 }, { "epoch": 1.71, "learning_rate": 4.405987721801362e-05, "loss": 0.2872, "step": 2854000 }, { "epoch": 1.71, "learning_rate": 4.4057777252453055e-05, "loss": 0.2791, "step": 2854500 }, { "epoch": 1.71, "learning_rate": 4.4055677286892495e-05, "loss": 0.2809, "step": 2855000 }, { "epoch": 1.71, "learning_rate": 4.405357732133193e-05, "loss": 0.2722, "step": 2855500 }, { "epoch": 1.71, "learning_rate": 4.405148155570249e-05, "loss": 0.2765, "step": 2856000 }, { "epoch": 1.71, "learning_rate": 4.4049381590141915e-05, "loss": 0.2778, "step": 2856500 }, { "epoch": 1.71, "learning_rate": 4.4047281624581355e-05, "loss": 0.2816, "step": 2857000 }, { "epoch": 1.71, "learning_rate": 4.404518165902079e-05, "loss": 0.2792, "step": 2857500 }, { "epoch": 1.71, "learning_rate": 4.404308169346022e-05, "loss": 0.2777, "step": 2858000 }, { "epoch": 1.71, "learning_rate": 4.404098172789966e-05, "loss": 0.2784, "step": 2858500 }, { "epoch": 1.71, "learning_rate": 4.4038881762339096e-05, "loss": 0.2816, "step": 2859000 }, { "epoch": 1.71, "learning_rate": 4.403678179677853e-05, "loss": 0.2811, "step": 2859500 }, { "epoch": 1.71, "learning_rate": 4.403468183121797e-05, "loss": 0.277, "step": 2860000 }, { "epoch": 1.71, "learning_rate": 4.40325818656574e-05, "loss": 0.2784, "step": 2860500 }, { "epoch": 1.72, "learning_rate": 4.4030490299959084e-05, "loss": 0.2784, "step": 2861000 }, { "epoch": 1.72, "learning_rate": 4.402839033439851e-05, "loss": 0.2867, "step": 2861500 }, { "epoch": 1.72, "learning_rate": 4.402629036883795e-05, "loss": 0.2773, "step": 2862000 }, { "epoch": 1.72, "learning_rate": 4.4024190403277384e-05, "loss": 0.2776, "step": 2862500 }, { "epoch": 1.72, "learning_rate": 4.402209043771682e-05, "loss": 0.278, "step": 2863000 }, { "epoch": 1.72, "learning_rate": 4.401999047215626e-05, "loss": 0.2812, "step": 2863500 }, { "epoch": 1.72, "learning_rate": 4.401789050659569e-05, "loss": 0.2788, "step": 2864000 }, { "epoch": 1.72, "learning_rate": 4.4015790541035125e-05, "loss": 0.2737, "step": 2864500 }, { "epoch": 1.72, "learning_rate": 4.4013690575474565e-05, "loss": 0.2728, "step": 2865000 }, { "epoch": 1.72, "learning_rate": 4.401159480984512e-05, "loss": 0.2841, "step": 2865500 }, { "epoch": 1.72, "learning_rate": 4.400949484428455e-05, "loss": 0.276, "step": 2866000 }, { "epoch": 1.72, "learning_rate": 4.4007394878723985e-05, "loss": 0.2765, "step": 2866500 }, { "epoch": 1.72, "learning_rate": 4.4005294913163426e-05, "loss": 0.2778, "step": 2867000 }, { "epoch": 1.72, "learning_rate": 4.400319914753398e-05, "loss": 0.2779, "step": 2867500 }, { "epoch": 1.72, "learning_rate": 4.400109918197341e-05, "loss": 0.2833, "step": 2868000 }, { "epoch": 1.72, "learning_rate": 4.399899921641285e-05, "loss": 0.2783, "step": 2868500 }, { "epoch": 1.72, "learning_rate": 4.3996899250852286e-05, "loss": 0.2813, "step": 2869000 }, { "epoch": 1.72, "learning_rate": 4.399480348522284e-05, "loss": 0.2776, "step": 2869500 }, { "epoch": 1.72, "learning_rate": 4.3992703519662273e-05, "loss": 0.2735, "step": 2870000 }, { "epoch": 1.72, "learning_rate": 4.3990603554101714e-05, "loss": 0.2785, "step": 2870500 }, { "epoch": 1.72, "learning_rate": 4.398850358854115e-05, "loss": 0.2798, "step": 2871000 }, { "epoch": 1.72, "learning_rate": 4.39864078229117e-05, "loss": 0.2809, "step": 2871500 }, { "epoch": 1.72, "learning_rate": 4.3984307857351134e-05, "loss": 0.2805, "step": 2872000 }, { "epoch": 1.72, "learning_rate": 4.3982207891790574e-05, "loss": 0.28, "step": 2872500 }, { "epoch": 1.72, "learning_rate": 4.398010792623001e-05, "loss": 0.2801, "step": 2873000 }, { "epoch": 1.72, "learning_rate": 4.397800796066944e-05, "loss": 0.2776, "step": 2873500 }, { "epoch": 1.72, "learning_rate": 4.397591219504e-05, "loss": 0.283, "step": 2874000 }, { "epoch": 1.72, "learning_rate": 4.3973812229479435e-05, "loss": 0.2792, "step": 2874500 }, { "epoch": 1.72, "learning_rate": 4.397171226391887e-05, "loss": 0.2819, "step": 2875000 }, { "epoch": 1.72, "learning_rate": 4.396961229835831e-05, "loss": 0.2871, "step": 2875500 }, { "epoch": 1.72, "learning_rate": 4.396751233279774e-05, "loss": 0.2767, "step": 2876000 }, { "epoch": 1.72, "learning_rate": 4.3965416567168296e-05, "loss": 0.2782, "step": 2876500 }, { "epoch": 1.72, "learning_rate": 4.396331660160773e-05, "loss": 0.2793, "step": 2877000 }, { "epoch": 1.73, "learning_rate": 4.396121663604717e-05, "loss": 0.2755, "step": 2877500 }, { "epoch": 1.73, "learning_rate": 4.39591166704866e-05, "loss": 0.2743, "step": 2878000 }, { "epoch": 1.73, "learning_rate": 4.3957020904857157e-05, "loss": 0.2745, "step": 2878500 }, { "epoch": 1.73, "learning_rate": 4.395492093929659e-05, "loss": 0.2778, "step": 2879000 }, { "epoch": 1.73, "learning_rate": 4.395282097373603e-05, "loss": 0.279, "step": 2879500 }, { "epoch": 1.73, "learning_rate": 4.3950721008175464e-05, "loss": 0.2777, "step": 2880000 }, { "epoch": 1.73, "learning_rate": 4.394862524254602e-05, "loss": 0.2776, "step": 2880500 }, { "epoch": 1.73, "learning_rate": 4.394652527698546e-05, "loss": 0.2768, "step": 2881000 }, { "epoch": 1.73, "learning_rate": 4.394442531142489e-05, "loss": 0.2724, "step": 2881500 }, { "epoch": 1.73, "learning_rate": 4.3942325345864324e-05, "loss": 0.2804, "step": 2882000 }, { "epoch": 1.73, "learning_rate": 4.3940225380303765e-05, "loss": 0.2796, "step": 2882500 }, { "epoch": 1.73, "learning_rate": 4.393812961467432e-05, "loss": 0.2795, "step": 2883000 }, { "epoch": 1.73, "learning_rate": 4.393602964911375e-05, "loss": 0.2863, "step": 2883500 }, { "epoch": 1.73, "learning_rate": 4.3933929683553185e-05, "loss": 0.2785, "step": 2884000 }, { "epoch": 1.73, "learning_rate": 4.3931833917923746e-05, "loss": 0.2808, "step": 2884500 }, { "epoch": 1.73, "learning_rate": 4.392973395236318e-05, "loss": 0.28, "step": 2885000 }, { "epoch": 1.73, "learning_rate": 4.392763398680261e-05, "loss": 0.2801, "step": 2885500 }, { "epoch": 1.73, "learning_rate": 4.3925534021242046e-05, "loss": 0.2749, "step": 2886000 }, { "epoch": 1.73, "learning_rate": 4.3923434055681486e-05, "loss": 0.2817, "step": 2886500 }, { "epoch": 1.73, "learning_rate": 4.392133409012092e-05, "loss": 0.2807, "step": 2887000 }, { "epoch": 1.73, "learning_rate": 4.391923412456035e-05, "loss": 0.2755, "step": 2887500 }, { "epoch": 1.73, "learning_rate": 4.391713415899979e-05, "loss": 0.2861, "step": 2888000 }, { "epoch": 1.73, "learning_rate": 4.391503419343923e-05, "loss": 0.2797, "step": 2888500 }, { "epoch": 1.73, "learning_rate": 4.391293422787867e-05, "loss": 0.2722, "step": 2889000 }, { "epoch": 1.73, "learning_rate": 4.39108342623181e-05, "loss": 0.2744, "step": 2889500 }, { "epoch": 1.73, "learning_rate": 4.3908734296757534e-05, "loss": 0.2786, "step": 2890000 }, { "epoch": 1.73, "learning_rate": 4.390663853112809e-05, "loss": 0.2829, "step": 2890500 }, { "epoch": 1.73, "learning_rate": 4.390453856556753e-05, "loss": 0.2774, "step": 2891000 }, { "epoch": 1.73, "learning_rate": 4.390243860000696e-05, "loss": 0.2802, "step": 2891500 }, { "epoch": 1.73, "learning_rate": 4.3900338634446395e-05, "loss": 0.2831, "step": 2892000 }, { "epoch": 1.73, "learning_rate": 4.389824286881695e-05, "loss": 0.2827, "step": 2892500 }, { "epoch": 1.73, "learning_rate": 4.389614290325639e-05, "loss": 0.2895, "step": 2893000 }, { "epoch": 1.73, "learning_rate": 4.389404293769582e-05, "loss": 0.2747, "step": 2893500 }, { "epoch": 1.74, "learning_rate": 4.3891942972135255e-05, "loss": 0.2826, "step": 2894000 }, { "epoch": 1.74, "learning_rate": 4.3889843006574696e-05, "loss": 0.2805, "step": 2894500 }, { "epoch": 1.74, "learning_rate": 4.388774304101413e-05, "loss": 0.2771, "step": 2895000 }, { "epoch": 1.74, "learning_rate": 4.388564727538468e-05, "loss": 0.2856, "step": 2895500 }, { "epoch": 1.74, "learning_rate": 4.388354730982412e-05, "loss": 0.2748, "step": 2896000 }, { "epoch": 1.74, "learning_rate": 4.3881447344263556e-05, "loss": 0.282, "step": 2896500 }, { "epoch": 1.74, "learning_rate": 4.387934737870299e-05, "loss": 0.2776, "step": 2897000 }, { "epoch": 1.74, "learning_rate": 4.387724741314243e-05, "loss": 0.2773, "step": 2897500 }, { "epoch": 1.74, "learning_rate": 4.387514744758186e-05, "loss": 0.2802, "step": 2898000 }, { "epoch": 1.74, "learning_rate": 4.387305168195242e-05, "loss": 0.2819, "step": 2898500 }, { "epoch": 1.74, "learning_rate": 4.387095171639185e-05, "loss": 0.2744, "step": 2899000 }, { "epoch": 1.74, "learning_rate": 4.386885175083129e-05, "loss": 0.2874, "step": 2899500 }, { "epoch": 1.74, "learning_rate": 4.386675178527072e-05, "loss": 0.2745, "step": 2900000 }, { "epoch": 1.74, "eval_loss": 0.2543603181838989, "eval_runtime": 1454.9796, "eval_samples_per_second": 362.012, "eval_steps_per_second": 60.336, "step": 2900000 }, { "epoch": 1.74, "learning_rate": 4.386465181971015e-05, "loss": 0.2725, "step": 2900500 }, { "epoch": 1.74, "learning_rate": 4.386255185414959e-05, "loss": 0.2748, "step": 2901000 }, { "epoch": 1.74, "learning_rate": 4.386045608852015e-05, "loss": 0.2812, "step": 2901500 }, { "epoch": 1.74, "learning_rate": 4.3858356122959585e-05, "loss": 0.2781, "step": 2902000 }, { "epoch": 1.74, "learning_rate": 4.385625615739902e-05, "loss": 0.2792, "step": 2902500 }, { "epoch": 1.74, "learning_rate": 4.385415619183845e-05, "loss": 0.2789, "step": 2903000 }, { "epoch": 1.74, "learning_rate": 4.3852056226277885e-05, "loss": 0.2768, "step": 2903500 }, { "epoch": 1.74, "learning_rate": 4.3849956260717326e-05, "loss": 0.2725, "step": 2904000 }, { "epoch": 1.74, "learning_rate": 4.384785629515676e-05, "loss": 0.2809, "step": 2904500 }, { "epoch": 1.74, "learning_rate": 4.384575632959619e-05, "loss": 0.2769, "step": 2905000 }, { "epoch": 1.74, "learning_rate": 4.3843660563966746e-05, "loss": 0.2759, "step": 2905500 }, { "epoch": 1.74, "learning_rate": 4.3841560598406186e-05, "loss": 0.2751, "step": 2906000 }, { "epoch": 1.74, "learning_rate": 4.383946063284562e-05, "loss": 0.2762, "step": 2906500 }, { "epoch": 1.74, "learning_rate": 4.383736066728505e-05, "loss": 0.2768, "step": 2907000 }, { "epoch": 1.74, "learning_rate": 4.383526490165561e-05, "loss": 0.2733, "step": 2907500 }, { "epoch": 1.74, "learning_rate": 4.383316493609505e-05, "loss": 0.2805, "step": 2908000 }, { "epoch": 1.74, "learning_rate": 4.383106917046561e-05, "loss": 0.2817, "step": 2908500 }, { "epoch": 1.74, "learning_rate": 4.382896920490504e-05, "loss": 0.2746, "step": 2909000 }, { "epoch": 1.74, "learning_rate": 4.3826869239344474e-05, "loss": 0.2735, "step": 2909500 }, { "epoch": 1.74, "learning_rate": 4.382476927378391e-05, "loss": 0.2798, "step": 2910000 }, { "epoch": 1.74, "learning_rate": 4.382266930822334e-05, "loss": 0.2832, "step": 2910500 }, { "epoch": 1.75, "learning_rate": 4.382056934266278e-05, "loss": 0.2794, "step": 2911000 }, { "epoch": 1.75, "learning_rate": 4.3818469377102215e-05, "loss": 0.277, "step": 2911500 }, { "epoch": 1.75, "learning_rate": 4.381636941154165e-05, "loss": 0.2828, "step": 2912000 }, { "epoch": 1.75, "learning_rate": 4.38142736459122e-05, "loss": 0.2811, "step": 2912500 }, { "epoch": 1.75, "learning_rate": 4.381217368035164e-05, "loss": 0.2753, "step": 2913000 }, { "epoch": 1.75, "learning_rate": 4.3810073714791076e-05, "loss": 0.2823, "step": 2913500 }, { "epoch": 1.75, "learning_rate": 4.380797374923051e-05, "loss": 0.2778, "step": 2914000 }, { "epoch": 1.75, "learning_rate": 4.380587378366995e-05, "loss": 0.2761, "step": 2914500 }, { "epoch": 1.75, "learning_rate": 4.38037780180405e-05, "loss": 0.2776, "step": 2915000 }, { "epoch": 1.75, "learning_rate": 4.3801678052479936e-05, "loss": 0.2781, "step": 2915500 }, { "epoch": 1.75, "learning_rate": 4.379957808691937e-05, "loss": 0.2802, "step": 2916000 }, { "epoch": 1.75, "learning_rate": 4.379747812135881e-05, "loss": 0.2746, "step": 2916500 }, { "epoch": 1.75, "learning_rate": 4.3795382355729364e-05, "loss": 0.2784, "step": 2917000 }, { "epoch": 1.75, "learning_rate": 4.37932823901688e-05, "loss": 0.2759, "step": 2917500 }, { "epoch": 1.75, "learning_rate": 4.379118662453936e-05, "loss": 0.2774, "step": 2918000 }, { "epoch": 1.75, "learning_rate": 4.37890866589788e-05, "loss": 0.2781, "step": 2918500 }, { "epoch": 1.75, "learning_rate": 4.378698669341823e-05, "loss": 0.2763, "step": 2919000 }, { "epoch": 1.75, "learning_rate": 4.378488672785766e-05, "loss": 0.2778, "step": 2919500 }, { "epoch": 1.75, "learning_rate": 4.37827867622971e-05, "loss": 0.2781, "step": 2920000 }, { "epoch": 1.75, "learning_rate": 4.378068679673653e-05, "loss": 0.2772, "step": 2920500 }, { "epoch": 1.75, "learning_rate": 4.3778586831175965e-05, "loss": 0.2844, "step": 2921000 }, { "epoch": 1.75, "learning_rate": 4.377649106554652e-05, "loss": 0.2808, "step": 2921500 }, { "epoch": 1.75, "learning_rate": 4.377439109998596e-05, "loss": 0.2814, "step": 2922000 }, { "epoch": 1.75, "learning_rate": 4.377229113442539e-05, "loss": 0.2821, "step": 2922500 }, { "epoch": 1.75, "learning_rate": 4.3770191168864826e-05, "loss": 0.2749, "step": 2923000 }, { "epoch": 1.75, "learning_rate": 4.3768091203304266e-05, "loss": 0.2794, "step": 2923500 }, { "epoch": 1.75, "learning_rate": 4.37659912377437e-05, "loss": 0.2779, "step": 2924000 }, { "epoch": 1.75, "learning_rate": 4.376389547211425e-05, "loss": 0.2806, "step": 2924500 }, { "epoch": 1.75, "learning_rate": 4.376179550655369e-05, "loss": 0.2836, "step": 2925000 }, { "epoch": 1.75, "learning_rate": 4.375969554099313e-05, "loss": 0.278, "step": 2925500 }, { "epoch": 1.75, "learning_rate": 4.375759557543256e-05, "loss": 0.2736, "step": 2926000 }, { "epoch": 1.75, "learning_rate": 4.3755495609872e-05, "loss": 0.2793, "step": 2926500 }, { "epoch": 1.75, "learning_rate": 4.3753395644311434e-05, "loss": 0.2842, "step": 2927000 }, { "epoch": 1.76, "learning_rate": 4.375129567875087e-05, "loss": 0.277, "step": 2927500 }, { "epoch": 1.76, "learning_rate": 4.374919571319031e-05, "loss": 0.2693, "step": 2928000 }, { "epoch": 1.76, "learning_rate": 4.374709574762974e-05, "loss": 0.2736, "step": 2928500 }, { "epoch": 1.76, "learning_rate": 4.3744999982000295e-05, "loss": 0.2767, "step": 2929000 }, { "epoch": 1.76, "learning_rate": 4.374290001643973e-05, "loss": 0.2818, "step": 2929500 }, { "epoch": 1.76, "learning_rate": 4.374080005087917e-05, "loss": 0.2745, "step": 2930000 }, { "epoch": 1.76, "learning_rate": 4.37387000853186e-05, "loss": 0.2854, "step": 2930500 }, { "epoch": 1.76, "learning_rate": 4.3736604319689155e-05, "loss": 0.2792, "step": 2931000 }, { "epoch": 1.76, "learning_rate": 4.373450435412859e-05, "loss": 0.2744, "step": 2931500 }, { "epoch": 1.76, "learning_rate": 4.373240438856803e-05, "loss": 0.2766, "step": 2932000 }, { "epoch": 1.76, "learning_rate": 4.373030442300746e-05, "loss": 0.2722, "step": 2932500 }, { "epoch": 1.76, "learning_rate": 4.3728204457446896e-05, "loss": 0.2834, "step": 2933000 }, { "epoch": 1.76, "learning_rate": 4.3726108691817456e-05, "loss": 0.2738, "step": 2933500 }, { "epoch": 1.76, "learning_rate": 4.372400872625689e-05, "loss": 0.2801, "step": 2934000 }, { "epoch": 1.76, "learning_rate": 4.372190876069632e-05, "loss": 0.2722, "step": 2934500 }, { "epoch": 1.76, "learning_rate": 4.371980879513576e-05, "loss": 0.2805, "step": 2935000 }, { "epoch": 1.76, "learning_rate": 4.37177088295752e-05, "loss": 0.282, "step": 2935500 }, { "epoch": 1.76, "learning_rate": 4.371561306394575e-05, "loss": 0.274, "step": 2936000 }, { "epoch": 1.76, "learning_rate": 4.3713513098385184e-05, "loss": 0.2746, "step": 2936500 }, { "epoch": 1.76, "learning_rate": 4.3711413132824624e-05, "loss": 0.276, "step": 2937000 }, { "epoch": 1.76, "learning_rate": 4.370931316726406e-05, "loss": 0.2777, "step": 2937500 }, { "epoch": 1.76, "learning_rate": 4.370721740163461e-05, "loss": 0.2742, "step": 2938000 }, { "epoch": 1.76, "learning_rate": 4.3705117436074045e-05, "loss": 0.2767, "step": 2938500 }, { "epoch": 1.76, "learning_rate": 4.3703021670444605e-05, "loss": 0.2744, "step": 2939000 }, { "epoch": 1.76, "learning_rate": 4.370092170488404e-05, "loss": 0.2793, "step": 2939500 }, { "epoch": 1.76, "learning_rate": 4.369882173932347e-05, "loss": 0.274, "step": 2940000 }, { "epoch": 1.76, "learning_rate": 4.369672177376291e-05, "loss": 0.2759, "step": 2940500 }, { "epoch": 1.76, "learning_rate": 4.3694621808202346e-05, "loss": 0.2774, "step": 2941000 }, { "epoch": 1.76, "learning_rate": 4.369252184264178e-05, "loss": 0.2795, "step": 2941500 }, { "epoch": 1.76, "learning_rate": 4.369042187708122e-05, "loss": 0.2859, "step": 2942000 }, { "epoch": 1.76, "learning_rate": 4.368832191152065e-05, "loss": 0.2783, "step": 2942500 }, { "epoch": 1.76, "learning_rate": 4.3686221945960086e-05, "loss": 0.2771, "step": 2943000 }, { "epoch": 1.76, "learning_rate": 4.368412618033064e-05, "loss": 0.2717, "step": 2943500 }, { "epoch": 1.77, "learning_rate": 4.368202621477008e-05, "loss": 0.2797, "step": 2944000 }, { "epoch": 1.77, "learning_rate": 4.3679926249209513e-05, "loss": 0.2812, "step": 2944500 }, { "epoch": 1.77, "learning_rate": 4.367783048358007e-05, "loss": 0.2747, "step": 2945000 }, { "epoch": 1.77, "learning_rate": 4.36757305180195e-05, "loss": 0.2779, "step": 2945500 }, { "epoch": 1.77, "learning_rate": 4.367363055245894e-05, "loss": 0.2751, "step": 2946000 }, { "epoch": 1.77, "learning_rate": 4.3671534786829494e-05, "loss": 0.2814, "step": 2946500 }, { "epoch": 1.77, "learning_rate": 4.366943482126893e-05, "loss": 0.2809, "step": 2947000 }, { "epoch": 1.77, "learning_rate": 4.366733485570837e-05, "loss": 0.2783, "step": 2947500 }, { "epoch": 1.77, "learning_rate": 4.36652348901478e-05, "loss": 0.2751, "step": 2948000 }, { "epoch": 1.77, "learning_rate": 4.3663134924587235e-05, "loss": 0.2795, "step": 2948500 }, { "epoch": 1.77, "learning_rate": 4.3661034959026675e-05, "loss": 0.2795, "step": 2949000 }, { "epoch": 1.77, "learning_rate": 4.365893499346611e-05, "loss": 0.278, "step": 2949500 }, { "epoch": 1.77, "learning_rate": 4.365683502790554e-05, "loss": 0.2762, "step": 2950000 }, { "epoch": 1.77, "learning_rate": 4.365473506234498e-05, "loss": 0.28, "step": 2950500 }, { "epoch": 1.77, "learning_rate": 4.365263509678441e-05, "loss": 0.2779, "step": 2951000 }, { "epoch": 1.77, "learning_rate": 4.365053513122384e-05, "loss": 0.2827, "step": 2951500 }, { "epoch": 1.77, "learning_rate": 4.364843516566328e-05, "loss": 0.275, "step": 2952000 }, { "epoch": 1.77, "learning_rate": 4.3646335200102716e-05, "loss": 0.2779, "step": 2952500 }, { "epoch": 1.77, "learning_rate": 4.3644239434473276e-05, "loss": 0.2752, "step": 2953000 }, { "epoch": 1.77, "learning_rate": 4.36421394689127e-05, "loss": 0.274, "step": 2953500 }, { "epoch": 1.77, "learning_rate": 4.364003950335214e-05, "loss": 0.2747, "step": 2954000 }, { "epoch": 1.77, "learning_rate": 4.363793953779158e-05, "loss": 0.2781, "step": 2954500 }, { "epoch": 1.77, "learning_rate": 4.363584377216214e-05, "loss": 0.2827, "step": 2955000 }, { "epoch": 1.77, "learning_rate": 4.363374380660158e-05, "loss": 0.2797, "step": 2955500 }, { "epoch": 1.77, "learning_rate": 4.3631643841041004e-05, "loss": 0.2705, "step": 2956000 }, { "epoch": 1.77, "learning_rate": 4.362954387548044e-05, "loss": 0.2833, "step": 2956500 }, { "epoch": 1.77, "learning_rate": 4.3627448109851e-05, "loss": 0.2761, "step": 2957000 }, { "epoch": 1.77, "learning_rate": 4.362534814429044e-05, "loss": 0.2825, "step": 2957500 }, { "epoch": 1.77, "learning_rate": 4.3623248178729865e-05, "loss": 0.2783, "step": 2958000 }, { "epoch": 1.77, "learning_rate": 4.36211482131693e-05, "loss": 0.2824, "step": 2958500 }, { "epoch": 1.77, "learning_rate": 4.361905244753986e-05, "loss": 0.2761, "step": 2959000 }, { "epoch": 1.77, "learning_rate": 4.36169524819793e-05, "loss": 0.2801, "step": 2959500 }, { "epoch": 1.77, "learning_rate": 4.361485251641873e-05, "loss": 0.2785, "step": 2960000 }, { "epoch": 1.77, "learning_rate": 4.361275255085816e-05, "loss": 0.2689, "step": 2960500 }, { "epoch": 1.78, "learning_rate": 4.36106525852976e-05, "loss": 0.2771, "step": 2961000 }, { "epoch": 1.78, "learning_rate": 4.360855261973703e-05, "loss": 0.2849, "step": 2961500 }, { "epoch": 1.78, "learning_rate": 4.360645265417647e-05, "loss": 0.2806, "step": 2962000 }, { "epoch": 1.78, "learning_rate": 4.3604352688615906e-05, "loss": 0.2757, "step": 2962500 }, { "epoch": 1.78, "learning_rate": 4.360225692298646e-05, "loss": 0.2798, "step": 2963000 }, { "epoch": 1.78, "learning_rate": 4.3600156957425893e-05, "loss": 0.2675, "step": 2963500 }, { "epoch": 1.78, "learning_rate": 4.3598056991865334e-05, "loss": 0.2819, "step": 2964000 }, { "epoch": 1.78, "learning_rate": 4.359595702630477e-05, "loss": 0.2769, "step": 2964500 }, { "epoch": 1.78, "learning_rate": 4.35938570607442e-05, "loss": 0.2785, "step": 2965000 }, { "epoch": 1.78, "learning_rate": 4.3591761295114754e-05, "loss": 0.2861, "step": 2965500 }, { "epoch": 1.78, "learning_rate": 4.3589661329554194e-05, "loss": 0.2765, "step": 2966000 }, { "epoch": 1.78, "learning_rate": 4.358756136399363e-05, "loss": 0.2736, "step": 2966500 }, { "epoch": 1.78, "learning_rate": 4.358546139843306e-05, "loss": 0.2791, "step": 2967000 }, { "epoch": 1.78, "learning_rate": 4.35833614328725e-05, "loss": 0.2777, "step": 2967500 }, { "epoch": 1.78, "learning_rate": 4.3581261467311935e-05, "loss": 0.2754, "step": 2968000 }, { "epoch": 1.78, "learning_rate": 4.357916570168249e-05, "loss": 0.2834, "step": 2968500 }, { "epoch": 1.78, "learning_rate": 4.357706573612193e-05, "loss": 0.2776, "step": 2969000 }, { "epoch": 1.78, "learning_rate": 4.357496577056136e-05, "loss": 0.278, "step": 2969500 }, { "epoch": 1.78, "learning_rate": 4.3572865805000796e-05, "loss": 0.2805, "step": 2970000 }, { "epoch": 1.78, "learning_rate": 4.357077003937135e-05, "loss": 0.2729, "step": 2970500 }, { "epoch": 1.78, "learning_rate": 4.356867007381079e-05, "loss": 0.2799, "step": 2971000 }, { "epoch": 1.78, "learning_rate": 4.356657010825022e-05, "loss": 0.2763, "step": 2971500 }, { "epoch": 1.78, "learning_rate": 4.3564470142689656e-05, "loss": 0.2763, "step": 2972000 }, { "epoch": 1.78, "learning_rate": 4.356237437706021e-05, "loss": 0.2755, "step": 2972500 }, { "epoch": 1.78, "learning_rate": 4.356027441149965e-05, "loss": 0.2811, "step": 2973000 }, { "epoch": 1.78, "learning_rate": 4.3558174445939084e-05, "loss": 0.2718, "step": 2973500 }, { "epoch": 1.78, "learning_rate": 4.355607448037852e-05, "loss": 0.2806, "step": 2974000 }, { "epoch": 1.78, "learning_rate": 4.355397451481796e-05, "loss": 0.2784, "step": 2974500 }, { "epoch": 1.78, "learning_rate": 4.355187874918851e-05, "loss": 0.279, "step": 2975000 }, { "epoch": 1.78, "learning_rate": 4.3549778783627944e-05, "loss": 0.2787, "step": 2975500 }, { "epoch": 1.78, "learning_rate": 4.3547678818067385e-05, "loss": 0.2815, "step": 2976000 }, { "epoch": 1.78, "learning_rate": 4.354557885250682e-05, "loss": 0.2817, "step": 2976500 }, { "epoch": 1.78, "learning_rate": 4.354347888694625e-05, "loss": 0.2721, "step": 2977000 }, { "epoch": 1.79, "learning_rate": 4.354137892138569e-05, "loss": 0.2758, "step": 2977500 }, { "epoch": 1.79, "learning_rate": 4.3539278955825125e-05, "loss": 0.2775, "step": 2978000 }, { "epoch": 1.79, "learning_rate": 4.353717899026456e-05, "loss": 0.278, "step": 2978500 }, { "epoch": 1.79, "learning_rate": 4.353508322463511e-05, "loss": 0.2764, "step": 2979000 }, { "epoch": 1.79, "learning_rate": 4.353298325907455e-05, "loss": 0.2755, "step": 2979500 }, { "epoch": 1.79, "learning_rate": 4.3530883293513986e-05, "loss": 0.2851, "step": 2980000 }, { "epoch": 1.79, "learning_rate": 4.352878332795342e-05, "loss": 0.2784, "step": 2980500 }, { "epoch": 1.79, "learning_rate": 4.352668756232397e-05, "loss": 0.2766, "step": 2981000 }, { "epoch": 1.79, "learning_rate": 4.352458759676341e-05, "loss": 0.2766, "step": 2981500 }, { "epoch": 1.79, "learning_rate": 4.352249183113397e-05, "loss": 0.2738, "step": 2982000 }, { "epoch": 1.79, "learning_rate": 4.35203918655734e-05, "loss": 0.2778, "step": 2982500 }, { "epoch": 1.79, "learning_rate": 4.351829190001284e-05, "loss": 0.2743, "step": 2983000 }, { "epoch": 1.79, "learning_rate": 4.3516191934452274e-05, "loss": 0.2745, "step": 2983500 }, { "epoch": 1.79, "learning_rate": 4.351409196889171e-05, "loss": 0.2731, "step": 2984000 }, { "epoch": 1.79, "learning_rate": 4.351199200333115e-05, "loss": 0.2746, "step": 2984500 }, { "epoch": 1.79, "learning_rate": 4.350989203777058e-05, "loss": 0.2747, "step": 2985000 }, { "epoch": 1.79, "learning_rate": 4.3507792072210015e-05, "loss": 0.2722, "step": 2985500 }, { "epoch": 1.79, "learning_rate": 4.350569630658057e-05, "loss": 0.2748, "step": 2986000 }, { "epoch": 1.79, "learning_rate": 4.350359634102001e-05, "loss": 0.2727, "step": 2986500 }, { "epoch": 1.79, "learning_rate": 4.350150057539056e-05, "loss": 0.2802, "step": 2987000 }, { "epoch": 1.79, "learning_rate": 4.3499400609829995e-05, "loss": 0.2711, "step": 2987500 }, { "epoch": 1.79, "learning_rate": 4.349730064426943e-05, "loss": 0.2777, "step": 2988000 }, { "epoch": 1.79, "learning_rate": 4.349520067870887e-05, "loss": 0.2778, "step": 2988500 }, { "epoch": 1.79, "learning_rate": 4.34931007131483e-05, "loss": 0.2805, "step": 2989000 }, { "epoch": 1.79, "learning_rate": 4.3491000747587736e-05, "loss": 0.2722, "step": 2989500 }, { "epoch": 1.79, "learning_rate": 4.3488900782027176e-05, "loss": 0.283, "step": 2990000 }, { "epoch": 1.79, "learning_rate": 4.348680081646661e-05, "loss": 0.2792, "step": 2990500 }, { "epoch": 1.79, "learning_rate": 4.348470085090604e-05, "loss": 0.2778, "step": 2991000 }, { "epoch": 1.79, "learning_rate": 4.3482605085276604e-05, "loss": 0.2785, "step": 2991500 }, { "epoch": 1.79, "learning_rate": 4.348050511971604e-05, "loss": 0.277, "step": 2992000 }, { "epoch": 1.79, "learning_rate": 4.347840515415547e-05, "loss": 0.2804, "step": 2992500 }, { "epoch": 1.79, "learning_rate": 4.347630518859491e-05, "loss": 0.2791, "step": 2993000 }, { "epoch": 1.79, "learning_rate": 4.3474209422965464e-05, "loss": 0.2849, "step": 2993500 }, { "epoch": 1.8, "learning_rate": 4.34721094574049e-05, "loss": 0.2804, "step": 2994000 }, { "epoch": 1.8, "learning_rate": 4.347000949184433e-05, "loss": 0.2797, "step": 2994500 }, { "epoch": 1.8, "learning_rate": 4.346790952628377e-05, "loss": 0.2715, "step": 2995000 }, { "epoch": 1.8, "learning_rate": 4.3465813760654325e-05, "loss": 0.2761, "step": 2995500 }, { "epoch": 1.8, "learning_rate": 4.346371379509376e-05, "loss": 0.275, "step": 2996000 }, { "epoch": 1.8, "learning_rate": 4.346161382953319e-05, "loss": 0.2802, "step": 2996500 }, { "epoch": 1.8, "learning_rate": 4.345951386397263e-05, "loss": 0.2726, "step": 2997000 }, { "epoch": 1.8, "learning_rate": 4.3457413898412066e-05, "loss": 0.2819, "step": 2997500 }, { "epoch": 1.8, "learning_rate": 4.345531813278262e-05, "loss": 0.2811, "step": 2998000 }, { "epoch": 1.8, "learning_rate": 4.345321816722206e-05, "loss": 0.2823, "step": 2998500 }, { "epoch": 1.8, "learning_rate": 4.345111820166149e-05, "loss": 0.2784, "step": 2999000 }, { "epoch": 1.8, "learning_rate": 4.3449022436032047e-05, "loss": 0.271, "step": 2999500 }, { "epoch": 1.8, "learning_rate": 4.344692247047148e-05, "loss": 0.2729, "step": 3000000 }, { "epoch": 1.8, "eval_loss": 0.2533246576786041, "eval_runtime": 1463.101, "eval_samples_per_second": 360.002, "eval_steps_per_second": 60.001, "step": 3000000 }, { "epoch": 1.8, "learning_rate": 4.344482250491092e-05, "loss": 0.2817, "step": 3000500 }, { "epoch": 1.8, "learning_rate": 4.3442722539350354e-05, "loss": 0.276, "step": 3001000 }, { "epoch": 1.8, "learning_rate": 4.344062257378979e-05, "loss": 0.2776, "step": 3001500 }, { "epoch": 1.8, "learning_rate": 4.343852260822923e-05, "loss": 0.2771, "step": 3002000 }, { "epoch": 1.8, "learning_rate": 4.343642264266866e-05, "loss": 0.2827, "step": 3002500 }, { "epoch": 1.8, "learning_rate": 4.3434322677108094e-05, "loss": 0.2736, "step": 3003000 }, { "epoch": 1.8, "learning_rate": 4.343222691147865e-05, "loss": 0.2757, "step": 3003500 }, { "epoch": 1.8, "learning_rate": 4.343012694591809e-05, "loss": 0.2771, "step": 3004000 }, { "epoch": 1.8, "learning_rate": 4.342802698035752e-05, "loss": 0.2701, "step": 3004500 }, { "epoch": 1.8, "learning_rate": 4.3425927014796955e-05, "loss": 0.2836, "step": 3005000 }, { "epoch": 1.8, "learning_rate": 4.3423827049236395e-05, "loss": 0.2696, "step": 3005500 }, { "epoch": 1.8, "learning_rate": 4.342173128360695e-05, "loss": 0.2725, "step": 3006000 }, { "epoch": 1.8, "learning_rate": 4.341963131804638e-05, "loss": 0.278, "step": 3006500 }, { "epoch": 1.8, "learning_rate": 4.341753135248582e-05, "loss": 0.2792, "step": 3007000 }, { "epoch": 1.8, "learning_rate": 4.3415431386925256e-05, "loss": 0.2786, "step": 3007500 }, { "epoch": 1.8, "learning_rate": 4.341333142136469e-05, "loss": 0.278, "step": 3008000 }, { "epoch": 1.8, "learning_rate": 4.341123565573524e-05, "loss": 0.2753, "step": 3008500 }, { "epoch": 1.8, "learning_rate": 4.340913569017468e-05, "loss": 0.2804, "step": 3009000 }, { "epoch": 1.8, "learning_rate": 4.340703572461412e-05, "loss": 0.2801, "step": 3009500 }, { "epoch": 1.8, "learning_rate": 4.340493575905355e-05, "loss": 0.277, "step": 3010000 }, { "epoch": 1.8, "learning_rate": 4.3402839993424104e-05, "loss": 0.28, "step": 3010500 }, { "epoch": 1.81, "learning_rate": 4.3400740027863544e-05, "loss": 0.2714, "step": 3011000 }, { "epoch": 1.81, "learning_rate": 4.339864006230298e-05, "loss": 0.2705, "step": 3011500 }, { "epoch": 1.81, "learning_rate": 4.339654009674241e-05, "loss": 0.275, "step": 3012000 }, { "epoch": 1.81, "learning_rate": 4.339444013118185e-05, "loss": 0.2807, "step": 3012500 }, { "epoch": 1.81, "learning_rate": 4.3392344365552405e-05, "loss": 0.2761, "step": 3013000 }, { "epoch": 1.81, "learning_rate": 4.339024859992296e-05, "loss": 0.2765, "step": 3013500 }, { "epoch": 1.81, "learning_rate": 4.338814863436239e-05, "loss": 0.2775, "step": 3014000 }, { "epoch": 1.81, "learning_rate": 4.338604866880183e-05, "loss": 0.2823, "step": 3014500 }, { "epoch": 1.81, "learning_rate": 4.3383948703241265e-05, "loss": 0.2818, "step": 3015000 }, { "epoch": 1.81, "learning_rate": 4.33818487376807e-05, "loss": 0.2782, "step": 3015500 }, { "epoch": 1.81, "learning_rate": 4.337974877212014e-05, "loss": 0.2751, "step": 3016000 }, { "epoch": 1.81, "learning_rate": 4.337764880655957e-05, "loss": 0.2789, "step": 3016500 }, { "epoch": 1.81, "learning_rate": 4.3375553040930126e-05, "loss": 0.2838, "step": 3017000 }, { "epoch": 1.81, "learning_rate": 4.337345307536956e-05, "loss": 0.2779, "step": 3017500 }, { "epoch": 1.81, "learning_rate": 4.3371353109809e-05, "loss": 0.2766, "step": 3018000 }, { "epoch": 1.81, "learning_rate": 4.336925314424843e-05, "loss": 0.2733, "step": 3018500 }, { "epoch": 1.81, "learning_rate": 4.336715317868787e-05, "loss": 0.2816, "step": 3019000 }, { "epoch": 1.81, "learning_rate": 4.336505321312731e-05, "loss": 0.2768, "step": 3019500 }, { "epoch": 1.81, "learning_rate": 4.336295324756674e-05, "loss": 0.2755, "step": 3020000 }, { "epoch": 1.81, "learning_rate": 4.336085328200618e-05, "loss": 0.2813, "step": 3020500 }, { "epoch": 1.81, "learning_rate": 4.3358757516376734e-05, "loss": 0.2772, "step": 3021000 }, { "epoch": 1.81, "learning_rate": 4.335666175074729e-05, "loss": 0.28, "step": 3021500 }, { "epoch": 1.81, "learning_rate": 4.335456178518672e-05, "loss": 0.2757, "step": 3022000 }, { "epoch": 1.81, "learning_rate": 4.3352461819626155e-05, "loss": 0.2736, "step": 3022500 }, { "epoch": 1.81, "learning_rate": 4.3350361854065595e-05, "loss": 0.2771, "step": 3023000 }, { "epoch": 1.81, "learning_rate": 4.334826188850503e-05, "loss": 0.2772, "step": 3023500 }, { "epoch": 1.81, "learning_rate": 4.334616192294446e-05, "loss": 0.2749, "step": 3024000 }, { "epoch": 1.81, "learning_rate": 4.33440619573839e-05, "loss": 0.2802, "step": 3024500 }, { "epoch": 1.81, "learning_rate": 4.3341961991823336e-05, "loss": 0.2797, "step": 3025000 }, { "epoch": 1.81, "learning_rate": 4.333986202626276e-05, "loss": 0.278, "step": 3025500 }, { "epoch": 1.81, "learning_rate": 4.333776626063333e-05, "loss": 0.2804, "step": 3026000 }, { "epoch": 1.81, "learning_rate": 4.333566629507276e-05, "loss": 0.2748, "step": 3026500 }, { "epoch": 1.81, "learning_rate": 4.3333570529443316e-05, "loss": 0.2803, "step": 3027000 }, { "epoch": 1.82, "learning_rate": 4.333147056388275e-05, "loss": 0.2727, "step": 3027500 }, { "epoch": 1.82, "learning_rate": 4.332937059832219e-05, "loss": 0.2766, "step": 3028000 }, { "epoch": 1.82, "learning_rate": 4.3327270632761624e-05, "loss": 0.2734, "step": 3028500 }, { "epoch": 1.82, "learning_rate": 4.332517486713218e-05, "loss": 0.2768, "step": 3029000 }, { "epoch": 1.82, "learning_rate": 4.332307490157161e-05, "loss": 0.2762, "step": 3029500 }, { "epoch": 1.82, "learning_rate": 4.332097493601105e-05, "loss": 0.2778, "step": 3030000 }, { "epoch": 1.82, "learning_rate": 4.3318874970450484e-05, "loss": 0.2804, "step": 3030500 }, { "epoch": 1.82, "learning_rate": 4.331677500488992e-05, "loss": 0.2799, "step": 3031000 }, { "epoch": 1.82, "learning_rate": 4.331467503932936e-05, "loss": 0.2723, "step": 3031500 }, { "epoch": 1.82, "learning_rate": 4.331257507376879e-05, "loss": 0.2757, "step": 3032000 }, { "epoch": 1.82, "learning_rate": 4.3310475108208225e-05, "loss": 0.2714, "step": 3032500 }, { "epoch": 1.82, "learning_rate": 4.3308379342578785e-05, "loss": 0.2786, "step": 3033000 }, { "epoch": 1.82, "learning_rate": 4.330627937701822e-05, "loss": 0.2706, "step": 3033500 }, { "epoch": 1.82, "learning_rate": 4.330417941145765e-05, "loss": 0.2697, "step": 3034000 }, { "epoch": 1.82, "learning_rate": 4.330207944589709e-05, "loss": 0.2796, "step": 3034500 }, { "epoch": 1.82, "learning_rate": 4.3299983680267646e-05, "loss": 0.2763, "step": 3035000 }, { "epoch": 1.82, "learning_rate": 4.329788371470708e-05, "loss": 0.2833, "step": 3035500 }, { "epoch": 1.82, "learning_rate": 4.329578374914651e-05, "loss": 0.2784, "step": 3036000 }, { "epoch": 1.82, "learning_rate": 4.329368378358595e-05, "loss": 0.2746, "step": 3036500 }, { "epoch": 1.82, "learning_rate": 4.329158381802539e-05, "loss": 0.2829, "step": 3037000 }, { "epoch": 1.82, "learning_rate": 4.328948805239594e-05, "loss": 0.2775, "step": 3037500 }, { "epoch": 1.82, "learning_rate": 4.3287388086835374e-05, "loss": 0.2793, "step": 3038000 }, { "epoch": 1.82, "learning_rate": 4.3285288121274814e-05, "loss": 0.2786, "step": 3038500 }, { "epoch": 1.82, "learning_rate": 4.328318815571425e-05, "loss": 0.2763, "step": 3039000 }, { "epoch": 1.82, "learning_rate": 4.32810923900848e-05, "loss": 0.2784, "step": 3039500 }, { "epoch": 1.82, "learning_rate": 4.327899242452424e-05, "loss": 0.2795, "step": 3040000 }, { "epoch": 1.82, "learning_rate": 4.3276892458963675e-05, "loss": 0.274, "step": 3040500 }, { "epoch": 1.82, "learning_rate": 4.327479249340311e-05, "loss": 0.2765, "step": 3041000 }, { "epoch": 1.82, "learning_rate": 4.327269252784255e-05, "loss": 0.2808, "step": 3041500 }, { "epoch": 1.82, "learning_rate": 4.3270600962144215e-05, "loss": 0.2747, "step": 3042000 }, { "epoch": 1.82, "learning_rate": 4.3268500996583655e-05, "loss": 0.2737, "step": 3042500 }, { "epoch": 1.82, "learning_rate": 4.326640103102309e-05, "loss": 0.2809, "step": 3043000 }, { "epoch": 1.82, "learning_rate": 4.326430106546252e-05, "loss": 0.2769, "step": 3043500 }, { "epoch": 1.83, "learning_rate": 4.326220109990196e-05, "loss": 0.2782, "step": 3044000 }, { "epoch": 1.83, "learning_rate": 4.3260101134341396e-05, "loss": 0.2783, "step": 3044500 }, { "epoch": 1.83, "learning_rate": 4.325800116878083e-05, "loss": 0.278, "step": 3045000 }, { "epoch": 1.83, "learning_rate": 4.325590120322027e-05, "loss": 0.2734, "step": 3045500 }, { "epoch": 1.83, "learning_rate": 4.325380543759082e-05, "loss": 0.2752, "step": 3046000 }, { "epoch": 1.83, "learning_rate": 4.325170547203026e-05, "loss": 0.2757, "step": 3046500 }, { "epoch": 1.83, "learning_rate": 4.32496055064697e-05, "loss": 0.2686, "step": 3047000 }, { "epoch": 1.83, "learning_rate": 4.324750554090913e-05, "loss": 0.275, "step": 3047500 }, { "epoch": 1.83, "learning_rate": 4.3245405575348564e-05, "loss": 0.2782, "step": 3048000 }, { "epoch": 1.83, "learning_rate": 4.324330980971912e-05, "loss": 0.2809, "step": 3048500 }, { "epoch": 1.83, "learning_rate": 4.324120984415856e-05, "loss": 0.2794, "step": 3049000 }, { "epoch": 1.83, "learning_rate": 4.323910987859799e-05, "loss": 0.2809, "step": 3049500 }, { "epoch": 1.83, "learning_rate": 4.3237009913037425e-05, "loss": 0.2756, "step": 3050000 }, { "epoch": 1.83, "learning_rate": 4.3234909947476865e-05, "loss": 0.2795, "step": 3050500 }, { "epoch": 1.83, "learning_rate": 4.32328099819163e-05, "loss": 0.2751, "step": 3051000 }, { "epoch": 1.83, "learning_rate": 4.323071001635573e-05, "loss": 0.2768, "step": 3051500 }, { "epoch": 1.83, "learning_rate": 4.3228610050795165e-05, "loss": 0.2753, "step": 3052000 }, { "epoch": 1.83, "learning_rate": 4.32265100852346e-05, "loss": 0.2796, "step": 3052500 }, { "epoch": 1.83, "learning_rate": 4.322441431960516e-05, "loss": 0.2727, "step": 3053000 }, { "epoch": 1.83, "learning_rate": 4.322231435404459e-05, "loss": 0.2776, "step": 3053500 }, { "epoch": 1.83, "learning_rate": 4.322021858841515e-05, "loss": 0.2805, "step": 3054000 }, { "epoch": 1.83, "learning_rate": 4.3218118622854586e-05, "loss": 0.281, "step": 3054500 }, { "epoch": 1.83, "learning_rate": 4.321601865729402e-05, "loss": 0.2785, "step": 3055000 }, { "epoch": 1.83, "learning_rate": 4.321391869173346e-05, "loss": 0.2766, "step": 3055500 }, { "epoch": 1.83, "learning_rate": 4.3211818726172894e-05, "loss": 0.2829, "step": 3056000 }, { "epoch": 1.83, "learning_rate": 4.320972296054345e-05, "loss": 0.2781, "step": 3056500 }, { "epoch": 1.83, "learning_rate": 4.320762299498288e-05, "loss": 0.2824, "step": 3057000 }, { "epoch": 1.83, "learning_rate": 4.320552302942232e-05, "loss": 0.2769, "step": 3057500 }, { "epoch": 1.83, "learning_rate": 4.3203423063861754e-05, "loss": 0.2791, "step": 3058000 }, { "epoch": 1.83, "learning_rate": 4.320132309830119e-05, "loss": 0.2719, "step": 3058500 }, { "epoch": 1.83, "learning_rate": 4.319922313274062e-05, "loss": 0.2794, "step": 3059000 }, { "epoch": 1.83, "learning_rate": 4.3197123167180055e-05, "loss": 0.2755, "step": 3059500 }, { "epoch": 1.83, "learning_rate": 4.319502320161949e-05, "loss": 0.2782, "step": 3060000 }, { "epoch": 1.83, "learning_rate": 4.319292743599005e-05, "loss": 0.2761, "step": 3060500 }, { "epoch": 1.84, "learning_rate": 4.319082747042949e-05, "loss": 0.2798, "step": 3061000 }, { "epoch": 1.84, "learning_rate": 4.3188727504868915e-05, "loss": 0.2725, "step": 3061500 }, { "epoch": 1.84, "learning_rate": 4.3186627539308356e-05, "loss": 0.2777, "step": 3062000 }, { "epoch": 1.84, "learning_rate": 4.3184531773678916e-05, "loss": 0.2715, "step": 3062500 }, { "epoch": 1.84, "learning_rate": 4.318243180811835e-05, "loss": 0.2814, "step": 3063000 }, { "epoch": 1.84, "learning_rate": 4.318033184255778e-05, "loss": 0.2843, "step": 3063500 }, { "epoch": 1.84, "learning_rate": 4.3178231876997216e-05, "loss": 0.2718, "step": 3064000 }, { "epoch": 1.84, "learning_rate": 4.317613191143665e-05, "loss": 0.2771, "step": 3064500 }, { "epoch": 1.84, "learning_rate": 4.317403614580721e-05, "loss": 0.2771, "step": 3065000 }, { "epoch": 1.84, "learning_rate": 4.3171936180246644e-05, "loss": 0.2739, "step": 3065500 }, { "epoch": 1.84, "learning_rate": 4.31698404146172e-05, "loss": 0.2847, "step": 3066000 }, { "epoch": 1.84, "learning_rate": 4.316774464898776e-05, "loss": 0.2746, "step": 3066500 }, { "epoch": 1.84, "learning_rate": 4.316564468342719e-05, "loss": 0.2768, "step": 3067000 }, { "epoch": 1.84, "learning_rate": 4.3163544717866624e-05, "loss": 0.2776, "step": 3067500 }, { "epoch": 1.84, "learning_rate": 4.3161444752306065e-05, "loss": 0.2761, "step": 3068000 }, { "epoch": 1.84, "learning_rate": 4.31593447867455e-05, "loss": 0.2772, "step": 3068500 }, { "epoch": 1.84, "learning_rate": 4.315724482118493e-05, "loss": 0.2813, "step": 3069000 }, { "epoch": 1.84, "learning_rate": 4.315514485562437e-05, "loss": 0.2795, "step": 3069500 }, { "epoch": 1.84, "learning_rate": 4.3153044890063805e-05, "loss": 0.2757, "step": 3070000 }, { "epoch": 1.84, "learning_rate": 4.315094492450324e-05, "loss": 0.2746, "step": 3070500 }, { "epoch": 1.84, "learning_rate": 4.314884495894267e-05, "loss": 0.2743, "step": 3071000 }, { "epoch": 1.84, "learning_rate": 4.3146744993382106e-05, "loss": 0.2772, "step": 3071500 }, { "epoch": 1.84, "learning_rate": 4.314464502782154e-05, "loss": 0.2809, "step": 3072000 }, { "epoch": 1.84, "learning_rate": 4.314254506226098e-05, "loss": 0.272, "step": 3072500 }, { "epoch": 1.84, "learning_rate": 4.314044929663154e-05, "loss": 0.2756, "step": 3073000 }, { "epoch": 1.84, "learning_rate": 4.3138349331070966e-05, "loss": 0.2785, "step": 3073500 }, { "epoch": 1.84, "learning_rate": 4.31362493655104e-05, "loss": 0.2774, "step": 3074000 }, { "epoch": 1.84, "learning_rate": 4.313414939994984e-05, "loss": 0.278, "step": 3074500 }, { "epoch": 1.84, "learning_rate": 4.31320536343204e-05, "loss": 0.2734, "step": 3075000 }, { "epoch": 1.84, "learning_rate": 4.312995366875983e-05, "loss": 0.2806, "step": 3075500 }, { "epoch": 1.84, "learning_rate": 4.312785370319927e-05, "loss": 0.2717, "step": 3076000 }, { "epoch": 1.84, "learning_rate": 4.31257537376387e-05, "loss": 0.2734, "step": 3076500 }, { "epoch": 1.84, "learning_rate": 4.312365797200926e-05, "loss": 0.2732, "step": 3077000 }, { "epoch": 1.85, "learning_rate": 4.3121562206379815e-05, "loss": 0.2779, "step": 3077500 }, { "epoch": 1.85, "learning_rate": 4.311946224081925e-05, "loss": 0.2736, "step": 3078000 }, { "epoch": 1.85, "learning_rate": 4.31173664751898e-05, "loss": 0.2779, "step": 3078500 }, { "epoch": 1.85, "learning_rate": 4.311526650962924e-05, "loss": 0.2829, "step": 3079000 }, { "epoch": 1.85, "learning_rate": 4.3113166544068675e-05, "loss": 0.276, "step": 3079500 }, { "epoch": 1.85, "learning_rate": 4.311106657850811e-05, "loss": 0.2808, "step": 3080000 }, { "epoch": 1.85, "learning_rate": 4.310896661294755e-05, "loss": 0.282, "step": 3080500 }, { "epoch": 1.85, "learning_rate": 4.310686664738698e-05, "loss": 0.2749, "step": 3081000 }, { "epoch": 1.85, "learning_rate": 4.3104766681826416e-05, "loss": 0.2753, "step": 3081500 }, { "epoch": 1.85, "learning_rate": 4.3102666716265856e-05, "loss": 0.273, "step": 3082000 }, { "epoch": 1.85, "learning_rate": 4.310056675070529e-05, "loss": 0.2762, "step": 3082500 }, { "epoch": 1.85, "learning_rate": 4.309846678514472e-05, "loss": 0.2765, "step": 3083000 }, { "epoch": 1.85, "learning_rate": 4.309636681958416e-05, "loss": 0.2846, "step": 3083500 }, { "epoch": 1.85, "learning_rate": 4.309426685402359e-05, "loss": 0.2735, "step": 3084000 }, { "epoch": 1.85, "learning_rate": 4.309217108839415e-05, "loss": 0.2724, "step": 3084500 }, { "epoch": 1.85, "learning_rate": 4.3090071122833584e-05, "loss": 0.2753, "step": 3085000 }, { "epoch": 1.85, "learning_rate": 4.308797115727302e-05, "loss": 0.2773, "step": 3085500 }, { "epoch": 1.85, "learning_rate": 4.308587119171245e-05, "loss": 0.2803, "step": 3086000 }, { "epoch": 1.85, "learning_rate": 4.308377122615189e-05, "loss": 0.2694, "step": 3086500 }, { "epoch": 1.85, "learning_rate": 4.308167546052245e-05, "loss": 0.2745, "step": 3087000 }, { "epoch": 1.85, "learning_rate": 4.307957549496188e-05, "loss": 0.2779, "step": 3087500 }, { "epoch": 1.85, "learning_rate": 4.307747972933244e-05, "loss": 0.2797, "step": 3088000 }, { "epoch": 1.85, "learning_rate": 4.307537976377187e-05, "loss": 0.2758, "step": 3088500 }, { "epoch": 1.85, "learning_rate": 4.307327979821131e-05, "loss": 0.2732, "step": 3089000 }, { "epoch": 1.85, "learning_rate": 4.3071179832650746e-05, "loss": 0.2781, "step": 3089500 }, { "epoch": 1.85, "learning_rate": 4.306907986709018e-05, "loss": 0.2735, "step": 3090000 }, { "epoch": 1.85, "learning_rate": 4.306697990152961e-05, "loss": 0.276, "step": 3090500 }, { "epoch": 1.85, "learning_rate": 4.3064879935969046e-05, "loss": 0.2735, "step": 3091000 }, { "epoch": 1.85, "learning_rate": 4.3062779970408486e-05, "loss": 0.2703, "step": 3091500 }, { "epoch": 1.85, "learning_rate": 4.306068000484792e-05, "loss": 0.2777, "step": 3092000 }, { "epoch": 1.85, "learning_rate": 4.305858423921847e-05, "loss": 0.2782, "step": 3092500 }, { "epoch": 1.85, "learning_rate": 4.305648427365791e-05, "loss": 0.2798, "step": 3093000 }, { "epoch": 1.85, "learning_rate": 4.305438430809735e-05, "loss": 0.2743, "step": 3093500 }, { "epoch": 1.85, "learning_rate": 4.305228434253678e-05, "loss": 0.2759, "step": 3094000 }, { "epoch": 1.86, "learning_rate": 4.305018857690734e-05, "loss": 0.2796, "step": 3094500 }, { "epoch": 1.86, "learning_rate": 4.304808861134677e-05, "loss": 0.2772, "step": 3095000 }, { "epoch": 1.86, "learning_rate": 4.304599284571733e-05, "loss": 0.2741, "step": 3095500 }, { "epoch": 1.86, "learning_rate": 4.304389288015677e-05, "loss": 0.281, "step": 3096000 }, { "epoch": 1.86, "learning_rate": 4.30417929145962e-05, "loss": 0.2743, "step": 3096500 }, { "epoch": 1.86, "learning_rate": 4.3039692949035635e-05, "loss": 0.2801, "step": 3097000 }, { "epoch": 1.86, "learning_rate": 4.3037597183406195e-05, "loss": 0.276, "step": 3097500 }, { "epoch": 1.86, "learning_rate": 4.303549721784563e-05, "loss": 0.2759, "step": 3098000 }, { "epoch": 1.86, "learning_rate": 4.303339725228506e-05, "loss": 0.2773, "step": 3098500 }, { "epoch": 1.86, "learning_rate": 4.30312972867245e-05, "loss": 0.2748, "step": 3099000 }, { "epoch": 1.86, "learning_rate": 4.302919732116393e-05, "loss": 0.2706, "step": 3099500 }, { "epoch": 1.86, "learning_rate": 4.302709735560336e-05, "loss": 0.2716, "step": 3100000 }, { "epoch": 1.86, "eval_loss": 0.25133055448532104, "eval_runtime": 1463.358, "eval_samples_per_second": 359.939, "eval_steps_per_second": 59.99, "step": 3100000 }, { "epoch": 1.86, "learning_rate": 4.30249973900428e-05, "loss": 0.2748, "step": 3100500 }, { "epoch": 1.86, "learning_rate": 4.3022897424482236e-05, "loss": 0.2667, "step": 3101000 }, { "epoch": 1.86, "learning_rate": 4.30208016588528e-05, "loss": 0.2761, "step": 3101500 }, { "epoch": 1.86, "learning_rate": 4.3018701693292223e-05, "loss": 0.28, "step": 3102000 }, { "epoch": 1.86, "learning_rate": 4.3016601727731664e-05, "loss": 0.2772, "step": 3102500 }, { "epoch": 1.86, "learning_rate": 4.30145017621711e-05, "loss": 0.2746, "step": 3103000 }, { "epoch": 1.86, "learning_rate": 4.301240179661053e-05, "loss": 0.2757, "step": 3103500 }, { "epoch": 1.86, "learning_rate": 4.30103060309811e-05, "loss": 0.2732, "step": 3104000 }, { "epoch": 1.86, "learning_rate": 4.3008206065420524e-05, "loss": 0.2819, "step": 3104500 }, { "epoch": 1.86, "learning_rate": 4.300610609985996e-05, "loss": 0.278, "step": 3105000 }, { "epoch": 1.86, "learning_rate": 4.30040061342994e-05, "loss": 0.2804, "step": 3105500 }, { "epoch": 1.86, "learning_rate": 4.300191036866996e-05, "loss": 0.2756, "step": 3106000 }, { "epoch": 1.86, "learning_rate": 4.2999810403109385e-05, "loss": 0.2749, "step": 3106500 }, { "epoch": 1.86, "learning_rate": 4.299771043754882e-05, "loss": 0.2792, "step": 3107000 }, { "epoch": 1.86, "learning_rate": 4.299561047198826e-05, "loss": 0.276, "step": 3107500 }, { "epoch": 1.86, "learning_rate": 4.299351470635882e-05, "loss": 0.2795, "step": 3108000 }, { "epoch": 1.86, "learning_rate": 4.299141474079825e-05, "loss": 0.2779, "step": 3108500 }, { "epoch": 1.86, "learning_rate": 4.298931477523768e-05, "loss": 0.2773, "step": 3109000 }, { "epoch": 1.86, "learning_rate": 4.298721480967712e-05, "loss": 0.2805, "step": 3109500 }, { "epoch": 1.86, "learning_rate": 4.298511904404768e-05, "loss": 0.2774, "step": 3110000 }, { "epoch": 1.86, "learning_rate": 4.298301907848711e-05, "loss": 0.2772, "step": 3110500 }, { "epoch": 1.87, "learning_rate": 4.2980919112926554e-05, "loss": 0.2802, "step": 3111000 }, { "epoch": 1.87, "learning_rate": 4.297881914736598e-05, "loss": 0.2745, "step": 3111500 }, { "epoch": 1.87, "learning_rate": 4.2976719181805414e-05, "loss": 0.2723, "step": 3112000 }, { "epoch": 1.87, "learning_rate": 4.2974623416175974e-05, "loss": 0.2775, "step": 3112500 }, { "epoch": 1.87, "learning_rate": 4.2972523450615414e-05, "loss": 0.2685, "step": 3113000 }, { "epoch": 1.87, "learning_rate": 4.297042348505485e-05, "loss": 0.2744, "step": 3113500 }, { "epoch": 1.87, "learning_rate": 4.2968323519494274e-05, "loss": 0.2752, "step": 3114000 }, { "epoch": 1.87, "learning_rate": 4.2966227753864835e-05, "loss": 0.2723, "step": 3114500 }, { "epoch": 1.87, "learning_rate": 4.296413198823539e-05, "loss": 0.2797, "step": 3115000 }, { "epoch": 1.87, "learning_rate": 4.296203202267483e-05, "loss": 0.2748, "step": 3115500 }, { "epoch": 1.87, "learning_rate": 4.295993205711426e-05, "loss": 0.2781, "step": 3116000 }, { "epoch": 1.87, "learning_rate": 4.2957832091553695e-05, "loss": 0.2696, "step": 3116500 }, { "epoch": 1.87, "learning_rate": 4.2955732125993136e-05, "loss": 0.274, "step": 3117000 }, { "epoch": 1.87, "learning_rate": 4.295363216043257e-05, "loss": 0.2776, "step": 3117500 }, { "epoch": 1.87, "learning_rate": 4.295153219487201e-05, "loss": 0.2705, "step": 3118000 }, { "epoch": 1.87, "learning_rate": 4.2949432229311436e-05, "loss": 0.2746, "step": 3118500 }, { "epoch": 1.87, "learning_rate": 4.294733226375087e-05, "loss": 0.28, "step": 3119000 }, { "epoch": 1.87, "learning_rate": 4.294523649812143e-05, "loss": 0.2772, "step": 3119500 }, { "epoch": 1.87, "learning_rate": 4.294313653256087e-05, "loss": 0.2773, "step": 3120000 }, { "epoch": 1.87, "learning_rate": 4.2941036567000304e-05, "loss": 0.2767, "step": 3120500 }, { "epoch": 1.87, "learning_rate": 4.293893660143973e-05, "loss": 0.2748, "step": 3121000 }, { "epoch": 1.87, "learning_rate": 4.293684503574141e-05, "loss": 0.2848, "step": 3121500 }, { "epoch": 1.87, "learning_rate": 4.2934745070180844e-05, "loss": 0.282, "step": 3122000 }, { "epoch": 1.87, "learning_rate": 4.2932645104620284e-05, "loss": 0.2737, "step": 3122500 }, { "epoch": 1.87, "learning_rate": 4.293054513905972e-05, "loss": 0.2757, "step": 3123000 }, { "epoch": 1.87, "learning_rate": 4.292844517349916e-05, "loss": 0.2802, "step": 3123500 }, { "epoch": 1.87, "learning_rate": 4.292634520793859e-05, "loss": 0.2818, "step": 3124000 }, { "epoch": 1.87, "learning_rate": 4.2924249442309145e-05, "loss": 0.2822, "step": 3124500 }, { "epoch": 1.87, "learning_rate": 4.292214947674858e-05, "loss": 0.2835, "step": 3125000 }, { "epoch": 1.87, "learning_rate": 4.292004951118802e-05, "loss": 0.2797, "step": 3125500 }, { "epoch": 1.87, "learning_rate": 4.291794954562745e-05, "loss": 0.2722, "step": 3126000 }, { "epoch": 1.87, "learning_rate": 4.2915849580066886e-05, "loss": 0.2767, "step": 3126500 }, { "epoch": 1.87, "learning_rate": 4.2913749614506326e-05, "loss": 0.2745, "step": 3127000 }, { "epoch": 1.88, "learning_rate": 4.291164964894576e-05, "loss": 0.2749, "step": 3127500 }, { "epoch": 1.88, "learning_rate": 4.2909549683385186e-05, "loss": 0.2812, "step": 3128000 }, { "epoch": 1.88, "learning_rate": 4.2907453917755747e-05, "loss": 0.2711, "step": 3128500 }, { "epoch": 1.88, "learning_rate": 4.290535395219519e-05, "loss": 0.2814, "step": 3129000 }, { "epoch": 1.88, "learning_rate": 4.290325398663462e-05, "loss": 0.2804, "step": 3129500 }, { "epoch": 1.88, "learning_rate": 4.2901154021074054e-05, "loss": 0.2799, "step": 3130000 }, { "epoch": 1.88, "learning_rate": 4.2899058255444614e-05, "loss": 0.2782, "step": 3130500 }, { "epoch": 1.88, "learning_rate": 4.289695828988405e-05, "loss": 0.2753, "step": 3131000 }, { "epoch": 1.88, "learning_rate": 4.289485832432348e-05, "loss": 0.2748, "step": 3131500 }, { "epoch": 1.88, "learning_rate": 4.289275835876292e-05, "loss": 0.2735, "step": 3132000 }, { "epoch": 1.88, "learning_rate": 4.2890662593133475e-05, "loss": 0.2702, "step": 3132500 }, { "epoch": 1.88, "learning_rate": 4.288856262757291e-05, "loss": 0.2793, "step": 3133000 }, { "epoch": 1.88, "learning_rate": 4.288646266201234e-05, "loss": 0.2774, "step": 3133500 }, { "epoch": 1.88, "learning_rate": 4.288436269645178e-05, "loss": 0.2795, "step": 3134000 }, { "epoch": 1.88, "learning_rate": 4.2882266930822335e-05, "loss": 0.2776, "step": 3134500 }, { "epoch": 1.88, "learning_rate": 4.288016696526177e-05, "loss": 0.276, "step": 3135000 }, { "epoch": 1.88, "learning_rate": 4.28780669997012e-05, "loss": 0.279, "step": 3135500 }, { "epoch": 1.88, "learning_rate": 4.287596703414064e-05, "loss": 0.275, "step": 3136000 }, { "epoch": 1.88, "learning_rate": 4.2873867068580076e-05, "loss": 0.2777, "step": 3136500 }, { "epoch": 1.88, "learning_rate": 4.287176710301951e-05, "loss": 0.2757, "step": 3137000 }, { "epoch": 1.88, "learning_rate": 4.286966713745894e-05, "loss": 0.2799, "step": 3137500 }, { "epoch": 1.88, "learning_rate": 4.2867567171898376e-05, "loss": 0.2793, "step": 3138000 }, { "epoch": 1.88, "learning_rate": 4.286547140626894e-05, "loss": 0.2735, "step": 3138500 }, { "epoch": 1.88, "learning_rate": 4.286337144070838e-05, "loss": 0.275, "step": 3139000 }, { "epoch": 1.88, "learning_rate": 4.286127147514781e-05, "loss": 0.2715, "step": 3139500 }, { "epoch": 1.88, "learning_rate": 4.285917150958724e-05, "loss": 0.2774, "step": 3140000 }, { "epoch": 1.88, "learning_rate": 4.285707154402668e-05, "loss": 0.2732, "step": 3140500 }, { "epoch": 1.88, "learning_rate": 4.285497157846611e-05, "loss": 0.2737, "step": 3141000 }, { "epoch": 1.88, "learning_rate": 4.2852871612905544e-05, "loss": 0.2827, "step": 3141500 }, { "epoch": 1.88, "learning_rate": 4.2850771647344985e-05, "loss": 0.2781, "step": 3142000 }, { "epoch": 1.88, "learning_rate": 4.284867588171554e-05, "loss": 0.2743, "step": 3142500 }, { "epoch": 1.88, "learning_rate": 4.284657591615497e-05, "loss": 0.2748, "step": 3143000 }, { "epoch": 1.88, "learning_rate": 4.2844475950594405e-05, "loss": 0.2765, "step": 3143500 }, { "epoch": 1.88, "learning_rate": 4.2842375985033845e-05, "loss": 0.2705, "step": 3144000 }, { "epoch": 1.89, "learning_rate": 4.284027601947328e-05, "loss": 0.2808, "step": 3144500 }, { "epoch": 1.89, "learning_rate": 4.283818025384383e-05, "loss": 0.2767, "step": 3145000 }, { "epoch": 1.89, "learning_rate": 4.283608028828327e-05, "loss": 0.28, "step": 3145500 }, { "epoch": 1.89, "learning_rate": 4.2833980322722706e-05, "loss": 0.2765, "step": 3146000 }, { "epoch": 1.89, "learning_rate": 4.2831884557093266e-05, "loss": 0.2747, "step": 3146500 }, { "epoch": 1.89, "learning_rate": 4.282978459153269e-05, "loss": 0.2766, "step": 3147000 }, { "epoch": 1.89, "learning_rate": 4.282768462597213e-05, "loss": 0.2751, "step": 3147500 }, { "epoch": 1.89, "learning_rate": 4.282558466041157e-05, "loss": 0.2733, "step": 3148000 }, { "epoch": 1.89, "learning_rate": 4.2823484694851e-05, "loss": 0.2762, "step": 3148500 }, { "epoch": 1.89, "learning_rate": 4.282138472929044e-05, "loss": 0.269, "step": 3149000 }, { "epoch": 1.89, "learning_rate": 4.2819288963660994e-05, "loss": 0.2739, "step": 3149500 }, { "epoch": 1.89, "learning_rate": 4.281718899810043e-05, "loss": 0.2785, "step": 3150000 }, { "epoch": 1.89, "learning_rate": 4.281508903253986e-05, "loss": 0.2842, "step": 3150500 }, { "epoch": 1.89, "learning_rate": 4.28129890669793e-05, "loss": 0.2713, "step": 3151000 }, { "epoch": 1.89, "learning_rate": 4.2810889101418735e-05, "loss": 0.2754, "step": 3151500 }, { "epoch": 1.89, "learning_rate": 4.280878913585817e-05, "loss": 0.2702, "step": 3152000 }, { "epoch": 1.89, "learning_rate": 4.280668917029761e-05, "loss": 0.2825, "step": 3152500 }, { "epoch": 1.89, "learning_rate": 4.280458920473704e-05, "loss": 0.2753, "step": 3153000 }, { "epoch": 1.89, "learning_rate": 4.2802493439107595e-05, "loss": 0.2755, "step": 3153500 }, { "epoch": 1.89, "learning_rate": 4.2800393473547036e-05, "loss": 0.2773, "step": 3154000 }, { "epoch": 1.89, "learning_rate": 4.279829350798647e-05, "loss": 0.2762, "step": 3154500 }, { "epoch": 1.89, "learning_rate": 4.27961935424259e-05, "loss": 0.2739, "step": 3155000 }, { "epoch": 1.89, "learning_rate": 4.2794097776796456e-05, "loss": 0.2767, "step": 3155500 }, { "epoch": 1.89, "learning_rate": 4.2791997811235896e-05, "loss": 0.2756, "step": 3156000 }, { "epoch": 1.89, "learning_rate": 4.278989784567533e-05, "loss": 0.2795, "step": 3156500 }, { "epoch": 1.89, "learning_rate": 4.278779788011476e-05, "loss": 0.2755, "step": 3157000 }, { "epoch": 1.89, "learning_rate": 4.2785697914554204e-05, "loss": 0.2685, "step": 3157500 }, { "epoch": 1.89, "learning_rate": 4.278359794899364e-05, "loss": 0.283, "step": 3158000 }, { "epoch": 1.89, "learning_rate": 4.278149798343307e-05, "loss": 0.2804, "step": 3158500 }, { "epoch": 1.89, "learning_rate": 4.277939801787251e-05, "loss": 0.2747, "step": 3159000 }, { "epoch": 1.89, "learning_rate": 4.2777298052311944e-05, "loss": 0.272, "step": 3159500 }, { "epoch": 1.89, "learning_rate": 4.27752022866825e-05, "loss": 0.2755, "step": 3160000 }, { "epoch": 1.89, "learning_rate": 4.277310232112193e-05, "loss": 0.2726, "step": 3160500 }, { "epoch": 1.9, "learning_rate": 4.277100235556137e-05, "loss": 0.2737, "step": 3161000 }, { "epoch": 1.9, "learning_rate": 4.2768902390000805e-05, "loss": 0.2691, "step": 3161500 }, { "epoch": 1.9, "learning_rate": 4.276680662437136e-05, "loss": 0.2722, "step": 3162000 }, { "epoch": 1.9, "learning_rate": 4.276471085874191e-05, "loss": 0.2782, "step": 3162500 }, { "epoch": 1.9, "learning_rate": 4.276261089318135e-05, "loss": 0.2734, "step": 3163000 }, { "epoch": 1.9, "learning_rate": 4.2760510927620786e-05, "loss": 0.2766, "step": 3163500 }, { "epoch": 1.9, "learning_rate": 4.275841096206022e-05, "loss": 0.2727, "step": 3164000 }, { "epoch": 1.9, "learning_rate": 4.275631099649966e-05, "loss": 0.2848, "step": 3164500 }, { "epoch": 1.9, "learning_rate": 4.275421103093909e-05, "loss": 0.2715, "step": 3165000 }, { "epoch": 1.9, "learning_rate": 4.2752111065378526e-05, "loss": 0.2812, "step": 3165500 }, { "epoch": 1.9, "learning_rate": 4.2750011099817967e-05, "loss": 0.2784, "step": 3166000 }, { "epoch": 1.9, "learning_rate": 4.27479111342574e-05, "loss": 0.2777, "step": 3166500 }, { "epoch": 1.9, "learning_rate": 4.2745815368627954e-05, "loss": 0.2715, "step": 3167000 }, { "epoch": 1.9, "learning_rate": 4.274371540306739e-05, "loss": 0.2793, "step": 3167500 }, { "epoch": 1.9, "learning_rate": 4.274161543750683e-05, "loss": 0.2768, "step": 3168000 }, { "epoch": 1.9, "learning_rate": 4.273951547194626e-05, "loss": 0.2749, "step": 3168500 }, { "epoch": 1.9, "learning_rate": 4.2737419706316814e-05, "loss": 0.2797, "step": 3169000 }, { "epoch": 1.9, "learning_rate": 4.2735319740756255e-05, "loss": 0.2715, "step": 3169500 }, { "epoch": 1.9, "learning_rate": 4.273321977519569e-05, "loss": 0.2775, "step": 3170000 }, { "epoch": 1.9, "learning_rate": 4.273112400956624e-05, "loss": 0.2759, "step": 3170500 }, { "epoch": 1.9, "learning_rate": 4.2729024044005675e-05, "loss": 0.2713, "step": 3171000 }, { "epoch": 1.9, "learning_rate": 4.2726924078445115e-05, "loss": 0.2663, "step": 3171500 }, { "epoch": 1.9, "learning_rate": 4.272482411288455e-05, "loss": 0.2803, "step": 3172000 }, { "epoch": 1.9, "learning_rate": 4.272272414732398e-05, "loss": 0.2803, "step": 3172500 }, { "epoch": 1.9, "learning_rate": 4.272062418176342e-05, "loss": 0.2755, "step": 3173000 }, { "epoch": 1.9, "learning_rate": 4.2718528416133976e-05, "loss": 0.2848, "step": 3173500 }, { "epoch": 1.9, "learning_rate": 4.271642845057341e-05, "loss": 0.2774, "step": 3174000 }, { "epoch": 1.9, "learning_rate": 4.271432848501284e-05, "loss": 0.2775, "step": 3174500 }, { "epoch": 1.9, "learning_rate": 4.271222851945228e-05, "loss": 0.2713, "step": 3175000 }, { "epoch": 1.9, "learning_rate": 4.2710128553891717e-05, "loss": 0.2753, "step": 3175500 }, { "epoch": 1.9, "learning_rate": 4.270802858833116e-05, "loss": 0.2672, "step": 3176000 }, { "epoch": 1.9, "learning_rate": 4.2705928622770584e-05, "loss": 0.2774, "step": 3176500 }, { "epoch": 1.9, "learning_rate": 4.270382865721002e-05, "loss": 0.2746, "step": 3177000 }, { "epoch": 1.91, "learning_rate": 4.270173289158058e-05, "loss": 0.2755, "step": 3177500 }, { "epoch": 1.91, "learning_rate": 4.269964132588225e-05, "loss": 0.2752, "step": 3178000 }, { "epoch": 1.91, "learning_rate": 4.2697541360321684e-05, "loss": 0.2764, "step": 3178500 }, { "epoch": 1.91, "learning_rate": 4.2695441394761125e-05, "loss": 0.2706, "step": 3179000 }, { "epoch": 1.91, "learning_rate": 4.269334142920056e-05, "loss": 0.2703, "step": 3179500 }, { "epoch": 1.91, "learning_rate": 4.269124146363999e-05, "loss": 0.2728, "step": 3180000 }, { "epoch": 1.91, "learning_rate": 4.268914149807943e-05, "loss": 0.2789, "step": 3180500 }, { "epoch": 1.91, "learning_rate": 4.2687041532518865e-05, "loss": 0.2718, "step": 3181000 }, { "epoch": 1.91, "learning_rate": 4.26849415669583e-05, "loss": 0.2758, "step": 3181500 }, { "epoch": 1.91, "learning_rate": 4.268284160139774e-05, "loss": 0.2689, "step": 3182000 }, { "epoch": 1.91, "learning_rate": 4.268074163583717e-05, "loss": 0.2732, "step": 3182500 }, { "epoch": 1.91, "learning_rate": 4.267864167027661e-05, "loss": 0.2763, "step": 3183000 }, { "epoch": 1.91, "learning_rate": 4.267654170471604e-05, "loss": 0.275, "step": 3183500 }, { "epoch": 1.91, "learning_rate": 4.267444173915547e-05, "loss": 0.2715, "step": 3184000 }, { "epoch": 1.91, "learning_rate": 4.267234597352603e-05, "loss": 0.2735, "step": 3184500 }, { "epoch": 1.91, "learning_rate": 4.267025020789659e-05, "loss": 0.2733, "step": 3185000 }, { "epoch": 1.91, "learning_rate": 4.266815444226714e-05, "loss": 0.2785, "step": 3185500 }, { "epoch": 1.91, "learning_rate": 4.266605447670658e-05, "loss": 0.2696, "step": 3186000 }, { "epoch": 1.91, "learning_rate": 4.2663954511146014e-05, "loss": 0.2737, "step": 3186500 }, { "epoch": 1.91, "learning_rate": 4.266185454558545e-05, "loss": 0.274, "step": 3187000 }, { "epoch": 1.91, "learning_rate": 4.265975458002489e-05, "loss": 0.2778, "step": 3187500 }, { "epoch": 1.91, "learning_rate": 4.265765881439544e-05, "loss": 0.275, "step": 3188000 }, { "epoch": 1.91, "learning_rate": 4.2655558848834875e-05, "loss": 0.2719, "step": 3188500 }, { "epoch": 1.91, "learning_rate": 4.2653458883274315e-05, "loss": 0.2764, "step": 3189000 }, { "epoch": 1.91, "learning_rate": 4.265135891771375e-05, "loss": 0.2737, "step": 3189500 }, { "epoch": 1.91, "learning_rate": 4.264925895215318e-05, "loss": 0.275, "step": 3190000 }, { "epoch": 1.91, "learning_rate": 4.264715898659262e-05, "loss": 0.2726, "step": 3190500 }, { "epoch": 1.91, "learning_rate": 4.2645059021032056e-05, "loss": 0.2727, "step": 3191000 }, { "epoch": 1.91, "learning_rate": 4.264295905547149e-05, "loss": 0.2747, "step": 3191500 }, { "epoch": 1.91, "learning_rate": 4.264085908991093e-05, "loss": 0.2793, "step": 3192000 }, { "epoch": 1.91, "learning_rate": 4.263875912435036e-05, "loss": 0.2723, "step": 3192500 }, { "epoch": 1.91, "learning_rate": 4.2636659158789796e-05, "loss": 0.2628, "step": 3193000 }, { "epoch": 1.91, "learning_rate": 4.263456339316035e-05, "loss": 0.2716, "step": 3193500 }, { "epoch": 1.91, "learning_rate": 4.263246342759979e-05, "loss": 0.2751, "step": 3194000 }, { "epoch": 1.92, "learning_rate": 4.2630363462039224e-05, "loss": 0.2769, "step": 3194500 }, { "epoch": 1.92, "learning_rate": 4.262826349647866e-05, "loss": 0.2699, "step": 3195000 }, { "epoch": 1.92, "learning_rate": 4.262616353091809e-05, "loss": 0.2739, "step": 3195500 }, { "epoch": 1.92, "learning_rate": 4.2624063565357524e-05, "loss": 0.2674, "step": 3196000 }, { "epoch": 1.92, "learning_rate": 4.2621963599796964e-05, "loss": 0.2749, "step": 3196500 }, { "epoch": 1.92, "learning_rate": 4.26198636342364e-05, "loss": 0.2829, "step": 3197000 }, { "epoch": 1.92, "learning_rate": 4.261777206853808e-05, "loss": 0.2725, "step": 3197500 }, { "epoch": 1.92, "learning_rate": 4.261567210297751e-05, "loss": 0.2745, "step": 3198000 }, { "epoch": 1.92, "learning_rate": 4.2613572137416945e-05, "loss": 0.2757, "step": 3198500 }, { "epoch": 1.92, "learning_rate": 4.2611472171856385e-05, "loss": 0.2725, "step": 3199000 }, { "epoch": 1.92, "learning_rate": 4.260937220629582e-05, "loss": 0.2782, "step": 3199500 }, { "epoch": 1.92, "learning_rate": 4.260727224073525e-05, "loss": 0.268, "step": 3200000 }, { "epoch": 1.92, "eval_loss": 0.2533232569694519, "eval_runtime": 1462.1663, "eval_samples_per_second": 360.233, "eval_steps_per_second": 60.039, "step": 3200000 }, { "epoch": 1.92, "learning_rate": 4.2605172275174686e-05, "loss": 0.2787, "step": 3200500 }, { "epoch": 1.92, "learning_rate": 4.260307230961412e-05, "loss": 0.2789, "step": 3201000 }, { "epoch": 1.92, "learning_rate": 4.260097654398468e-05, "loss": 0.2737, "step": 3201500 }, { "epoch": 1.92, "learning_rate": 4.259887657842411e-05, "loss": 0.2738, "step": 3202000 }, { "epoch": 1.92, "learning_rate": 4.259677661286355e-05, "loss": 0.2723, "step": 3202500 }, { "epoch": 1.92, "learning_rate": 4.259467664730298e-05, "loss": 0.2778, "step": 3203000 }, { "epoch": 1.92, "learning_rate": 4.259258088167354e-05, "loss": 0.2733, "step": 3203500 }, { "epoch": 1.92, "learning_rate": 4.259048091611298e-05, "loss": 0.2721, "step": 3204000 }, { "epoch": 1.92, "learning_rate": 4.2588385150483534e-05, "loss": 0.2781, "step": 3204500 }, { "epoch": 1.92, "learning_rate": 4.258628518492297e-05, "loss": 0.2699, "step": 3205000 }, { "epoch": 1.92, "learning_rate": 4.25841852193624e-05, "loss": 0.2793, "step": 3205500 }, { "epoch": 1.92, "learning_rate": 4.258208525380184e-05, "loss": 0.2772, "step": 3206000 }, { "epoch": 1.92, "learning_rate": 4.2579985288241275e-05, "loss": 0.2756, "step": 3206500 }, { "epoch": 1.92, "learning_rate": 4.257788532268071e-05, "loss": 0.2791, "step": 3207000 }, { "epoch": 1.92, "learning_rate": 4.257578535712014e-05, "loss": 0.2733, "step": 3207500 }, { "epoch": 1.92, "learning_rate": 4.2573685391559575e-05, "loss": 0.2837, "step": 3208000 }, { "epoch": 1.92, "learning_rate": 4.257158542599901e-05, "loss": 0.2729, "step": 3208500 }, { "epoch": 1.92, "learning_rate": 4.256948966036957e-05, "loss": 0.2787, "step": 3209000 }, { "epoch": 1.92, "learning_rate": 4.256738969480901e-05, "loss": 0.278, "step": 3209500 }, { "epoch": 1.92, "learning_rate": 4.2565289729248436e-05, "loss": 0.2776, "step": 3210000 }, { "epoch": 1.92, "learning_rate": 4.2563189763687876e-05, "loss": 0.2766, "step": 3210500 }, { "epoch": 1.93, "learning_rate": 4.2561093998058436e-05, "loss": 0.2777, "step": 3211000 }, { "epoch": 1.93, "learning_rate": 4.255899403249787e-05, "loss": 0.2775, "step": 3211500 }, { "epoch": 1.93, "learning_rate": 4.25568940669373e-05, "loss": 0.2822, "step": 3212000 }, { "epoch": 1.93, "learning_rate": 4.2554794101376737e-05, "loss": 0.2752, "step": 3212500 }, { "epoch": 1.93, "learning_rate": 4.255269413581617e-05, "loss": 0.2823, "step": 3213000 }, { "epoch": 1.93, "learning_rate": 4.255059837018673e-05, "loss": 0.2731, "step": 3213500 }, { "epoch": 1.93, "learning_rate": 4.2548498404626164e-05, "loss": 0.2757, "step": 3214000 }, { "epoch": 1.93, "learning_rate": 4.25463984390656e-05, "loss": 0.2758, "step": 3214500 }, { "epoch": 1.93, "learning_rate": 4.254429847350503e-05, "loss": 0.2779, "step": 3215000 }, { "epoch": 1.93, "learning_rate": 4.254220270787559e-05, "loss": 0.2703, "step": 3215500 }, { "epoch": 1.93, "learning_rate": 4.2540102742315025e-05, "loss": 0.2708, "step": 3216000 }, { "epoch": 1.93, "learning_rate": 4.2538002776754465e-05, "loss": 0.284, "step": 3216500 }, { "epoch": 1.93, "learning_rate": 4.253590281119389e-05, "loss": 0.2723, "step": 3217000 }, { "epoch": 1.93, "learning_rate": 4.253380704556445e-05, "loss": 0.276, "step": 3217500 }, { "epoch": 1.93, "learning_rate": 4.253170708000389e-05, "loss": 0.2783, "step": 3218000 }, { "epoch": 1.93, "learning_rate": 4.2529607114443326e-05, "loss": 0.2675, "step": 3218500 }, { "epoch": 1.93, "learning_rate": 4.252750714888276e-05, "loss": 0.2757, "step": 3219000 }, { "epoch": 1.93, "learning_rate": 4.252541138325331e-05, "loss": 0.2707, "step": 3219500 }, { "epoch": 1.93, "learning_rate": 4.252331141769275e-05, "loss": 0.2744, "step": 3220000 }, { "epoch": 1.93, "learning_rate": 4.2521211452132186e-05, "loss": 0.2742, "step": 3220500 }, { "epoch": 1.93, "learning_rate": 4.251911148657162e-05, "loss": 0.2783, "step": 3221000 }, { "epoch": 1.93, "learning_rate": 4.251701152101106e-05, "loss": 0.2796, "step": 3221500 }, { "epoch": 1.93, "learning_rate": 4.251491155545049e-05, "loss": 0.2672, "step": 3222000 }, { "epoch": 1.93, "learning_rate": 4.251281158988992e-05, "loss": 0.2779, "step": 3222500 }, { "epoch": 1.93, "learning_rate": 4.251071162432936e-05, "loss": 0.2733, "step": 3223000 }, { "epoch": 1.93, "learning_rate": 4.250861585869992e-05, "loss": 0.27, "step": 3223500 }, { "epoch": 1.93, "learning_rate": 4.2506520093070474e-05, "loss": 0.2664, "step": 3224000 }, { "epoch": 1.93, "learning_rate": 4.250442012750991e-05, "loss": 0.2764, "step": 3224500 }, { "epoch": 1.93, "learning_rate": 4.250232016194935e-05, "loss": 0.277, "step": 3225000 }, { "epoch": 1.93, "learning_rate": 4.250022019638878e-05, "loss": 0.276, "step": 3225500 }, { "epoch": 1.93, "learning_rate": 4.2498120230828215e-05, "loss": 0.2748, "step": 3226000 }, { "epoch": 1.93, "learning_rate": 4.249602026526765e-05, "loss": 0.2736, "step": 3226500 }, { "epoch": 1.93, "learning_rate": 4.249392029970708e-05, "loss": 0.2762, "step": 3227000 }, { "epoch": 1.94, "learning_rate": 4.2491820334146515e-05, "loss": 0.2732, "step": 3227500 }, { "epoch": 1.94, "learning_rate": 4.2489724568517076e-05, "loss": 0.279, "step": 3228000 }, { "epoch": 1.94, "learning_rate": 4.2487624602956516e-05, "loss": 0.2763, "step": 3228500 }, { "epoch": 1.94, "learning_rate": 4.248552463739594e-05, "loss": 0.2782, "step": 3229000 }, { "epoch": 1.94, "learning_rate": 4.2483424671835376e-05, "loss": 0.2779, "step": 3229500 }, { "epoch": 1.94, "learning_rate": 4.2481328906205936e-05, "loss": 0.2739, "step": 3230000 }, { "epoch": 1.94, "learning_rate": 4.2479228940645377e-05, "loss": 0.2702, "step": 3230500 }, { "epoch": 1.94, "learning_rate": 4.247712897508481e-05, "loss": 0.2797, "step": 3231000 }, { "epoch": 1.94, "learning_rate": 4.2475029009524244e-05, "loss": 0.2731, "step": 3231500 }, { "epoch": 1.94, "learning_rate": 4.2472933243894804e-05, "loss": 0.2737, "step": 3232000 }, { "epoch": 1.94, "learning_rate": 4.247083327833424e-05, "loss": 0.2811, "step": 3232500 }, { "epoch": 1.94, "learning_rate": 4.246873331277367e-05, "loss": 0.2705, "step": 3233000 }, { "epoch": 1.94, "learning_rate": 4.246663334721311e-05, "loss": 0.273, "step": 3233500 }, { "epoch": 1.94, "learning_rate": 4.2464537581583665e-05, "loss": 0.2754, "step": 3234000 }, { "epoch": 1.94, "learning_rate": 4.24624376160231e-05, "loss": 0.2781, "step": 3234500 }, { "epoch": 1.94, "learning_rate": 4.246033765046253e-05, "loss": 0.2682, "step": 3235000 }, { "epoch": 1.94, "learning_rate": 4.245823768490197e-05, "loss": 0.2762, "step": 3235500 }, { "epoch": 1.94, "learning_rate": 4.24561377193414e-05, "loss": 0.2745, "step": 3236000 }, { "epoch": 1.94, "learning_rate": 4.245404195371196e-05, "loss": 0.2771, "step": 3236500 }, { "epoch": 1.94, "learning_rate": 4.245194198815139e-05, "loss": 0.2737, "step": 3237000 }, { "epoch": 1.94, "learning_rate": 4.244984202259083e-05, "loss": 0.2696, "step": 3237500 }, { "epoch": 1.94, "learning_rate": 4.2447742057030266e-05, "loss": 0.2753, "step": 3238000 }, { "epoch": 1.94, "learning_rate": 4.244564629140082e-05, "loss": 0.2718, "step": 3238500 }, { "epoch": 1.94, "learning_rate": 4.244354632584026e-05, "loss": 0.2727, "step": 3239000 }, { "epoch": 1.94, "learning_rate": 4.244144636027969e-05, "loss": 0.2765, "step": 3239500 }, { "epoch": 1.94, "learning_rate": 4.243934639471913e-05, "loss": 0.2786, "step": 3240000 }, { "epoch": 1.94, "learning_rate": 4.243725062908968e-05, "loss": 0.2649, "step": 3240500 }, { "epoch": 1.94, "learning_rate": 4.243515066352912e-05, "loss": 0.2786, "step": 3241000 }, { "epoch": 1.94, "learning_rate": 4.2433050697968554e-05, "loss": 0.2788, "step": 3241500 }, { "epoch": 1.94, "learning_rate": 4.243095073240799e-05, "loss": 0.2758, "step": 3242000 }, { "epoch": 1.94, "learning_rate": 4.242885076684743e-05, "loss": 0.2736, "step": 3242500 }, { "epoch": 1.94, "learning_rate": 4.242675500121798e-05, "loss": 0.2786, "step": 3243000 }, { "epoch": 1.94, "learning_rate": 4.2424655035657415e-05, "loss": 0.2698, "step": 3243500 }, { "epoch": 1.94, "learning_rate": 4.242255507009685e-05, "loss": 0.2731, "step": 3244000 }, { "epoch": 1.95, "learning_rate": 4.242045510453629e-05, "loss": 0.2823, "step": 3244500 }, { "epoch": 1.95, "learning_rate": 4.241835933890684e-05, "loss": 0.2759, "step": 3245000 }, { "epoch": 1.95, "learning_rate": 4.2416259373346275e-05, "loss": 0.2783, "step": 3245500 }, { "epoch": 1.95, "learning_rate": 4.2414159407785716e-05, "loss": 0.2761, "step": 3246000 }, { "epoch": 1.95, "learning_rate": 4.241205944222515e-05, "loss": 0.2758, "step": 3246500 }, { "epoch": 1.95, "learning_rate": 4.24099636765957e-05, "loss": 0.2719, "step": 3247000 }, { "epoch": 1.95, "learning_rate": 4.2407863711035136e-05, "loss": 0.2707, "step": 3247500 }, { "epoch": 1.95, "learning_rate": 4.2405763745474576e-05, "loss": 0.2767, "step": 3248000 }, { "epoch": 1.95, "learning_rate": 4.240366377991401e-05, "loss": 0.2678, "step": 3248500 }, { "epoch": 1.95, "learning_rate": 4.240156381435344e-05, "loss": 0.2758, "step": 3249000 }, { "epoch": 1.95, "learning_rate": 4.2399468048724e-05, "loss": 0.2752, "step": 3249500 }, { "epoch": 1.95, "learning_rate": 4.239736808316344e-05, "loss": 0.2703, "step": 3250000 }, { "epoch": 1.95, "learning_rate": 4.239526811760287e-05, "loss": 0.2775, "step": 3250500 }, { "epoch": 1.95, "learning_rate": 4.2393168152042304e-05, "loss": 0.2696, "step": 3251000 }, { "epoch": 1.95, "learning_rate": 4.2391068186481744e-05, "loss": 0.2732, "step": 3251500 }, { "epoch": 1.95, "learning_rate": 4.23889724208523e-05, "loss": 0.2803, "step": 3252000 }, { "epoch": 1.95, "learning_rate": 4.238687245529173e-05, "loss": 0.2759, "step": 3252500 }, { "epoch": 1.95, "learning_rate": 4.238477248973117e-05, "loss": 0.2701, "step": 3253000 }, { "epoch": 1.95, "learning_rate": 4.2382672524170605e-05, "loss": 0.2703, "step": 3253500 }, { "epoch": 1.95, "learning_rate": 4.238057255861004e-05, "loss": 0.2723, "step": 3254000 }, { "epoch": 1.95, "learning_rate": 4.237847679298059e-05, "loss": 0.2754, "step": 3254500 }, { "epoch": 1.95, "learning_rate": 4.237637682742003e-05, "loss": 0.2763, "step": 3255000 }, { "epoch": 1.95, "learning_rate": 4.2374276861859466e-05, "loss": 0.2743, "step": 3255500 }, { "epoch": 1.95, "learning_rate": 4.23721768962989e-05, "loss": 0.2749, "step": 3256000 }, { "epoch": 1.95, "learning_rate": 4.237008113066945e-05, "loss": 0.278, "step": 3256500 }, { "epoch": 1.95, "learning_rate": 4.236798116510889e-05, "loss": 0.2774, "step": 3257000 }, { "epoch": 1.95, "learning_rate": 4.2365881199548326e-05, "loss": 0.2795, "step": 3257500 }, { "epoch": 1.95, "learning_rate": 4.236378123398776e-05, "loss": 0.2729, "step": 3258000 }, { "epoch": 1.95, "learning_rate": 4.236168546835832e-05, "loss": 0.2786, "step": 3258500 }, { "epoch": 1.95, "learning_rate": 4.2359585502797754e-05, "loss": 0.2752, "step": 3259000 }, { "epoch": 1.95, "learning_rate": 4.235748553723719e-05, "loss": 0.2688, "step": 3259500 }, { "epoch": 1.95, "learning_rate": 4.235538557167663e-05, "loss": 0.2697, "step": 3260000 }, { "epoch": 1.95, "learning_rate": 4.235328560611606e-05, "loss": 0.2739, "step": 3260500 }, { "epoch": 1.96, "learning_rate": 4.2351189840486614e-05, "loss": 0.2721, "step": 3261000 }, { "epoch": 1.96, "learning_rate": 4.234908987492605e-05, "loss": 0.2786, "step": 3261500 }, { "epoch": 1.96, "learning_rate": 4.234698990936549e-05, "loss": 0.2695, "step": 3262000 }, { "epoch": 1.96, "learning_rate": 4.234489414373604e-05, "loss": 0.275, "step": 3262500 }, { "epoch": 1.96, "learning_rate": 4.2342794178175475e-05, "loss": 0.2653, "step": 3263000 }, { "epoch": 1.96, "learning_rate": 4.234069421261491e-05, "loss": 0.2811, "step": 3263500 }, { "epoch": 1.96, "learning_rate": 4.233859424705435e-05, "loss": 0.2751, "step": 3264000 }, { "epoch": 1.96, "learning_rate": 4.233649428149378e-05, "loss": 0.2699, "step": 3264500 }, { "epoch": 1.96, "learning_rate": 4.2334394315933216e-05, "loss": 0.2782, "step": 3265000 }, { "epoch": 1.96, "learning_rate": 4.2332294350372656e-05, "loss": 0.2786, "step": 3265500 }, { "epoch": 1.96, "learning_rate": 4.233019438481209e-05, "loss": 0.2774, "step": 3266000 }, { "epoch": 1.96, "learning_rate": 4.232809861918264e-05, "loss": 0.2745, "step": 3266500 }, { "epoch": 1.96, "learning_rate": 4.232599865362208e-05, "loss": 0.2748, "step": 3267000 }, { "epoch": 1.96, "learning_rate": 4.232390288799264e-05, "loss": 0.2742, "step": 3267500 }, { "epoch": 1.96, "learning_rate": 4.232180292243207e-05, "loss": 0.2768, "step": 3268000 }, { "epoch": 1.96, "learning_rate": 4.2319702956871504e-05, "loss": 0.2728, "step": 3268500 }, { "epoch": 1.96, "learning_rate": 4.2317602991310944e-05, "loss": 0.2712, "step": 3269000 }, { "epoch": 1.96, "learning_rate": 4.231550302575038e-05, "loss": 0.2731, "step": 3269500 }, { "epoch": 1.96, "learning_rate": 4.231340306018981e-05, "loss": 0.2738, "step": 3270000 }, { "epoch": 1.96, "learning_rate": 4.231130309462925e-05, "loss": 0.2723, "step": 3270500 }, { "epoch": 1.96, "learning_rate": 4.2309203129068685e-05, "loss": 0.2695, "step": 3271000 }, { "epoch": 1.96, "learning_rate": 4.230710316350812e-05, "loss": 0.2712, "step": 3271500 }, { "epoch": 1.96, "learning_rate": 4.230500739787867e-05, "loss": 0.2739, "step": 3272000 }, { "epoch": 1.96, "learning_rate": 4.230290743231811e-05, "loss": 0.2713, "step": 3272500 }, { "epoch": 1.96, "learning_rate": 4.2300807466757545e-05, "loss": 0.27, "step": 3273000 }, { "epoch": 1.96, "learning_rate": 4.2298707501196986e-05, "loss": 0.275, "step": 3273500 }, { "epoch": 1.96, "learning_rate": 4.229661593549865e-05, "loss": 0.2707, "step": 3274000 }, { "epoch": 1.96, "learning_rate": 4.229451596993809e-05, "loss": 0.2729, "step": 3274500 }, { "epoch": 1.96, "learning_rate": 4.2292416004377526e-05, "loss": 0.2709, "step": 3275000 }, { "epoch": 1.96, "learning_rate": 4.229031603881696e-05, "loss": 0.2758, "step": 3275500 }, { "epoch": 1.96, "learning_rate": 4.22882160732564e-05, "loss": 0.2725, "step": 3276000 }, { "epoch": 1.96, "learning_rate": 4.228611610769583e-05, "loss": 0.2706, "step": 3276500 }, { "epoch": 1.96, "learning_rate": 4.228401614213527e-05, "loss": 0.2714, "step": 3277000 }, { "epoch": 1.96, "learning_rate": 4.228192037650582e-05, "loss": 0.2742, "step": 3277500 }, { "epoch": 1.97, "learning_rate": 4.227982041094526e-05, "loss": 0.2771, "step": 3278000 }, { "epoch": 1.97, "learning_rate": 4.2277720445384694e-05, "loss": 0.2793, "step": 3278500 }, { "epoch": 1.97, "learning_rate": 4.227562047982413e-05, "loss": 0.2712, "step": 3279000 }, { "epoch": 1.97, "learning_rate": 4.227352471419469e-05, "loss": 0.2714, "step": 3279500 }, { "epoch": 1.97, "learning_rate": 4.227142474863412e-05, "loss": 0.2749, "step": 3280000 }, { "epoch": 1.97, "learning_rate": 4.2269324783073555e-05, "loss": 0.2731, "step": 3280500 }, { "epoch": 1.97, "learning_rate": 4.2267224817512995e-05, "loss": 0.2713, "step": 3281000 }, { "epoch": 1.97, "learning_rate": 4.226512485195243e-05, "loss": 0.2787, "step": 3281500 }, { "epoch": 1.97, "learning_rate": 4.226302488639186e-05, "loss": 0.2766, "step": 3282000 }, { "epoch": 1.97, "learning_rate": 4.22609249208313e-05, "loss": 0.2751, "step": 3282500 }, { "epoch": 1.97, "learning_rate": 4.2258824955270736e-05, "loss": 0.2734, "step": 3283000 }, { "epoch": 1.97, "learning_rate": 4.225672918964129e-05, "loss": 0.2792, "step": 3283500 }, { "epoch": 1.97, "learning_rate": 4.225462922408072e-05, "loss": 0.2805, "step": 3284000 }, { "epoch": 1.97, "learning_rate": 4.225252925852016e-05, "loss": 0.2736, "step": 3284500 }, { "epoch": 1.97, "learning_rate": 4.2250429292959596e-05, "loss": 0.2769, "step": 3285000 }, { "epoch": 1.97, "learning_rate": 4.224832932739903e-05, "loss": 0.2704, "step": 3285500 }, { "epoch": 1.97, "learning_rate": 4.224623356176959e-05, "loss": 0.2775, "step": 3286000 }, { "epoch": 1.97, "learning_rate": 4.2244137796140144e-05, "loss": 0.2746, "step": 3286500 }, { "epoch": 1.97, "learning_rate": 4.224203783057958e-05, "loss": 0.27, "step": 3287000 }, { "epoch": 1.97, "learning_rate": 4.223993786501901e-05, "loss": 0.2685, "step": 3287500 }, { "epoch": 1.97, "learning_rate": 4.223783789945845e-05, "loss": 0.2827, "step": 3288000 }, { "epoch": 1.97, "learning_rate": 4.2235742133829004e-05, "loss": 0.2788, "step": 3288500 }, { "epoch": 1.97, "learning_rate": 4.223364216826844e-05, "loss": 0.2759, "step": 3289000 }, { "epoch": 1.97, "learning_rate": 4.223154220270787e-05, "loss": 0.2736, "step": 3289500 }, { "epoch": 1.97, "learning_rate": 4.222944223714731e-05, "loss": 0.2721, "step": 3290000 }, { "epoch": 1.97, "learning_rate": 4.2227342271586745e-05, "loss": 0.2779, "step": 3290500 }, { "epoch": 1.97, "learning_rate": 4.22252465059573e-05, "loss": 0.2759, "step": 3291000 }, { "epoch": 1.97, "learning_rate": 4.222314654039673e-05, "loss": 0.2827, "step": 3291500 }, { "epoch": 1.97, "learning_rate": 4.222104657483617e-05, "loss": 0.2746, "step": 3292000 }, { "epoch": 1.97, "learning_rate": 4.2218946609275606e-05, "loss": 0.2764, "step": 3292500 }, { "epoch": 1.97, "learning_rate": 4.2216846643715046e-05, "loss": 0.2787, "step": 3293000 }, { "epoch": 1.97, "learning_rate": 4.221474667815448e-05, "loss": 0.2728, "step": 3293500 }, { "epoch": 1.97, "learning_rate": 4.221265091252503e-05, "loss": 0.2699, "step": 3294000 }, { "epoch": 1.98, "learning_rate": 4.2210550946964467e-05, "loss": 0.2738, "step": 3294500 }, { "epoch": 1.98, "learning_rate": 4.220845098140391e-05, "loss": 0.2652, "step": 3295000 }, { "epoch": 1.98, "learning_rate": 4.220635101584334e-05, "loss": 0.2792, "step": 3295500 }, { "epoch": 1.98, "learning_rate": 4.2204251050282774e-05, "loss": 0.2816, "step": 3296000 }, { "epoch": 1.98, "learning_rate": 4.2202151084722214e-05, "loss": 0.2797, "step": 3296500 }, { "epoch": 1.98, "learning_rate": 4.220005111916165e-05, "loss": 0.278, "step": 3297000 }, { "epoch": 1.98, "learning_rate": 4.219795115360108e-05, "loss": 0.2714, "step": 3297500 }, { "epoch": 1.98, "learning_rate": 4.2195851188040514e-05, "loss": 0.2747, "step": 3298000 }, { "epoch": 1.98, "learning_rate": 4.2193755422411075e-05, "loss": 0.271, "step": 3298500 }, { "epoch": 1.98, "learning_rate": 4.219165545685051e-05, "loss": 0.2742, "step": 3299000 }, { "epoch": 1.98, "learning_rate": 4.218955969122106e-05, "loss": 0.275, "step": 3299500 }, { "epoch": 1.98, "learning_rate": 4.21874597256605e-05, "loss": 0.2783, "step": 3300000 }, { "epoch": 1.98, "eval_loss": 0.2493673712015152, "eval_runtime": 1464.6104, "eval_samples_per_second": 359.631, "eval_steps_per_second": 59.939, "step": 3300000 }, { "epoch": 1.98, "learning_rate": 4.2185359760099935e-05, "loss": 0.2738, "step": 3300500 }, { "epoch": 1.98, "learning_rate": 4.218325979453937e-05, "loss": 0.2761, "step": 3301000 }, { "epoch": 1.98, "learning_rate": 4.218115982897881e-05, "loss": 0.271, "step": 3301500 }, { "epoch": 1.98, "learning_rate": 4.217905986341824e-05, "loss": 0.28, "step": 3302000 }, { "epoch": 1.98, "learning_rate": 4.2176959897857676e-05, "loss": 0.2729, "step": 3302500 }, { "epoch": 1.98, "learning_rate": 4.217485993229711e-05, "loss": 0.2684, "step": 3303000 }, { "epoch": 1.98, "learning_rate": 4.217276416666767e-05, "loss": 0.2681, "step": 3303500 }, { "epoch": 1.98, "learning_rate": 4.21706642011071e-05, "loss": 0.2692, "step": 3304000 }, { "epoch": 1.98, "learning_rate": 4.216856423554654e-05, "loss": 0.2726, "step": 3304500 }, { "epoch": 1.98, "learning_rate": 4.216646426998598e-05, "loss": 0.2787, "step": 3305000 }, { "epoch": 1.98, "learning_rate": 4.2164364304425404e-05, "loss": 0.278, "step": 3305500 }, { "epoch": 1.98, "learning_rate": 4.2162268538795964e-05, "loss": 0.2715, "step": 3306000 }, { "epoch": 1.98, "learning_rate": 4.21601685732354e-05, "loss": 0.2741, "step": 3306500 }, { "epoch": 1.98, "learning_rate": 4.215806860767484e-05, "loss": 0.2775, "step": 3307000 }, { "epoch": 1.98, "learning_rate": 4.2155968642114264e-05, "loss": 0.276, "step": 3307500 }, { "epoch": 1.98, "learning_rate": 4.2153872876484825e-05, "loss": 0.2741, "step": 3308000 }, { "epoch": 1.98, "learning_rate": 4.2151772910924265e-05, "loss": 0.2745, "step": 3308500 }, { "epoch": 1.98, "learning_rate": 4.21496729453637e-05, "loss": 0.2749, "step": 3309000 }, { "epoch": 1.98, "learning_rate": 4.214757297980313e-05, "loss": 0.2716, "step": 3309500 }, { "epoch": 1.98, "learning_rate": 4.2145477214173685e-05, "loss": 0.2729, "step": 3310000 }, { "epoch": 1.98, "learning_rate": 4.2143377248613126e-05, "loss": 0.2738, "step": 3310500 }, { "epoch": 1.99, "learning_rate": 4.214127728305256e-05, "loss": 0.2757, "step": 3311000 }, { "epoch": 1.99, "learning_rate": 4.213917731749199e-05, "loss": 0.2717, "step": 3311500 }, { "epoch": 1.99, "learning_rate": 4.2137081551862546e-05, "loss": 0.2725, "step": 3312000 }, { "epoch": 1.99, "learning_rate": 4.2134981586301986e-05, "loss": 0.2723, "step": 3312500 }, { "epoch": 1.99, "learning_rate": 4.213288162074142e-05, "loss": 0.2714, "step": 3313000 }, { "epoch": 1.99, "learning_rate": 4.213078165518085e-05, "loss": 0.2737, "step": 3313500 }, { "epoch": 1.99, "learning_rate": 4.2128681689620294e-05, "loss": 0.2748, "step": 3314000 }, { "epoch": 1.99, "learning_rate": 4.212658172405973e-05, "loss": 0.277, "step": 3314500 }, { "epoch": 1.99, "learning_rate": 4.212448175849916e-05, "loss": 0.2745, "step": 3315000 }, { "epoch": 1.99, "learning_rate": 4.2122381792938594e-05, "loss": 0.2751, "step": 3315500 }, { "epoch": 1.99, "learning_rate": 4.2120286027309154e-05, "loss": 0.2699, "step": 3316000 }, { "epoch": 1.99, "learning_rate": 4.211818606174859e-05, "loss": 0.2762, "step": 3316500 }, { "epoch": 1.99, "learning_rate": 4.211608609618802e-05, "loss": 0.2747, "step": 3317000 }, { "epoch": 1.99, "learning_rate": 4.2113986130627455e-05, "loss": 0.2751, "step": 3317500 }, { "epoch": 1.99, "learning_rate": 4.2111890364998015e-05, "loss": 0.2768, "step": 3318000 }, { "epoch": 1.99, "learning_rate": 4.210979039943745e-05, "loss": 0.2797, "step": 3318500 }, { "epoch": 1.99, "learning_rate": 4.210769043387689e-05, "loss": 0.2791, "step": 3319000 }, { "epoch": 1.99, "learning_rate": 4.210559886817856e-05, "loss": 0.281, "step": 3319500 }, { "epoch": 1.99, "learning_rate": 4.2103498902617996e-05, "loss": 0.2768, "step": 3320000 }, { "epoch": 1.99, "learning_rate": 4.210139893705743e-05, "loss": 0.2691, "step": 3320500 }, { "epoch": 1.99, "learning_rate": 4.209929897149687e-05, "loss": 0.2823, "step": 3321000 }, { "epoch": 1.99, "learning_rate": 4.20971990059363e-05, "loss": 0.272, "step": 3321500 }, { "epoch": 1.99, "learning_rate": 4.2095099040375736e-05, "loss": 0.2757, "step": 3322000 }, { "epoch": 1.99, "learning_rate": 4.209299907481518e-05, "loss": 0.2728, "step": 3322500 }, { "epoch": 1.99, "learning_rate": 4.209089910925461e-05, "loss": 0.2698, "step": 3323000 }, { "epoch": 1.99, "learning_rate": 4.2088799143694044e-05, "loss": 0.2738, "step": 3323500 }, { "epoch": 1.99, "learning_rate": 4.2086699178133484e-05, "loss": 0.2751, "step": 3324000 }, { "epoch": 1.99, "learning_rate": 4.208459921257291e-05, "loss": 0.2707, "step": 3324500 }, { "epoch": 1.99, "learning_rate": 4.2082499247012344e-05, "loss": 0.2735, "step": 3325000 }, { "epoch": 1.99, "learning_rate": 4.2080403481382904e-05, "loss": 0.2725, "step": 3325500 }, { "epoch": 1.99, "learning_rate": 4.2078303515822345e-05, "loss": 0.2743, "step": 3326000 }, { "epoch": 1.99, "learning_rate": 4.207620355026177e-05, "loss": 0.2799, "step": 3326500 }, { "epoch": 1.99, "learning_rate": 4.2074103584701205e-05, "loss": 0.2708, "step": 3327000 }, { "epoch": 1.99, "learning_rate": 4.2072003619140645e-05, "loss": 0.2707, "step": 3327500 }, { "epoch": 2.0, "learning_rate": 4.2069907853511205e-05, "loss": 0.2721, "step": 3328000 }, { "epoch": 2.0, "learning_rate": 4.206780788795064e-05, "loss": 0.2701, "step": 3328500 }, { "epoch": 2.0, "learning_rate": 4.206570792239007e-05, "loss": 0.2768, "step": 3329000 }, { "epoch": 2.0, "learning_rate": 4.2063607956829506e-05, "loss": 0.2742, "step": 3329500 }, { "epoch": 2.0, "learning_rate": 4.2061512191200066e-05, "loss": 0.277, "step": 3330000 }, { "epoch": 2.0, "learning_rate": 4.20594122256395e-05, "loss": 0.2707, "step": 3330500 }, { "epoch": 2.0, "learning_rate": 4.205731646001005e-05, "loss": 0.2747, "step": 3331000 }, { "epoch": 2.0, "learning_rate": 4.205521649444949e-05, "loss": 0.2737, "step": 3331500 }, { "epoch": 2.0, "learning_rate": 4.205311652888893e-05, "loss": 0.2698, "step": 3332000 }, { "epoch": 2.0, "learning_rate": 4.205101656332836e-05, "loss": 0.2676, "step": 3332500 }, { "epoch": 2.0, "learning_rate": 4.20489165977678e-05, "loss": 0.2718, "step": 3333000 }, { "epoch": 2.0, "learning_rate": 4.2046816632207234e-05, "loss": 0.2781, "step": 3333500 }, { "epoch": 2.0, "learning_rate": 4.204471666664666e-05, "loss": 0.2721, "step": 3334000 }, { "epoch": 2.0, "learning_rate": 4.20426167010861e-05, "loss": 0.2806, "step": 3334500 }, { "epoch": 2.0, "learning_rate": 4.204052093545666e-05, "loss": 0.2819, "step": 3335000 }, { "epoch": 2.0, "learning_rate": 4.2038420969896095e-05, "loss": 0.2719, "step": 3335500 }, { "epoch": 2.0, "learning_rate": 4.203632100433553e-05, "loss": 0.2746, "step": 3336000 }, { "epoch": 2.0, "learning_rate": 4.203422103877496e-05, "loss": 0.2676, "step": 3336500 }, { "epoch": 2.0, "learning_rate": 4.203212527314552e-05, "loss": 0.2674, "step": 3337000 }, { "epoch": 2.0, "learning_rate": 4.2030025307584955e-05, "loss": 0.2729, "step": 3337500 }, { "epoch": 2.0, "learning_rate": 4.2027925342024396e-05, "loss": 0.2663, "step": 3338000 }, { "epoch": 2.0, "learning_rate": 4.202582537646382e-05, "loss": 0.275, "step": 3338500 }, { "epoch": 2.0, "learning_rate": 4.202372961083438e-05, "loss": 0.265, "step": 3339000 }, { "epoch": 2.0, "learning_rate": 4.2021629645273816e-05, "loss": 0.2676, "step": 3339500 }, { "epoch": 2.0, "learning_rate": 4.2019529679713256e-05, "loss": 0.2644, "step": 3340000 }, { "epoch": 2.0, "learning_rate": 4.201742971415269e-05, "loss": 0.2755, "step": 3340500 }, { "epoch": 2.0, "learning_rate": 4.2015329748592116e-05, "loss": 0.2651, "step": 3341000 }, { "epoch": 2.0, "learning_rate": 4.201323398296268e-05, "loss": 0.271, "step": 3341500 }, { "epoch": 2.0, "learning_rate": 4.201113401740212e-05, "loss": 0.269, "step": 3342000 }, { "epoch": 2.0, "learning_rate": 4.200903405184155e-05, "loss": 0.2678, "step": 3342500 }, { "epoch": 2.0, "learning_rate": 4.2006934086280984e-05, "loss": 0.2704, "step": 3343000 }, { "epoch": 2.0, "learning_rate": 4.2004838320651544e-05, "loss": 0.2718, "step": 3343500 }, { "epoch": 2.0, "learning_rate": 4.200273835509098e-05, "loss": 0.275, "step": 3344000 }, { "epoch": 2.01, "learning_rate": 4.200063838953041e-05, "loss": 0.266, "step": 3344500 }, { "epoch": 2.01, "learning_rate": 4.199853842396985e-05, "loss": 0.2641, "step": 3345000 }, { "epoch": 2.01, "learning_rate": 4.1996442658340405e-05, "loss": 0.2705, "step": 3345500 }, { "epoch": 2.01, "learning_rate": 4.199434269277984e-05, "loss": 0.2686, "step": 3346000 }, { "epoch": 2.01, "learning_rate": 4.199224272721927e-05, "loss": 0.2695, "step": 3346500 }, { "epoch": 2.01, "learning_rate": 4.199014276165871e-05, "loss": 0.2673, "step": 3347000 }, { "epoch": 2.01, "learning_rate": 4.1988046996029266e-05, "loss": 0.2724, "step": 3347500 }, { "epoch": 2.01, "learning_rate": 4.19859470304687e-05, "loss": 0.2733, "step": 3348000 }, { "epoch": 2.01, "learning_rate": 4.198384706490813e-05, "loss": 0.27, "step": 3348500 }, { "epoch": 2.01, "learning_rate": 4.198174709934757e-05, "loss": 0.2619, "step": 3349000 }, { "epoch": 2.01, "learning_rate": 4.1979647133787006e-05, "loss": 0.2723, "step": 3349500 }, { "epoch": 2.01, "learning_rate": 4.197754716822644e-05, "loss": 0.2715, "step": 3350000 }, { "epoch": 2.01, "learning_rate": 4.197544720266587e-05, "loss": 0.2639, "step": 3350500 }, { "epoch": 2.01, "learning_rate": 4.1973351437036434e-05, "loss": 0.2628, "step": 3351000 }, { "epoch": 2.01, "learning_rate": 4.197125147147587e-05, "loss": 0.2775, "step": 3351500 }, { "epoch": 2.01, "learning_rate": 4.196915150591531e-05, "loss": 0.271, "step": 3352000 }, { "epoch": 2.01, "learning_rate": 4.196705154035474e-05, "loss": 0.2686, "step": 3352500 }, { "epoch": 2.01, "learning_rate": 4.196495157479417e-05, "loss": 0.2702, "step": 3353000 }, { "epoch": 2.01, "learning_rate": 4.196285160923361e-05, "loss": 0.2734, "step": 3353500 }, { "epoch": 2.01, "learning_rate": 4.196075164367304e-05, "loss": 0.2667, "step": 3354000 }, { "epoch": 2.01, "learning_rate": 4.1958651678112475e-05, "loss": 0.2689, "step": 3354500 }, { "epoch": 2.01, "learning_rate": 4.1956551712551915e-05, "loss": 0.268, "step": 3355000 }, { "epoch": 2.01, "learning_rate": 4.195446014685359e-05, "loss": 0.2699, "step": 3355500 }, { "epoch": 2.01, "learning_rate": 4.195236018129303e-05, "loss": 0.2743, "step": 3356000 }, { "epoch": 2.01, "learning_rate": 4.195026021573246e-05, "loss": 0.2683, "step": 3356500 }, { "epoch": 2.01, "learning_rate": 4.1948160250171896e-05, "loss": 0.2678, "step": 3357000 }, { "epoch": 2.01, "learning_rate": 4.194606028461133e-05, "loss": 0.2726, "step": 3357500 }, { "epoch": 2.01, "learning_rate": 4.194396031905076e-05, "loss": 0.2696, "step": 3358000 }, { "epoch": 2.01, "learning_rate": 4.19418603534902e-05, "loss": 0.2694, "step": 3358500 }, { "epoch": 2.01, "learning_rate": 4.1939760387929636e-05, "loss": 0.2729, "step": 3359000 }, { "epoch": 2.01, "learning_rate": 4.193766042236907e-05, "loss": 0.2661, "step": 3359500 }, { "epoch": 2.01, "learning_rate": 4.193556885667075e-05, "loss": 0.27, "step": 3360000 }, { "epoch": 2.01, "learning_rate": 4.1933468891110184e-05, "loss": 0.2706, "step": 3360500 }, { "epoch": 2.02, "learning_rate": 4.1931368925549624e-05, "loss": 0.2684, "step": 3361000 }, { "epoch": 2.02, "learning_rate": 4.192926895998906e-05, "loss": 0.2691, "step": 3361500 }, { "epoch": 2.02, "learning_rate": 4.192717319435961e-05, "loss": 0.2668, "step": 3362000 }, { "epoch": 2.02, "learning_rate": 4.1925073228799044e-05, "loss": 0.2748, "step": 3362500 }, { "epoch": 2.02, "learning_rate": 4.1922973263238485e-05, "loss": 0.27, "step": 3363000 }, { "epoch": 2.02, "learning_rate": 4.192087329767792e-05, "loss": 0.2663, "step": 3363500 }, { "epoch": 2.02, "learning_rate": 4.191877333211736e-05, "loss": 0.2665, "step": 3364000 }, { "epoch": 2.02, "learning_rate": 4.191667336655679e-05, "loss": 0.2772, "step": 3364500 }, { "epoch": 2.02, "learning_rate": 4.191457340099622e-05, "loss": 0.2688, "step": 3365000 }, { "epoch": 2.02, "learning_rate": 4.191247763536678e-05, "loss": 0.2655, "step": 3365500 }, { "epoch": 2.02, "learning_rate": 4.191037766980622e-05, "loss": 0.2712, "step": 3366000 }, { "epoch": 2.02, "learning_rate": 4.190827770424565e-05, "loss": 0.2678, "step": 3366500 }, { "epoch": 2.02, "learning_rate": 4.1906177738685086e-05, "loss": 0.2673, "step": 3367000 }, { "epoch": 2.02, "learning_rate": 4.190407777312452e-05, "loss": 0.2676, "step": 3367500 }, { "epoch": 2.02, "learning_rate": 4.190197780756395e-05, "loss": 0.2685, "step": 3368000 }, { "epoch": 2.02, "learning_rate": 4.1899877842003386e-05, "loss": 0.2701, "step": 3368500 }, { "epoch": 2.02, "learning_rate": 4.189777787644283e-05, "loss": 0.2721, "step": 3369000 }, { "epoch": 2.02, "learning_rate": 4.189568211081338e-05, "loss": 0.2762, "step": 3369500 }, { "epoch": 2.02, "learning_rate": 4.1893582145252814e-05, "loss": 0.2726, "step": 3370000 }, { "epoch": 2.02, "learning_rate": 4.1891486379623374e-05, "loss": 0.2683, "step": 3370500 }, { "epoch": 2.02, "learning_rate": 4.188939061399393e-05, "loss": 0.2694, "step": 3371000 }, { "epoch": 2.02, "learning_rate": 4.188729064843337e-05, "loss": 0.2685, "step": 3371500 }, { "epoch": 2.02, "learning_rate": 4.18851906828728e-05, "loss": 0.2768, "step": 3372000 }, { "epoch": 2.02, "learning_rate": 4.1883090717312235e-05, "loss": 0.2734, "step": 3372500 }, { "epoch": 2.02, "learning_rate": 4.1880990751751675e-05, "loss": 0.2689, "step": 3373000 }, { "epoch": 2.02, "learning_rate": 4.187889078619111e-05, "loss": 0.2715, "step": 3373500 }, { "epoch": 2.02, "learning_rate": 4.187679082063054e-05, "loss": 0.2696, "step": 3374000 }, { "epoch": 2.02, "learning_rate": 4.1874690855069975e-05, "loss": 0.2735, "step": 3374500 }, { "epoch": 2.02, "learning_rate": 4.187259088950941e-05, "loss": 0.2649, "step": 3375000 }, { "epoch": 2.02, "learning_rate": 4.187049512387997e-05, "loss": 0.2597, "step": 3375500 }, { "epoch": 2.02, "learning_rate": 4.18683951583194e-05, "loss": 0.2789, "step": 3376000 }, { "epoch": 2.02, "learning_rate": 4.186629519275884e-05, "loss": 0.2696, "step": 3376500 }, { "epoch": 2.02, "learning_rate": 4.186419522719827e-05, "loss": 0.2713, "step": 3377000 }, { "epoch": 2.02, "learning_rate": 4.186209526163771e-05, "loss": 0.2726, "step": 3377500 }, { "epoch": 2.03, "learning_rate": 4.185999529607714e-05, "loss": 0.2696, "step": 3378000 }, { "epoch": 2.03, "learning_rate": 4.185789533051658e-05, "loss": 0.2699, "step": 3378500 }, { "epoch": 2.03, "learning_rate": 4.185579536495602e-05, "loss": 0.2691, "step": 3379000 }, { "epoch": 2.03, "learning_rate": 4.185369959932657e-05, "loss": 0.2709, "step": 3379500 }, { "epoch": 2.03, "learning_rate": 4.185160383369713e-05, "loss": 0.273, "step": 3380000 }, { "epoch": 2.03, "learning_rate": 4.1849503868136564e-05, "loss": 0.2693, "step": 3380500 }, { "epoch": 2.03, "learning_rate": 4.1847403902576e-05, "loss": 0.272, "step": 3381000 }, { "epoch": 2.03, "learning_rate": 4.184530393701543e-05, "loss": 0.2653, "step": 3381500 }, { "epoch": 2.03, "learning_rate": 4.1843203971454865e-05, "loss": 0.2682, "step": 3382000 }, { "epoch": 2.03, "learning_rate": 4.18411040058943e-05, "loss": 0.2663, "step": 3382500 }, { "epoch": 2.03, "learning_rate": 4.183900404033374e-05, "loss": 0.2719, "step": 3383000 }, { "epoch": 2.03, "learning_rate": 4.183690407477317e-05, "loss": 0.2713, "step": 3383500 }, { "epoch": 2.03, "learning_rate": 4.1834804109212605e-05, "loss": 0.2736, "step": 3384000 }, { "epoch": 2.03, "learning_rate": 4.1832708343583166e-05, "loss": 0.2734, "step": 3384500 }, { "epoch": 2.03, "learning_rate": 4.18306083780226e-05, "loss": 0.2744, "step": 3385000 }, { "epoch": 2.03, "learning_rate": 4.182850841246203e-05, "loss": 0.2732, "step": 3385500 }, { "epoch": 2.03, "learning_rate": 4.182640844690147e-05, "loss": 0.2643, "step": 3386000 }, { "epoch": 2.03, "learning_rate": 4.1824312681272026e-05, "loss": 0.2735, "step": 3386500 }, { "epoch": 2.03, "learning_rate": 4.182221271571146e-05, "loss": 0.2683, "step": 3387000 }, { "epoch": 2.03, "learning_rate": 4.182011275015089e-05, "loss": 0.268, "step": 3387500 }, { "epoch": 2.03, "learning_rate": 4.1818012784590334e-05, "loss": 0.2735, "step": 3388000 }, { "epoch": 2.03, "learning_rate": 4.181591281902977e-05, "loss": 0.2621, "step": 3388500 }, { "epoch": 2.03, "learning_rate": 4.18138128534692e-05, "loss": 0.2669, "step": 3389000 }, { "epoch": 2.03, "learning_rate": 4.1811717087839754e-05, "loss": 0.2674, "step": 3389500 }, { "epoch": 2.03, "learning_rate": 4.1809617122279194e-05, "loss": 0.272, "step": 3390000 }, { "epoch": 2.03, "learning_rate": 4.1807521356649755e-05, "loss": 0.273, "step": 3390500 }, { "epoch": 2.03, "learning_rate": 4.180542139108918e-05, "loss": 0.2691, "step": 3391000 }, { "epoch": 2.03, "learning_rate": 4.180332142552862e-05, "loss": 0.2659, "step": 3391500 }, { "epoch": 2.03, "learning_rate": 4.1801221459968055e-05, "loss": 0.2675, "step": 3392000 }, { "epoch": 2.03, "learning_rate": 4.179912149440749e-05, "loss": 0.2706, "step": 3392500 }, { "epoch": 2.03, "learning_rate": 4.179702152884693e-05, "loss": 0.2663, "step": 3393000 }, { "epoch": 2.03, "learning_rate": 4.179492156328636e-05, "loss": 0.2695, "step": 3393500 }, { "epoch": 2.03, "learning_rate": 4.1792821597725796e-05, "loss": 0.2714, "step": 3394000 }, { "epoch": 2.04, "learning_rate": 4.179072583209635e-05, "loss": 0.2707, "step": 3394500 }, { "epoch": 2.04, "learning_rate": 4.178862586653579e-05, "loss": 0.2746, "step": 3395000 }, { "epoch": 2.04, "learning_rate": 4.178653010090635e-05, "loss": 0.2694, "step": 3395500 }, { "epoch": 2.04, "learning_rate": 4.1784430135345776e-05, "loss": 0.274, "step": 3396000 }, { "epoch": 2.04, "learning_rate": 4.178233016978521e-05, "loss": 0.2691, "step": 3396500 }, { "epoch": 2.04, "learning_rate": 4.178023020422465e-05, "loss": 0.273, "step": 3397000 }, { "epoch": 2.04, "learning_rate": 4.1778130238664084e-05, "loss": 0.2708, "step": 3397500 }, { "epoch": 2.04, "learning_rate": 4.177603027310352e-05, "loss": 0.2722, "step": 3398000 }, { "epoch": 2.04, "learning_rate": 4.177393030754296e-05, "loss": 0.2675, "step": 3398500 }, { "epoch": 2.04, "learning_rate": 4.177183034198239e-05, "loss": 0.2673, "step": 3399000 }, { "epoch": 2.04, "learning_rate": 4.176973877628407e-05, "loss": 0.2709, "step": 3399500 }, { "epoch": 2.04, "learning_rate": 4.1767638810723505e-05, "loss": 0.2732, "step": 3400000 }, { "epoch": 2.04, "eval_loss": 0.25032973289489746, "eval_runtime": 1464.6668, "eval_samples_per_second": 359.618, "eval_steps_per_second": 59.936, "step": 3400000 }, { "epoch": 2.04, "learning_rate": 4.176553884516294e-05, "loss": 0.2722, "step": 3400500 }, { "epoch": 2.04, "learning_rate": 4.176343887960237e-05, "loss": 0.2716, "step": 3401000 }, { "epoch": 2.04, "learning_rate": 4.176134311397293e-05, "loss": 0.2698, "step": 3401500 }, { "epoch": 2.04, "learning_rate": 4.1759243148412365e-05, "loss": 0.2692, "step": 3402000 }, { "epoch": 2.04, "learning_rate": 4.1757143182851806e-05, "loss": 0.2734, "step": 3402500 }, { "epoch": 2.04, "learning_rate": 4.175504321729123e-05, "loss": 0.2757, "step": 3403000 }, { "epoch": 2.04, "learning_rate": 4.1752943251730666e-05, "loss": 0.2724, "step": 3403500 }, { "epoch": 2.04, "learning_rate": 4.1750843286170106e-05, "loss": 0.2737, "step": 3404000 }, { "epoch": 2.04, "learning_rate": 4.174874332060954e-05, "loss": 0.2742, "step": 3404500 }, { "epoch": 2.04, "learning_rate": 4.17466475549801e-05, "loss": 0.2751, "step": 3405000 }, { "epoch": 2.04, "learning_rate": 4.174454758941953e-05, "loss": 0.2719, "step": 3405500 }, { "epoch": 2.04, "learning_rate": 4.174244762385897e-05, "loss": 0.2695, "step": 3406000 }, { "epoch": 2.04, "learning_rate": 4.17403476582984e-05, "loss": 0.2694, "step": 3406500 }, { "epoch": 2.04, "learning_rate": 4.173824769273784e-05, "loss": 0.2803, "step": 3407000 }, { "epoch": 2.04, "learning_rate": 4.1736147727177274e-05, "loss": 0.2701, "step": 3407500 }, { "epoch": 2.04, "learning_rate": 4.173404776161671e-05, "loss": 0.2694, "step": 3408000 }, { "epoch": 2.04, "learning_rate": 4.173194779605615e-05, "loss": 0.2703, "step": 3408500 }, { "epoch": 2.04, "learning_rate": 4.172984783049558e-05, "loss": 0.2693, "step": 3409000 }, { "epoch": 2.04, "learning_rate": 4.1727752064866135e-05, "loss": 0.281, "step": 3409500 }, { "epoch": 2.04, "learning_rate": 4.172565209930557e-05, "loss": 0.2698, "step": 3410000 }, { "epoch": 2.04, "learning_rate": 4.172355213374501e-05, "loss": 0.2756, "step": 3410500 }, { "epoch": 2.05, "learning_rate": 4.172145216818444e-05, "loss": 0.2697, "step": 3411000 }, { "epoch": 2.05, "learning_rate": 4.1719356402554995e-05, "loss": 0.2649, "step": 3411500 }, { "epoch": 2.05, "learning_rate": 4.171725643699443e-05, "loss": 0.2672, "step": 3412000 }, { "epoch": 2.05, "learning_rate": 4.171515647143387e-05, "loss": 0.2641, "step": 3412500 }, { "epoch": 2.05, "learning_rate": 4.17130565058733e-05, "loss": 0.2731, "step": 3413000 }, { "epoch": 2.05, "learning_rate": 4.1710960740243856e-05, "loss": 0.2723, "step": 3413500 }, { "epoch": 2.05, "learning_rate": 4.1708860774683296e-05, "loss": 0.2655, "step": 3414000 }, { "epoch": 2.05, "learning_rate": 4.170676080912273e-05, "loss": 0.2698, "step": 3414500 }, { "epoch": 2.05, "learning_rate": 4.170466084356216e-05, "loss": 0.2759, "step": 3415000 }, { "epoch": 2.05, "learning_rate": 4.170256507793272e-05, "loss": 0.2715, "step": 3415500 }, { "epoch": 2.05, "learning_rate": 4.170046931230328e-05, "loss": 0.2683, "step": 3416000 }, { "epoch": 2.05, "learning_rate": 4.169836934674272e-05, "loss": 0.2715, "step": 3416500 }, { "epoch": 2.05, "learning_rate": 4.169626938118215e-05, "loss": 0.2665, "step": 3417000 }, { "epoch": 2.05, "learning_rate": 4.169416941562158e-05, "loss": 0.274, "step": 3417500 }, { "epoch": 2.05, "learning_rate": 4.169206945006102e-05, "loss": 0.2708, "step": 3418000 }, { "epoch": 2.05, "learning_rate": 4.168996948450045e-05, "loss": 0.2702, "step": 3418500 }, { "epoch": 2.05, "learning_rate": 4.1687869518939885e-05, "loss": 0.2658, "step": 3419000 }, { "epoch": 2.05, "learning_rate": 4.1685769553379325e-05, "loss": 0.2651, "step": 3419500 }, { "epoch": 2.05, "learning_rate": 4.168367378774988e-05, "loss": 0.2683, "step": 3420000 }, { "epoch": 2.05, "learning_rate": 4.168157382218931e-05, "loss": 0.2705, "step": 3420500 }, { "epoch": 2.05, "learning_rate": 4.167947385662875e-05, "loss": 0.2651, "step": 3421000 }, { "epoch": 2.05, "learning_rate": 4.167737809099931e-05, "loss": 0.2687, "step": 3421500 }, { "epoch": 2.05, "learning_rate": 4.167527812543874e-05, "loss": 0.2685, "step": 3422000 }, { "epoch": 2.05, "learning_rate": 4.167317815987817e-05, "loss": 0.2719, "step": 3422500 }, { "epoch": 2.05, "learning_rate": 4.167107819431761e-05, "loss": 0.2681, "step": 3423000 }, { "epoch": 2.05, "learning_rate": 4.1668978228757046e-05, "loss": 0.2677, "step": 3423500 }, { "epoch": 2.05, "learning_rate": 4.166687826319648e-05, "loss": 0.2763, "step": 3424000 }, { "epoch": 2.05, "learning_rate": 4.166477829763592e-05, "loss": 0.2705, "step": 3424500 }, { "epoch": 2.05, "learning_rate": 4.1662678332075354e-05, "loss": 0.2737, "step": 3425000 }, { "epoch": 2.05, "learning_rate": 4.166058256644591e-05, "loss": 0.279, "step": 3425500 }, { "epoch": 2.05, "learning_rate": 4.165848260088534e-05, "loss": 0.2714, "step": 3426000 }, { "epoch": 2.05, "learning_rate": 4.165638263532478e-05, "loss": 0.2738, "step": 3426500 }, { "epoch": 2.05, "learning_rate": 4.1654282669764214e-05, "loss": 0.2709, "step": 3427000 }, { "epoch": 2.05, "learning_rate": 4.165218690413477e-05, "loss": 0.265, "step": 3427500 }, { "epoch": 2.06, "learning_rate": 4.165008693857421e-05, "loss": 0.2698, "step": 3428000 }, { "epoch": 2.06, "learning_rate": 4.164798697301364e-05, "loss": 0.2727, "step": 3428500 }, { "epoch": 2.06, "learning_rate": 4.1645887007453075e-05, "loss": 0.2716, "step": 3429000 }, { "epoch": 2.06, "learning_rate": 4.164379124182363e-05, "loss": 0.2701, "step": 3429500 }, { "epoch": 2.06, "learning_rate": 4.164169547619419e-05, "loss": 0.2709, "step": 3430000 }, { "epoch": 2.06, "learning_rate": 4.163959551063363e-05, "loss": 0.2714, "step": 3430500 }, { "epoch": 2.06, "learning_rate": 4.163749554507306e-05, "loss": 0.2673, "step": 3431000 }, { "epoch": 2.06, "learning_rate": 4.163539557951249e-05, "loss": 0.2678, "step": 3431500 }, { "epoch": 2.06, "learning_rate": 4.163329561395193e-05, "loss": 0.2715, "step": 3432000 }, { "epoch": 2.06, "learning_rate": 4.163119564839136e-05, "loss": 0.271, "step": 3432500 }, { "epoch": 2.06, "learning_rate": 4.1629095682830796e-05, "loss": 0.2704, "step": 3433000 }, { "epoch": 2.06, "learning_rate": 4.162699991720136e-05, "loss": 0.2703, "step": 3433500 }, { "epoch": 2.06, "learning_rate": 4.162489995164079e-05, "loss": 0.271, "step": 3434000 }, { "epoch": 2.06, "learning_rate": 4.1622799986080224e-05, "loss": 0.2759, "step": 3434500 }, { "epoch": 2.06, "learning_rate": 4.1620700020519664e-05, "loss": 0.2675, "step": 3435000 }, { "epoch": 2.06, "learning_rate": 4.16186000549591e-05, "loss": 0.2666, "step": 3435500 }, { "epoch": 2.06, "learning_rate": 4.161650008939853e-05, "loss": 0.269, "step": 3436000 }, { "epoch": 2.06, "learning_rate": 4.161440012383797e-05, "loss": 0.2786, "step": 3436500 }, { "epoch": 2.06, "learning_rate": 4.1612300158277405e-05, "loss": 0.2719, "step": 3437000 }, { "epoch": 2.06, "learning_rate": 4.161020439264796e-05, "loss": 0.2713, "step": 3437500 }, { "epoch": 2.06, "learning_rate": 4.160810442708739e-05, "loss": 0.2741, "step": 3438000 }, { "epoch": 2.06, "learning_rate": 4.160600446152683e-05, "loss": 0.2706, "step": 3438500 }, { "epoch": 2.06, "learning_rate": 4.1603904495966265e-05, "loss": 0.2704, "step": 3439000 }, { "epoch": 2.06, "learning_rate": 4.160180873033682e-05, "loss": 0.2667, "step": 3439500 }, { "epoch": 2.06, "learning_rate": 4.159971296470738e-05, "loss": 0.2633, "step": 3440000 }, { "epoch": 2.06, "learning_rate": 4.159761719907793e-05, "loss": 0.2702, "step": 3440500 }, { "epoch": 2.06, "learning_rate": 4.159551723351737e-05, "loss": 0.2722, "step": 3441000 }, { "epoch": 2.06, "learning_rate": 4.1593417267956807e-05, "loss": 0.2678, "step": 3441500 }, { "epoch": 2.06, "learning_rate": 4.159131730239624e-05, "loss": 0.2734, "step": 3442000 }, { "epoch": 2.06, "learning_rate": 4.158921733683568e-05, "loss": 0.2744, "step": 3442500 }, { "epoch": 2.06, "learning_rate": 4.1587117371275114e-05, "loss": 0.2614, "step": 3443000 }, { "epoch": 2.06, "learning_rate": 4.158501740571454e-05, "loss": 0.2677, "step": 3443500 }, { "epoch": 2.06, "learning_rate": 4.158291744015398e-05, "loss": 0.2683, "step": 3444000 }, { "epoch": 2.07, "learning_rate": 4.1580817474593414e-05, "loss": 0.2733, "step": 3444500 }, { "epoch": 2.07, "learning_rate": 4.157871750903285e-05, "loss": 0.2674, "step": 3445000 }, { "epoch": 2.07, "learning_rate": 4.157661754347229e-05, "loss": 0.2719, "step": 3445500 }, { "epoch": 2.07, "learning_rate": 4.157451757791172e-05, "loss": 0.27, "step": 3446000 }, { "epoch": 2.07, "learning_rate": 4.1572421812282275e-05, "loss": 0.2692, "step": 3446500 }, { "epoch": 2.07, "learning_rate": 4.1570326046652835e-05, "loss": 0.2654, "step": 3447000 }, { "epoch": 2.07, "learning_rate": 4.156822608109227e-05, "loss": 0.2662, "step": 3447500 }, { "epoch": 2.07, "learning_rate": 4.156612611553171e-05, "loss": 0.2727, "step": 3448000 }, { "epoch": 2.07, "learning_rate": 4.1564026149971136e-05, "loss": 0.2707, "step": 3448500 }, { "epoch": 2.07, "learning_rate": 4.1561926184410576e-05, "loss": 0.2642, "step": 3449000 }, { "epoch": 2.07, "learning_rate": 4.155982621885001e-05, "loss": 0.2678, "step": 3449500 }, { "epoch": 2.07, "learning_rate": 4.155772625328944e-05, "loss": 0.2725, "step": 3450000 }, { "epoch": 2.07, "learning_rate": 4.155562628772888e-05, "loss": 0.269, "step": 3450500 }, { "epoch": 2.07, "learning_rate": 4.1553526322168316e-05, "loss": 0.2735, "step": 3451000 }, { "epoch": 2.07, "learning_rate": 4.155143055653887e-05, "loss": 0.27, "step": 3451500 }, { "epoch": 2.07, "learning_rate": 4.1549330590978303e-05, "loss": 0.263, "step": 3452000 }, { "epoch": 2.07, "learning_rate": 4.1547230625417744e-05, "loss": 0.2708, "step": 3452500 }, { "epoch": 2.07, "learning_rate": 4.154513065985718e-05, "loss": 0.2648, "step": 3453000 }, { "epoch": 2.07, "learning_rate": 4.154303489422773e-05, "loss": 0.2725, "step": 3453500 }, { "epoch": 2.07, "learning_rate": 4.1540934928667164e-05, "loss": 0.2694, "step": 3454000 }, { "epoch": 2.07, "learning_rate": 4.1538834963106604e-05, "loss": 0.2704, "step": 3454500 }, { "epoch": 2.07, "learning_rate": 4.153673499754604e-05, "loss": 0.2698, "step": 3455000 }, { "epoch": 2.07, "learning_rate": 4.153463503198548e-05, "loss": 0.2687, "step": 3455500 }, { "epoch": 2.07, "learning_rate": 4.153253926635603e-05, "loss": 0.2703, "step": 3456000 }, { "epoch": 2.07, "learning_rate": 4.1530439300795465e-05, "loss": 0.2737, "step": 3456500 }, { "epoch": 2.07, "learning_rate": 4.15283393352349e-05, "loss": 0.2694, "step": 3457000 }, { "epoch": 2.07, "learning_rate": 4.152624356960546e-05, "loss": 0.2745, "step": 3457500 }, { "epoch": 2.07, "learning_rate": 4.152414360404489e-05, "loss": 0.2665, "step": 3458000 }, { "epoch": 2.07, "learning_rate": 4.1522043638484326e-05, "loss": 0.2717, "step": 3458500 }, { "epoch": 2.07, "learning_rate": 4.151994367292376e-05, "loss": 0.2668, "step": 3459000 }, { "epoch": 2.07, "learning_rate": 4.15178437073632e-05, "loss": 0.2666, "step": 3459500 }, { "epoch": 2.07, "learning_rate": 4.151574374180263e-05, "loss": 0.27, "step": 3460000 }, { "epoch": 2.07, "learning_rate": 4.1513643776242066e-05, "loss": 0.2582, "step": 3460500 }, { "epoch": 2.08, "learning_rate": 4.151154381068151e-05, "loss": 0.2663, "step": 3461000 }, { "epoch": 2.08, "learning_rate": 4.150944804505206e-05, "loss": 0.266, "step": 3461500 }, { "epoch": 2.08, "learning_rate": 4.1507348079491494e-05, "loss": 0.2713, "step": 3462000 }, { "epoch": 2.08, "learning_rate": 4.1505248113930934e-05, "loss": 0.2704, "step": 3462500 }, { "epoch": 2.08, "learning_rate": 4.150314814837037e-05, "loss": 0.2745, "step": 3463000 }, { "epoch": 2.08, "learning_rate": 4.150105238274092e-05, "loss": 0.2693, "step": 3463500 }, { "epoch": 2.08, "learning_rate": 4.1498952417180354e-05, "loss": 0.2687, "step": 3464000 }, { "epoch": 2.08, "learning_rate": 4.1496852451619795e-05, "loss": 0.2696, "step": 3464500 }, { "epoch": 2.08, "learning_rate": 4.149475248605923e-05, "loss": 0.2648, "step": 3465000 }, { "epoch": 2.08, "learning_rate": 4.149265252049866e-05, "loss": 0.2712, "step": 3465500 }, { "epoch": 2.08, "learning_rate": 4.149056095480034e-05, "loss": 0.2739, "step": 3466000 }, { "epoch": 2.08, "learning_rate": 4.1488460989239775e-05, "loss": 0.2705, "step": 3466500 }, { "epoch": 2.08, "learning_rate": 4.1486361023679216e-05, "loss": 0.2699, "step": 3467000 }, { "epoch": 2.08, "learning_rate": 4.148426105811864e-05, "loss": 0.2732, "step": 3467500 }, { "epoch": 2.08, "learning_rate": 4.148216109255808e-05, "loss": 0.2697, "step": 3468000 }, { "epoch": 2.08, "learning_rate": 4.148006532692864e-05, "loss": 0.2718, "step": 3468500 }, { "epoch": 2.08, "learning_rate": 4.1477965361368076e-05, "loss": 0.267, "step": 3469000 }, { "epoch": 2.08, "learning_rate": 4.147586539580751e-05, "loss": 0.2711, "step": 3469500 }, { "epoch": 2.08, "learning_rate": 4.147376543024694e-05, "loss": 0.2705, "step": 3470000 }, { "epoch": 2.08, "learning_rate": 4.147166546468638e-05, "loss": 0.27, "step": 3470500 }, { "epoch": 2.08, "learning_rate": 4.146956549912581e-05, "loss": 0.2712, "step": 3471000 }, { "epoch": 2.08, "learning_rate": 4.146746553356525e-05, "loss": 0.2719, "step": 3471500 }, { "epoch": 2.08, "learning_rate": 4.1465365568004684e-05, "loss": 0.2664, "step": 3472000 }, { "epoch": 2.08, "learning_rate": 4.146326560244412e-05, "loss": 0.2718, "step": 3472500 }, { "epoch": 2.08, "learning_rate": 4.146116983681467e-05, "loss": 0.2679, "step": 3473000 }, { "epoch": 2.08, "learning_rate": 4.145906987125411e-05, "loss": 0.263, "step": 3473500 }, { "epoch": 2.08, "learning_rate": 4.1456969905693545e-05, "loss": 0.2649, "step": 3474000 }, { "epoch": 2.08, "learning_rate": 4.14548741400641e-05, "loss": 0.2737, "step": 3474500 }, { "epoch": 2.08, "learning_rate": 4.145277417450354e-05, "loss": 0.2693, "step": 3475000 }, { "epoch": 2.08, "learning_rate": 4.145067420894297e-05, "loss": 0.2703, "step": 3475500 }, { "epoch": 2.08, "learning_rate": 4.1448574243382405e-05, "loss": 0.2734, "step": 3476000 }, { "epoch": 2.08, "learning_rate": 4.1446474277821846e-05, "loss": 0.2717, "step": 3476500 }, { "epoch": 2.08, "learning_rate": 4.144437431226128e-05, "loss": 0.2702, "step": 3477000 }, { "epoch": 2.08, "learning_rate": 4.144227434670071e-05, "loss": 0.2689, "step": 3477500 }, { "epoch": 2.09, "learning_rate": 4.144017438114015e-05, "loss": 0.2661, "step": 3478000 }, { "epoch": 2.09, "learning_rate": 4.1438078615510706e-05, "loss": 0.2735, "step": 3478500 }, { "epoch": 2.09, "learning_rate": 4.143597864995014e-05, "loss": 0.2669, "step": 3479000 }, { "epoch": 2.09, "learning_rate": 4.1433882884320693e-05, "loss": 0.279, "step": 3479500 }, { "epoch": 2.09, "learning_rate": 4.143178291876013e-05, "loss": 0.2736, "step": 3480000 }, { "epoch": 2.09, "learning_rate": 4.142968295319957e-05, "loss": 0.2688, "step": 3480500 }, { "epoch": 2.09, "learning_rate": 4.142758718757013e-05, "loss": 0.2688, "step": 3481000 }, { "epoch": 2.09, "learning_rate": 4.1425487222009554e-05, "loss": 0.2708, "step": 3481500 }, { "epoch": 2.09, "learning_rate": 4.1423387256448994e-05, "loss": 0.2712, "step": 3482000 }, { "epoch": 2.09, "learning_rate": 4.142128729088843e-05, "loss": 0.2706, "step": 3482500 }, { "epoch": 2.09, "learning_rate": 4.141918732532786e-05, "loss": 0.2709, "step": 3483000 }, { "epoch": 2.09, "learning_rate": 4.14170873597673e-05, "loss": 0.2711, "step": 3483500 }, { "epoch": 2.09, "learning_rate": 4.1414987394206735e-05, "loss": 0.2661, "step": 3484000 }, { "epoch": 2.09, "learning_rate": 4.141288742864617e-05, "loss": 0.2696, "step": 3484500 }, { "epoch": 2.09, "learning_rate": 4.141078746308561e-05, "loss": 0.2662, "step": 3485000 }, { "epoch": 2.09, "learning_rate": 4.140868749752504e-05, "loss": 0.2671, "step": 3485500 }, { "epoch": 2.09, "learning_rate": 4.1406587531964476e-05, "loss": 0.2651, "step": 3486000 }, { "epoch": 2.09, "learning_rate": 4.140449176633503e-05, "loss": 0.2748, "step": 3486500 }, { "epoch": 2.09, "learning_rate": 4.140239180077447e-05, "loss": 0.2659, "step": 3487000 }, { "epoch": 2.09, "learning_rate": 4.14002918352139e-05, "loss": 0.2691, "step": 3487500 }, { "epoch": 2.09, "learning_rate": 4.1398191869653336e-05, "loss": 0.2754, "step": 3488000 }, { "epoch": 2.09, "learning_rate": 4.1396091904092777e-05, "loss": 0.2703, "step": 3488500 }, { "epoch": 2.09, "learning_rate": 4.139399193853221e-05, "loss": 0.2703, "step": 3489000 }, { "epoch": 2.09, "learning_rate": 4.139189197297164e-05, "loss": 0.2726, "step": 3489500 }, { "epoch": 2.09, "learning_rate": 4.138979200741108e-05, "loss": 0.2657, "step": 3490000 }, { "epoch": 2.09, "learning_rate": 4.138769204185051e-05, "loss": 0.2691, "step": 3490500 }, { "epoch": 2.09, "learning_rate": 4.138559627622107e-05, "loss": 0.2686, "step": 3491000 }, { "epoch": 2.09, "learning_rate": 4.1383496310660504e-05, "loss": 0.272, "step": 3491500 }, { "epoch": 2.09, "learning_rate": 4.138139634509994e-05, "loss": 0.268, "step": 3492000 }, { "epoch": 2.09, "learning_rate": 4.137929637953937e-05, "loss": 0.2726, "step": 3492500 }, { "epoch": 2.09, "learning_rate": 4.137720061390993e-05, "loss": 0.2659, "step": 3493000 }, { "epoch": 2.09, "learning_rate": 4.137510064834937e-05, "loss": 0.2609, "step": 3493500 }, { "epoch": 2.09, "learning_rate": 4.1373000682788805e-05, "loss": 0.2737, "step": 3494000 }, { "epoch": 2.1, "learning_rate": 4.137090071722823e-05, "loss": 0.2743, "step": 3494500 }, { "epoch": 2.1, "learning_rate": 4.136880075166767e-05, "loss": 0.2657, "step": 3495000 }, { "epoch": 2.1, "learning_rate": 4.1366700786107106e-05, "loss": 0.2812, "step": 3495500 }, { "epoch": 2.1, "learning_rate": 4.136460082054654e-05, "loss": 0.2726, "step": 3496000 }, { "epoch": 2.1, "learning_rate": 4.136250085498598e-05, "loss": 0.2658, "step": 3496500 }, { "epoch": 2.1, "learning_rate": 4.136040508935653e-05, "loss": 0.2674, "step": 3497000 }, { "epoch": 2.1, "learning_rate": 4.1358305123795966e-05, "loss": 0.2671, "step": 3497500 }, { "epoch": 2.1, "learning_rate": 4.13562051582354e-05, "loss": 0.2713, "step": 3498000 }, { "epoch": 2.1, "learning_rate": 4.135410519267484e-05, "loss": 0.2648, "step": 3498500 }, { "epoch": 2.1, "learning_rate": 4.1352009427045394e-05, "loss": 0.2717, "step": 3499000 }, { "epoch": 2.1, "learning_rate": 4.134990946148483e-05, "loss": 0.2675, "step": 3499500 }, { "epoch": 2.1, "learning_rate": 4.134780949592427e-05, "loss": 0.2686, "step": 3500000 }, { "epoch": 2.1, "eval_loss": 0.24986667931079865, "eval_runtime": 1459.8444, "eval_samples_per_second": 360.806, "eval_steps_per_second": 60.134, "step": 3500000 }, { "epoch": 2.1, "learning_rate": 4.134571373029483e-05, "loss": 0.2725, "step": 3500500 }, { "epoch": 2.1, "learning_rate": 4.134361376473426e-05, "loss": 0.2692, "step": 3501000 }, { "epoch": 2.1, "learning_rate": 4.134151379917369e-05, "loss": 0.2697, "step": 3501500 }, { "epoch": 2.1, "learning_rate": 4.133941383361313e-05, "loss": 0.2703, "step": 3502000 }, { "epoch": 2.1, "learning_rate": 4.133731386805256e-05, "loss": 0.2658, "step": 3502500 }, { "epoch": 2.1, "learning_rate": 4.1335213902491995e-05, "loss": 0.2699, "step": 3503000 }, { "epoch": 2.1, "learning_rate": 4.1333118136862555e-05, "loss": 0.2737, "step": 3503500 }, { "epoch": 2.1, "learning_rate": 4.133101817130199e-05, "loss": 0.2706, "step": 3504000 }, { "epoch": 2.1, "learning_rate": 4.132891820574142e-05, "loss": 0.266, "step": 3504500 }, { "epoch": 2.1, "learning_rate": 4.1326818240180856e-05, "loss": 0.2737, "step": 3505000 }, { "epoch": 2.1, "learning_rate": 4.1324718274620296e-05, "loss": 0.2685, "step": 3505500 }, { "epoch": 2.1, "learning_rate": 4.132261830905973e-05, "loss": 0.2679, "step": 3506000 }, { "epoch": 2.1, "learning_rate": 4.132051834349916e-05, "loss": 0.2662, "step": 3506500 }, { "epoch": 2.1, "learning_rate": 4.13184183779386e-05, "loss": 0.2742, "step": 3507000 }, { "epoch": 2.1, "learning_rate": 4.1316322612309157e-05, "loss": 0.2716, "step": 3507500 }, { "epoch": 2.1, "learning_rate": 4.131422264674859e-05, "loss": 0.2741, "step": 3508000 }, { "epoch": 2.1, "learning_rate": 4.131212268118803e-05, "loss": 0.266, "step": 3508500 }, { "epoch": 2.1, "learning_rate": 4.1310022715627464e-05, "loss": 0.2736, "step": 3509000 }, { "epoch": 2.1, "learning_rate": 4.13079227500669e-05, "loss": 0.2752, "step": 3509500 }, { "epoch": 2.1, "learning_rate": 4.130582278450634e-05, "loss": 0.2691, "step": 3510000 }, { "epoch": 2.1, "learning_rate": 4.130372701887689e-05, "loss": 0.2674, "step": 3510500 }, { "epoch": 2.1, "learning_rate": 4.1301627053316324e-05, "loss": 0.2678, "step": 3511000 }, { "epoch": 2.11, "learning_rate": 4.129952708775576e-05, "loss": 0.2694, "step": 3511500 }, { "epoch": 2.11, "learning_rate": 4.12974271221952e-05, "loss": 0.2674, "step": 3512000 }, { "epoch": 2.11, "learning_rate": 4.129533135656575e-05, "loss": 0.2705, "step": 3512500 }, { "epoch": 2.11, "learning_rate": 4.1293231391005185e-05, "loss": 0.2663, "step": 3513000 }, { "epoch": 2.11, "learning_rate": 4.129113562537574e-05, "loss": 0.2682, "step": 3513500 }, { "epoch": 2.11, "learning_rate": 4.128903565981518e-05, "loss": 0.2693, "step": 3514000 }, { "epoch": 2.11, "learning_rate": 4.128693569425461e-05, "loss": 0.2702, "step": 3514500 }, { "epoch": 2.11, "learning_rate": 4.1284835728694046e-05, "loss": 0.2737, "step": 3515000 }, { "epoch": 2.11, "learning_rate": 4.1282735763133486e-05, "loss": 0.2659, "step": 3515500 }, { "epoch": 2.11, "learning_rate": 4.128063579757292e-05, "loss": 0.2747, "step": 3516000 }, { "epoch": 2.11, "learning_rate": 4.127853583201235e-05, "loss": 0.2717, "step": 3516500 }, { "epoch": 2.11, "learning_rate": 4.127643586645179e-05, "loss": 0.2734, "step": 3517000 }, { "epoch": 2.11, "learning_rate": 4.127434010082235e-05, "loss": 0.2748, "step": 3517500 }, { "epoch": 2.11, "learning_rate": 4.127224013526178e-05, "loss": 0.2691, "step": 3518000 }, { "epoch": 2.11, "learning_rate": 4.1270140169701214e-05, "loss": 0.2693, "step": 3518500 }, { "epoch": 2.11, "learning_rate": 4.1268040204140654e-05, "loss": 0.2645, "step": 3519000 }, { "epoch": 2.11, "learning_rate": 4.126594023858009e-05, "loss": 0.2695, "step": 3519500 }, { "epoch": 2.11, "learning_rate": 4.126384447295064e-05, "loss": 0.2681, "step": 3520000 }, { "epoch": 2.11, "learning_rate": 4.126174450739008e-05, "loss": 0.2731, "step": 3520500 }, { "epoch": 2.11, "learning_rate": 4.1259644541829515e-05, "loss": 0.2707, "step": 3521000 }, { "epoch": 2.11, "learning_rate": 4.125754457626895e-05, "loss": 0.2661, "step": 3521500 }, { "epoch": 2.11, "learning_rate": 4.12554488106395e-05, "loss": 0.2691, "step": 3522000 }, { "epoch": 2.11, "learning_rate": 4.125334884507894e-05, "loss": 0.2667, "step": 3522500 }, { "epoch": 2.11, "learning_rate": 4.1251248879518376e-05, "loss": 0.2725, "step": 3523000 }, { "epoch": 2.11, "learning_rate": 4.124914891395781e-05, "loss": 0.2669, "step": 3523500 }, { "epoch": 2.11, "learning_rate": 4.124705314832836e-05, "loss": 0.2718, "step": 3524000 }, { "epoch": 2.11, "learning_rate": 4.12449531827678e-05, "loss": 0.2702, "step": 3524500 }, { "epoch": 2.11, "learning_rate": 4.1242853217207236e-05, "loss": 0.2702, "step": 3525000 }, { "epoch": 2.11, "learning_rate": 4.124075325164667e-05, "loss": 0.2673, "step": 3525500 }, { "epoch": 2.11, "learning_rate": 4.123865328608611e-05, "loss": 0.2713, "step": 3526000 }, { "epoch": 2.11, "learning_rate": 4.1236557520456664e-05, "loss": 0.271, "step": 3526500 }, { "epoch": 2.11, "learning_rate": 4.12344575548961e-05, "loss": 0.2683, "step": 3527000 }, { "epoch": 2.11, "learning_rate": 4.123235758933554e-05, "loss": 0.2718, "step": 3527500 }, { "epoch": 2.12, "learning_rate": 4.123025762377497e-05, "loss": 0.2746, "step": 3528000 }, { "epoch": 2.12, "learning_rate": 4.1228161858145524e-05, "loss": 0.2729, "step": 3528500 }, { "epoch": 2.12, "learning_rate": 4.122606189258496e-05, "loss": 0.2681, "step": 3529000 }, { "epoch": 2.12, "learning_rate": 4.12239619270244e-05, "loss": 0.2697, "step": 3529500 }, { "epoch": 2.12, "learning_rate": 4.122186196146383e-05, "loss": 0.2684, "step": 3530000 }, { "epoch": 2.12, "learning_rate": 4.1219766195834385e-05, "loss": 0.2713, "step": 3530500 }, { "epoch": 2.12, "learning_rate": 4.121766623027382e-05, "loss": 0.2694, "step": 3531000 }, { "epoch": 2.12, "learning_rate": 4.121556626471326e-05, "loss": 0.2685, "step": 3531500 }, { "epoch": 2.12, "learning_rate": 4.121346629915269e-05, "loss": 0.2707, "step": 3532000 }, { "epoch": 2.12, "learning_rate": 4.1211370533523246e-05, "loss": 0.2649, "step": 3532500 }, { "epoch": 2.12, "learning_rate": 4.1209270567962686e-05, "loss": 0.2601, "step": 3533000 }, { "epoch": 2.12, "learning_rate": 4.120717060240212e-05, "loss": 0.2697, "step": 3533500 }, { "epoch": 2.12, "learning_rate": 4.120507063684155e-05, "loss": 0.2703, "step": 3534000 }, { "epoch": 2.12, "learning_rate": 4.120297067128099e-05, "loss": 0.2738, "step": 3534500 }, { "epoch": 2.12, "learning_rate": 4.120087490565155e-05, "loss": 0.2708, "step": 3535000 }, { "epoch": 2.12, "learning_rate": 4.119877914002211e-05, "loss": 0.2769, "step": 3535500 }, { "epoch": 2.12, "learning_rate": 4.119667917446154e-05, "loss": 0.2712, "step": 3536000 }, { "epoch": 2.12, "learning_rate": 4.1194579208900974e-05, "loss": 0.2723, "step": 3536500 }, { "epoch": 2.12, "learning_rate": 4.119247924334041e-05, "loss": 0.274, "step": 3537000 }, { "epoch": 2.12, "learning_rate": 4.119037927777984e-05, "loss": 0.2738, "step": 3537500 }, { "epoch": 2.12, "learning_rate": 4.1188279312219274e-05, "loss": 0.2714, "step": 3538000 }, { "epoch": 2.12, "learning_rate": 4.1186179346658715e-05, "loss": 0.2655, "step": 3538500 }, { "epoch": 2.12, "learning_rate": 4.1184083581029275e-05, "loss": 0.2738, "step": 3539000 }, { "epoch": 2.12, "learning_rate": 4.11819836154687e-05, "loss": 0.2777, "step": 3539500 }, { "epoch": 2.12, "learning_rate": 4.117988364990814e-05, "loss": 0.2714, "step": 3540000 }, { "epoch": 2.12, "learning_rate": 4.1177783684347575e-05, "loss": 0.2682, "step": 3540500 }, { "epoch": 2.12, "learning_rate": 4.117568371878701e-05, "loss": 0.2742, "step": 3541000 }, { "epoch": 2.12, "learning_rate": 4.117358375322645e-05, "loss": 0.2741, "step": 3541500 }, { "epoch": 2.12, "learning_rate": 4.117148378766588e-05, "loss": 0.267, "step": 3542000 }, { "epoch": 2.12, "learning_rate": 4.1169383822105316e-05, "loss": 0.2686, "step": 3542500 }, { "epoch": 2.12, "learning_rate": 4.116728805647587e-05, "loss": 0.2713, "step": 3543000 }, { "epoch": 2.12, "learning_rate": 4.116518809091531e-05, "loss": 0.2672, "step": 3543500 }, { "epoch": 2.12, "learning_rate": 4.116308812535474e-05, "loss": 0.2715, "step": 3544000 }, { "epoch": 2.13, "learning_rate": 4.116098815979418e-05, "loss": 0.2666, "step": 3544500 }, { "epoch": 2.13, "learning_rate": 4.115888819423362e-05, "loss": 0.2732, "step": 3545000 }, { "epoch": 2.13, "learning_rate": 4.115679242860417e-05, "loss": 0.2696, "step": 3545500 }, { "epoch": 2.13, "learning_rate": 4.1154692463043604e-05, "loss": 0.2686, "step": 3546000 }, { "epoch": 2.13, "learning_rate": 4.115259249748304e-05, "loss": 0.266, "step": 3546500 }, { "epoch": 2.13, "learning_rate": 4.115049253192248e-05, "loss": 0.2748, "step": 3547000 }, { "epoch": 2.13, "learning_rate": 4.114839676629303e-05, "loss": 0.2657, "step": 3547500 }, { "epoch": 2.13, "learning_rate": 4.1146296800732465e-05, "loss": 0.2714, "step": 3548000 }, { "epoch": 2.13, "learning_rate": 4.1144196835171905e-05, "loss": 0.2638, "step": 3548500 }, { "epoch": 2.13, "learning_rate": 4.114209686961134e-05, "loss": 0.2668, "step": 3549000 }, { "epoch": 2.13, "learning_rate": 4.113999690405077e-05, "loss": 0.2695, "step": 3549500 }, { "epoch": 2.13, "learning_rate": 4.113789693849021e-05, "loss": 0.2686, "step": 3550000 }, { "epoch": 2.13, "learning_rate": 4.1135796972929645e-05, "loss": 0.2678, "step": 3550500 }, { "epoch": 2.13, "learning_rate": 4.11337012073002e-05, "loss": 0.2708, "step": 3551000 }, { "epoch": 2.13, "learning_rate": 4.113160124173963e-05, "loss": 0.2746, "step": 3551500 }, { "epoch": 2.13, "learning_rate": 4.112950127617907e-05, "loss": 0.273, "step": 3552000 }, { "epoch": 2.13, "learning_rate": 4.1127401310618506e-05, "loss": 0.2645, "step": 3552500 }, { "epoch": 2.13, "learning_rate": 4.112530134505794e-05, "loss": 0.2669, "step": 3553000 }, { "epoch": 2.13, "learning_rate": 4.112320137949738e-05, "loss": 0.268, "step": 3553500 }, { "epoch": 2.13, "learning_rate": 4.112110141393681e-05, "loss": 0.2717, "step": 3554000 }, { "epoch": 2.13, "learning_rate": 4.111900144837624e-05, "loss": 0.272, "step": 3554500 }, { "epoch": 2.13, "learning_rate": 4.11169056827468e-05, "loss": 0.2739, "step": 3555000 }, { "epoch": 2.13, "learning_rate": 4.111480571718624e-05, "loss": 0.2733, "step": 3555500 }, { "epoch": 2.13, "learning_rate": 4.1112705751625674e-05, "loss": 0.2685, "step": 3556000 }, { "epoch": 2.13, "learning_rate": 4.111060578606511e-05, "loss": 0.2711, "step": 3556500 }, { "epoch": 2.13, "learning_rate": 4.110850582050454e-05, "loss": 0.2718, "step": 3557000 }, { "epoch": 2.13, "learning_rate": 4.11064100548751e-05, "loss": 0.272, "step": 3557500 }, { "epoch": 2.13, "learning_rate": 4.1104310089314535e-05, "loss": 0.2701, "step": 3558000 }, { "epoch": 2.13, "learning_rate": 4.1102210123753975e-05, "loss": 0.2684, "step": 3558500 }, { "epoch": 2.13, "learning_rate": 4.110011015819341e-05, "loss": 0.276, "step": 3559000 }, { "epoch": 2.13, "learning_rate": 4.109801439256396e-05, "loss": 0.271, "step": 3559500 }, { "epoch": 2.13, "learning_rate": 4.1095914427003396e-05, "loss": 0.272, "step": 3560000 }, { "epoch": 2.13, "learning_rate": 4.109381866137395e-05, "loss": 0.2711, "step": 3560500 }, { "epoch": 2.13, "learning_rate": 4.109171869581339e-05, "loss": 0.2719, "step": 3561000 }, { "epoch": 2.14, "learning_rate": 4.108961873025282e-05, "loss": 0.2759, "step": 3561500 }, { "epoch": 2.14, "learning_rate": 4.1087518764692256e-05, "loss": 0.2715, "step": 3562000 }, { "epoch": 2.14, "learning_rate": 4.1085418799131696e-05, "loss": 0.2645, "step": 3562500 }, { "epoch": 2.14, "learning_rate": 4.108331883357113e-05, "loss": 0.2798, "step": 3563000 }, { "epoch": 2.14, "learning_rate": 4.1081218868010563e-05, "loss": 0.2666, "step": 3563500 }, { "epoch": 2.14, "learning_rate": 4.107911890245e-05, "loss": 0.2695, "step": 3564000 }, { "epoch": 2.14, "learning_rate": 4.107702313682056e-05, "loss": 0.2702, "step": 3564500 }, { "epoch": 2.14, "learning_rate": 4.107492317125999e-05, "loss": 0.2724, "step": 3565000 }, { "epoch": 2.14, "learning_rate": 4.107282320569943e-05, "loss": 0.2726, "step": 3565500 }, { "epoch": 2.14, "learning_rate": 4.1070723240138864e-05, "loss": 0.2685, "step": 3566000 }, { "epoch": 2.14, "learning_rate": 4.106862747450942e-05, "loss": 0.2726, "step": 3566500 }, { "epoch": 2.14, "learning_rate": 4.106652750894885e-05, "loss": 0.2702, "step": 3567000 }, { "epoch": 2.14, "learning_rate": 4.106442754338829e-05, "loss": 0.2687, "step": 3567500 }, { "epoch": 2.14, "learning_rate": 4.1062327577827725e-05, "loss": 0.2704, "step": 3568000 }, { "epoch": 2.14, "learning_rate": 4.106022761226716e-05, "loss": 0.2661, "step": 3568500 }, { "epoch": 2.14, "learning_rate": 4.105813184663771e-05, "loss": 0.2691, "step": 3569000 }, { "epoch": 2.14, "learning_rate": 4.105603608100827e-05, "loss": 0.2766, "step": 3569500 }, { "epoch": 2.14, "learning_rate": 4.1053936115447706e-05, "loss": 0.2734, "step": 3570000 }, { "epoch": 2.14, "learning_rate": 4.105183614988714e-05, "loss": 0.2737, "step": 3570500 }, { "epoch": 2.14, "learning_rate": 4.104973618432658e-05, "loss": 0.2739, "step": 3571000 }, { "epoch": 2.14, "learning_rate": 4.104763621876601e-05, "loss": 0.2659, "step": 3571500 }, { "epoch": 2.14, "learning_rate": 4.1045536253205447e-05, "loss": 0.2644, "step": 3572000 }, { "epoch": 2.14, "learning_rate": 4.104343628764489e-05, "loss": 0.2637, "step": 3572500 }, { "epoch": 2.14, "learning_rate": 4.104133632208432e-05, "loss": 0.2605, "step": 3573000 }, { "epoch": 2.14, "learning_rate": 4.1039240556454874e-05, "loss": 0.2673, "step": 3573500 }, { "epoch": 2.14, "learning_rate": 4.103714059089431e-05, "loss": 0.269, "step": 3574000 }, { "epoch": 2.14, "learning_rate": 4.103504062533375e-05, "loss": 0.2686, "step": 3574500 }, { "epoch": 2.14, "learning_rate": 4.103294065977318e-05, "loss": 0.2702, "step": 3575000 }, { "epoch": 2.14, "learning_rate": 4.1030840694212614e-05, "loss": 0.2617, "step": 3575500 }, { "epoch": 2.14, "learning_rate": 4.102874492858317e-05, "loss": 0.2686, "step": 3576000 }, { "epoch": 2.14, "learning_rate": 4.102664496302261e-05, "loss": 0.274, "step": 3576500 }, { "epoch": 2.14, "learning_rate": 4.102454499746204e-05, "loss": 0.2624, "step": 3577000 }, { "epoch": 2.14, "learning_rate": 4.1022445031901475e-05, "loss": 0.2702, "step": 3577500 }, { "epoch": 2.15, "learning_rate": 4.1020349266272036e-05, "loss": 0.263, "step": 3578000 }, { "epoch": 2.15, "learning_rate": 4.101824930071147e-05, "loss": 0.266, "step": 3578500 }, { "epoch": 2.15, "learning_rate": 4.10161493351509e-05, "loss": 0.2703, "step": 3579000 }, { "epoch": 2.15, "learning_rate": 4.101404936959034e-05, "loss": 0.2645, "step": 3579500 }, { "epoch": 2.15, "learning_rate": 4.1011953603960896e-05, "loss": 0.2641, "step": 3580000 }, { "epoch": 2.15, "learning_rate": 4.100985363840033e-05, "loss": 0.2704, "step": 3580500 }, { "epoch": 2.15, "learning_rate": 4.100775367283976e-05, "loss": 0.273, "step": 3581000 }, { "epoch": 2.15, "learning_rate": 4.1005653707279203e-05, "loss": 0.27, "step": 3581500 }, { "epoch": 2.15, "learning_rate": 4.100355794164976e-05, "loss": 0.2707, "step": 3582000 }, { "epoch": 2.15, "learning_rate": 4.100145797608919e-05, "loss": 0.267, "step": 3582500 }, { "epoch": 2.15, "learning_rate": 4.0999358010528624e-05, "loss": 0.2724, "step": 3583000 }, { "epoch": 2.15, "learning_rate": 4.0997258044968064e-05, "loss": 0.2659, "step": 3583500 }, { "epoch": 2.15, "learning_rate": 4.099516227933862e-05, "loss": 0.2639, "step": 3584000 }, { "epoch": 2.15, "learning_rate": 4.099306231377805e-05, "loss": 0.2692, "step": 3584500 }, { "epoch": 2.15, "learning_rate": 4.099096234821749e-05, "loss": 0.2706, "step": 3585000 }, { "epoch": 2.15, "learning_rate": 4.0988862382656925e-05, "loss": 0.2719, "step": 3585500 }, { "epoch": 2.15, "learning_rate": 4.098676241709636e-05, "loss": 0.2697, "step": 3586000 }, { "epoch": 2.15, "learning_rate": 4.09846624515358e-05, "loss": 0.2648, "step": 3586500 }, { "epoch": 2.15, "learning_rate": 4.098256248597523e-05, "loss": 0.2701, "step": 3587000 }, { "epoch": 2.15, "learning_rate": 4.0980462520414665e-05, "loss": 0.2768, "step": 3587500 }, { "epoch": 2.15, "learning_rate": 4.097836675478522e-05, "loss": 0.2658, "step": 3588000 }, { "epoch": 2.15, "learning_rate": 4.097626678922466e-05, "loss": 0.2683, "step": 3588500 }, { "epoch": 2.15, "learning_rate": 4.097416682366409e-05, "loss": 0.2687, "step": 3589000 }, { "epoch": 2.15, "learning_rate": 4.0972066858103526e-05, "loss": 0.2636, "step": 3589500 }, { "epoch": 2.15, "learning_rate": 4.096997109247408e-05, "loss": 0.2631, "step": 3590000 }, { "epoch": 2.15, "learning_rate": 4.096787112691352e-05, "loss": 0.2732, "step": 3590500 }, { "epoch": 2.15, "learning_rate": 4.0965771161352953e-05, "loss": 0.2665, "step": 3591000 }, { "epoch": 2.15, "learning_rate": 4.096367119579239e-05, "loss": 0.2616, "step": 3591500 }, { "epoch": 2.15, "learning_rate": 4.096157123023183e-05, "loss": 0.2729, "step": 3592000 }, { "epoch": 2.15, "learning_rate": 4.095947546460238e-05, "loss": 0.2659, "step": 3592500 }, { "epoch": 2.15, "learning_rate": 4.0957375499041814e-05, "loss": 0.2724, "step": 3593000 }, { "epoch": 2.15, "learning_rate": 4.0955275533481254e-05, "loss": 0.2759, "step": 3593500 }, { "epoch": 2.15, "learning_rate": 4.095317976785181e-05, "loss": 0.2701, "step": 3594000 }, { "epoch": 2.16, "learning_rate": 4.095107980229124e-05, "loss": 0.2748, "step": 3594500 }, { "epoch": 2.16, "learning_rate": 4.0948979836730675e-05, "loss": 0.2681, "step": 3595000 }, { "epoch": 2.16, "learning_rate": 4.0946879871170115e-05, "loss": 0.2729, "step": 3595500 }, { "epoch": 2.16, "learning_rate": 4.094477990560955e-05, "loss": 0.2679, "step": 3596000 }, { "epoch": 2.16, "learning_rate": 4.094267994004898e-05, "loss": 0.2633, "step": 3596500 }, { "epoch": 2.16, "learning_rate": 4.094057997448842e-05, "loss": 0.2685, "step": 3597000 }, { "epoch": 2.16, "learning_rate": 4.093848000892785e-05, "loss": 0.2692, "step": 3597500 }, { "epoch": 2.16, "learning_rate": 4.093638004336728e-05, "loss": 0.2711, "step": 3598000 }, { "epoch": 2.16, "learning_rate": 4.093428427773785e-05, "loss": 0.2626, "step": 3598500 }, { "epoch": 2.16, "learning_rate": 4.09321885121084e-05, "loss": 0.2664, "step": 3599000 }, { "epoch": 2.16, "learning_rate": 4.0930088546547837e-05, "loss": 0.2727, "step": 3599500 }, { "epoch": 2.16, "learning_rate": 4.092798858098727e-05, "loss": 0.2662, "step": 3600000 }, { "epoch": 2.16, "eval_loss": 0.24784673750400543, "eval_runtime": 1466.7303, "eval_samples_per_second": 359.112, "eval_steps_per_second": 59.852, "step": 3600000 }, { "epoch": 2.16, "learning_rate": 4.092588861542671e-05, "loss": 0.2682, "step": 3600500 }, { "epoch": 2.16, "learning_rate": 4.0923788649866144e-05, "loss": 0.2684, "step": 3601000 }, { "epoch": 2.16, "learning_rate": 4.092168868430558e-05, "loss": 0.2771, "step": 3601500 }, { "epoch": 2.16, "learning_rate": 4.091958871874502e-05, "loss": 0.2649, "step": 3602000 }, { "epoch": 2.16, "learning_rate": 4.0917488753184444e-05, "loss": 0.2679, "step": 3602500 }, { "epoch": 2.16, "learning_rate": 4.091538878762388e-05, "loss": 0.2688, "step": 3603000 }, { "epoch": 2.16, "learning_rate": 4.091329302199444e-05, "loss": 0.278, "step": 3603500 }, { "epoch": 2.16, "learning_rate": 4.091119305643388e-05, "loss": 0.2628, "step": 3604000 }, { "epoch": 2.16, "learning_rate": 4.0909093090873305e-05, "loss": 0.2669, "step": 3604500 }, { "epoch": 2.16, "learning_rate": 4.0906997325243865e-05, "loss": 0.2752, "step": 3605000 }, { "epoch": 2.16, "learning_rate": 4.0904897359683305e-05, "loss": 0.2669, "step": 3605500 }, { "epoch": 2.16, "learning_rate": 4.090279739412274e-05, "loss": 0.2746, "step": 3606000 }, { "epoch": 2.16, "learning_rate": 4.090069742856217e-05, "loss": 0.2752, "step": 3606500 }, { "epoch": 2.16, "learning_rate": 4.0898597463001606e-05, "loss": 0.2682, "step": 3607000 }, { "epoch": 2.16, "learning_rate": 4.089649749744104e-05, "loss": 0.2765, "step": 3607500 }, { "epoch": 2.16, "learning_rate": 4.089439753188047e-05, "loss": 0.2743, "step": 3608000 }, { "epoch": 2.16, "learning_rate": 4.089229756631991e-05, "loss": 0.2679, "step": 3608500 }, { "epoch": 2.16, "learning_rate": 4.0890197600759346e-05, "loss": 0.2679, "step": 3609000 }, { "epoch": 2.16, "learning_rate": 4.08881018351299e-05, "loss": 0.2627, "step": 3609500 }, { "epoch": 2.16, "learning_rate": 4.088601026943158e-05, "loss": 0.2619, "step": 3610000 }, { "epoch": 2.16, "learning_rate": 4.0883910303871014e-05, "loss": 0.2704, "step": 3610500 }, { "epoch": 2.16, "learning_rate": 4.0881810338310454e-05, "loss": 0.2675, "step": 3611000 }, { "epoch": 2.17, "learning_rate": 4.087971037274989e-05, "loss": 0.2688, "step": 3611500 }, { "epoch": 2.17, "learning_rate": 4.087761040718932e-05, "loss": 0.2697, "step": 3612000 }, { "epoch": 2.17, "learning_rate": 4.087551044162876e-05, "loss": 0.269, "step": 3612500 }, { "epoch": 2.17, "learning_rate": 4.0873410476068195e-05, "loss": 0.27, "step": 3613000 }, { "epoch": 2.17, "learning_rate": 4.087131051050763e-05, "loss": 0.2647, "step": 3613500 }, { "epoch": 2.17, "learning_rate": 4.086921054494707e-05, "loss": 0.2708, "step": 3614000 }, { "epoch": 2.17, "learning_rate": 4.0867110579386495e-05, "loss": 0.2747, "step": 3614500 }, { "epoch": 2.17, "learning_rate": 4.086501061382593e-05, "loss": 0.2723, "step": 3615000 }, { "epoch": 2.17, "learning_rate": 4.086291064826537e-05, "loss": 0.2649, "step": 3615500 }, { "epoch": 2.17, "learning_rate": 4.086081488263593e-05, "loss": 0.266, "step": 3616000 }, { "epoch": 2.17, "learning_rate": 4.0858714917075356e-05, "loss": 0.276, "step": 3616500 }, { "epoch": 2.17, "learning_rate": 4.085661495151479e-05, "loss": 0.2676, "step": 3617000 }, { "epoch": 2.17, "learning_rate": 4.085451498595423e-05, "loss": 0.2701, "step": 3617500 }, { "epoch": 2.17, "learning_rate": 4.085241922032479e-05, "loss": 0.2786, "step": 3618000 }, { "epoch": 2.17, "learning_rate": 4.0850319254764223e-05, "loss": 0.2663, "step": 3618500 }, { "epoch": 2.17, "learning_rate": 4.084821928920366e-05, "loss": 0.2659, "step": 3619000 }, { "epoch": 2.17, "learning_rate": 4.084611932364309e-05, "loss": 0.2708, "step": 3619500 }, { "epoch": 2.17, "learning_rate": 4.0844019358082524e-05, "loss": 0.2656, "step": 3620000 }, { "epoch": 2.17, "learning_rate": 4.0841919392521964e-05, "loss": 0.2728, "step": 3620500 }, { "epoch": 2.17, "learning_rate": 4.08398194269614e-05, "loss": 0.2719, "step": 3621000 }, { "epoch": 2.17, "learning_rate": 4.083771946140083e-05, "loss": 0.2692, "step": 3621500 }, { "epoch": 2.17, "learning_rate": 4.0835623695771385e-05, "loss": 0.2697, "step": 3622000 }, { "epoch": 2.17, "learning_rate": 4.0833523730210825e-05, "loss": 0.2658, "step": 3622500 }, { "epoch": 2.17, "learning_rate": 4.083142376465026e-05, "loss": 0.2635, "step": 3623000 }, { "epoch": 2.17, "learning_rate": 4.082932379908969e-05, "loss": 0.2712, "step": 3623500 }, { "epoch": 2.17, "learning_rate": 4.082722383352913e-05, "loss": 0.2668, "step": 3624000 }, { "epoch": 2.17, "learning_rate": 4.0825128067899685e-05, "loss": 0.2675, "step": 3624500 }, { "epoch": 2.17, "learning_rate": 4.082302810233912e-05, "loss": 0.2681, "step": 3625000 }, { "epoch": 2.17, "learning_rate": 4.082092813677855e-05, "loss": 0.261, "step": 3625500 }, { "epoch": 2.17, "learning_rate": 4.081882817121799e-05, "loss": 0.2663, "step": 3626000 }, { "epoch": 2.17, "learning_rate": 4.0816732405588546e-05, "loss": 0.2725, "step": 3626500 }, { "epoch": 2.17, "learning_rate": 4.081463244002798e-05, "loss": 0.2737, "step": 3627000 }, { "epoch": 2.17, "learning_rate": 4.081253247446742e-05, "loss": 0.2647, "step": 3627500 }, { "epoch": 2.18, "learning_rate": 4.081043250890685e-05, "loss": 0.2656, "step": 3628000 }, { "epoch": 2.18, "learning_rate": 4.080833674327741e-05, "loss": 0.2647, "step": 3628500 }, { "epoch": 2.18, "learning_rate": 4.080623677771684e-05, "loss": 0.2737, "step": 3629000 }, { "epoch": 2.18, "learning_rate": 4.080413681215628e-05, "loss": 0.2696, "step": 3629500 }, { "epoch": 2.18, "learning_rate": 4.0802036846595714e-05, "loss": 0.2751, "step": 3630000 }, { "epoch": 2.18, "learning_rate": 4.0799941080966274e-05, "loss": 0.2701, "step": 3630500 }, { "epoch": 2.18, "learning_rate": 4.07978411154057e-05, "loss": 0.2711, "step": 3631000 }, { "epoch": 2.18, "learning_rate": 4.079574114984514e-05, "loss": 0.2707, "step": 3631500 }, { "epoch": 2.18, "learning_rate": 4.0793641184284575e-05, "loss": 0.2702, "step": 3632000 }, { "epoch": 2.18, "learning_rate": 4.079154121872401e-05, "loss": 0.2659, "step": 3632500 }, { "epoch": 2.18, "learning_rate": 4.078944545309457e-05, "loss": 0.2692, "step": 3633000 }, { "epoch": 2.18, "learning_rate": 4.0787345487534e-05, "loss": 0.2695, "step": 3633500 }, { "epoch": 2.18, "learning_rate": 4.0785245521973436e-05, "loss": 0.2652, "step": 3634000 }, { "epoch": 2.18, "learning_rate": 4.0783145556412876e-05, "loss": 0.2681, "step": 3634500 }, { "epoch": 2.18, "learning_rate": 4.078104559085231e-05, "loss": 0.2663, "step": 3635000 }, { "epoch": 2.18, "learning_rate": 4.077894982522286e-05, "loss": 0.2711, "step": 3635500 }, { "epoch": 2.18, "learning_rate": 4.0776849859662296e-05, "loss": 0.2639, "step": 3636000 }, { "epoch": 2.18, "learning_rate": 4.0774749894101736e-05, "loss": 0.279, "step": 3636500 }, { "epoch": 2.18, "learning_rate": 4.077264992854117e-05, "loss": 0.269, "step": 3637000 }, { "epoch": 2.18, "learning_rate": 4.0770549962980603e-05, "loss": 0.2723, "step": 3637500 }, { "epoch": 2.18, "learning_rate": 4.076845419735116e-05, "loss": 0.265, "step": 3638000 }, { "epoch": 2.18, "learning_rate": 4.07663542317906e-05, "loss": 0.2671, "step": 3638500 }, { "epoch": 2.18, "learning_rate": 4.076425426623003e-05, "loss": 0.267, "step": 3639000 }, { "epoch": 2.18, "learning_rate": 4.0762154300669464e-05, "loss": 0.2677, "step": 3639500 }, { "epoch": 2.18, "learning_rate": 4.0760058535040024e-05, "loss": 0.2691, "step": 3640000 }, { "epoch": 2.18, "learning_rate": 4.075795856947946e-05, "loss": 0.2654, "step": 3640500 }, { "epoch": 2.18, "learning_rate": 4.075585860391889e-05, "loss": 0.2689, "step": 3641000 }, { "epoch": 2.18, "learning_rate": 4.075375863835833e-05, "loss": 0.2665, "step": 3641500 }, { "epoch": 2.18, "learning_rate": 4.075166287272889e-05, "loss": 0.2644, "step": 3642000 }, { "epoch": 2.18, "learning_rate": 4.0749562907168325e-05, "loss": 0.2755, "step": 3642500 }, { "epoch": 2.18, "learning_rate": 4.074746294160775e-05, "loss": 0.2749, "step": 3643000 }, { "epoch": 2.18, "learning_rate": 4.074536297604719e-05, "loss": 0.2732, "step": 3643500 }, { "epoch": 2.18, "learning_rate": 4.074326721041775e-05, "loss": 0.2705, "step": 3644000 }, { "epoch": 2.19, "learning_rate": 4.0741167244857186e-05, "loss": 0.2704, "step": 3644500 }, { "epoch": 2.19, "learning_rate": 4.073906727929662e-05, "loss": 0.2723, "step": 3645000 }, { "epoch": 2.19, "learning_rate": 4.073696731373605e-05, "loss": 0.2694, "step": 3645500 }, { "epoch": 2.19, "learning_rate": 4.0734867348175487e-05, "loss": 0.2675, "step": 3646000 }, { "epoch": 2.19, "learning_rate": 4.073276738261492e-05, "loss": 0.2707, "step": 3646500 }, { "epoch": 2.19, "learning_rate": 4.073066741705436e-05, "loss": 0.2684, "step": 3647000 }, { "epoch": 2.19, "learning_rate": 4.0728567451493794e-05, "loss": 0.2753, "step": 3647500 }, { "epoch": 2.19, "learning_rate": 4.072647168586435e-05, "loss": 0.2689, "step": 3648000 }, { "epoch": 2.19, "learning_rate": 4.072437172030379e-05, "loss": 0.2686, "step": 3648500 }, { "epoch": 2.19, "learning_rate": 4.072227175474322e-05, "loss": 0.272, "step": 3649000 }, { "epoch": 2.19, "learning_rate": 4.0720171789182654e-05, "loss": 0.2688, "step": 3649500 }, { "epoch": 2.19, "learning_rate": 4.071807602355321e-05, "loss": 0.2728, "step": 3650000 }, { "epoch": 2.19, "learning_rate": 4.071597605799265e-05, "loss": 0.2668, "step": 3650500 }, { "epoch": 2.19, "learning_rate": 4.071387609243208e-05, "loss": 0.266, "step": 3651000 }, { "epoch": 2.19, "learning_rate": 4.0711776126871515e-05, "loss": 0.2696, "step": 3651500 }, { "epoch": 2.19, "learning_rate": 4.0709684561173196e-05, "loss": 0.2718, "step": 3652000 }, { "epoch": 2.19, "learning_rate": 4.070758459561263e-05, "loss": 0.2652, "step": 3652500 }, { "epoch": 2.19, "learning_rate": 4.070548463005207e-05, "loss": 0.2726, "step": 3653000 }, { "epoch": 2.19, "learning_rate": 4.07033846644915e-05, "loss": 0.2692, "step": 3653500 }, { "epoch": 2.19, "learning_rate": 4.0701284698930936e-05, "loss": 0.2621, "step": 3654000 }, { "epoch": 2.19, "learning_rate": 4.0699184733370376e-05, "loss": 0.2678, "step": 3654500 }, { "epoch": 2.19, "learning_rate": 4.069708896774093e-05, "loss": 0.2758, "step": 3655000 }, { "epoch": 2.19, "learning_rate": 4.0694989002180364e-05, "loss": 0.2699, "step": 3655500 }, { "epoch": 2.19, "learning_rate": 4.0692889036619804e-05, "loss": 0.2608, "step": 3656000 }, { "epoch": 2.19, "learning_rate": 4.069078907105924e-05, "loss": 0.2671, "step": 3656500 }, { "epoch": 2.19, "learning_rate": 4.0688689105498664e-05, "loss": 0.2713, "step": 3657000 }, { "epoch": 2.19, "learning_rate": 4.0686589139938104e-05, "loss": 0.2649, "step": 3657500 }, { "epoch": 2.19, "learning_rate": 4.0684493374308664e-05, "loss": 0.2661, "step": 3658000 }, { "epoch": 2.19, "learning_rate": 4.06823934087481e-05, "loss": 0.2683, "step": 3658500 }, { "epoch": 2.19, "learning_rate": 4.068029344318753e-05, "loss": 0.2637, "step": 3659000 }, { "epoch": 2.19, "learning_rate": 4.0678193477626965e-05, "loss": 0.2747, "step": 3659500 }, { "epoch": 2.19, "learning_rate": 4.06760935120664e-05, "loss": 0.265, "step": 3660000 }, { "epoch": 2.19, "learning_rate": 4.067399354650583e-05, "loss": 0.272, "step": 3660500 }, { "epoch": 2.19, "learning_rate": 4.067189358094527e-05, "loss": 0.2731, "step": 3661000 }, { "epoch": 2.2, "learning_rate": 4.0669793615384705e-05, "loss": 0.2727, "step": 3661500 }, { "epoch": 2.2, "learning_rate": 4.066769784975526e-05, "loss": 0.2626, "step": 3662000 }, { "epoch": 2.2, "learning_rate": 4.06655978841947e-05, "loss": 0.2638, "step": 3662500 }, { "epoch": 2.2, "learning_rate": 4.066349791863413e-05, "loss": 0.2703, "step": 3663000 }, { "epoch": 2.2, "learning_rate": 4.0661397953073566e-05, "loss": 0.2696, "step": 3663500 }, { "epoch": 2.2, "learning_rate": 4.0659297987513006e-05, "loss": 0.2667, "step": 3664000 }, { "epoch": 2.2, "learning_rate": 4.065720222188356e-05, "loss": 0.2736, "step": 3664500 }, { "epoch": 2.2, "learning_rate": 4.0655102256322993e-05, "loss": 0.2736, "step": 3665000 }, { "epoch": 2.2, "learning_rate": 4.065300229076243e-05, "loss": 0.2716, "step": 3665500 }, { "epoch": 2.2, "learning_rate": 4.065090232520187e-05, "loss": 0.2731, "step": 3666000 }, { "epoch": 2.2, "learning_rate": 4.064880655957242e-05, "loss": 0.2675, "step": 3666500 }, { "epoch": 2.2, "learning_rate": 4.0646706594011854e-05, "loss": 0.2668, "step": 3667000 }, { "epoch": 2.2, "learning_rate": 4.064460662845129e-05, "loss": 0.2705, "step": 3667500 }, { "epoch": 2.2, "learning_rate": 4.064250666289073e-05, "loss": 0.2683, "step": 3668000 }, { "epoch": 2.2, "learning_rate": 4.064041089726129e-05, "loss": 0.2636, "step": 3668500 }, { "epoch": 2.2, "learning_rate": 4.0638310931700715e-05, "loss": 0.2716, "step": 3669000 }, { "epoch": 2.2, "learning_rate": 4.0636215166071275e-05, "loss": 0.2733, "step": 3669500 }, { "epoch": 2.2, "learning_rate": 4.0634115200510716e-05, "loss": 0.2744, "step": 3670000 }, { "epoch": 2.2, "learning_rate": 4.063201523495015e-05, "loss": 0.2655, "step": 3670500 }, { "epoch": 2.2, "learning_rate": 4.062991526938958e-05, "loss": 0.2715, "step": 3671000 }, { "epoch": 2.2, "learning_rate": 4.0627815303829016e-05, "loss": 0.2674, "step": 3671500 }, { "epoch": 2.2, "learning_rate": 4.062571533826845e-05, "loss": 0.2678, "step": 3672000 }, { "epoch": 2.2, "learning_rate": 4.062361537270788e-05, "loss": 0.2704, "step": 3672500 }, { "epoch": 2.2, "learning_rate": 4.062151540714732e-05, "loss": 0.2681, "step": 3673000 }, { "epoch": 2.2, "learning_rate": 4.0619419641517883e-05, "loss": 0.2646, "step": 3673500 }, { "epoch": 2.2, "learning_rate": 4.061732387588844e-05, "loss": 0.2686, "step": 3674000 }, { "epoch": 2.2, "learning_rate": 4.061522391032787e-05, "loss": 0.267, "step": 3674500 }, { "epoch": 2.2, "learning_rate": 4.0613123944767304e-05, "loss": 0.2718, "step": 3675000 }, { "epoch": 2.2, "learning_rate": 4.0611023979206744e-05, "loss": 0.2687, "step": 3675500 }, { "epoch": 2.2, "learning_rate": 4.06089282135773e-05, "loss": 0.2711, "step": 3676000 }, { "epoch": 2.2, "learning_rate": 4.060682824801673e-05, "loss": 0.2666, "step": 3676500 }, { "epoch": 2.2, "learning_rate": 4.060472828245617e-05, "loss": 0.2707, "step": 3677000 }, { "epoch": 2.2, "learning_rate": 4.0602628316895605e-05, "loss": 0.2687, "step": 3677500 }, { "epoch": 2.21, "learning_rate": 4.060052835133504e-05, "loss": 0.2625, "step": 3678000 }, { "epoch": 2.21, "learning_rate": 4.059842838577447e-05, "loss": 0.2734, "step": 3678500 }, { "epoch": 2.21, "learning_rate": 4.059633262014503e-05, "loss": 0.2716, "step": 3679000 }, { "epoch": 2.21, "learning_rate": 4.0594232654584466e-05, "loss": 0.2637, "step": 3679500 }, { "epoch": 2.21, "learning_rate": 4.05921326890239e-05, "loss": 0.2759, "step": 3680000 }, { "epoch": 2.21, "learning_rate": 4.059003272346334e-05, "loss": 0.267, "step": 3680500 }, { "epoch": 2.21, "learning_rate": 4.0587932757902766e-05, "loss": 0.2648, "step": 3681000 }, { "epoch": 2.21, "learning_rate": 4.05858327923422e-05, "loss": 0.2701, "step": 3681500 }, { "epoch": 2.21, "learning_rate": 4.058373282678164e-05, "loss": 0.2751, "step": 3682000 }, { "epoch": 2.21, "learning_rate": 4.058163286122107e-05, "loss": 0.2648, "step": 3682500 }, { "epoch": 2.21, "learning_rate": 4.057953289566051e-05, "loss": 0.2696, "step": 3683000 }, { "epoch": 2.21, "learning_rate": 4.057744132996219e-05, "loss": 0.2677, "step": 3683500 }, { "epoch": 2.21, "learning_rate": 4.057534136440163e-05, "loss": 0.2666, "step": 3684000 }, { "epoch": 2.21, "learning_rate": 4.057324139884106e-05, "loss": 0.2682, "step": 3684500 }, { "epoch": 2.21, "learning_rate": 4.0571141433280494e-05, "loss": 0.2712, "step": 3685000 }, { "epoch": 2.21, "learning_rate": 4.056904566765105e-05, "loss": 0.266, "step": 3685500 }, { "epoch": 2.21, "learning_rate": 4.056694570209049e-05, "loss": 0.2619, "step": 3686000 }, { "epoch": 2.21, "learning_rate": 4.056484573652992e-05, "loss": 0.2624, "step": 3686500 }, { "epoch": 2.21, "learning_rate": 4.0562745770969355e-05, "loss": 0.2693, "step": 3687000 }, { "epoch": 2.21, "learning_rate": 4.0560645805408795e-05, "loss": 0.2601, "step": 3687500 }, { "epoch": 2.21, "learning_rate": 4.055854583984822e-05, "loss": 0.2705, "step": 3688000 }, { "epoch": 2.21, "learning_rate": 4.0556445874287655e-05, "loss": 0.2657, "step": 3688500 }, { "epoch": 2.21, "learning_rate": 4.0554345908727096e-05, "loss": 0.2714, "step": 3689000 }, { "epoch": 2.21, "learning_rate": 4.055224594316653e-05, "loss": 0.2755, "step": 3689500 }, { "epoch": 2.21, "learning_rate": 4.055014597760597e-05, "loss": 0.2728, "step": 3690000 }, { "epoch": 2.21, "learning_rate": 4.05480460120454e-05, "loss": 0.2719, "step": 3690500 }, { "epoch": 2.21, "learning_rate": 4.0545946046484836e-05, "loss": 0.2745, "step": 3691000 }, { "epoch": 2.21, "learning_rate": 4.054385028085539e-05, "loss": 0.2621, "step": 3691500 }, { "epoch": 2.21, "learning_rate": 4.054175031529483e-05, "loss": 0.2691, "step": 3692000 }, { "epoch": 2.21, "learning_rate": 4.0539650349734263e-05, "loss": 0.2645, "step": 3692500 }, { "epoch": 2.21, "learning_rate": 4.05375503841737e-05, "loss": 0.2698, "step": 3693000 }, { "epoch": 2.21, "learning_rate": 4.053545461854425e-05, "loss": 0.2664, "step": 3693500 }, { "epoch": 2.21, "learning_rate": 4.053335465298369e-05, "loss": 0.2685, "step": 3694000 }, { "epoch": 2.22, "learning_rate": 4.0531254687423124e-05, "loss": 0.268, "step": 3694500 }, { "epoch": 2.22, "learning_rate": 4.052915472186256e-05, "loss": 0.2699, "step": 3695000 }, { "epoch": 2.22, "learning_rate": 4.052705895623311e-05, "loss": 0.2689, "step": 3695500 }, { "epoch": 2.22, "learning_rate": 4.052495899067255e-05, "loss": 0.2639, "step": 3696000 }, { "epoch": 2.22, "learning_rate": 4.052286322504311e-05, "loss": 0.2663, "step": 3696500 }, { "epoch": 2.22, "learning_rate": 4.0520767459413665e-05, "loss": 0.2635, "step": 3697000 }, { "epoch": 2.22, "learning_rate": 4.05186674938531e-05, "loss": 0.2698, "step": 3697500 }, { "epoch": 2.22, "learning_rate": 4.051656752829254e-05, "loss": 0.2687, "step": 3698000 }, { "epoch": 2.22, "learning_rate": 4.051446756273197e-05, "loss": 0.2751, "step": 3698500 }, { "epoch": 2.22, "learning_rate": 4.0512367597171406e-05, "loss": 0.2667, "step": 3699000 }, { "epoch": 2.22, "learning_rate": 4.0510267631610846e-05, "loss": 0.2719, "step": 3699500 }, { "epoch": 2.22, "learning_rate": 4.050816766605027e-05, "loss": 0.2667, "step": 3700000 }, { "epoch": 2.22, "eval_loss": 0.24800211191177368, "eval_runtime": 1464.5788, "eval_samples_per_second": 359.639, "eval_steps_per_second": 59.94, "step": 3700000 }, { "epoch": 2.22, "learning_rate": 4.0506067700489706e-05, "loss": 0.2725, "step": 3700500 }, { "epoch": 2.22, "learning_rate": 4.0503967734929147e-05, "loss": 0.2703, "step": 3701000 }, { "epoch": 2.22, "learning_rate": 4.050186776936858e-05, "loss": 0.2645, "step": 3701500 }, { "epoch": 2.22, "learning_rate": 4.0499767803808013e-05, "loss": 0.2728, "step": 3702000 }, { "epoch": 2.22, "learning_rate": 4.0497667838247454e-05, "loss": 0.2712, "step": 3702500 }, { "epoch": 2.22, "learning_rate": 4.049557207261801e-05, "loss": 0.2701, "step": 3703000 }, { "epoch": 2.22, "learning_rate": 4.049347210705744e-05, "loss": 0.273, "step": 3703500 }, { "epoch": 2.22, "learning_rate": 4.049137214149688e-05, "loss": 0.2689, "step": 3704000 }, { "epoch": 2.22, "learning_rate": 4.0489272175936314e-05, "loss": 0.2691, "step": 3704500 }, { "epoch": 2.22, "learning_rate": 4.048717641030687e-05, "loss": 0.2666, "step": 3705000 }, { "epoch": 2.22, "learning_rate": 4.04850764447463e-05, "loss": 0.2694, "step": 3705500 }, { "epoch": 2.22, "learning_rate": 4.048297647918574e-05, "loss": 0.2657, "step": 3706000 }, { "epoch": 2.22, "learning_rate": 4.0480876513625175e-05, "loss": 0.2691, "step": 3706500 }, { "epoch": 2.22, "learning_rate": 4.047877654806461e-05, "loss": 0.2647, "step": 3707000 }, { "epoch": 2.22, "learning_rate": 4.047668078243516e-05, "loss": 0.2717, "step": 3707500 }, { "epoch": 2.22, "learning_rate": 4.04745808168746e-05, "loss": 0.269, "step": 3708000 }, { "epoch": 2.22, "learning_rate": 4.0472480851314036e-05, "loss": 0.2668, "step": 3708500 }, { "epoch": 2.22, "learning_rate": 4.047038088575347e-05, "loss": 0.2739, "step": 3709000 }, { "epoch": 2.22, "learning_rate": 4.046828512012403e-05, "loss": 0.2685, "step": 3709500 }, { "epoch": 2.22, "learning_rate": 4.046618515456346e-05, "loss": 0.2678, "step": 3710000 }, { "epoch": 2.22, "learning_rate": 4.0464093588865144e-05, "loss": 0.2718, "step": 3710500 }, { "epoch": 2.22, "learning_rate": 4.046199362330458e-05, "loss": 0.2696, "step": 3711000 }, { "epoch": 2.23, "learning_rate": 4.045989365774401e-05, "loss": 0.27, "step": 3711500 }, { "epoch": 2.23, "learning_rate": 4.045779369218345e-05, "loss": 0.2696, "step": 3712000 }, { "epoch": 2.23, "learning_rate": 4.0455693726622884e-05, "loss": 0.2702, "step": 3712500 }, { "epoch": 2.23, "learning_rate": 4.045359376106232e-05, "loss": 0.2695, "step": 3713000 }, { "epoch": 2.23, "learning_rate": 4.045149379550176e-05, "loss": 0.2651, "step": 3713500 }, { "epoch": 2.23, "learning_rate": 4.044939382994119e-05, "loss": 0.2697, "step": 3714000 }, { "epoch": 2.23, "learning_rate": 4.044729386438062e-05, "loss": 0.2651, "step": 3714500 }, { "epoch": 2.23, "learning_rate": 4.044519389882006e-05, "loss": 0.2659, "step": 3715000 }, { "epoch": 2.23, "learning_rate": 4.044309393325949e-05, "loss": 0.2691, "step": 3715500 }, { "epoch": 2.23, "learning_rate": 4.0440993967698925e-05, "loss": 0.2662, "step": 3716000 }, { "epoch": 2.23, "learning_rate": 4.0438898202069486e-05, "loss": 0.2704, "step": 3716500 }, { "epoch": 2.23, "learning_rate": 4.043679823650892e-05, "loss": 0.2684, "step": 3717000 }, { "epoch": 2.23, "learning_rate": 4.043469827094835e-05, "loss": 0.272, "step": 3717500 }, { "epoch": 2.23, "learning_rate": 4.043259830538779e-05, "loss": 0.2718, "step": 3718000 }, { "epoch": 2.23, "learning_rate": 4.043050253975835e-05, "loss": 0.2681, "step": 3718500 }, { "epoch": 2.23, "learning_rate": 4.042840257419778e-05, "loss": 0.275, "step": 3719000 }, { "epoch": 2.23, "learning_rate": 4.042630260863721e-05, "loss": 0.2695, "step": 3719500 }, { "epoch": 2.23, "learning_rate": 4.0424202643076653e-05, "loss": 0.2743, "step": 3720000 }, { "epoch": 2.23, "learning_rate": 4.042210267751609e-05, "loss": 0.2624, "step": 3720500 }, { "epoch": 2.23, "learning_rate": 4.042001111181777e-05, "loss": 0.2695, "step": 3721000 }, { "epoch": 2.23, "learning_rate": 4.04179111462572e-05, "loss": 0.2701, "step": 3721500 }, { "epoch": 2.23, "learning_rate": 4.0415811180696634e-05, "loss": 0.2699, "step": 3722000 }, { "epoch": 2.23, "learning_rate": 4.0413711215136075e-05, "loss": 0.2702, "step": 3722500 }, { "epoch": 2.23, "learning_rate": 4.041161124957551e-05, "loss": 0.2714, "step": 3723000 }, { "epoch": 2.23, "learning_rate": 4.040951128401494e-05, "loss": 0.2682, "step": 3723500 }, { "epoch": 2.23, "learning_rate": 4.0407411318454375e-05, "loss": 0.2715, "step": 3724000 }, { "epoch": 2.23, "learning_rate": 4.040531135289381e-05, "loss": 0.2728, "step": 3724500 }, { "epoch": 2.23, "learning_rate": 4.040321558726437e-05, "loss": 0.2729, "step": 3725000 }, { "epoch": 2.23, "learning_rate": 4.040111982163492e-05, "loss": 0.2709, "step": 3725500 }, { "epoch": 2.23, "learning_rate": 4.039901985607436e-05, "loss": 0.2684, "step": 3726000 }, { "epoch": 2.23, "learning_rate": 4.0396919890513796e-05, "loss": 0.273, "step": 3726500 }, { "epoch": 2.23, "learning_rate": 4.039481992495323e-05, "loss": 0.2743, "step": 3727000 }, { "epoch": 2.23, "learning_rate": 4.039271995939267e-05, "loss": 0.2657, "step": 3727500 }, { "epoch": 2.24, "learning_rate": 4.03906199938321e-05, "loss": 0.2733, "step": 3728000 }, { "epoch": 2.24, "learning_rate": 4.038852002827153e-05, "loss": 0.2664, "step": 3728500 }, { "epoch": 2.24, "learning_rate": 4.038642426264209e-05, "loss": 0.2654, "step": 3729000 }, { "epoch": 2.24, "learning_rate": 4.038432429708153e-05, "loss": 0.2701, "step": 3729500 }, { "epoch": 2.24, "learning_rate": 4.0382224331520964e-05, "loss": 0.2669, "step": 3730000 }, { "epoch": 2.24, "learning_rate": 4.03801243659604e-05, "loss": 0.27, "step": 3730500 }, { "epoch": 2.24, "learning_rate": 4.037802440039983e-05, "loss": 0.2701, "step": 3731000 }, { "epoch": 2.24, "learning_rate": 4.0375924434839264e-05, "loss": 0.2617, "step": 3731500 }, { "epoch": 2.24, "learning_rate": 4.0373824469278704e-05, "loss": 0.2679, "step": 3732000 }, { "epoch": 2.24, "learning_rate": 4.037172450371814e-05, "loss": 0.2669, "step": 3732500 }, { "epoch": 2.24, "learning_rate": 4.036962453815757e-05, "loss": 0.2692, "step": 3733000 }, { "epoch": 2.24, "learning_rate": 4.0367528772528125e-05, "loss": 0.2734, "step": 3733500 }, { "epoch": 2.24, "learning_rate": 4.0365428806967565e-05, "loss": 0.261, "step": 3734000 }, { "epoch": 2.24, "learning_rate": 4.0363328841407e-05, "loss": 0.264, "step": 3734500 }, { "epoch": 2.24, "learning_rate": 4.036122887584643e-05, "loss": 0.2679, "step": 3735000 }, { "epoch": 2.24, "learning_rate": 4.035913311021699e-05, "loss": 0.2706, "step": 3735500 }, { "epoch": 2.24, "learning_rate": 4.0357033144656426e-05, "loss": 0.2707, "step": 3736000 }, { "epoch": 2.24, "learning_rate": 4.0354941578958106e-05, "loss": 0.2709, "step": 3736500 }, { "epoch": 2.24, "learning_rate": 4.035284161339754e-05, "loss": 0.2664, "step": 3737000 }, { "epoch": 2.24, "learning_rate": 4.035074164783697e-05, "loss": 0.2701, "step": 3737500 }, { "epoch": 2.24, "learning_rate": 4.0348641682276414e-05, "loss": 0.2653, "step": 3738000 }, { "epoch": 2.24, "learning_rate": 4.034654171671585e-05, "loss": 0.271, "step": 3738500 }, { "epoch": 2.24, "learning_rate": 4.03444459510864e-05, "loss": 0.2692, "step": 3739000 }, { "epoch": 2.24, "learning_rate": 4.0342345985525834e-05, "loss": 0.2691, "step": 3739500 }, { "epoch": 2.24, "learning_rate": 4.0340246019965274e-05, "loss": 0.2623, "step": 3740000 }, { "epoch": 2.24, "learning_rate": 4.033814605440471e-05, "loss": 0.264, "step": 3740500 }, { "epoch": 2.24, "learning_rate": 4.033604608884414e-05, "loss": 0.2713, "step": 3741000 }, { "epoch": 2.24, "learning_rate": 4.033394612328358e-05, "loss": 0.2715, "step": 3741500 }, { "epoch": 2.24, "learning_rate": 4.0331846157723015e-05, "loss": 0.2726, "step": 3742000 }, { "epoch": 2.24, "learning_rate": 4.032974619216245e-05, "loss": 0.2674, "step": 3742500 }, { "epoch": 2.24, "learning_rate": 4.032764622660188e-05, "loss": 0.2659, "step": 3743000 }, { "epoch": 2.24, "learning_rate": 4.0325546261041315e-05, "loss": 0.2676, "step": 3743500 }, { "epoch": 2.24, "learning_rate": 4.032344629548075e-05, "loss": 0.2681, "step": 3744000 }, { "epoch": 2.24, "learning_rate": 4.032135052985131e-05, "loss": 0.2751, "step": 3744500 }, { "epoch": 2.25, "learning_rate": 4.031925056429075e-05, "loss": 0.2652, "step": 3745000 }, { "epoch": 2.25, "learning_rate": 4.0317150598730176e-05, "loss": 0.2672, "step": 3745500 }, { "epoch": 2.25, "learning_rate": 4.0315054833100736e-05, "loss": 0.2623, "step": 3746000 }, { "epoch": 2.25, "learning_rate": 4.0312954867540177e-05, "loss": 0.2681, "step": 3746500 }, { "epoch": 2.25, "learning_rate": 4.031085490197961e-05, "loss": 0.2705, "step": 3747000 }, { "epoch": 2.25, "learning_rate": 4.0308754936419044e-05, "loss": 0.2647, "step": 3747500 }, { "epoch": 2.25, "learning_rate": 4.030665497085848e-05, "loss": 0.2638, "step": 3748000 }, { "epoch": 2.25, "learning_rate": 4.030455500529791e-05, "loss": 0.2668, "step": 3748500 }, { "epoch": 2.25, "learning_rate": 4.0302455039737344e-05, "loss": 0.2662, "step": 3749000 }, { "epoch": 2.25, "learning_rate": 4.0300355074176784e-05, "loss": 0.2688, "step": 3749500 }, { "epoch": 2.25, "learning_rate": 4.029825510861622e-05, "loss": 0.2711, "step": 3750000 }, { "epoch": 2.25, "learning_rate": 4.029615514305565e-05, "loss": 0.2669, "step": 3750500 }, { "epoch": 2.25, "learning_rate": 4.029405517749509e-05, "loss": 0.2653, "step": 3751000 }, { "epoch": 2.25, "learning_rate": 4.0291955211934525e-05, "loss": 0.271, "step": 3751500 }, { "epoch": 2.25, "learning_rate": 4.028985944630508e-05, "loss": 0.2649, "step": 3752000 }, { "epoch": 2.25, "learning_rate": 4.028775948074451e-05, "loss": 0.2648, "step": 3752500 }, { "epoch": 2.25, "learning_rate": 4.028565951518395e-05, "loss": 0.2772, "step": 3753000 }, { "epoch": 2.25, "learning_rate": 4.0283559549623385e-05, "loss": 0.2693, "step": 3753500 }, { "epoch": 2.25, "learning_rate": 4.028145958406282e-05, "loss": 0.2635, "step": 3754000 }, { "epoch": 2.25, "learning_rate": 4.027935961850226e-05, "loss": 0.2691, "step": 3754500 }, { "epoch": 2.25, "learning_rate": 4.027725965294169e-05, "loss": 0.2719, "step": 3755000 }, { "epoch": 2.25, "learning_rate": 4.0275159687381126e-05, "loss": 0.263, "step": 3755500 }, { "epoch": 2.25, "learning_rate": 4.0273063921751686e-05, "loss": 0.2681, "step": 3756000 }, { "epoch": 2.25, "learning_rate": 4.027096395619112e-05, "loss": 0.265, "step": 3756500 }, { "epoch": 2.25, "learning_rate": 4.026886399063055e-05, "loss": 0.2691, "step": 3757000 }, { "epoch": 2.25, "learning_rate": 4.0266764025069994e-05, "loss": 0.2696, "step": 3757500 }, { "epoch": 2.25, "learning_rate": 4.026466825944055e-05, "loss": 0.2727, "step": 3758000 }, { "epoch": 2.25, "learning_rate": 4.02625724938111e-05, "loss": 0.266, "step": 3758500 }, { "epoch": 2.25, "learning_rate": 4.0260472528250534e-05, "loss": 0.2739, "step": 3759000 }, { "epoch": 2.25, "learning_rate": 4.025837256268997e-05, "loss": 0.2624, "step": 3759500 }, { "epoch": 2.25, "learning_rate": 4.025627259712941e-05, "loss": 0.2689, "step": 3760000 }, { "epoch": 2.25, "learning_rate": 4.025417263156884e-05, "loss": 0.2598, "step": 3760500 }, { "epoch": 2.25, "learning_rate": 4.025207266600828e-05, "loss": 0.2713, "step": 3761000 }, { "epoch": 2.26, "learning_rate": 4.0249972700447715e-05, "loss": 0.2694, "step": 3761500 }, { "epoch": 2.26, "learning_rate": 4.024787273488715e-05, "loss": 0.265, "step": 3762000 }, { "epoch": 2.26, "learning_rate": 4.02457769692577e-05, "loss": 0.2645, "step": 3762500 }, { "epoch": 2.26, "learning_rate": 4.024367700369714e-05, "loss": 0.2674, "step": 3763000 }, { "epoch": 2.26, "learning_rate": 4.0241577038136576e-05, "loss": 0.2754, "step": 3763500 }, { "epoch": 2.26, "learning_rate": 4.023947707257601e-05, "loss": 0.2728, "step": 3764000 }, { "epoch": 2.26, "learning_rate": 4.023738130694656e-05, "loss": 0.2676, "step": 3764500 }, { "epoch": 2.26, "learning_rate": 4.0235281341386e-05, "loss": 0.2682, "step": 3765000 }, { "epoch": 2.26, "learning_rate": 4.0233181375825436e-05, "loss": 0.2643, "step": 3765500 }, { "epoch": 2.26, "learning_rate": 4.023108141026487e-05, "loss": 0.2624, "step": 3766000 }, { "epoch": 2.26, "learning_rate": 4.022898144470431e-05, "loss": 0.2745, "step": 3766500 }, { "epoch": 2.26, "learning_rate": 4.0226885679074864e-05, "loss": 0.2754, "step": 3767000 }, { "epoch": 2.26, "learning_rate": 4.02247857135143e-05, "loss": 0.2727, "step": 3767500 }, { "epoch": 2.26, "learning_rate": 4.022268574795374e-05, "loss": 0.2695, "step": 3768000 }, { "epoch": 2.26, "learning_rate": 4.022058578239317e-05, "loss": 0.2708, "step": 3768500 }, { "epoch": 2.26, "learning_rate": 4.0218490016763724e-05, "loss": 0.2643, "step": 3769000 }, { "epoch": 2.26, "learning_rate": 4.021639005120316e-05, "loss": 0.2694, "step": 3769500 }, { "epoch": 2.26, "learning_rate": 4.02142900856426e-05, "loss": 0.2715, "step": 3770000 }, { "epoch": 2.26, "learning_rate": 4.021219012008203e-05, "loss": 0.2691, "step": 3770500 }, { "epoch": 2.26, "learning_rate": 4.0210094354452585e-05, "loss": 0.2662, "step": 3771000 }, { "epoch": 2.26, "learning_rate": 4.020799438889202e-05, "loss": 0.272, "step": 3771500 }, { "epoch": 2.26, "learning_rate": 4.020589442333146e-05, "loss": 0.2608, "step": 3772000 }, { "epoch": 2.26, "learning_rate": 4.020379445777089e-05, "loss": 0.2696, "step": 3772500 }, { "epoch": 2.26, "learning_rate": 4.0201698692141446e-05, "loss": 0.27, "step": 3773000 }, { "epoch": 2.26, "learning_rate": 4.0199598726580886e-05, "loss": 0.2699, "step": 3773500 }, { "epoch": 2.26, "learning_rate": 4.019749876102032e-05, "loss": 0.2668, "step": 3774000 }, { "epoch": 2.26, "learning_rate": 4.019539879545975e-05, "loss": 0.2707, "step": 3774500 }, { "epoch": 2.26, "learning_rate": 4.019330302983031e-05, "loss": 0.2678, "step": 3775000 }, { "epoch": 2.26, "learning_rate": 4.019120306426975e-05, "loss": 0.2615, "step": 3775500 }, { "epoch": 2.26, "learning_rate": 4.018910309870918e-05, "loss": 0.2617, "step": 3776000 }, { "epoch": 2.26, "learning_rate": 4.0187003133148614e-05, "loss": 0.27, "step": 3776500 }, { "epoch": 2.26, "learning_rate": 4.0184903167588054e-05, "loss": 0.2702, "step": 3777000 }, { "epoch": 2.26, "learning_rate": 4.018280740195861e-05, "loss": 0.2659, "step": 3777500 }, { "epoch": 2.27, "learning_rate": 4.018070743639804e-05, "loss": 0.2672, "step": 3778000 }, { "epoch": 2.27, "learning_rate": 4.0178607470837475e-05, "loss": 0.2729, "step": 3778500 }, { "epoch": 2.27, "learning_rate": 4.017651170520803e-05, "loss": 0.263, "step": 3779000 }, { "epoch": 2.27, "learning_rate": 4.017441173964747e-05, "loss": 0.2634, "step": 3779500 }, { "epoch": 2.27, "learning_rate": 4.01723117740869e-05, "loss": 0.2676, "step": 3780000 }, { "epoch": 2.27, "learning_rate": 4.017021180852634e-05, "loss": 0.2701, "step": 3780500 }, { "epoch": 2.27, "learning_rate": 4.0168111842965776e-05, "loss": 0.2643, "step": 3781000 }, { "epoch": 2.27, "learning_rate": 4.016601187740521e-05, "loss": 0.2744, "step": 3781500 }, { "epoch": 2.27, "learning_rate": 4.016391191184465e-05, "loss": 0.2697, "step": 3782000 }, { "epoch": 2.27, "learning_rate": 4.016181194628408e-05, "loss": 0.2648, "step": 3782500 }, { "epoch": 2.27, "learning_rate": 4.0159716180654636e-05, "loss": 0.2724, "step": 3783000 }, { "epoch": 2.27, "learning_rate": 4.015761621509407e-05, "loss": 0.2668, "step": 3783500 }, { "epoch": 2.27, "learning_rate": 4.015551624953351e-05, "loss": 0.2654, "step": 3784000 }, { "epoch": 2.27, "learning_rate": 4.0153416283972943e-05, "loss": 0.2658, "step": 3784500 }, { "epoch": 2.27, "learning_rate": 4.01513205183435e-05, "loss": 0.2693, "step": 3785000 }, { "epoch": 2.27, "learning_rate": 4.014922055278293e-05, "loss": 0.266, "step": 3785500 }, { "epoch": 2.27, "learning_rate": 4.014712058722237e-05, "loss": 0.2628, "step": 3786000 }, { "epoch": 2.27, "learning_rate": 4.0145020621661804e-05, "loss": 0.2677, "step": 3786500 }, { "epoch": 2.27, "learning_rate": 4.014292485603236e-05, "loss": 0.2632, "step": 3787000 }, { "epoch": 2.27, "learning_rate": 4.01408248904718e-05, "loss": 0.2649, "step": 3787500 }, { "epoch": 2.27, "learning_rate": 4.013872492491123e-05, "loss": 0.2757, "step": 3788000 }, { "epoch": 2.27, "learning_rate": 4.0136624959350665e-05, "loss": 0.2706, "step": 3788500 }, { "epoch": 2.27, "learning_rate": 4.0134524993790105e-05, "loss": 0.2702, "step": 3789000 }, { "epoch": 2.27, "learning_rate": 4.013242922816066e-05, "loss": 0.267, "step": 3789500 }, { "epoch": 2.27, "learning_rate": 4.013032926260009e-05, "loss": 0.2632, "step": 3790000 }, { "epoch": 2.27, "learning_rate": 4.0128229297039526e-05, "loss": 0.2672, "step": 3790500 }, { "epoch": 2.27, "learning_rate": 4.0126129331478966e-05, "loss": 0.2666, "step": 3791000 }, { "epoch": 2.27, "learning_rate": 4.012403356584952e-05, "loss": 0.271, "step": 3791500 }, { "epoch": 2.27, "learning_rate": 4.012193360028895e-05, "loss": 0.2687, "step": 3792000 }, { "epoch": 2.27, "learning_rate": 4.0119833634728386e-05, "loss": 0.2688, "step": 3792500 }, { "epoch": 2.27, "learning_rate": 4.0117733669167827e-05, "loss": 0.2652, "step": 3793000 }, { "epoch": 2.27, "learning_rate": 4.011563790353838e-05, "loss": 0.2728, "step": 3793500 }, { "epoch": 2.27, "learning_rate": 4.0113537937977814e-05, "loss": 0.2676, "step": 3794000 }, { "epoch": 2.27, "learning_rate": 4.0111437972417254e-05, "loss": 0.2673, "step": 3794500 }, { "epoch": 2.28, "learning_rate": 4.010933800685669e-05, "loss": 0.267, "step": 3795000 }, { "epoch": 2.28, "learning_rate": 4.010724224122724e-05, "loss": 0.2679, "step": 3795500 }, { "epoch": 2.28, "learning_rate": 4.01051464755978e-05, "loss": 0.2687, "step": 3796000 }, { "epoch": 2.28, "learning_rate": 4.0103046510037235e-05, "loss": 0.2713, "step": 3796500 }, { "epoch": 2.28, "learning_rate": 4.010095074440779e-05, "loss": 0.274, "step": 3797000 }, { "epoch": 2.28, "learning_rate": 4.009885077884723e-05, "loss": 0.2689, "step": 3797500 }, { "epoch": 2.28, "learning_rate": 4.009675081328666e-05, "loss": 0.2696, "step": 3798000 }, { "epoch": 2.28, "learning_rate": 4.0094650847726095e-05, "loss": 0.262, "step": 3798500 }, { "epoch": 2.28, "learning_rate": 4.0092550882165536e-05, "loss": 0.2671, "step": 3799000 }, { "epoch": 2.28, "learning_rate": 4.009045091660497e-05, "loss": 0.2703, "step": 3799500 }, { "epoch": 2.28, "learning_rate": 4.00883509510444e-05, "loss": 0.2676, "step": 3800000 }, { "epoch": 2.28, "eval_loss": 0.24739134311676025, "eval_runtime": 1489.6136, "eval_samples_per_second": 353.595, "eval_steps_per_second": 58.933, "step": 3800000 }, { "epoch": 2.28, "learning_rate": 4.0086250985483836e-05, "loss": 0.2687, "step": 3800500 }, { "epoch": 2.28, "learning_rate": 4.0084155219854396e-05, "loss": 0.2761, "step": 3801000 }, { "epoch": 2.28, "learning_rate": 4.008205525429383e-05, "loss": 0.2719, "step": 3801500 }, { "epoch": 2.28, "learning_rate": 4.007995528873327e-05, "loss": 0.2688, "step": 3802000 }, { "epoch": 2.28, "learning_rate": 4.00778553231727e-05, "loss": 0.2712, "step": 3802500 }, { "epoch": 2.28, "learning_rate": 4.007575535761213e-05, "loss": 0.2684, "step": 3803000 }, { "epoch": 2.28, "learning_rate": 4.007365539205157e-05, "loss": 0.2714, "step": 3803500 }, { "epoch": 2.28, "learning_rate": 4.0071555426491004e-05, "loss": 0.27, "step": 3804000 }, { "epoch": 2.28, "learning_rate": 4.006945546093044e-05, "loss": 0.2738, "step": 3804500 }, { "epoch": 2.28, "learning_rate": 4.006735969530099e-05, "loss": 0.2639, "step": 3805000 }, { "epoch": 2.28, "learning_rate": 4.006525972974043e-05, "loss": 0.2726, "step": 3805500 }, { "epoch": 2.28, "learning_rate": 4.0063159764179865e-05, "loss": 0.2719, "step": 3806000 }, { "epoch": 2.28, "learning_rate": 4.00610597986193e-05, "loss": 0.2676, "step": 3806500 }, { "epoch": 2.28, "learning_rate": 4.005895983305874e-05, "loss": 0.2678, "step": 3807000 }, { "epoch": 2.28, "learning_rate": 4.005686406742929e-05, "loss": 0.2705, "step": 3807500 }, { "epoch": 2.28, "learning_rate": 4.0054764101868725e-05, "loss": 0.2634, "step": 3808000 }, { "epoch": 2.28, "learning_rate": 4.0052664136308166e-05, "loss": 0.2703, "step": 3808500 }, { "epoch": 2.28, "learning_rate": 4.00505641707476e-05, "loss": 0.2721, "step": 3809000 }, { "epoch": 2.28, "learning_rate": 4.004846840511815e-05, "loss": 0.2703, "step": 3809500 }, { "epoch": 2.28, "learning_rate": 4.0046368439557586e-05, "loss": 0.268, "step": 3810000 }, { "epoch": 2.28, "learning_rate": 4.0044268473997026e-05, "loss": 0.2671, "step": 3810500 }, { "epoch": 2.28, "learning_rate": 4.004216850843646e-05, "loss": 0.2666, "step": 3811000 }, { "epoch": 2.29, "learning_rate": 4.004007274280702e-05, "loss": 0.2736, "step": 3811500 }, { "epoch": 2.29, "learning_rate": 4.003797277724645e-05, "loss": 0.2652, "step": 3812000 }, { "epoch": 2.29, "learning_rate": 4.003587281168589e-05, "loss": 0.2698, "step": 3812500 }, { "epoch": 2.29, "learning_rate": 4.003377284612532e-05, "loss": 0.2755, "step": 3813000 }, { "epoch": 2.29, "learning_rate": 4.003167708049588e-05, "loss": 0.2675, "step": 3813500 }, { "epoch": 2.29, "learning_rate": 4.0029577114935314e-05, "loss": 0.2646, "step": 3814000 }, { "epoch": 2.29, "learning_rate": 4.0027481349305875e-05, "loss": 0.2729, "step": 3814500 }, { "epoch": 2.29, "learning_rate": 4.002538138374531e-05, "loss": 0.2653, "step": 3815000 }, { "epoch": 2.29, "learning_rate": 4.002328141818474e-05, "loss": 0.2652, "step": 3815500 }, { "epoch": 2.29, "learning_rate": 4.002118145262418e-05, "loss": 0.271, "step": 3816000 }, { "epoch": 2.29, "learning_rate": 4.0019081487063615e-05, "loss": 0.2676, "step": 3816500 }, { "epoch": 2.29, "learning_rate": 4.001698152150304e-05, "loss": 0.2728, "step": 3817000 }, { "epoch": 2.29, "learning_rate": 4.001488155594248e-05, "loss": 0.2658, "step": 3817500 }, { "epoch": 2.29, "learning_rate": 4.0012781590381916e-05, "loss": 0.2656, "step": 3818000 }, { "epoch": 2.29, "learning_rate": 4.0010685824752476e-05, "loss": 0.2681, "step": 3818500 }, { "epoch": 2.29, "learning_rate": 4.000858585919191e-05, "loss": 0.2702, "step": 3819000 }, { "epoch": 2.29, "learning_rate": 4.000648589363134e-05, "loss": 0.2668, "step": 3819500 }, { "epoch": 2.29, "learning_rate": 4.0004385928070776e-05, "loss": 0.2675, "step": 3820000 }, { "epoch": 2.29, "learning_rate": 4.000228596251021e-05, "loss": 0.2668, "step": 3820500 }, { "epoch": 2.29, "learning_rate": 4.000019019688077e-05, "loss": 0.2627, "step": 3821000 }, { "epoch": 2.29, "learning_rate": 3.9998090231320204e-05, "loss": 0.2659, "step": 3821500 }, { "epoch": 2.29, "learning_rate": 3.999599026575964e-05, "loss": 0.2691, "step": 3822000 }, { "epoch": 2.29, "learning_rate": 3.999389030019908e-05, "loss": 0.2645, "step": 3822500 }, { "epoch": 2.29, "learning_rate": 3.999179453456964e-05, "loss": 0.2715, "step": 3823000 }, { "epoch": 2.29, "learning_rate": 3.998969456900907e-05, "loss": 0.2672, "step": 3823500 }, { "epoch": 2.29, "learning_rate": 3.99875946034485e-05, "loss": 0.2715, "step": 3824000 }, { "epoch": 2.29, "learning_rate": 3.998549463788794e-05, "loss": 0.2692, "step": 3824500 }, { "epoch": 2.29, "learning_rate": 3.99833988722585e-05, "loss": 0.2645, "step": 3825000 }, { "epoch": 2.29, "learning_rate": 3.998129890669793e-05, "loss": 0.2726, "step": 3825500 }, { "epoch": 2.29, "learning_rate": 3.9979198941137365e-05, "loss": 0.2687, "step": 3826000 }, { "epoch": 2.29, "learning_rate": 3.99770989755768e-05, "loss": 0.2666, "step": 3826500 }, { "epoch": 2.29, "learning_rate": 3.997499901001623e-05, "loss": 0.2673, "step": 3827000 }, { "epoch": 2.29, "learning_rate": 3.9972899044455666e-05, "loss": 0.2643, "step": 3827500 }, { "epoch": 2.3, "learning_rate": 3.9970799078895106e-05, "loss": 0.2738, "step": 3828000 }, { "epoch": 2.3, "learning_rate": 3.996869911333454e-05, "loss": 0.2668, "step": 3828500 }, { "epoch": 2.3, "learning_rate": 3.996660334770509e-05, "loss": 0.266, "step": 3829000 }, { "epoch": 2.3, "learning_rate": 3.996450338214453e-05, "loss": 0.2693, "step": 3829500 }, { "epoch": 2.3, "learning_rate": 3.996240341658397e-05, "loss": 0.2666, "step": 3830000 }, { "epoch": 2.3, "learning_rate": 3.99603034510234e-05, "loss": 0.2668, "step": 3830500 }, { "epoch": 2.3, "learning_rate": 3.9958207685393954e-05, "loss": 0.2667, "step": 3831000 }, { "epoch": 2.3, "learning_rate": 3.9956107719833394e-05, "loss": 0.2714, "step": 3831500 }, { "epoch": 2.3, "learning_rate": 3.995400775427283e-05, "loss": 0.2643, "step": 3832000 }, { "epoch": 2.3, "learning_rate": 3.995190778871226e-05, "loss": 0.2724, "step": 3832500 }, { "epoch": 2.3, "learning_rate": 3.994981202308282e-05, "loss": 0.2685, "step": 3833000 }, { "epoch": 2.3, "learning_rate": 3.9947712057522255e-05, "loss": 0.2648, "step": 3833500 }, { "epoch": 2.3, "learning_rate": 3.994561209196169e-05, "loss": 0.2732, "step": 3834000 }, { "epoch": 2.3, "learning_rate": 3.994351632633225e-05, "loss": 0.2681, "step": 3834500 }, { "epoch": 2.3, "learning_rate": 3.99414205607028e-05, "loss": 0.2747, "step": 3835000 }, { "epoch": 2.3, "learning_rate": 3.993932059514224e-05, "loss": 0.2653, "step": 3835500 }, { "epoch": 2.3, "learning_rate": 3.9937220629581676e-05, "loss": 0.2605, "step": 3836000 }, { "epoch": 2.3, "learning_rate": 3.993512066402111e-05, "loss": 0.2673, "step": 3836500 }, { "epoch": 2.3, "learning_rate": 3.993302069846055e-05, "loss": 0.2681, "step": 3837000 }, { "epoch": 2.3, "learning_rate": 3.993092073289998e-05, "loss": 0.2667, "step": 3837500 }, { "epoch": 2.3, "learning_rate": 3.9928820767339416e-05, "loss": 0.271, "step": 3838000 }, { "epoch": 2.3, "learning_rate": 3.992672080177885e-05, "loss": 0.2677, "step": 3838500 }, { "epoch": 2.3, "learning_rate": 3.992462083621828e-05, "loss": 0.2695, "step": 3839000 }, { "epoch": 2.3, "learning_rate": 3.992252087065772e-05, "loss": 0.269, "step": 3839500 }, { "epoch": 2.3, "learning_rate": 3.992042510502828e-05, "loss": 0.2649, "step": 3840000 }, { "epoch": 2.3, "learning_rate": 3.991832513946771e-05, "loss": 0.2661, "step": 3840500 }, { "epoch": 2.3, "learning_rate": 3.9916225173907144e-05, "loss": 0.2667, "step": 3841000 }, { "epoch": 2.3, "learning_rate": 3.991412520834658e-05, "loss": 0.2678, "step": 3841500 }, { "epoch": 2.3, "learning_rate": 3.991202524278602e-05, "loss": 0.2711, "step": 3842000 }, { "epoch": 2.3, "learning_rate": 3.990992527722545e-05, "loss": 0.2645, "step": 3842500 }, { "epoch": 2.3, "learning_rate": 3.9907825311664885e-05, "loss": 0.2676, "step": 3843000 }, { "epoch": 2.3, "learning_rate": 3.9905725346104325e-05, "loss": 0.2677, "step": 3843500 }, { "epoch": 2.3, "learning_rate": 3.990362958047488e-05, "loss": 0.2661, "step": 3844000 }, { "epoch": 2.3, "learning_rate": 3.990152961491431e-05, "loss": 0.2669, "step": 3844500 }, { "epoch": 2.31, "learning_rate": 3.989942964935375e-05, "loss": 0.2636, "step": 3845000 }, { "epoch": 2.31, "learning_rate": 3.9897329683793186e-05, "loss": 0.2572, "step": 3845500 }, { "epoch": 2.31, "learning_rate": 3.989522971823262e-05, "loss": 0.262, "step": 3846000 }, { "epoch": 2.31, "learning_rate": 3.989313395260317e-05, "loss": 0.2645, "step": 3846500 }, { "epoch": 2.31, "learning_rate": 3.989103398704261e-05, "loss": 0.2687, "step": 3847000 }, { "epoch": 2.31, "learning_rate": 3.9888934021482046e-05, "loss": 0.2665, "step": 3847500 }, { "epoch": 2.31, "learning_rate": 3.988683405592148e-05, "loss": 0.2718, "step": 3848000 }, { "epoch": 2.31, "learning_rate": 3.988473829029203e-05, "loss": 0.2695, "step": 3848500 }, { "epoch": 2.31, "learning_rate": 3.9882638324731474e-05, "loss": 0.269, "step": 3849000 }, { "epoch": 2.31, "learning_rate": 3.988053835917091e-05, "loss": 0.2716, "step": 3849500 }, { "epoch": 2.31, "learning_rate": 3.987843839361034e-05, "loss": 0.2636, "step": 3850000 }, { "epoch": 2.31, "learning_rate": 3.98763426279809e-05, "loss": 0.2699, "step": 3850500 }, { "epoch": 2.31, "learning_rate": 3.9874242662420334e-05, "loss": 0.2755, "step": 3851000 }, { "epoch": 2.31, "learning_rate": 3.987214269685977e-05, "loss": 0.2657, "step": 3851500 }, { "epoch": 2.31, "learning_rate": 3.987004273129921e-05, "loss": 0.2658, "step": 3852000 }, { "epoch": 2.31, "learning_rate": 3.986794696566976e-05, "loss": 0.2679, "step": 3852500 }, { "epoch": 2.31, "learning_rate": 3.9865847000109195e-05, "loss": 0.2664, "step": 3853000 }, { "epoch": 2.31, "learning_rate": 3.986374703454863e-05, "loss": 0.2648, "step": 3853500 }, { "epoch": 2.31, "learning_rate": 3.986164706898807e-05, "loss": 0.2679, "step": 3854000 }, { "epoch": 2.31, "learning_rate": 3.98595471034275e-05, "loss": 0.2671, "step": 3854500 }, { "epoch": 2.31, "learning_rate": 3.9857451337798056e-05, "loss": 0.2704, "step": 3855000 }, { "epoch": 2.31, "learning_rate": 3.985535137223749e-05, "loss": 0.2651, "step": 3855500 }, { "epoch": 2.31, "learning_rate": 3.985325560660805e-05, "loss": 0.2689, "step": 3856000 }, { "epoch": 2.31, "learning_rate": 3.985115564104749e-05, "loss": 0.2716, "step": 3856500 }, { "epoch": 2.31, "learning_rate": 3.984905567548692e-05, "loss": 0.2684, "step": 3857000 }, { "epoch": 2.31, "learning_rate": 3.984695570992636e-05, "loss": 0.2667, "step": 3857500 }, { "epoch": 2.31, "learning_rate": 3.984485574436579e-05, "loss": 0.2667, "step": 3858000 }, { "epoch": 2.31, "learning_rate": 3.9842755778805224e-05, "loss": 0.27, "step": 3858500 }, { "epoch": 2.31, "learning_rate": 3.9840655813244664e-05, "loss": 0.2659, "step": 3859000 }, { "epoch": 2.31, "learning_rate": 3.98385558476841e-05, "loss": 0.2652, "step": 3859500 }, { "epoch": 2.31, "learning_rate": 3.983646008205465e-05, "loss": 0.2683, "step": 3860000 }, { "epoch": 2.31, "learning_rate": 3.9834360116494084e-05, "loss": 0.2699, "step": 3860500 }, { "epoch": 2.31, "learning_rate": 3.9832260150933525e-05, "loss": 0.2675, "step": 3861000 }, { "epoch": 2.32, "learning_rate": 3.983016018537296e-05, "loss": 0.2719, "step": 3861500 }, { "epoch": 2.32, "learning_rate": 3.982806441974351e-05, "loss": 0.2637, "step": 3862000 }, { "epoch": 2.32, "learning_rate": 3.9825964454182945e-05, "loss": 0.2708, "step": 3862500 }, { "epoch": 2.32, "learning_rate": 3.9823864488622385e-05, "loss": 0.273, "step": 3863000 }, { "epoch": 2.32, "learning_rate": 3.982176452306182e-05, "loss": 0.2642, "step": 3863500 }, { "epoch": 2.32, "learning_rate": 3.981966875743238e-05, "loss": 0.2706, "step": 3864000 }, { "epoch": 2.32, "learning_rate": 3.981756879187181e-05, "loss": 0.2665, "step": 3864500 }, { "epoch": 2.32, "learning_rate": 3.9815468826311246e-05, "loss": 0.2633, "step": 3865000 }, { "epoch": 2.32, "learning_rate": 3.981336886075068e-05, "loss": 0.2634, "step": 3865500 }, { "epoch": 2.32, "learning_rate": 3.981127309512124e-05, "loss": 0.2732, "step": 3866000 }, { "epoch": 2.32, "learning_rate": 3.9809177329491793e-05, "loss": 0.2717, "step": 3866500 }, { "epoch": 2.32, "learning_rate": 3.9807077363931234e-05, "loss": 0.2652, "step": 3867000 }, { "epoch": 2.32, "learning_rate": 3.980497739837067e-05, "loss": 0.2731, "step": 3867500 }, { "epoch": 2.32, "learning_rate": 3.98028774328101e-05, "loss": 0.2643, "step": 3868000 }, { "epoch": 2.32, "learning_rate": 3.980077746724954e-05, "loss": 0.2649, "step": 3868500 }, { "epoch": 2.32, "learning_rate": 3.9798677501688974e-05, "loss": 0.2645, "step": 3869000 }, { "epoch": 2.32, "learning_rate": 3.97965775361284e-05, "loss": 0.2697, "step": 3869500 }, { "epoch": 2.32, "learning_rate": 3.979448177049896e-05, "loss": 0.2664, "step": 3870000 }, { "epoch": 2.32, "learning_rate": 3.97923818049384e-05, "loss": 0.2701, "step": 3870500 }, { "epoch": 2.32, "learning_rate": 3.9790281839377835e-05, "loss": 0.2713, "step": 3871000 }, { "epoch": 2.32, "learning_rate": 3.978818187381727e-05, "loss": 0.2703, "step": 3871500 }, { "epoch": 2.32, "learning_rate": 3.97860819082567e-05, "loss": 0.2646, "step": 3872000 }, { "epoch": 2.32, "learning_rate": 3.9783981942696135e-05, "loss": 0.2624, "step": 3872500 }, { "epoch": 2.32, "learning_rate": 3.9781881977135576e-05, "loss": 0.2771, "step": 3873000 }, { "epoch": 2.32, "learning_rate": 3.977978201157501e-05, "loss": 0.2725, "step": 3873500 }, { "epoch": 2.32, "learning_rate": 3.977768624594556e-05, "loss": 0.2643, "step": 3874000 }, { "epoch": 2.32, "learning_rate": 3.9775586280384996e-05, "loss": 0.266, "step": 3874500 }, { "epoch": 2.32, "learning_rate": 3.9773486314824436e-05, "loss": 0.2687, "step": 3875000 }, { "epoch": 2.32, "learning_rate": 3.977138634926387e-05, "loss": 0.2615, "step": 3875500 }, { "epoch": 2.32, "learning_rate": 3.97692863837033e-05, "loss": 0.2646, "step": 3876000 }, { "epoch": 2.32, "learning_rate": 3.976719061807386e-05, "loss": 0.2636, "step": 3876500 }, { "epoch": 2.32, "learning_rate": 3.97650906525133e-05, "loss": 0.2688, "step": 3877000 }, { "epoch": 2.32, "learning_rate": 3.976299068695273e-05, "loss": 0.2665, "step": 3877500 }, { "epoch": 2.33, "learning_rate": 3.976089072139217e-05, "loss": 0.2665, "step": 3878000 }, { "epoch": 2.33, "learning_rate": 3.975879495576273e-05, "loss": 0.269, "step": 3878500 }, { "epoch": 2.33, "learning_rate": 3.975669499020216e-05, "loss": 0.2665, "step": 3879000 }, { "epoch": 2.33, "learning_rate": 3.975459502464159e-05, "loss": 0.2714, "step": 3879500 }, { "epoch": 2.33, "learning_rate": 3.975249505908103e-05, "loss": 0.2657, "step": 3880000 }, { "epoch": 2.33, "learning_rate": 3.975039929345159e-05, "loss": 0.2717, "step": 3880500 }, { "epoch": 2.33, "learning_rate": 3.974829932789102e-05, "loss": 0.2697, "step": 3881000 }, { "epoch": 2.33, "learning_rate": 3.974619936233045e-05, "loss": 0.2709, "step": 3881500 }, { "epoch": 2.33, "learning_rate": 3.974409939676989e-05, "loss": 0.2667, "step": 3882000 }, { "epoch": 2.33, "learning_rate": 3.9741999431209326e-05, "loss": 0.2637, "step": 3882500 }, { "epoch": 2.33, "learning_rate": 3.9739903665579886e-05, "loss": 0.268, "step": 3883000 }, { "epoch": 2.33, "learning_rate": 3.973780370001932e-05, "loss": 0.2632, "step": 3883500 }, { "epoch": 2.33, "learning_rate": 3.973570373445875e-05, "loss": 0.2713, "step": 3884000 }, { "epoch": 2.33, "learning_rate": 3.973360796882931e-05, "loss": 0.2727, "step": 3884500 }, { "epoch": 2.33, "learning_rate": 3.973150800326875e-05, "loss": 0.2716, "step": 3885000 }, { "epoch": 2.33, "learning_rate": 3.972940803770819e-05, "loss": 0.2676, "step": 3885500 }, { "epoch": 2.33, "learning_rate": 3.9727308072147614e-05, "loss": 0.2676, "step": 3886000 }, { "epoch": 2.33, "learning_rate": 3.972520810658705e-05, "loss": 0.2661, "step": 3886500 }, { "epoch": 2.33, "learning_rate": 3.972311234095761e-05, "loss": 0.2684, "step": 3887000 }, { "epoch": 2.33, "learning_rate": 3.972101237539705e-05, "loss": 0.2628, "step": 3887500 }, { "epoch": 2.33, "learning_rate": 3.97189166097676e-05, "loss": 0.2772, "step": 3888000 }, { "epoch": 2.33, "learning_rate": 3.9716816644207035e-05, "loss": 0.2655, "step": 3888500 }, { "epoch": 2.33, "learning_rate": 3.971471667864647e-05, "loss": 0.2695, "step": 3889000 }, { "epoch": 2.33, "learning_rate": 3.971261671308591e-05, "loss": 0.2679, "step": 3889500 }, { "epoch": 2.33, "learning_rate": 3.971051674752534e-05, "loss": 0.2689, "step": 3890000 }, { "epoch": 2.33, "learning_rate": 3.9708416781964775e-05, "loss": 0.2753, "step": 3890500 }, { "epoch": 2.33, "learning_rate": 3.970631681640421e-05, "loss": 0.2671, "step": 3891000 }, { "epoch": 2.33, "learning_rate": 3.970421685084364e-05, "loss": 0.2696, "step": 3891500 }, { "epoch": 2.33, "learning_rate": 3.970211688528308e-05, "loss": 0.2666, "step": 3892000 }, { "epoch": 2.33, "learning_rate": 3.9700016919722516e-05, "loss": 0.265, "step": 3892500 }, { "epoch": 2.33, "learning_rate": 3.969791695416195e-05, "loss": 0.2664, "step": 3893000 }, { "epoch": 2.33, "learning_rate": 3.969581698860139e-05, "loss": 0.265, "step": 3893500 }, { "epoch": 2.33, "learning_rate": 3.969372542290306e-05, "loss": 0.2641, "step": 3894000 }, { "epoch": 2.33, "learning_rate": 3.9691625457342504e-05, "loss": 0.2611, "step": 3894500 }, { "epoch": 2.34, "learning_rate": 3.968952549178194e-05, "loss": 0.266, "step": 3895000 }, { "epoch": 2.34, "learning_rate": 3.9687425526221364e-05, "loss": 0.2736, "step": 3895500 }, { "epoch": 2.34, "learning_rate": 3.9685325560660804e-05, "loss": 0.2708, "step": 3896000 }, { "epoch": 2.34, "learning_rate": 3.968322559510024e-05, "loss": 0.2706, "step": 3896500 }, { "epoch": 2.34, "learning_rate": 3.968112562953967e-05, "loss": 0.2612, "step": 3897000 }, { "epoch": 2.34, "learning_rate": 3.967902566397911e-05, "loss": 0.2647, "step": 3897500 }, { "epoch": 2.34, "learning_rate": 3.9676929898349665e-05, "loss": 0.2647, "step": 3898000 }, { "epoch": 2.34, "learning_rate": 3.96748299327891e-05, "loss": 0.2667, "step": 3898500 }, { "epoch": 2.34, "learning_rate": 3.967272996722854e-05, "loss": 0.2633, "step": 3899000 }, { "epoch": 2.34, "learning_rate": 3.967063000166797e-05, "loss": 0.2661, "step": 3899500 }, { "epoch": 2.34, "learning_rate": 3.966853423603853e-05, "loss": 0.2696, "step": 3900000 }, { "epoch": 2.34, "eval_loss": 0.24585743248462677, "eval_runtime": 1453.3301, "eval_samples_per_second": 362.423, "eval_steps_per_second": 60.404, "step": 3900000 }, { "epoch": 2.34, "learning_rate": 3.966643427047796e-05, "loss": 0.2671, "step": 3900500 }, { "epoch": 2.34, "learning_rate": 3.96643343049174e-05, "loss": 0.2694, "step": 3901000 }, { "epoch": 2.34, "learning_rate": 3.966223433935683e-05, "loss": 0.2649, "step": 3901500 }, { "epoch": 2.34, "learning_rate": 3.9660134373796266e-05, "loss": 0.2733, "step": 3902000 }, { "epoch": 2.34, "learning_rate": 3.965803860816682e-05, "loss": 0.2689, "step": 3902500 }, { "epoch": 2.34, "learning_rate": 3.965593864260626e-05, "loss": 0.2671, "step": 3903000 }, { "epoch": 2.34, "learning_rate": 3.965383867704569e-05, "loss": 0.2694, "step": 3903500 }, { "epoch": 2.34, "learning_rate": 3.965173871148513e-05, "loss": 0.2679, "step": 3904000 }, { "epoch": 2.34, "learning_rate": 3.964964294585569e-05, "loss": 0.2691, "step": 3904500 }, { "epoch": 2.34, "learning_rate": 3.964754298029512e-05, "loss": 0.2601, "step": 3905000 }, { "epoch": 2.34, "learning_rate": 3.9645443014734554e-05, "loss": 0.2682, "step": 3905500 }, { "epoch": 2.34, "learning_rate": 3.9643343049173994e-05, "loss": 0.2685, "step": 3906000 }, { "epoch": 2.34, "learning_rate": 3.964124308361343e-05, "loss": 0.2746, "step": 3906500 }, { "epoch": 2.34, "learning_rate": 3.963914731798399e-05, "loss": 0.2677, "step": 3907000 }, { "epoch": 2.34, "learning_rate": 3.9637047352423415e-05, "loss": 0.2661, "step": 3907500 }, { "epoch": 2.34, "learning_rate": 3.9634947386862855e-05, "loss": 0.267, "step": 3908000 }, { "epoch": 2.34, "learning_rate": 3.963284742130229e-05, "loss": 0.2683, "step": 3908500 }, { "epoch": 2.34, "learning_rate": 3.963075165567285e-05, "loss": 0.2733, "step": 3909000 }, { "epoch": 2.34, "learning_rate": 3.962865169011228e-05, "loss": 0.2694, "step": 3909500 }, { "epoch": 2.34, "learning_rate": 3.9626551724551716e-05, "loss": 0.2627, "step": 3910000 }, { "epoch": 2.34, "learning_rate": 3.962445175899115e-05, "loss": 0.2635, "step": 3910500 }, { "epoch": 2.34, "learning_rate": 3.962235179343058e-05, "loss": 0.267, "step": 3911000 }, { "epoch": 2.35, "learning_rate": 3.962025602780114e-05, "loss": 0.2749, "step": 3911500 }, { "epoch": 2.35, "learning_rate": 3.9618156062240576e-05, "loss": 0.2656, "step": 3912000 }, { "epoch": 2.35, "learning_rate": 3.961605609668001e-05, "loss": 0.2654, "step": 3912500 }, { "epoch": 2.35, "learning_rate": 3.961396033105057e-05, "loss": 0.2654, "step": 3913000 }, { "epoch": 2.35, "learning_rate": 3.961186036549001e-05, "loss": 0.2656, "step": 3913500 }, { "epoch": 2.35, "learning_rate": 3.9609760399929444e-05, "loss": 0.2689, "step": 3914000 }, { "epoch": 2.35, "learning_rate": 3.960766043436887e-05, "loss": 0.2743, "step": 3914500 }, { "epoch": 2.35, "learning_rate": 3.960556466873943e-05, "loss": 0.2645, "step": 3915000 }, { "epoch": 2.35, "learning_rate": 3.960346470317887e-05, "loss": 0.2685, "step": 3915500 }, { "epoch": 2.35, "learning_rate": 3.9601364737618305e-05, "loss": 0.2637, "step": 3916000 }, { "epoch": 2.35, "learning_rate": 3.959926477205774e-05, "loss": 0.2686, "step": 3916500 }, { "epoch": 2.35, "learning_rate": 3.959716480649717e-05, "loss": 0.2659, "step": 3917000 }, { "epoch": 2.35, "learning_rate": 3.9595064840936605e-05, "loss": 0.2646, "step": 3917500 }, { "epoch": 2.35, "learning_rate": 3.959296487537604e-05, "loss": 0.2701, "step": 3918000 }, { "epoch": 2.35, "learning_rate": 3.959086490981548e-05, "loss": 0.2702, "step": 3918500 }, { "epoch": 2.35, "learning_rate": 3.958876494425491e-05, "loss": 0.2659, "step": 3919000 }, { "epoch": 2.35, "learning_rate": 3.9586669178625466e-05, "loss": 0.2689, "step": 3919500 }, { "epoch": 2.35, "learning_rate": 3.9584569213064906e-05, "loss": 0.2631, "step": 3920000 }, { "epoch": 2.35, "learning_rate": 3.9582473447435466e-05, "loss": 0.2661, "step": 3920500 }, { "epoch": 2.35, "learning_rate": 3.95803734818749e-05, "loss": 0.2699, "step": 3921000 }, { "epoch": 2.35, "learning_rate": 3.957827351631433e-05, "loss": 0.2706, "step": 3921500 }, { "epoch": 2.35, "learning_rate": 3.957617355075377e-05, "loss": 0.2708, "step": 3922000 }, { "epoch": 2.35, "learning_rate": 3.95740735851932e-05, "loss": 0.2709, "step": 3922500 }, { "epoch": 2.35, "learning_rate": 3.957197781956376e-05, "loss": 0.2698, "step": 3923000 }, { "epoch": 2.35, "learning_rate": 3.9569877854003194e-05, "loss": 0.27, "step": 3923500 }, { "epoch": 2.35, "learning_rate": 3.956777788844263e-05, "loss": 0.2733, "step": 3924000 }, { "epoch": 2.35, "learning_rate": 3.956567792288206e-05, "loss": 0.2636, "step": 3924500 }, { "epoch": 2.35, "learning_rate": 3.9563577957321494e-05, "loss": 0.2689, "step": 3925000 }, { "epoch": 2.35, "learning_rate": 3.9561477991760935e-05, "loss": 0.2634, "step": 3925500 }, { "epoch": 2.35, "learning_rate": 3.955937802620037e-05, "loss": 0.2692, "step": 3926000 }, { "epoch": 2.35, "learning_rate": 3.95572780606398e-05, "loss": 0.2694, "step": 3926500 }, { "epoch": 2.35, "learning_rate": 3.955518649494148e-05, "loss": 0.2704, "step": 3927000 }, { "epoch": 2.35, "learning_rate": 3.955308652938092e-05, "loss": 0.2703, "step": 3927500 }, { "epoch": 2.35, "learning_rate": 3.9550986563820356e-05, "loss": 0.2626, "step": 3928000 }, { "epoch": 2.36, "learning_rate": 3.954888659825979e-05, "loss": 0.259, "step": 3928500 }, { "epoch": 2.36, "learning_rate": 3.954678663269922e-05, "loss": 0.2703, "step": 3929000 }, { "epoch": 2.36, "learning_rate": 3.9544686667138656e-05, "loss": 0.2702, "step": 3929500 }, { "epoch": 2.36, "learning_rate": 3.954258670157809e-05, "loss": 0.2638, "step": 3930000 }, { "epoch": 2.36, "learning_rate": 3.954049093594865e-05, "loss": 0.2624, "step": 3930500 }, { "epoch": 2.36, "learning_rate": 3.953839097038809e-05, "loss": 0.2722, "step": 3931000 }, { "epoch": 2.36, "learning_rate": 3.953629100482752e-05, "loss": 0.2675, "step": 3931500 }, { "epoch": 2.36, "learning_rate": 3.953419103926695e-05, "loss": 0.269, "step": 3932000 }, { "epoch": 2.36, "learning_rate": 3.953209107370639e-05, "loss": 0.2667, "step": 3932500 }, { "epoch": 2.36, "learning_rate": 3.9529991108145824e-05, "loss": 0.266, "step": 3933000 }, { "epoch": 2.36, "learning_rate": 3.952789114258526e-05, "loss": 0.262, "step": 3933500 }, { "epoch": 2.36, "learning_rate": 3.95257911770247e-05, "loss": 0.2676, "step": 3934000 }, { "epoch": 2.36, "learning_rate": 3.952369541139525e-05, "loss": 0.2681, "step": 3934500 }, { "epoch": 2.36, "learning_rate": 3.9521595445834685e-05, "loss": 0.2663, "step": 3935000 }, { "epoch": 2.36, "learning_rate": 3.9519495480274125e-05, "loss": 0.2712, "step": 3935500 }, { "epoch": 2.36, "learning_rate": 3.951739551471356e-05, "loss": 0.2733, "step": 3936000 }, { "epoch": 2.36, "learning_rate": 3.951529974908411e-05, "loss": 0.266, "step": 3936500 }, { "epoch": 2.36, "learning_rate": 3.9513199783523545e-05, "loss": 0.2679, "step": 3937000 }, { "epoch": 2.36, "learning_rate": 3.9511099817962986e-05, "loss": 0.267, "step": 3937500 }, { "epoch": 2.36, "learning_rate": 3.950900825226466e-05, "loss": 0.2713, "step": 3938000 }, { "epoch": 2.36, "learning_rate": 3.95069082867041e-05, "loss": 0.2711, "step": 3938500 }, { "epoch": 2.36, "learning_rate": 3.950480832114353e-05, "loss": 0.2663, "step": 3939000 }, { "epoch": 2.36, "learning_rate": 3.9502708355582967e-05, "loss": 0.2679, "step": 3939500 }, { "epoch": 2.36, "learning_rate": 3.950060839002241e-05, "loss": 0.2688, "step": 3940000 }, { "epoch": 2.36, "learning_rate": 3.949850842446184e-05, "loss": 0.2662, "step": 3940500 }, { "epoch": 2.36, "learning_rate": 3.9496408458901274e-05, "loss": 0.2699, "step": 3941000 }, { "epoch": 2.36, "learning_rate": 3.9494312693271834e-05, "loss": 0.2722, "step": 3941500 }, { "epoch": 2.36, "learning_rate": 3.949221272771127e-05, "loss": 0.2716, "step": 3942000 }, { "epoch": 2.36, "learning_rate": 3.94901127621507e-05, "loss": 0.265, "step": 3942500 }, { "epoch": 2.36, "learning_rate": 3.9488012796590134e-05, "loss": 0.2682, "step": 3943000 }, { "epoch": 2.36, "learning_rate": 3.948591283102957e-05, "loss": 0.2688, "step": 3943500 }, { "epoch": 2.36, "learning_rate": 3.9483812865469e-05, "loss": 0.2716, "step": 3944000 }, { "epoch": 2.36, "learning_rate": 3.948171289990844e-05, "loss": 0.2602, "step": 3944500 }, { "epoch": 2.37, "learning_rate": 3.9479612934347875e-05, "loss": 0.2661, "step": 3945000 }, { "epoch": 2.37, "learning_rate": 3.947751296878731e-05, "loss": 0.2701, "step": 3945500 }, { "epoch": 2.37, "learning_rate": 3.947541300322675e-05, "loss": 0.2666, "step": 3946000 }, { "epoch": 2.37, "learning_rate": 3.947331303766618e-05, "loss": 0.2602, "step": 3946500 }, { "epoch": 2.37, "learning_rate": 3.9471213072105616e-05, "loss": 0.2678, "step": 3947000 }, { "epoch": 2.37, "learning_rate": 3.946911730647617e-05, "loss": 0.2654, "step": 3947500 }, { "epoch": 2.37, "learning_rate": 3.946701734091561e-05, "loss": 0.2684, "step": 3948000 }, { "epoch": 2.37, "learning_rate": 3.946491737535504e-05, "loss": 0.2611, "step": 3948500 }, { "epoch": 2.37, "learning_rate": 3.946281740979448e-05, "loss": 0.2707, "step": 3949000 }, { "epoch": 2.37, "learning_rate": 3.9460717444233917e-05, "loss": 0.265, "step": 3949500 }, { "epoch": 2.37, "learning_rate": 3.945861747867335e-05, "loss": 0.2617, "step": 3950000 }, { "epoch": 2.37, "learning_rate": 3.9456521713043904e-05, "loss": 0.2669, "step": 3950500 }, { "epoch": 2.37, "learning_rate": 3.9454421747483344e-05, "loss": 0.268, "step": 3951000 }, { "epoch": 2.37, "learning_rate": 3.945232178192278e-05, "loss": 0.2697, "step": 3951500 }, { "epoch": 2.37, "learning_rate": 3.945022181636221e-05, "loss": 0.2706, "step": 3952000 }, { "epoch": 2.37, "learning_rate": 3.9448126050732764e-05, "loss": 0.2704, "step": 3952500 }, { "epoch": 2.37, "learning_rate": 3.9446026085172205e-05, "loss": 0.2596, "step": 3953000 }, { "epoch": 2.37, "learning_rate": 3.944392611961164e-05, "loss": 0.2661, "step": 3953500 }, { "epoch": 2.37, "learning_rate": 3.944182615405107e-05, "loss": 0.2678, "step": 3954000 }, { "epoch": 2.37, "learning_rate": 3.9439730388421625e-05, "loss": 0.2718, "step": 3954500 }, { "epoch": 2.37, "learning_rate": 3.9437630422861065e-05, "loss": 0.2648, "step": 3955000 }, { "epoch": 2.37, "learning_rate": 3.94355304573005e-05, "loss": 0.2713, "step": 3955500 }, { "epoch": 2.37, "learning_rate": 3.943343049173994e-05, "loss": 0.2651, "step": 3956000 }, { "epoch": 2.37, "learning_rate": 3.943133472611049e-05, "loss": 0.2696, "step": 3956500 }, { "epoch": 2.37, "learning_rate": 3.9429234760549926e-05, "loss": 0.26, "step": 3957000 }, { "epoch": 2.37, "learning_rate": 3.942713479498936e-05, "loss": 0.2623, "step": 3957500 }, { "epoch": 2.37, "learning_rate": 3.94250348294288e-05, "loss": 0.2691, "step": 3958000 }, { "epoch": 2.37, "learning_rate": 3.942293906379935e-05, "loss": 0.2714, "step": 3958500 }, { "epoch": 2.37, "learning_rate": 3.942083909823879e-05, "loss": 0.2692, "step": 3959000 }, { "epoch": 2.37, "learning_rate": 3.941873913267822e-05, "loss": 0.263, "step": 3959500 }, { "epoch": 2.37, "learning_rate": 3.941663916711766e-05, "loss": 0.2653, "step": 3960000 }, { "epoch": 2.37, "learning_rate": 3.9414543401488214e-05, "loss": 0.2603, "step": 3960500 }, { "epoch": 2.37, "learning_rate": 3.9412447635858774e-05, "loss": 0.266, "step": 3961000 }, { "epoch": 2.38, "learning_rate": 3.941034767029821e-05, "loss": 0.2683, "step": 3961500 }, { "epoch": 2.38, "learning_rate": 3.940824770473765e-05, "loss": 0.2622, "step": 3962000 }, { "epoch": 2.38, "learning_rate": 3.9406147739177075e-05, "loss": 0.2607, "step": 3962500 }, { "epoch": 2.38, "learning_rate": 3.940404777361651e-05, "loss": 0.2741, "step": 3963000 }, { "epoch": 2.38, "learning_rate": 3.940194780805595e-05, "loss": 0.2683, "step": 3963500 }, { "epoch": 2.38, "learning_rate": 3.939984784249538e-05, "loss": 0.2663, "step": 3964000 }, { "epoch": 2.38, "learning_rate": 3.9397747876934815e-05, "loss": 0.2663, "step": 3964500 }, { "epoch": 2.38, "learning_rate": 3.9395647911374256e-05, "loss": 0.2643, "step": 3965000 }, { "epoch": 2.38, "learning_rate": 3.939355214574481e-05, "loss": 0.267, "step": 3965500 }, { "epoch": 2.38, "learning_rate": 3.939145218018424e-05, "loss": 0.2671, "step": 3966000 }, { "epoch": 2.38, "learning_rate": 3.9389352214623676e-05, "loss": 0.2661, "step": 3966500 }, { "epoch": 2.38, "learning_rate": 3.9387252249063116e-05, "loss": 0.2635, "step": 3967000 }, { "epoch": 2.38, "learning_rate": 3.938515648343367e-05, "loss": 0.2651, "step": 3967500 }, { "epoch": 2.38, "learning_rate": 3.9383056517873103e-05, "loss": 0.2671, "step": 3968000 }, { "epoch": 2.38, "learning_rate": 3.9380956552312544e-05, "loss": 0.2663, "step": 3968500 }, { "epoch": 2.38, "learning_rate": 3.937885658675198e-05, "loss": 0.2693, "step": 3969000 }, { "epoch": 2.38, "learning_rate": 3.937676082112253e-05, "loss": 0.2679, "step": 3969500 }, { "epoch": 2.38, "learning_rate": 3.9374660855561964e-05, "loss": 0.2672, "step": 3970000 }, { "epoch": 2.38, "learning_rate": 3.9372560890001404e-05, "loss": 0.2655, "step": 3970500 }, { "epoch": 2.38, "learning_rate": 3.937046092444084e-05, "loss": 0.2605, "step": 3971000 }, { "epoch": 2.38, "learning_rate": 3.93683651588114e-05, "loss": 0.2633, "step": 3971500 }, { "epoch": 2.38, "learning_rate": 3.936626939318195e-05, "loss": 0.2708, "step": 3972000 }, { "epoch": 2.38, "learning_rate": 3.9364169427621385e-05, "loss": 0.2633, "step": 3972500 }, { "epoch": 2.38, "learning_rate": 3.9362069462060825e-05, "loss": 0.2726, "step": 3973000 }, { "epoch": 2.38, "learning_rate": 3.935996949650026e-05, "loss": 0.27, "step": 3973500 }, { "epoch": 2.38, "learning_rate": 3.9357869530939686e-05, "loss": 0.2666, "step": 3974000 }, { "epoch": 2.38, "learning_rate": 3.9355769565379126e-05, "loss": 0.2683, "step": 3974500 }, { "epoch": 2.38, "learning_rate": 3.935366959981856e-05, "loss": 0.2712, "step": 3975000 }, { "epoch": 2.38, "learning_rate": 3.9351569634258e-05, "loss": 0.2659, "step": 3975500 }, { "epoch": 2.38, "learning_rate": 3.934947386862856e-05, "loss": 0.2694, "step": 3976000 }, { "epoch": 2.38, "learning_rate": 3.9347373903067987e-05, "loss": 0.2667, "step": 3976500 }, { "epoch": 2.38, "learning_rate": 3.934527393750742e-05, "loss": 0.2751, "step": 3977000 }, { "epoch": 2.38, "learning_rate": 3.934317397194686e-05, "loss": 0.2677, "step": 3977500 }, { "epoch": 2.38, "learning_rate": 3.9341074006386294e-05, "loss": 0.2671, "step": 3978000 }, { "epoch": 2.39, "learning_rate": 3.9338978240756854e-05, "loss": 0.2674, "step": 3978500 }, { "epoch": 2.39, "learning_rate": 3.933687827519628e-05, "loss": 0.2657, "step": 3979000 }, { "epoch": 2.39, "learning_rate": 3.933477830963572e-05, "loss": 0.2598, "step": 3979500 }, { "epoch": 2.39, "learning_rate": 3.9332678344075154e-05, "loss": 0.2652, "step": 3980000 }, { "epoch": 2.39, "learning_rate": 3.9330582578445715e-05, "loss": 0.2638, "step": 3980500 }, { "epoch": 2.39, "learning_rate": 3.932848261288515e-05, "loss": 0.2667, "step": 3981000 }, { "epoch": 2.39, "learning_rate": 3.932638264732458e-05, "loss": 0.2636, "step": 3981500 }, { "epoch": 2.39, "learning_rate": 3.9324282681764015e-05, "loss": 0.2676, "step": 3982000 }, { "epoch": 2.39, "learning_rate": 3.9322186916134576e-05, "loss": 0.2702, "step": 3982500 }, { "epoch": 2.39, "learning_rate": 3.9320086950574016e-05, "loss": 0.274, "step": 3983000 }, { "epoch": 2.39, "learning_rate": 3.931798698501344e-05, "loss": 0.271, "step": 3983500 }, { "epoch": 2.39, "learning_rate": 3.9315887019452876e-05, "loss": 0.2739, "step": 3984000 }, { "epoch": 2.39, "learning_rate": 3.9313791253823436e-05, "loss": 0.2611, "step": 3984500 }, { "epoch": 2.39, "learning_rate": 3.9311691288262876e-05, "loss": 0.2619, "step": 3985000 }, { "epoch": 2.39, "learning_rate": 3.930959132270231e-05, "loss": 0.2598, "step": 3985500 }, { "epoch": 2.39, "learning_rate": 3.9307495557072864e-05, "loss": 0.264, "step": 3986000 }, { "epoch": 2.39, "learning_rate": 3.93053955915123e-05, "loss": 0.2658, "step": 3986500 }, { "epoch": 2.39, "learning_rate": 3.930329562595174e-05, "loss": 0.2637, "step": 3987000 }, { "epoch": 2.39, "learning_rate": 3.930119566039117e-05, "loss": 0.262, "step": 3987500 }, { "epoch": 2.39, "learning_rate": 3.9299095694830604e-05, "loss": 0.2697, "step": 3988000 }, { "epoch": 2.39, "learning_rate": 3.929699572927004e-05, "loss": 0.2642, "step": 3988500 }, { "epoch": 2.39, "learning_rate": 3.929489576370947e-05, "loss": 0.2657, "step": 3989000 }, { "epoch": 2.39, "learning_rate": 3.929279579814891e-05, "loss": 0.2638, "step": 3989500 }, { "epoch": 2.39, "learning_rate": 3.9290695832588345e-05, "loss": 0.2665, "step": 3990000 }, { "epoch": 2.39, "learning_rate": 3.9288600066958905e-05, "loss": 0.2678, "step": 3990500 }, { "epoch": 2.39, "learning_rate": 3.928650010139833e-05, "loss": 0.2618, "step": 3991000 }, { "epoch": 2.39, "learning_rate": 3.928440013583777e-05, "loss": 0.2722, "step": 3991500 }, { "epoch": 2.39, "learning_rate": 3.9282300170277205e-05, "loss": 0.2636, "step": 3992000 }, { "epoch": 2.39, "learning_rate": 3.9280204404647766e-05, "loss": 0.2731, "step": 3992500 }, { "epoch": 2.39, "learning_rate": 3.92781044390872e-05, "loss": 0.269, "step": 3993000 }, { "epoch": 2.39, "learning_rate": 3.927600447352663e-05, "loss": 0.2658, "step": 3993500 }, { "epoch": 2.39, "learning_rate": 3.9273904507966066e-05, "loss": 0.269, "step": 3994000 }, { "epoch": 2.39, "learning_rate": 3.9271808742336627e-05, "loss": 0.2695, "step": 3994500 }, { "epoch": 2.4, "learning_rate": 3.926970877677606e-05, "loss": 0.2621, "step": 3995000 }, { "epoch": 2.4, "learning_rate": 3.9267608811215493e-05, "loss": 0.265, "step": 3995500 }, { "epoch": 2.4, "learning_rate": 3.926550884565493e-05, "loss": 0.2661, "step": 3996000 }, { "epoch": 2.4, "learning_rate": 3.926341308002549e-05, "loss": 0.2666, "step": 3996500 }, { "epoch": 2.4, "learning_rate": 3.926131311446493e-05, "loss": 0.2605, "step": 3997000 }, { "epoch": 2.4, "learning_rate": 3.925921314890436e-05, "loss": 0.2612, "step": 3997500 }, { "epoch": 2.4, "learning_rate": 3.925711318334379e-05, "loss": 0.2727, "step": 3998000 }, { "epoch": 2.4, "learning_rate": 3.925501741771435e-05, "loss": 0.2648, "step": 3998500 }, { "epoch": 2.4, "learning_rate": 3.925291745215379e-05, "loss": 0.2664, "step": 3999000 }, { "epoch": 2.4, "learning_rate": 3.925081748659322e-05, "loss": 0.2687, "step": 3999500 }, { "epoch": 2.4, "learning_rate": 3.9248717521032655e-05, "loss": 0.2657, "step": 4000000 }, { "epoch": 2.4, "eval_loss": 0.24457141757011414, "eval_runtime": 1456.0304, "eval_samples_per_second": 361.751, "eval_steps_per_second": 60.292, "step": 4000000 }, { "epoch": 2.4, "learning_rate": 3.924662175540321e-05, "loss": 0.2631, "step": 4000500 }, { "epoch": 2.4, "learning_rate": 3.924452178984265e-05, "loss": 0.2683, "step": 4001000 }, { "epoch": 2.4, "learning_rate": 3.924242182428208e-05, "loss": 0.2608, "step": 4001500 }, { "epoch": 2.4, "learning_rate": 3.9240321858721516e-05, "loss": 0.2633, "step": 4002000 }, { "epoch": 2.4, "learning_rate": 3.9238221893160956e-05, "loss": 0.272, "step": 4002500 }, { "epoch": 2.4, "learning_rate": 3.923612192760038e-05, "loss": 0.2654, "step": 4003000 }, { "epoch": 2.4, "learning_rate": 3.923402196203982e-05, "loss": 0.2685, "step": 4003500 }, { "epoch": 2.4, "learning_rate": 3.9231921996479256e-05, "loss": 0.2626, "step": 4004000 }, { "epoch": 2.4, "learning_rate": 3.922982623084982e-05, "loss": 0.2683, "step": 4004500 }, { "epoch": 2.4, "learning_rate": 3.9227726265289244e-05, "loss": 0.2661, "step": 4005000 }, { "epoch": 2.4, "learning_rate": 3.9225626299728684e-05, "loss": 0.2667, "step": 4005500 }, { "epoch": 2.4, "learning_rate": 3.922352633416812e-05, "loss": 0.2608, "step": 4006000 }, { "epoch": 2.4, "learning_rate": 3.922143056853868e-05, "loss": 0.2658, "step": 4006500 }, { "epoch": 2.4, "learning_rate": 3.921933060297811e-05, "loss": 0.2669, "step": 4007000 }, { "epoch": 2.4, "learning_rate": 3.9217230637417544e-05, "loss": 0.265, "step": 4007500 }, { "epoch": 2.4, "learning_rate": 3.921513067185698e-05, "loss": 0.2665, "step": 4008000 }, { "epoch": 2.4, "learning_rate": 3.921303490622754e-05, "loss": 0.2606, "step": 4008500 }, { "epoch": 2.4, "learning_rate": 3.921093494066697e-05, "loss": 0.2649, "step": 4009000 }, { "epoch": 2.4, "learning_rate": 3.920883497510641e-05, "loss": 0.2639, "step": 4009500 }, { "epoch": 2.4, "learning_rate": 3.920673500954584e-05, "loss": 0.2655, "step": 4010000 }, { "epoch": 2.4, "learning_rate": 3.92046392439164e-05, "loss": 0.2722, "step": 4010500 }, { "epoch": 2.4, "learning_rate": 3.920253927835584e-05, "loss": 0.2623, "step": 4011000 }, { "epoch": 2.41, "learning_rate": 3.920043931279527e-05, "loss": 0.2646, "step": 4011500 }, { "epoch": 2.41, "learning_rate": 3.9198339347234706e-05, "loss": 0.264, "step": 4012000 }, { "epoch": 2.41, "learning_rate": 3.919623938167414e-05, "loss": 0.2688, "step": 4012500 }, { "epoch": 2.41, "learning_rate": 3.919413941611357e-05, "loss": 0.2618, "step": 4013000 }, { "epoch": 2.41, "learning_rate": 3.9192039450553007e-05, "loss": 0.2644, "step": 4013500 }, { "epoch": 2.41, "learning_rate": 3.918993948499245e-05, "loss": 0.2645, "step": 4014000 }, { "epoch": 2.41, "learning_rate": 3.9187843719363e-05, "loss": 0.2705, "step": 4014500 }, { "epoch": 2.41, "learning_rate": 3.9185743753802434e-05, "loss": 0.266, "step": 4015000 }, { "epoch": 2.41, "learning_rate": 3.918364378824187e-05, "loss": 0.263, "step": 4015500 }, { "epoch": 2.41, "learning_rate": 3.918154382268131e-05, "loss": 0.2659, "step": 4016000 }, { "epoch": 2.41, "learning_rate": 3.917945225698299e-05, "loss": 0.2584, "step": 4016500 }, { "epoch": 2.41, "learning_rate": 3.917735229142242e-05, "loss": 0.2677, "step": 4017000 }, { "epoch": 2.41, "learning_rate": 3.9175252325861855e-05, "loss": 0.2691, "step": 4017500 }, { "epoch": 2.41, "learning_rate": 3.9173152360301295e-05, "loss": 0.2637, "step": 4018000 }, { "epoch": 2.41, "learning_rate": 3.917105239474073e-05, "loss": 0.2693, "step": 4018500 }, { "epoch": 2.41, "learning_rate": 3.916895242918016e-05, "loss": 0.268, "step": 4019000 }, { "epoch": 2.41, "learning_rate": 3.9166852463619596e-05, "loss": 0.2648, "step": 4019500 }, { "epoch": 2.41, "learning_rate": 3.916475249805903e-05, "loss": 0.2702, "step": 4020000 }, { "epoch": 2.41, "learning_rate": 3.916265253249846e-05, "loss": 0.262, "step": 4020500 }, { "epoch": 2.41, "learning_rate": 3.916055676686902e-05, "loss": 0.268, "step": 4021000 }, { "epoch": 2.41, "learning_rate": 3.915845680130846e-05, "loss": 0.265, "step": 4021500 }, { "epoch": 2.41, "learning_rate": 3.915635683574789e-05, "loss": 0.2658, "step": 4022000 }, { "epoch": 2.41, "learning_rate": 3.915425687018732e-05, "loss": 0.2655, "step": 4022500 }, { "epoch": 2.41, "learning_rate": 3.9152161104557884e-05, "loss": 0.2644, "step": 4023000 }, { "epoch": 2.41, "learning_rate": 3.9150061138997324e-05, "loss": 0.2629, "step": 4023500 }, { "epoch": 2.41, "learning_rate": 3.914796117343676e-05, "loss": 0.2724, "step": 4024000 }, { "epoch": 2.41, "learning_rate": 3.914586120787619e-05, "loss": 0.2633, "step": 4024500 }, { "epoch": 2.41, "learning_rate": 3.914376544224675e-05, "loss": 0.2687, "step": 4025000 }, { "epoch": 2.41, "learning_rate": 3.9141665476686184e-05, "loss": 0.2739, "step": 4025500 }, { "epoch": 2.41, "learning_rate": 3.913956551112562e-05, "loss": 0.2665, "step": 4026000 }, { "epoch": 2.41, "learning_rate": 3.913746554556505e-05, "loss": 0.2658, "step": 4026500 }, { "epoch": 2.41, "learning_rate": 3.913536977993561e-05, "loss": 0.2647, "step": 4027000 }, { "epoch": 2.41, "learning_rate": 3.9133274014306165e-05, "loss": 0.2666, "step": 4027500 }, { "epoch": 2.41, "learning_rate": 3.91311740487456e-05, "loss": 0.2645, "step": 4028000 }, { "epoch": 2.42, "learning_rate": 3.912907408318503e-05, "loss": 0.2667, "step": 4028500 }, { "epoch": 2.42, "learning_rate": 3.912697411762447e-05, "loss": 0.2659, "step": 4029000 }, { "epoch": 2.42, "learning_rate": 3.9124874152063906e-05, "loss": 0.2735, "step": 4029500 }, { "epoch": 2.42, "learning_rate": 3.912277418650334e-05, "loss": 0.267, "step": 4030000 }, { "epoch": 2.42, "learning_rate": 3.912067422094278e-05, "loss": 0.2715, "step": 4030500 }, { "epoch": 2.42, "learning_rate": 3.911857425538221e-05, "loss": 0.2657, "step": 4031000 }, { "epoch": 2.42, "learning_rate": 3.911647848975277e-05, "loss": 0.2663, "step": 4031500 }, { "epoch": 2.42, "learning_rate": 3.911437852419221e-05, "loss": 0.2742, "step": 4032000 }, { "epoch": 2.42, "learning_rate": 3.911227855863164e-05, "loss": 0.2645, "step": 4032500 }, { "epoch": 2.42, "learning_rate": 3.9110182793002194e-05, "loss": 0.2697, "step": 4033000 }, { "epoch": 2.42, "learning_rate": 3.910808282744163e-05, "loss": 0.2666, "step": 4033500 }, { "epoch": 2.42, "learning_rate": 3.910598286188107e-05, "loss": 0.2746, "step": 4034000 }, { "epoch": 2.42, "learning_rate": 3.91038828963205e-05, "loss": 0.2653, "step": 4034500 }, { "epoch": 2.42, "learning_rate": 3.9101782930759935e-05, "loss": 0.2705, "step": 4035000 }, { "epoch": 2.42, "learning_rate": 3.9099682965199375e-05, "loss": 0.2716, "step": 4035500 }, { "epoch": 2.42, "learning_rate": 3.90975829996388e-05, "loss": 0.2619, "step": 4036000 }, { "epoch": 2.42, "learning_rate": 3.9095483034078235e-05, "loss": 0.2647, "step": 4036500 }, { "epoch": 2.42, "learning_rate": 3.9093387268448795e-05, "loss": 0.2683, "step": 4037000 }, { "epoch": 2.42, "learning_rate": 3.9091287302888235e-05, "loss": 0.2681, "step": 4037500 }, { "epoch": 2.42, "learning_rate": 3.908918733732767e-05, "loss": 0.265, "step": 4038000 }, { "epoch": 2.42, "learning_rate": 3.90870873717671e-05, "loss": 0.2743, "step": 4038500 }, { "epoch": 2.42, "learning_rate": 3.908499160613766e-05, "loss": 0.2698, "step": 4039000 }, { "epoch": 2.42, "learning_rate": 3.9082891640577096e-05, "loss": 0.2654, "step": 4039500 }, { "epoch": 2.42, "learning_rate": 3.908079167501653e-05, "loss": 0.263, "step": 4040000 }, { "epoch": 2.42, "learning_rate": 3.907869590938708e-05, "loss": 0.2741, "step": 4040500 }, { "epoch": 2.42, "learning_rate": 3.9076595943826523e-05, "loss": 0.2665, "step": 4041000 }, { "epoch": 2.42, "learning_rate": 3.907449597826596e-05, "loss": 0.2641, "step": 4041500 }, { "epoch": 2.42, "learning_rate": 3.907239601270539e-05, "loss": 0.258, "step": 4042000 }, { "epoch": 2.42, "learning_rate": 3.907029604714483e-05, "loss": 0.2633, "step": 4042500 }, { "epoch": 2.42, "learning_rate": 3.9068196081584264e-05, "loss": 0.2685, "step": 4043000 }, { "epoch": 2.42, "learning_rate": 3.906609611602369e-05, "loss": 0.2676, "step": 4043500 }, { "epoch": 2.42, "learning_rate": 3.906399615046313e-05, "loss": 0.2635, "step": 4044000 }, { "epoch": 2.42, "learning_rate": 3.906190038483369e-05, "loss": 0.2698, "step": 4044500 }, { "epoch": 2.43, "learning_rate": 3.9059800419273125e-05, "loss": 0.2658, "step": 4045000 }, { "epoch": 2.43, "learning_rate": 3.905770045371256e-05, "loss": 0.2672, "step": 4045500 }, { "epoch": 2.43, "learning_rate": 3.905560048815199e-05, "loss": 0.2734, "step": 4046000 }, { "epoch": 2.43, "learning_rate": 3.905350892245367e-05, "loss": 0.2673, "step": 4046500 }, { "epoch": 2.43, "learning_rate": 3.9051408956893106e-05, "loss": 0.2759, "step": 4047000 }, { "epoch": 2.43, "learning_rate": 3.904930899133254e-05, "loss": 0.2697, "step": 4047500 }, { "epoch": 2.43, "learning_rate": 3.904720902577198e-05, "loss": 0.2681, "step": 4048000 }, { "epoch": 2.43, "learning_rate": 3.904510906021141e-05, "loss": 0.2613, "step": 4048500 }, { "epoch": 2.43, "learning_rate": 3.9043009094650846e-05, "loss": 0.2716, "step": 4049000 }, { "epoch": 2.43, "learning_rate": 3.9040909129090287e-05, "loss": 0.2698, "step": 4049500 }, { "epoch": 2.43, "learning_rate": 3.903880916352972e-05, "loss": 0.2684, "step": 4050000 }, { "epoch": 2.43, "learning_rate": 3.9036713397900274e-05, "loss": 0.2629, "step": 4050500 }, { "epoch": 2.43, "learning_rate": 3.903461343233971e-05, "loss": 0.2699, "step": 4051000 }, { "epoch": 2.43, "learning_rate": 3.903251346677915e-05, "loss": 0.2627, "step": 4051500 }, { "epoch": 2.43, "learning_rate": 3.903041350121858e-05, "loss": 0.2669, "step": 4052000 }, { "epoch": 2.43, "learning_rate": 3.9028317735589134e-05, "loss": 0.2615, "step": 4052500 }, { "epoch": 2.43, "learning_rate": 3.9026217770028575e-05, "loss": 0.2587, "step": 4053000 }, { "epoch": 2.43, "learning_rate": 3.902411780446801e-05, "loss": 0.2674, "step": 4053500 }, { "epoch": 2.43, "learning_rate": 3.902201783890744e-05, "loss": 0.2722, "step": 4054000 }, { "epoch": 2.43, "learning_rate": 3.9019922073277995e-05, "loss": 0.2661, "step": 4054500 }, { "epoch": 2.43, "learning_rate": 3.9017822107717435e-05, "loss": 0.2654, "step": 4055000 }, { "epoch": 2.43, "learning_rate": 3.901572214215687e-05, "loss": 0.2665, "step": 4055500 }, { "epoch": 2.43, "learning_rate": 3.901362637652742e-05, "loss": 0.2644, "step": 4056000 }, { "epoch": 2.43, "learning_rate": 3.9011526410966856e-05, "loss": 0.2686, "step": 4056500 }, { "epoch": 2.43, "learning_rate": 3.9009426445406296e-05, "loss": 0.2633, "step": 4057000 }, { "epoch": 2.43, "learning_rate": 3.900732647984573e-05, "loss": 0.2692, "step": 4057500 }, { "epoch": 2.43, "learning_rate": 3.900522651428516e-05, "loss": 0.2664, "step": 4058000 }, { "epoch": 2.43, "learning_rate": 3.90031265487246e-05, "loss": 0.263, "step": 4058500 }, { "epoch": 2.43, "learning_rate": 3.9001026583164037e-05, "loss": 0.2677, "step": 4059000 }, { "epoch": 2.43, "learning_rate": 3.899892661760348e-05, "loss": 0.268, "step": 4059500 }, { "epoch": 2.43, "learning_rate": 3.899683085197403e-05, "loss": 0.2687, "step": 4060000 }, { "epoch": 2.43, "learning_rate": 3.8994730886413464e-05, "loss": 0.2696, "step": 4060500 }, { "epoch": 2.43, "learning_rate": 3.89926309208529e-05, "loss": 0.2634, "step": 4061000 }, { "epoch": 2.44, "learning_rate": 3.899053095529234e-05, "loss": 0.2606, "step": 4061500 }, { "epoch": 2.44, "learning_rate": 3.898843518966289e-05, "loss": 0.2727, "step": 4062000 }, { "epoch": 2.44, "learning_rate": 3.8986335224102325e-05, "loss": 0.2625, "step": 4062500 }, { "epoch": 2.44, "learning_rate": 3.898423525854176e-05, "loss": 0.2624, "step": 4063000 }, { "epoch": 2.44, "learning_rate": 3.898213949291231e-05, "loss": 0.27, "step": 4063500 }, { "epoch": 2.44, "learning_rate": 3.898003952735175e-05, "loss": 0.2644, "step": 4064000 }, { "epoch": 2.44, "learning_rate": 3.8977939561791185e-05, "loss": 0.271, "step": 4064500 }, { "epoch": 2.44, "learning_rate": 3.8975839596230626e-05, "loss": 0.2705, "step": 4065000 }, { "epoch": 2.44, "learning_rate": 3.897373963067006e-05, "loss": 0.2616, "step": 4065500 }, { "epoch": 2.44, "learning_rate": 3.897163966510949e-05, "loss": 0.2636, "step": 4066000 }, { "epoch": 2.44, "learning_rate": 3.896953969954893e-05, "loss": 0.266, "step": 4066500 }, { "epoch": 2.44, "learning_rate": 3.896743973398836e-05, "loss": 0.2636, "step": 4067000 }, { "epoch": 2.44, "learning_rate": 3.896534396835892e-05, "loss": 0.2663, "step": 4067500 }, { "epoch": 2.44, "learning_rate": 3.896324400279835e-05, "loss": 0.2644, "step": 4068000 }, { "epoch": 2.44, "learning_rate": 3.8961144037237793e-05, "loss": 0.2665, "step": 4068500 }, { "epoch": 2.44, "learning_rate": 3.895904407167723e-05, "loss": 0.2655, "step": 4069000 }, { "epoch": 2.44, "learning_rate": 3.89569525059789e-05, "loss": 0.2693, "step": 4069500 }, { "epoch": 2.44, "learning_rate": 3.8954852540418334e-05, "loss": 0.2701, "step": 4070000 }, { "epoch": 2.44, "learning_rate": 3.895275257485777e-05, "loss": 0.2716, "step": 4070500 }, { "epoch": 2.44, "learning_rate": 3.895065260929721e-05, "loss": 0.2654, "step": 4071000 }, { "epoch": 2.44, "learning_rate": 3.894855264373664e-05, "loss": 0.271, "step": 4071500 }, { "epoch": 2.44, "learning_rate": 3.894645267817608e-05, "loss": 0.2677, "step": 4072000 }, { "epoch": 2.44, "learning_rate": 3.8944352712615515e-05, "loss": 0.2646, "step": 4072500 }, { "epoch": 2.44, "learning_rate": 3.894225274705495e-05, "loss": 0.2622, "step": 4073000 }, { "epoch": 2.44, "learning_rate": 3.89401569814255e-05, "loss": 0.265, "step": 4073500 }, { "epoch": 2.44, "learning_rate": 3.893805701586494e-05, "loss": 0.2668, "step": 4074000 }, { "epoch": 2.44, "learning_rate": 3.8935957050304376e-05, "loss": 0.2658, "step": 4074500 }, { "epoch": 2.44, "learning_rate": 3.893385708474381e-05, "loss": 0.2657, "step": 4075000 }, { "epoch": 2.44, "learning_rate": 3.893176131911436e-05, "loss": 0.2624, "step": 4075500 }, { "epoch": 2.44, "learning_rate": 3.89296613535538e-05, "loss": 0.2718, "step": 4076000 }, { "epoch": 2.44, "learning_rate": 3.8927561387993236e-05, "loss": 0.269, "step": 4076500 }, { "epoch": 2.44, "learning_rate": 3.892546142243267e-05, "loss": 0.2631, "step": 4077000 }, { "epoch": 2.44, "learning_rate": 3.892336565680323e-05, "loss": 0.2628, "step": 4077500 }, { "epoch": 2.44, "learning_rate": 3.8921265691242664e-05, "loss": 0.2656, "step": 4078000 }, { "epoch": 2.45, "learning_rate": 3.89191657256821e-05, "loss": 0.2666, "step": 4078500 }, { "epoch": 2.45, "learning_rate": 3.891706996005265e-05, "loss": 0.2665, "step": 4079000 }, { "epoch": 2.45, "learning_rate": 3.891496999449209e-05, "loss": 0.2682, "step": 4079500 }, { "epoch": 2.45, "learning_rate": 3.8912870028931524e-05, "loss": 0.2614, "step": 4080000 }, { "epoch": 2.45, "learning_rate": 3.891077006337096e-05, "loss": 0.2662, "step": 4080500 }, { "epoch": 2.45, "learning_rate": 3.89086700978104e-05, "loss": 0.271, "step": 4081000 }, { "epoch": 2.45, "learning_rate": 3.890657013224983e-05, "loss": 0.267, "step": 4081500 }, { "epoch": 2.45, "learning_rate": 3.8904470166689265e-05, "loss": 0.2655, "step": 4082000 }, { "epoch": 2.45, "learning_rate": 3.8902370201128705e-05, "loss": 0.2682, "step": 4082500 }, { "epoch": 2.45, "learning_rate": 3.890027443549926e-05, "loss": 0.2709, "step": 4083000 }, { "epoch": 2.45, "learning_rate": 3.889817446993869e-05, "loss": 0.2605, "step": 4083500 }, { "epoch": 2.45, "learning_rate": 3.8896074504378126e-05, "loss": 0.2669, "step": 4084000 }, { "epoch": 2.45, "learning_rate": 3.8893974538817566e-05, "loss": 0.2575, "step": 4084500 }, { "epoch": 2.45, "learning_rate": 3.8891874573257e-05, "loss": 0.2657, "step": 4085000 }, { "epoch": 2.45, "learning_rate": 3.888977880762755e-05, "loss": 0.265, "step": 4085500 }, { "epoch": 2.45, "learning_rate": 3.888767884206699e-05, "loss": 0.2665, "step": 4086000 }, { "epoch": 2.45, "learning_rate": 3.888557887650643e-05, "loss": 0.2676, "step": 4086500 }, { "epoch": 2.45, "learning_rate": 3.888347891094586e-05, "loss": 0.2654, "step": 4087000 }, { "epoch": 2.45, "learning_rate": 3.8881383145316414e-05, "loss": 0.2652, "step": 4087500 }, { "epoch": 2.45, "learning_rate": 3.8879283179755854e-05, "loss": 0.2647, "step": 4088000 }, { "epoch": 2.45, "learning_rate": 3.887718321419529e-05, "loss": 0.2695, "step": 4088500 }, { "epoch": 2.45, "learning_rate": 3.887508324863472e-05, "loss": 0.2597, "step": 4089000 }, { "epoch": 2.45, "learning_rate": 3.8872987483005274e-05, "loss": 0.2703, "step": 4089500 }, { "epoch": 2.45, "learning_rate": 3.8870887517444715e-05, "loss": 0.2702, "step": 4090000 }, { "epoch": 2.45, "learning_rate": 3.886879175181527e-05, "loss": 0.2703, "step": 4090500 }, { "epoch": 2.45, "learning_rate": 3.88666917862547e-05, "loss": 0.2673, "step": 4091000 }, { "epoch": 2.45, "learning_rate": 3.886459182069414e-05, "loss": 0.2731, "step": 4091500 }, { "epoch": 2.45, "learning_rate": 3.8862491855133575e-05, "loss": 0.2712, "step": 4092000 }, { "epoch": 2.45, "learning_rate": 3.886039188957301e-05, "loss": 0.269, "step": 4092500 }, { "epoch": 2.45, "learning_rate": 3.885829192401245e-05, "loss": 0.2703, "step": 4093000 }, { "epoch": 2.45, "learning_rate": 3.885619195845188e-05, "loss": 0.2668, "step": 4093500 }, { "epoch": 2.45, "learning_rate": 3.8854091992891316e-05, "loss": 0.2654, "step": 4094000 }, { "epoch": 2.45, "learning_rate": 3.885199622726187e-05, "loss": 0.2646, "step": 4094500 }, { "epoch": 2.46, "learning_rate": 3.884989626170131e-05, "loss": 0.2646, "step": 4095000 }, { "epoch": 2.46, "learning_rate": 3.884779629614074e-05, "loss": 0.2604, "step": 4095500 }, { "epoch": 2.46, "learning_rate": 3.884569633058018e-05, "loss": 0.2645, "step": 4096000 }, { "epoch": 2.46, "learning_rate": 3.884360056495073e-05, "loss": 0.2704, "step": 4096500 }, { "epoch": 2.46, "learning_rate": 3.884150059939017e-05, "loss": 0.2684, "step": 4097000 }, { "epoch": 2.46, "learning_rate": 3.8839400633829604e-05, "loss": 0.2654, "step": 4097500 }, { "epoch": 2.46, "learning_rate": 3.883730066826904e-05, "loss": 0.2601, "step": 4098000 }, { "epoch": 2.46, "learning_rate": 3.88352049026396e-05, "loss": 0.2651, "step": 4098500 }, { "epoch": 2.46, "learning_rate": 3.883310493707903e-05, "loss": 0.259, "step": 4099000 }, { "epoch": 2.46, "learning_rate": 3.8831004971518465e-05, "loss": 0.2675, "step": 4099500 }, { "epoch": 2.46, "learning_rate": 3.882890920588902e-05, "loss": 0.272, "step": 4100000 }, { "epoch": 2.46, "eval_loss": 0.24459972977638245, "eval_runtime": 1452.9658, "eval_samples_per_second": 362.514, "eval_steps_per_second": 60.419, "step": 4100000 }, { "epoch": 2.46, "learning_rate": 3.882680924032846e-05, "loss": 0.2622, "step": 4100500 }, { "epoch": 2.46, "learning_rate": 3.882470927476789e-05, "loss": 0.2672, "step": 4101000 }, { "epoch": 2.46, "learning_rate": 3.8822609309207325e-05, "loss": 0.2624, "step": 4101500 }, { "epoch": 2.46, "learning_rate": 3.8820509343646766e-05, "loss": 0.267, "step": 4102000 }, { "epoch": 2.46, "learning_rate": 3.88184093780862e-05, "loss": 0.2712, "step": 4102500 }, { "epoch": 2.46, "learning_rate": 3.881630941252563e-05, "loss": 0.2689, "step": 4103000 }, { "epoch": 2.46, "learning_rate": 3.881420944696507e-05, "loss": 0.2656, "step": 4103500 }, { "epoch": 2.46, "learning_rate": 3.8812113681335626e-05, "loss": 0.2664, "step": 4104000 }, { "epoch": 2.46, "learning_rate": 3.881001371577506e-05, "loss": 0.2639, "step": 4104500 }, { "epoch": 2.46, "learning_rate": 3.880791375021449e-05, "loss": 0.2663, "step": 4105000 }, { "epoch": 2.46, "learning_rate": 3.8805813784653934e-05, "loss": 0.269, "step": 4105500 }, { "epoch": 2.46, "learning_rate": 3.880371381909337e-05, "loss": 0.2616, "step": 4106000 }, { "epoch": 2.46, "learning_rate": 3.880161805346392e-05, "loss": 0.2657, "step": 4106500 }, { "epoch": 2.46, "learning_rate": 3.879951808790336e-05, "loss": 0.2706, "step": 4107000 }, { "epoch": 2.46, "learning_rate": 3.8797418122342794e-05, "loss": 0.261, "step": 4107500 }, { "epoch": 2.46, "learning_rate": 3.879531815678223e-05, "loss": 0.2696, "step": 4108000 }, { "epoch": 2.46, "learning_rate": 3.879322239115278e-05, "loss": 0.2654, "step": 4108500 }, { "epoch": 2.46, "learning_rate": 3.879112242559222e-05, "loss": 0.2617, "step": 4109000 }, { "epoch": 2.46, "learning_rate": 3.8789022460031655e-05, "loss": 0.2627, "step": 4109500 }, { "epoch": 2.46, "learning_rate": 3.878692249447109e-05, "loss": 0.2688, "step": 4110000 }, { "epoch": 2.46, "learning_rate": 3.878482252891053e-05, "loss": 0.2681, "step": 4110500 }, { "epoch": 2.46, "learning_rate": 3.878272676328108e-05, "loss": 0.268, "step": 4111000 }, { "epoch": 2.47, "learning_rate": 3.8780626797720516e-05, "loss": 0.2645, "step": 4111500 }, { "epoch": 2.47, "learning_rate": 3.877852683215995e-05, "loss": 0.262, "step": 4112000 }, { "epoch": 2.47, "learning_rate": 3.877643106653051e-05, "loss": 0.2734, "step": 4112500 }, { "epoch": 2.47, "learning_rate": 3.877433110096994e-05, "loss": 0.2722, "step": 4113000 }, { "epoch": 2.47, "learning_rate": 3.8772231135409376e-05, "loss": 0.2653, "step": 4113500 }, { "epoch": 2.47, "learning_rate": 3.877013116984882e-05, "loss": 0.26, "step": 4114000 }, { "epoch": 2.47, "learning_rate": 3.876803120428825e-05, "loss": 0.2696, "step": 4114500 }, { "epoch": 2.47, "learning_rate": 3.8765935438658804e-05, "loss": 0.268, "step": 4115000 }, { "epoch": 2.47, "learning_rate": 3.876383547309824e-05, "loss": 0.2654, "step": 4115500 }, { "epoch": 2.47, "learning_rate": 3.876173550753768e-05, "loss": 0.2676, "step": 4116000 }, { "epoch": 2.47, "learning_rate": 3.875963554197711e-05, "loss": 0.2589, "step": 4116500 }, { "epoch": 2.47, "learning_rate": 3.8757539776347664e-05, "loss": 0.2726, "step": 4117000 }, { "epoch": 2.47, "learning_rate": 3.87554398107871e-05, "loss": 0.2694, "step": 4117500 }, { "epoch": 2.47, "learning_rate": 3.875333984522654e-05, "loss": 0.2632, "step": 4118000 }, { "epoch": 2.47, "learning_rate": 3.875123987966597e-05, "loss": 0.2684, "step": 4118500 }, { "epoch": 2.47, "learning_rate": 3.8749139914105405e-05, "loss": 0.2679, "step": 4119000 }, { "epoch": 2.47, "learning_rate": 3.8747039948544845e-05, "loss": 0.2625, "step": 4119500 }, { "epoch": 2.47, "learning_rate": 3.87449441829154e-05, "loss": 0.2707, "step": 4120000 }, { "epoch": 2.47, "learning_rate": 3.874284421735483e-05, "loss": 0.2627, "step": 4120500 }, { "epoch": 2.47, "learning_rate": 3.874074425179427e-05, "loss": 0.2598, "step": 4121000 }, { "epoch": 2.47, "learning_rate": 3.8738644286233706e-05, "loss": 0.2668, "step": 4121500 }, { "epoch": 2.47, "learning_rate": 3.873654432067314e-05, "loss": 0.2645, "step": 4122000 }, { "epoch": 2.47, "learning_rate": 3.873444435511258e-05, "loss": 0.2675, "step": 4122500 }, { "epoch": 2.47, "learning_rate": 3.873234438955201e-05, "loss": 0.2657, "step": 4123000 }, { "epoch": 2.47, "learning_rate": 3.873024442399145e-05, "loss": 0.2678, "step": 4123500 }, { "epoch": 2.47, "learning_rate": 3.8728148658362e-05, "loss": 0.2674, "step": 4124000 }, { "epoch": 2.47, "learning_rate": 3.872604869280144e-05, "loss": 0.2755, "step": 4124500 }, { "epoch": 2.47, "learning_rate": 3.8723948727240874e-05, "loss": 0.263, "step": 4125000 }, { "epoch": 2.47, "learning_rate": 3.872184876168031e-05, "loss": 0.2657, "step": 4125500 }, { "epoch": 2.47, "learning_rate": 3.871975299605086e-05, "loss": 0.2595, "step": 4126000 }, { "epoch": 2.47, "learning_rate": 3.871765723042142e-05, "loss": 0.27, "step": 4126500 }, { "epoch": 2.47, "learning_rate": 3.8715557264860855e-05, "loss": 0.2665, "step": 4127000 }, { "epoch": 2.47, "learning_rate": 3.871345729930029e-05, "loss": 0.2686, "step": 4127500 }, { "epoch": 2.47, "learning_rate": 3.871135733373973e-05, "loss": 0.2714, "step": 4128000 }, { "epoch": 2.48, "learning_rate": 3.870925736817916e-05, "loss": 0.2677, "step": 4128500 }, { "epoch": 2.48, "learning_rate": 3.8707157402618595e-05, "loss": 0.2679, "step": 4129000 }, { "epoch": 2.48, "learning_rate": 3.8705057437058036e-05, "loss": 0.2683, "step": 4129500 }, { "epoch": 2.48, "learning_rate": 3.870295747149747e-05, "loss": 0.2684, "step": 4130000 }, { "epoch": 2.48, "learning_rate": 3.870086170586802e-05, "loss": 0.2662, "step": 4130500 }, { "epoch": 2.48, "learning_rate": 3.8698761740307456e-05, "loss": 0.265, "step": 4131000 }, { "epoch": 2.48, "learning_rate": 3.8696661774746896e-05, "loss": 0.261, "step": 4131500 }, { "epoch": 2.48, "learning_rate": 3.869456180918633e-05, "loss": 0.2751, "step": 4132000 }, { "epoch": 2.48, "learning_rate": 3.869246604355688e-05, "loss": 0.2648, "step": 4132500 }, { "epoch": 2.48, "learning_rate": 3.869036607799632e-05, "loss": 0.2584, "step": 4133000 }, { "epoch": 2.48, "learning_rate": 3.868827031236688e-05, "loss": 0.2626, "step": 4133500 }, { "epoch": 2.48, "learning_rate": 3.868617034680631e-05, "loss": 0.2598, "step": 4134000 }, { "epoch": 2.48, "learning_rate": 3.8684070381245744e-05, "loss": 0.2653, "step": 4134500 }, { "epoch": 2.48, "learning_rate": 3.8681970415685184e-05, "loss": 0.2668, "step": 4135000 }, { "epoch": 2.48, "learning_rate": 3.867987045012462e-05, "loss": 0.2701, "step": 4135500 }, { "epoch": 2.48, "learning_rate": 3.867777048456405e-05, "loss": 0.2653, "step": 4136000 }, { "epoch": 2.48, "learning_rate": 3.867567051900349e-05, "loss": 0.271, "step": 4136500 }, { "epoch": 2.48, "learning_rate": 3.8673570553442925e-05, "loss": 0.2696, "step": 4137000 }, { "epoch": 2.48, "learning_rate": 3.867147478781348e-05, "loss": 0.2658, "step": 4137500 }, { "epoch": 2.48, "learning_rate": 3.866937482225291e-05, "loss": 0.2652, "step": 4138000 }, { "epoch": 2.48, "learning_rate": 3.866727485669235e-05, "loss": 0.2708, "step": 4138500 }, { "epoch": 2.48, "learning_rate": 3.8665174891131786e-05, "loss": 0.2582, "step": 4139000 }, { "epoch": 2.48, "learning_rate": 3.866307492557122e-05, "loss": 0.2678, "step": 4139500 }, { "epoch": 2.48, "learning_rate": 3.866097915994177e-05, "loss": 0.2705, "step": 4140000 }, { "epoch": 2.48, "learning_rate": 3.865887919438121e-05, "loss": 0.2676, "step": 4140500 }, { "epoch": 2.48, "learning_rate": 3.8656779228820646e-05, "loss": 0.2619, "step": 4141000 }, { "epoch": 2.48, "learning_rate": 3.865467926326008e-05, "loss": 0.2682, "step": 4141500 }, { "epoch": 2.48, "learning_rate": 3.865258349763064e-05, "loss": 0.2749, "step": 4142000 }, { "epoch": 2.48, "learning_rate": 3.8650483532070074e-05, "loss": 0.2664, "step": 4142500 }, { "epoch": 2.48, "learning_rate": 3.864838356650951e-05, "loss": 0.271, "step": 4143000 }, { "epoch": 2.48, "learning_rate": 3.864628360094895e-05, "loss": 0.2676, "step": 4143500 }, { "epoch": 2.48, "learning_rate": 3.86441878353195e-05, "loss": 0.27, "step": 4144000 }, { "epoch": 2.48, "learning_rate": 3.8642087869758934e-05, "loss": 0.2664, "step": 4144500 }, { "epoch": 2.49, "learning_rate": 3.863998790419837e-05, "loss": 0.2639, "step": 4145000 }, { "epoch": 2.49, "learning_rate": 3.863788793863781e-05, "loss": 0.2642, "step": 4145500 }, { "epoch": 2.49, "learning_rate": 3.863579217300836e-05, "loss": 0.2659, "step": 4146000 }, { "epoch": 2.49, "learning_rate": 3.8633692207447795e-05, "loss": 0.2645, "step": 4146500 }, { "epoch": 2.49, "learning_rate": 3.863159224188723e-05, "loss": 0.2655, "step": 4147000 }, { "epoch": 2.49, "learning_rate": 3.862949227632667e-05, "loss": 0.27, "step": 4147500 }, { "epoch": 2.49, "learning_rate": 3.86273923107661e-05, "loss": 0.2658, "step": 4148000 }, { "epoch": 2.49, "learning_rate": 3.8625296545136656e-05, "loss": 0.2648, "step": 4148500 }, { "epoch": 2.49, "learning_rate": 3.8623196579576096e-05, "loss": 0.2595, "step": 4149000 }, { "epoch": 2.49, "learning_rate": 3.862109661401553e-05, "loss": 0.2616, "step": 4149500 }, { "epoch": 2.49, "learning_rate": 3.861899664845496e-05, "loss": 0.2661, "step": 4150000 }, { "epoch": 2.49, "learning_rate": 3.8616900882825517e-05, "loss": 0.2719, "step": 4150500 }, { "epoch": 2.49, "learning_rate": 3.861480091726496e-05, "loss": 0.264, "step": 4151000 }, { "epoch": 2.49, "learning_rate": 3.861270095170439e-05, "loss": 0.2663, "step": 4151500 }, { "epoch": 2.49, "learning_rate": 3.8610600986143824e-05, "loss": 0.2631, "step": 4152000 }, { "epoch": 2.49, "learning_rate": 3.860850522051438e-05, "loss": 0.2716, "step": 4152500 }, { "epoch": 2.49, "learning_rate": 3.860640525495382e-05, "loss": 0.2661, "step": 4153000 }, { "epoch": 2.49, "learning_rate": 3.860430528939325e-05, "loss": 0.2666, "step": 4153500 }, { "epoch": 2.49, "learning_rate": 3.860220952376381e-05, "loss": 0.2638, "step": 4154000 }, { "epoch": 2.49, "learning_rate": 3.8600109558203245e-05, "loss": 0.2676, "step": 4154500 }, { "epoch": 2.49, "learning_rate": 3.859800959264268e-05, "loss": 0.26, "step": 4155000 }, { "epoch": 2.49, "learning_rate": 3.859590962708211e-05, "loss": 0.2672, "step": 4155500 }, { "epoch": 2.49, "learning_rate": 3.859380966152155e-05, "loss": 0.2637, "step": 4156000 }, { "epoch": 2.49, "learning_rate": 3.8591709695960985e-05, "loss": 0.2731, "step": 4156500 }, { "epoch": 2.49, "learning_rate": 3.858960973040042e-05, "loss": 0.2657, "step": 4157000 }, { "epoch": 2.49, "learning_rate": 3.858750976483986e-05, "loss": 0.2678, "step": 4157500 }, { "epoch": 2.49, "learning_rate": 3.858541399921041e-05, "loss": 0.2657, "step": 4158000 }, { "epoch": 2.49, "learning_rate": 3.8583314033649846e-05, "loss": 0.263, "step": 4158500 }, { "epoch": 2.49, "learning_rate": 3.858121406808928e-05, "loss": 0.269, "step": 4159000 }, { "epoch": 2.49, "learning_rate": 3.857911410252872e-05, "loss": 0.2695, "step": 4159500 }, { "epoch": 2.49, "learning_rate": 3.857701413696815e-05, "loss": 0.2647, "step": 4160000 }, { "epoch": 2.49, "learning_rate": 3.857491417140759e-05, "loss": 0.264, "step": 4160500 }, { "epoch": 2.49, "learning_rate": 3.857281420584703e-05, "loss": 0.2645, "step": 4161000 }, { "epoch": 2.49, "learning_rate": 3.857071424028646e-05, "loss": 0.2655, "step": 4161500 }, { "epoch": 2.5, "learning_rate": 3.8568618474657014e-05, "loss": 0.2646, "step": 4162000 }, { "epoch": 2.5, "learning_rate": 3.8566518509096454e-05, "loss": 0.2701, "step": 4162500 }, { "epoch": 2.5, "learning_rate": 3.856441854353589e-05, "loss": 0.2636, "step": 4163000 }, { "epoch": 2.5, "learning_rate": 3.856231857797532e-05, "loss": 0.2674, "step": 4163500 }, { "epoch": 2.5, "learning_rate": 3.856021861241476e-05, "loss": 0.2647, "step": 4164000 }, { "epoch": 2.5, "learning_rate": 3.8558122846785315e-05, "loss": 0.2681, "step": 4164500 }, { "epoch": 2.5, "learning_rate": 3.855602288122475e-05, "loss": 0.267, "step": 4165000 }, { "epoch": 2.5, "learning_rate": 3.855392291566418e-05, "loss": 0.2679, "step": 4165500 }, { "epoch": 2.5, "learning_rate": 3.8551827150034735e-05, "loss": 0.2737, "step": 4166000 }, { "epoch": 2.5, "learning_rate": 3.8549727184474176e-05, "loss": 0.2686, "step": 4166500 }, { "epoch": 2.5, "learning_rate": 3.854762721891361e-05, "loss": 0.2657, "step": 4167000 }, { "epoch": 2.5, "learning_rate": 3.854552725335304e-05, "loss": 0.2668, "step": 4167500 }, { "epoch": 2.5, "learning_rate": 3.854342728779248e-05, "loss": 0.2664, "step": 4168000 }, { "epoch": 2.5, "learning_rate": 3.8541327322231916e-05, "loss": 0.2688, "step": 4168500 }, { "epoch": 2.5, "learning_rate": 3.853922735667135e-05, "loss": 0.2667, "step": 4169000 }, { "epoch": 2.5, "learning_rate": 3.853712739111078e-05, "loss": 0.2676, "step": 4169500 }, { "epoch": 2.5, "learning_rate": 3.8535031625481344e-05, "loss": 0.2651, "step": 4170000 }, { "epoch": 2.5, "learning_rate": 3.853293165992078e-05, "loss": 0.2644, "step": 4170500 }, { "epoch": 2.5, "learning_rate": 3.853083589429133e-05, "loss": 0.269, "step": 4171000 }, { "epoch": 2.5, "learning_rate": 3.852873592873077e-05, "loss": 0.2699, "step": 4171500 }, { "epoch": 2.5, "learning_rate": 3.8526635963170204e-05, "loss": 0.2671, "step": 4172000 }, { "epoch": 2.5, "learning_rate": 3.852453599760964e-05, "loss": 0.2576, "step": 4172500 }, { "epoch": 2.5, "learning_rate": 3.852243603204908e-05, "loss": 0.2661, "step": 4173000 }, { "epoch": 2.5, "learning_rate": 3.852033606648851e-05, "loss": 0.2714, "step": 4173500 }, { "epoch": 2.5, "learning_rate": 3.8518236100927945e-05, "loss": 0.2649, "step": 4174000 }, { "epoch": 2.5, "learning_rate": 3.851613613536738e-05, "loss": 0.2616, "step": 4174500 }, { "epoch": 2.5, "learning_rate": 3.851404036973794e-05, "loss": 0.2632, "step": 4175000 }, { "epoch": 2.5, "learning_rate": 3.851194040417737e-05, "loss": 0.2654, "step": 4175500 }, { "epoch": 2.5, "learning_rate": 3.8509840438616806e-05, "loss": 0.2601, "step": 4176000 }, { "epoch": 2.5, "learning_rate": 3.8507740473056246e-05, "loss": 0.2664, "step": 4176500 }, { "epoch": 2.5, "learning_rate": 3.85056447074268e-05, "loss": 0.261, "step": 4177000 }, { "epoch": 2.5, "learning_rate": 3.850354474186623e-05, "loss": 0.2676, "step": 4177500 }, { "epoch": 2.5, "learning_rate": 3.850144477630567e-05, "loss": 0.2719, "step": 4178000 }, { "epoch": 2.51, "learning_rate": 3.849934901067623e-05, "loss": 0.2628, "step": 4178500 }, { "epoch": 2.51, "learning_rate": 3.849724904511566e-05, "loss": 0.2667, "step": 4179000 }, { "epoch": 2.51, "learning_rate": 3.8495149079555094e-05, "loss": 0.2679, "step": 4179500 }, { "epoch": 2.51, "learning_rate": 3.8493049113994534e-05, "loss": 0.2646, "step": 4180000 }, { "epoch": 2.51, "learning_rate": 3.849094914843397e-05, "loss": 0.2642, "step": 4180500 }, { "epoch": 2.51, "learning_rate": 3.84888491828734e-05, "loss": 0.266, "step": 4181000 }, { "epoch": 2.51, "learning_rate": 3.8486749217312834e-05, "loss": 0.2614, "step": 4181500 }, { "epoch": 2.51, "learning_rate": 3.848464925175227e-05, "loss": 0.2607, "step": 4182000 }, { "epoch": 2.51, "learning_rate": 3.848255348612283e-05, "loss": 0.2587, "step": 4182500 }, { "epoch": 2.51, "learning_rate": 3.848045772049338e-05, "loss": 0.2673, "step": 4183000 }, { "epoch": 2.51, "learning_rate": 3.8478361954863935e-05, "loss": 0.2687, "step": 4183500 }, { "epoch": 2.51, "learning_rate": 3.8476261989303375e-05, "loss": 0.2583, "step": 4184000 }, { "epoch": 2.51, "learning_rate": 3.847416202374281e-05, "loss": 0.2682, "step": 4184500 }, { "epoch": 2.51, "learning_rate": 3.847206205818224e-05, "loss": 0.2692, "step": 4185000 }, { "epoch": 2.51, "learning_rate": 3.846996209262168e-05, "loss": 0.2636, "step": 4185500 }, { "epoch": 2.51, "learning_rate": 3.8467862127061116e-05, "loss": 0.2669, "step": 4186000 }, { "epoch": 2.51, "learning_rate": 3.846576216150055e-05, "loss": 0.2722, "step": 4186500 }, { "epoch": 2.51, "learning_rate": 3.846366219593999e-05, "loss": 0.2684, "step": 4187000 }, { "epoch": 2.51, "learning_rate": 3.846156223037942e-05, "loss": 0.2636, "step": 4187500 }, { "epoch": 2.51, "learning_rate": 3.845946226481886e-05, "loss": 0.2677, "step": 4188000 }, { "epoch": 2.51, "learning_rate": 3.845736229925829e-05, "loss": 0.273, "step": 4188500 }, { "epoch": 2.51, "learning_rate": 3.8455262333697724e-05, "loss": 0.269, "step": 4189000 }, { "epoch": 2.51, "learning_rate": 3.845316236813716e-05, "loss": 0.2714, "step": 4189500 }, { "epoch": 2.51, "learning_rate": 3.845106660250772e-05, "loss": 0.2649, "step": 4190000 }, { "epoch": 2.51, "learning_rate": 3.844897083687828e-05, "loss": 0.2642, "step": 4190500 }, { "epoch": 2.51, "learning_rate": 3.844687087131771e-05, "loss": 0.2666, "step": 4191000 }, { "epoch": 2.51, "learning_rate": 3.8444770905757145e-05, "loss": 0.2662, "step": 4191500 }, { "epoch": 2.51, "learning_rate": 3.8442670940196585e-05, "loss": 0.26, "step": 4192000 }, { "epoch": 2.51, "learning_rate": 3.844057097463602e-05, "loss": 0.2617, "step": 4192500 }, { "epoch": 2.51, "learning_rate": 3.843847100907545e-05, "loss": 0.265, "step": 4193000 }, { "epoch": 2.51, "learning_rate": 3.8436371043514885e-05, "loss": 0.2703, "step": 4193500 }, { "epoch": 2.51, "learning_rate": 3.843427107795432e-05, "loss": 0.2665, "step": 4194000 }, { "epoch": 2.51, "learning_rate": 3.843217531232488e-05, "loss": 0.2646, "step": 4194500 }, { "epoch": 2.52, "learning_rate": 3.843007954669543e-05, "loss": 0.2671, "step": 4195000 }, { "epoch": 2.52, "learning_rate": 3.8427979581134866e-05, "loss": 0.2607, "step": 4195500 }, { "epoch": 2.52, "learning_rate": 3.8425879615574306e-05, "loss": 0.2669, "step": 4196000 }, { "epoch": 2.52, "learning_rate": 3.842377965001374e-05, "loss": 0.2582, "step": 4196500 }, { "epoch": 2.52, "learning_rate": 3.842167968445317e-05, "loss": 0.2643, "step": 4197000 }, { "epoch": 2.52, "learning_rate": 3.8419579718892614e-05, "loss": 0.2654, "step": 4197500 }, { "epoch": 2.52, "learning_rate": 3.841747975333205e-05, "loss": 0.2627, "step": 4198000 }, { "epoch": 2.52, "learning_rate": 3.841537978777148e-05, "loss": 0.2676, "step": 4198500 }, { "epoch": 2.52, "learning_rate": 3.841328402214204e-05, "loss": 0.2575, "step": 4199000 }, { "epoch": 2.52, "learning_rate": 3.8411188256512594e-05, "loss": 0.269, "step": 4199500 }, { "epoch": 2.52, "learning_rate": 3.840908829095203e-05, "loss": 0.2604, "step": 4200000 }, { "epoch": 2.52, "eval_loss": 0.2439488023519516, "eval_runtime": 1437.9082, "eval_samples_per_second": 366.31, "eval_steps_per_second": 61.052, "step": 4200000 }, { "epoch": 2.52, "learning_rate": 3.840698832539146e-05, "loss": 0.2626, "step": 4200500 }, { "epoch": 2.52, "learning_rate": 3.84048883598309e-05, "loss": 0.2645, "step": 4201000 }, { "epoch": 2.52, "learning_rate": 3.8402788394270335e-05, "loss": 0.2661, "step": 4201500 }, { "epoch": 2.52, "learning_rate": 3.840068842870977e-05, "loss": 0.2626, "step": 4202000 }, { "epoch": 2.52, "learning_rate": 3.839859266308032e-05, "loss": 0.2616, "step": 4202500 }, { "epoch": 2.52, "learning_rate": 3.839649269751976e-05, "loss": 0.2621, "step": 4203000 }, { "epoch": 2.52, "learning_rate": 3.8394392731959196e-05, "loss": 0.2664, "step": 4203500 }, { "epoch": 2.52, "learning_rate": 3.839229276639863e-05, "loss": 0.2614, "step": 4204000 }, { "epoch": 2.52, "learning_rate": 3.839019280083807e-05, "loss": 0.2594, "step": 4204500 }, { "epoch": 2.52, "learning_rate": 3.83880928352775e-05, "loss": 0.2644, "step": 4205000 }, { "epoch": 2.52, "learning_rate": 3.8385992869716936e-05, "loss": 0.2636, "step": 4205500 }, { "epoch": 2.52, "learning_rate": 3.838389290415637e-05, "loss": 0.265, "step": 4206000 }, { "epoch": 2.52, "learning_rate": 3.838179713852693e-05, "loss": 0.2638, "step": 4206500 }, { "epoch": 2.52, "learning_rate": 3.8379697172966364e-05, "loss": 0.2607, "step": 4207000 }, { "epoch": 2.52, "learning_rate": 3.837760140733692e-05, "loss": 0.2676, "step": 4207500 }, { "epoch": 2.52, "learning_rate": 3.837550144177636e-05, "loss": 0.2729, "step": 4208000 }, { "epoch": 2.52, "learning_rate": 3.837340147621579e-05, "loss": 0.2703, "step": 4208500 }, { "epoch": 2.52, "learning_rate": 3.8371301510655224e-05, "loss": 0.2631, "step": 4209000 }, { "epoch": 2.52, "learning_rate": 3.8369201545094665e-05, "loss": 0.2634, "step": 4209500 }, { "epoch": 2.52, "learning_rate": 3.836710157953409e-05, "loss": 0.2643, "step": 4210000 }, { "epoch": 2.52, "learning_rate": 3.8365001613973525e-05, "loss": 0.2663, "step": 4210500 }, { "epoch": 2.52, "learning_rate": 3.8362905848344085e-05, "loss": 0.2671, "step": 4211000 }, { "epoch": 2.52, "learning_rate": 3.8360805882783525e-05, "loss": 0.2603, "step": 4211500 }, { "epoch": 2.53, "learning_rate": 3.835870591722296e-05, "loss": 0.2641, "step": 4212000 }, { "epoch": 2.53, "learning_rate": 3.835660595166239e-05, "loss": 0.2679, "step": 4212500 }, { "epoch": 2.53, "learning_rate": 3.8354505986101826e-05, "loss": 0.2622, "step": 4213000 }, { "epoch": 2.53, "learning_rate": 3.835240602054126e-05, "loss": 0.2612, "step": 4213500 }, { "epoch": 2.53, "learning_rate": 3.83503060549807e-05, "loss": 0.2668, "step": 4214000 }, { "epoch": 2.53, "learning_rate": 3.834820608942013e-05, "loss": 0.2638, "step": 4214500 }, { "epoch": 2.53, "learning_rate": 3.8346110323790686e-05, "loss": 0.2637, "step": 4215000 }, { "epoch": 2.53, "learning_rate": 3.834401035823012e-05, "loss": 0.2663, "step": 4215500 }, { "epoch": 2.53, "learning_rate": 3.834191459260068e-05, "loss": 0.2698, "step": 4216000 }, { "epoch": 2.53, "learning_rate": 3.833981462704012e-05, "loss": 0.2665, "step": 4216500 }, { "epoch": 2.53, "learning_rate": 3.8337714661479554e-05, "loss": 0.2667, "step": 4217000 }, { "epoch": 2.53, "learning_rate": 3.833561469591898e-05, "loss": 0.2618, "step": 4217500 }, { "epoch": 2.53, "learning_rate": 3.833351473035842e-05, "loss": 0.2652, "step": 4218000 }, { "epoch": 2.53, "learning_rate": 3.8331414764797854e-05, "loss": 0.2637, "step": 4218500 }, { "epoch": 2.53, "learning_rate": 3.832931479923729e-05, "loss": 0.2667, "step": 4219000 }, { "epoch": 2.53, "learning_rate": 3.832721483367673e-05, "loss": 0.2707, "step": 4219500 }, { "epoch": 2.53, "learning_rate": 3.832511906804728e-05, "loss": 0.26, "step": 4220000 }, { "epoch": 2.53, "learning_rate": 3.8323019102486715e-05, "loss": 0.2616, "step": 4220500 }, { "epoch": 2.53, "learning_rate": 3.8320919136926155e-05, "loss": 0.26, "step": 4221000 }, { "epoch": 2.53, "learning_rate": 3.831881917136559e-05, "loss": 0.2685, "step": 4221500 }, { "epoch": 2.53, "learning_rate": 3.831672340573614e-05, "loss": 0.2614, "step": 4222000 }, { "epoch": 2.53, "learning_rate": 3.83146276401067e-05, "loss": 0.2612, "step": 4222500 }, { "epoch": 2.53, "learning_rate": 3.8312527674546136e-05, "loss": 0.2681, "step": 4223000 }, { "epoch": 2.53, "learning_rate": 3.8310427708985576e-05, "loss": 0.2596, "step": 4223500 }, { "epoch": 2.53, "learning_rate": 3.830832774342501e-05, "loss": 0.2615, "step": 4224000 }, { "epoch": 2.53, "learning_rate": 3.8306227777864436e-05, "loss": 0.2664, "step": 4224500 }, { "epoch": 2.53, "learning_rate": 3.830412781230388e-05, "loss": 0.2654, "step": 4225000 }, { "epoch": 2.53, "learning_rate": 3.830202784674331e-05, "loss": 0.2638, "step": 4225500 }, { "epoch": 2.53, "learning_rate": 3.8299927881182744e-05, "loss": 0.2628, "step": 4226000 }, { "epoch": 2.53, "learning_rate": 3.8297827915622184e-05, "loss": 0.2684, "step": 4226500 }, { "epoch": 2.53, "learning_rate": 3.8295736349923864e-05, "loss": 0.2661, "step": 4227000 }, { "epoch": 2.53, "learning_rate": 3.82936363843633e-05, "loss": 0.2563, "step": 4227500 }, { "epoch": 2.53, "learning_rate": 3.829153641880273e-05, "loss": 0.2669, "step": 4228000 }, { "epoch": 2.54, "learning_rate": 3.828943645324217e-05, "loss": 0.2618, "step": 4228500 }, { "epoch": 2.54, "learning_rate": 3.82873364876816e-05, "loss": 0.2708, "step": 4229000 }, { "epoch": 2.54, "learning_rate": 3.828523652212103e-05, "loss": 0.2607, "step": 4229500 }, { "epoch": 2.54, "learning_rate": 3.828313655656047e-05, "loss": 0.2639, "step": 4230000 }, { "epoch": 2.54, "learning_rate": 3.828104079093103e-05, "loss": 0.2637, "step": 4230500 }, { "epoch": 2.54, "learning_rate": 3.8278940825370466e-05, "loss": 0.2659, "step": 4231000 }, { "epoch": 2.54, "learning_rate": 3.827684085980989e-05, "loss": 0.2584, "step": 4231500 }, { "epoch": 2.54, "learning_rate": 3.827474089424933e-05, "loss": 0.2646, "step": 4232000 }, { "epoch": 2.54, "learning_rate": 3.8272640928688766e-05, "loss": 0.2628, "step": 4232500 }, { "epoch": 2.54, "learning_rate": 3.82705409631282e-05, "loss": 0.2654, "step": 4233000 }, { "epoch": 2.54, "learning_rate": 3.826844099756764e-05, "loss": 0.2668, "step": 4233500 }, { "epoch": 2.54, "learning_rate": 3.826634103200707e-05, "loss": 0.2641, "step": 4234000 }, { "epoch": 2.54, "learning_rate": 3.826424526637763e-05, "loss": 0.2667, "step": 4234500 }, { "epoch": 2.54, "learning_rate": 3.826214530081707e-05, "loss": 0.2662, "step": 4235000 }, { "epoch": 2.54, "learning_rate": 3.82600453352565e-05, "loss": 0.265, "step": 4235500 }, { "epoch": 2.54, "learning_rate": 3.8257945369695934e-05, "loss": 0.2632, "step": 4236000 }, { "epoch": 2.54, "learning_rate": 3.825584960406649e-05, "loss": 0.2675, "step": 4236500 }, { "epoch": 2.54, "learning_rate": 3.825374963850593e-05, "loss": 0.2633, "step": 4237000 }, { "epoch": 2.54, "learning_rate": 3.825164967294536e-05, "loss": 0.2623, "step": 4237500 }, { "epoch": 2.54, "learning_rate": 3.8249549707384795e-05, "loss": 0.2674, "step": 4238000 }, { "epoch": 2.54, "learning_rate": 3.8247453941755355e-05, "loss": 0.2654, "step": 4238500 }, { "epoch": 2.54, "learning_rate": 3.824535817612591e-05, "loss": 0.2627, "step": 4239000 }, { "epoch": 2.54, "learning_rate": 3.824325821056535e-05, "loss": 0.2669, "step": 4239500 }, { "epoch": 2.54, "learning_rate": 3.824115824500478e-05, "loss": 0.2665, "step": 4240000 }, { "epoch": 2.54, "learning_rate": 3.823905827944422e-05, "loss": 0.2684, "step": 4240500 }, { "epoch": 2.54, "learning_rate": 3.823695831388365e-05, "loss": 0.2662, "step": 4241000 }, { "epoch": 2.54, "learning_rate": 3.823485834832308e-05, "loss": 0.265, "step": 4241500 }, { "epoch": 2.54, "learning_rate": 3.823275838276252e-05, "loss": 0.2616, "step": 4242000 }, { "epoch": 2.54, "learning_rate": 3.8230658417201956e-05, "loss": 0.2623, "step": 4242500 }, { "epoch": 2.54, "learning_rate": 3.822855845164139e-05, "loss": 0.2678, "step": 4243000 }, { "epoch": 2.54, "learning_rate": 3.822646688594307e-05, "loss": 0.2608, "step": 4243500 }, { "epoch": 2.54, "learning_rate": 3.8224366920382504e-05, "loss": 0.2641, "step": 4244000 }, { "epoch": 2.54, "learning_rate": 3.8222266954821944e-05, "loss": 0.2692, "step": 4244500 }, { "epoch": 2.55, "learning_rate": 3.822016698926138e-05, "loss": 0.2679, "step": 4245000 }, { "epoch": 2.55, "learning_rate": 3.821806702370081e-05, "loss": 0.2638, "step": 4245500 }, { "epoch": 2.55, "learning_rate": 3.8215967058140244e-05, "loss": 0.2673, "step": 4246000 }, { "epoch": 2.55, "learning_rate": 3.821386709257968e-05, "loss": 0.2661, "step": 4246500 }, { "epoch": 2.55, "learning_rate": 3.821176712701912e-05, "loss": 0.2637, "step": 4247000 }, { "epoch": 2.55, "learning_rate": 3.820967136138968e-05, "loss": 0.2643, "step": 4247500 }, { "epoch": 2.55, "learning_rate": 3.820757139582911e-05, "loss": 0.2641, "step": 4248000 }, { "epoch": 2.55, "learning_rate": 3.820547143026854e-05, "loss": 0.267, "step": 4248500 }, { "epoch": 2.55, "learning_rate": 3.820337146470798e-05, "loss": 0.2627, "step": 4249000 }, { "epoch": 2.55, "learning_rate": 3.820127149914741e-05, "loss": 0.266, "step": 4249500 }, { "epoch": 2.55, "learning_rate": 3.819917573351797e-05, "loss": 0.2592, "step": 4250000 }, { "epoch": 2.55, "learning_rate": 3.81970757679574e-05, "loss": 0.2574, "step": 4250500 }, { "epoch": 2.55, "learning_rate": 3.819497580239684e-05, "loss": 0.2604, "step": 4251000 }, { "epoch": 2.55, "learning_rate": 3.819287583683627e-05, "loss": 0.2686, "step": 4251500 }, { "epoch": 2.55, "learning_rate": 3.819078007120683e-05, "loss": 0.2648, "step": 4252000 }, { "epoch": 2.55, "learning_rate": 3.818868010564627e-05, "loss": 0.2593, "step": 4252500 }, { "epoch": 2.55, "learning_rate": 3.81865801400857e-05, "loss": 0.2619, "step": 4253000 }, { "epoch": 2.55, "learning_rate": 3.8184480174525134e-05, "loss": 0.2611, "step": 4253500 }, { "epoch": 2.55, "learning_rate": 3.8182384408895694e-05, "loss": 0.2597, "step": 4254000 }, { "epoch": 2.55, "learning_rate": 3.8180284443335134e-05, "loss": 0.2674, "step": 4254500 }, { "epoch": 2.55, "learning_rate": 3.817818447777457e-05, "loss": 0.27, "step": 4255000 }, { "epoch": 2.55, "learning_rate": 3.8176084512213994e-05, "loss": 0.2628, "step": 4255500 }, { "epoch": 2.55, "learning_rate": 3.8173988746584555e-05, "loss": 0.2635, "step": 4256000 }, { "epoch": 2.55, "learning_rate": 3.8171888781023995e-05, "loss": 0.2522, "step": 4256500 }, { "epoch": 2.55, "learning_rate": 3.816978881546343e-05, "loss": 0.2731, "step": 4257000 }, { "epoch": 2.55, "learning_rate": 3.816769304983398e-05, "loss": 0.2536, "step": 4257500 }, { "epoch": 2.55, "learning_rate": 3.8165593084273415e-05, "loss": 0.2664, "step": 4258000 }, { "epoch": 2.55, "learning_rate": 3.8163493118712856e-05, "loss": 0.2634, "step": 4258500 }, { "epoch": 2.55, "learning_rate": 3.816139315315229e-05, "loss": 0.269, "step": 4259000 }, { "epoch": 2.55, "learning_rate": 3.815929318759172e-05, "loss": 0.2676, "step": 4259500 }, { "epoch": 2.55, "learning_rate": 3.8157193222031156e-05, "loss": 0.2743, "step": 4260000 }, { "epoch": 2.55, "learning_rate": 3.815509325647059e-05, "loss": 0.2626, "step": 4260500 }, { "epoch": 2.55, "learning_rate": 3.815299329091003e-05, "loss": 0.2673, "step": 4261000 }, { "epoch": 2.55, "learning_rate": 3.815089752528059e-05, "loss": 0.2694, "step": 4261500 }, { "epoch": 2.56, "learning_rate": 3.8148797559720024e-05, "loss": 0.2578, "step": 4262000 }, { "epoch": 2.56, "learning_rate": 3.814670179409058e-05, "loss": 0.2643, "step": 4262500 }, { "epoch": 2.56, "learning_rate": 3.814460182853001e-05, "loss": 0.2698, "step": 4263000 }, { "epoch": 2.56, "learning_rate": 3.814250186296945e-05, "loss": 0.2646, "step": 4263500 }, { "epoch": 2.56, "learning_rate": 3.8140401897408884e-05, "loss": 0.2631, "step": 4264000 }, { "epoch": 2.56, "learning_rate": 3.813830193184832e-05, "loss": 0.2661, "step": 4264500 }, { "epoch": 2.56, "learning_rate": 3.813620196628775e-05, "loss": 0.2674, "step": 4265000 }, { "epoch": 2.56, "learning_rate": 3.8134102000727185e-05, "loss": 0.2624, "step": 4265500 }, { "epoch": 2.56, "learning_rate": 3.813200203516662e-05, "loss": 0.2625, "step": 4266000 }, { "epoch": 2.56, "learning_rate": 3.812990206960606e-05, "loss": 0.265, "step": 4266500 }, { "epoch": 2.56, "learning_rate": 3.812781050390774e-05, "loss": 0.2683, "step": 4267000 }, { "epoch": 2.56, "learning_rate": 3.812571053834717e-05, "loss": 0.2692, "step": 4267500 }, { "epoch": 2.56, "learning_rate": 3.8123610572786606e-05, "loss": 0.2636, "step": 4268000 }, { "epoch": 2.56, "learning_rate": 3.8121510607226046e-05, "loss": 0.2687, "step": 4268500 }, { "epoch": 2.56, "learning_rate": 3.811941064166548e-05, "loss": 0.2629, "step": 4269000 }, { "epoch": 2.56, "learning_rate": 3.811731067610491e-05, "loss": 0.2573, "step": 4269500 }, { "epoch": 2.56, "learning_rate": 3.8115210710544346e-05, "loss": 0.2611, "step": 4270000 }, { "epoch": 2.56, "learning_rate": 3.811311074498378e-05, "loss": 0.2675, "step": 4270500 }, { "epoch": 2.56, "learning_rate": 3.811101077942321e-05, "loss": 0.2673, "step": 4271000 }, { "epoch": 2.56, "learning_rate": 3.8108915013793774e-05, "loss": 0.2642, "step": 4271500 }, { "epoch": 2.56, "learning_rate": 3.810681504823321e-05, "loss": 0.264, "step": 4272000 }, { "epoch": 2.56, "learning_rate": 3.810471508267264e-05, "loss": 0.2657, "step": 4272500 }, { "epoch": 2.56, "learning_rate": 3.8102615117112074e-05, "loss": 0.2678, "step": 4273000 }, { "epoch": 2.56, "learning_rate": 3.8100519351482634e-05, "loss": 0.2653, "step": 4273500 }, { "epoch": 2.56, "learning_rate": 3.8098419385922075e-05, "loss": 0.2692, "step": 4274000 }, { "epoch": 2.56, "learning_rate": 3.80963194203615e-05, "loss": 0.2607, "step": 4274500 }, { "epoch": 2.56, "learning_rate": 3.809421945480094e-05, "loss": 0.2693, "step": 4275000 }, { "epoch": 2.56, "learning_rate": 3.80921236891715e-05, "loss": 0.2643, "step": 4275500 }, { "epoch": 2.56, "learning_rate": 3.8090023723610935e-05, "loss": 0.266, "step": 4276000 }, { "epoch": 2.56, "learning_rate": 3.808792375805037e-05, "loss": 0.2645, "step": 4276500 }, { "epoch": 2.56, "learning_rate": 3.80858237924898e-05, "loss": 0.2568, "step": 4277000 }, { "epoch": 2.56, "learning_rate": 3.808372802686036e-05, "loss": 0.262, "step": 4277500 }, { "epoch": 2.56, "learning_rate": 3.8081628061299796e-05, "loss": 0.2733, "step": 4278000 }, { "epoch": 2.57, "learning_rate": 3.807952809573923e-05, "loss": 0.2639, "step": 4278500 }, { "epoch": 2.57, "learning_rate": 3.807742813017867e-05, "loss": 0.264, "step": 4279000 }, { "epoch": 2.57, "learning_rate": 3.807533236454922e-05, "loss": 0.2635, "step": 4279500 }, { "epoch": 2.57, "learning_rate": 3.807323239898866e-05, "loss": 0.2589, "step": 4280000 }, { "epoch": 2.57, "learning_rate": 3.807113243342809e-05, "loss": 0.2628, "step": 4280500 }, { "epoch": 2.57, "learning_rate": 3.806903246786753e-05, "loss": 0.2712, "step": 4281000 }, { "epoch": 2.57, "learning_rate": 3.806693250230696e-05, "loss": 0.2595, "step": 4281500 }, { "epoch": 2.57, "learning_rate": 3.806483673667752e-05, "loss": 0.2682, "step": 4282000 }, { "epoch": 2.57, "learning_rate": 3.806273677111696e-05, "loss": 0.2686, "step": 4282500 }, { "epoch": 2.57, "learning_rate": 3.806063680555639e-05, "loss": 0.2669, "step": 4283000 }, { "epoch": 2.57, "learning_rate": 3.8058541039926945e-05, "loss": 0.258, "step": 4283500 }, { "epoch": 2.57, "learning_rate": 3.805644107436638e-05, "loss": 0.2679, "step": 4284000 }, { "epoch": 2.57, "learning_rate": 3.805434110880582e-05, "loss": 0.2609, "step": 4284500 }, { "epoch": 2.57, "learning_rate": 3.805224114324525e-05, "loss": 0.2639, "step": 4285000 }, { "epoch": 2.57, "learning_rate": 3.8050141177684685e-05, "loss": 0.2627, "step": 4285500 }, { "epoch": 2.57, "learning_rate": 3.8048041212124126e-05, "loss": 0.2601, "step": 4286000 }, { "epoch": 2.57, "learning_rate": 3.804594124656355e-05, "loss": 0.2693, "step": 4286500 }, { "epoch": 2.57, "learning_rate": 3.8043841281002986e-05, "loss": 0.264, "step": 4287000 }, { "epoch": 2.57, "learning_rate": 3.8041745515373546e-05, "loss": 0.2638, "step": 4287500 }, { "epoch": 2.57, "learning_rate": 3.8039645549812986e-05, "loss": 0.2628, "step": 4288000 }, { "epoch": 2.57, "learning_rate": 3.803754558425242e-05, "loss": 0.264, "step": 4288500 }, { "epoch": 2.57, "learning_rate": 3.803544561869185e-05, "loss": 0.2639, "step": 4289000 }, { "epoch": 2.57, "learning_rate": 3.8033349853062414e-05, "loss": 0.2704, "step": 4289500 }, { "epoch": 2.57, "learning_rate": 3.803124988750185e-05, "loss": 0.2675, "step": 4290000 }, { "epoch": 2.57, "learning_rate": 3.802914992194128e-05, "loss": 0.2629, "step": 4290500 }, { "epoch": 2.57, "learning_rate": 3.8027049956380714e-05, "loss": 0.2665, "step": 4291000 }, { "epoch": 2.57, "learning_rate": 3.8024954190751274e-05, "loss": 0.2594, "step": 4291500 }, { "epoch": 2.57, "learning_rate": 3.802285422519071e-05, "loss": 0.2646, "step": 4292000 }, { "epoch": 2.57, "learning_rate": 3.802075425963014e-05, "loss": 0.2708, "step": 4292500 }, { "epoch": 2.57, "learning_rate": 3.801865429406958e-05, "loss": 0.2672, "step": 4293000 }, { "epoch": 2.57, "learning_rate": 3.801655432850901e-05, "loss": 0.2672, "step": 4293500 }, { "epoch": 2.57, "learning_rate": 3.801445856287957e-05, "loss": 0.2649, "step": 4294000 }, { "epoch": 2.57, "learning_rate": 3.8012358597319e-05, "loss": 0.2658, "step": 4294500 }, { "epoch": 2.58, "learning_rate": 3.801025863175844e-05, "loss": 0.2602, "step": 4295000 }, { "epoch": 2.58, "learning_rate": 3.8008158666197876e-05, "loss": 0.2602, "step": 4295500 }, { "epoch": 2.58, "learning_rate": 3.800606710049955e-05, "loss": 0.2658, "step": 4296000 }, { "epoch": 2.58, "learning_rate": 3.800396713493898e-05, "loss": 0.2677, "step": 4296500 }, { "epoch": 2.58, "learning_rate": 3.800186716937842e-05, "loss": 0.2626, "step": 4297000 }, { "epoch": 2.58, "learning_rate": 3.7999767203817857e-05, "loss": 0.265, "step": 4297500 }, { "epoch": 2.58, "learning_rate": 3.799766723825729e-05, "loss": 0.2651, "step": 4298000 }, { "epoch": 2.58, "learning_rate": 3.799556727269673e-05, "loss": 0.2625, "step": 4298500 }, { "epoch": 2.58, "learning_rate": 3.7993467307136164e-05, "loss": 0.2654, "step": 4299000 }, { "epoch": 2.58, "learning_rate": 3.79913673415756e-05, "loss": 0.267, "step": 4299500 }, { "epoch": 2.58, "learning_rate": 3.798927157594615e-05, "loss": 0.2618, "step": 4300000 }, { "epoch": 2.58, "eval_loss": 0.24335187673568726, "eval_runtime": 1434.6421, "eval_samples_per_second": 367.144, "eval_steps_per_second": 61.191, "step": 4300000 }, { "epoch": 2.58, "learning_rate": 3.798717581031671e-05, "loss": 0.2631, "step": 4300500 }, { "epoch": 2.58, "learning_rate": 3.7985075844756145e-05, "loss": 0.2698, "step": 4301000 }, { "epoch": 2.58, "learning_rate": 3.798297587919558e-05, "loss": 0.2596, "step": 4301500 }, { "epoch": 2.58, "learning_rate": 3.798087591363502e-05, "loss": 0.266, "step": 4302000 }, { "epoch": 2.58, "learning_rate": 3.797877594807445e-05, "loss": 0.2647, "step": 4302500 }, { "epoch": 2.58, "learning_rate": 3.7976675982513885e-05, "loss": 0.2629, "step": 4303000 }, { "epoch": 2.58, "learning_rate": 3.7974576016953325e-05, "loss": 0.2615, "step": 4303500 }, { "epoch": 2.58, "learning_rate": 3.797247605139276e-05, "loss": 0.2689, "step": 4304000 }, { "epoch": 2.58, "learning_rate": 3.797037608583219e-05, "loss": 0.2675, "step": 4304500 }, { "epoch": 2.58, "learning_rate": 3.7968280320202746e-05, "loss": 0.2591, "step": 4305000 }, { "epoch": 2.58, "learning_rate": 3.7966180354642186e-05, "loss": 0.2682, "step": 4305500 }, { "epoch": 2.58, "learning_rate": 3.796408038908162e-05, "loss": 0.2632, "step": 4306000 }, { "epoch": 2.58, "learning_rate": 3.796198462345217e-05, "loss": 0.2714, "step": 4306500 }, { "epoch": 2.58, "learning_rate": 3.795988885782273e-05, "loss": 0.2654, "step": 4307000 }, { "epoch": 2.58, "learning_rate": 3.795778889226217e-05, "loss": 0.2686, "step": 4307500 }, { "epoch": 2.58, "learning_rate": 3.79556889267016e-05, "loss": 0.2688, "step": 4308000 }, { "epoch": 2.58, "learning_rate": 3.7953588961141034e-05, "loss": 0.261, "step": 4308500 }, { "epoch": 2.58, "learning_rate": 3.7951488995580474e-05, "loss": 0.2666, "step": 4309000 }, { "epoch": 2.58, "learning_rate": 3.794938903001991e-05, "loss": 0.2617, "step": 4309500 }, { "epoch": 2.58, "learning_rate": 3.794728906445934e-05, "loss": 0.2607, "step": 4310000 }, { "epoch": 2.58, "learning_rate": 3.794518909889878e-05, "loss": 0.2628, "step": 4310500 }, { "epoch": 2.58, "learning_rate": 3.7943089133338215e-05, "loss": 0.264, "step": 4311000 }, { "epoch": 2.58, "learning_rate": 3.794098916777765e-05, "loss": 0.267, "step": 4311500 }, { "epoch": 2.59, "learning_rate": 3.793888920221709e-05, "loss": 0.2615, "step": 4312000 }, { "epoch": 2.59, "learning_rate": 3.7936789236656515e-05, "loss": 0.2703, "step": 4312500 }, { "epoch": 2.59, "learning_rate": 3.793468927109595e-05, "loss": 0.2641, "step": 4313000 }, { "epoch": 2.59, "learning_rate": 3.793259350546651e-05, "loss": 0.2638, "step": 4313500 }, { "epoch": 2.59, "learning_rate": 3.793049353990595e-05, "loss": 0.2577, "step": 4314000 }, { "epoch": 2.59, "learning_rate": 3.792839357434538e-05, "loss": 0.2639, "step": 4314500 }, { "epoch": 2.59, "learning_rate": 3.792629360878481e-05, "loss": 0.2697, "step": 4315000 }, { "epoch": 2.59, "learning_rate": 3.792419784315537e-05, "loss": 0.2707, "step": 4315500 }, { "epoch": 2.59, "learning_rate": 3.792209787759481e-05, "loss": 0.2645, "step": 4316000 }, { "epoch": 2.59, "learning_rate": 3.791999791203424e-05, "loss": 0.262, "step": 4316500 }, { "epoch": 2.59, "learning_rate": 3.791789794647368e-05, "loss": 0.2683, "step": 4317000 }, { "epoch": 2.59, "learning_rate": 3.791580218084424e-05, "loss": 0.2644, "step": 4317500 }, { "epoch": 2.59, "learning_rate": 3.791370221528367e-05, "loss": 0.2629, "step": 4318000 }, { "epoch": 2.59, "learning_rate": 3.7911602249723104e-05, "loss": 0.2622, "step": 4318500 }, { "epoch": 2.59, "learning_rate": 3.790950648409366e-05, "loss": 0.2602, "step": 4319000 }, { "epoch": 2.59, "learning_rate": 3.79074065185331e-05, "loss": 0.2665, "step": 4319500 }, { "epoch": 2.59, "learning_rate": 3.790530655297253e-05, "loss": 0.2633, "step": 4320000 }, { "epoch": 2.59, "learning_rate": 3.7903206587411965e-05, "loss": 0.2663, "step": 4320500 }, { "epoch": 2.59, "learning_rate": 3.7901106621851405e-05, "loss": 0.269, "step": 4321000 }, { "epoch": 2.59, "learning_rate": 3.789900665629084e-05, "loss": 0.2643, "step": 4321500 }, { "epoch": 2.59, "learning_rate": 3.789691089066139e-05, "loss": 0.2623, "step": 4322000 }, { "epoch": 2.59, "learning_rate": 3.7894810925100826e-05, "loss": 0.2649, "step": 4322500 }, { "epoch": 2.59, "learning_rate": 3.7892710959540266e-05, "loss": 0.2612, "step": 4323000 }, { "epoch": 2.59, "learning_rate": 3.78906109939797e-05, "loss": 0.2682, "step": 4323500 }, { "epoch": 2.59, "learning_rate": 3.788851102841913e-05, "loss": 0.2663, "step": 4324000 }, { "epoch": 2.59, "learning_rate": 3.7886411062858566e-05, "loss": 0.2652, "step": 4324500 }, { "epoch": 2.59, "learning_rate": 3.7884311097298e-05, "loss": 0.2574, "step": 4325000 }, { "epoch": 2.59, "learning_rate": 3.788221113173744e-05, "loss": 0.2612, "step": 4325500 }, { "epoch": 2.59, "learning_rate": 3.7880115366108e-05, "loss": 0.2642, "step": 4326000 }, { "epoch": 2.59, "learning_rate": 3.7878015400547434e-05, "loss": 0.2675, "step": 4326500 }, { "epoch": 2.59, "learning_rate": 3.787591543498686e-05, "loss": 0.2684, "step": 4327000 }, { "epoch": 2.59, "learning_rate": 3.78738154694263e-05, "loss": 0.2626, "step": 4327500 }, { "epoch": 2.59, "learning_rate": 3.787171970379686e-05, "loss": 0.2639, "step": 4328000 }, { "epoch": 2.6, "learning_rate": 3.7869619738236294e-05, "loss": 0.2694, "step": 4328500 }, { "epoch": 2.6, "learning_rate": 3.786751977267573e-05, "loss": 0.2639, "step": 4329000 }, { "epoch": 2.6, "learning_rate": 3.786541980711516e-05, "loss": 0.2664, "step": 4329500 }, { "epoch": 2.6, "learning_rate": 3.786332404148572e-05, "loss": 0.2644, "step": 4330000 }, { "epoch": 2.6, "learning_rate": 3.7861224075925155e-05, "loss": 0.2644, "step": 4330500 }, { "epoch": 2.6, "learning_rate": 3.7859124110364595e-05, "loss": 0.2621, "step": 4331000 }, { "epoch": 2.6, "learning_rate": 3.785702414480402e-05, "loss": 0.2684, "step": 4331500 }, { "epoch": 2.6, "learning_rate": 3.7854924179243456e-05, "loss": 0.2649, "step": 4332000 }, { "epoch": 2.6, "learning_rate": 3.7852828413614016e-05, "loss": 0.2665, "step": 4332500 }, { "epoch": 2.6, "learning_rate": 3.7850728448053456e-05, "loss": 0.2698, "step": 4333000 }, { "epoch": 2.6, "learning_rate": 3.784862848249289e-05, "loss": 0.2659, "step": 4333500 }, { "epoch": 2.6, "learning_rate": 3.7846528516932316e-05, "loss": 0.2634, "step": 4334000 }, { "epoch": 2.6, "learning_rate": 3.7844432751302877e-05, "loss": 0.2634, "step": 4334500 }, { "epoch": 2.6, "learning_rate": 3.784233278574232e-05, "loss": 0.2648, "step": 4335000 }, { "epoch": 2.6, "learning_rate": 3.784023702011287e-05, "loss": 0.2668, "step": 4335500 }, { "epoch": 2.6, "learning_rate": 3.7838137054552304e-05, "loss": 0.2643, "step": 4336000 }, { "epoch": 2.6, "learning_rate": 3.783603708899174e-05, "loss": 0.2618, "step": 4336500 }, { "epoch": 2.6, "learning_rate": 3.783393712343118e-05, "loss": 0.2644, "step": 4337000 }, { "epoch": 2.6, "learning_rate": 3.783183715787061e-05, "loss": 0.2673, "step": 4337500 }, { "epoch": 2.6, "learning_rate": 3.7829741392241165e-05, "loss": 0.2681, "step": 4338000 }, { "epoch": 2.6, "learning_rate": 3.7827641426680605e-05, "loss": 0.2606, "step": 4338500 }, { "epoch": 2.6, "learning_rate": 3.782554146112004e-05, "loss": 0.2583, "step": 4339000 }, { "epoch": 2.6, "learning_rate": 3.782344149555947e-05, "loss": 0.265, "step": 4339500 }, { "epoch": 2.6, "learning_rate": 3.782134152999891e-05, "loss": 0.2615, "step": 4340000 }, { "epoch": 2.6, "learning_rate": 3.7819241564438345e-05, "loss": 0.2642, "step": 4340500 }, { "epoch": 2.6, "learning_rate": 3.781714159887778e-05, "loss": 0.266, "step": 4341000 }, { "epoch": 2.6, "learning_rate": 3.781504163331721e-05, "loss": 0.2668, "step": 4341500 }, { "epoch": 2.6, "learning_rate": 3.781294586768777e-05, "loss": 0.2604, "step": 4342000 }, { "epoch": 2.6, "learning_rate": 3.7810845902127206e-05, "loss": 0.2604, "step": 4342500 }, { "epoch": 2.6, "learning_rate": 3.780874593656664e-05, "loss": 0.2686, "step": 4343000 }, { "epoch": 2.6, "learning_rate": 3.780664597100607e-05, "loss": 0.2649, "step": 4343500 }, { "epoch": 2.6, "learning_rate": 3.7804550205376633e-05, "loss": 0.2617, "step": 4344000 }, { "epoch": 2.6, "learning_rate": 3.780245443974719e-05, "loss": 0.2644, "step": 4344500 }, { "epoch": 2.61, "learning_rate": 3.780035447418662e-05, "loss": 0.2632, "step": 4345000 }, { "epoch": 2.61, "learning_rate": 3.779825450862606e-05, "loss": 0.2626, "step": 4345500 }, { "epoch": 2.61, "learning_rate": 3.7796154543065494e-05, "loss": 0.2697, "step": 4346000 }, { "epoch": 2.61, "learning_rate": 3.779405457750493e-05, "loss": 0.2741, "step": 4346500 }, { "epoch": 2.61, "learning_rate": 3.779195461194437e-05, "loss": 0.2637, "step": 4347000 }, { "epoch": 2.61, "learning_rate": 3.77898546463838e-05, "loss": 0.2662, "step": 4347500 }, { "epoch": 2.61, "learning_rate": 3.7787754680823235e-05, "loss": 0.2641, "step": 4348000 }, { "epoch": 2.61, "learning_rate": 3.778565891519379e-05, "loss": 0.265, "step": 4348500 }, { "epoch": 2.61, "learning_rate": 3.778355894963323e-05, "loss": 0.2635, "step": 4349000 }, { "epoch": 2.61, "learning_rate": 3.778145898407266e-05, "loss": 0.2648, "step": 4349500 }, { "epoch": 2.61, "learning_rate": 3.7779359018512095e-05, "loss": 0.2586, "step": 4350000 }, { "epoch": 2.61, "learning_rate": 3.7777263252882656e-05, "loss": 0.2608, "step": 4350500 }, { "epoch": 2.61, "learning_rate": 3.777516328732209e-05, "loss": 0.2647, "step": 4351000 }, { "epoch": 2.61, "learning_rate": 3.777306752169264e-05, "loss": 0.2631, "step": 4351500 }, { "epoch": 2.61, "learning_rate": 3.7770967556132076e-05, "loss": 0.2622, "step": 4352000 }, { "epoch": 2.61, "learning_rate": 3.7768867590571517e-05, "loss": 0.2679, "step": 4352500 }, { "epoch": 2.61, "learning_rate": 3.776676762501095e-05, "loss": 0.2672, "step": 4353000 }, { "epoch": 2.61, "learning_rate": 3.7764667659450383e-05, "loss": 0.2696, "step": 4353500 }, { "epoch": 2.61, "learning_rate": 3.7762567693889824e-05, "loss": 0.259, "step": 4354000 }, { "epoch": 2.61, "learning_rate": 3.776046772832926e-05, "loss": 0.2729, "step": 4354500 }, { "epoch": 2.61, "learning_rate": 3.775836776276869e-05, "loss": 0.2679, "step": 4355000 }, { "epoch": 2.61, "learning_rate": 3.7756271997139244e-05, "loss": 0.2699, "step": 4355500 }, { "epoch": 2.61, "learning_rate": 3.7754172031578684e-05, "loss": 0.2665, "step": 4356000 }, { "epoch": 2.61, "learning_rate": 3.775207206601812e-05, "loss": 0.263, "step": 4356500 }, { "epoch": 2.61, "learning_rate": 3.774997210045755e-05, "loss": 0.2675, "step": 4357000 }, { "epoch": 2.61, "learning_rate": 3.7747880534759225e-05, "loss": 0.2656, "step": 4357500 }, { "epoch": 2.61, "learning_rate": 3.7745780569198665e-05, "loss": 0.265, "step": 4358000 }, { "epoch": 2.61, "learning_rate": 3.77436806036381e-05, "loss": 0.2621, "step": 4358500 }, { "epoch": 2.61, "learning_rate": 3.774158063807753e-05, "loss": 0.2611, "step": 4359000 }, { "epoch": 2.61, "learning_rate": 3.773948067251697e-05, "loss": 0.2602, "step": 4359500 }, { "epoch": 2.61, "learning_rate": 3.7737380706956406e-05, "loss": 0.2675, "step": 4360000 }, { "epoch": 2.61, "learning_rate": 3.773528074139584e-05, "loss": 0.2597, "step": 4360500 }, { "epoch": 2.61, "learning_rate": 3.773318077583528e-05, "loss": 0.2608, "step": 4361000 }, { "epoch": 2.61, "learning_rate": 3.773108501020583e-05, "loss": 0.2651, "step": 4361500 }, { "epoch": 2.62, "learning_rate": 3.772898504464527e-05, "loss": 0.2609, "step": 4362000 }, { "epoch": 2.62, "learning_rate": 3.77268850790847e-05, "loss": 0.2612, "step": 4362500 }, { "epoch": 2.62, "learning_rate": 3.772478511352414e-05, "loss": 0.2595, "step": 4363000 }, { "epoch": 2.62, "learning_rate": 3.7722685147963574e-05, "loss": 0.26, "step": 4363500 }, { "epoch": 2.62, "learning_rate": 3.772058938233413e-05, "loss": 0.2608, "step": 4364000 }, { "epoch": 2.62, "learning_rate": 3.771848941677357e-05, "loss": 0.2654, "step": 4364500 }, { "epoch": 2.62, "learning_rate": 3.7716389451213e-05, "loss": 0.2619, "step": 4365000 }, { "epoch": 2.62, "learning_rate": 3.7714289485652435e-05, "loss": 0.2636, "step": 4365500 }, { "epoch": 2.62, "learning_rate": 3.771219372002299e-05, "loss": 0.2653, "step": 4366000 }, { "epoch": 2.62, "learning_rate": 3.771009375446243e-05, "loss": 0.2619, "step": 4366500 }, { "epoch": 2.62, "learning_rate": 3.770799798883298e-05, "loss": 0.266, "step": 4367000 }, { "epoch": 2.62, "learning_rate": 3.7705898023272415e-05, "loss": 0.2655, "step": 4367500 }, { "epoch": 2.62, "learning_rate": 3.770379805771185e-05, "loss": 0.2658, "step": 4368000 }, { "epoch": 2.62, "learning_rate": 3.770169809215129e-05, "loss": 0.268, "step": 4368500 }, { "epoch": 2.62, "learning_rate": 3.769959812659072e-05, "loss": 0.2647, "step": 4369000 }, { "epoch": 2.62, "learning_rate": 3.7697498161030156e-05, "loss": 0.2652, "step": 4369500 }, { "epoch": 2.62, "learning_rate": 3.7695398195469596e-05, "loss": 0.2625, "step": 4370000 }, { "epoch": 2.62, "learning_rate": 3.769329822990903e-05, "loss": 0.266, "step": 4370500 }, { "epoch": 2.62, "learning_rate": 3.769120246427958e-05, "loss": 0.2682, "step": 4371000 }, { "epoch": 2.62, "learning_rate": 3.7689102498719023e-05, "loss": 0.2614, "step": 4371500 }, { "epoch": 2.62, "learning_rate": 3.768700253315846e-05, "loss": 0.2626, "step": 4372000 }, { "epoch": 2.62, "learning_rate": 3.768490676752901e-05, "loss": 0.2645, "step": 4372500 }, { "epoch": 2.62, "learning_rate": 3.7682806801968444e-05, "loss": 0.2632, "step": 4373000 }, { "epoch": 2.62, "learning_rate": 3.7680706836407884e-05, "loss": 0.269, "step": 4373500 }, { "epoch": 2.62, "learning_rate": 3.767860687084732e-05, "loss": 0.2637, "step": 4374000 }, { "epoch": 2.62, "learning_rate": 3.767650690528675e-05, "loss": 0.2632, "step": 4374500 }, { "epoch": 2.62, "learning_rate": 3.7674411139657305e-05, "loss": 0.2624, "step": 4375000 }, { "epoch": 2.62, "learning_rate": 3.7672311174096745e-05, "loss": 0.2663, "step": 4375500 }, { "epoch": 2.62, "learning_rate": 3.767021120853618e-05, "loss": 0.26, "step": 4376000 }, { "epoch": 2.62, "learning_rate": 3.766811124297561e-05, "loss": 0.2627, "step": 4376500 }, { "epoch": 2.62, "learning_rate": 3.766601127741505e-05, "loss": 0.26, "step": 4377000 }, { "epoch": 2.62, "learning_rate": 3.7663911311854486e-05, "loss": 0.2664, "step": 4377500 }, { "epoch": 2.62, "learning_rate": 3.766181134629392e-05, "loss": 0.2627, "step": 4378000 }, { "epoch": 2.63, "learning_rate": 3.765971138073336e-05, "loss": 0.2611, "step": 4378500 }, { "epoch": 2.63, "learning_rate": 3.765761561510391e-05, "loss": 0.2642, "step": 4379000 }, { "epoch": 2.63, "learning_rate": 3.7655515649543346e-05, "loss": 0.2677, "step": 4379500 }, { "epoch": 2.63, "learning_rate": 3.7653415683982787e-05, "loss": 0.2708, "step": 4380000 }, { "epoch": 2.63, "learning_rate": 3.765131571842222e-05, "loss": 0.2652, "step": 4380500 }, { "epoch": 2.63, "learning_rate": 3.7649219952792774e-05, "loss": 0.2642, "step": 4381000 }, { "epoch": 2.63, "learning_rate": 3.764711998723221e-05, "loss": 0.2651, "step": 4381500 }, { "epoch": 2.63, "learning_rate": 3.764502002167165e-05, "loss": 0.2651, "step": 4382000 }, { "epoch": 2.63, "learning_rate": 3.764292005611108e-05, "loss": 0.2616, "step": 4382500 }, { "epoch": 2.63, "learning_rate": 3.7640824290481634e-05, "loss": 0.262, "step": 4383000 }, { "epoch": 2.63, "learning_rate": 3.763872432492107e-05, "loss": 0.2633, "step": 4383500 }, { "epoch": 2.63, "learning_rate": 3.763662435936051e-05, "loss": 0.2601, "step": 4384000 }, { "epoch": 2.63, "learning_rate": 3.763452439379994e-05, "loss": 0.263, "step": 4384500 }, { "epoch": 2.63, "learning_rate": 3.7632428628170495e-05, "loss": 0.2659, "step": 4385000 }, { "epoch": 2.63, "learning_rate": 3.7630328662609935e-05, "loss": 0.2665, "step": 4385500 }, { "epoch": 2.63, "learning_rate": 3.762822869704937e-05, "loss": 0.2668, "step": 4386000 }, { "epoch": 2.63, "learning_rate": 3.76261287314888e-05, "loss": 0.2691, "step": 4386500 }, { "epoch": 2.63, "learning_rate": 3.7624032965859356e-05, "loss": 0.2635, "step": 4387000 }, { "epoch": 2.63, "learning_rate": 3.7621933000298796e-05, "loss": 0.2618, "step": 4387500 }, { "epoch": 2.63, "learning_rate": 3.761983303473823e-05, "loss": 0.2693, "step": 4388000 }, { "epoch": 2.63, "learning_rate": 3.761773306917766e-05, "loss": 0.267, "step": 4388500 }, { "epoch": 2.63, "learning_rate": 3.7615637303548216e-05, "loss": 0.2673, "step": 4389000 }, { "epoch": 2.63, "learning_rate": 3.761353733798766e-05, "loss": 0.2613, "step": 4389500 }, { "epoch": 2.63, "learning_rate": 3.761143737242709e-05, "loss": 0.2672, "step": 4390000 }, { "epoch": 2.63, "learning_rate": 3.7609337406866524e-05, "loss": 0.2603, "step": 4390500 }, { "epoch": 2.63, "learning_rate": 3.7607237441305964e-05, "loss": 0.2613, "step": 4391000 }, { "epoch": 2.63, "learning_rate": 3.76051374757454e-05, "loss": 0.2558, "step": 4391500 }, { "epoch": 2.63, "learning_rate": 3.760303751018483e-05, "loss": 0.2608, "step": 4392000 }, { "epoch": 2.63, "learning_rate": 3.760094174455539e-05, "loss": 0.2657, "step": 4392500 }, { "epoch": 2.63, "learning_rate": 3.7598841778994825e-05, "loss": 0.2554, "step": 4393000 }, { "epoch": 2.63, "learning_rate": 3.759674181343426e-05, "loss": 0.2674, "step": 4393500 }, { "epoch": 2.63, "learning_rate": 3.75946418478737e-05, "loss": 0.2585, "step": 4394000 }, { "epoch": 2.63, "learning_rate": 3.759254188231313e-05, "loss": 0.2611, "step": 4394500 }, { "epoch": 2.63, "learning_rate": 3.7590441916752565e-05, "loss": 0.2653, "step": 4395000 }, { "epoch": 2.64, "learning_rate": 3.7588341951192005e-05, "loss": 0.2601, "step": 4395500 }, { "epoch": 2.64, "learning_rate": 3.758624198563143e-05, "loss": 0.2642, "step": 4396000 }, { "epoch": 2.64, "learning_rate": 3.758414622000199e-05, "loss": 0.2673, "step": 4396500 }, { "epoch": 2.64, "learning_rate": 3.7582046254441426e-05, "loss": 0.2594, "step": 4397000 }, { "epoch": 2.64, "learning_rate": 3.7579946288880866e-05, "loss": 0.2644, "step": 4397500 }, { "epoch": 2.64, "learning_rate": 3.75778463233203e-05, "loss": 0.2625, "step": 4398000 }, { "epoch": 2.64, "learning_rate": 3.7575746357759726e-05, "loss": 0.2677, "step": 4398500 }, { "epoch": 2.64, "learning_rate": 3.757365059213029e-05, "loss": 0.2589, "step": 4399000 }, { "epoch": 2.64, "learning_rate": 3.757155062656973e-05, "loss": 0.268, "step": 4399500 }, { "epoch": 2.64, "learning_rate": 3.756945066100916e-05, "loss": 0.2661, "step": 4400000 }, { "epoch": 2.64, "eval_loss": 0.24184562265872955, "eval_runtime": 1459.7085, "eval_samples_per_second": 360.839, "eval_steps_per_second": 60.14, "step": 4400000 }, { "epoch": 2.64, "learning_rate": 3.7567350695448594e-05, "loss": 0.2724, "step": 4400500 }, { "epoch": 2.64, "learning_rate": 3.7565254929819154e-05, "loss": 0.2609, "step": 4401000 }, { "epoch": 2.64, "learning_rate": 3.756315496425859e-05, "loss": 0.264, "step": 4401500 }, { "epoch": 2.64, "learning_rate": 3.756105499869802e-05, "loss": 0.2673, "step": 4402000 }, { "epoch": 2.64, "learning_rate": 3.755895503313746e-05, "loss": 0.26, "step": 4402500 }, { "epoch": 2.64, "learning_rate": 3.7556859267508015e-05, "loss": 0.2615, "step": 4403000 }, { "epoch": 2.64, "learning_rate": 3.755476350187857e-05, "loss": 0.2642, "step": 4403500 }, { "epoch": 2.64, "learning_rate": 3.7552663536318e-05, "loss": 0.2577, "step": 4404000 }, { "epoch": 2.64, "learning_rate": 3.7550563570757435e-05, "loss": 0.2641, "step": 4404500 }, { "epoch": 2.64, "learning_rate": 3.7548463605196876e-05, "loss": 0.2675, "step": 4405000 }, { "epoch": 2.64, "learning_rate": 3.754636783956743e-05, "loss": 0.2665, "step": 4405500 }, { "epoch": 2.64, "learning_rate": 3.754426787400686e-05, "loss": 0.2695, "step": 4406000 }, { "epoch": 2.64, "learning_rate": 3.75421679084463e-05, "loss": 0.2617, "step": 4406500 }, { "epoch": 2.64, "learning_rate": 3.7540067942885736e-05, "loss": 0.2599, "step": 4407000 }, { "epoch": 2.64, "learning_rate": 3.753796797732517e-05, "loss": 0.274, "step": 4407500 }, { "epoch": 2.64, "learning_rate": 3.753586801176461e-05, "loss": 0.2634, "step": 4408000 }, { "epoch": 2.64, "learning_rate": 3.7533768046204043e-05, "loss": 0.2566, "step": 4408500 }, { "epoch": 2.64, "learning_rate": 3.753166808064348e-05, "loss": 0.2625, "step": 4409000 }, { "epoch": 2.64, "learning_rate": 3.752957231501403e-05, "loss": 0.2645, "step": 4409500 }, { "epoch": 2.64, "learning_rate": 3.752747234945347e-05, "loss": 0.2691, "step": 4410000 }, { "epoch": 2.64, "learning_rate": 3.7525372383892904e-05, "loss": 0.2609, "step": 4410500 }, { "epoch": 2.64, "learning_rate": 3.752327241833234e-05, "loss": 0.2652, "step": 4411000 }, { "epoch": 2.64, "learning_rate": 3.752117245277178e-05, "loss": 0.2642, "step": 4411500 }, { "epoch": 2.65, "learning_rate": 3.751907668714233e-05, "loss": 0.2653, "step": 4412000 }, { "epoch": 2.65, "learning_rate": 3.7516976721581765e-05, "loss": 0.264, "step": 4412500 }, { "epoch": 2.65, "learning_rate": 3.75148767560212e-05, "loss": 0.2627, "step": 4413000 }, { "epoch": 2.65, "learning_rate": 3.751278099039176e-05, "loss": 0.265, "step": 4413500 }, { "epoch": 2.65, "learning_rate": 3.751068102483119e-05, "loss": 0.2641, "step": 4414000 }, { "epoch": 2.65, "learning_rate": 3.7508581059270626e-05, "loss": 0.261, "step": 4414500 }, { "epoch": 2.65, "learning_rate": 3.7506481093710066e-05, "loss": 0.2616, "step": 4415000 }, { "epoch": 2.65, "learning_rate": 3.750438532808062e-05, "loss": 0.2632, "step": 4415500 }, { "epoch": 2.65, "learning_rate": 3.750228536252005e-05, "loss": 0.2628, "step": 4416000 }, { "epoch": 2.65, "learning_rate": 3.7500185396959486e-05, "loss": 0.2674, "step": 4416500 }, { "epoch": 2.65, "learning_rate": 3.749808543139893e-05, "loss": 0.2614, "step": 4417000 }, { "epoch": 2.65, "learning_rate": 3.749598546583836e-05, "loss": 0.2649, "step": 4417500 }, { "epoch": 2.65, "learning_rate": 3.7493885500277794e-05, "loss": 0.2604, "step": 4418000 }, { "epoch": 2.65, "learning_rate": 3.7491785534717234e-05, "loss": 0.2654, "step": 4418500 }, { "epoch": 2.65, "learning_rate": 3.748968556915667e-05, "loss": 0.2626, "step": 4419000 }, { "epoch": 2.65, "learning_rate": 3.748758980352722e-05, "loss": 0.2644, "step": 4419500 }, { "epoch": 2.65, "learning_rate": 3.7485489837966654e-05, "loss": 0.2616, "step": 4420000 }, { "epoch": 2.65, "learning_rate": 3.7483389872406095e-05, "loss": 0.2611, "step": 4420500 }, { "epoch": 2.65, "learning_rate": 3.748128990684553e-05, "loss": 0.2616, "step": 4421000 }, { "epoch": 2.65, "learning_rate": 3.747919414121608e-05, "loss": 0.2606, "step": 4421500 }, { "epoch": 2.65, "learning_rate": 3.747709417565552e-05, "loss": 0.259, "step": 4422000 }, { "epoch": 2.65, "learning_rate": 3.7474994210094955e-05, "loss": 0.2609, "step": 4422500 }, { "epoch": 2.65, "learning_rate": 3.747289424453439e-05, "loss": 0.2562, "step": 4423000 }, { "epoch": 2.65, "learning_rate": 3.747079847890494e-05, "loss": 0.2651, "step": 4423500 }, { "epoch": 2.65, "learning_rate": 3.746869851334438e-05, "loss": 0.2715, "step": 4424000 }, { "epoch": 2.65, "learning_rate": 3.7466598547783816e-05, "loss": 0.2641, "step": 4424500 }, { "epoch": 2.65, "learning_rate": 3.746449858222325e-05, "loss": 0.2651, "step": 4425000 }, { "epoch": 2.65, "learning_rate": 3.746239861666269e-05, "loss": 0.2624, "step": 4425500 }, { "epoch": 2.65, "learning_rate": 3.746030285103324e-05, "loss": 0.2597, "step": 4426000 }, { "epoch": 2.65, "learning_rate": 3.745820288547268e-05, "loss": 0.2549, "step": 4426500 }, { "epoch": 2.65, "learning_rate": 3.745610291991211e-05, "loss": 0.2608, "step": 4427000 }, { "epoch": 2.65, "learning_rate": 3.745400295435155e-05, "loss": 0.266, "step": 4427500 }, { "epoch": 2.65, "learning_rate": 3.745191138865323e-05, "loss": 0.2677, "step": 4428000 }, { "epoch": 2.66, "learning_rate": 3.744981142309266e-05, "loss": 0.2602, "step": 4428500 }, { "epoch": 2.66, "learning_rate": 3.744771145753209e-05, "loss": 0.2676, "step": 4429000 }, { "epoch": 2.66, "learning_rate": 3.744561149197153e-05, "loss": 0.2565, "step": 4429500 }, { "epoch": 2.66, "learning_rate": 3.7443511526410965e-05, "loss": 0.2648, "step": 4430000 }, { "epoch": 2.66, "learning_rate": 3.74414115608504e-05, "loss": 0.2639, "step": 4430500 }, { "epoch": 2.66, "learning_rate": 3.743931159528984e-05, "loss": 0.2628, "step": 4431000 }, { "epoch": 2.66, "learning_rate": 3.743721162972927e-05, "loss": 0.2644, "step": 4431500 }, { "epoch": 2.66, "learning_rate": 3.7435115864099825e-05, "loss": 0.2594, "step": 4432000 }, { "epoch": 2.66, "learning_rate": 3.743301589853926e-05, "loss": 0.2645, "step": 4432500 }, { "epoch": 2.66, "learning_rate": 3.74309159329787e-05, "loss": 0.2638, "step": 4433000 }, { "epoch": 2.66, "learning_rate": 3.742881596741813e-05, "loss": 0.2647, "step": 4433500 }, { "epoch": 2.66, "learning_rate": 3.7426720201788686e-05, "loss": 0.2663, "step": 4434000 }, { "epoch": 2.66, "learning_rate": 3.7424624436159246e-05, "loss": 0.2681, "step": 4434500 }, { "epoch": 2.66, "learning_rate": 3.742252447059869e-05, "loss": 0.2629, "step": 4435000 }, { "epoch": 2.66, "learning_rate": 3.742042450503812e-05, "loss": 0.2639, "step": 4435500 }, { "epoch": 2.66, "learning_rate": 3.741832453947755e-05, "loss": 0.2654, "step": 4436000 }, { "epoch": 2.66, "learning_rate": 3.741622457391699e-05, "loss": 0.2645, "step": 4436500 }, { "epoch": 2.66, "learning_rate": 3.741412460835642e-05, "loss": 0.2665, "step": 4437000 }, { "epoch": 2.66, "learning_rate": 3.7412024642795854e-05, "loss": 0.2665, "step": 4437500 }, { "epoch": 2.66, "learning_rate": 3.7409924677235294e-05, "loss": 0.2661, "step": 4438000 }, { "epoch": 2.66, "learning_rate": 3.740782891160585e-05, "loss": 0.2611, "step": 4438500 }, { "epoch": 2.66, "learning_rate": 3.740572894604528e-05, "loss": 0.2684, "step": 4439000 }, { "epoch": 2.66, "learning_rate": 3.7403628980484715e-05, "loss": 0.2672, "step": 4439500 }, { "epoch": 2.66, "learning_rate": 3.7401529014924155e-05, "loss": 0.2613, "step": 4440000 }, { "epoch": 2.66, "learning_rate": 3.739943324929471e-05, "loss": 0.2603, "step": 4440500 }, { "epoch": 2.66, "learning_rate": 3.739733328373414e-05, "loss": 0.2597, "step": 4441000 }, { "epoch": 2.66, "learning_rate": 3.739523331817358e-05, "loss": 0.2607, "step": 4441500 }, { "epoch": 2.66, "learning_rate": 3.7393133352613016e-05, "loss": 0.2556, "step": 4442000 }, { "epoch": 2.66, "learning_rate": 3.7391037586983576e-05, "loss": 0.2697, "step": 4442500 }, { "epoch": 2.66, "learning_rate": 3.7388937621423e-05, "loss": 0.2596, "step": 4443000 }, { "epoch": 2.66, "learning_rate": 3.738683765586244e-05, "loss": 0.2662, "step": 4443500 }, { "epoch": 2.66, "learning_rate": 3.7384737690301876e-05, "loss": 0.2708, "step": 4444000 }, { "epoch": 2.66, "learning_rate": 3.738263772474131e-05, "loss": 0.2638, "step": 4444500 }, { "epoch": 2.66, "learning_rate": 3.738054195911187e-05, "loss": 0.2578, "step": 4445000 }, { "epoch": 2.67, "learning_rate": 3.7378441993551304e-05, "loss": 0.2604, "step": 4445500 }, { "epoch": 2.67, "learning_rate": 3.737634202799074e-05, "loss": 0.2612, "step": 4446000 }, { "epoch": 2.67, "learning_rate": 3.737424206243017e-05, "loss": 0.2656, "step": 4446500 }, { "epoch": 2.67, "learning_rate": 3.737214629680074e-05, "loss": 0.2636, "step": 4447000 }, { "epoch": 2.67, "learning_rate": 3.737004633124017e-05, "loss": 0.2566, "step": 4447500 }, { "epoch": 2.67, "learning_rate": 3.73679463656796e-05, "loss": 0.2708, "step": 4448000 }, { "epoch": 2.67, "learning_rate": 3.736584640011904e-05, "loss": 0.2666, "step": 4448500 }, { "epoch": 2.67, "learning_rate": 3.73637506344896e-05, "loss": 0.2653, "step": 4449000 }, { "epoch": 2.67, "learning_rate": 3.736165066892903e-05, "loss": 0.2594, "step": 4449500 }, { "epoch": 2.67, "learning_rate": 3.735955070336846e-05, "loss": 0.2626, "step": 4450000 }, { "epoch": 2.67, "learning_rate": 3.73574507378079e-05, "loss": 0.2634, "step": 4450500 }, { "epoch": 2.67, "learning_rate": 3.735535497217846e-05, "loss": 0.262, "step": 4451000 }, { "epoch": 2.67, "learning_rate": 3.735325500661789e-05, "loss": 0.2637, "step": 4451500 }, { "epoch": 2.67, "learning_rate": 3.7351155041057326e-05, "loss": 0.2657, "step": 4452000 }, { "epoch": 2.67, "learning_rate": 3.734905507549676e-05, "loss": 0.263, "step": 4452500 }, { "epoch": 2.67, "learning_rate": 3.734695930986732e-05, "loss": 0.26, "step": 4453000 }, { "epoch": 2.67, "learning_rate": 3.7344859344306753e-05, "loss": 0.2614, "step": 4453500 }, { "epoch": 2.67, "learning_rate": 3.7342759378746194e-05, "loss": 0.2622, "step": 4454000 }, { "epoch": 2.67, "learning_rate": 3.734065941318563e-05, "loss": 0.2607, "step": 4454500 }, { "epoch": 2.67, "learning_rate": 3.733856364755618e-05, "loss": 0.266, "step": 4455000 }, { "epoch": 2.67, "learning_rate": 3.7336463681995614e-05, "loss": 0.2631, "step": 4455500 }, { "epoch": 2.67, "learning_rate": 3.7334363716435054e-05, "loss": 0.2671, "step": 4456000 }, { "epoch": 2.67, "learning_rate": 3.733226375087449e-05, "loss": 0.2617, "step": 4456500 }, { "epoch": 2.67, "learning_rate": 3.733016798524504e-05, "loss": 0.2652, "step": 4457000 }, { "epoch": 2.67, "learning_rate": 3.7328068019684475e-05, "loss": 0.2611, "step": 4457500 }, { "epoch": 2.67, "learning_rate": 3.7325968054123915e-05, "loss": 0.2617, "step": 4458000 }, { "epoch": 2.67, "learning_rate": 3.732386808856335e-05, "loss": 0.2694, "step": 4458500 }, { "epoch": 2.67, "learning_rate": 3.732176812300278e-05, "loss": 0.2641, "step": 4459000 }, { "epoch": 2.67, "learning_rate": 3.7319668157442215e-05, "loss": 0.2671, "step": 4459500 }, { "epoch": 2.67, "learning_rate": 3.731756819188165e-05, "loss": 0.26, "step": 4460000 }, { "epoch": 2.67, "learning_rate": 3.731546822632109e-05, "loss": 0.2649, "step": 4460500 }, { "epoch": 2.67, "learning_rate": 3.731337246069165e-05, "loss": 0.2614, "step": 4461000 }, { "epoch": 2.67, "learning_rate": 3.731127249513108e-05, "loss": 0.2669, "step": 4461500 }, { "epoch": 2.68, "learning_rate": 3.730917252957051e-05, "loss": 0.2576, "step": 4462000 }, { "epoch": 2.68, "learning_rate": 3.730707256400995e-05, "loss": 0.2635, "step": 4462500 }, { "epoch": 2.68, "learning_rate": 3.730497679838051e-05, "loss": 0.2729, "step": 4463000 }, { "epoch": 2.68, "learning_rate": 3.7302876832819944e-05, "loss": 0.2538, "step": 4463500 }, { "epoch": 2.68, "learning_rate": 3.730077686725938e-05, "loss": 0.2631, "step": 4464000 }, { "epoch": 2.68, "learning_rate": 3.729867690169881e-05, "loss": 0.2611, "step": 4464500 }, { "epoch": 2.68, "learning_rate": 3.7296576936138244e-05, "loss": 0.265, "step": 4465000 }, { "epoch": 2.68, "learning_rate": 3.729447697057768e-05, "loss": 0.2581, "step": 4465500 }, { "epoch": 2.68, "learning_rate": 3.729237700501712e-05, "loss": 0.2652, "step": 4466000 }, { "epoch": 2.68, "learning_rate": 3.729027703945655e-05, "loss": 0.2628, "step": 4466500 }, { "epoch": 2.68, "learning_rate": 3.7288181273827105e-05, "loss": 0.2689, "step": 4467000 }, { "epoch": 2.68, "learning_rate": 3.7286085508197665e-05, "loss": 0.2587, "step": 4467500 }, { "epoch": 2.68, "learning_rate": 3.7283985542637105e-05, "loss": 0.2619, "step": 4468000 }, { "epoch": 2.68, "learning_rate": 3.728188557707654e-05, "loss": 0.265, "step": 4468500 }, { "epoch": 2.68, "learning_rate": 3.7279785611515966e-05, "loss": 0.2673, "step": 4469000 }, { "epoch": 2.68, "learning_rate": 3.7277685645955406e-05, "loss": 0.2647, "step": 4469500 }, { "epoch": 2.68, "learning_rate": 3.727558568039484e-05, "loss": 0.2719, "step": 4470000 }, { "epoch": 2.68, "learning_rate": 3.727348571483427e-05, "loss": 0.2604, "step": 4470500 }, { "epoch": 2.68, "learning_rate": 3.727138574927371e-05, "loss": 0.2631, "step": 4471000 }, { "epoch": 2.68, "learning_rate": 3.7269289983644267e-05, "loss": 0.2606, "step": 4471500 }, { "epoch": 2.68, "learning_rate": 3.72671900180837e-05, "loss": 0.2654, "step": 4472000 }, { "epoch": 2.68, "learning_rate": 3.7265090052523133e-05, "loss": 0.2602, "step": 4472500 }, { "epoch": 2.68, "learning_rate": 3.7262990086962574e-05, "loss": 0.2594, "step": 4473000 }, { "epoch": 2.68, "learning_rate": 3.7260894321333134e-05, "loss": 0.2611, "step": 4473500 }, { "epoch": 2.68, "learning_rate": 3.725879855570369e-05, "loss": 0.2641, "step": 4474000 }, { "epoch": 2.68, "learning_rate": 3.725669859014312e-05, "loss": 0.2613, "step": 4474500 }, { "epoch": 2.68, "learning_rate": 3.725459862458256e-05, "loss": 0.2666, "step": 4475000 }, { "epoch": 2.68, "learning_rate": 3.7252498659021995e-05, "loss": 0.2647, "step": 4475500 }, { "epoch": 2.68, "learning_rate": 3.725039869346143e-05, "loss": 0.2573, "step": 4476000 }, { "epoch": 2.68, "learning_rate": 3.724829872790086e-05, "loss": 0.2648, "step": 4476500 }, { "epoch": 2.68, "learning_rate": 3.7246198762340295e-05, "loss": 0.2657, "step": 4477000 }, { "epoch": 2.68, "learning_rate": 3.724409879677973e-05, "loss": 0.2612, "step": 4477500 }, { "epoch": 2.68, "learning_rate": 3.724199883121917e-05, "loss": 0.2626, "step": 4478000 }, { "epoch": 2.69, "learning_rate": 3.723990306558973e-05, "loss": 0.2688, "step": 4478500 }, { "epoch": 2.69, "learning_rate": 3.7237803100029156e-05, "loss": 0.2611, "step": 4479000 }, { "epoch": 2.69, "learning_rate": 3.723570313446859e-05, "loss": 0.2648, "step": 4479500 }, { "epoch": 2.69, "learning_rate": 3.723360316890803e-05, "loss": 0.2579, "step": 4480000 }, { "epoch": 2.69, "learning_rate": 3.723150740327859e-05, "loss": 0.273, "step": 4480500 }, { "epoch": 2.69, "learning_rate": 3.7229407437718017e-05, "loss": 0.2582, "step": 4481000 }, { "epoch": 2.69, "learning_rate": 3.722730747215746e-05, "loss": 0.2592, "step": 4481500 }, { "epoch": 2.69, "learning_rate": 3.722520750659689e-05, "loss": 0.2632, "step": 4482000 }, { "epoch": 2.69, "learning_rate": 3.7223107541036324e-05, "loss": 0.2634, "step": 4482500 }, { "epoch": 2.69, "learning_rate": 3.7221007575475764e-05, "loss": 0.2587, "step": 4483000 }, { "epoch": 2.69, "learning_rate": 3.72189076099152e-05, "loss": 0.2602, "step": 4483500 }, { "epoch": 2.69, "learning_rate": 3.721680764435463e-05, "loss": 0.2646, "step": 4484000 }, { "epoch": 2.69, "learning_rate": 3.7214711878725184e-05, "loss": 0.2649, "step": 4484500 }, { "epoch": 2.69, "learning_rate": 3.7212611913164625e-05, "loss": 0.2692, "step": 4485000 }, { "epoch": 2.69, "learning_rate": 3.721051194760406e-05, "loss": 0.2586, "step": 4485500 }, { "epoch": 2.69, "learning_rate": 3.720841198204349e-05, "loss": 0.2632, "step": 4486000 }, { "epoch": 2.69, "learning_rate": 3.720631201648293e-05, "loss": 0.2708, "step": 4486500 }, { "epoch": 2.69, "learning_rate": 3.7204216250853485e-05, "loss": 0.2658, "step": 4487000 }, { "epoch": 2.69, "learning_rate": 3.720211628529292e-05, "loss": 0.2594, "step": 4487500 }, { "epoch": 2.69, "learning_rate": 3.720001631973235e-05, "loss": 0.2625, "step": 4488000 }, { "epoch": 2.69, "learning_rate": 3.719791635417179e-05, "loss": 0.2572, "step": 4488500 }, { "epoch": 2.69, "learning_rate": 3.7195816388611226e-05, "loss": 0.2645, "step": 4489000 }, { "epoch": 2.69, "learning_rate": 3.719371642305066e-05, "loss": 0.2613, "step": 4489500 }, { "epoch": 2.69, "learning_rate": 3.719162065742122e-05, "loss": 0.2629, "step": 4490000 }, { "epoch": 2.69, "learning_rate": 3.7189524891791773e-05, "loss": 0.267, "step": 4490500 }, { "epoch": 2.69, "learning_rate": 3.718742492623121e-05, "loss": 0.266, "step": 4491000 }, { "epoch": 2.69, "learning_rate": 3.718532496067064e-05, "loss": 0.2658, "step": 4491500 }, { "epoch": 2.69, "learning_rate": 3.718322499511008e-05, "loss": 0.2654, "step": 4492000 }, { "epoch": 2.69, "learning_rate": 3.7181125029549514e-05, "loss": 0.2621, "step": 4492500 }, { "epoch": 2.69, "learning_rate": 3.717902506398895e-05, "loss": 0.2642, "step": 4493000 }, { "epoch": 2.69, "learning_rate": 3.717692509842839e-05, "loss": 0.258, "step": 4493500 }, { "epoch": 2.69, "learning_rate": 3.717482513286782e-05, "loss": 0.2648, "step": 4494000 }, { "epoch": 2.69, "learning_rate": 3.7172725167307255e-05, "loss": 0.2638, "step": 4494500 }, { "epoch": 2.69, "learning_rate": 3.7170625201746695e-05, "loss": 0.2654, "step": 4495000 }, { "epoch": 2.7, "learning_rate": 3.716852523618613e-05, "loss": 0.2652, "step": 4495500 }, { "epoch": 2.7, "learning_rate": 3.7166425270625555e-05, "loss": 0.2668, "step": 4496000 }, { "epoch": 2.7, "learning_rate": 3.7164329504996115e-05, "loss": 0.2623, "step": 4496500 }, { "epoch": 2.7, "learning_rate": 3.7162229539435556e-05, "loss": 0.2676, "step": 4497000 }, { "epoch": 2.7, "learning_rate": 3.716012957387499e-05, "loss": 0.2563, "step": 4497500 }, { "epoch": 2.7, "learning_rate": 3.715803380824554e-05, "loss": 0.2629, "step": 4498000 }, { "epoch": 2.7, "learning_rate": 3.715593384268498e-05, "loss": 0.2615, "step": 4498500 }, { "epoch": 2.7, "learning_rate": 3.7153833877124416e-05, "loss": 0.2716, "step": 4499000 }, { "epoch": 2.7, "learning_rate": 3.715173391156385e-05, "loss": 0.2688, "step": 4499500 }, { "epoch": 2.7, "learning_rate": 3.714963394600329e-05, "loss": 0.2682, "step": 4500000 }, { "epoch": 2.7, "eval_loss": 0.24175457656383514, "eval_runtime": 1455.8339, "eval_samples_per_second": 361.8, "eval_steps_per_second": 60.3, "step": 4500000 }, { "epoch": 2.7, "learning_rate": 3.7147533980442723e-05, "loss": 0.2639, "step": 4500500 }, { "epoch": 2.7, "learning_rate": 3.714543401488215e-05, "loss": 0.2605, "step": 4501000 }, { "epoch": 2.7, "learning_rate": 3.714333404932159e-05, "loss": 0.262, "step": 4501500 }, { "epoch": 2.7, "learning_rate": 3.714123828369215e-05, "loss": 0.2589, "step": 4502000 }, { "epoch": 2.7, "learning_rate": 3.7139138318131584e-05, "loss": 0.2641, "step": 4502500 }, { "epoch": 2.7, "learning_rate": 3.713703835257102e-05, "loss": 0.2647, "step": 4503000 }, { "epoch": 2.7, "learning_rate": 3.713493838701045e-05, "loss": 0.2646, "step": 4503500 }, { "epoch": 2.7, "learning_rate": 3.713284262138101e-05, "loss": 0.2641, "step": 4504000 }, { "epoch": 2.7, "learning_rate": 3.7130742655820445e-05, "loss": 0.2623, "step": 4504500 }, { "epoch": 2.7, "learning_rate": 3.712864269025988e-05, "loss": 0.2624, "step": 4505000 }, { "epoch": 2.7, "learning_rate": 3.712654272469931e-05, "loss": 0.2597, "step": 4505500 }, { "epoch": 2.7, "learning_rate": 3.712444695906987e-05, "loss": 0.2641, "step": 4506000 }, { "epoch": 2.7, "learning_rate": 3.7122346993509306e-05, "loss": 0.2613, "step": 4506500 }, { "epoch": 2.7, "learning_rate": 3.7120247027948746e-05, "loss": 0.2619, "step": 4507000 }, { "epoch": 2.7, "learning_rate": 3.711814706238818e-05, "loss": 0.2662, "step": 4507500 }, { "epoch": 2.7, "learning_rate": 3.711605129675873e-05, "loss": 0.2639, "step": 4508000 }, { "epoch": 2.7, "learning_rate": 3.7113951331198166e-05, "loss": 0.2609, "step": 4508500 }, { "epoch": 2.7, "learning_rate": 3.711185136563761e-05, "loss": 0.2632, "step": 4509000 }, { "epoch": 2.7, "learning_rate": 3.710975140007704e-05, "loss": 0.2634, "step": 4509500 }, { "epoch": 2.7, "learning_rate": 3.7107655634447594e-05, "loss": 0.266, "step": 4510000 }, { "epoch": 2.7, "learning_rate": 3.710555566888703e-05, "loss": 0.2641, "step": 4510500 }, { "epoch": 2.7, "learning_rate": 3.710345570332647e-05, "loss": 0.2644, "step": 4511000 }, { "epoch": 2.7, "learning_rate": 3.71013557377659e-05, "loss": 0.2636, "step": 4511500 }, { "epoch": 2.71, "learning_rate": 3.7099255772205334e-05, "loss": 0.264, "step": 4512000 }, { "epoch": 2.71, "learning_rate": 3.7097160006575895e-05, "loss": 0.2635, "step": 4512500 }, { "epoch": 2.71, "learning_rate": 3.709506004101533e-05, "loss": 0.261, "step": 4513000 }, { "epoch": 2.71, "learning_rate": 3.709296427538588e-05, "loss": 0.2603, "step": 4513500 }, { "epoch": 2.71, "learning_rate": 3.7090864309825315e-05, "loss": 0.2695, "step": 4514000 }, { "epoch": 2.71, "learning_rate": 3.7088764344264755e-05, "loss": 0.2632, "step": 4514500 }, { "epoch": 2.71, "learning_rate": 3.708666437870419e-05, "loss": 0.2615, "step": 4515000 }, { "epoch": 2.71, "learning_rate": 3.708456441314362e-05, "loss": 0.2682, "step": 4515500 }, { "epoch": 2.71, "learning_rate": 3.708246444758306e-05, "loss": 0.2567, "step": 4516000 }, { "epoch": 2.71, "learning_rate": 3.7080364482022496e-05, "loss": 0.2656, "step": 4516500 }, { "epoch": 2.71, "learning_rate": 3.707826451646193e-05, "loss": 0.2594, "step": 4517000 }, { "epoch": 2.71, "learning_rate": 3.707616875083248e-05, "loss": 0.2624, "step": 4517500 }, { "epoch": 2.71, "learning_rate": 3.707406878527192e-05, "loss": 0.2648, "step": 4518000 }, { "epoch": 2.71, "learning_rate": 3.707196881971136e-05, "loss": 0.261, "step": 4518500 }, { "epoch": 2.71, "learning_rate": 3.706987305408191e-05, "loss": 0.2704, "step": 4519000 }, { "epoch": 2.71, "learning_rate": 3.706777308852135e-05, "loss": 0.265, "step": 4519500 }, { "epoch": 2.71, "learning_rate": 3.7065673122960784e-05, "loss": 0.2597, "step": 4520000 }, { "epoch": 2.71, "learning_rate": 3.706357315740022e-05, "loss": 0.2642, "step": 4520500 }, { "epoch": 2.71, "learning_rate": 3.706147319183966e-05, "loss": 0.2599, "step": 4521000 }, { "epoch": 2.71, "learning_rate": 3.705937322627909e-05, "loss": 0.267, "step": 4521500 }, { "epoch": 2.71, "learning_rate": 3.7057273260718525e-05, "loss": 0.2634, "step": 4522000 }, { "epoch": 2.71, "learning_rate": 3.705517329515796e-05, "loss": 0.2603, "step": 4522500 }, { "epoch": 2.71, "learning_rate": 3.705307332959739e-05, "loss": 0.2664, "step": 4523000 }, { "epoch": 2.71, "learning_rate": 3.705097756396795e-05, "loss": 0.2655, "step": 4523500 }, { "epoch": 2.71, "learning_rate": 3.7048877598407385e-05, "loss": 0.2644, "step": 4524000 }, { "epoch": 2.71, "learning_rate": 3.7046777632846826e-05, "loss": 0.2652, "step": 4524500 }, { "epoch": 2.71, "learning_rate": 3.704467766728625e-05, "loss": 0.2637, "step": 4525000 }, { "epoch": 2.71, "learning_rate": 3.704258190165681e-05, "loss": 0.263, "step": 4525500 }, { "epoch": 2.71, "learning_rate": 3.704048193609625e-05, "loss": 0.2686, "step": 4526000 }, { "epoch": 2.71, "learning_rate": 3.7038381970535686e-05, "loss": 0.2628, "step": 4526500 }, { "epoch": 2.71, "learning_rate": 3.703628200497511e-05, "loss": 0.2583, "step": 4527000 }, { "epoch": 2.71, "learning_rate": 3.703418623934567e-05, "loss": 0.2535, "step": 4527500 }, { "epoch": 2.71, "learning_rate": 3.7032086273785114e-05, "loss": 0.2599, "step": 4528000 }, { "epoch": 2.72, "learning_rate": 3.702998630822455e-05, "loss": 0.2648, "step": 4528500 }, { "epoch": 2.72, "learning_rate": 3.702788634266398e-05, "loss": 0.2623, "step": 4529000 }, { "epoch": 2.72, "learning_rate": 3.7025790577034534e-05, "loss": 0.2685, "step": 4529500 }, { "epoch": 2.72, "learning_rate": 3.7023690611473974e-05, "loss": 0.2643, "step": 4530000 }, { "epoch": 2.72, "learning_rate": 3.702159064591341e-05, "loss": 0.2565, "step": 4530500 }, { "epoch": 2.72, "learning_rate": 3.701949068035284e-05, "loss": 0.2625, "step": 4531000 }, { "epoch": 2.72, "learning_rate": 3.70173949147234e-05, "loss": 0.2591, "step": 4531500 }, { "epoch": 2.72, "learning_rate": 3.7015294949162835e-05, "loss": 0.2652, "step": 4532000 }, { "epoch": 2.72, "learning_rate": 3.701319498360227e-05, "loss": 0.2669, "step": 4532500 }, { "epoch": 2.72, "learning_rate": 3.701109501804171e-05, "loss": 0.2659, "step": 4533000 }, { "epoch": 2.72, "learning_rate": 3.700899925241226e-05, "loss": 0.2603, "step": 4533500 }, { "epoch": 2.72, "learning_rate": 3.7006899286851696e-05, "loss": 0.2646, "step": 4534000 }, { "epoch": 2.72, "learning_rate": 3.700479932129113e-05, "loss": 0.2667, "step": 4534500 }, { "epoch": 2.72, "learning_rate": 3.700269935573057e-05, "loss": 0.2655, "step": 4535000 }, { "epoch": 2.72, "learning_rate": 3.700060359010112e-05, "loss": 0.2687, "step": 4535500 }, { "epoch": 2.72, "learning_rate": 3.6998503624540556e-05, "loss": 0.2622, "step": 4536000 }, { "epoch": 2.72, "learning_rate": 3.699640365897999e-05, "loss": 0.2631, "step": 4536500 }, { "epoch": 2.72, "learning_rate": 3.699430369341943e-05, "loss": 0.2624, "step": 4537000 }, { "epoch": 2.72, "learning_rate": 3.6992203727858864e-05, "loss": 0.2581, "step": 4537500 }, { "epoch": 2.72, "learning_rate": 3.69901037622983e-05, "loss": 0.2652, "step": 4538000 }, { "epoch": 2.72, "learning_rate": 3.698800379673774e-05, "loss": 0.2623, "step": 4538500 }, { "epoch": 2.72, "learning_rate": 3.6985903831177164e-05, "loss": 0.2647, "step": 4539000 }, { "epoch": 2.72, "learning_rate": 3.6983808065547724e-05, "loss": 0.2577, "step": 4539500 }, { "epoch": 2.72, "learning_rate": 3.6981708099987165e-05, "loss": 0.2621, "step": 4540000 }, { "epoch": 2.72, "learning_rate": 3.69796081344266e-05, "loss": 0.2619, "step": 4540500 }, { "epoch": 2.72, "learning_rate": 3.697750816886603e-05, "loss": 0.259, "step": 4541000 }, { "epoch": 2.72, "learning_rate": 3.6975412403236585e-05, "loss": 0.2647, "step": 4541500 }, { "epoch": 2.72, "learning_rate": 3.6973312437676025e-05, "loss": 0.2626, "step": 4542000 }, { "epoch": 2.72, "learning_rate": 3.697121247211546e-05, "loss": 0.2595, "step": 4542500 }, { "epoch": 2.72, "learning_rate": 3.696911250655489e-05, "loss": 0.2611, "step": 4543000 }, { "epoch": 2.72, "learning_rate": 3.696701254099433e-05, "loss": 0.2707, "step": 4543500 }, { "epoch": 2.72, "learning_rate": 3.696491257543376e-05, "loss": 0.2699, "step": 4544000 }, { "epoch": 2.72, "learning_rate": 3.696281260987319e-05, "loss": 0.2594, "step": 4544500 }, { "epoch": 2.72, "learning_rate": 3.696071264431263e-05, "loss": 0.2675, "step": 4545000 }, { "epoch": 2.73, "learning_rate": 3.695861687868319e-05, "loss": 0.2605, "step": 4545500 }, { "epoch": 2.73, "learning_rate": 3.695651691312263e-05, "loss": 0.2658, "step": 4546000 }, { "epoch": 2.73, "learning_rate": 3.695441694756206e-05, "loss": 0.262, "step": 4546500 }, { "epoch": 2.73, "learning_rate": 3.6952316982001494e-05, "loss": 0.2644, "step": 4547000 }, { "epoch": 2.73, "learning_rate": 3.6950221216372054e-05, "loss": 0.2621, "step": 4547500 }, { "epoch": 2.73, "learning_rate": 3.694812125081149e-05, "loss": 0.257, "step": 4548000 }, { "epoch": 2.73, "learning_rate": 3.694602128525092e-05, "loss": 0.2587, "step": 4548500 }, { "epoch": 2.73, "learning_rate": 3.6943921319690354e-05, "loss": 0.2593, "step": 4549000 }, { "epoch": 2.73, "learning_rate": 3.6941825554060915e-05, "loss": 0.2641, "step": 4549500 }, { "epoch": 2.73, "learning_rate": 3.693972558850035e-05, "loss": 0.2587, "step": 4550000 }, { "epoch": 2.73, "learning_rate": 3.693762562293979e-05, "loss": 0.2591, "step": 4550500 }, { "epoch": 2.73, "learning_rate": 3.6935525657379215e-05, "loss": 0.2609, "step": 4551000 }, { "epoch": 2.73, "learning_rate": 3.6933429891749775e-05, "loss": 0.263, "step": 4551500 }, { "epoch": 2.73, "learning_rate": 3.693132992618921e-05, "loss": 0.258, "step": 4552000 }, { "epoch": 2.73, "learning_rate": 3.692922996062865e-05, "loss": 0.2605, "step": 4552500 }, { "epoch": 2.73, "learning_rate": 3.692712999506808e-05, "loss": 0.2632, "step": 4553000 }, { "epoch": 2.73, "learning_rate": 3.6925034229438636e-05, "loss": 0.2637, "step": 4553500 }, { "epoch": 2.73, "learning_rate": 3.6922934263878076e-05, "loss": 0.2585, "step": 4554000 }, { "epoch": 2.73, "learning_rate": 3.692083429831751e-05, "loss": 0.2632, "step": 4554500 }, { "epoch": 2.73, "learning_rate": 3.691873433275694e-05, "loss": 0.2624, "step": 4555000 }, { "epoch": 2.73, "learning_rate": 3.69166385671275e-05, "loss": 0.2632, "step": 4555500 }, { "epoch": 2.73, "learning_rate": 3.691453860156694e-05, "loss": 0.262, "step": 4556000 }, { "epoch": 2.73, "learning_rate": 3.691243863600637e-05, "loss": 0.27, "step": 4556500 }, { "epoch": 2.73, "learning_rate": 3.6910338670445804e-05, "loss": 0.2624, "step": 4557000 }, { "epoch": 2.73, "learning_rate": 3.690824290481636e-05, "loss": 0.2531, "step": 4557500 }, { "epoch": 2.73, "learning_rate": 3.69061429392558e-05, "loss": 0.2675, "step": 4558000 }, { "epoch": 2.73, "learning_rate": 3.690404297369523e-05, "loss": 0.2598, "step": 4558500 }, { "epoch": 2.73, "learning_rate": 3.6901943008134665e-05, "loss": 0.2642, "step": 4559000 }, { "epoch": 2.73, "learning_rate": 3.6899843042574105e-05, "loss": 0.2631, "step": 4559500 }, { "epoch": 2.73, "learning_rate": 3.689774727694466e-05, "loss": 0.2628, "step": 4560000 }, { "epoch": 2.73, "learning_rate": 3.689564731138409e-05, "loss": 0.262, "step": 4560500 }, { "epoch": 2.73, "learning_rate": 3.689354734582353e-05, "loss": 0.2637, "step": 4561000 }, { "epoch": 2.73, "learning_rate": 3.6891447380262966e-05, "loss": 0.2574, "step": 4561500 }, { "epoch": 2.74, "learning_rate": 3.688935161463352e-05, "loss": 0.2622, "step": 4562000 }, { "epoch": 2.74, "learning_rate": 3.688725164907295e-05, "loss": 0.2633, "step": 4562500 }, { "epoch": 2.74, "learning_rate": 3.688515168351239e-05, "loss": 0.2584, "step": 4563000 }, { "epoch": 2.74, "learning_rate": 3.6883051717951826e-05, "loss": 0.2696, "step": 4563500 }, { "epoch": 2.74, "learning_rate": 3.688095595232238e-05, "loss": 0.2618, "step": 4564000 }, { "epoch": 2.74, "learning_rate": 3.6878855986761813e-05, "loss": 0.2576, "step": 4564500 }, { "epoch": 2.74, "learning_rate": 3.6876756021201254e-05, "loss": 0.2587, "step": 4565000 }, { "epoch": 2.74, "learning_rate": 3.687465605564069e-05, "loss": 0.2643, "step": 4565500 }, { "epoch": 2.74, "learning_rate": 3.687256029001124e-05, "loss": 0.2629, "step": 4566000 }, { "epoch": 2.74, "learning_rate": 3.687046032445068e-05, "loss": 0.2531, "step": 4566500 }, { "epoch": 2.74, "learning_rate": 3.6868360358890114e-05, "loss": 0.2632, "step": 4567000 }, { "epoch": 2.74, "learning_rate": 3.686626039332955e-05, "loss": 0.2694, "step": 4567500 }, { "epoch": 2.74, "learning_rate": 3.68641646277001e-05, "loss": 0.2592, "step": 4568000 }, { "epoch": 2.74, "learning_rate": 3.686206466213954e-05, "loss": 0.2635, "step": 4568500 }, { "epoch": 2.74, "learning_rate": 3.6859964696578975e-05, "loss": 0.2646, "step": 4569000 }, { "epoch": 2.74, "learning_rate": 3.685786473101841e-05, "loss": 0.2585, "step": 4569500 }, { "epoch": 2.74, "learning_rate": 3.685576896538896e-05, "loss": 0.2695, "step": 4570000 }, { "epoch": 2.74, "learning_rate": 3.68536689998284e-05, "loss": 0.2697, "step": 4570500 }, { "epoch": 2.74, "learning_rate": 3.6851569034267836e-05, "loss": 0.2575, "step": 4571000 }, { "epoch": 2.74, "learning_rate": 3.684946906870727e-05, "loss": 0.2626, "step": 4571500 }, { "epoch": 2.74, "learning_rate": 3.684736910314671e-05, "loss": 0.2629, "step": 4572000 }, { "epoch": 2.74, "learning_rate": 3.684527333751726e-05, "loss": 0.2565, "step": 4572500 }, { "epoch": 2.74, "learning_rate": 3.6843173371956697e-05, "loss": 0.2611, "step": 4573000 }, { "epoch": 2.74, "learning_rate": 3.684107340639614e-05, "loss": 0.2585, "step": 4573500 }, { "epoch": 2.74, "learning_rate": 3.683897344083557e-05, "loss": 0.2636, "step": 4574000 }, { "epoch": 2.74, "learning_rate": 3.6836877675206124e-05, "loss": 0.2637, "step": 4574500 }, { "epoch": 2.74, "learning_rate": 3.683477770964556e-05, "loss": 0.2602, "step": 4575000 }, { "epoch": 2.74, "learning_rate": 3.6832677744085e-05, "loss": 0.2682, "step": 4575500 }, { "epoch": 2.74, "learning_rate": 3.683057777852443e-05, "loss": 0.2583, "step": 4576000 }, { "epoch": 2.74, "learning_rate": 3.6828482012894985e-05, "loss": 0.2663, "step": 4576500 }, { "epoch": 2.74, "learning_rate": 3.682638204733442e-05, "loss": 0.264, "step": 4577000 }, { "epoch": 2.74, "learning_rate": 3.682428208177386e-05, "loss": 0.2632, "step": 4577500 }, { "epoch": 2.74, "learning_rate": 3.682218211621329e-05, "loss": 0.2631, "step": 4578000 }, { "epoch": 2.74, "learning_rate": 3.682008635058385e-05, "loss": 0.2608, "step": 4578500 }, { "epoch": 2.75, "learning_rate": 3.6817986385023286e-05, "loss": 0.2624, "step": 4579000 }, { "epoch": 2.75, "learning_rate": 3.681588641946272e-05, "loss": 0.2671, "step": 4579500 }, { "epoch": 2.75, "learning_rate": 3.681378645390215e-05, "loss": 0.2617, "step": 4580000 }, { "epoch": 2.75, "learning_rate": 3.681169068827271e-05, "loss": 0.2622, "step": 4580500 }, { "epoch": 2.75, "learning_rate": 3.6809594922643266e-05, "loss": 0.2627, "step": 4581000 }, { "epoch": 2.75, "learning_rate": 3.6807494957082707e-05, "loss": 0.2635, "step": 4581500 }, { "epoch": 2.75, "learning_rate": 3.680539499152214e-05, "loss": 0.2591, "step": 4582000 }, { "epoch": 2.75, "learning_rate": 3.6803295025961574e-05, "loss": 0.2561, "step": 4582500 }, { "epoch": 2.75, "learning_rate": 3.6801195060401014e-05, "loss": 0.262, "step": 4583000 }, { "epoch": 2.75, "learning_rate": 3.679909509484044e-05, "loss": 0.2603, "step": 4583500 }, { "epoch": 2.75, "learning_rate": 3.6796995129279874e-05, "loss": 0.2637, "step": 4584000 }, { "epoch": 2.75, "learning_rate": 3.6794895163719314e-05, "loss": 0.2675, "step": 4584500 }, { "epoch": 2.75, "learning_rate": 3.6792799398089874e-05, "loss": 0.26, "step": 4585000 }, { "epoch": 2.75, "learning_rate": 3.679069943252931e-05, "loss": 0.2651, "step": 4585500 }, { "epoch": 2.75, "learning_rate": 3.678859946696874e-05, "loss": 0.2638, "step": 4586000 }, { "epoch": 2.75, "learning_rate": 3.6786499501408175e-05, "loss": 0.2551, "step": 4586500 }, { "epoch": 2.75, "learning_rate": 3.6784403735778735e-05, "loss": 0.2567, "step": 4587000 }, { "epoch": 2.75, "learning_rate": 3.678230377021817e-05, "loss": 0.2629, "step": 4587500 }, { "epoch": 2.75, "learning_rate": 3.678020380465761e-05, "loss": 0.2632, "step": 4588000 }, { "epoch": 2.75, "learning_rate": 3.6778103839097036e-05, "loss": 0.2579, "step": 4588500 }, { "epoch": 2.75, "learning_rate": 3.6776008073467596e-05, "loss": 0.2587, "step": 4589000 }, { "epoch": 2.75, "learning_rate": 3.677390810790703e-05, "loss": 0.2614, "step": 4589500 }, { "epoch": 2.75, "learning_rate": 3.677180814234647e-05, "loss": 0.2629, "step": 4590000 }, { "epoch": 2.75, "learning_rate": 3.67697081767859e-05, "loss": 0.2607, "step": 4590500 }, { "epoch": 2.75, "learning_rate": 3.676761241115646e-05, "loss": 0.256, "step": 4591000 }, { "epoch": 2.75, "learning_rate": 3.676551244559589e-05, "loss": 0.2642, "step": 4591500 }, { "epoch": 2.75, "learning_rate": 3.676341248003533e-05, "loss": 0.2595, "step": 4592000 }, { "epoch": 2.75, "learning_rate": 3.6761312514474764e-05, "loss": 0.2589, "step": 4592500 }, { "epoch": 2.75, "learning_rate": 3.675921674884532e-05, "loss": 0.2668, "step": 4593000 }, { "epoch": 2.75, "learning_rate": 3.675711678328476e-05, "loss": 0.2567, "step": 4593500 }, { "epoch": 2.75, "learning_rate": 3.675501681772419e-05, "loss": 0.2649, "step": 4594000 }, { "epoch": 2.75, "learning_rate": 3.6752916852163625e-05, "loss": 0.2622, "step": 4594500 }, { "epoch": 2.75, "learning_rate": 3.6750816886603065e-05, "loss": 0.2617, "step": 4595000 }, { "epoch": 2.76, "learning_rate": 3.674871692104249e-05, "loss": 0.2618, "step": 4595500 }, { "epoch": 2.76, "learning_rate": 3.6746616955481925e-05, "loss": 0.2642, "step": 4596000 }, { "epoch": 2.76, "learning_rate": 3.6744516989921365e-05, "loss": 0.2658, "step": 4596500 }, { "epoch": 2.76, "learning_rate": 3.67424170243608e-05, "loss": 0.2522, "step": 4597000 }, { "epoch": 2.76, "learning_rate": 3.674031705880023e-05, "loss": 0.2605, "step": 4597500 }, { "epoch": 2.76, "learning_rate": 3.673821709323967e-05, "loss": 0.2551, "step": 4598000 }, { "epoch": 2.76, "learning_rate": 3.6736117127679106e-05, "loss": 0.2632, "step": 4598500 }, { "epoch": 2.76, "learning_rate": 3.673402136204966e-05, "loss": 0.2639, "step": 4599000 }, { "epoch": 2.76, "learning_rate": 3.673192139648909e-05, "loss": 0.2556, "step": 4599500 }, { "epoch": 2.76, "learning_rate": 3.672982143092853e-05, "loss": 0.2627, "step": 4600000 }, { "epoch": 2.76, "eval_loss": 0.24249973893165588, "eval_runtime": 1456.9071, "eval_samples_per_second": 361.533, "eval_steps_per_second": 60.256, "step": 4600000 }, { "epoch": 2.76, "learning_rate": 3.6727721465367967e-05, "loss": 0.2662, "step": 4600500 }, { "epoch": 2.76, "learning_rate": 3.672562569973852e-05, "loss": 0.2591, "step": 4601000 }, { "epoch": 2.76, "learning_rate": 3.672352573417796e-05, "loss": 0.2624, "step": 4601500 }, { "epoch": 2.76, "learning_rate": 3.6721425768617394e-05, "loss": 0.2642, "step": 4602000 }, { "epoch": 2.76, "learning_rate": 3.671932580305683e-05, "loss": 0.2656, "step": 4602500 }, { "epoch": 2.76, "learning_rate": 3.671723003742738e-05, "loss": 0.2607, "step": 4603000 }, { "epoch": 2.76, "learning_rate": 3.671513007186682e-05, "loss": 0.2641, "step": 4603500 }, { "epoch": 2.76, "learning_rate": 3.6713030106306255e-05, "loss": 0.2646, "step": 4604000 }, { "epoch": 2.76, "learning_rate": 3.671093014074569e-05, "loss": 0.2587, "step": 4604500 }, { "epoch": 2.76, "learning_rate": 3.670883437511624e-05, "loss": 0.2639, "step": 4605000 }, { "epoch": 2.76, "learning_rate": 3.670673440955568e-05, "loss": 0.2597, "step": 4605500 }, { "epoch": 2.76, "learning_rate": 3.6704634443995115e-05, "loss": 0.2635, "step": 4606000 }, { "epoch": 2.76, "learning_rate": 3.670253447843455e-05, "loss": 0.2615, "step": 4606500 }, { "epoch": 2.76, "learning_rate": 3.670043451287399e-05, "loss": 0.2584, "step": 4607000 }, { "epoch": 2.76, "learning_rate": 3.669833874724454e-05, "loss": 0.2642, "step": 4607500 }, { "epoch": 2.76, "learning_rate": 3.66962429816151e-05, "loss": 0.2652, "step": 4608000 }, { "epoch": 2.76, "learning_rate": 3.6694143016054536e-05, "loss": 0.2623, "step": 4608500 }, { "epoch": 2.76, "learning_rate": 3.6692043050493977e-05, "loss": 0.2661, "step": 4609000 }, { "epoch": 2.76, "learning_rate": 3.668994308493341e-05, "loss": 0.2598, "step": 4609500 }, { "epoch": 2.76, "learning_rate": 3.6687847319303964e-05, "loss": 0.2667, "step": 4610000 }, { "epoch": 2.76, "learning_rate": 3.66857473537434e-05, "loss": 0.261, "step": 4610500 }, { "epoch": 2.76, "learning_rate": 3.668364738818284e-05, "loss": 0.2654, "step": 4611000 }, { "epoch": 2.76, "learning_rate": 3.668154742262227e-05, "loss": 0.2613, "step": 4611500 }, { "epoch": 2.77, "learning_rate": 3.6679447457061704e-05, "loss": 0.2621, "step": 4612000 }, { "epoch": 2.77, "learning_rate": 3.667734749150114e-05, "loss": 0.2571, "step": 4612500 }, { "epoch": 2.77, "learning_rate": 3.667524752594057e-05, "loss": 0.2602, "step": 4613000 }, { "epoch": 2.77, "learning_rate": 3.6673147560380005e-05, "loss": 0.2648, "step": 4613500 }, { "epoch": 2.77, "learning_rate": 3.6671051794750565e-05, "loss": 0.2627, "step": 4614000 }, { "epoch": 2.77, "learning_rate": 3.666895182919e-05, "loss": 0.2646, "step": 4614500 }, { "epoch": 2.77, "learning_rate": 3.666685186362943e-05, "loss": 0.257, "step": 4615000 }, { "epoch": 2.77, "learning_rate": 3.666475189806887e-05, "loss": 0.2599, "step": 4615500 }, { "epoch": 2.77, "learning_rate": 3.6662651932508306e-05, "loss": 0.2606, "step": 4616000 }, { "epoch": 2.77, "learning_rate": 3.6660556166878866e-05, "loss": 0.2601, "step": 4616500 }, { "epoch": 2.77, "learning_rate": 3.665845620131829e-05, "loss": 0.2586, "step": 4617000 }, { "epoch": 2.77, "learning_rate": 3.665635623575773e-05, "loss": 0.2601, "step": 4617500 }, { "epoch": 2.77, "learning_rate": 3.6654256270197166e-05, "loss": 0.2634, "step": 4618000 }, { "epoch": 2.77, "learning_rate": 3.6652160504567727e-05, "loss": 0.2672, "step": 4618500 }, { "epoch": 2.77, "learning_rate": 3.665006053900716e-05, "loss": 0.2549, "step": 4619000 }, { "epoch": 2.77, "learning_rate": 3.6647960573446594e-05, "loss": 0.2546, "step": 4619500 }, { "epoch": 2.77, "learning_rate": 3.664586060788603e-05, "loss": 0.2627, "step": 4620000 }, { "epoch": 2.77, "learning_rate": 3.664376484225659e-05, "loss": 0.2652, "step": 4620500 }, { "epoch": 2.77, "learning_rate": 3.664166487669602e-05, "loss": 0.2577, "step": 4621000 }, { "epoch": 2.77, "learning_rate": 3.663956491113546e-05, "loss": 0.2645, "step": 4621500 }, { "epoch": 2.77, "learning_rate": 3.663746494557489e-05, "loss": 0.2703, "step": 4622000 }, { "epoch": 2.77, "learning_rate": 3.663536917994545e-05, "loss": 0.2627, "step": 4622500 }, { "epoch": 2.77, "learning_rate": 3.663326921438489e-05, "loss": 0.2579, "step": 4623000 }, { "epoch": 2.77, "learning_rate": 3.663116924882432e-05, "loss": 0.2617, "step": 4623500 }, { "epoch": 2.77, "learning_rate": 3.662906928326375e-05, "loss": 0.2627, "step": 4624000 }, { "epoch": 2.77, "learning_rate": 3.662697351763431e-05, "loss": 0.2662, "step": 4624500 }, { "epoch": 2.77, "learning_rate": 3.662487775200486e-05, "loss": 0.262, "step": 4625000 }, { "epoch": 2.77, "learning_rate": 3.66227777864443e-05, "loss": 0.2621, "step": 4625500 }, { "epoch": 2.77, "learning_rate": 3.6620677820883736e-05, "loss": 0.2686, "step": 4626000 }, { "epoch": 2.77, "learning_rate": 3.661857785532317e-05, "loss": 0.2609, "step": 4626500 }, { "epoch": 2.77, "learning_rate": 3.661647788976261e-05, "loss": 0.2603, "step": 4627000 }, { "epoch": 2.77, "learning_rate": 3.661437792420204e-05, "loss": 0.2638, "step": 4627500 }, { "epoch": 2.77, "learning_rate": 3.661227795864148e-05, "loss": 0.2586, "step": 4628000 }, { "epoch": 2.77, "learning_rate": 3.661017799308092e-05, "loss": 0.2584, "step": 4628500 }, { "epoch": 2.78, "learning_rate": 3.660808222745147e-05, "loss": 0.2602, "step": 4629000 }, { "epoch": 2.78, "learning_rate": 3.6605982261890904e-05, "loss": 0.262, "step": 4629500 }, { "epoch": 2.78, "learning_rate": 3.6603882296330344e-05, "loss": 0.2632, "step": 4630000 }, { "epoch": 2.78, "learning_rate": 3.660178233076978e-05, "loss": 0.2685, "step": 4630500 }, { "epoch": 2.78, "learning_rate": 3.659968656514033e-05, "loss": 0.2614, "step": 4631000 }, { "epoch": 2.78, "learning_rate": 3.6597586599579765e-05, "loss": 0.2677, "step": 4631500 }, { "epoch": 2.78, "learning_rate": 3.6595486634019205e-05, "loss": 0.2625, "step": 4632000 }, { "epoch": 2.78, "learning_rate": 3.659338666845864e-05, "loss": 0.2632, "step": 4632500 }, { "epoch": 2.78, "learning_rate": 3.659129090282919e-05, "loss": 0.2617, "step": 4633000 }, { "epoch": 2.78, "learning_rate": 3.6589190937268625e-05, "loss": 0.2588, "step": 4633500 }, { "epoch": 2.78, "learning_rate": 3.6587090971708066e-05, "loss": 0.2519, "step": 4634000 }, { "epoch": 2.78, "learning_rate": 3.65849910061475e-05, "loss": 0.2562, "step": 4634500 }, { "epoch": 2.78, "learning_rate": 3.658289524051805e-05, "loss": 0.259, "step": 4635000 }, { "epoch": 2.78, "learning_rate": 3.658079527495749e-05, "loss": 0.2599, "step": 4635500 }, { "epoch": 2.78, "learning_rate": 3.6578695309396926e-05, "loss": 0.2555, "step": 4636000 }, { "epoch": 2.78, "learning_rate": 3.657659534383636e-05, "loss": 0.2584, "step": 4636500 }, { "epoch": 2.78, "learning_rate": 3.6574499578206913e-05, "loss": 0.2571, "step": 4637000 }, { "epoch": 2.78, "learning_rate": 3.6572399612646354e-05, "loss": 0.261, "step": 4637500 }, { "epoch": 2.78, "learning_rate": 3.657029964708579e-05, "loss": 0.265, "step": 4638000 }, { "epoch": 2.78, "learning_rate": 3.656819968152522e-05, "loss": 0.2548, "step": 4638500 }, { "epoch": 2.78, "learning_rate": 3.656609971596466e-05, "loss": 0.2601, "step": 4639000 }, { "epoch": 2.78, "learning_rate": 3.6564003950335214e-05, "loss": 0.2623, "step": 4639500 }, { "epoch": 2.78, "learning_rate": 3.656190398477465e-05, "loss": 0.2568, "step": 4640000 }, { "epoch": 2.78, "learning_rate": 3.655980401921408e-05, "loss": 0.2608, "step": 4640500 }, { "epoch": 2.78, "learning_rate": 3.655770405365352e-05, "loss": 0.2609, "step": 4641000 }, { "epoch": 2.78, "learning_rate": 3.6555608288024075e-05, "loss": 0.2639, "step": 4641500 }, { "epoch": 2.78, "learning_rate": 3.655350832246351e-05, "loss": 0.2582, "step": 4642000 }, { "epoch": 2.78, "learning_rate": 3.655140835690295e-05, "loss": 0.2574, "step": 4642500 }, { "epoch": 2.78, "learning_rate": 3.65493125912735e-05, "loss": 0.2596, "step": 4643000 }, { "epoch": 2.78, "learning_rate": 3.6547212625712936e-05, "loss": 0.2568, "step": 4643500 }, { "epoch": 2.78, "learning_rate": 3.654511266015237e-05, "loss": 0.2602, "step": 4644000 }, { "epoch": 2.78, "learning_rate": 3.654301269459181e-05, "loss": 0.2587, "step": 4644500 }, { "epoch": 2.78, "learning_rate": 3.654091272903124e-05, "loss": 0.259, "step": 4645000 }, { "epoch": 2.79, "learning_rate": 3.6538812763470676e-05, "loss": 0.2638, "step": 4645500 }, { "epoch": 2.79, "learning_rate": 3.653671279791012e-05, "loss": 0.2631, "step": 4646000 }, { "epoch": 2.79, "learning_rate": 3.653461283234955e-05, "loss": 0.2607, "step": 4646500 }, { "epoch": 2.79, "learning_rate": 3.6532517066720104e-05, "loss": 0.2612, "step": 4647000 }, { "epoch": 2.79, "learning_rate": 3.6530417101159544e-05, "loss": 0.2613, "step": 4647500 }, { "epoch": 2.79, "learning_rate": 3.652831713559898e-05, "loss": 0.2591, "step": 4648000 }, { "epoch": 2.79, "learning_rate": 3.652621717003841e-05, "loss": 0.2654, "step": 4648500 }, { "epoch": 2.79, "learning_rate": 3.6524121404408964e-05, "loss": 0.2617, "step": 4649000 }, { "epoch": 2.79, "learning_rate": 3.6522021438848405e-05, "loss": 0.2635, "step": 4649500 }, { "epoch": 2.79, "learning_rate": 3.651992147328784e-05, "loss": 0.27, "step": 4650000 }, { "epoch": 2.79, "learning_rate": 3.651782570765839e-05, "loss": 0.2635, "step": 4650500 }, { "epoch": 2.79, "learning_rate": 3.6515725742097825e-05, "loss": 0.2603, "step": 4651000 }, { "epoch": 2.79, "learning_rate": 3.6513625776537265e-05, "loss": 0.2615, "step": 4651500 }, { "epoch": 2.79, "learning_rate": 3.65115258109767e-05, "loss": 0.2631, "step": 4652000 }, { "epoch": 2.79, "learning_rate": 3.650942584541613e-05, "loss": 0.2534, "step": 4652500 }, { "epoch": 2.79, "learning_rate": 3.6507330079786686e-05, "loss": 0.2635, "step": 4653000 }, { "epoch": 2.79, "learning_rate": 3.6505230114226126e-05, "loss": 0.266, "step": 4653500 }, { "epoch": 2.79, "learning_rate": 3.650313014866556e-05, "loss": 0.2593, "step": 4654000 }, { "epoch": 2.79, "learning_rate": 3.6501030183105e-05, "loss": 0.2601, "step": 4654500 }, { "epoch": 2.79, "learning_rate": 3.649893021754443e-05, "loss": 0.2593, "step": 4655000 }, { "epoch": 2.79, "learning_rate": 3.649683025198387e-05, "loss": 0.2613, "step": 4655500 }, { "epoch": 2.79, "learning_rate": 3.649473028642331e-05, "loss": 0.2586, "step": 4656000 }, { "epoch": 2.79, "learning_rate": 3.649263032086274e-05, "loss": 0.2652, "step": 4656500 }, { "epoch": 2.79, "learning_rate": 3.6490534555233294e-05, "loss": 0.2588, "step": 4657000 }, { "epoch": 2.79, "learning_rate": 3.648843458967273e-05, "loss": 0.259, "step": 4657500 }, { "epoch": 2.79, "learning_rate": 3.648633462411217e-05, "loss": 0.2684, "step": 4658000 }, { "epoch": 2.79, "learning_rate": 3.64842346585516e-05, "loss": 0.266, "step": 4658500 }, { "epoch": 2.79, "learning_rate": 3.6482138892922155e-05, "loss": 0.2569, "step": 4659000 }, { "epoch": 2.79, "learning_rate": 3.648003892736159e-05, "loss": 0.2535, "step": 4659500 }, { "epoch": 2.79, "learning_rate": 3.647793896180103e-05, "loss": 0.2583, "step": 4660000 }, { "epoch": 2.79, "learning_rate": 3.647583899624046e-05, "loss": 0.2587, "step": 4660500 }, { "epoch": 2.79, "learning_rate": 3.6473743230611015e-05, "loss": 0.2637, "step": 4661000 }, { "epoch": 2.79, "learning_rate": 3.6471643265050456e-05, "loss": 0.2633, "step": 4661500 }, { "epoch": 2.8, "learning_rate": 3.646954329948989e-05, "loss": 0.2626, "step": 4662000 }, { "epoch": 2.8, "learning_rate": 3.646744333392932e-05, "loss": 0.2579, "step": 4662500 }, { "epoch": 2.8, "learning_rate": 3.6465347568299876e-05, "loss": 0.2619, "step": 4663000 }, { "epoch": 2.8, "learning_rate": 3.6463247602739316e-05, "loss": 0.2619, "step": 4663500 }, { "epoch": 2.8, "learning_rate": 3.646114763717875e-05, "loss": 0.2596, "step": 4664000 }, { "epoch": 2.8, "learning_rate": 3.645904767161818e-05, "loss": 0.264, "step": 4664500 }, { "epoch": 2.8, "learning_rate": 3.6456947706057624e-05, "loss": 0.2617, "step": 4665000 }, { "epoch": 2.8, "learning_rate": 3.645485194042818e-05, "loss": 0.2662, "step": 4665500 }, { "epoch": 2.8, "learning_rate": 3.645275197486761e-05, "loss": 0.2578, "step": 4666000 }, { "epoch": 2.8, "learning_rate": 3.6450652009307044e-05, "loss": 0.2642, "step": 4666500 }, { "epoch": 2.8, "learning_rate": 3.6448552043746484e-05, "loss": 0.2638, "step": 4667000 }, { "epoch": 2.8, "learning_rate": 3.644645627811704e-05, "loss": 0.2566, "step": 4667500 }, { "epoch": 2.8, "learning_rate": 3.644435631255647e-05, "loss": 0.256, "step": 4668000 }, { "epoch": 2.8, "learning_rate": 3.644225634699591e-05, "loss": 0.257, "step": 4668500 }, { "epoch": 2.8, "learning_rate": 3.6440156381435345e-05, "loss": 0.2612, "step": 4669000 }, { "epoch": 2.8, "learning_rate": 3.64380606158059e-05, "loss": 0.2626, "step": 4669500 }, { "epoch": 2.8, "learning_rate": 3.643596065024533e-05, "loss": 0.2649, "step": 4670000 }, { "epoch": 2.8, "learning_rate": 3.643386068468477e-05, "loss": 0.2593, "step": 4670500 }, { "epoch": 2.8, "learning_rate": 3.6431760719124206e-05, "loss": 0.2569, "step": 4671000 }, { "epoch": 2.8, "learning_rate": 3.642966495349476e-05, "loss": 0.2568, "step": 4671500 }, { "epoch": 2.8, "learning_rate": 3.642756498793419e-05, "loss": 0.2654, "step": 4672000 }, { "epoch": 2.8, "learning_rate": 3.642546502237363e-05, "loss": 0.2648, "step": 4672500 }, { "epoch": 2.8, "learning_rate": 3.6423365056813066e-05, "loss": 0.2637, "step": 4673000 }, { "epoch": 2.8, "learning_rate": 3.642126929118362e-05, "loss": 0.2671, "step": 4673500 }, { "epoch": 2.8, "learning_rate": 3.641916932562306e-05, "loss": 0.2658, "step": 4674000 }, { "epoch": 2.8, "learning_rate": 3.6417069360062494e-05, "loss": 0.2584, "step": 4674500 }, { "epoch": 2.8, "learning_rate": 3.641496939450193e-05, "loss": 0.2636, "step": 4675000 }, { "epoch": 2.8, "learning_rate": 3.641287362887249e-05, "loss": 0.2635, "step": 4675500 }, { "epoch": 2.8, "learning_rate": 3.641077366331192e-05, "loss": 0.2635, "step": 4676000 }, { "epoch": 2.8, "learning_rate": 3.6408673697751354e-05, "loss": 0.2633, "step": 4676500 }, { "epoch": 2.8, "learning_rate": 3.640657373219079e-05, "loss": 0.2623, "step": 4677000 }, { "epoch": 2.8, "learning_rate": 3.640447796656135e-05, "loss": 0.2613, "step": 4677500 }, { "epoch": 2.8, "learning_rate": 3.640237800100079e-05, "loss": 0.2607, "step": 4678000 }, { "epoch": 2.8, "learning_rate": 3.6400278035440215e-05, "loss": 0.2577, "step": 4678500 }, { "epoch": 2.81, "learning_rate": 3.639817806987965e-05, "loss": 0.2618, "step": 4679000 }, { "epoch": 2.81, "learning_rate": 3.639608230425021e-05, "loss": 0.2647, "step": 4679500 }, { "epoch": 2.81, "learning_rate": 3.639398233868965e-05, "loss": 0.2675, "step": 4680000 }, { "epoch": 2.81, "learning_rate": 3.6391882373129076e-05, "loss": 0.257, "step": 4680500 }, { "epoch": 2.81, "learning_rate": 3.6389782407568516e-05, "loss": 0.2598, "step": 4681000 }, { "epoch": 2.81, "learning_rate": 3.638768244200795e-05, "loss": 0.2602, "step": 4681500 }, { "epoch": 2.81, "learning_rate": 3.638558667637851e-05, "loss": 0.263, "step": 4682000 }, { "epoch": 2.81, "learning_rate": 3.6383486710817943e-05, "loss": 0.2611, "step": 4682500 }, { "epoch": 2.81, "learning_rate": 3.638138674525738e-05, "loss": 0.2575, "step": 4683000 }, { "epoch": 2.81, "learning_rate": 3.637928677969681e-05, "loss": 0.2584, "step": 4683500 }, { "epoch": 2.81, "learning_rate": 3.637719101406737e-05, "loss": 0.2627, "step": 4684000 }, { "epoch": 2.81, "learning_rate": 3.6375091048506804e-05, "loss": 0.258, "step": 4684500 }, { "epoch": 2.81, "learning_rate": 3.6372991082946244e-05, "loss": 0.2588, "step": 4685000 }, { "epoch": 2.81, "learning_rate": 3.637089111738567e-05, "loss": 0.2612, "step": 4685500 }, { "epoch": 2.81, "learning_rate": 3.636879535175623e-05, "loss": 0.2633, "step": 4686000 }, { "epoch": 2.81, "learning_rate": 3.6366695386195665e-05, "loss": 0.2621, "step": 4686500 }, { "epoch": 2.81, "learning_rate": 3.6364595420635105e-05, "loss": 0.2612, "step": 4687000 }, { "epoch": 2.81, "learning_rate": 3.636249545507454e-05, "loss": 0.2673, "step": 4687500 }, { "epoch": 2.81, "learning_rate": 3.636039968944509e-05, "loss": 0.2625, "step": 4688000 }, { "epoch": 2.81, "learning_rate": 3.635829972388453e-05, "loss": 0.2632, "step": 4688500 }, { "epoch": 2.81, "learning_rate": 3.6356199758323966e-05, "loss": 0.2569, "step": 4689000 }, { "epoch": 2.81, "learning_rate": 3.63540997927634e-05, "loss": 0.2588, "step": 4689500 }, { "epoch": 2.81, "learning_rate": 3.635200402713395e-05, "loss": 0.264, "step": 4690000 }, { "epoch": 2.81, "learning_rate": 3.634990406157339e-05, "loss": 0.2631, "step": 4690500 }, { "epoch": 2.81, "learning_rate": 3.634780829594395e-05, "loss": 0.2635, "step": 4691000 }, { "epoch": 2.81, "learning_rate": 3.634570833038338e-05, "loss": 0.2588, "step": 4691500 }, { "epoch": 2.81, "learning_rate": 3.6343608364822814e-05, "loss": 0.2581, "step": 4692000 }, { "epoch": 2.81, "learning_rate": 3.6341508399262254e-05, "loss": 0.2609, "step": 4692500 }, { "epoch": 2.81, "learning_rate": 3.633940843370169e-05, "loss": 0.2622, "step": 4693000 }, { "epoch": 2.81, "learning_rate": 3.633730846814112e-05, "loss": 0.2647, "step": 4693500 }, { "epoch": 2.81, "learning_rate": 3.633520850258056e-05, "loss": 0.2628, "step": 4694000 }, { "epoch": 2.81, "learning_rate": 3.6333108537019994e-05, "loss": 0.2636, "step": 4694500 }, { "epoch": 2.81, "learning_rate": 3.633101277139055e-05, "loss": 0.2563, "step": 4695000 }, { "epoch": 2.82, "learning_rate": 3.632891280582999e-05, "loss": 0.2593, "step": 4695500 }, { "epoch": 2.82, "learning_rate": 3.632681284026942e-05, "loss": 0.2633, "step": 4696000 }, { "epoch": 2.82, "learning_rate": 3.6324712874708855e-05, "loss": 0.2583, "step": 4696500 }, { "epoch": 2.82, "learning_rate": 3.6322612909148295e-05, "loss": 0.258, "step": 4697000 }, { "epoch": 2.82, "learning_rate": 3.632051714351885e-05, "loss": 0.2599, "step": 4697500 }, { "epoch": 2.82, "learning_rate": 3.63184213778894e-05, "loss": 0.2659, "step": 4698000 }, { "epoch": 2.82, "learning_rate": 3.6316321412328836e-05, "loss": 0.264, "step": 4698500 }, { "epoch": 2.82, "learning_rate": 3.631422144676827e-05, "loss": 0.2564, "step": 4699000 }, { "epoch": 2.82, "learning_rate": 3.631212148120771e-05, "loss": 0.2664, "step": 4699500 }, { "epoch": 2.82, "learning_rate": 3.631002151564714e-05, "loss": 0.2564, "step": 4700000 }, { "epoch": 2.82, "eval_loss": 0.24051640927791595, "eval_runtime": 1461.9433, "eval_samples_per_second": 360.288, "eval_steps_per_second": 60.048, "step": 4700000 }, { "epoch": 2.82, "learning_rate": 3.630792155008658e-05, "loss": 0.261, "step": 4700500 }, { "epoch": 2.82, "learning_rate": 3.630582158452602e-05, "loss": 0.2584, "step": 4701000 }, { "epoch": 2.82, "learning_rate": 3.630372161896545e-05, "loss": 0.2579, "step": 4701500 }, { "epoch": 2.82, "learning_rate": 3.6301625853336004e-05, "loss": 0.263, "step": 4702000 }, { "epoch": 2.82, "learning_rate": 3.6299525887775444e-05, "loss": 0.2629, "step": 4702500 }, { "epoch": 2.82, "learning_rate": 3.629742592221488e-05, "loss": 0.2546, "step": 4703000 }, { "epoch": 2.82, "learning_rate": 3.629532595665431e-05, "loss": 0.2609, "step": 4703500 }, { "epoch": 2.82, "learning_rate": 3.6293230191024865e-05, "loss": 0.2608, "step": 4704000 }, { "epoch": 2.82, "learning_rate": 3.6291130225464305e-05, "loss": 0.2553, "step": 4704500 }, { "epoch": 2.82, "learning_rate": 3.628903025990374e-05, "loss": 0.2617, "step": 4705000 }, { "epoch": 2.82, "learning_rate": 3.628693029434317e-05, "loss": 0.2631, "step": 4705500 }, { "epoch": 2.82, "learning_rate": 3.6284834528713725e-05, "loss": 0.2623, "step": 4706000 }, { "epoch": 2.82, "learning_rate": 3.6282734563153166e-05, "loss": 0.2561, "step": 4706500 }, { "epoch": 2.82, "learning_rate": 3.62806345975926e-05, "loss": 0.2638, "step": 4707000 }, { "epoch": 2.82, "learning_rate": 3.627853463203203e-05, "loss": 0.2596, "step": 4707500 }, { "epoch": 2.82, "learning_rate": 3.627643886640259e-05, "loss": 0.2637, "step": 4708000 }, { "epoch": 2.82, "learning_rate": 3.6274338900842026e-05, "loss": 0.26, "step": 4708500 }, { "epoch": 2.82, "learning_rate": 3.627223893528146e-05, "loss": 0.2569, "step": 4709000 }, { "epoch": 2.82, "learning_rate": 3.62701389697209e-05, "loss": 0.2646, "step": 4709500 }, { "epoch": 2.82, "learning_rate": 3.6268043204091454e-05, "loss": 0.2611, "step": 4710000 }, { "epoch": 2.82, "learning_rate": 3.626594323853089e-05, "loss": 0.2588, "step": 4710500 }, { "epoch": 2.82, "learning_rate": 3.626384327297032e-05, "loss": 0.2632, "step": 4711000 }, { "epoch": 2.82, "learning_rate": 3.626174330740976e-05, "loss": 0.2631, "step": 4711500 }, { "epoch": 2.83, "learning_rate": 3.6259647541780314e-05, "loss": 0.2594, "step": 4712000 }, { "epoch": 2.83, "learning_rate": 3.625754757621975e-05, "loss": 0.2612, "step": 4712500 }, { "epoch": 2.83, "learning_rate": 3.625544761065918e-05, "loss": 0.2595, "step": 4713000 }, { "epoch": 2.83, "learning_rate": 3.625334764509862e-05, "loss": 0.2586, "step": 4713500 }, { "epoch": 2.83, "learning_rate": 3.6251247679538055e-05, "loss": 0.2595, "step": 4714000 }, { "epoch": 2.83, "learning_rate": 3.624914771397749e-05, "loss": 0.259, "step": 4714500 }, { "epoch": 2.83, "learning_rate": 3.624704774841693e-05, "loss": 0.2677, "step": 4715000 }, { "epoch": 2.83, "learning_rate": 3.624494778285636e-05, "loss": 0.2591, "step": 4715500 }, { "epoch": 2.83, "learning_rate": 3.6242852017226916e-05, "loss": 0.2672, "step": 4716000 }, { "epoch": 2.83, "learning_rate": 3.6240752051666356e-05, "loss": 0.2618, "step": 4716500 }, { "epoch": 2.83, "learning_rate": 3.623865208610579e-05, "loss": 0.2581, "step": 4717000 }, { "epoch": 2.83, "learning_rate": 3.623655212054522e-05, "loss": 0.2652, "step": 4717500 }, { "epoch": 2.83, "learning_rate": 3.6234456354915776e-05, "loss": 0.2613, "step": 4718000 }, { "epoch": 2.83, "learning_rate": 3.623235638935522e-05, "loss": 0.2671, "step": 4718500 }, { "epoch": 2.83, "learning_rate": 3.623025642379465e-05, "loss": 0.2627, "step": 4719000 }, { "epoch": 2.83, "learning_rate": 3.6228156458234084e-05, "loss": 0.2565, "step": 4719500 }, { "epoch": 2.83, "learning_rate": 3.622606069260464e-05, "loss": 0.2631, "step": 4720000 }, { "epoch": 2.83, "learning_rate": 3.622396072704408e-05, "loss": 0.2604, "step": 4720500 }, { "epoch": 2.83, "learning_rate": 3.622186076148351e-05, "loss": 0.2652, "step": 4721000 }, { "epoch": 2.83, "learning_rate": 3.6219760795922944e-05, "loss": 0.2545, "step": 4721500 }, { "epoch": 2.83, "learning_rate": 3.6217665030293505e-05, "loss": 0.2632, "step": 4722000 }, { "epoch": 2.83, "learning_rate": 3.621556506473294e-05, "loss": 0.2611, "step": 4722500 }, { "epoch": 2.83, "learning_rate": 3.621346509917237e-05, "loss": 0.2608, "step": 4723000 }, { "epoch": 2.83, "learning_rate": 3.621136513361181e-05, "loss": 0.2584, "step": 4723500 }, { "epoch": 2.83, "learning_rate": 3.6209265168051245e-05, "loss": 0.2569, "step": 4724000 }, { "epoch": 2.83, "learning_rate": 3.62071694024218e-05, "loss": 0.2561, "step": 4724500 }, { "epoch": 2.83, "learning_rate": 3.620506943686123e-05, "loss": 0.2697, "step": 4725000 }, { "epoch": 2.83, "learning_rate": 3.620296947130067e-05, "loss": 0.2604, "step": 4725500 }, { "epoch": 2.83, "learning_rate": 3.6200869505740106e-05, "loss": 0.2568, "step": 4726000 }, { "epoch": 2.83, "learning_rate": 3.619877374011066e-05, "loss": 0.262, "step": 4726500 }, { "epoch": 2.83, "learning_rate": 3.619667377455009e-05, "loss": 0.2589, "step": 4727000 }, { "epoch": 2.83, "learning_rate": 3.619457380898953e-05, "loss": 0.2599, "step": 4727500 }, { "epoch": 2.83, "learning_rate": 3.619247384342897e-05, "loss": 0.2664, "step": 4728000 }, { "epoch": 2.83, "learning_rate": 3.619037807779952e-05, "loss": 0.2617, "step": 4728500 }, { "epoch": 2.84, "learning_rate": 3.618827811223896e-05, "loss": 0.2582, "step": 4729000 }, { "epoch": 2.84, "learning_rate": 3.6186178146678394e-05, "loss": 0.2585, "step": 4729500 }, { "epoch": 2.84, "learning_rate": 3.618407818111783e-05, "loss": 0.2577, "step": 4730000 }, { "epoch": 2.84, "learning_rate": 3.618198241548838e-05, "loss": 0.2584, "step": 4730500 }, { "epoch": 2.84, "learning_rate": 3.617988244992782e-05, "loss": 0.2603, "step": 4731000 }, { "epoch": 2.84, "learning_rate": 3.6177782484367255e-05, "loss": 0.2654, "step": 4731500 }, { "epoch": 2.84, "learning_rate": 3.617568251880669e-05, "loss": 0.2613, "step": 4732000 }, { "epoch": 2.84, "learning_rate": 3.617358675317724e-05, "loss": 0.2661, "step": 4732500 }, { "epoch": 2.84, "learning_rate": 3.617148678761668e-05, "loss": 0.2593, "step": 4733000 }, { "epoch": 2.84, "learning_rate": 3.6169386822056115e-05, "loss": 0.2644, "step": 4733500 }, { "epoch": 2.84, "learning_rate": 3.616728685649555e-05, "loss": 0.2615, "step": 4734000 }, { "epoch": 2.84, "learning_rate": 3.616519109086611e-05, "loss": 0.2631, "step": 4734500 }, { "epoch": 2.84, "learning_rate": 3.616309112530554e-05, "loss": 0.2644, "step": 4735000 }, { "epoch": 2.84, "learning_rate": 3.61609953596761e-05, "loss": 0.262, "step": 4735500 }, { "epoch": 2.84, "learning_rate": 3.6158895394115537e-05, "loss": 0.2585, "step": 4736000 }, { "epoch": 2.84, "learning_rate": 3.615679542855498e-05, "loss": 0.26, "step": 4736500 }, { "epoch": 2.84, "learning_rate": 3.6154695462994403e-05, "loss": 0.26, "step": 4737000 }, { "epoch": 2.84, "learning_rate": 3.615259549743384e-05, "loss": 0.2659, "step": 4737500 }, { "epoch": 2.84, "learning_rate": 3.615049553187328e-05, "loss": 0.2565, "step": 4738000 }, { "epoch": 2.84, "learning_rate": 3.614839556631271e-05, "loss": 0.2557, "step": 4738500 }, { "epoch": 2.84, "learning_rate": 3.6146295600752144e-05, "loss": 0.2597, "step": 4739000 }, { "epoch": 2.84, "learning_rate": 3.6144195635191584e-05, "loss": 0.2602, "step": 4739500 }, { "epoch": 2.84, "learning_rate": 3.614209986956214e-05, "loss": 0.2577, "step": 4740000 }, { "epoch": 2.84, "learning_rate": 3.61400041039327e-05, "loss": 0.2608, "step": 4740500 }, { "epoch": 2.84, "learning_rate": 3.613790413837213e-05, "loss": 0.2615, "step": 4741000 }, { "epoch": 2.84, "learning_rate": 3.6135804172811565e-05, "loss": 0.2607, "step": 4741500 }, { "epoch": 2.84, "learning_rate": 3.6133704207251e-05, "loss": 0.2618, "step": 4742000 }, { "epoch": 2.84, "learning_rate": 3.613160424169043e-05, "loss": 0.2623, "step": 4742500 }, { "epoch": 2.84, "learning_rate": 3.612950847606099e-05, "loss": 0.2651, "step": 4743000 }, { "epoch": 2.84, "learning_rate": 3.612740851050043e-05, "loss": 0.2614, "step": 4743500 }, { "epoch": 2.84, "learning_rate": 3.612530854493986e-05, "loss": 0.2626, "step": 4744000 }, { "epoch": 2.84, "learning_rate": 3.612320857937929e-05, "loss": 0.2559, "step": 4744500 }, { "epoch": 2.84, "learning_rate": 3.612110861381873e-05, "loss": 0.2568, "step": 4745000 }, { "epoch": 2.85, "learning_rate": 3.6119008648258166e-05, "loss": 0.2645, "step": 4745500 }, { "epoch": 2.85, "learning_rate": 3.61169086826976e-05, "loss": 0.2645, "step": 4746000 }, { "epoch": 2.85, "learning_rate": 3.611480871713704e-05, "loss": 0.2565, "step": 4746500 }, { "epoch": 2.85, "learning_rate": 3.6112712951507594e-05, "loss": 0.2612, "step": 4747000 }, { "epoch": 2.85, "learning_rate": 3.611061298594703e-05, "loss": 0.2638, "step": 4747500 }, { "epoch": 2.85, "learning_rate": 3.610851302038646e-05, "loss": 0.2584, "step": 4748000 }, { "epoch": 2.85, "learning_rate": 3.61064130548259e-05, "loss": 0.2571, "step": 4748500 }, { "epoch": 2.85, "learning_rate": 3.6104313089265334e-05, "loss": 0.263, "step": 4749000 }, { "epoch": 2.85, "learning_rate": 3.610221312370477e-05, "loss": 0.2614, "step": 4749500 }, { "epoch": 2.85, "learning_rate": 3.610011315814421e-05, "loss": 0.262, "step": 4750000 }, { "epoch": 2.85, "learning_rate": 3.609801319258364e-05, "loss": 0.2579, "step": 4750500 }, { "epoch": 2.85, "learning_rate": 3.6095917426954195e-05, "loss": 0.2632, "step": 4751000 }, { "epoch": 2.85, "learning_rate": 3.6093817461393635e-05, "loss": 0.2588, "step": 4751500 }, { "epoch": 2.85, "learning_rate": 3.609171749583307e-05, "loss": 0.2666, "step": 4752000 }, { "epoch": 2.85, "learning_rate": 3.60896175302725e-05, "loss": 0.2564, "step": 4752500 }, { "epoch": 2.85, "learning_rate": 3.6087521764643056e-05, "loss": 0.2574, "step": 4753000 }, { "epoch": 2.85, "learning_rate": 3.6085421799082496e-05, "loss": 0.2629, "step": 4753500 }, { "epoch": 2.85, "learning_rate": 3.608332183352193e-05, "loss": 0.2632, "step": 4754000 }, { "epoch": 2.85, "learning_rate": 3.608122186796136e-05, "loss": 0.2613, "step": 4754500 }, { "epoch": 2.85, "learning_rate": 3.6079126102331917e-05, "loss": 0.2545, "step": 4755000 }, { "epoch": 2.85, "learning_rate": 3.607702613677136e-05, "loss": 0.257, "step": 4755500 }, { "epoch": 2.85, "learning_rate": 3.607493037114191e-05, "loss": 0.2663, "step": 4756000 }, { "epoch": 2.85, "learning_rate": 3.6072830405581344e-05, "loss": 0.2548, "step": 4756500 }, { "epoch": 2.85, "learning_rate": 3.6070730440020784e-05, "loss": 0.2645, "step": 4757000 }, { "epoch": 2.85, "learning_rate": 3.606863047446022e-05, "loss": 0.2681, "step": 4757500 }, { "epoch": 2.85, "learning_rate": 3.606653050889965e-05, "loss": 0.2585, "step": 4758000 }, { "epoch": 2.85, "learning_rate": 3.606443054333909e-05, "loss": 0.2605, "step": 4758500 }, { "epoch": 2.85, "learning_rate": 3.6062330577778525e-05, "loss": 0.2558, "step": 4759000 }, { "epoch": 2.85, "learning_rate": 3.606023061221796e-05, "loss": 0.2569, "step": 4759500 }, { "epoch": 2.85, "learning_rate": 3.605813484658851e-05, "loss": 0.2598, "step": 4760000 }, { "epoch": 2.85, "learning_rate": 3.605603488102795e-05, "loss": 0.264, "step": 4760500 }, { "epoch": 2.85, "learning_rate": 3.6053934915467385e-05, "loss": 0.2655, "step": 4761000 }, { "epoch": 2.85, "learning_rate": 3.605183494990682e-05, "loss": 0.2604, "step": 4761500 }, { "epoch": 2.86, "learning_rate": 3.604973918427737e-05, "loss": 0.2585, "step": 4762000 }, { "epoch": 2.86, "learning_rate": 3.604763921871681e-05, "loss": 0.2592, "step": 4762500 }, { "epoch": 2.86, "learning_rate": 3.6045539253156246e-05, "loss": 0.2704, "step": 4763000 }, { "epoch": 2.86, "learning_rate": 3.6043439287595686e-05, "loss": 0.2615, "step": 4763500 }, { "epoch": 2.86, "learning_rate": 3.604134352196624e-05, "loss": 0.2687, "step": 4764000 }, { "epoch": 2.86, "learning_rate": 3.603924355640567e-05, "loss": 0.2614, "step": 4764500 }, { "epoch": 2.86, "learning_rate": 3.603714359084511e-05, "loss": 0.2647, "step": 4765000 }, { "epoch": 2.86, "learning_rate": 3.603504362528455e-05, "loss": 0.258, "step": 4765500 }, { "epoch": 2.86, "learning_rate": 3.603294365972398e-05, "loss": 0.2562, "step": 4766000 }, { "epoch": 2.86, "learning_rate": 3.6030843694163414e-05, "loss": 0.2566, "step": 4766500 }, { "epoch": 2.86, "learning_rate": 3.602874792853397e-05, "loss": 0.2575, "step": 4767000 }, { "epoch": 2.86, "learning_rate": 3.602664796297341e-05, "loss": 0.2643, "step": 4767500 }, { "epoch": 2.86, "learning_rate": 3.602454799741284e-05, "loss": 0.262, "step": 4768000 }, { "epoch": 2.86, "learning_rate": 3.6022448031852275e-05, "loss": 0.2596, "step": 4768500 }, { "epoch": 2.86, "learning_rate": 3.6020348066291715e-05, "loss": 0.2665, "step": 4769000 }, { "epoch": 2.86, "learning_rate": 3.601824810073115e-05, "loss": 0.2534, "step": 4769500 }, { "epoch": 2.86, "learning_rate": 3.601614813517058e-05, "loss": 0.2592, "step": 4770000 }, { "epoch": 2.86, "learning_rate": 3.601404816961002e-05, "loss": 0.2592, "step": 4770500 }, { "epoch": 2.86, "learning_rate": 3.601194820404945e-05, "loss": 0.2583, "step": 4771000 }, { "epoch": 2.86, "learning_rate": 3.600985243842001e-05, "loss": 0.2605, "step": 4771500 }, { "epoch": 2.86, "learning_rate": 3.600775247285945e-05, "loss": 0.2622, "step": 4772000 }, { "epoch": 2.86, "learning_rate": 3.600565250729888e-05, "loss": 0.2635, "step": 4772500 }, { "epoch": 2.86, "learning_rate": 3.6003552541738316e-05, "loss": 0.2587, "step": 4773000 }, { "epoch": 2.86, "learning_rate": 3.600145677610887e-05, "loss": 0.2584, "step": 4773500 }, { "epoch": 2.86, "learning_rate": 3.599935681054831e-05, "loss": 0.2635, "step": 4774000 }, { "epoch": 2.86, "learning_rate": 3.5997256844987744e-05, "loss": 0.2587, "step": 4774500 }, { "epoch": 2.86, "learning_rate": 3.599515687942718e-05, "loss": 0.2628, "step": 4775000 }, { "epoch": 2.86, "learning_rate": 3.599306111379773e-05, "loss": 0.2631, "step": 4775500 }, { "epoch": 2.86, "learning_rate": 3.599096114823717e-05, "loss": 0.2619, "step": 4776000 }, { "epoch": 2.86, "learning_rate": 3.5988861182676604e-05, "loss": 0.2564, "step": 4776500 }, { "epoch": 2.86, "learning_rate": 3.598676121711604e-05, "loss": 0.2588, "step": 4777000 }, { "epoch": 2.86, "learning_rate": 3.59846654514866e-05, "loss": 0.265, "step": 4777500 }, { "epoch": 2.86, "learning_rate": 3.598256548592603e-05, "loss": 0.2616, "step": 4778000 }, { "epoch": 2.86, "learning_rate": 3.5980465520365465e-05, "loss": 0.2595, "step": 4778500 }, { "epoch": 2.87, "learning_rate": 3.5978365554804905e-05, "loss": 0.2637, "step": 4779000 }, { "epoch": 2.87, "learning_rate": 3.597626978917546e-05, "loss": 0.2611, "step": 4779500 }, { "epoch": 2.87, "learning_rate": 3.597416982361489e-05, "loss": 0.2637, "step": 4780000 }, { "epoch": 2.87, "learning_rate": 3.5972069858054326e-05, "loss": 0.2582, "step": 4780500 }, { "epoch": 2.87, "learning_rate": 3.5969969892493766e-05, "loss": 0.263, "step": 4781000 }, { "epoch": 2.87, "learning_rate": 3.596787412686432e-05, "loss": 0.2595, "step": 4781500 }, { "epoch": 2.87, "learning_rate": 3.596577416130375e-05, "loss": 0.2583, "step": 4782000 }, { "epoch": 2.87, "learning_rate": 3.5963674195743186e-05, "loss": 0.2668, "step": 4782500 }, { "epoch": 2.87, "learning_rate": 3.596157423018263e-05, "loss": 0.2583, "step": 4783000 }, { "epoch": 2.87, "learning_rate": 3.595947846455318e-05, "loss": 0.2543, "step": 4783500 }, { "epoch": 2.87, "learning_rate": 3.5957378498992614e-05, "loss": 0.2606, "step": 4784000 }, { "epoch": 2.87, "learning_rate": 3.5955278533432054e-05, "loss": 0.2657, "step": 4784500 }, { "epoch": 2.87, "learning_rate": 3.595317856787149e-05, "loss": 0.2538, "step": 4785000 }, { "epoch": 2.87, "learning_rate": 3.595108280224204e-05, "loss": 0.2712, "step": 4785500 }, { "epoch": 2.87, "learning_rate": 3.5948982836681474e-05, "loss": 0.2589, "step": 4786000 }, { "epoch": 2.87, "learning_rate": 3.5946882871120915e-05, "loss": 0.2592, "step": 4786500 }, { "epoch": 2.87, "learning_rate": 3.594478290556035e-05, "loss": 0.261, "step": 4787000 }, { "epoch": 2.87, "learning_rate": 3.594268293999978e-05, "loss": 0.2633, "step": 4787500 }, { "epoch": 2.87, "learning_rate": 3.594058297443922e-05, "loss": 0.2649, "step": 4788000 }, { "epoch": 2.87, "learning_rate": 3.5938483008878655e-05, "loss": 0.2593, "step": 4788500 }, { "epoch": 2.87, "learning_rate": 3.593638304331809e-05, "loss": 0.2699, "step": 4789000 }, { "epoch": 2.87, "learning_rate": 3.593428307775753e-05, "loss": 0.2643, "step": 4789500 }, { "epoch": 2.87, "learning_rate": 3.593218731212808e-05, "loss": 0.2635, "step": 4790000 }, { "epoch": 2.87, "learning_rate": 3.5930087346567516e-05, "loss": 0.2626, "step": 4790500 }, { "epoch": 2.87, "learning_rate": 3.592798738100695e-05, "loss": 0.2567, "step": 4791000 }, { "epoch": 2.87, "learning_rate": 3.592588741544639e-05, "loss": 0.2629, "step": 4791500 }, { "epoch": 2.87, "learning_rate": 3.592379164981694e-05, "loss": 0.261, "step": 4792000 }, { "epoch": 2.87, "learning_rate": 3.592169168425638e-05, "loss": 0.2624, "step": 4792500 }, { "epoch": 2.87, "learning_rate": 3.591959171869582e-05, "loss": 0.2594, "step": 4793000 }, { "epoch": 2.87, "learning_rate": 3.591749595306637e-05, "loss": 0.2608, "step": 4793500 }, { "epoch": 2.87, "learning_rate": 3.5915395987505804e-05, "loss": 0.2521, "step": 4794000 }, { "epoch": 2.87, "learning_rate": 3.591329602194524e-05, "loss": 0.2623, "step": 4794500 }, { "epoch": 2.87, "learning_rate": 3.591119605638468e-05, "loss": 0.262, "step": 4795000 }, { "epoch": 2.88, "learning_rate": 3.590909609082411e-05, "loss": 0.2632, "step": 4795500 }, { "epoch": 2.88, "learning_rate": 3.5906996125263545e-05, "loss": 0.2642, "step": 4796000 }, { "epoch": 2.88, "learning_rate": 3.5904896159702985e-05, "loss": 0.2594, "step": 4796500 }, { "epoch": 2.88, "learning_rate": 3.590279619414242e-05, "loss": 0.2587, "step": 4797000 }, { "epoch": 2.88, "learning_rate": 3.590070042851297e-05, "loss": 0.2548, "step": 4797500 }, { "epoch": 2.88, "learning_rate": 3.5898600462952405e-05, "loss": 0.2611, "step": 4798000 }, { "epoch": 2.88, "learning_rate": 3.5896500497391846e-05, "loss": 0.2635, "step": 4798500 }, { "epoch": 2.88, "learning_rate": 3.589440053183128e-05, "loss": 0.2611, "step": 4799000 }, { "epoch": 2.88, "learning_rate": 3.589230476620183e-05, "loss": 0.2587, "step": 4799500 }, { "epoch": 2.88, "learning_rate": 3.589020480064127e-05, "loss": 0.2564, "step": 4800000 }, { "epoch": 2.88, "eval_loss": 0.23914866149425507, "eval_runtime": 1457.6246, "eval_samples_per_second": 361.355, "eval_steps_per_second": 60.226, "step": 4800000 }, { "epoch": 2.88, "learning_rate": 3.5888104835080706e-05, "loss": 0.2664, "step": 4800500 }, { "epoch": 2.88, "learning_rate": 3.588600486952014e-05, "loss": 0.2589, "step": 4801000 }, { "epoch": 2.88, "learning_rate": 3.588390490395958e-05, "loss": 0.2574, "step": 4801500 }, { "epoch": 2.88, "learning_rate": 3.5881809138330134e-05, "loss": 0.2603, "step": 4802000 }, { "epoch": 2.88, "learning_rate": 3.587971337270069e-05, "loss": 0.2625, "step": 4802500 }, { "epoch": 2.88, "learning_rate": 3.587761340714012e-05, "loss": 0.2612, "step": 4803000 }, { "epoch": 2.88, "learning_rate": 3.5875513441579554e-05, "loss": 0.2616, "step": 4803500 }, { "epoch": 2.88, "learning_rate": 3.5873413476018994e-05, "loss": 0.2641, "step": 4804000 }, { "epoch": 2.88, "learning_rate": 3.587131351045843e-05, "loss": 0.2604, "step": 4804500 }, { "epoch": 2.88, "learning_rate": 3.586921354489786e-05, "loss": 0.2578, "step": 4805000 }, { "epoch": 2.88, "learning_rate": 3.58671135793373e-05, "loss": 0.2652, "step": 4805500 }, { "epoch": 2.88, "learning_rate": 3.5865013613776735e-05, "loss": 0.2556, "step": 4806000 }, { "epoch": 2.88, "learning_rate": 3.586291784814729e-05, "loss": 0.2593, "step": 4806500 }, { "epoch": 2.88, "learning_rate": 3.586081788258673e-05, "loss": 0.2611, "step": 4807000 }, { "epoch": 2.88, "learning_rate": 3.585871791702616e-05, "loss": 0.2571, "step": 4807500 }, { "epoch": 2.88, "learning_rate": 3.5856617951465596e-05, "loss": 0.2609, "step": 4808000 }, { "epoch": 2.88, "learning_rate": 3.585452218583615e-05, "loss": 0.2607, "step": 4808500 }, { "epoch": 2.88, "learning_rate": 3.585242222027559e-05, "loss": 0.2678, "step": 4809000 }, { "epoch": 2.88, "learning_rate": 3.585032225471502e-05, "loss": 0.2565, "step": 4809500 }, { "epoch": 2.88, "learning_rate": 3.5848222289154456e-05, "loss": 0.2615, "step": 4810000 }, { "epoch": 2.88, "learning_rate": 3.584612652352501e-05, "loss": 0.2649, "step": 4810500 }, { "epoch": 2.88, "learning_rate": 3.584402655796445e-05, "loss": 0.2577, "step": 4811000 }, { "epoch": 2.88, "learning_rate": 3.5841926592403884e-05, "loss": 0.2608, "step": 4811500 }, { "epoch": 2.88, "learning_rate": 3.583982662684332e-05, "loss": 0.2593, "step": 4812000 }, { "epoch": 2.89, "learning_rate": 3.583773086121388e-05, "loss": 0.2592, "step": 4812500 }, { "epoch": 2.89, "learning_rate": 3.583563089565331e-05, "loss": 0.2573, "step": 4813000 }, { "epoch": 2.89, "learning_rate": 3.5833530930092744e-05, "loss": 0.2616, "step": 4813500 }, { "epoch": 2.89, "learning_rate": 3.5831430964532185e-05, "loss": 0.2562, "step": 4814000 }, { "epoch": 2.89, "learning_rate": 3.582933099897162e-05, "loss": 0.2578, "step": 4814500 }, { "epoch": 2.89, "learning_rate": 3.582723523334217e-05, "loss": 0.2623, "step": 4815000 }, { "epoch": 2.89, "learning_rate": 3.5825135267781605e-05, "loss": 0.2609, "step": 4815500 }, { "epoch": 2.89, "learning_rate": 3.5823035302221045e-05, "loss": 0.2642, "step": 4816000 }, { "epoch": 2.89, "learning_rate": 3.582093533666048e-05, "loss": 0.2624, "step": 4816500 }, { "epoch": 2.89, "learning_rate": 3.581883957103103e-05, "loss": 0.2591, "step": 4817000 }, { "epoch": 2.89, "learning_rate": 3.5816739605470466e-05, "loss": 0.2573, "step": 4817500 }, { "epoch": 2.89, "learning_rate": 3.5814639639909906e-05, "loss": 0.2577, "step": 4818000 }, { "epoch": 2.89, "learning_rate": 3.581253967434934e-05, "loss": 0.2615, "step": 4818500 }, { "epoch": 2.89, "learning_rate": 3.581044390871989e-05, "loss": 0.2576, "step": 4819000 }, { "epoch": 2.89, "learning_rate": 3.580834394315933e-05, "loss": 0.2611, "step": 4819500 }, { "epoch": 2.89, "learning_rate": 3.580624397759877e-05, "loss": 0.2569, "step": 4820000 }, { "epoch": 2.89, "learning_rate": 3.58041440120382e-05, "loss": 0.2619, "step": 4820500 }, { "epoch": 2.89, "learning_rate": 3.5802048246408754e-05, "loss": 0.2581, "step": 4821000 }, { "epoch": 2.89, "learning_rate": 3.5799948280848194e-05, "loss": 0.2605, "step": 4821500 }, { "epoch": 2.89, "learning_rate": 3.5797852515218754e-05, "loss": 0.2615, "step": 4822000 }, { "epoch": 2.89, "learning_rate": 3.579575254965819e-05, "loss": 0.2545, "step": 4822500 }, { "epoch": 2.89, "learning_rate": 3.5793652584097615e-05, "loss": 0.2633, "step": 4823000 }, { "epoch": 2.89, "learning_rate": 3.5791552618537055e-05, "loss": 0.2565, "step": 4823500 }, { "epoch": 2.89, "learning_rate": 3.578945265297649e-05, "loss": 0.2591, "step": 4824000 }, { "epoch": 2.89, "learning_rate": 3.578735688734705e-05, "loss": 0.2582, "step": 4824500 }, { "epoch": 2.89, "learning_rate": 3.578525692178648e-05, "loss": 0.2635, "step": 4825000 }, { "epoch": 2.89, "learning_rate": 3.5783156956225916e-05, "loss": 0.2626, "step": 4825500 }, { "epoch": 2.89, "learning_rate": 3.578105699066535e-05, "loss": 0.2593, "step": 4826000 }, { "epoch": 2.89, "learning_rate": 3.577895702510479e-05, "loss": 0.2609, "step": 4826500 }, { "epoch": 2.89, "learning_rate": 3.577685705954422e-05, "loss": 0.2649, "step": 4827000 }, { "epoch": 2.89, "learning_rate": 3.5774757093983656e-05, "loss": 0.2599, "step": 4827500 }, { "epoch": 2.89, "learning_rate": 3.5772657128423096e-05, "loss": 0.2581, "step": 4828000 }, { "epoch": 2.89, "learning_rate": 3.577056136279365e-05, "loss": 0.2643, "step": 4828500 }, { "epoch": 2.9, "learning_rate": 3.5768461397233083e-05, "loss": 0.2595, "step": 4829000 }, { "epoch": 2.9, "learning_rate": 3.576636143167252e-05, "loss": 0.2546, "step": 4829500 }, { "epoch": 2.9, "learning_rate": 3.576426146611196e-05, "loss": 0.2598, "step": 4830000 }, { "epoch": 2.9, "learning_rate": 3.576216570048251e-05, "loss": 0.2615, "step": 4830500 }, { "epoch": 2.9, "learning_rate": 3.5760065734921944e-05, "loss": 0.261, "step": 4831000 }, { "epoch": 2.9, "learning_rate": 3.575796576936138e-05, "loss": 0.2611, "step": 4831500 }, { "epoch": 2.9, "learning_rate": 3.575586580380082e-05, "loss": 0.2626, "step": 4832000 }, { "epoch": 2.9, "learning_rate": 3.575377003817137e-05, "loss": 0.2652, "step": 4832500 }, { "epoch": 2.9, "learning_rate": 3.5751670072610805e-05, "loss": 0.2635, "step": 4833000 }, { "epoch": 2.9, "learning_rate": 3.5749570107050245e-05, "loss": 0.2603, "step": 4833500 }, { "epoch": 2.9, "learning_rate": 3.574747014148968e-05, "loss": 0.2621, "step": 4834000 }, { "epoch": 2.9, "learning_rate": 3.574537437586024e-05, "loss": 0.2625, "step": 4834500 }, { "epoch": 2.9, "learning_rate": 3.5743274410299666e-05, "loss": 0.259, "step": 4835000 }, { "epoch": 2.9, "learning_rate": 3.5741174444739106e-05, "loss": 0.2582, "step": 4835500 }, { "epoch": 2.9, "learning_rate": 3.573907447917854e-05, "loss": 0.2551, "step": 4836000 }, { "epoch": 2.9, "learning_rate": 3.57369787135491e-05, "loss": 0.2547, "step": 4836500 }, { "epoch": 2.9, "learning_rate": 3.5734878747988526e-05, "loss": 0.2634, "step": 4837000 }, { "epoch": 2.9, "learning_rate": 3.5732778782427967e-05, "loss": 0.2568, "step": 4837500 }, { "epoch": 2.9, "learning_rate": 3.57306788168674e-05, "loss": 0.2599, "step": 4838000 }, { "epoch": 2.9, "learning_rate": 3.572858305123796e-05, "loss": 0.2592, "step": 4838500 }, { "epoch": 2.9, "learning_rate": 3.5726483085677394e-05, "loss": 0.2594, "step": 4839000 }, { "epoch": 2.9, "learning_rate": 3.572438312011683e-05, "loss": 0.2594, "step": 4839500 }, { "epoch": 2.9, "learning_rate": 3.572228315455626e-05, "loss": 0.2585, "step": 4840000 }, { "epoch": 2.9, "learning_rate": 3.572018738892682e-05, "loss": 0.2655, "step": 4840500 }, { "epoch": 2.9, "learning_rate": 3.571808742336626e-05, "loss": 0.2595, "step": 4841000 }, { "epoch": 2.9, "learning_rate": 3.5715987457805695e-05, "loss": 0.2636, "step": 4841500 }, { "epoch": 2.9, "learning_rate": 3.571388749224512e-05, "loss": 0.2668, "step": 4842000 }, { "epoch": 2.9, "learning_rate": 3.571179172661568e-05, "loss": 0.2603, "step": 4842500 }, { "epoch": 2.9, "learning_rate": 3.570969176105512e-05, "loss": 0.263, "step": 4843000 }, { "epoch": 2.9, "learning_rate": 3.5707591795494556e-05, "loss": 0.2553, "step": 4843500 }, { "epoch": 2.9, "learning_rate": 3.570549182993399e-05, "loss": 0.2613, "step": 4844000 }, { "epoch": 2.9, "learning_rate": 3.570339186437342e-05, "loss": 0.2546, "step": 4844500 }, { "epoch": 2.9, "learning_rate": 3.570129609874398e-05, "loss": 0.2657, "step": 4845000 }, { "epoch": 2.91, "learning_rate": 3.5699196133183416e-05, "loss": 0.2591, "step": 4845500 }, { "epoch": 2.91, "learning_rate": 3.569709616762285e-05, "loss": 0.2621, "step": 4846000 }, { "epoch": 2.91, "learning_rate": 3.569499620206228e-05, "loss": 0.2583, "step": 4846500 }, { "epoch": 2.91, "learning_rate": 3.5692900436432844e-05, "loss": 0.2577, "step": 4847000 }, { "epoch": 2.91, "learning_rate": 3.569080047087228e-05, "loss": 0.2532, "step": 4847500 }, { "epoch": 2.91, "learning_rate": 3.568870470524283e-05, "loss": 0.2579, "step": 4848000 }, { "epoch": 2.91, "learning_rate": 3.568660473968227e-05, "loss": 0.2607, "step": 4848500 }, { "epoch": 2.91, "learning_rate": 3.5684504774121704e-05, "loss": 0.2592, "step": 4849000 }, { "epoch": 2.91, "learning_rate": 3.568240480856114e-05, "loss": 0.259, "step": 4849500 }, { "epoch": 2.91, "learning_rate": 3.568030484300058e-05, "loss": 0.2612, "step": 4850000 }, { "epoch": 2.91, "learning_rate": 3.567820487744001e-05, "loss": 0.2674, "step": 4850500 }, { "epoch": 2.91, "learning_rate": 3.5676104911879445e-05, "loss": 0.2672, "step": 4851000 }, { "epoch": 2.91, "learning_rate": 3.567400494631888e-05, "loss": 0.2546, "step": 4851500 }, { "epoch": 2.91, "learning_rate": 3.567190918068944e-05, "loss": 0.2589, "step": 4852000 }, { "epoch": 2.91, "learning_rate": 3.566980921512887e-05, "loss": 0.2643, "step": 4852500 }, { "epoch": 2.91, "learning_rate": 3.5667709249568306e-05, "loss": 0.2623, "step": 4853000 }, { "epoch": 2.91, "learning_rate": 3.5665609284007746e-05, "loss": 0.2548, "step": 4853500 }, { "epoch": 2.91, "learning_rate": 3.56635135183783e-05, "loss": 0.2605, "step": 4854000 }, { "epoch": 2.91, "learning_rate": 3.566141355281773e-05, "loss": 0.2691, "step": 4854500 }, { "epoch": 2.91, "learning_rate": 3.5659317787188286e-05, "loss": 0.2531, "step": 4855000 }, { "epoch": 2.91, "learning_rate": 3.565721782162773e-05, "loss": 0.2597, "step": 4855500 }, { "epoch": 2.91, "learning_rate": 3.565511785606716e-05, "loss": 0.2616, "step": 4856000 }, { "epoch": 2.91, "learning_rate": 3.5653017890506594e-05, "loss": 0.2566, "step": 4856500 }, { "epoch": 2.91, "learning_rate": 3.5650917924946034e-05, "loss": 0.2664, "step": 4857000 }, { "epoch": 2.91, "learning_rate": 3.564881795938547e-05, "loss": 0.26, "step": 4857500 }, { "epoch": 2.91, "learning_rate": 3.56467179938249e-05, "loss": 0.2554, "step": 4858000 }, { "epoch": 2.91, "learning_rate": 3.5644618028264334e-05, "loss": 0.2678, "step": 4858500 }, { "epoch": 2.91, "learning_rate": 3.5642522262634895e-05, "loss": 0.261, "step": 4859000 }, { "epoch": 2.91, "learning_rate": 3.564042229707433e-05, "loss": 0.2645, "step": 4859500 }, { "epoch": 2.91, "learning_rate": 3.563832233151376e-05, "loss": 0.2673, "step": 4860000 }, { "epoch": 2.91, "learning_rate": 3.56362223659532e-05, "loss": 0.2628, "step": 4860500 }, { "epoch": 2.91, "learning_rate": 3.5634126600323755e-05, "loss": 0.2585, "step": 4861000 }, { "epoch": 2.91, "learning_rate": 3.563202663476319e-05, "loss": 0.262, "step": 4861500 }, { "epoch": 2.91, "learning_rate": 3.562992666920263e-05, "loss": 0.2607, "step": 4862000 }, { "epoch": 2.92, "learning_rate": 3.562782670364206e-05, "loss": 0.2592, "step": 4862500 }, { "epoch": 2.92, "learning_rate": 3.5625730938012616e-05, "loss": 0.2565, "step": 4863000 }, { "epoch": 2.92, "learning_rate": 3.562363517238317e-05, "loss": 0.2605, "step": 4863500 }, { "epoch": 2.92, "learning_rate": 3.56215352068226e-05, "loss": 0.2615, "step": 4864000 }, { "epoch": 2.92, "learning_rate": 3.561943524126204e-05, "loss": 0.2595, "step": 4864500 }, { "epoch": 2.92, "learning_rate": 3.561733527570148e-05, "loss": 0.2601, "step": 4865000 }, { "epoch": 2.92, "learning_rate": 3.561523531014091e-05, "loss": 0.2618, "step": 4865500 }, { "epoch": 2.92, "learning_rate": 3.561313534458035e-05, "loss": 0.2621, "step": 4866000 }, { "epoch": 2.92, "learning_rate": 3.5611035379019784e-05, "loss": 0.2646, "step": 4866500 }, { "epoch": 2.92, "learning_rate": 3.5608935413459224e-05, "loss": 0.262, "step": 4867000 }, { "epoch": 2.92, "learning_rate": 3.560683964782978e-05, "loss": 0.2549, "step": 4867500 }, { "epoch": 2.92, "learning_rate": 3.560474388220033e-05, "loss": 0.2616, "step": 4868000 }, { "epoch": 2.92, "learning_rate": 3.560264811657089e-05, "loss": 0.2618, "step": 4868500 }, { "epoch": 2.92, "learning_rate": 3.5600548151010325e-05, "loss": 0.2651, "step": 4869000 }, { "epoch": 2.92, "learning_rate": 3.559844818544976e-05, "loss": 0.2617, "step": 4869500 }, { "epoch": 2.92, "learning_rate": 3.559634821988919e-05, "loss": 0.262, "step": 4870000 }, { "epoch": 2.92, "learning_rate": 3.5594248254328625e-05, "loss": 0.2612, "step": 4870500 }, { "epoch": 2.92, "learning_rate": 3.559214828876806e-05, "loss": 0.2536, "step": 4871000 }, { "epoch": 2.92, "learning_rate": 3.55900483232075e-05, "loss": 0.2643, "step": 4871500 }, { "epoch": 2.92, "learning_rate": 3.558794835764693e-05, "loss": 0.2602, "step": 4872000 }, { "epoch": 2.92, "learning_rate": 3.5585848392086366e-05, "loss": 0.2676, "step": 4872500 }, { "epoch": 2.92, "learning_rate": 3.5583748426525806e-05, "loss": 0.2559, "step": 4873000 }, { "epoch": 2.92, "learning_rate": 3.558164846096524e-05, "loss": 0.2603, "step": 4873500 }, { "epoch": 2.92, "learning_rate": 3.557954849540468e-05, "loss": 0.261, "step": 4874000 }, { "epoch": 2.92, "learning_rate": 3.5577452729775234e-05, "loss": 0.2571, "step": 4874500 }, { "epoch": 2.92, "learning_rate": 3.557535276421467e-05, "loss": 0.2586, "step": 4875000 }, { "epoch": 2.92, "learning_rate": 3.55732527986541e-05, "loss": 0.2678, "step": 4875500 }, { "epoch": 2.92, "learning_rate": 3.557115283309354e-05, "loss": 0.2556, "step": 4876000 }, { "epoch": 2.92, "learning_rate": 3.5569057067464094e-05, "loss": 0.2601, "step": 4876500 }, { "epoch": 2.92, "learning_rate": 3.556695710190353e-05, "loss": 0.2555, "step": 4877000 }, { "epoch": 2.92, "learning_rate": 3.556486133627408e-05, "loss": 0.2594, "step": 4877500 }, { "epoch": 2.92, "learning_rate": 3.5562761370713515e-05, "loss": 0.2596, "step": 4878000 }, { "epoch": 2.92, "learning_rate": 3.5560661405152955e-05, "loss": 0.2608, "step": 4878500 }, { "epoch": 2.93, "learning_rate": 3.555856143959239e-05, "loss": 0.2571, "step": 4879000 }, { "epoch": 2.93, "learning_rate": 3.555646147403182e-05, "loss": 0.2649, "step": 4879500 }, { "epoch": 2.93, "learning_rate": 3.555436150847126e-05, "loss": 0.2544, "step": 4880000 }, { "epoch": 2.93, "learning_rate": 3.5552261542910696e-05, "loss": 0.2615, "step": 4880500 }, { "epoch": 2.93, "learning_rate": 3.5550161577350136e-05, "loss": 0.2588, "step": 4881000 }, { "epoch": 2.93, "learning_rate": 3.554806581172069e-05, "loss": 0.2595, "step": 4881500 }, { "epoch": 2.93, "learning_rate": 3.554596584616012e-05, "loss": 0.2644, "step": 4882000 }, { "epoch": 2.93, "learning_rate": 3.5543865880599556e-05, "loss": 0.258, "step": 4882500 }, { "epoch": 2.93, "learning_rate": 3.5541765915039e-05, "loss": 0.2573, "step": 4883000 }, { "epoch": 2.93, "learning_rate": 3.553967014940955e-05, "loss": 0.2565, "step": 4883500 }, { "epoch": 2.93, "learning_rate": 3.5537570183848984e-05, "loss": 0.2601, "step": 4884000 }, { "epoch": 2.93, "learning_rate": 3.553547021828842e-05, "loss": 0.2695, "step": 4884500 }, { "epoch": 2.93, "learning_rate": 3.553337025272786e-05, "loss": 0.2594, "step": 4885000 }, { "epoch": 2.93, "learning_rate": 3.553127448709841e-05, "loss": 0.2592, "step": 4885500 }, { "epoch": 2.93, "learning_rate": 3.5529174521537844e-05, "loss": 0.2592, "step": 4886000 }, { "epoch": 2.93, "learning_rate": 3.5527074555977285e-05, "loss": 0.2614, "step": 4886500 }, { "epoch": 2.93, "learning_rate": 3.552497459041672e-05, "loss": 0.2561, "step": 4887000 }, { "epoch": 2.93, "learning_rate": 3.552287882478727e-05, "loss": 0.2591, "step": 4887500 }, { "epoch": 2.93, "learning_rate": 3.5520778859226705e-05, "loss": 0.2536, "step": 4888000 }, { "epoch": 2.93, "learning_rate": 3.5518678893666145e-05, "loss": 0.2549, "step": 4888500 }, { "epoch": 2.93, "learning_rate": 3.551657892810558e-05, "loss": 0.2525, "step": 4889000 }, { "epoch": 2.93, "learning_rate": 3.551448316247613e-05, "loss": 0.2578, "step": 4889500 }, { "epoch": 2.93, "learning_rate": 3.5512383196915566e-05, "loss": 0.2678, "step": 4890000 }, { "epoch": 2.93, "learning_rate": 3.5510283231355006e-05, "loss": 0.2598, "step": 4890500 }, { "epoch": 2.93, "learning_rate": 3.550818326579444e-05, "loss": 0.265, "step": 4891000 }, { "epoch": 2.93, "learning_rate": 3.550608750016499e-05, "loss": 0.259, "step": 4891500 }, { "epoch": 2.93, "learning_rate": 3.5503987534604427e-05, "loss": 0.2545, "step": 4892000 }, { "epoch": 2.93, "learning_rate": 3.550188756904387e-05, "loss": 0.2612, "step": 4892500 }, { "epoch": 2.93, "learning_rate": 3.54997876034833e-05, "loss": 0.2649, "step": 4893000 }, { "epoch": 2.93, "learning_rate": 3.5497691837853854e-05, "loss": 0.262, "step": 4893500 }, { "epoch": 2.93, "learning_rate": 3.5495591872293294e-05, "loss": 0.2574, "step": 4894000 }, { "epoch": 2.93, "learning_rate": 3.549349190673273e-05, "loss": 0.2609, "step": 4894500 }, { "epoch": 2.93, "learning_rate": 3.549139614110329e-05, "loss": 0.2586, "step": 4895000 }, { "epoch": 2.94, "learning_rate": 3.548929617554272e-05, "loss": 0.2627, "step": 4895500 }, { "epoch": 2.94, "learning_rate": 3.5487196209982155e-05, "loss": 0.2591, "step": 4896000 }, { "epoch": 2.94, "learning_rate": 3.548509624442159e-05, "loss": 0.2685, "step": 4896500 }, { "epoch": 2.94, "learning_rate": 3.548300047879215e-05, "loss": 0.2632, "step": 4897000 }, { "epoch": 2.94, "learning_rate": 3.548090051323158e-05, "loss": 0.2593, "step": 4897500 }, { "epoch": 2.94, "learning_rate": 3.547880054767102e-05, "loss": 0.2601, "step": 4898000 }, { "epoch": 2.94, "learning_rate": 3.547670058211045e-05, "loss": 0.26, "step": 4898500 }, { "epoch": 2.94, "learning_rate": 3.547460061654989e-05, "loss": 0.2597, "step": 4899000 }, { "epoch": 2.94, "learning_rate": 3.547250065098932e-05, "loss": 0.2581, "step": 4899500 }, { "epoch": 2.94, "learning_rate": 3.5470400685428756e-05, "loss": 0.2626, "step": 4900000 }, { "epoch": 2.94, "eval_loss": 0.23919837176799774, "eval_runtime": 1452.8451, "eval_samples_per_second": 362.544, "eval_steps_per_second": 60.424, "step": 4900000 }, { "epoch": 2.94, "learning_rate": 3.5468300719868196e-05, "loss": 0.253, "step": 4900500 }, { "epoch": 2.94, "learning_rate": 3.546620075430763e-05, "loss": 0.256, "step": 4901000 }, { "epoch": 2.94, "learning_rate": 3.546410078874706e-05, "loss": 0.2568, "step": 4901500 }, { "epoch": 2.94, "learning_rate": 3.5462000823186504e-05, "loss": 0.2584, "step": 4902000 }, { "epoch": 2.94, "learning_rate": 3.545990085762594e-05, "loss": 0.2615, "step": 4902500 }, { "epoch": 2.94, "learning_rate": 3.545780509199649e-05, "loss": 0.2542, "step": 4903000 }, { "epoch": 2.94, "learning_rate": 3.5455705126435924e-05, "loss": 0.256, "step": 4903500 }, { "epoch": 2.94, "learning_rate": 3.5453605160875364e-05, "loss": 0.2598, "step": 4904000 }, { "epoch": 2.94, "learning_rate": 3.54515051953148e-05, "loss": 0.2558, "step": 4904500 }, { "epoch": 2.94, "learning_rate": 3.544940942968535e-05, "loss": 0.2575, "step": 4905000 }, { "epoch": 2.94, "learning_rate": 3.5447309464124785e-05, "loss": 0.2589, "step": 4905500 }, { "epoch": 2.94, "learning_rate": 3.5445209498564225e-05, "loss": 0.2557, "step": 4906000 }, { "epoch": 2.94, "learning_rate": 3.544310953300366e-05, "loss": 0.2588, "step": 4906500 }, { "epoch": 2.94, "learning_rate": 3.544101376737421e-05, "loss": 0.2655, "step": 4907000 }, { "epoch": 2.94, "learning_rate": 3.543891380181365e-05, "loss": 0.256, "step": 4907500 }, { "epoch": 2.94, "learning_rate": 3.5436813836253086e-05, "loss": 0.2593, "step": 4908000 }, { "epoch": 2.94, "learning_rate": 3.543471387069252e-05, "loss": 0.2615, "step": 4908500 }, { "epoch": 2.94, "learning_rate": 3.543261390513196e-05, "loss": 0.2583, "step": 4909000 }, { "epoch": 2.94, "learning_rate": 3.543051393957139e-05, "loss": 0.2609, "step": 4909500 }, { "epoch": 2.94, "learning_rate": 3.5428413974010826e-05, "loss": 0.2634, "step": 4910000 }, { "epoch": 2.94, "learning_rate": 3.5426314008450267e-05, "loss": 0.2607, "step": 4910500 }, { "epoch": 2.94, "learning_rate": 3.542421824282082e-05, "loss": 0.2653, "step": 4911000 }, { "epoch": 2.94, "learning_rate": 3.5422118277260254e-05, "loss": 0.257, "step": 4911500 }, { "epoch": 2.94, "learning_rate": 3.542001831169969e-05, "loss": 0.2568, "step": 4912000 }, { "epoch": 2.95, "learning_rate": 3.541791834613913e-05, "loss": 0.2587, "step": 4912500 }, { "epoch": 2.95, "learning_rate": 3.541582258050968e-05, "loss": 0.2529, "step": 4913000 }, { "epoch": 2.95, "learning_rate": 3.5413722614949114e-05, "loss": 0.2604, "step": 4913500 }, { "epoch": 2.95, "learning_rate": 3.541162264938855e-05, "loss": 0.2568, "step": 4914000 }, { "epoch": 2.95, "learning_rate": 3.540952268382799e-05, "loss": 0.2621, "step": 4914500 }, { "epoch": 2.95, "learning_rate": 3.540742691819854e-05, "loss": 0.2557, "step": 4915000 }, { "epoch": 2.95, "learning_rate": 3.5405326952637975e-05, "loss": 0.2599, "step": 4915500 }, { "epoch": 2.95, "learning_rate": 3.5403226987077415e-05, "loss": 0.2594, "step": 4916000 }, { "epoch": 2.95, "learning_rate": 3.540112702151685e-05, "loss": 0.2544, "step": 4916500 }, { "epoch": 2.95, "learning_rate": 3.53990312558874e-05, "loss": 0.2558, "step": 4917000 }, { "epoch": 2.95, "learning_rate": 3.5396931290326836e-05, "loss": 0.2523, "step": 4917500 }, { "epoch": 2.95, "learning_rate": 3.5394831324766276e-05, "loss": 0.2564, "step": 4918000 }, { "epoch": 2.95, "learning_rate": 3.539273135920571e-05, "loss": 0.2568, "step": 4918500 }, { "epoch": 2.95, "learning_rate": 3.539063559357626e-05, "loss": 0.2563, "step": 4919000 }, { "epoch": 2.95, "learning_rate": 3.5388535628015697e-05, "loss": 0.2599, "step": 4919500 }, { "epoch": 2.95, "learning_rate": 3.538643566245514e-05, "loss": 0.2617, "step": 4920000 }, { "epoch": 2.95, "learning_rate": 3.538433569689457e-05, "loss": 0.2573, "step": 4920500 }, { "epoch": 2.95, "learning_rate": 3.5382235731334004e-05, "loss": 0.2622, "step": 4921000 }, { "epoch": 2.95, "learning_rate": 3.5380139965704564e-05, "loss": 0.2574, "step": 4921500 }, { "epoch": 2.95, "learning_rate": 3.5378040000144e-05, "loss": 0.264, "step": 4922000 }, { "epoch": 2.95, "learning_rate": 3.537594003458343e-05, "loss": 0.2594, "step": 4922500 }, { "epoch": 2.95, "learning_rate": 3.537384006902287e-05, "loss": 0.2595, "step": 4923000 }, { "epoch": 2.95, "learning_rate": 3.5371744303393425e-05, "loss": 0.2631, "step": 4923500 }, { "epoch": 2.95, "learning_rate": 3.536964433783286e-05, "loss": 0.257, "step": 4924000 }, { "epoch": 2.95, "learning_rate": 3.536754437227229e-05, "loss": 0.2615, "step": 4924500 }, { "epoch": 2.95, "learning_rate": 3.536544440671173e-05, "loss": 0.2522, "step": 4925000 }, { "epoch": 2.95, "learning_rate": 3.5363348641082285e-05, "loss": 0.2632, "step": 4925500 }, { "epoch": 2.95, "learning_rate": 3.536124867552172e-05, "loss": 0.2608, "step": 4926000 }, { "epoch": 2.95, "learning_rate": 3.535914870996115e-05, "loss": 0.2549, "step": 4926500 }, { "epoch": 2.95, "learning_rate": 3.535704874440059e-05, "loss": 0.2586, "step": 4927000 }, { "epoch": 2.95, "learning_rate": 3.5354952978771146e-05, "loss": 0.2562, "step": 4927500 }, { "epoch": 2.95, "learning_rate": 3.5352857213141707e-05, "loss": 0.2585, "step": 4928000 }, { "epoch": 2.95, "learning_rate": 3.535075724758114e-05, "loss": 0.2549, "step": 4928500 }, { "epoch": 2.96, "learning_rate": 3.534865728202058e-05, "loss": 0.2544, "step": 4929000 }, { "epoch": 2.96, "learning_rate": 3.534655731646001e-05, "loss": 0.2621, "step": 4929500 }, { "epoch": 2.96, "learning_rate": 3.534445735089944e-05, "loss": 0.265, "step": 4930000 }, { "epoch": 2.96, "learning_rate": 3.534235738533888e-05, "loss": 0.2635, "step": 4930500 }, { "epoch": 2.96, "learning_rate": 3.5340257419778314e-05, "loss": 0.2576, "step": 4931000 }, { "epoch": 2.96, "learning_rate": 3.533815745421775e-05, "loss": 0.2592, "step": 4931500 }, { "epoch": 2.96, "learning_rate": 3.533605748865719e-05, "loss": 0.2605, "step": 4932000 }, { "epoch": 2.96, "learning_rate": 3.533395752309662e-05, "loss": 0.256, "step": 4932500 }, { "epoch": 2.96, "learning_rate": 3.5331857557536055e-05, "loss": 0.2584, "step": 4933000 }, { "epoch": 2.96, "learning_rate": 3.5329757591975495e-05, "loss": 0.2622, "step": 4933500 }, { "epoch": 2.96, "learning_rate": 3.532766182634605e-05, "loss": 0.2607, "step": 4934000 }, { "epoch": 2.96, "learning_rate": 3.532556186078548e-05, "loss": 0.2564, "step": 4934500 }, { "epoch": 2.96, "learning_rate": 3.5323461895224915e-05, "loss": 0.2575, "step": 4935000 }, { "epoch": 2.96, "learning_rate": 3.5321361929664356e-05, "loss": 0.2607, "step": 4935500 }, { "epoch": 2.96, "learning_rate": 3.531926616403491e-05, "loss": 0.2606, "step": 4936000 }, { "epoch": 2.96, "learning_rate": 3.531716619847434e-05, "loss": 0.2629, "step": 4936500 }, { "epoch": 2.96, "learning_rate": 3.531506623291378e-05, "loss": 0.2541, "step": 4937000 }, { "epoch": 2.96, "learning_rate": 3.5312966267353216e-05, "loss": 0.2582, "step": 4937500 }, { "epoch": 2.96, "learning_rate": 3.531087050172377e-05, "loss": 0.2555, "step": 4938000 }, { "epoch": 2.96, "learning_rate": 3.5308770536163203e-05, "loss": 0.2572, "step": 4938500 }, { "epoch": 2.96, "learning_rate": 3.5306670570602644e-05, "loss": 0.2616, "step": 4939000 }, { "epoch": 2.96, "learning_rate": 3.530457060504208e-05, "loss": 0.2592, "step": 4939500 }, { "epoch": 2.96, "learning_rate": 3.530247063948151e-05, "loss": 0.2557, "step": 4940000 }, { "epoch": 2.96, "learning_rate": 3.5300374873852064e-05, "loss": 0.2608, "step": 4940500 }, { "epoch": 2.96, "learning_rate": 3.5298274908291504e-05, "loss": 0.2599, "step": 4941000 }, { "epoch": 2.96, "learning_rate": 3.529617494273094e-05, "loss": 0.2614, "step": 4941500 }, { "epoch": 2.96, "learning_rate": 3.529407497717037e-05, "loss": 0.2556, "step": 4942000 }, { "epoch": 2.96, "learning_rate": 3.529197921154093e-05, "loss": 0.2605, "step": 4942500 }, { "epoch": 2.96, "learning_rate": 3.5289879245980365e-05, "loss": 0.2608, "step": 4943000 }, { "epoch": 2.96, "learning_rate": 3.52877792804198e-05, "loss": 0.2564, "step": 4943500 }, { "epoch": 2.96, "learning_rate": 3.528567931485924e-05, "loss": 0.2615, "step": 4944000 }, { "epoch": 2.96, "learning_rate": 3.528358354922979e-05, "loss": 0.2578, "step": 4944500 }, { "epoch": 2.96, "learning_rate": 3.528148778360035e-05, "loss": 0.2592, "step": 4945000 }, { "epoch": 2.97, "learning_rate": 3.5279387818039786e-05, "loss": 0.2588, "step": 4945500 }, { "epoch": 2.97, "learning_rate": 3.527728785247921e-05, "loss": 0.2615, "step": 4946000 }, { "epoch": 2.97, "learning_rate": 3.527518788691865e-05, "loss": 0.2516, "step": 4946500 }, { "epoch": 2.97, "learning_rate": 3.5273087921358087e-05, "loss": 0.2621, "step": 4947000 }, { "epoch": 2.97, "learning_rate": 3.527098795579752e-05, "loss": 0.2582, "step": 4947500 }, { "epoch": 2.97, "learning_rate": 3.526888799023696e-05, "loss": 0.2637, "step": 4948000 }, { "epoch": 2.97, "learning_rate": 3.5266788024676394e-05, "loss": 0.2595, "step": 4948500 }, { "epoch": 2.97, "learning_rate": 3.5264696458978074e-05, "loss": 0.2598, "step": 4949000 }, { "epoch": 2.97, "learning_rate": 3.526259649341751e-05, "loss": 0.2585, "step": 4949500 }, { "epoch": 2.97, "learning_rate": 3.526049652785695e-05, "loss": 0.2607, "step": 4950000 }, { "epoch": 2.97, "learning_rate": 3.525839656229638e-05, "loss": 0.2606, "step": 4950500 }, { "epoch": 2.97, "learning_rate": 3.525629659673581e-05, "loss": 0.2614, "step": 4951000 }, { "epoch": 2.97, "learning_rate": 3.525419663117525e-05, "loss": 0.2589, "step": 4951500 }, { "epoch": 2.97, "learning_rate": 3.525209666561468e-05, "loss": 0.2631, "step": 4952000 }, { "epoch": 2.97, "learning_rate": 3.5249996700054115e-05, "loss": 0.2568, "step": 4952500 }, { "epoch": 2.97, "learning_rate": 3.5247900934424676e-05, "loss": 0.2614, "step": 4953000 }, { "epoch": 2.97, "learning_rate": 3.524580096886411e-05, "loss": 0.2642, "step": 4953500 }, { "epoch": 2.97, "learning_rate": 3.524370100330354e-05, "loss": 0.2579, "step": 4954000 }, { "epoch": 2.97, "learning_rate": 3.5241601037742976e-05, "loss": 0.2551, "step": 4954500 }, { "epoch": 2.97, "learning_rate": 3.5239505272113536e-05, "loss": 0.2615, "step": 4955000 }, { "epoch": 2.97, "learning_rate": 3.523740530655297e-05, "loss": 0.2652, "step": 4955500 }, { "epoch": 2.97, "learning_rate": 3.523531374085465e-05, "loss": 0.2628, "step": 4956000 }, { "epoch": 2.97, "learning_rate": 3.5233213775294084e-05, "loss": 0.2595, "step": 4956500 }, { "epoch": 2.97, "learning_rate": 3.523111380973352e-05, "loss": 0.2615, "step": 4957000 }, { "epoch": 2.97, "learning_rate": 3.522901384417296e-05, "loss": 0.2568, "step": 4957500 }, { "epoch": 2.97, "learning_rate": 3.522691387861239e-05, "loss": 0.2637, "step": 4958000 }, { "epoch": 2.97, "learning_rate": 3.5224813913051824e-05, "loss": 0.2596, "step": 4958500 }, { "epoch": 2.97, "learning_rate": 3.5222713947491264e-05, "loss": 0.2588, "step": 4959000 }, { "epoch": 2.97, "learning_rate": 3.52206139819307e-05, "loss": 0.2628, "step": 4959500 }, { "epoch": 2.97, "learning_rate": 3.521851821630125e-05, "loss": 0.2564, "step": 4960000 }, { "epoch": 2.97, "learning_rate": 3.5216418250740685e-05, "loss": 0.2595, "step": 4960500 }, { "epoch": 2.97, "learning_rate": 3.5214322485111245e-05, "loss": 0.2639, "step": 4961000 }, { "epoch": 2.97, "learning_rate": 3.521222251955068e-05, "loss": 0.259, "step": 4961500 }, { "epoch": 2.97, "learning_rate": 3.521012255399011e-05, "loss": 0.256, "step": 4962000 }, { "epoch": 2.98, "learning_rate": 3.520802258842955e-05, "loss": 0.2675, "step": 4962500 }, { "epoch": 2.98, "learning_rate": 3.5205922622868986e-05, "loss": 0.2682, "step": 4963000 }, { "epoch": 2.98, "learning_rate": 3.520382265730842e-05, "loss": 0.2602, "step": 4963500 }, { "epoch": 2.98, "learning_rate": 3.520172269174786e-05, "loss": 0.2587, "step": 4964000 }, { "epoch": 2.98, "learning_rate": 3.519962272618729e-05, "loss": 0.2563, "step": 4964500 }, { "epoch": 2.98, "learning_rate": 3.519752276062672e-05, "loss": 0.2564, "step": 4965000 }, { "epoch": 2.98, "learning_rate": 3.519542699499728e-05, "loss": 0.2576, "step": 4965500 }, { "epoch": 2.98, "learning_rate": 3.519332702943672e-05, "loss": 0.2557, "step": 4966000 }, { "epoch": 2.98, "learning_rate": 3.5191227063876154e-05, "loss": 0.268, "step": 4966500 }, { "epoch": 2.98, "learning_rate": 3.518912709831559e-05, "loss": 0.2594, "step": 4967000 }, { "epoch": 2.98, "learning_rate": 3.518702713275502e-05, "loss": 0.2518, "step": 4967500 }, { "epoch": 2.98, "learning_rate": 3.5184927167194454e-05, "loss": 0.2577, "step": 4968000 }, { "epoch": 2.98, "learning_rate": 3.518282720163389e-05, "loss": 0.2597, "step": 4968500 }, { "epoch": 2.98, "learning_rate": 3.518072723607333e-05, "loss": 0.2591, "step": 4969000 }, { "epoch": 2.98, "learning_rate": 3.517862727051276e-05, "loss": 0.2588, "step": 4969500 }, { "epoch": 2.98, "learning_rate": 3.5176531504883315e-05, "loss": 0.265, "step": 4970000 }, { "epoch": 2.98, "learning_rate": 3.5174435739253875e-05, "loss": 0.2613, "step": 4970500 }, { "epoch": 2.98, "learning_rate": 3.5172335773693316e-05, "loss": 0.258, "step": 4971000 }, { "epoch": 2.98, "learning_rate": 3.517023580813275e-05, "loss": 0.2555, "step": 4971500 }, { "epoch": 2.98, "learning_rate": 3.516813584257218e-05, "loss": 0.2558, "step": 4972000 }, { "epoch": 2.98, "learning_rate": 3.5166035877011616e-05, "loss": 0.2597, "step": 4972500 }, { "epoch": 2.98, "learning_rate": 3.516393591145105e-05, "loss": 0.2586, "step": 4973000 }, { "epoch": 2.98, "learning_rate": 3.516183594589048e-05, "loss": 0.2611, "step": 4973500 }, { "epoch": 2.98, "learning_rate": 3.515973598032992e-05, "loss": 0.2593, "step": 4974000 }, { "epoch": 2.98, "learning_rate": 3.515764021470048e-05, "loss": 0.2568, "step": 4974500 }, { "epoch": 2.98, "learning_rate": 3.515554024913991e-05, "loss": 0.2666, "step": 4975000 }, { "epoch": 2.98, "learning_rate": 3.5153440283579344e-05, "loss": 0.2546, "step": 4975500 }, { "epoch": 2.98, "learning_rate": 3.5151340318018784e-05, "loss": 0.2571, "step": 4976000 }, { "epoch": 2.98, "learning_rate": 3.5149244552389344e-05, "loss": 0.2622, "step": 4976500 }, { "epoch": 2.98, "learning_rate": 3.514714458682877e-05, "loss": 0.259, "step": 4977000 }, { "epoch": 2.98, "learning_rate": 3.514504462126821e-05, "loss": 0.2686, "step": 4977500 }, { "epoch": 2.98, "learning_rate": 3.5142944655707645e-05, "loss": 0.2625, "step": 4978000 }, { "epoch": 2.98, "learning_rate": 3.5140848890078205e-05, "loss": 0.2591, "step": 4978500 }, { "epoch": 2.99, "learning_rate": 3.513874892451764e-05, "loss": 0.2592, "step": 4979000 }, { "epoch": 2.99, "learning_rate": 3.513664895895707e-05, "loss": 0.2586, "step": 4979500 }, { "epoch": 2.99, "learning_rate": 3.5134548993396505e-05, "loss": 0.26, "step": 4980000 }, { "epoch": 2.99, "learning_rate": 3.5132453227767066e-05, "loss": 0.2586, "step": 4980500 }, { "epoch": 2.99, "learning_rate": 3.51303532622065e-05, "loss": 0.2625, "step": 4981000 }, { "epoch": 2.99, "learning_rate": 3.512825329664594e-05, "loss": 0.2577, "step": 4981500 }, { "epoch": 2.99, "learning_rate": 3.5126153331085366e-05, "loss": 0.2511, "step": 4982000 }, { "epoch": 2.99, "learning_rate": 3.5124057565455926e-05, "loss": 0.2573, "step": 4982500 }, { "epoch": 2.99, "learning_rate": 3.5121957599895367e-05, "loss": 0.2605, "step": 4983000 }, { "epoch": 2.99, "learning_rate": 3.51198576343348e-05, "loss": 0.2561, "step": 4983500 }, { "epoch": 2.99, "learning_rate": 3.511775766877423e-05, "loss": 0.2542, "step": 4984000 }, { "epoch": 2.99, "learning_rate": 3.511565770321367e-05, "loss": 0.2602, "step": 4984500 }, { "epoch": 2.99, "learning_rate": 3.51135577376531e-05, "loss": 0.2593, "step": 4985000 }, { "epoch": 2.99, "learning_rate": 3.5111457772092534e-05, "loss": 0.2567, "step": 4985500 }, { "epoch": 2.99, "learning_rate": 3.5109357806531974e-05, "loss": 0.2601, "step": 4986000 }, { "epoch": 2.99, "learning_rate": 3.510726204090253e-05, "loss": 0.2645, "step": 4986500 }, { "epoch": 2.99, "learning_rate": 3.510516207534196e-05, "loss": 0.263, "step": 4987000 }, { "epoch": 2.99, "learning_rate": 3.5103062109781395e-05, "loss": 0.2636, "step": 4987500 }, { "epoch": 2.99, "learning_rate": 3.5100962144220835e-05, "loss": 0.2648, "step": 4988000 }, { "epoch": 2.99, "learning_rate": 3.5098866378591395e-05, "loss": 0.2594, "step": 4988500 }, { "epoch": 2.99, "learning_rate": 3.509676641303082e-05, "loss": 0.2582, "step": 4989000 }, { "epoch": 2.99, "learning_rate": 3.509467064740138e-05, "loss": 0.2614, "step": 4989500 }, { "epoch": 2.99, "learning_rate": 3.509257068184082e-05, "loss": 0.2638, "step": 4990000 }, { "epoch": 2.99, "learning_rate": 3.5090470716280256e-05, "loss": 0.2574, "step": 4990500 }, { "epoch": 2.99, "learning_rate": 3.508837075071969e-05, "loss": 0.2662, "step": 4991000 }, { "epoch": 2.99, "learning_rate": 3.508627078515912e-05, "loss": 0.263, "step": 4991500 }, { "epoch": 2.99, "learning_rate": 3.5084170819598556e-05, "loss": 0.2554, "step": 4992000 }, { "epoch": 2.99, "learning_rate": 3.508207085403799e-05, "loss": 0.2665, "step": 4992500 }, { "epoch": 2.99, "learning_rate": 3.507997088847743e-05, "loss": 0.2624, "step": 4993000 }, { "epoch": 2.99, "learning_rate": 3.507787512284799e-05, "loss": 0.2617, "step": 4993500 }, { "epoch": 2.99, "learning_rate": 3.507577515728742e-05, "loss": 0.2586, "step": 4994000 }, { "epoch": 2.99, "learning_rate": 3.507367519172685e-05, "loss": 0.2604, "step": 4994500 }, { "epoch": 2.99, "learning_rate": 3.507157522616629e-05, "loss": 0.2632, "step": 4995000 }, { "epoch": 3.0, "learning_rate": 3.506947946053685e-05, "loss": 0.2676, "step": 4995500 }, { "epoch": 3.0, "learning_rate": 3.506737949497628e-05, "loss": 0.2569, "step": 4996000 }, { "epoch": 3.0, "learning_rate": 3.506527952941572e-05, "loss": 0.2523, "step": 4996500 }, { "epoch": 3.0, "learning_rate": 3.506317956385515e-05, "loss": 0.2606, "step": 4997000 }, { "epoch": 3.0, "learning_rate": 3.506108379822571e-05, "loss": 0.261, "step": 4997500 }, { "epoch": 3.0, "learning_rate": 3.5058983832665145e-05, "loss": 0.258, "step": 4998000 }, { "epoch": 3.0, "learning_rate": 3.505688386710458e-05, "loss": 0.2582, "step": 4998500 }, { "epoch": 3.0, "learning_rate": 3.505478390154401e-05, "loss": 0.2578, "step": 4999000 }, { "epoch": 3.0, "learning_rate": 3.5052683935983446e-05, "loss": 0.2593, "step": 4999500 }, { "epoch": 3.0, "learning_rate": 3.5050583970422886e-05, "loss": 0.2615, "step": 5000000 }, { "epoch": 3.0, "eval_loss": 0.23785440623760223, "eval_runtime": 1455.1743, "eval_samples_per_second": 361.964, "eval_steps_per_second": 60.327, "step": 5000000 }, { "epoch": 3.0, "learning_rate": 3.504848400486232e-05, "loss": 0.2595, "step": 5000500 }, { "epoch": 3.0, "learning_rate": 3.504638403930175e-05, "loss": 0.2606, "step": 5001000 }, { "epoch": 3.0, "learning_rate": 3.5044288273672306e-05, "loss": 0.2657, "step": 5001500 }, { "epoch": 3.0, "learning_rate": 3.5042188308111747e-05, "loss": 0.2551, "step": 5002000 }, { "epoch": 3.0, "learning_rate": 3.504009674241343e-05, "loss": 0.2581, "step": 5002500 }, { "epoch": 3.0, "learning_rate": 3.503799677685286e-05, "loss": 0.257, "step": 5003000 }, { "epoch": 3.0, "learning_rate": 3.5035896811292294e-05, "loss": 0.2624, "step": 5003500 }, { "epoch": 3.0, "learning_rate": 3.5033796845731734e-05, "loss": 0.2572, "step": 5004000 }, { "epoch": 3.0, "learning_rate": 3.503169688017117e-05, "loss": 0.2479, "step": 5004500 }, { "epoch": 3.0, "learning_rate": 3.50295969146106e-05, "loss": 0.2434, "step": 5005000 }, { "epoch": 3.0, "learning_rate": 3.5027496949050035e-05, "loss": 0.2483, "step": 5005500 }, { "epoch": 3.0, "learning_rate": 3.502539698348947e-05, "loss": 0.2604, "step": 5006000 }, { "epoch": 3.0, "learning_rate": 3.50232970179289e-05, "loss": 0.252, "step": 5006500 }, { "epoch": 3.0, "learning_rate": 3.502119705236834e-05, "loss": 0.2558, "step": 5007000 }, { "epoch": 3.0, "learning_rate": 3.50191012867389e-05, "loss": 0.256, "step": 5007500 }, { "epoch": 3.0, "learning_rate": 3.501700132117833e-05, "loss": 0.2542, "step": 5008000 }, { "epoch": 3.0, "learning_rate": 3.501490135561776e-05, "loss": 0.2535, "step": 5008500 }, { "epoch": 3.0, "learning_rate": 3.50128013900572e-05, "loss": 0.2477, "step": 5009000 }, { "epoch": 3.0, "learning_rate": 3.5010701424496636e-05, "loss": 0.2521, "step": 5009500 }, { "epoch": 3.0, "learning_rate": 3.500860145893607e-05, "loss": 0.2525, "step": 5010000 }, { "epoch": 3.0, "learning_rate": 3.500650149337551e-05, "loss": 0.2594, "step": 5010500 }, { "epoch": 3.0, "learning_rate": 3.500440152781494e-05, "loss": 0.254, "step": 5011000 }, { "epoch": 3.0, "learning_rate": 3.50023057621855e-05, "loss": 0.2573, "step": 5011500 }, { "epoch": 3.0, "learning_rate": 3.500020579662494e-05, "loss": 0.2555, "step": 5012000 }, { "epoch": 3.01, "learning_rate": 3.499810583106437e-05, "loss": 0.2575, "step": 5012500 }, { "epoch": 3.01, "learning_rate": 3.4996005865503804e-05, "loss": 0.2558, "step": 5013000 }, { "epoch": 3.01, "learning_rate": 3.499391009987436e-05, "loss": 0.2505, "step": 5013500 }, { "epoch": 3.01, "learning_rate": 3.49918101343138e-05, "loss": 0.252, "step": 5014000 }, { "epoch": 3.01, "learning_rate": 3.498971016875323e-05, "loss": 0.2514, "step": 5014500 }, { "epoch": 3.01, "learning_rate": 3.4987610203192665e-05, "loss": 0.2515, "step": 5015000 }, { "epoch": 3.01, "learning_rate": 3.498551443756322e-05, "loss": 0.2565, "step": 5015500 }, { "epoch": 3.01, "learning_rate": 3.498341447200266e-05, "loss": 0.256, "step": 5016000 }, { "epoch": 3.01, "learning_rate": 3.498131450644209e-05, "loss": 0.2496, "step": 5016500 }, { "epoch": 3.01, "learning_rate": 3.4979214540881525e-05, "loss": 0.252, "step": 5017000 }, { "epoch": 3.01, "learning_rate": 3.4977118775252086e-05, "loss": 0.2515, "step": 5017500 }, { "epoch": 3.01, "learning_rate": 3.4975023009622646e-05, "loss": 0.254, "step": 5018000 }, { "epoch": 3.01, "learning_rate": 3.497292304406208e-05, "loss": 0.25, "step": 5018500 }, { "epoch": 3.01, "learning_rate": 3.497082307850151e-05, "loss": 0.2558, "step": 5019000 }, { "epoch": 3.01, "learning_rate": 3.496872311294095e-05, "loss": 0.2547, "step": 5019500 }, { "epoch": 3.01, "learning_rate": 3.496662314738038e-05, "loss": 0.2547, "step": 5020000 }, { "epoch": 3.01, "learning_rate": 3.496452738175094e-05, "loss": 0.2477, "step": 5020500 }, { "epoch": 3.01, "learning_rate": 3.4962427416190374e-05, "loss": 0.2526, "step": 5021000 }, { "epoch": 3.01, "learning_rate": 3.4960327450629814e-05, "loss": 0.2555, "step": 5021500 }, { "epoch": 3.01, "learning_rate": 3.495822748506925e-05, "loss": 0.2564, "step": 5022000 }, { "epoch": 3.01, "learning_rate": 3.4956127519508674e-05, "loss": 0.2535, "step": 5022500 }, { "epoch": 3.01, "learning_rate": 3.4954027553948114e-05, "loss": 0.2527, "step": 5023000 }, { "epoch": 3.01, "learning_rate": 3.495192758838755e-05, "loss": 0.258, "step": 5023500 }, { "epoch": 3.01, "learning_rate": 3.494982762282698e-05, "loss": 0.2507, "step": 5024000 }, { "epoch": 3.01, "learning_rate": 3.494773185719754e-05, "loss": 0.2507, "step": 5024500 }, { "epoch": 3.01, "learning_rate": 3.4945631891636975e-05, "loss": 0.2499, "step": 5025000 }, { "epoch": 3.01, "learning_rate": 3.494353192607641e-05, "loss": 0.2558, "step": 5025500 }, { "epoch": 3.01, "learning_rate": 3.494143196051585e-05, "loss": 0.2491, "step": 5026000 }, { "epoch": 3.01, "learning_rate": 3.493933199495528e-05, "loss": 0.2515, "step": 5026500 }, { "epoch": 3.01, "learning_rate": 3.493724042925696e-05, "loss": 0.2571, "step": 5027000 }, { "epoch": 3.01, "learning_rate": 3.4935140463696396e-05, "loss": 0.2511, "step": 5027500 }, { "epoch": 3.01, "learning_rate": 3.493304049813583e-05, "loss": 0.2504, "step": 5028000 }, { "epoch": 3.01, "learning_rate": 3.493094053257527e-05, "loss": 0.2501, "step": 5028500 }, { "epoch": 3.02, "learning_rate": 3.49288405670147e-05, "loss": 0.2549, "step": 5029000 }, { "epoch": 3.02, "learning_rate": 3.492674060145413e-05, "loss": 0.2531, "step": 5029500 }, { "epoch": 3.02, "learning_rate": 3.492464063589357e-05, "loss": 0.2567, "step": 5030000 }, { "epoch": 3.02, "learning_rate": 3.4922540670333004e-05, "loss": 0.2561, "step": 5030500 }, { "epoch": 3.02, "learning_rate": 3.4920444904703564e-05, "loss": 0.2529, "step": 5031000 }, { "epoch": 3.02, "learning_rate": 3.491834913907412e-05, "loss": 0.248, "step": 5031500 }, { "epoch": 3.02, "learning_rate": 3.491624917351356e-05, "loss": 0.2524, "step": 5032000 }, { "epoch": 3.02, "learning_rate": 3.491414920795299e-05, "loss": 0.2538, "step": 5032500 }, { "epoch": 3.02, "learning_rate": 3.4912049242392425e-05, "loss": 0.26, "step": 5033000 }, { "epoch": 3.02, "learning_rate": 3.4909949276831865e-05, "loss": 0.2533, "step": 5033500 }, { "epoch": 3.02, "learning_rate": 3.49078493112713e-05, "loss": 0.2512, "step": 5034000 }, { "epoch": 3.02, "learning_rate": 3.4905749345710725e-05, "loss": 0.254, "step": 5034500 }, { "epoch": 3.02, "learning_rate": 3.4903649380150165e-05, "loss": 0.2491, "step": 5035000 }, { "epoch": 3.02, "learning_rate": 3.4901553614520726e-05, "loss": 0.2555, "step": 5035500 }, { "epoch": 3.02, "learning_rate": 3.489945364896016e-05, "loss": 0.2509, "step": 5036000 }, { "epoch": 3.02, "learning_rate": 3.4897353683399586e-05, "loss": 0.2551, "step": 5036500 }, { "epoch": 3.02, "learning_rate": 3.4895253717839026e-05, "loss": 0.2569, "step": 5037000 }, { "epoch": 3.02, "learning_rate": 3.4893157952209586e-05, "loss": 0.2571, "step": 5037500 }, { "epoch": 3.02, "learning_rate": 3.489105798664902e-05, "loss": 0.252, "step": 5038000 }, { "epoch": 3.02, "learning_rate": 3.488896222101957e-05, "loss": 0.2529, "step": 5038500 }, { "epoch": 3.02, "learning_rate": 3.4886862255459014e-05, "loss": 0.2546, "step": 5039000 }, { "epoch": 3.02, "learning_rate": 3.488476228989845e-05, "loss": 0.2524, "step": 5039500 }, { "epoch": 3.02, "learning_rate": 3.488266232433788e-05, "loss": 0.2531, "step": 5040000 }, { "epoch": 3.02, "learning_rate": 3.4880566558708434e-05, "loss": 0.2562, "step": 5040500 }, { "epoch": 3.02, "learning_rate": 3.4878466593147874e-05, "loss": 0.2591, "step": 5041000 }, { "epoch": 3.02, "learning_rate": 3.487636662758731e-05, "loss": 0.2581, "step": 5041500 }, { "epoch": 3.02, "learning_rate": 3.487426666202674e-05, "loss": 0.2554, "step": 5042000 }, { "epoch": 3.02, "learning_rate": 3.487216669646618e-05, "loss": 0.263, "step": 5042500 }, { "epoch": 3.02, "learning_rate": 3.4870066730905615e-05, "loss": 0.2493, "step": 5043000 }, { "epoch": 3.02, "learning_rate": 3.486797096527617e-05, "loss": 0.2579, "step": 5043500 }, { "epoch": 3.02, "learning_rate": 3.48658709997156e-05, "loss": 0.2542, "step": 5044000 }, { "epoch": 3.02, "learning_rate": 3.486377103415504e-05, "loss": 0.2559, "step": 5044500 }, { "epoch": 3.02, "learning_rate": 3.4861671068594476e-05, "loss": 0.2568, "step": 5045000 }, { "epoch": 3.02, "learning_rate": 3.485957110303391e-05, "loss": 0.252, "step": 5045500 }, { "epoch": 3.03, "learning_rate": 3.485747113747334e-05, "loss": 0.2586, "step": 5046000 }, { "epoch": 3.03, "learning_rate": 3.4855371171912776e-05, "loss": 0.2617, "step": 5046500 }, { "epoch": 3.03, "learning_rate": 3.4853271206352216e-05, "loss": 0.2507, "step": 5047000 }, { "epoch": 3.03, "learning_rate": 3.4851175440722777e-05, "loss": 0.2552, "step": 5047500 }, { "epoch": 3.03, "learning_rate": 3.484907967509333e-05, "loss": 0.2527, "step": 5048000 }, { "epoch": 3.03, "learning_rate": 3.4846979709532764e-05, "loss": 0.251, "step": 5048500 }, { "epoch": 3.03, "learning_rate": 3.48448797439722e-05, "loss": 0.2553, "step": 5049000 }, { "epoch": 3.03, "learning_rate": 3.484277977841164e-05, "loss": 0.2498, "step": 5049500 }, { "epoch": 3.03, "learning_rate": 3.484067981285107e-05, "loss": 0.2577, "step": 5050000 }, { "epoch": 3.03, "learning_rate": 3.4838579847290504e-05, "loss": 0.2529, "step": 5050500 }, { "epoch": 3.03, "learning_rate": 3.483647988172994e-05, "loss": 0.2512, "step": 5051000 }, { "epoch": 3.03, "learning_rate": 3.483437991616937e-05, "loss": 0.2541, "step": 5051500 }, { "epoch": 3.03, "learning_rate": 3.483228415053993e-05, "loss": 0.2498, "step": 5052000 }, { "epoch": 3.03, "learning_rate": 3.4830184184979365e-05, "loss": 0.2516, "step": 5052500 }, { "epoch": 3.03, "learning_rate": 3.4828084219418805e-05, "loss": 0.2562, "step": 5053000 }, { "epoch": 3.03, "learning_rate": 3.482598425385823e-05, "loss": 0.2566, "step": 5053500 }, { "epoch": 3.03, "learning_rate": 3.482388848822879e-05, "loss": 0.2574, "step": 5054000 }, { "epoch": 3.03, "learning_rate": 3.482178852266823e-05, "loss": 0.257, "step": 5054500 }, { "epoch": 3.03, "learning_rate": 3.4819688557107666e-05, "loss": 0.2577, "step": 5055000 }, { "epoch": 3.03, "learning_rate": 3.48175885915471e-05, "loss": 0.2495, "step": 5055500 }, { "epoch": 3.03, "learning_rate": 3.481549282591765e-05, "loss": 0.256, "step": 5056000 }, { "epoch": 3.03, "learning_rate": 3.481339286035709e-05, "loss": 0.2524, "step": 5056500 }, { "epoch": 3.03, "learning_rate": 3.481129289479653e-05, "loss": 0.2569, "step": 5057000 }, { "epoch": 3.03, "learning_rate": 3.480919292923596e-05, "loss": 0.2622, "step": 5057500 }, { "epoch": 3.03, "learning_rate": 3.4807097163606514e-05, "loss": 0.2547, "step": 5058000 }, { "epoch": 3.03, "learning_rate": 3.4804997198045954e-05, "loss": 0.2614, "step": 5058500 }, { "epoch": 3.03, "learning_rate": 3.480289723248539e-05, "loss": 0.2572, "step": 5059000 }, { "epoch": 3.03, "learning_rate": 3.480079726692482e-05, "loss": 0.2545, "step": 5059500 }, { "epoch": 3.03, "learning_rate": 3.479870150129538e-05, "loss": 0.2531, "step": 5060000 }, { "epoch": 3.03, "learning_rate": 3.4796601535734815e-05, "loss": 0.2659, "step": 5060500 }, { "epoch": 3.03, "learning_rate": 3.479450157017425e-05, "loss": 0.2504, "step": 5061000 }, { "epoch": 3.03, "learning_rate": 3.479240160461369e-05, "loss": 0.2535, "step": 5061500 }, { "epoch": 3.03, "learning_rate": 3.479030583898424e-05, "loss": 0.2525, "step": 5062000 }, { "epoch": 3.04, "learning_rate": 3.4788205873423675e-05, "loss": 0.2582, "step": 5062500 }, { "epoch": 3.04, "learning_rate": 3.478610590786311e-05, "loss": 0.2527, "step": 5063000 }, { "epoch": 3.04, "learning_rate": 3.478400594230255e-05, "loss": 0.2537, "step": 5063500 }, { "epoch": 3.04, "learning_rate": 3.47819101766731e-05, "loss": 0.2594, "step": 5064000 }, { "epoch": 3.04, "learning_rate": 3.4779810211112536e-05, "loss": 0.2538, "step": 5064500 }, { "epoch": 3.04, "learning_rate": 3.477771024555197e-05, "loss": 0.2553, "step": 5065000 }, { "epoch": 3.04, "learning_rate": 3.477561027999141e-05, "loss": 0.2475, "step": 5065500 }, { "epoch": 3.04, "learning_rate": 3.4773514514361963e-05, "loss": 0.2525, "step": 5066000 }, { "epoch": 3.04, "learning_rate": 3.47714145488014e-05, "loss": 0.2555, "step": 5066500 }, { "epoch": 3.04, "learning_rate": 3.476931458324084e-05, "loss": 0.2552, "step": 5067000 }, { "epoch": 3.04, "learning_rate": 3.476721461768027e-05, "loss": 0.2547, "step": 5067500 }, { "epoch": 3.04, "learning_rate": 3.4765114652119704e-05, "loss": 0.2549, "step": 5068000 }, { "epoch": 3.04, "learning_rate": 3.476301888649026e-05, "loss": 0.2585, "step": 5068500 }, { "epoch": 3.04, "learning_rate": 3.47609189209297e-05, "loss": 0.2583, "step": 5069000 }, { "epoch": 3.04, "learning_rate": 3.475882315530025e-05, "loss": 0.2582, "step": 5069500 }, { "epoch": 3.04, "learning_rate": 3.4756723189739685e-05, "loss": 0.2566, "step": 5070000 }, { "epoch": 3.04, "learning_rate": 3.475462322417912e-05, "loss": 0.2513, "step": 5070500 }, { "epoch": 3.04, "learning_rate": 3.475252325861856e-05, "loss": 0.2512, "step": 5071000 }, { "epoch": 3.04, "learning_rate": 3.475042749298911e-05, "loss": 0.2572, "step": 5071500 }, { "epoch": 3.04, "learning_rate": 3.4748327527428546e-05, "loss": 0.2509, "step": 5072000 }, { "epoch": 3.04, "learning_rate": 3.4746227561867986e-05, "loss": 0.2554, "step": 5072500 }, { "epoch": 3.04, "learning_rate": 3.474412759630742e-05, "loss": 0.2568, "step": 5073000 }, { "epoch": 3.04, "learning_rate": 3.474202763074685e-05, "loss": 0.2477, "step": 5073500 }, { "epoch": 3.04, "learning_rate": 3.473992766518629e-05, "loss": 0.2537, "step": 5074000 }, { "epoch": 3.04, "learning_rate": 3.4737827699625726e-05, "loss": 0.254, "step": 5074500 }, { "epoch": 3.04, "learning_rate": 3.473572773406516e-05, "loss": 0.2554, "step": 5075000 }, { "epoch": 3.04, "learning_rate": 3.4733631968435713e-05, "loss": 0.2475, "step": 5075500 }, { "epoch": 3.04, "learning_rate": 3.4731532002875154e-05, "loss": 0.2551, "step": 5076000 }, { "epoch": 3.04, "learning_rate": 3.472943203731459e-05, "loss": 0.2504, "step": 5076500 }, { "epoch": 3.04, "learning_rate": 3.472733207175402e-05, "loss": 0.2486, "step": 5077000 }, { "epoch": 3.04, "learning_rate": 3.4725236306124574e-05, "loss": 0.2492, "step": 5077500 }, { "epoch": 3.04, "learning_rate": 3.4723140540495135e-05, "loss": 0.2528, "step": 5078000 }, { "epoch": 3.04, "learning_rate": 3.4721040574934575e-05, "loss": 0.2615, "step": 5078500 }, { "epoch": 3.05, "learning_rate": 3.4718940609374e-05, "loss": 0.2519, "step": 5079000 }, { "epoch": 3.05, "learning_rate": 3.471684064381344e-05, "loss": 0.254, "step": 5079500 }, { "epoch": 3.05, "learning_rate": 3.4714740678252875e-05, "loss": 0.2491, "step": 5080000 }, { "epoch": 3.05, "learning_rate": 3.471264071269231e-05, "loss": 0.2571, "step": 5080500 }, { "epoch": 3.05, "learning_rate": 3.471054494706286e-05, "loss": 0.2561, "step": 5081000 }, { "epoch": 3.05, "learning_rate": 3.47084449815023e-05, "loss": 0.2576, "step": 5081500 }, { "epoch": 3.05, "learning_rate": 3.4706345015941736e-05, "loss": 0.257, "step": 5082000 }, { "epoch": 3.05, "learning_rate": 3.470424505038117e-05, "loss": 0.2557, "step": 5082500 }, { "epoch": 3.05, "learning_rate": 3.470214508482061e-05, "loss": 0.255, "step": 5083000 }, { "epoch": 3.05, "learning_rate": 3.470004511926004e-05, "loss": 0.2495, "step": 5083500 }, { "epoch": 3.05, "learning_rate": 3.4697945153699476e-05, "loss": 0.2539, "step": 5084000 }, { "epoch": 3.05, "learning_rate": 3.469584518813892e-05, "loss": 0.2587, "step": 5084500 }, { "epoch": 3.05, "learning_rate": 3.469374942250947e-05, "loss": 0.249, "step": 5085000 }, { "epoch": 3.05, "learning_rate": 3.4691649456948904e-05, "loss": 0.2572, "step": 5085500 }, { "epoch": 3.05, "learning_rate": 3.468954949138834e-05, "loss": 0.2584, "step": 5086000 }, { "epoch": 3.05, "learning_rate": 3.468744952582778e-05, "loss": 0.2565, "step": 5086500 }, { "epoch": 3.05, "learning_rate": 3.468535376019833e-05, "loss": 0.2552, "step": 5087000 }, { "epoch": 3.05, "learning_rate": 3.4683253794637764e-05, "loss": 0.2551, "step": 5087500 }, { "epoch": 3.05, "learning_rate": 3.4681153829077205e-05, "loss": 0.2531, "step": 5088000 }, { "epoch": 3.05, "learning_rate": 3.467905386351664e-05, "loss": 0.2525, "step": 5088500 }, { "epoch": 3.05, "learning_rate": 3.467695809788719e-05, "loss": 0.2545, "step": 5089000 }, { "epoch": 3.05, "learning_rate": 3.4674858132326625e-05, "loss": 0.2581, "step": 5089500 }, { "epoch": 3.05, "learning_rate": 3.4672758166766065e-05, "loss": 0.2593, "step": 5090000 }, { "epoch": 3.05, "learning_rate": 3.46706582012055e-05, "loss": 0.2535, "step": 5090500 }, { "epoch": 3.05, "learning_rate": 3.466856243557605e-05, "loss": 0.2562, "step": 5091000 }, { "epoch": 3.05, "learning_rate": 3.4666462470015486e-05, "loss": 0.2557, "step": 5091500 }, { "epoch": 3.05, "learning_rate": 3.4664362504454926e-05, "loss": 0.2597, "step": 5092000 }, { "epoch": 3.05, "learning_rate": 3.466226253889436e-05, "loss": 0.2549, "step": 5092500 }, { "epoch": 3.05, "learning_rate": 3.466016677326491e-05, "loss": 0.2538, "step": 5093000 }, { "epoch": 3.05, "learning_rate": 3.4658066807704353e-05, "loss": 0.2492, "step": 5093500 }, { "epoch": 3.05, "learning_rate": 3.465596684214379e-05, "loss": 0.2563, "step": 5094000 }, { "epoch": 3.05, "learning_rate": 3.465386687658322e-05, "loss": 0.2533, "step": 5094500 }, { "epoch": 3.05, "learning_rate": 3.465176691102266e-05, "loss": 0.2588, "step": 5095000 }, { "epoch": 3.05, "learning_rate": 3.4649671145393214e-05, "loss": 0.2579, "step": 5095500 }, { "epoch": 3.06, "learning_rate": 3.464757117983265e-05, "loss": 0.2507, "step": 5096000 }, { "epoch": 3.06, "learning_rate": 3.464547121427208e-05, "loss": 0.2561, "step": 5096500 }, { "epoch": 3.06, "learning_rate": 3.464337124871152e-05, "loss": 0.2493, "step": 5097000 }, { "epoch": 3.06, "learning_rate": 3.464127548308208e-05, "loss": 0.2529, "step": 5097500 }, { "epoch": 3.06, "learning_rate": 3.463917551752151e-05, "loss": 0.2503, "step": 5098000 }, { "epoch": 3.06, "learning_rate": 3.463707555196094e-05, "loss": 0.2523, "step": 5098500 }, { "epoch": 3.06, "learning_rate": 3.463497558640038e-05, "loss": 0.2541, "step": 5099000 }, { "epoch": 3.06, "learning_rate": 3.463287982077094e-05, "loss": 0.2523, "step": 5099500 }, { "epoch": 3.06, "learning_rate": 3.4630779855210376e-05, "loss": 0.2504, "step": 5100000 }, { "epoch": 3.06, "eval_loss": 0.23820264637470245, "eval_runtime": 1454.4595, "eval_samples_per_second": 362.141, "eval_steps_per_second": 60.357, "step": 5100000 }, { "epoch": 3.06, "learning_rate": 3.462867988964981e-05, "loss": 0.2548, "step": 5100500 }, { "epoch": 3.06, "learning_rate": 3.462657992408924e-05, "loss": 0.2501, "step": 5101000 }, { "epoch": 3.06, "learning_rate": 3.46244841584598e-05, "loss": 0.253, "step": 5101500 }, { "epoch": 3.06, "learning_rate": 3.4622384192899237e-05, "loss": 0.2494, "step": 5102000 }, { "epoch": 3.06, "learning_rate": 3.462028842726979e-05, "loss": 0.2568, "step": 5102500 }, { "epoch": 3.06, "learning_rate": 3.461818846170923e-05, "loss": 0.2519, "step": 5103000 }, { "epoch": 3.06, "learning_rate": 3.4616088496148664e-05, "loss": 0.2547, "step": 5103500 }, { "epoch": 3.06, "learning_rate": 3.46139885305881e-05, "loss": 0.2609, "step": 5104000 }, { "epoch": 3.06, "learning_rate": 3.461188856502754e-05, "loss": 0.2527, "step": 5104500 }, { "epoch": 3.06, "learning_rate": 3.4609788599466964e-05, "loss": 0.2544, "step": 5105000 }, { "epoch": 3.06, "learning_rate": 3.4607692833837525e-05, "loss": 0.2555, "step": 5105500 }, { "epoch": 3.06, "learning_rate": 3.4605592868276965e-05, "loss": 0.2536, "step": 5106000 }, { "epoch": 3.06, "learning_rate": 3.46034929027164e-05, "loss": 0.2581, "step": 5106500 }, { "epoch": 3.06, "learning_rate": 3.460139293715583e-05, "loss": 0.257, "step": 5107000 }, { "epoch": 3.06, "learning_rate": 3.4599292971595265e-05, "loss": 0.2541, "step": 5107500 }, { "epoch": 3.06, "learning_rate": 3.45971930060347e-05, "loss": 0.2544, "step": 5108000 }, { "epoch": 3.06, "learning_rate": 3.459509304047413e-05, "loss": 0.253, "step": 5108500 }, { "epoch": 3.06, "learning_rate": 3.459299727484469e-05, "loss": 0.2509, "step": 5109000 }, { "epoch": 3.06, "learning_rate": 3.459089730928413e-05, "loss": 0.2565, "step": 5109500 }, { "epoch": 3.06, "learning_rate": 3.458879734372356e-05, "loss": 0.254, "step": 5110000 }, { "epoch": 3.06, "learning_rate": 3.458669737816299e-05, "loss": 0.2543, "step": 5110500 }, { "epoch": 3.06, "learning_rate": 3.458459741260243e-05, "loss": 0.2531, "step": 5111000 }, { "epoch": 3.06, "learning_rate": 3.4582497447041867e-05, "loss": 0.2584, "step": 5111500 }, { "epoch": 3.06, "learning_rate": 3.45803974814813e-05, "loss": 0.2507, "step": 5112000 }, { "epoch": 3.07, "learning_rate": 3.457829751592074e-05, "loss": 0.2521, "step": 5112500 }, { "epoch": 3.07, "learning_rate": 3.4576201750291294e-05, "loss": 0.2546, "step": 5113000 }, { "epoch": 3.07, "learning_rate": 3.4574105984661854e-05, "loss": 0.2574, "step": 5113500 }, { "epoch": 3.07, "learning_rate": 3.457200601910129e-05, "loss": 0.2563, "step": 5114000 }, { "epoch": 3.07, "learning_rate": 3.456990605354072e-05, "loss": 0.2533, "step": 5114500 }, { "epoch": 3.07, "learning_rate": 3.4567806087980155e-05, "loss": 0.2543, "step": 5115000 }, { "epoch": 3.07, "learning_rate": 3.456570612241959e-05, "loss": 0.2636, "step": 5115500 }, { "epoch": 3.07, "learning_rate": 3.456360615685903e-05, "loss": 0.2577, "step": 5116000 }, { "epoch": 3.07, "learning_rate": 3.456150619129846e-05, "loss": 0.2582, "step": 5116500 }, { "epoch": 3.07, "learning_rate": 3.4559406225737895e-05, "loss": 0.2542, "step": 5117000 }, { "epoch": 3.07, "learning_rate": 3.455731046010845e-05, "loss": 0.2524, "step": 5117500 }, { "epoch": 3.07, "learning_rate": 3.455521049454789e-05, "loss": 0.2571, "step": 5118000 }, { "epoch": 3.07, "learning_rate": 3.455311052898732e-05, "loss": 0.2548, "step": 5118500 }, { "epoch": 3.07, "learning_rate": 3.4551010563426756e-05, "loss": 0.2534, "step": 5119000 }, { "epoch": 3.07, "learning_rate": 3.4548914797797316e-05, "loss": 0.2562, "step": 5119500 }, { "epoch": 3.07, "learning_rate": 3.454681483223675e-05, "loss": 0.2598, "step": 5120000 }, { "epoch": 3.07, "learning_rate": 3.454471486667618e-05, "loss": 0.2598, "step": 5120500 }, { "epoch": 3.07, "learning_rate": 3.454261490111562e-05, "loss": 0.2555, "step": 5121000 }, { "epoch": 3.07, "learning_rate": 3.45405233354173e-05, "loss": 0.2608, "step": 5121500 }, { "epoch": 3.07, "learning_rate": 3.453842756978785e-05, "loss": 0.2564, "step": 5122000 }, { "epoch": 3.07, "learning_rate": 3.453632760422729e-05, "loss": 0.2488, "step": 5122500 }, { "epoch": 3.07, "learning_rate": 3.453423183859785e-05, "loss": 0.2531, "step": 5123000 }, { "epoch": 3.07, "learning_rate": 3.453213187303728e-05, "loss": 0.2525, "step": 5123500 }, { "epoch": 3.07, "learning_rate": 3.453003190747671e-05, "loss": 0.256, "step": 5124000 }, { "epoch": 3.07, "learning_rate": 3.452793194191615e-05, "loss": 0.2587, "step": 5124500 }, { "epoch": 3.07, "learning_rate": 3.4525831976355585e-05, "loss": 0.2477, "step": 5125000 }, { "epoch": 3.07, "learning_rate": 3.4523732010795025e-05, "loss": 0.2574, "step": 5125500 }, { "epoch": 3.07, "learning_rate": 3.452163204523446e-05, "loss": 0.2472, "step": 5126000 }, { "epoch": 3.07, "learning_rate": 3.451953207967389e-05, "loss": 0.2542, "step": 5126500 }, { "epoch": 3.07, "learning_rate": 3.451743211411333e-05, "loss": 0.2519, "step": 5127000 }, { "epoch": 3.07, "learning_rate": 3.4515332148552766e-05, "loss": 0.2547, "step": 5127500 }, { "epoch": 3.07, "learning_rate": 3.451323638292332e-05, "loss": 0.2571, "step": 5128000 }, { "epoch": 3.07, "learning_rate": 3.451113641736275e-05, "loss": 0.2472, "step": 5128500 }, { "epoch": 3.08, "learning_rate": 3.450903645180219e-05, "loss": 0.2588, "step": 5129000 }, { "epoch": 3.08, "learning_rate": 3.450693648624163e-05, "loss": 0.2533, "step": 5129500 }, { "epoch": 3.08, "learning_rate": 3.450483652068106e-05, "loss": 0.2544, "step": 5130000 }, { "epoch": 3.08, "learning_rate": 3.45027365551205e-05, "loss": 0.2514, "step": 5130500 }, { "epoch": 3.08, "learning_rate": 3.4500636589559934e-05, "loss": 0.2541, "step": 5131000 }, { "epoch": 3.08, "learning_rate": 3.449853662399936e-05, "loss": 0.2551, "step": 5131500 }, { "epoch": 3.08, "learning_rate": 3.44964366584388e-05, "loss": 0.2546, "step": 5132000 }, { "epoch": 3.08, "learning_rate": 3.4494336692878234e-05, "loss": 0.2523, "step": 5132500 }, { "epoch": 3.08, "learning_rate": 3.449223672731767e-05, "loss": 0.2618, "step": 5133000 }, { "epoch": 3.08, "learning_rate": 3.449013676175711e-05, "loss": 0.2466, "step": 5133500 }, { "epoch": 3.08, "learning_rate": 3.448803679619654e-05, "loss": 0.2522, "step": 5134000 }, { "epoch": 3.08, "learning_rate": 3.4485941030567095e-05, "loss": 0.2576, "step": 5134500 }, { "epoch": 3.08, "learning_rate": 3.4483841065006535e-05, "loss": 0.2584, "step": 5135000 }, { "epoch": 3.08, "learning_rate": 3.448174109944597e-05, "loss": 0.2474, "step": 5135500 }, { "epoch": 3.08, "learning_rate": 3.44796411338854e-05, "loss": 0.2521, "step": 5136000 }, { "epoch": 3.08, "learning_rate": 3.4477545368255956e-05, "loss": 0.2535, "step": 5136500 }, { "epoch": 3.08, "learning_rate": 3.4475445402695396e-05, "loss": 0.2609, "step": 5137000 }, { "epoch": 3.08, "learning_rate": 3.447334543713483e-05, "loss": 0.2583, "step": 5137500 }, { "epoch": 3.08, "learning_rate": 3.447124967150539e-05, "loss": 0.2508, "step": 5138000 }, { "epoch": 3.08, "learning_rate": 3.4469149705944816e-05, "loss": 0.2504, "step": 5138500 }, { "epoch": 3.08, "learning_rate": 3.4467049740384257e-05, "loss": 0.2563, "step": 5139000 }, { "epoch": 3.08, "learning_rate": 3.446494977482369e-05, "loss": 0.2566, "step": 5139500 }, { "epoch": 3.08, "learning_rate": 3.4462849809263124e-05, "loss": 0.2503, "step": 5140000 }, { "epoch": 3.08, "learning_rate": 3.4460749843702564e-05, "loss": 0.2563, "step": 5140500 }, { "epoch": 3.08, "learning_rate": 3.4458649878142e-05, "loss": 0.2493, "step": 5141000 }, { "epoch": 3.08, "learning_rate": 3.445654991258143e-05, "loss": 0.2542, "step": 5141500 }, { "epoch": 3.08, "learning_rate": 3.445445414695199e-05, "loss": 0.2558, "step": 5142000 }, { "epoch": 3.08, "learning_rate": 3.4452354181391424e-05, "loss": 0.2569, "step": 5142500 }, { "epoch": 3.08, "learning_rate": 3.445025421583086e-05, "loss": 0.2567, "step": 5143000 }, { "epoch": 3.08, "learning_rate": 3.44481542502703e-05, "loss": 0.2487, "step": 5143500 }, { "epoch": 3.08, "learning_rate": 3.444605848464085e-05, "loss": 0.2572, "step": 5144000 }, { "epoch": 3.08, "learning_rate": 3.444396271901141e-05, "loss": 0.2686, "step": 5144500 }, { "epoch": 3.08, "learning_rate": 3.4441862753450846e-05, "loss": 0.2574, "step": 5145000 }, { "epoch": 3.08, "learning_rate": 3.443976278789027e-05, "loss": 0.2476, "step": 5145500 }, { "epoch": 3.09, "learning_rate": 3.443766282232971e-05, "loss": 0.2477, "step": 5146000 }, { "epoch": 3.09, "learning_rate": 3.4435562856769146e-05, "loss": 0.2613, "step": 5146500 }, { "epoch": 3.09, "learning_rate": 3.443346289120858e-05, "loss": 0.2544, "step": 5147000 }, { "epoch": 3.09, "learning_rate": 3.443136292564802e-05, "loss": 0.258, "step": 5147500 }, { "epoch": 3.09, "learning_rate": 3.442926296008745e-05, "loss": 0.2548, "step": 5148000 }, { "epoch": 3.09, "learning_rate": 3.442716719445801e-05, "loss": 0.2488, "step": 5148500 }, { "epoch": 3.09, "learning_rate": 3.442506722889745e-05, "loss": 0.2534, "step": 5149000 }, { "epoch": 3.09, "learning_rate": 3.442296726333688e-05, "loss": 0.2563, "step": 5149500 }, { "epoch": 3.09, "learning_rate": 3.4420867297776314e-05, "loss": 0.2577, "step": 5150000 }, { "epoch": 3.09, "learning_rate": 3.441877153214687e-05, "loss": 0.2563, "step": 5150500 }, { "epoch": 3.09, "learning_rate": 3.441667156658631e-05, "loss": 0.2547, "step": 5151000 }, { "epoch": 3.09, "learning_rate": 3.441457160102574e-05, "loss": 0.2535, "step": 5151500 }, { "epoch": 3.09, "learning_rate": 3.4412471635465175e-05, "loss": 0.2467, "step": 5152000 }, { "epoch": 3.09, "learning_rate": 3.441037586983573e-05, "loss": 0.2502, "step": 5152500 }, { "epoch": 3.09, "learning_rate": 3.440827590427517e-05, "loss": 0.2548, "step": 5153000 }, { "epoch": 3.09, "learning_rate": 3.44061759387146e-05, "loss": 0.2542, "step": 5153500 }, { "epoch": 3.09, "learning_rate": 3.4404075973154035e-05, "loss": 0.2479, "step": 5154000 }, { "epoch": 3.09, "learning_rate": 3.4401980207524596e-05, "loss": 0.2525, "step": 5154500 }, { "epoch": 3.09, "learning_rate": 3.439988024196403e-05, "loss": 0.2554, "step": 5155000 }, { "epoch": 3.09, "learning_rate": 3.439778027640346e-05, "loss": 0.2565, "step": 5155500 }, { "epoch": 3.09, "learning_rate": 3.439568451077402e-05, "loss": 0.2546, "step": 5156000 }, { "epoch": 3.09, "learning_rate": 3.439358454521346e-05, "loss": 0.2629, "step": 5156500 }, { "epoch": 3.09, "learning_rate": 3.4391484579652897e-05, "loss": 0.2564, "step": 5157000 }, { "epoch": 3.09, "learning_rate": 3.438938461409232e-05, "loss": 0.2546, "step": 5157500 }, { "epoch": 3.09, "learning_rate": 3.4387284648531764e-05, "loss": 0.2539, "step": 5158000 }, { "epoch": 3.09, "learning_rate": 3.43851846829712e-05, "loss": 0.26, "step": 5158500 }, { "epoch": 3.09, "learning_rate": 3.438308471741063e-05, "loss": 0.2549, "step": 5159000 }, { "epoch": 3.09, "learning_rate": 3.438098475185007e-05, "loss": 0.2586, "step": 5159500 }, { "epoch": 3.09, "learning_rate": 3.4378888986220624e-05, "loss": 0.254, "step": 5160000 }, { "epoch": 3.09, "learning_rate": 3.437678902066006e-05, "loss": 0.2486, "step": 5160500 }, { "epoch": 3.09, "learning_rate": 3.437468905509949e-05, "loss": 0.2518, "step": 5161000 }, { "epoch": 3.09, "learning_rate": 3.437258908953893e-05, "loss": 0.2563, "step": 5161500 }, { "epoch": 3.09, "learning_rate": 3.437049332390949e-05, "loss": 0.2549, "step": 5162000 }, { "epoch": 3.1, "learning_rate": 3.436839335834892e-05, "loss": 0.2558, "step": 5162500 }, { "epoch": 3.1, "learning_rate": 3.436629339278836e-05, "loss": 0.257, "step": 5163000 }, { "epoch": 3.1, "learning_rate": 3.436419342722779e-05, "loss": 0.2551, "step": 5163500 }, { "epoch": 3.1, "learning_rate": 3.436209766159835e-05, "loss": 0.2503, "step": 5164000 }, { "epoch": 3.1, "learning_rate": 3.435999769603778e-05, "loss": 0.2603, "step": 5164500 }, { "epoch": 3.1, "learning_rate": 3.435789773047722e-05, "loss": 0.2601, "step": 5165000 }, { "epoch": 3.1, "learning_rate": 3.435580196484778e-05, "loss": 0.2506, "step": 5165500 }, { "epoch": 3.1, "learning_rate": 3.435370199928721e-05, "loss": 0.2568, "step": 5166000 }, { "epoch": 3.1, "learning_rate": 3.435160203372665e-05, "loss": 0.2546, "step": 5166500 }, { "epoch": 3.1, "learning_rate": 3.434950206816608e-05, "loss": 0.2518, "step": 5167000 }, { "epoch": 3.1, "learning_rate": 3.4347402102605514e-05, "loss": 0.2517, "step": 5167500 }, { "epoch": 3.1, "learning_rate": 3.434530213704495e-05, "loss": 0.2588, "step": 5168000 }, { "epoch": 3.1, "learning_rate": 3.434320217148439e-05, "loss": 0.2499, "step": 5168500 }, { "epoch": 3.1, "learning_rate": 3.434110220592382e-05, "loss": 0.2572, "step": 5169000 }, { "epoch": 3.1, "learning_rate": 3.4339006440294374e-05, "loss": 0.2513, "step": 5169500 }, { "epoch": 3.1, "learning_rate": 3.4336906474733815e-05, "loss": 0.2546, "step": 5170000 }, { "epoch": 3.1, "learning_rate": 3.433480650917325e-05, "loss": 0.2553, "step": 5170500 }, { "epoch": 3.1, "learning_rate": 3.433270654361268e-05, "loss": 0.2521, "step": 5171000 }, { "epoch": 3.1, "learning_rate": 3.433061497791436e-05, "loss": 0.2526, "step": 5171500 }, { "epoch": 3.1, "learning_rate": 3.4328515012353795e-05, "loss": 0.2523, "step": 5172000 }, { "epoch": 3.1, "learning_rate": 3.4326415046793236e-05, "loss": 0.2517, "step": 5172500 }, { "epoch": 3.1, "learning_rate": 3.432431508123267e-05, "loss": 0.256, "step": 5173000 }, { "epoch": 3.1, "learning_rate": 3.43222151156721e-05, "loss": 0.2536, "step": 5173500 }, { "epoch": 3.1, "learning_rate": 3.4320115150111536e-05, "loss": 0.2556, "step": 5174000 }, { "epoch": 3.1, "learning_rate": 3.431801518455097e-05, "loss": 0.2551, "step": 5174500 }, { "epoch": 3.1, "learning_rate": 3.43159152189904e-05, "loss": 0.258, "step": 5175000 }, { "epoch": 3.1, "learning_rate": 3.431381945336096e-05, "loss": 0.2532, "step": 5175500 }, { "epoch": 3.1, "learning_rate": 3.4311719487800403e-05, "loss": 0.2534, "step": 5176000 }, { "epoch": 3.1, "learning_rate": 3.430961952223983e-05, "loss": 0.2542, "step": 5176500 }, { "epoch": 3.1, "learning_rate": 3.430751955667927e-05, "loss": 0.2584, "step": 5177000 }, { "epoch": 3.1, "learning_rate": 3.430542379104983e-05, "loss": 0.2501, "step": 5177500 }, { "epoch": 3.1, "learning_rate": 3.4303323825489264e-05, "loss": 0.253, "step": 5178000 }, { "epoch": 3.1, "learning_rate": 3.43012238599287e-05, "loss": 0.2537, "step": 5178500 }, { "epoch": 3.11, "learning_rate": 3.429912389436813e-05, "loss": 0.2463, "step": 5179000 }, { "epoch": 3.11, "learning_rate": 3.429702812873869e-05, "loss": 0.2519, "step": 5179500 }, { "epoch": 3.11, "learning_rate": 3.4294928163178125e-05, "loss": 0.2561, "step": 5180000 }, { "epoch": 3.11, "learning_rate": 3.429282819761756e-05, "loss": 0.2569, "step": 5180500 }, { "epoch": 3.11, "learning_rate": 3.4290728232057e-05, "loss": 0.2598, "step": 5181000 }, { "epoch": 3.11, "learning_rate": 3.428863246642755e-05, "loss": 0.2565, "step": 5181500 }, { "epoch": 3.11, "learning_rate": 3.4286532500866986e-05, "loss": 0.2546, "step": 5182000 }, { "epoch": 3.11, "learning_rate": 3.428443253530642e-05, "loss": 0.2509, "step": 5182500 }, { "epoch": 3.11, "learning_rate": 3.428233256974586e-05, "loss": 0.2545, "step": 5183000 }, { "epoch": 3.11, "learning_rate": 3.428023680411641e-05, "loss": 0.258, "step": 5183500 }, { "epoch": 3.11, "learning_rate": 3.4278136838555846e-05, "loss": 0.2607, "step": 5184000 }, { "epoch": 3.11, "learning_rate": 3.427603687299529e-05, "loss": 0.2567, "step": 5184500 }, { "epoch": 3.11, "learning_rate": 3.427393690743472e-05, "loss": 0.2506, "step": 5185000 }, { "epoch": 3.11, "learning_rate": 3.4271841141805274e-05, "loss": 0.2573, "step": 5185500 }, { "epoch": 3.11, "learning_rate": 3.426974117624471e-05, "loss": 0.2557, "step": 5186000 }, { "epoch": 3.11, "learning_rate": 3.426764121068415e-05, "loss": 0.2563, "step": 5186500 }, { "epoch": 3.11, "learning_rate": 3.426554124512358e-05, "loss": 0.263, "step": 5187000 }, { "epoch": 3.11, "learning_rate": 3.4263441279563014e-05, "loss": 0.2521, "step": 5187500 }, { "epoch": 3.11, "learning_rate": 3.426134551393357e-05, "loss": 0.2537, "step": 5188000 }, { "epoch": 3.11, "learning_rate": 3.425924554837301e-05, "loss": 0.2491, "step": 5188500 }, { "epoch": 3.11, "learning_rate": 3.425714558281244e-05, "loss": 0.2587, "step": 5189000 }, { "epoch": 3.11, "learning_rate": 3.4255045617251875e-05, "loss": 0.2558, "step": 5189500 }, { "epoch": 3.11, "learning_rate": 3.4252949851622435e-05, "loss": 0.2545, "step": 5190000 }, { "epoch": 3.11, "learning_rate": 3.425084988606187e-05, "loss": 0.2521, "step": 5190500 }, { "epoch": 3.11, "learning_rate": 3.42487499205013e-05, "loss": 0.2586, "step": 5191000 }, { "epoch": 3.11, "learning_rate": 3.424664995494074e-05, "loss": 0.2538, "step": 5191500 }, { "epoch": 3.11, "learning_rate": 3.4244554189311296e-05, "loss": 0.2545, "step": 5192000 }, { "epoch": 3.11, "learning_rate": 3.424245422375073e-05, "loss": 0.2504, "step": 5192500 }, { "epoch": 3.11, "learning_rate": 3.424035425819016e-05, "loss": 0.2558, "step": 5193000 }, { "epoch": 3.11, "learning_rate": 3.42382542926296e-05, "loss": 0.2498, "step": 5193500 }, { "epoch": 3.11, "learning_rate": 3.423615852700016e-05, "loss": 0.2549, "step": 5194000 }, { "epoch": 3.11, "learning_rate": 3.423405856143959e-05, "loss": 0.2544, "step": 5194500 }, { "epoch": 3.11, "learning_rate": 3.4231958595879024e-05, "loss": 0.252, "step": 5195000 }, { "epoch": 3.11, "learning_rate": 3.4229858630318464e-05, "loss": 0.2523, "step": 5195500 }, { "epoch": 3.12, "learning_rate": 3.422776286468902e-05, "loss": 0.2571, "step": 5196000 }, { "epoch": 3.12, "learning_rate": 3.422566289912845e-05, "loss": 0.2586, "step": 5196500 }, { "epoch": 3.12, "learning_rate": 3.422356293356789e-05, "loss": 0.2592, "step": 5197000 }, { "epoch": 3.12, "learning_rate": 3.4221467167938445e-05, "loss": 0.2541, "step": 5197500 }, { "epoch": 3.12, "learning_rate": 3.421936720237788e-05, "loss": 0.2528, "step": 5198000 }, { "epoch": 3.12, "learning_rate": 3.421726723681731e-05, "loss": 0.257, "step": 5198500 }, { "epoch": 3.12, "learning_rate": 3.421516727125675e-05, "loss": 0.2533, "step": 5199000 }, { "epoch": 3.12, "learning_rate": 3.4213067305696185e-05, "loss": 0.2537, "step": 5199500 }, { "epoch": 3.12, "learning_rate": 3.421096734013562e-05, "loss": 0.2575, "step": 5200000 }, { "epoch": 3.12, "eval_loss": 0.23752982914447784, "eval_runtime": 1454.8465, "eval_samples_per_second": 362.045, "eval_steps_per_second": 60.341, "step": 5200000 }, { "epoch": 3.12, "learning_rate": 3.420886737457506e-05, "loss": 0.2592, "step": 5200500 }, { "epoch": 3.12, "learning_rate": 3.420676740901449e-05, "loss": 0.2542, "step": 5201000 }, { "epoch": 3.12, "learning_rate": 3.4204671643385046e-05, "loss": 0.2472, "step": 5201500 }, { "epoch": 3.12, "learning_rate": 3.420257167782448e-05, "loss": 0.2536, "step": 5202000 }, { "epoch": 3.12, "learning_rate": 3.420047171226392e-05, "loss": 0.2529, "step": 5202500 }, { "epoch": 3.12, "learning_rate": 3.419837174670335e-05, "loss": 0.2502, "step": 5203000 }, { "epoch": 3.12, "learning_rate": 3.419627598107391e-05, "loss": 0.2548, "step": 5203500 }, { "epoch": 3.12, "learning_rate": 3.419417601551335e-05, "loss": 0.2559, "step": 5204000 }, { "epoch": 3.12, "learning_rate": 3.41920802498839e-05, "loss": 0.2623, "step": 5204500 }, { "epoch": 3.12, "learning_rate": 3.4189980284323334e-05, "loss": 0.2489, "step": 5205000 }, { "epoch": 3.12, "learning_rate": 3.418788031876277e-05, "loss": 0.2564, "step": 5205500 }, { "epoch": 3.12, "learning_rate": 3.418578035320221e-05, "loss": 0.254, "step": 5206000 }, { "epoch": 3.12, "learning_rate": 3.418368038764164e-05, "loss": 0.255, "step": 5206500 }, { "epoch": 3.12, "learning_rate": 3.4181580422081075e-05, "loss": 0.2538, "step": 5207000 }, { "epoch": 3.12, "learning_rate": 3.4179480456520515e-05, "loss": 0.2502, "step": 5207500 }, { "epoch": 3.12, "learning_rate": 3.417738049095995e-05, "loss": 0.2507, "step": 5208000 }, { "epoch": 3.12, "learning_rate": 3.41752847253305e-05, "loss": 0.2557, "step": 5208500 }, { "epoch": 3.12, "learning_rate": 3.417318475976994e-05, "loss": 0.2603, "step": 5209000 }, { "epoch": 3.12, "learning_rate": 3.4171084794209376e-05, "loss": 0.2551, "step": 5209500 }, { "epoch": 3.12, "learning_rate": 3.416898482864881e-05, "loss": 0.2537, "step": 5210000 }, { "epoch": 3.12, "learning_rate": 3.416688906301936e-05, "loss": 0.2492, "step": 5210500 }, { "epoch": 3.12, "learning_rate": 3.416479329738992e-05, "loss": 0.253, "step": 5211000 }, { "epoch": 3.12, "learning_rate": 3.4162693331829357e-05, "loss": 0.2527, "step": 5211500 }, { "epoch": 3.12, "learning_rate": 3.416059336626879e-05, "loss": 0.254, "step": 5212000 }, { "epoch": 3.13, "learning_rate": 3.415849760063935e-05, "loss": 0.2584, "step": 5212500 }, { "epoch": 3.13, "learning_rate": 3.4156397635078784e-05, "loss": 0.2446, "step": 5213000 }, { "epoch": 3.13, "learning_rate": 3.4154297669518224e-05, "loss": 0.253, "step": 5213500 }, { "epoch": 3.13, "learning_rate": 3.415219770395765e-05, "loss": 0.2518, "step": 5214000 }, { "epoch": 3.13, "learning_rate": 3.4150097738397084e-05, "loss": 0.256, "step": 5214500 }, { "epoch": 3.13, "learning_rate": 3.4147997772836524e-05, "loss": 0.251, "step": 5215000 }, { "epoch": 3.13, "learning_rate": 3.414589780727596e-05, "loss": 0.2535, "step": 5215500 }, { "epoch": 3.13, "learning_rate": 3.41437978417154e-05, "loss": 0.2564, "step": 5216000 }, { "epoch": 3.13, "learning_rate": 3.414169787615483e-05, "loss": 0.2565, "step": 5216500 }, { "epoch": 3.13, "learning_rate": 3.4139597910594265e-05, "loss": 0.2588, "step": 5217000 }, { "epoch": 3.13, "learning_rate": 3.4137497945033705e-05, "loss": 0.2602, "step": 5217500 }, { "epoch": 3.13, "learning_rate": 3.413539797947314e-05, "loss": 0.2501, "step": 5218000 }, { "epoch": 3.13, "learning_rate": 3.413330221384369e-05, "loss": 0.2626, "step": 5218500 }, { "epoch": 3.13, "learning_rate": 3.4131206448214246e-05, "loss": 0.258, "step": 5219000 }, { "epoch": 3.13, "learning_rate": 3.412910648265368e-05, "loss": 0.2507, "step": 5219500 }, { "epoch": 3.13, "learning_rate": 3.412700651709312e-05, "loss": 0.2591, "step": 5220000 }, { "epoch": 3.13, "learning_rate": 3.412490655153255e-05, "loss": 0.2594, "step": 5220500 }, { "epoch": 3.13, "learning_rate": 3.4122806585971987e-05, "loss": 0.2463, "step": 5221000 }, { "epoch": 3.13, "learning_rate": 3.412070662041143e-05, "loss": 0.2518, "step": 5221500 }, { "epoch": 3.13, "learning_rate": 3.411860665485086e-05, "loss": 0.2528, "step": 5222000 }, { "epoch": 3.13, "learning_rate": 3.4116506689290294e-05, "loss": 0.253, "step": 5222500 }, { "epoch": 3.13, "learning_rate": 3.4114410923660854e-05, "loss": 0.2499, "step": 5223000 }, { "epoch": 3.13, "learning_rate": 3.411231095810029e-05, "loss": 0.2572, "step": 5223500 }, { "epoch": 3.13, "learning_rate": 3.411021099253972e-05, "loss": 0.2511, "step": 5224000 }, { "epoch": 3.13, "learning_rate": 3.410811102697916e-05, "loss": 0.2626, "step": 5224500 }, { "epoch": 3.13, "learning_rate": 3.4106015261349715e-05, "loss": 0.2549, "step": 5225000 }, { "epoch": 3.13, "learning_rate": 3.410391529578915e-05, "loss": 0.2568, "step": 5225500 }, { "epoch": 3.13, "learning_rate": 3.410181533022858e-05, "loss": 0.2539, "step": 5226000 }, { "epoch": 3.13, "learning_rate": 3.409971536466802e-05, "loss": 0.2565, "step": 5226500 }, { "epoch": 3.13, "learning_rate": 3.4097619599038575e-05, "loss": 0.2611, "step": 5227000 }, { "epoch": 3.13, "learning_rate": 3.409551963347801e-05, "loss": 0.2561, "step": 5227500 }, { "epoch": 3.13, "learning_rate": 3.409342386784857e-05, "loss": 0.2574, "step": 5228000 }, { "epoch": 3.13, "learning_rate": 3.4091323902288e-05, "loss": 0.2537, "step": 5228500 }, { "epoch": 3.13, "learning_rate": 3.4089223936727436e-05, "loss": 0.2585, "step": 5229000 }, { "epoch": 3.14, "learning_rate": 3.408712397116687e-05, "loss": 0.2502, "step": 5229500 }, { "epoch": 3.14, "learning_rate": 3.408502400560631e-05, "loss": 0.2536, "step": 5230000 }, { "epoch": 3.14, "learning_rate": 3.408292404004574e-05, "loss": 0.2533, "step": 5230500 }, { "epoch": 3.14, "learning_rate": 3.408082407448518e-05, "loss": 0.2557, "step": 5231000 }, { "epoch": 3.14, "learning_rate": 3.407872410892462e-05, "loss": 0.2581, "step": 5231500 }, { "epoch": 3.14, "learning_rate": 3.407662414336405e-05, "loss": 0.2531, "step": 5232000 }, { "epoch": 3.14, "learning_rate": 3.4074528377734604e-05, "loss": 0.2479, "step": 5232500 }, { "epoch": 3.14, "learning_rate": 3.407242841217404e-05, "loss": 0.2518, "step": 5233000 }, { "epoch": 3.14, "learning_rate": 3.407032844661348e-05, "loss": 0.2536, "step": 5233500 }, { "epoch": 3.14, "learning_rate": 3.406822848105291e-05, "loss": 0.2518, "step": 5234000 }, { "epoch": 3.14, "learning_rate": 3.4066132715423465e-05, "loss": 0.256, "step": 5234500 }, { "epoch": 3.14, "learning_rate": 3.40640327498629e-05, "loss": 0.247, "step": 5235000 }, { "epoch": 3.14, "learning_rate": 3.406193278430234e-05, "loss": 0.2606, "step": 5235500 }, { "epoch": 3.14, "learning_rate": 3.405983701867289e-05, "loss": 0.2537, "step": 5236000 }, { "epoch": 3.14, "learning_rate": 3.4057737053112326e-05, "loss": 0.2579, "step": 5236500 }, { "epoch": 3.14, "learning_rate": 3.4055637087551766e-05, "loss": 0.2557, "step": 5237000 }, { "epoch": 3.14, "learning_rate": 3.40535371219912e-05, "loss": 0.2572, "step": 5237500 }, { "epoch": 3.14, "learning_rate": 3.405143715643063e-05, "loss": 0.249, "step": 5238000 }, { "epoch": 3.14, "learning_rate": 3.4049341390801186e-05, "loss": 0.2525, "step": 5238500 }, { "epoch": 3.14, "learning_rate": 3.4047241425240627e-05, "loss": 0.2527, "step": 5239000 }, { "epoch": 3.14, "learning_rate": 3.404514145968006e-05, "loss": 0.256, "step": 5239500 }, { "epoch": 3.14, "learning_rate": 3.4043045694050614e-05, "loss": 0.2518, "step": 5240000 }, { "epoch": 3.14, "learning_rate": 3.404094572849005e-05, "loss": 0.2595, "step": 5240500 }, { "epoch": 3.14, "learning_rate": 3.403884576292949e-05, "loss": 0.2569, "step": 5241000 }, { "epoch": 3.14, "learning_rate": 3.403674579736892e-05, "loss": 0.2536, "step": 5241500 }, { "epoch": 3.14, "learning_rate": 3.4034645831808354e-05, "loss": 0.2503, "step": 5242000 }, { "epoch": 3.14, "learning_rate": 3.4032545866247794e-05, "loss": 0.2591, "step": 5242500 }, { "epoch": 3.14, "learning_rate": 3.403044590068723e-05, "loss": 0.2567, "step": 5243000 }, { "epoch": 3.14, "learning_rate": 3.402834593512666e-05, "loss": 0.2546, "step": 5243500 }, { "epoch": 3.14, "learning_rate": 3.402625016949722e-05, "loss": 0.2556, "step": 5244000 }, { "epoch": 3.14, "learning_rate": 3.4024150203936655e-05, "loss": 0.2542, "step": 5244500 }, { "epoch": 3.14, "learning_rate": 3.402205023837609e-05, "loss": 0.2514, "step": 5245000 }, { "epoch": 3.14, "learning_rate": 3.401995027281553e-05, "loss": 0.255, "step": 5245500 }, { "epoch": 3.15, "learning_rate": 3.401785030725496e-05, "loss": 0.2479, "step": 5246000 }, { "epoch": 3.15, "learning_rate": 3.4015750341694396e-05, "loss": 0.2508, "step": 5246500 }, { "epoch": 3.15, "learning_rate": 3.4013650376133836e-05, "loss": 0.2485, "step": 5247000 }, { "epoch": 3.15, "learning_rate": 3.401155041057327e-05, "loss": 0.2533, "step": 5247500 }, { "epoch": 3.15, "learning_rate": 3.400945464494382e-05, "loss": 0.2579, "step": 5248000 }, { "epoch": 3.15, "learning_rate": 3.4007358879314377e-05, "loss": 0.2598, "step": 5248500 }, { "epoch": 3.15, "learning_rate": 3.400525891375381e-05, "loss": 0.2464, "step": 5249000 }, { "epoch": 3.15, "learning_rate": 3.400315894819325e-05, "loss": 0.2529, "step": 5249500 }, { "epoch": 3.15, "learning_rate": 3.4001058982632684e-05, "loss": 0.2486, "step": 5250000 }, { "epoch": 3.15, "learning_rate": 3.399895901707212e-05, "loss": 0.2605, "step": 5250500 }, { "epoch": 3.15, "learning_rate": 3.399685905151156e-05, "loss": 0.2558, "step": 5251000 }, { "epoch": 3.15, "learning_rate": 3.399475908595099e-05, "loss": 0.2496, "step": 5251500 }, { "epoch": 3.15, "learning_rate": 3.3992659120390424e-05, "loss": 0.2558, "step": 5252000 }, { "epoch": 3.15, "learning_rate": 3.3990563354760985e-05, "loss": 0.2571, "step": 5252500 }, { "epoch": 3.15, "learning_rate": 3.398846338920042e-05, "loss": 0.2562, "step": 5253000 }, { "epoch": 3.15, "learning_rate": 3.398636342363985e-05, "loss": 0.2596, "step": 5253500 }, { "epoch": 3.15, "learning_rate": 3.398426345807929e-05, "loss": 0.2516, "step": 5254000 }, { "epoch": 3.15, "learning_rate": 3.3982163492518725e-05, "loss": 0.252, "step": 5254500 }, { "epoch": 3.15, "learning_rate": 3.398006772688928e-05, "loss": 0.2546, "step": 5255000 }, { "epoch": 3.15, "learning_rate": 3.397796776132871e-05, "loss": 0.254, "step": 5255500 }, { "epoch": 3.15, "learning_rate": 3.397586779576815e-05, "loss": 0.2551, "step": 5256000 }, { "epoch": 3.15, "learning_rate": 3.3973767830207586e-05, "loss": 0.2544, "step": 5256500 }, { "epoch": 3.15, "learning_rate": 3.397167206457814e-05, "loss": 0.2501, "step": 5257000 }, { "epoch": 3.15, "learning_rate": 3.396957209901757e-05, "loss": 0.2532, "step": 5257500 }, { "epoch": 3.15, "learning_rate": 3.396747213345701e-05, "loss": 0.2499, "step": 5258000 }, { "epoch": 3.15, "learning_rate": 3.396537216789645e-05, "loss": 0.257, "step": 5258500 }, { "epoch": 3.15, "learning_rate": 3.3963276402267e-05, "loss": 0.2563, "step": 5259000 }, { "epoch": 3.15, "learning_rate": 3.3961180636637554e-05, "loss": 0.2565, "step": 5259500 }, { "epoch": 3.15, "learning_rate": 3.3959080671076994e-05, "loss": 0.2536, "step": 5260000 }, { "epoch": 3.15, "learning_rate": 3.395698070551643e-05, "loss": 0.2561, "step": 5260500 }, { "epoch": 3.15, "learning_rate": 3.395488073995586e-05, "loss": 0.2572, "step": 5261000 }, { "epoch": 3.15, "learning_rate": 3.39527807743953e-05, "loss": 0.2549, "step": 5261500 }, { "epoch": 3.15, "learning_rate": 3.3950680808834735e-05, "loss": 0.2533, "step": 5262000 }, { "epoch": 3.16, "learning_rate": 3.394858084327417e-05, "loss": 0.2565, "step": 5262500 }, { "epoch": 3.16, "learning_rate": 3.394648087771361e-05, "loss": 0.2597, "step": 5263000 }, { "epoch": 3.16, "learning_rate": 3.394438511208416e-05, "loss": 0.2579, "step": 5263500 }, { "epoch": 3.16, "learning_rate": 3.3942285146523595e-05, "loss": 0.2614, "step": 5264000 }, { "epoch": 3.16, "learning_rate": 3.394018518096303e-05, "loss": 0.2583, "step": 5264500 }, { "epoch": 3.16, "learning_rate": 3.393808521540247e-05, "loss": 0.2529, "step": 5265000 }, { "epoch": 3.16, "learning_rate": 3.39359852498419e-05, "loss": 0.2567, "step": 5265500 }, { "epoch": 3.16, "learning_rate": 3.3933885284281336e-05, "loss": 0.261, "step": 5266000 }, { "epoch": 3.16, "learning_rate": 3.3931785318720776e-05, "loss": 0.2516, "step": 5266500 }, { "epoch": 3.16, "learning_rate": 3.39296853531602e-05, "loss": 0.2529, "step": 5267000 }, { "epoch": 3.16, "learning_rate": 3.392758958753076e-05, "loss": 0.2516, "step": 5267500 }, { "epoch": 3.16, "learning_rate": 3.3925489621970204e-05, "loss": 0.2542, "step": 5268000 }, { "epoch": 3.16, "learning_rate": 3.392338965640964e-05, "loss": 0.2556, "step": 5268500 }, { "epoch": 3.16, "learning_rate": 3.392128969084907e-05, "loss": 0.2503, "step": 5269000 }, { "epoch": 3.16, "learning_rate": 3.3919193925219624e-05, "loss": 0.2584, "step": 5269500 }, { "epoch": 3.16, "learning_rate": 3.3917093959659064e-05, "loss": 0.2513, "step": 5270000 }, { "epoch": 3.16, "learning_rate": 3.39149939940985e-05, "loss": 0.2567, "step": 5270500 }, { "epoch": 3.16, "learning_rate": 3.391289822846905e-05, "loss": 0.2631, "step": 5271000 }, { "epoch": 3.16, "learning_rate": 3.3910798262908485e-05, "loss": 0.2557, "step": 5271500 }, { "epoch": 3.16, "learning_rate": 3.3908698297347925e-05, "loss": 0.2569, "step": 5272000 }, { "epoch": 3.16, "learning_rate": 3.390659833178736e-05, "loss": 0.2568, "step": 5272500 }, { "epoch": 3.16, "learning_rate": 3.390449836622679e-05, "loss": 0.2523, "step": 5273000 }, { "epoch": 3.16, "learning_rate": 3.390239840066623e-05, "loss": 0.2515, "step": 5273500 }, { "epoch": 3.16, "learning_rate": 3.3900298435105666e-05, "loss": 0.2522, "step": 5274000 }, { "epoch": 3.16, "learning_rate": 3.38981984695451e-05, "loss": 0.2531, "step": 5274500 }, { "epoch": 3.16, "learning_rate": 3.389610270391566e-05, "loss": 0.2605, "step": 5275000 }, { "epoch": 3.16, "learning_rate": 3.389400273835509e-05, "loss": 0.2587, "step": 5275500 }, { "epoch": 3.16, "learning_rate": 3.3891902772794526e-05, "loss": 0.2596, "step": 5276000 }, { "epoch": 3.16, "learning_rate": 3.388980280723396e-05, "loss": 0.2485, "step": 5276500 }, { "epoch": 3.16, "learning_rate": 3.388770284167339e-05, "loss": 0.2592, "step": 5277000 }, { "epoch": 3.16, "learning_rate": 3.3885611275975074e-05, "loss": 0.2545, "step": 5277500 }, { "epoch": 3.16, "learning_rate": 3.388351131041451e-05, "loss": 0.2507, "step": 5278000 }, { "epoch": 3.16, "learning_rate": 3.388141134485394e-05, "loss": 0.259, "step": 5278500 }, { "epoch": 3.16, "learning_rate": 3.387931137929338e-05, "loss": 0.2563, "step": 5279000 }, { "epoch": 3.17, "learning_rate": 3.3877211413732814e-05, "loss": 0.2576, "step": 5279500 }, { "epoch": 3.17, "learning_rate": 3.387511144817225e-05, "loss": 0.2471, "step": 5280000 }, { "epoch": 3.17, "learning_rate": 3.387301148261169e-05, "loss": 0.258, "step": 5280500 }, { "epoch": 3.17, "learning_rate": 3.387091151705112e-05, "loss": 0.2527, "step": 5281000 }, { "epoch": 3.17, "learning_rate": 3.3868815751421675e-05, "loss": 0.2552, "step": 5281500 }, { "epoch": 3.17, "learning_rate": 3.3866715785861115e-05, "loss": 0.2552, "step": 5282000 }, { "epoch": 3.17, "learning_rate": 3.386461582030055e-05, "loss": 0.2535, "step": 5282500 }, { "epoch": 3.17, "learning_rate": 3.386251585473998e-05, "loss": 0.2545, "step": 5283000 }, { "epoch": 3.17, "learning_rate": 3.386041588917942e-05, "loss": 0.2551, "step": 5283500 }, { "epoch": 3.17, "learning_rate": 3.3858320123549976e-05, "loss": 0.2533, "step": 5284000 }, { "epoch": 3.17, "learning_rate": 3.385622015798941e-05, "loss": 0.255, "step": 5284500 }, { "epoch": 3.17, "learning_rate": 3.385412019242884e-05, "loss": 0.2553, "step": 5285000 }, { "epoch": 3.17, "learning_rate": 3.385202022686828e-05, "loss": 0.2562, "step": 5285500 }, { "epoch": 3.17, "learning_rate": 3.384992446123884e-05, "loss": 0.2574, "step": 5286000 }, { "epoch": 3.17, "learning_rate": 3.384782449567827e-05, "loss": 0.2525, "step": 5286500 }, { "epoch": 3.17, "learning_rate": 3.384572453011771e-05, "loss": 0.2488, "step": 5287000 }, { "epoch": 3.17, "learning_rate": 3.3843624564557144e-05, "loss": 0.2606, "step": 5287500 }, { "epoch": 3.17, "learning_rate": 3.38415287989277e-05, "loss": 0.2558, "step": 5288000 }, { "epoch": 3.17, "learning_rate": 3.383942883336713e-05, "loss": 0.257, "step": 5288500 }, { "epoch": 3.17, "learning_rate": 3.383732886780657e-05, "loss": 0.2513, "step": 5289000 }, { "epoch": 3.17, "learning_rate": 3.3835228902246005e-05, "loss": 0.2567, "step": 5289500 }, { "epoch": 3.17, "learning_rate": 3.383313313661656e-05, "loss": 0.2552, "step": 5290000 }, { "epoch": 3.17, "learning_rate": 3.383103737098711e-05, "loss": 0.2554, "step": 5290500 }, { "epoch": 3.17, "learning_rate": 3.3828937405426545e-05, "loss": 0.2557, "step": 5291000 }, { "epoch": 3.17, "learning_rate": 3.3826837439865986e-05, "loss": 0.2614, "step": 5291500 }, { "epoch": 3.17, "learning_rate": 3.382473747430542e-05, "loss": 0.2547, "step": 5292000 }, { "epoch": 3.17, "learning_rate": 3.382263750874485e-05, "loss": 0.2513, "step": 5292500 }, { "epoch": 3.17, "learning_rate": 3.382053754318429e-05, "loss": 0.2552, "step": 5293000 }, { "epoch": 3.17, "learning_rate": 3.3818437577623726e-05, "loss": 0.2542, "step": 5293500 }, { "epoch": 3.17, "learning_rate": 3.3816337612063166e-05, "loss": 0.2522, "step": 5294000 }, { "epoch": 3.17, "learning_rate": 3.381424184643372e-05, "loss": 0.2501, "step": 5294500 }, { "epoch": 3.17, "learning_rate": 3.3812141880873153e-05, "loss": 0.2577, "step": 5295000 }, { "epoch": 3.17, "learning_rate": 3.381004191531259e-05, "loss": 0.2546, "step": 5295500 }, { "epoch": 3.18, "learning_rate": 3.380794194975203e-05, "loss": 0.2575, "step": 5296000 }, { "epoch": 3.18, "learning_rate": 3.380584618412258e-05, "loss": 0.2498, "step": 5296500 }, { "epoch": 3.18, "learning_rate": 3.3803746218562014e-05, "loss": 0.2517, "step": 5297000 }, { "epoch": 3.18, "learning_rate": 3.380164625300145e-05, "loss": 0.2513, "step": 5297500 }, { "epoch": 3.18, "learning_rate": 3.379954628744089e-05, "loss": 0.2496, "step": 5298000 }, { "epoch": 3.18, "learning_rate": 3.379745052181144e-05, "loss": 0.2512, "step": 5298500 }, { "epoch": 3.18, "learning_rate": 3.3795350556250875e-05, "loss": 0.2527, "step": 5299000 }, { "epoch": 3.18, "learning_rate": 3.379325059069031e-05, "loss": 0.2517, "step": 5299500 }, { "epoch": 3.18, "learning_rate": 3.379115062512975e-05, "loss": 0.2518, "step": 5300000 }, { "epoch": 3.18, "eval_loss": 0.23735223710536957, "eval_runtime": 1454.4863, "eval_samples_per_second": 362.135, "eval_steps_per_second": 60.356, "step": 5300000 }, { "epoch": 3.18, "learning_rate": 3.37890548595003e-05, "loss": 0.2539, "step": 5300500 }, { "epoch": 3.18, "learning_rate": 3.378695909387086e-05, "loss": 0.2582, "step": 5301000 }, { "epoch": 3.18, "learning_rate": 3.3784859128310296e-05, "loss": 0.2483, "step": 5301500 }, { "epoch": 3.18, "learning_rate": 3.378275916274973e-05, "loss": 0.2557, "step": 5302000 }, { "epoch": 3.18, "learning_rate": 3.378065919718916e-05, "loss": 0.2657, "step": 5302500 }, { "epoch": 3.18, "learning_rate": 3.3778559231628596e-05, "loss": 0.2534, "step": 5303000 }, { "epoch": 3.18, "learning_rate": 3.3776459266068037e-05, "loss": 0.2534, "step": 5303500 }, { "epoch": 3.18, "learning_rate": 3.377435930050747e-05, "loss": 0.2567, "step": 5304000 }, { "epoch": 3.18, "learning_rate": 3.3772259334946903e-05, "loss": 0.2536, "step": 5304500 }, { "epoch": 3.18, "learning_rate": 3.377016356931746e-05, "loss": 0.2561, "step": 5305000 }, { "epoch": 3.18, "learning_rate": 3.37680636037569e-05, "loss": 0.2483, "step": 5305500 }, { "epoch": 3.18, "learning_rate": 3.376596363819633e-05, "loss": 0.2532, "step": 5306000 }, { "epoch": 3.18, "learning_rate": 3.376386787256689e-05, "loss": 0.2497, "step": 5306500 }, { "epoch": 3.18, "learning_rate": 3.3761767907006325e-05, "loss": 0.253, "step": 5307000 }, { "epoch": 3.18, "learning_rate": 3.375966794144576e-05, "loss": 0.2532, "step": 5307500 }, { "epoch": 3.18, "learning_rate": 3.375757217581632e-05, "loss": 0.2546, "step": 5308000 }, { "epoch": 3.18, "learning_rate": 3.375547221025575e-05, "loss": 0.254, "step": 5308500 }, { "epoch": 3.18, "learning_rate": 3.375337224469519e-05, "loss": 0.2535, "step": 5309000 }, { "epoch": 3.18, "learning_rate": 3.375127227913462e-05, "loss": 0.2502, "step": 5309500 }, { "epoch": 3.18, "learning_rate": 3.374917231357405e-05, "loss": 0.2585, "step": 5310000 }, { "epoch": 3.18, "learning_rate": 3.374707234801349e-05, "loss": 0.253, "step": 5310500 }, { "epoch": 3.18, "learning_rate": 3.3744972382452926e-05, "loss": 0.2536, "step": 5311000 }, { "epoch": 3.18, "learning_rate": 3.374287241689236e-05, "loss": 0.253, "step": 5311500 }, { "epoch": 3.18, "learning_rate": 3.37407724513318e-05, "loss": 0.2562, "step": 5312000 }, { "epoch": 3.19, "learning_rate": 3.373867248577123e-05, "loss": 0.2557, "step": 5312500 }, { "epoch": 3.19, "learning_rate": 3.3736572520210667e-05, "loss": 0.2626, "step": 5313000 }, { "epoch": 3.19, "learning_rate": 3.373447255465011e-05, "loss": 0.2634, "step": 5313500 }, { "epoch": 3.19, "learning_rate": 3.373237678902066e-05, "loss": 0.2541, "step": 5314000 }, { "epoch": 3.19, "learning_rate": 3.3730276823460094e-05, "loss": 0.2522, "step": 5314500 }, { "epoch": 3.19, "learning_rate": 3.3728176857899534e-05, "loss": 0.256, "step": 5315000 }, { "epoch": 3.19, "learning_rate": 3.372607689233897e-05, "loss": 0.2526, "step": 5315500 }, { "epoch": 3.19, "learning_rate": 3.372398112670952e-05, "loss": 0.25, "step": 5316000 }, { "epoch": 3.19, "learning_rate": 3.3721885361080075e-05, "loss": 0.2571, "step": 5316500 }, { "epoch": 3.19, "learning_rate": 3.371978539551951e-05, "loss": 0.2569, "step": 5317000 }, { "epoch": 3.19, "learning_rate": 3.371768542995895e-05, "loss": 0.2534, "step": 5317500 }, { "epoch": 3.19, "learning_rate": 3.371558546439838e-05, "loss": 0.2603, "step": 5318000 }, { "epoch": 3.19, "learning_rate": 3.3713485498837815e-05, "loss": 0.2516, "step": 5318500 }, { "epoch": 3.19, "learning_rate": 3.3711385533277255e-05, "loss": 0.2544, "step": 5319000 }, { "epoch": 3.19, "learning_rate": 3.370928556771669e-05, "loss": 0.2526, "step": 5319500 }, { "epoch": 3.19, "learning_rate": 3.370718560215612e-05, "loss": 0.2536, "step": 5320000 }, { "epoch": 3.19, "learning_rate": 3.370508983652668e-05, "loss": 0.2541, "step": 5320500 }, { "epoch": 3.19, "learning_rate": 3.3702989870966116e-05, "loss": 0.2553, "step": 5321000 }, { "epoch": 3.19, "learning_rate": 3.370088990540555e-05, "loss": 0.2522, "step": 5321500 }, { "epoch": 3.19, "learning_rate": 3.369878993984499e-05, "loss": 0.252, "step": 5322000 }, { "epoch": 3.19, "learning_rate": 3.3696694174215543e-05, "loss": 0.2552, "step": 5322500 }, { "epoch": 3.19, "learning_rate": 3.369459420865498e-05, "loss": 0.2562, "step": 5323000 }, { "epoch": 3.19, "learning_rate": 3.369249424309441e-05, "loss": 0.256, "step": 5323500 }, { "epoch": 3.19, "learning_rate": 3.369039427753385e-05, "loss": 0.2539, "step": 5324000 }, { "epoch": 3.19, "learning_rate": 3.3688294311973284e-05, "loss": 0.2554, "step": 5324500 }, { "epoch": 3.19, "learning_rate": 3.368619854634384e-05, "loss": 0.2549, "step": 5325000 }, { "epoch": 3.19, "learning_rate": 3.36841027807144e-05, "loss": 0.2595, "step": 5325500 }, { "epoch": 3.19, "learning_rate": 3.368200281515383e-05, "loss": 0.2544, "step": 5326000 }, { "epoch": 3.19, "learning_rate": 3.3679902849593265e-05, "loss": 0.2471, "step": 5326500 }, { "epoch": 3.19, "learning_rate": 3.36778028840327e-05, "loss": 0.2548, "step": 5327000 }, { "epoch": 3.19, "learning_rate": 3.367570291847214e-05, "loss": 0.2553, "step": 5327500 }, { "epoch": 3.19, "learning_rate": 3.367360295291157e-05, "loss": 0.2529, "step": 5328000 }, { "epoch": 3.19, "learning_rate": 3.3671502987351006e-05, "loss": 0.2522, "step": 5328500 }, { "epoch": 3.19, "learning_rate": 3.3669403021790446e-05, "loss": 0.2502, "step": 5329000 }, { "epoch": 3.2, "learning_rate": 3.3667307256161e-05, "loss": 0.2517, "step": 5329500 }, { "epoch": 3.2, "learning_rate": 3.366521149053156e-05, "loss": 0.2532, "step": 5330000 }, { "epoch": 3.2, "learning_rate": 3.366311152497099e-05, "loss": 0.2507, "step": 5330500 }, { "epoch": 3.2, "learning_rate": 3.366101155941042e-05, "loss": 0.255, "step": 5331000 }, { "epoch": 3.2, "learning_rate": 3.365891159384986e-05, "loss": 0.2541, "step": 5331500 }, { "epoch": 3.2, "learning_rate": 3.3656811628289294e-05, "loss": 0.2571, "step": 5332000 }, { "epoch": 3.2, "learning_rate": 3.365471166272873e-05, "loss": 0.2555, "step": 5332500 }, { "epoch": 3.2, "learning_rate": 3.365261169716817e-05, "loss": 0.2614, "step": 5333000 }, { "epoch": 3.2, "learning_rate": 3.36505117316076e-05, "loss": 0.2566, "step": 5333500 }, { "epoch": 3.2, "learning_rate": 3.3648415965978154e-05, "loss": 0.2596, "step": 5334000 }, { "epoch": 3.2, "learning_rate": 3.3646316000417595e-05, "loss": 0.2603, "step": 5334500 }, { "epoch": 3.2, "learning_rate": 3.364421603485703e-05, "loss": 0.2544, "step": 5335000 }, { "epoch": 3.2, "learning_rate": 3.364211606929646e-05, "loss": 0.2552, "step": 5335500 }, { "epoch": 3.2, "learning_rate": 3.3640020303667015e-05, "loss": 0.2539, "step": 5336000 }, { "epoch": 3.2, "learning_rate": 3.3637920338106455e-05, "loss": 0.2524, "step": 5336500 }, { "epoch": 3.2, "learning_rate": 3.3635824572477016e-05, "loss": 0.2504, "step": 5337000 }, { "epoch": 3.2, "learning_rate": 3.363372460691645e-05, "loss": 0.2547, "step": 5337500 }, { "epoch": 3.2, "learning_rate": 3.3631624641355876e-05, "loss": 0.2549, "step": 5338000 }, { "epoch": 3.2, "learning_rate": 3.3629524675795316e-05, "loss": 0.2499, "step": 5338500 }, { "epoch": 3.2, "learning_rate": 3.362742471023475e-05, "loss": 0.2497, "step": 5339000 }, { "epoch": 3.2, "learning_rate": 3.362532474467418e-05, "loss": 0.255, "step": 5339500 }, { "epoch": 3.2, "learning_rate": 3.362322477911362e-05, "loss": 0.2553, "step": 5340000 }, { "epoch": 3.2, "learning_rate": 3.3621124813553057e-05, "loss": 0.2584, "step": 5340500 }, { "epoch": 3.2, "learning_rate": 3.361902484799249e-05, "loss": 0.2539, "step": 5341000 }, { "epoch": 3.2, "learning_rate": 3.361692908236305e-05, "loss": 0.2544, "step": 5341500 }, { "epoch": 3.2, "learning_rate": 3.3614829116802484e-05, "loss": 0.2509, "step": 5342000 }, { "epoch": 3.2, "learning_rate": 3.361272915124192e-05, "loss": 0.2552, "step": 5342500 }, { "epoch": 3.2, "learning_rate": 3.361062918568136e-05, "loss": 0.2626, "step": 5343000 }, { "epoch": 3.2, "learning_rate": 3.360853342005191e-05, "loss": 0.256, "step": 5343500 }, { "epoch": 3.2, "learning_rate": 3.3606433454491345e-05, "loss": 0.249, "step": 5344000 }, { "epoch": 3.2, "learning_rate": 3.360433348893078e-05, "loss": 0.2506, "step": 5344500 }, { "epoch": 3.2, "learning_rate": 3.360223772330133e-05, "loss": 0.2565, "step": 5345000 }, { "epoch": 3.2, "learning_rate": 3.360013775774077e-05, "loss": 0.2474, "step": 5345500 }, { "epoch": 3.21, "learning_rate": 3.3598037792180205e-05, "loss": 0.258, "step": 5346000 }, { "epoch": 3.21, "learning_rate": 3.359593782661964e-05, "loss": 0.258, "step": 5346500 }, { "epoch": 3.21, "learning_rate": 3.359383786105908e-05, "loss": 0.2536, "step": 5347000 }, { "epoch": 3.21, "learning_rate": 3.359174209542963e-05, "loss": 0.2545, "step": 5347500 }, { "epoch": 3.21, "learning_rate": 3.3589642129869066e-05, "loss": 0.2492, "step": 5348000 }, { "epoch": 3.21, "learning_rate": 3.3587542164308506e-05, "loss": 0.2528, "step": 5348500 }, { "epoch": 3.21, "learning_rate": 3.358544219874794e-05, "loss": 0.253, "step": 5349000 }, { "epoch": 3.21, "learning_rate": 3.358334223318737e-05, "loss": 0.251, "step": 5349500 }, { "epoch": 3.21, "learning_rate": 3.3581242267626813e-05, "loss": 0.2609, "step": 5350000 }, { "epoch": 3.21, "learning_rate": 3.357914230206625e-05, "loss": 0.257, "step": 5350500 }, { "epoch": 3.21, "learning_rate": 3.357704233650568e-05, "loss": 0.2572, "step": 5351000 }, { "epoch": 3.21, "learning_rate": 3.3574946570876234e-05, "loss": 0.2498, "step": 5351500 }, { "epoch": 3.21, "learning_rate": 3.3572846605315674e-05, "loss": 0.2536, "step": 5352000 }, { "epoch": 3.21, "learning_rate": 3.357074663975511e-05, "loss": 0.2572, "step": 5352500 }, { "epoch": 3.21, "learning_rate": 3.356864667419454e-05, "loss": 0.2532, "step": 5353000 }, { "epoch": 3.21, "learning_rate": 3.356654670863398e-05, "loss": 0.2563, "step": 5353500 }, { "epoch": 3.21, "learning_rate": 3.3564450943004535e-05, "loss": 0.2546, "step": 5354000 }, { "epoch": 3.21, "learning_rate": 3.356235097744397e-05, "loss": 0.2553, "step": 5354500 }, { "epoch": 3.21, "learning_rate": 3.35602510118834e-05, "loss": 0.2567, "step": 5355000 }, { "epoch": 3.21, "learning_rate": 3.355815104632284e-05, "loss": 0.2548, "step": 5355500 }, { "epoch": 3.21, "learning_rate": 3.3556055280693396e-05, "loss": 0.2599, "step": 5356000 }, { "epoch": 3.21, "learning_rate": 3.355395531513283e-05, "loss": 0.2524, "step": 5356500 }, { "epoch": 3.21, "learning_rate": 3.355185534957227e-05, "loss": 0.2534, "step": 5357000 }, { "epoch": 3.21, "learning_rate": 3.35497553840117e-05, "loss": 0.2585, "step": 5357500 }, { "epoch": 3.21, "learning_rate": 3.3547655418451136e-05, "loss": 0.2561, "step": 5358000 }, { "epoch": 3.21, "learning_rate": 3.3545555452890576e-05, "loss": 0.2523, "step": 5358500 }, { "epoch": 3.21, "learning_rate": 3.354345548733001e-05, "loss": 0.2534, "step": 5359000 }, { "epoch": 3.21, "learning_rate": 3.354135552176944e-05, "loss": 0.2536, "step": 5359500 }, { "epoch": 3.21, "learning_rate": 3.353925975614e-05, "loss": 0.2562, "step": 5360000 }, { "epoch": 3.21, "learning_rate": 3.353715979057944e-05, "loss": 0.2541, "step": 5360500 }, { "epoch": 3.21, "learning_rate": 3.353505982501887e-05, "loss": 0.2548, "step": 5361000 }, { "epoch": 3.21, "learning_rate": 3.3532959859458304e-05, "loss": 0.2524, "step": 5361500 }, { "epoch": 3.21, "learning_rate": 3.353086409382886e-05, "loss": 0.2531, "step": 5362000 }, { "epoch": 3.22, "learning_rate": 3.35287641282683e-05, "loss": 0.2537, "step": 5362500 }, { "epoch": 3.22, "learning_rate": 3.352666416270773e-05, "loss": 0.2538, "step": 5363000 }, { "epoch": 3.22, "learning_rate": 3.3524564197147165e-05, "loss": 0.2563, "step": 5363500 }, { "epoch": 3.22, "learning_rate": 3.3522468431517725e-05, "loss": 0.2511, "step": 5364000 }, { "epoch": 3.22, "learning_rate": 3.352036846595716e-05, "loss": 0.2696, "step": 5364500 }, { "epoch": 3.22, "learning_rate": 3.351826850039659e-05, "loss": 0.2546, "step": 5365000 }, { "epoch": 3.22, "learning_rate": 3.351616853483603e-05, "loss": 0.2555, "step": 5365500 }, { "epoch": 3.22, "learning_rate": 3.3514072769206586e-05, "loss": 0.2568, "step": 5366000 }, { "epoch": 3.22, "learning_rate": 3.351197280364602e-05, "loss": 0.2541, "step": 5366500 }, { "epoch": 3.22, "learning_rate": 3.350987283808545e-05, "loss": 0.2531, "step": 5367000 }, { "epoch": 3.22, "learning_rate": 3.350777287252489e-05, "loss": 0.247, "step": 5367500 }, { "epoch": 3.22, "learning_rate": 3.3505672906964327e-05, "loss": 0.2468, "step": 5368000 }, { "epoch": 3.22, "learning_rate": 3.350357714133488e-05, "loss": 0.2594, "step": 5368500 }, { "epoch": 3.22, "learning_rate": 3.3501477175774314e-05, "loss": 0.2515, "step": 5369000 }, { "epoch": 3.22, "learning_rate": 3.3499377210213754e-05, "loss": 0.252, "step": 5369500 }, { "epoch": 3.22, "learning_rate": 3.349727724465319e-05, "loss": 0.2547, "step": 5370000 }, { "epoch": 3.22, "learning_rate": 3.349517727909262e-05, "loss": 0.2573, "step": 5370500 }, { "epoch": 3.22, "learning_rate": 3.349307731353206e-05, "loss": 0.2474, "step": 5371000 }, { "epoch": 3.22, "learning_rate": 3.3490977347971494e-05, "loss": 0.2502, "step": 5371500 }, { "epoch": 3.22, "learning_rate": 3.348887738241093e-05, "loss": 0.255, "step": 5372000 }, { "epoch": 3.22, "learning_rate": 3.348678161678149e-05, "loss": 0.2567, "step": 5372500 }, { "epoch": 3.22, "learning_rate": 3.348468165122092e-05, "loss": 0.2514, "step": 5373000 }, { "epoch": 3.22, "learning_rate": 3.3482581685660355e-05, "loss": 0.2573, "step": 5373500 }, { "epoch": 3.22, "learning_rate": 3.3480481720099795e-05, "loss": 0.2538, "step": 5374000 }, { "epoch": 3.22, "learning_rate": 3.347838595447035e-05, "loss": 0.2546, "step": 5374500 }, { "epoch": 3.22, "learning_rate": 3.347628598890978e-05, "loss": 0.2518, "step": 5375000 }, { "epoch": 3.22, "learning_rate": 3.3474186023349216e-05, "loss": 0.2577, "step": 5375500 }, { "epoch": 3.22, "learning_rate": 3.3472086057788656e-05, "loss": 0.248, "step": 5376000 }, { "epoch": 3.22, "learning_rate": 3.346999029215921e-05, "loss": 0.2518, "step": 5376500 }, { "epoch": 3.22, "learning_rate": 3.346789032659864e-05, "loss": 0.2563, "step": 5377000 }, { "epoch": 3.22, "learning_rate": 3.3465790361038077e-05, "loss": 0.2553, "step": 5377500 }, { "epoch": 3.22, "learning_rate": 3.346369039547752e-05, "loss": 0.2573, "step": 5378000 }, { "epoch": 3.22, "learning_rate": 3.346159042991695e-05, "loss": 0.2474, "step": 5378500 }, { "epoch": 3.22, "learning_rate": 3.3459490464356384e-05, "loss": 0.2536, "step": 5379000 }, { "epoch": 3.23, "learning_rate": 3.345739049879582e-05, "loss": 0.2524, "step": 5379500 }, { "epoch": 3.23, "learning_rate": 3.345529053323525e-05, "loss": 0.2513, "step": 5380000 }, { "epoch": 3.23, "learning_rate": 3.345319476760581e-05, "loss": 0.2572, "step": 5380500 }, { "epoch": 3.23, "learning_rate": 3.345109480204525e-05, "loss": 0.2527, "step": 5381000 }, { "epoch": 3.23, "learning_rate": 3.344899483648468e-05, "loss": 0.2523, "step": 5381500 }, { "epoch": 3.23, "learning_rate": 3.344690327078636e-05, "loss": 0.2573, "step": 5382000 }, { "epoch": 3.23, "learning_rate": 3.344480330522579e-05, "loss": 0.2503, "step": 5382500 }, { "epoch": 3.23, "learning_rate": 3.3442703339665225e-05, "loss": 0.2452, "step": 5383000 }, { "epoch": 3.23, "learning_rate": 3.3440603374104666e-05, "loss": 0.2564, "step": 5383500 }, { "epoch": 3.23, "learning_rate": 3.34385034085441e-05, "loss": 0.2532, "step": 5384000 }, { "epoch": 3.23, "learning_rate": 3.343640344298354e-05, "loss": 0.2542, "step": 5384500 }, { "epoch": 3.23, "learning_rate": 3.343430347742297e-05, "loss": 0.2495, "step": 5385000 }, { "epoch": 3.23, "learning_rate": 3.3432203511862406e-05, "loss": 0.2538, "step": 5385500 }, { "epoch": 3.23, "learning_rate": 3.3430103546301846e-05, "loss": 0.257, "step": 5386000 }, { "epoch": 3.23, "learning_rate": 3.342800358074127e-05, "loss": 0.2493, "step": 5386500 }, { "epoch": 3.23, "learning_rate": 3.3425903615180707e-05, "loss": 0.2542, "step": 5387000 }, { "epoch": 3.23, "learning_rate": 3.342380364962015e-05, "loss": 0.2558, "step": 5387500 }, { "epoch": 3.23, "learning_rate": 3.342170368405958e-05, "loss": 0.2514, "step": 5388000 }, { "epoch": 3.23, "learning_rate": 3.3419607918430134e-05, "loss": 0.258, "step": 5388500 }, { "epoch": 3.23, "learning_rate": 3.341750795286957e-05, "loss": 0.2553, "step": 5389000 }, { "epoch": 3.23, "learning_rate": 3.341541218724013e-05, "loss": 0.2483, "step": 5389500 }, { "epoch": 3.23, "learning_rate": 3.341331222167957e-05, "loss": 0.258, "step": 5390000 }, { "epoch": 3.23, "learning_rate": 3.3411212256119e-05, "loss": 0.2552, "step": 5390500 }, { "epoch": 3.23, "learning_rate": 3.3409112290558435e-05, "loss": 0.2581, "step": 5391000 }, { "epoch": 3.23, "learning_rate": 3.340701232499787e-05, "loss": 0.2564, "step": 5391500 }, { "epoch": 3.23, "learning_rate": 3.34049123594373e-05, "loss": 0.2524, "step": 5392000 }, { "epoch": 3.23, "learning_rate": 3.340281239387674e-05, "loss": 0.2579, "step": 5392500 }, { "epoch": 3.23, "learning_rate": 3.3400712428316175e-05, "loss": 0.2524, "step": 5393000 }, { "epoch": 3.23, "learning_rate": 3.339861666268673e-05, "loss": 0.2556, "step": 5393500 }, { "epoch": 3.23, "learning_rate": 3.339651669712616e-05, "loss": 0.2531, "step": 5394000 }, { "epoch": 3.23, "learning_rate": 3.33944167315656e-05, "loss": 0.2546, "step": 5394500 }, { "epoch": 3.23, "learning_rate": 3.3392316766005036e-05, "loss": 0.2492, "step": 5395000 }, { "epoch": 3.23, "learning_rate": 3.3390221000375596e-05, "loss": 0.2552, "step": 5395500 }, { "epoch": 3.24, "learning_rate": 3.338812103481502e-05, "loss": 0.2535, "step": 5396000 }, { "epoch": 3.24, "learning_rate": 3.338602106925446e-05, "loss": 0.2571, "step": 5396500 }, { "epoch": 3.24, "learning_rate": 3.3383925303625024e-05, "loss": 0.2509, "step": 5397000 }, { "epoch": 3.24, "learning_rate": 3.338182533806446e-05, "loss": 0.256, "step": 5397500 }, { "epoch": 3.24, "learning_rate": 3.337972537250389e-05, "loss": 0.2537, "step": 5398000 }, { "epoch": 3.24, "learning_rate": 3.3377625406943324e-05, "loss": 0.2589, "step": 5398500 }, { "epoch": 3.24, "learning_rate": 3.337552544138276e-05, "loss": 0.2517, "step": 5399000 }, { "epoch": 3.24, "learning_rate": 3.33734254758222e-05, "loss": 0.2654, "step": 5399500 }, { "epoch": 3.24, "learning_rate": 3.337132551026163e-05, "loss": 0.2548, "step": 5400000 }, { "epoch": 3.24, "eval_loss": 0.2361367791891098, "eval_runtime": 1453.7115, "eval_samples_per_second": 362.328, "eval_steps_per_second": 60.388, "step": 5400000 }, { "epoch": 3.24, "learning_rate": 3.3369225544701065e-05, "loss": 0.2508, "step": 5400500 }, { "epoch": 3.24, "learning_rate": 3.336712977907162e-05, "loss": 0.2485, "step": 5401000 }, { "epoch": 3.24, "learning_rate": 3.336502981351106e-05, "loss": 0.2525, "step": 5401500 }, { "epoch": 3.24, "learning_rate": 3.336292984795049e-05, "loss": 0.2617, "step": 5402000 }, { "epoch": 3.24, "learning_rate": 3.3360829882389925e-05, "loss": 0.2524, "step": 5402500 }, { "epoch": 3.24, "learning_rate": 3.335873411676048e-05, "loss": 0.2587, "step": 5403000 }, { "epoch": 3.24, "learning_rate": 3.335663415119992e-05, "loss": 0.2513, "step": 5403500 }, { "epoch": 3.24, "learning_rate": 3.335453838557048e-05, "loss": 0.249, "step": 5404000 }, { "epoch": 3.24, "learning_rate": 3.335243842000991e-05, "loss": 0.2568, "step": 5404500 }, { "epoch": 3.24, "learning_rate": 3.3350338454449347e-05, "loss": 0.254, "step": 5405000 }, { "epoch": 3.24, "learning_rate": 3.334823848888878e-05, "loss": 0.2526, "step": 5405500 }, { "epoch": 3.24, "learning_rate": 3.334614272325934e-05, "loss": 0.2501, "step": 5406000 }, { "epoch": 3.24, "learning_rate": 3.3344042757698774e-05, "loss": 0.2539, "step": 5406500 }, { "epoch": 3.24, "learning_rate": 3.3341942792138214e-05, "loss": 0.2545, "step": 5407000 }, { "epoch": 3.24, "learning_rate": 3.333984282657765e-05, "loss": 0.2534, "step": 5407500 }, { "epoch": 3.24, "learning_rate": 3.3337742861017074e-05, "loss": 0.2526, "step": 5408000 }, { "epoch": 3.24, "learning_rate": 3.3335642895456514e-05, "loss": 0.2499, "step": 5408500 }, { "epoch": 3.24, "learning_rate": 3.333354292989595e-05, "loss": 0.2523, "step": 5409000 }, { "epoch": 3.24, "learning_rate": 3.333144296433538e-05, "loss": 0.2559, "step": 5409500 }, { "epoch": 3.24, "learning_rate": 3.332934299877482e-05, "loss": 0.2485, "step": 5410000 }, { "epoch": 3.24, "learning_rate": 3.3327247233145375e-05, "loss": 0.2538, "step": 5410500 }, { "epoch": 3.24, "learning_rate": 3.3325151467515935e-05, "loss": 0.2542, "step": 5411000 }, { "epoch": 3.24, "learning_rate": 3.332305150195537e-05, "loss": 0.2598, "step": 5411500 }, { "epoch": 3.24, "learning_rate": 3.33209515363948e-05, "loss": 0.2528, "step": 5412000 }, { "epoch": 3.25, "learning_rate": 3.3318851570834236e-05, "loss": 0.2512, "step": 5412500 }, { "epoch": 3.25, "learning_rate": 3.331675160527367e-05, "loss": 0.2523, "step": 5413000 }, { "epoch": 3.25, "learning_rate": 3.331465583964423e-05, "loss": 0.2522, "step": 5413500 }, { "epoch": 3.25, "learning_rate": 3.331255587408367e-05, "loss": 0.2549, "step": 5414000 }, { "epoch": 3.25, "learning_rate": 3.33104559085231e-05, "loss": 0.2534, "step": 5414500 }, { "epoch": 3.25, "learning_rate": 3.330835594296253e-05, "loss": 0.2518, "step": 5415000 }, { "epoch": 3.25, "learning_rate": 3.330625597740197e-05, "loss": 0.2602, "step": 5415500 }, { "epoch": 3.25, "learning_rate": 3.3304156011841404e-05, "loss": 0.2483, "step": 5416000 }, { "epoch": 3.25, "learning_rate": 3.330205604628084e-05, "loss": 0.2539, "step": 5416500 }, { "epoch": 3.25, "learning_rate": 3.329995608072028e-05, "loss": 0.2467, "step": 5417000 }, { "epoch": 3.25, "learning_rate": 3.329786031509083e-05, "loss": 0.2529, "step": 5417500 }, { "epoch": 3.25, "learning_rate": 3.3295760349530264e-05, "loss": 0.2587, "step": 5418000 }, { "epoch": 3.25, "learning_rate": 3.3293664583900825e-05, "loss": 0.2592, "step": 5418500 }, { "epoch": 3.25, "learning_rate": 3.329156461834026e-05, "loss": 0.2531, "step": 5419000 }, { "epoch": 3.25, "learning_rate": 3.328946465277969e-05, "loss": 0.2554, "step": 5419500 }, { "epoch": 3.25, "learning_rate": 3.3287364687219125e-05, "loss": 0.253, "step": 5420000 }, { "epoch": 3.25, "learning_rate": 3.3285264721658565e-05, "loss": 0.251, "step": 5420500 }, { "epoch": 3.25, "learning_rate": 3.3283164756098e-05, "loss": 0.2472, "step": 5421000 }, { "epoch": 3.25, "learning_rate": 3.328106479053743e-05, "loss": 0.2511, "step": 5421500 }, { "epoch": 3.25, "learning_rate": 3.3278969024907986e-05, "loss": 0.254, "step": 5422000 }, { "epoch": 3.25, "learning_rate": 3.3276869059347426e-05, "loss": 0.2521, "step": 5422500 }, { "epoch": 3.25, "learning_rate": 3.327476909378686e-05, "loss": 0.2535, "step": 5423000 }, { "epoch": 3.25, "learning_rate": 3.327266912822629e-05, "loss": 0.2524, "step": 5423500 }, { "epoch": 3.25, "learning_rate": 3.327056916266573e-05, "loss": 0.2569, "step": 5424000 }, { "epoch": 3.25, "learning_rate": 3.326846919710517e-05, "loss": 0.2571, "step": 5424500 }, { "epoch": 3.25, "learning_rate": 3.326637343147572e-05, "loss": 0.2531, "step": 5425000 }, { "epoch": 3.25, "learning_rate": 3.3264273465915154e-05, "loss": 0.2493, "step": 5425500 }, { "epoch": 3.25, "learning_rate": 3.3262173500354594e-05, "loss": 0.2485, "step": 5426000 }, { "epoch": 3.25, "learning_rate": 3.326007353479403e-05, "loss": 0.2508, "step": 5426500 }, { "epoch": 3.25, "learning_rate": 3.325797356923346e-05, "loss": 0.252, "step": 5427000 }, { "epoch": 3.25, "learning_rate": 3.32558736036729e-05, "loss": 0.2536, "step": 5427500 }, { "epoch": 3.25, "learning_rate": 3.3253773638112335e-05, "loss": 0.2477, "step": 5428000 }, { "epoch": 3.25, "learning_rate": 3.325167367255177e-05, "loss": 0.2528, "step": 5428500 }, { "epoch": 3.25, "learning_rate": 3.324957790692233e-05, "loss": 0.2586, "step": 5429000 }, { "epoch": 3.26, "learning_rate": 3.324747794136176e-05, "loss": 0.2525, "step": 5429500 }, { "epoch": 3.26, "learning_rate": 3.3245382175732315e-05, "loss": 0.2492, "step": 5430000 }, { "epoch": 3.26, "learning_rate": 3.324328221017175e-05, "loss": 0.2544, "step": 5430500 }, { "epoch": 3.26, "learning_rate": 3.324118224461119e-05, "loss": 0.2509, "step": 5431000 }, { "epoch": 3.26, "learning_rate": 3.323908227905062e-05, "loss": 0.2519, "step": 5431500 }, { "epoch": 3.26, "learning_rate": 3.3236982313490056e-05, "loss": 0.261, "step": 5432000 }, { "epoch": 3.26, "learning_rate": 3.323488654786061e-05, "loss": 0.2533, "step": 5432500 }, { "epoch": 3.26, "learning_rate": 3.323278658230005e-05, "loss": 0.2509, "step": 5433000 }, { "epoch": 3.26, "learning_rate": 3.323068661673948e-05, "loss": 0.2561, "step": 5433500 }, { "epoch": 3.26, "learning_rate": 3.322858665117892e-05, "loss": 0.25, "step": 5434000 }, { "epoch": 3.26, "learning_rate": 3.322648668561836e-05, "loss": 0.2555, "step": 5434500 }, { "epoch": 3.26, "learning_rate": 3.322438672005779e-05, "loss": 0.2517, "step": 5435000 }, { "epoch": 3.26, "learning_rate": 3.3222286754497224e-05, "loss": 0.2538, "step": 5435500 }, { "epoch": 3.26, "learning_rate": 3.3220186788936664e-05, "loss": 0.253, "step": 5436000 }, { "epoch": 3.26, "learning_rate": 3.321809102330722e-05, "loss": 0.2545, "step": 5436500 }, { "epoch": 3.26, "learning_rate": 3.321599105774665e-05, "loss": 0.2574, "step": 5437000 }, { "epoch": 3.26, "learning_rate": 3.321389109218609e-05, "loss": 0.2488, "step": 5437500 }, { "epoch": 3.26, "learning_rate": 3.3211791126625525e-05, "loss": 0.2512, "step": 5438000 }, { "epoch": 3.26, "learning_rate": 3.320969536099608e-05, "loss": 0.2601, "step": 5438500 }, { "epoch": 3.26, "learning_rate": 3.320759539543551e-05, "loss": 0.2541, "step": 5439000 }, { "epoch": 3.26, "learning_rate": 3.320549542987495e-05, "loss": 0.2534, "step": 5439500 }, { "epoch": 3.26, "learning_rate": 3.3203395464314386e-05, "loss": 0.2527, "step": 5440000 }, { "epoch": 3.26, "learning_rate": 3.320129969868494e-05, "loss": 0.2543, "step": 5440500 }, { "epoch": 3.26, "learning_rate": 3.319919973312437e-05, "loss": 0.2534, "step": 5441000 }, { "epoch": 3.26, "learning_rate": 3.319709976756381e-05, "loss": 0.2517, "step": 5441500 }, { "epoch": 3.26, "learning_rate": 3.3194999802003246e-05, "loss": 0.2533, "step": 5442000 }, { "epoch": 3.26, "learning_rate": 3.31929040363738e-05, "loss": 0.2509, "step": 5442500 }, { "epoch": 3.26, "learning_rate": 3.319080407081324e-05, "loss": 0.2559, "step": 5443000 }, { "epoch": 3.26, "learning_rate": 3.3188704105252674e-05, "loss": 0.2556, "step": 5443500 }, { "epoch": 3.26, "learning_rate": 3.318660413969211e-05, "loss": 0.2555, "step": 5444000 }, { "epoch": 3.26, "learning_rate": 3.318450417413155e-05, "loss": 0.2491, "step": 5444500 }, { "epoch": 3.26, "learning_rate": 3.31824084085021e-05, "loss": 0.2538, "step": 5445000 }, { "epoch": 3.26, "learning_rate": 3.3180308442941534e-05, "loss": 0.2507, "step": 5445500 }, { "epoch": 3.27, "learning_rate": 3.317820847738097e-05, "loss": 0.2559, "step": 5446000 }, { "epoch": 3.27, "learning_rate": 3.317610851182041e-05, "loss": 0.2572, "step": 5446500 }, { "epoch": 3.27, "learning_rate": 3.317400854625984e-05, "loss": 0.2504, "step": 5447000 }, { "epoch": 3.27, "learning_rate": 3.3171908580699275e-05, "loss": 0.2543, "step": 5447500 }, { "epoch": 3.27, "learning_rate": 3.3169808615138715e-05, "loss": 0.2508, "step": 5448000 }, { "epoch": 3.27, "learning_rate": 3.316770864957815e-05, "loss": 0.2539, "step": 5448500 }, { "epoch": 3.27, "learning_rate": 3.31656128839487e-05, "loss": 0.2561, "step": 5449000 }, { "epoch": 3.27, "learning_rate": 3.316351291838814e-05, "loss": 0.25, "step": 5449500 }, { "epoch": 3.27, "learning_rate": 3.3161412952827576e-05, "loss": 0.2501, "step": 5450000 }, { "epoch": 3.27, "learning_rate": 3.315931718719813e-05, "loss": 0.2575, "step": 5450500 }, { "epoch": 3.27, "learning_rate": 3.315721722163756e-05, "loss": 0.2532, "step": 5451000 }, { "epoch": 3.27, "learning_rate": 3.3155117256077e-05, "loss": 0.2586, "step": 5451500 }, { "epoch": 3.27, "learning_rate": 3.315301729051644e-05, "loss": 0.2542, "step": 5452000 }, { "epoch": 3.27, "learning_rate": 3.315091732495587e-05, "loss": 0.2545, "step": 5452500 }, { "epoch": 3.27, "learning_rate": 3.314881735939531e-05, "loss": 0.2535, "step": 5453000 }, { "epoch": 3.27, "learning_rate": 3.3146717393834744e-05, "loss": 0.2525, "step": 5453500 }, { "epoch": 3.27, "learning_rate": 3.31446216282053e-05, "loss": 0.2524, "step": 5454000 }, { "epoch": 3.27, "learning_rate": 3.314252166264473e-05, "loss": 0.2559, "step": 5454500 }, { "epoch": 3.27, "learning_rate": 3.314042169708417e-05, "loss": 0.2549, "step": 5455000 }, { "epoch": 3.27, "learning_rate": 3.3138321731523605e-05, "loss": 0.2578, "step": 5455500 }, { "epoch": 3.27, "learning_rate": 3.313622176596303e-05, "loss": 0.2509, "step": 5456000 }, { "epoch": 3.27, "learning_rate": 3.313412180040247e-05, "loss": 0.2508, "step": 5456500 }, { "epoch": 3.27, "learning_rate": 3.3132021834841905e-05, "loss": 0.2542, "step": 5457000 }, { "epoch": 3.27, "learning_rate": 3.3129921869281345e-05, "loss": 0.2506, "step": 5457500 }, { "epoch": 3.27, "learning_rate": 3.3127826103651906e-05, "loss": 0.2609, "step": 5458000 }, { "epoch": 3.27, "learning_rate": 3.312572613809133e-05, "loss": 0.2554, "step": 5458500 }, { "epoch": 3.27, "learning_rate": 3.3123626172530766e-05, "loss": 0.2571, "step": 5459000 }, { "epoch": 3.27, "learning_rate": 3.3121526206970206e-05, "loss": 0.2554, "step": 5459500 }, { "epoch": 3.27, "learning_rate": 3.3119430441340766e-05, "loss": 0.2505, "step": 5460000 }, { "epoch": 3.27, "learning_rate": 3.311733467571132e-05, "loss": 0.2514, "step": 5460500 }, { "epoch": 3.27, "learning_rate": 3.311523471015075e-05, "loss": 0.2546, "step": 5461000 }, { "epoch": 3.27, "learning_rate": 3.311313474459019e-05, "loss": 0.2607, "step": 5461500 }, { "epoch": 3.27, "learning_rate": 3.311103477902963e-05, "loss": 0.2509, "step": 5462000 }, { "epoch": 3.27, "learning_rate": 3.310893481346906e-05, "loss": 0.2449, "step": 5462500 }, { "epoch": 3.28, "learning_rate": 3.3106834847908494e-05, "loss": 0.251, "step": 5463000 }, { "epoch": 3.28, "learning_rate": 3.310473488234793e-05, "loss": 0.2511, "step": 5463500 }, { "epoch": 3.28, "learning_rate": 3.310263491678736e-05, "loss": 0.2575, "step": 5464000 }, { "epoch": 3.28, "learning_rate": 3.31005349512268e-05, "loss": 0.2478, "step": 5464500 }, { "epoch": 3.28, "learning_rate": 3.309843918559736e-05, "loss": 0.2491, "step": 5465000 }, { "epoch": 3.28, "learning_rate": 3.309633922003679e-05, "loss": 0.2469, "step": 5465500 }, { "epoch": 3.28, "learning_rate": 3.309424345440735e-05, "loss": 0.2506, "step": 5466000 }, { "epoch": 3.28, "learning_rate": 3.309214348884678e-05, "loss": 0.2549, "step": 5466500 }, { "epoch": 3.28, "learning_rate": 3.309004352328622e-05, "loss": 0.2558, "step": 5467000 }, { "epoch": 3.28, "learning_rate": 3.3087943557725656e-05, "loss": 0.2512, "step": 5467500 }, { "epoch": 3.28, "learning_rate": 3.308584359216508e-05, "loss": 0.2535, "step": 5468000 }, { "epoch": 3.28, "learning_rate": 3.308374362660452e-05, "loss": 0.2576, "step": 5468500 }, { "epoch": 3.28, "learning_rate": 3.3081643661043956e-05, "loss": 0.2519, "step": 5469000 }, { "epoch": 3.28, "learning_rate": 3.307954369548339e-05, "loss": 0.2481, "step": 5469500 }, { "epoch": 3.28, "learning_rate": 3.307744792985395e-05, "loss": 0.254, "step": 5470000 }, { "epoch": 3.28, "learning_rate": 3.307534796429338e-05, "loss": 0.2487, "step": 5470500 }, { "epoch": 3.28, "learning_rate": 3.307324799873282e-05, "loss": 0.2592, "step": 5471000 }, { "epoch": 3.28, "learning_rate": 3.307114803317226e-05, "loss": 0.2536, "step": 5471500 }, { "epoch": 3.28, "learning_rate": 3.306905226754282e-05, "loss": 0.2472, "step": 5472000 }, { "epoch": 3.28, "learning_rate": 3.306695230198225e-05, "loss": 0.2603, "step": 5472500 }, { "epoch": 3.28, "learning_rate": 3.306485233642168e-05, "loss": 0.2544, "step": 5473000 }, { "epoch": 3.28, "learning_rate": 3.306275237086112e-05, "loss": 0.2529, "step": 5473500 }, { "epoch": 3.28, "learning_rate": 3.306065660523168e-05, "loss": 0.2492, "step": 5474000 }, { "epoch": 3.28, "learning_rate": 3.305855663967111e-05, "loss": 0.2587, "step": 5474500 }, { "epoch": 3.28, "learning_rate": 3.3056460874041665e-05, "loss": 0.2539, "step": 5475000 }, { "epoch": 3.28, "learning_rate": 3.30543609084811e-05, "loss": 0.2569, "step": 5475500 }, { "epoch": 3.28, "learning_rate": 3.305226094292054e-05, "loss": 0.263, "step": 5476000 }, { "epoch": 3.28, "learning_rate": 3.305016097735997e-05, "loss": 0.2561, "step": 5476500 }, { "epoch": 3.28, "learning_rate": 3.3048061011799406e-05, "loss": 0.2543, "step": 5477000 }, { "epoch": 3.28, "learning_rate": 3.304596104623884e-05, "loss": 0.2511, "step": 5477500 }, { "epoch": 3.28, "learning_rate": 3.304386108067827e-05, "loss": 0.2522, "step": 5478000 }, { "epoch": 3.28, "learning_rate": 3.304176111511771e-05, "loss": 0.2485, "step": 5478500 }, { "epoch": 3.28, "learning_rate": 3.303966534948827e-05, "loss": 0.2503, "step": 5479000 }, { "epoch": 3.29, "learning_rate": 3.303756538392771e-05, "loss": 0.2496, "step": 5479500 }, { "epoch": 3.29, "learning_rate": 3.303546541836713e-05, "loss": 0.2535, "step": 5480000 }, { "epoch": 3.29, "learning_rate": 3.3033365452806574e-05, "loss": 0.2524, "step": 5480500 }, { "epoch": 3.29, "learning_rate": 3.303126548724601e-05, "loss": 0.2487, "step": 5481000 }, { "epoch": 3.29, "learning_rate": 3.302916972161657e-05, "loss": 0.2594, "step": 5481500 }, { "epoch": 3.29, "learning_rate": 3.3027069756056e-05, "loss": 0.258, "step": 5482000 }, { "epoch": 3.29, "learning_rate": 3.3024969790495434e-05, "loss": 0.2527, "step": 5482500 }, { "epoch": 3.29, "learning_rate": 3.302286982493487e-05, "loss": 0.2548, "step": 5483000 }, { "epoch": 3.29, "learning_rate": 3.302077405930543e-05, "loss": 0.255, "step": 5483500 }, { "epoch": 3.29, "learning_rate": 3.301867409374486e-05, "loss": 0.2508, "step": 5484000 }, { "epoch": 3.29, "learning_rate": 3.30165741281843e-05, "loss": 0.2533, "step": 5484500 }, { "epoch": 3.29, "learning_rate": 3.301447416262373e-05, "loss": 0.2573, "step": 5485000 }, { "epoch": 3.29, "learning_rate": 3.301237839699429e-05, "loss": 0.2562, "step": 5485500 }, { "epoch": 3.29, "learning_rate": 3.301027843143373e-05, "loss": 0.2579, "step": 5486000 }, { "epoch": 3.29, "learning_rate": 3.300817846587316e-05, "loss": 0.2487, "step": 5486500 }, { "epoch": 3.29, "learning_rate": 3.300607850031259e-05, "loss": 0.251, "step": 5487000 }, { "epoch": 3.29, "learning_rate": 3.300398273468315e-05, "loss": 0.2611, "step": 5487500 }, { "epoch": 3.29, "learning_rate": 3.300188276912259e-05, "loss": 0.2622, "step": 5488000 }, { "epoch": 3.29, "learning_rate": 3.299978280356202e-05, "loss": 0.2527, "step": 5488500 }, { "epoch": 3.29, "learning_rate": 3.299768283800146e-05, "loss": 0.2569, "step": 5489000 }, { "epoch": 3.29, "learning_rate": 3.299558707237201e-05, "loss": 0.2542, "step": 5489500 }, { "epoch": 3.29, "learning_rate": 3.299348710681145e-05, "loss": 0.2465, "step": 5490000 }, { "epoch": 3.29, "learning_rate": 3.2991387141250884e-05, "loss": 0.2574, "step": 5490500 }, { "epoch": 3.29, "learning_rate": 3.298928717569032e-05, "loss": 0.2523, "step": 5491000 }, { "epoch": 3.29, "learning_rate": 3.298719141006088e-05, "loss": 0.2514, "step": 5491500 }, { "epoch": 3.29, "learning_rate": 3.298509144450031e-05, "loss": 0.252, "step": 5492000 }, { "epoch": 3.29, "learning_rate": 3.2982991478939745e-05, "loss": 0.2511, "step": 5492500 }, { "epoch": 3.29, "learning_rate": 3.2980891513379185e-05, "loss": 0.2584, "step": 5493000 }, { "epoch": 3.29, "learning_rate": 3.297879574774974e-05, "loss": 0.2521, "step": 5493500 }, { "epoch": 3.29, "learning_rate": 3.297669578218917e-05, "loss": 0.2489, "step": 5494000 }, { "epoch": 3.29, "learning_rate": 3.2974595816628605e-05, "loss": 0.2486, "step": 5494500 }, { "epoch": 3.29, "learning_rate": 3.2972495851068046e-05, "loss": 0.2608, "step": 5495000 }, { "epoch": 3.29, "learning_rate": 3.29704000854386e-05, "loss": 0.2486, "step": 5495500 }, { "epoch": 3.3, "learning_rate": 3.296830011987803e-05, "loss": 0.2517, "step": 5496000 }, { "epoch": 3.3, "learning_rate": 3.2966200154317466e-05, "loss": 0.2603, "step": 5496500 }, { "epoch": 3.3, "learning_rate": 3.2964100188756906e-05, "loss": 0.2497, "step": 5497000 }, { "epoch": 3.3, "learning_rate": 3.296200442312746e-05, "loss": 0.2577, "step": 5497500 }, { "epoch": 3.3, "learning_rate": 3.2959904457566893e-05, "loss": 0.2543, "step": 5498000 }, { "epoch": 3.3, "learning_rate": 3.2957804492006334e-05, "loss": 0.2575, "step": 5498500 }, { "epoch": 3.3, "learning_rate": 3.295570452644577e-05, "loss": 0.2484, "step": 5499000 }, { "epoch": 3.3, "learning_rate": 3.295360876081632e-05, "loss": 0.25, "step": 5499500 }, { "epoch": 3.3, "learning_rate": 3.2951508795255754e-05, "loss": 0.2507, "step": 5500000 }, { "epoch": 3.3, "eval_loss": 0.2369259148836136, "eval_runtime": 1461.9047, "eval_samples_per_second": 360.297, "eval_steps_per_second": 60.05, "step": 5500000 }, { "epoch": 3.3, "learning_rate": 3.2949408829695194e-05, "loss": 0.2526, "step": 5500500 }, { "epoch": 3.3, "learning_rate": 3.294730886413463e-05, "loss": 0.2578, "step": 5501000 }, { "epoch": 3.3, "learning_rate": 3.294520889857406e-05, "loss": 0.2506, "step": 5501500 }, { "epoch": 3.3, "learning_rate": 3.29431089330135e-05, "loss": 0.2604, "step": 5502000 }, { "epoch": 3.3, "learning_rate": 3.2941008967452935e-05, "loss": 0.2569, "step": 5502500 }, { "epoch": 3.3, "learning_rate": 3.293890900189237e-05, "loss": 0.2515, "step": 5503000 }, { "epoch": 3.3, "learning_rate": 3.293681323626292e-05, "loss": 0.2509, "step": 5503500 }, { "epoch": 3.3, "learning_rate": 3.293471327070236e-05, "loss": 0.2531, "step": 5504000 }, { "epoch": 3.3, "learning_rate": 3.2932613305141796e-05, "loss": 0.253, "step": 5504500 }, { "epoch": 3.3, "learning_rate": 3.293051333958123e-05, "loss": 0.2456, "step": 5505000 }, { "epoch": 3.3, "learning_rate": 3.29284217738829e-05, "loss": 0.2563, "step": 5505500 }, { "epoch": 3.3, "learning_rate": 3.292632180832234e-05, "loss": 0.2534, "step": 5506000 }, { "epoch": 3.3, "learning_rate": 3.2924221842761777e-05, "loss": 0.2553, "step": 5506500 }, { "epoch": 3.3, "learning_rate": 3.292212187720121e-05, "loss": 0.2507, "step": 5507000 }, { "epoch": 3.3, "learning_rate": 3.292002191164065e-05, "loss": 0.2547, "step": 5507500 }, { "epoch": 3.3, "learning_rate": 3.2917921946080084e-05, "loss": 0.2456, "step": 5508000 }, { "epoch": 3.3, "learning_rate": 3.291582198051952e-05, "loss": 0.2561, "step": 5508500 }, { "epoch": 3.3, "learning_rate": 3.291372201495896e-05, "loss": 0.2462, "step": 5509000 }, { "epoch": 3.3, "learning_rate": 3.291162624932951e-05, "loss": 0.2509, "step": 5509500 }, { "epoch": 3.3, "learning_rate": 3.2909526283768944e-05, "loss": 0.2549, "step": 5510000 }, { "epoch": 3.3, "learning_rate": 3.290742631820838e-05, "loss": 0.2553, "step": 5510500 }, { "epoch": 3.3, "learning_rate": 3.290533055257894e-05, "loss": 0.2612, "step": 5511000 }, { "epoch": 3.3, "learning_rate": 3.290323058701837e-05, "loss": 0.2555, "step": 5511500 }, { "epoch": 3.3, "learning_rate": 3.2901130621457805e-05, "loss": 0.2534, "step": 5512000 }, { "epoch": 3.3, "learning_rate": 3.2899030655897245e-05, "loss": 0.2506, "step": 5512500 }, { "epoch": 3.31, "learning_rate": 3.289693069033668e-05, "loss": 0.2517, "step": 5513000 }, { "epoch": 3.31, "learning_rate": 3.289483072477611e-05, "loss": 0.2556, "step": 5513500 }, { "epoch": 3.31, "learning_rate": 3.289273075921555e-05, "loss": 0.2555, "step": 5514000 }, { "epoch": 3.31, "learning_rate": 3.2890634993586106e-05, "loss": 0.2555, "step": 5514500 }, { "epoch": 3.31, "learning_rate": 3.288853502802554e-05, "loss": 0.2487, "step": 5515000 }, { "epoch": 3.31, "learning_rate": 3.288643506246497e-05, "loss": 0.2504, "step": 5515500 }, { "epoch": 3.31, "learning_rate": 3.288433509690441e-05, "loss": 0.2488, "step": 5516000 }, { "epoch": 3.31, "learning_rate": 3.288223513134385e-05, "loss": 0.2497, "step": 5516500 }, { "epoch": 3.31, "learning_rate": 3.288013516578328e-05, "loss": 0.2496, "step": 5517000 }, { "epoch": 3.31, "learning_rate": 3.287803520022272e-05, "loss": 0.248, "step": 5517500 }, { "epoch": 3.31, "learning_rate": 3.287593523466215e-05, "loss": 0.2492, "step": 5518000 }, { "epoch": 3.31, "learning_rate": 3.287383946903271e-05, "loss": 0.2544, "step": 5518500 }, { "epoch": 3.31, "learning_rate": 3.287173950347214e-05, "loss": 0.2546, "step": 5519000 }, { "epoch": 3.31, "learning_rate": 3.286963953791158e-05, "loss": 0.2516, "step": 5519500 }, { "epoch": 3.31, "learning_rate": 3.2867539572351015e-05, "loss": 0.2508, "step": 5520000 }, { "epoch": 3.31, "learning_rate": 3.286544380672157e-05, "loss": 0.2553, "step": 5520500 }, { "epoch": 3.31, "learning_rate": 3.286334384116101e-05, "loss": 0.2485, "step": 5521000 }, { "epoch": 3.31, "learning_rate": 3.286124387560044e-05, "loss": 0.2511, "step": 5521500 }, { "epoch": 3.31, "learning_rate": 3.2859143910039875e-05, "loss": 0.2545, "step": 5522000 }, { "epoch": 3.31, "learning_rate": 3.2857043944479316e-05, "loss": 0.2499, "step": 5522500 }, { "epoch": 3.31, "learning_rate": 3.285494817884987e-05, "loss": 0.2487, "step": 5523000 }, { "epoch": 3.31, "learning_rate": 3.28528482132893e-05, "loss": 0.2487, "step": 5523500 }, { "epoch": 3.31, "learning_rate": 3.2850748247728736e-05, "loss": 0.2502, "step": 5524000 }, { "epoch": 3.31, "learning_rate": 3.2848648282168176e-05, "loss": 0.2612, "step": 5524500 }, { "epoch": 3.31, "learning_rate": 3.284655251653873e-05, "loss": 0.2579, "step": 5525000 }, { "epoch": 3.31, "learning_rate": 3.284445255097816e-05, "loss": 0.2503, "step": 5525500 }, { "epoch": 3.31, "learning_rate": 3.28423525854176e-05, "loss": 0.2561, "step": 5526000 }, { "epoch": 3.31, "learning_rate": 3.284025681978816e-05, "loss": 0.251, "step": 5526500 }, { "epoch": 3.31, "learning_rate": 3.283815685422759e-05, "loss": 0.2542, "step": 5527000 }, { "epoch": 3.31, "learning_rate": 3.2836056888667024e-05, "loss": 0.254, "step": 5527500 }, { "epoch": 3.31, "learning_rate": 3.2833956923106464e-05, "loss": 0.2551, "step": 5528000 }, { "epoch": 3.31, "learning_rate": 3.28318569575459e-05, "loss": 0.2553, "step": 5528500 }, { "epoch": 3.31, "learning_rate": 3.282975699198533e-05, "loss": 0.2533, "step": 5529000 }, { "epoch": 3.32, "learning_rate": 3.282765702642477e-05, "loss": 0.2449, "step": 5529500 }, { "epoch": 3.32, "learning_rate": 3.28255570608642e-05, "loss": 0.2497, "step": 5530000 }, { "epoch": 3.32, "learning_rate": 3.282346129523476e-05, "loss": 0.2522, "step": 5530500 }, { "epoch": 3.32, "learning_rate": 3.282136132967419e-05, "loss": 0.2603, "step": 5531000 }, { "epoch": 3.32, "learning_rate": 3.281926136411363e-05, "loss": 0.2532, "step": 5531500 }, { "epoch": 3.32, "learning_rate": 3.2817161398553066e-05, "loss": 0.2531, "step": 5532000 }, { "epoch": 3.32, "learning_rate": 3.281506563292362e-05, "loss": 0.2464, "step": 5532500 }, { "epoch": 3.32, "learning_rate": 3.281296566736305e-05, "loss": 0.2563, "step": 5533000 }, { "epoch": 3.32, "learning_rate": 3.281086570180249e-05, "loss": 0.2555, "step": 5533500 }, { "epoch": 3.32, "learning_rate": 3.2808769936173047e-05, "loss": 0.2526, "step": 5534000 }, { "epoch": 3.32, "learning_rate": 3.280666997061248e-05, "loss": 0.2537, "step": 5534500 }, { "epoch": 3.32, "learning_rate": 3.280457000505192e-05, "loss": 0.2537, "step": 5535000 }, { "epoch": 3.32, "learning_rate": 3.2802470039491354e-05, "loss": 0.2561, "step": 5535500 }, { "epoch": 3.32, "learning_rate": 3.280037007393079e-05, "loss": 0.2566, "step": 5536000 }, { "epoch": 3.32, "learning_rate": 3.279827430830134e-05, "loss": 0.2584, "step": 5536500 }, { "epoch": 3.32, "learning_rate": 3.279617434274078e-05, "loss": 0.2443, "step": 5537000 }, { "epoch": 3.32, "learning_rate": 3.2794074377180214e-05, "loss": 0.2521, "step": 5537500 }, { "epoch": 3.32, "learning_rate": 3.279197441161965e-05, "loss": 0.2508, "step": 5538000 }, { "epoch": 3.32, "learning_rate": 3.278987444605909e-05, "loss": 0.2559, "step": 5538500 }, { "epoch": 3.32, "learning_rate": 3.278777448049852e-05, "loss": 0.2559, "step": 5539000 }, { "epoch": 3.32, "learning_rate": 3.278567451493795e-05, "loss": 0.2559, "step": 5539500 }, { "epoch": 3.32, "learning_rate": 3.278357454937739e-05, "loss": 0.2569, "step": 5540000 }, { "epoch": 3.32, "learning_rate": 3.278147458381682e-05, "loss": 0.2534, "step": 5540500 }, { "epoch": 3.32, "learning_rate": 3.277937881818738e-05, "loss": 0.2502, "step": 5541000 }, { "epoch": 3.32, "learning_rate": 3.277727885262682e-05, "loss": 0.2535, "step": 5541500 }, { "epoch": 3.32, "learning_rate": 3.277517888706625e-05, "loss": 0.2586, "step": 5542000 }, { "epoch": 3.32, "learning_rate": 3.277307892150568e-05, "loss": 0.2569, "step": 5542500 }, { "epoch": 3.32, "learning_rate": 3.277098315587624e-05, "loss": 0.2504, "step": 5543000 }, { "epoch": 3.32, "learning_rate": 3.276888319031568e-05, "loss": 0.2554, "step": 5543500 }, { "epoch": 3.32, "learning_rate": 3.276678322475512e-05, "loss": 0.2551, "step": 5544000 }, { "epoch": 3.32, "learning_rate": 3.2764683259194543e-05, "loss": 0.2543, "step": 5544500 }, { "epoch": 3.32, "learning_rate": 3.2762587493565104e-05, "loss": 0.2547, "step": 5545000 }, { "epoch": 3.32, "learning_rate": 3.2760487528004544e-05, "loss": 0.2507, "step": 5545500 }, { "epoch": 3.33, "learning_rate": 3.275838756244398e-05, "loss": 0.2502, "step": 5546000 }, { "epoch": 3.33, "learning_rate": 3.275628759688341e-05, "loss": 0.2552, "step": 5546500 }, { "epoch": 3.33, "learning_rate": 3.275419183125397e-05, "loss": 0.2502, "step": 5547000 }, { "epoch": 3.33, "learning_rate": 3.2752091865693405e-05, "loss": 0.2544, "step": 5547500 }, { "epoch": 3.33, "learning_rate": 3.274999190013284e-05, "loss": 0.2534, "step": 5548000 }, { "epoch": 3.33, "learning_rate": 3.274789193457228e-05, "loss": 0.2547, "step": 5548500 }, { "epoch": 3.33, "learning_rate": 3.274579616894283e-05, "loss": 0.2486, "step": 5549000 }, { "epoch": 3.33, "learning_rate": 3.2743696203382265e-05, "loss": 0.2496, "step": 5549500 }, { "epoch": 3.33, "learning_rate": 3.27415962378217e-05, "loss": 0.2503, "step": 5550000 }, { "epoch": 3.33, "learning_rate": 3.273949627226114e-05, "loss": 0.2491, "step": 5550500 }, { "epoch": 3.33, "learning_rate": 3.273740050663169e-05, "loss": 0.2504, "step": 5551000 }, { "epoch": 3.33, "learning_rate": 3.2735300541071126e-05, "loss": 0.2548, "step": 5551500 }, { "epoch": 3.33, "learning_rate": 3.273320057551056e-05, "loss": 0.252, "step": 5552000 }, { "epoch": 3.33, "learning_rate": 3.273110060995e-05, "loss": 0.2543, "step": 5552500 }, { "epoch": 3.33, "learning_rate": 3.2729004844320553e-05, "loss": 0.2497, "step": 5553000 }, { "epoch": 3.33, "learning_rate": 3.272690487875999e-05, "loss": 0.2522, "step": 5553500 }, { "epoch": 3.33, "learning_rate": 3.272480491319943e-05, "loss": 0.2523, "step": 5554000 }, { "epoch": 3.33, "learning_rate": 3.272270494763886e-05, "loss": 0.2543, "step": 5554500 }, { "epoch": 3.33, "learning_rate": 3.2720609182009414e-05, "loss": 0.2564, "step": 5555000 }, { "epoch": 3.33, "learning_rate": 3.271850921644885e-05, "loss": 0.2512, "step": 5555500 }, { "epoch": 3.33, "learning_rate": 3.271640925088829e-05, "loss": 0.2469, "step": 5556000 }, { "epoch": 3.33, "learning_rate": 3.271431348525884e-05, "loss": 0.2521, "step": 5556500 }, { "epoch": 3.33, "learning_rate": 3.2712213519698275e-05, "loss": 0.2521, "step": 5557000 }, { "epoch": 3.33, "learning_rate": 3.2710117754068835e-05, "loss": 0.2529, "step": 5557500 }, { "epoch": 3.33, "learning_rate": 3.270801778850826e-05, "loss": 0.2532, "step": 5558000 }, { "epoch": 3.33, "learning_rate": 3.27059178229477e-05, "loss": 0.2596, "step": 5558500 }, { "epoch": 3.33, "learning_rate": 3.2703817857387136e-05, "loss": 0.254, "step": 5559000 }, { "epoch": 3.33, "learning_rate": 3.2701717891826576e-05, "loss": 0.2555, "step": 5559500 }, { "epoch": 3.33, "learning_rate": 3.269961792626601e-05, "loss": 0.2493, "step": 5560000 }, { "epoch": 3.33, "learning_rate": 3.269751796070544e-05, "loss": 0.2583, "step": 5560500 }, { "epoch": 3.33, "learning_rate": 3.269541799514488e-05, "loss": 0.2582, "step": 5561000 }, { "epoch": 3.33, "learning_rate": 3.2693318029584316e-05, "loss": 0.2502, "step": 5561500 }, { "epoch": 3.33, "learning_rate": 3.269121806402375e-05, "loss": 0.2546, "step": 5562000 }, { "epoch": 3.33, "learning_rate": 3.268911809846319e-05, "loss": 0.2558, "step": 5562500 }, { "epoch": 3.34, "learning_rate": 3.2687018132902624e-05, "loss": 0.2568, "step": 5563000 }, { "epoch": 3.34, "learning_rate": 3.268492236727318e-05, "loss": 0.2481, "step": 5563500 }, { "epoch": 3.34, "learning_rate": 3.268282240171261e-05, "loss": 0.2539, "step": 5564000 }, { "epoch": 3.34, "learning_rate": 3.268072243615205e-05, "loss": 0.2578, "step": 5564500 }, { "epoch": 3.34, "learning_rate": 3.2678622470591484e-05, "loss": 0.2549, "step": 5565000 }, { "epoch": 3.34, "learning_rate": 3.267652670496204e-05, "loss": 0.2596, "step": 5565500 }, { "epoch": 3.34, "learning_rate": 3.267442673940147e-05, "loss": 0.2578, "step": 5566000 }, { "epoch": 3.34, "learning_rate": 3.267232677384091e-05, "loss": 0.2475, "step": 5566500 }, { "epoch": 3.34, "learning_rate": 3.2670226808280345e-05, "loss": 0.2515, "step": 5567000 }, { "epoch": 3.34, "learning_rate": 3.26681310426509e-05, "loss": 0.2516, "step": 5567500 }, { "epoch": 3.34, "learning_rate": 3.266603527702145e-05, "loss": 0.2503, "step": 5568000 }, { "epoch": 3.34, "learning_rate": 3.266393531146089e-05, "loss": 0.2526, "step": 5568500 }, { "epoch": 3.34, "learning_rate": 3.266183954583145e-05, "loss": 0.2533, "step": 5569000 }, { "epoch": 3.34, "learning_rate": 3.2659739580270886e-05, "loss": 0.256, "step": 5569500 }, { "epoch": 3.34, "learning_rate": 3.265763961471031e-05, "loss": 0.2506, "step": 5570000 }, { "epoch": 3.34, "learning_rate": 3.265553964914975e-05, "loss": 0.2583, "step": 5570500 }, { "epoch": 3.34, "learning_rate": 3.265343968358919e-05, "loss": 0.2505, "step": 5571000 }, { "epoch": 3.34, "learning_rate": 3.265133971802862e-05, "loss": 0.2538, "step": 5571500 }, { "epoch": 3.34, "learning_rate": 3.264923975246806e-05, "loss": 0.2547, "step": 5572000 }, { "epoch": 3.34, "learning_rate": 3.2647139786907494e-05, "loss": 0.252, "step": 5572500 }, { "epoch": 3.34, "learning_rate": 3.264503982134693e-05, "loss": 0.2557, "step": 5573000 }, { "epoch": 3.34, "learning_rate": 3.264293985578637e-05, "loss": 0.2571, "step": 5573500 }, { "epoch": 3.34, "learning_rate": 3.26408398902258e-05, "loss": 0.2517, "step": 5574000 }, { "epoch": 3.34, "learning_rate": 3.2638739924665234e-05, "loss": 0.2546, "step": 5574500 }, { "epoch": 3.34, "learning_rate": 3.2636644159035795e-05, "loss": 0.2526, "step": 5575000 }, { "epoch": 3.34, "learning_rate": 3.263454419347523e-05, "loss": 0.2528, "step": 5575500 }, { "epoch": 3.34, "learning_rate": 3.263244422791466e-05, "loss": 0.249, "step": 5576000 }, { "epoch": 3.34, "learning_rate": 3.26303442623541e-05, "loss": 0.2456, "step": 5576500 }, { "epoch": 3.34, "learning_rate": 3.262825269665577e-05, "loss": 0.2567, "step": 5577000 }, { "epoch": 3.34, "learning_rate": 3.262615273109521e-05, "loss": 0.2534, "step": 5577500 }, { "epoch": 3.34, "learning_rate": 3.262405276553464e-05, "loss": 0.2518, "step": 5578000 }, { "epoch": 3.34, "learning_rate": 3.2621952799974076e-05, "loss": 0.2552, "step": 5578500 }, { "epoch": 3.34, "learning_rate": 3.2619852834413516e-05, "loss": 0.2601, "step": 5579000 }, { "epoch": 3.35, "learning_rate": 3.261775286885295e-05, "loss": 0.2441, "step": 5579500 }, { "epoch": 3.35, "learning_rate": 3.261565290329238e-05, "loss": 0.2531, "step": 5580000 }, { "epoch": 3.35, "learning_rate": 3.261355293773182e-05, "loss": 0.2505, "step": 5580500 }, { "epoch": 3.35, "learning_rate": 3.261145717210238e-05, "loss": 0.2447, "step": 5581000 }, { "epoch": 3.35, "learning_rate": 3.260936140647294e-05, "loss": 0.2523, "step": 5581500 }, { "epoch": 3.35, "learning_rate": 3.2607261440912364e-05, "loss": 0.2524, "step": 5582000 }, { "epoch": 3.35, "learning_rate": 3.2605161475351804e-05, "loss": 0.2539, "step": 5582500 }, { "epoch": 3.35, "learning_rate": 3.260306150979124e-05, "loss": 0.2448, "step": 5583000 }, { "epoch": 3.35, "learning_rate": 3.260096154423067e-05, "loss": 0.2505, "step": 5583500 }, { "epoch": 3.35, "learning_rate": 3.259886157867011e-05, "loss": 0.2498, "step": 5584000 }, { "epoch": 3.35, "learning_rate": 3.2596761613109545e-05, "loss": 0.2529, "step": 5584500 }, { "epoch": 3.35, "learning_rate": 3.259466164754898e-05, "loss": 0.2485, "step": 5585000 }, { "epoch": 3.35, "learning_rate": 3.259256588191953e-05, "loss": 0.2544, "step": 5585500 }, { "epoch": 3.35, "learning_rate": 3.259046591635897e-05, "loss": 0.2563, "step": 5586000 }, { "epoch": 3.35, "learning_rate": 3.2588370150729526e-05, "loss": 0.2536, "step": 5586500 }, { "epoch": 3.35, "learning_rate": 3.258627018516896e-05, "loss": 0.2595, "step": 5587000 }, { "epoch": 3.35, "learning_rate": 3.25841702196084e-05, "loss": 0.2531, "step": 5587500 }, { "epoch": 3.35, "learning_rate": 3.258207025404783e-05, "loss": 0.2544, "step": 5588000 }, { "epoch": 3.35, "learning_rate": 3.2579970288487266e-05, "loss": 0.2555, "step": 5588500 }, { "epoch": 3.35, "learning_rate": 3.2577870322926707e-05, "loss": 0.253, "step": 5589000 }, { "epoch": 3.35, "learning_rate": 3.257577035736614e-05, "loss": 0.2535, "step": 5589500 }, { "epoch": 3.35, "learning_rate": 3.2573670391805573e-05, "loss": 0.2528, "step": 5590000 }, { "epoch": 3.35, "learning_rate": 3.257157462617613e-05, "loss": 0.2526, "step": 5590500 }, { "epoch": 3.35, "learning_rate": 3.256947466061557e-05, "loss": 0.2469, "step": 5591000 }, { "epoch": 3.35, "learning_rate": 3.2567374695055e-05, "loss": 0.2516, "step": 5591500 }, { "epoch": 3.35, "learning_rate": 3.2565274729494434e-05, "loss": 0.2518, "step": 5592000 }, { "epoch": 3.35, "learning_rate": 3.256317896386499e-05, "loss": 0.2467, "step": 5592500 }, { "epoch": 3.35, "learning_rate": 3.256108319823555e-05, "loss": 0.2516, "step": 5593000 }, { "epoch": 3.35, "learning_rate": 3.255898323267498e-05, "loss": 0.2559, "step": 5593500 }, { "epoch": 3.35, "learning_rate": 3.2556883267114415e-05, "loss": 0.2537, "step": 5594000 }, { "epoch": 3.35, "learning_rate": 3.2554783301553855e-05, "loss": 0.2507, "step": 5594500 }, { "epoch": 3.35, "learning_rate": 3.255268333599329e-05, "loss": 0.2567, "step": 5595000 }, { "epoch": 3.35, "learning_rate": 3.255058337043272e-05, "loss": 0.2542, "step": 5595500 }, { "epoch": 3.36, "learning_rate": 3.254848340487216e-05, "loss": 0.2512, "step": 5596000 }, { "epoch": 3.36, "learning_rate": 3.2546383439311596e-05, "loss": 0.2528, "step": 5596500 }, { "epoch": 3.36, "learning_rate": 3.254428767368215e-05, "loss": 0.2534, "step": 5597000 }, { "epoch": 3.36, "learning_rate": 3.254218770812158e-05, "loss": 0.256, "step": 5597500 }, { "epoch": 3.36, "learning_rate": 3.254008774256102e-05, "loss": 0.2561, "step": 5598000 }, { "epoch": 3.36, "learning_rate": 3.2537987777000457e-05, "loss": 0.2534, "step": 5598500 }, { "epoch": 3.36, "learning_rate": 3.253589201137101e-05, "loss": 0.2571, "step": 5599000 }, { "epoch": 3.36, "learning_rate": 3.253379624574157e-05, "loss": 0.2582, "step": 5599500 }, { "epoch": 3.36, "learning_rate": 3.2531696280181004e-05, "loss": 0.2584, "step": 5600000 }, { "epoch": 3.36, "eval_loss": 0.23417864739894867, "eval_runtime": 1460.3769, "eval_samples_per_second": 360.674, "eval_steps_per_second": 60.113, "step": 5600000 }, { "epoch": 3.36, "learning_rate": 3.2529596314620444e-05, "loss": 0.2542, "step": 5600500 }, { "epoch": 3.36, "learning_rate": 3.252749634905987e-05, "loss": 0.2562, "step": 5601000 }, { "epoch": 3.36, "learning_rate": 3.252539638349931e-05, "loss": 0.249, "step": 5601500 }, { "epoch": 3.36, "learning_rate": 3.2523296417938745e-05, "loss": 0.2471, "step": 5602000 }, { "epoch": 3.36, "learning_rate": 3.252119645237818e-05, "loss": 0.2502, "step": 5602500 }, { "epoch": 3.36, "learning_rate": 3.251909648681762e-05, "loss": 0.2494, "step": 5603000 }, { "epoch": 3.36, "learning_rate": 3.251700072118817e-05, "loss": 0.2527, "step": 5603500 }, { "epoch": 3.36, "learning_rate": 3.2514900755627605e-05, "loss": 0.2539, "step": 5604000 }, { "epoch": 3.36, "learning_rate": 3.251280079006704e-05, "loss": 0.2527, "step": 5604500 }, { "epoch": 3.36, "learning_rate": 3.251070082450648e-05, "loss": 0.2549, "step": 5605000 }, { "epoch": 3.36, "learning_rate": 3.250860505887703e-05, "loss": 0.25, "step": 5605500 }, { "epoch": 3.36, "learning_rate": 3.2506505093316466e-05, "loss": 0.2487, "step": 5606000 }, { "epoch": 3.36, "learning_rate": 3.25044051277559e-05, "loss": 0.2529, "step": 5606500 }, { "epoch": 3.36, "learning_rate": 3.250230936212646e-05, "loss": 0.2612, "step": 5607000 }, { "epoch": 3.36, "learning_rate": 3.25002093965659e-05, "loss": 0.2483, "step": 5607500 }, { "epoch": 3.36, "learning_rate": 3.249810943100533e-05, "loss": 0.2464, "step": 5608000 }, { "epoch": 3.36, "learning_rate": 3.249600946544477e-05, "loss": 0.2621, "step": 5608500 }, { "epoch": 3.36, "learning_rate": 3.24939094998842e-05, "loss": 0.2538, "step": 5609000 }, { "epoch": 3.36, "learning_rate": 3.2491809534323634e-05, "loss": 0.2637, "step": 5609500 }, { "epoch": 3.36, "learning_rate": 3.2489709568763074e-05, "loss": 0.257, "step": 5610000 }, { "epoch": 3.36, "learning_rate": 3.248761380313363e-05, "loss": 0.2566, "step": 5610500 }, { "epoch": 3.36, "learning_rate": 3.248551383757306e-05, "loss": 0.2544, "step": 5611000 }, { "epoch": 3.36, "learning_rate": 3.2483413872012495e-05, "loss": 0.2555, "step": 5611500 }, { "epoch": 3.36, "learning_rate": 3.2481313906451935e-05, "loss": 0.2549, "step": 5612000 }, { "epoch": 3.36, "learning_rate": 3.247921394089137e-05, "loss": 0.2547, "step": 5612500 }, { "epoch": 3.37, "learning_rate": 3.24771139753308e-05, "loss": 0.25, "step": 5613000 }, { "epoch": 3.37, "learning_rate": 3.247501400977024e-05, "loss": 0.2576, "step": 5613500 }, { "epoch": 3.37, "learning_rate": 3.2472918244140796e-05, "loss": 0.258, "step": 5614000 }, { "epoch": 3.37, "learning_rate": 3.247081827858023e-05, "loss": 0.2519, "step": 5614500 }, { "epoch": 3.37, "learning_rate": 3.246871831301966e-05, "loss": 0.2561, "step": 5615000 }, { "epoch": 3.37, "learning_rate": 3.24666183474591e-05, "loss": 0.2504, "step": 5615500 }, { "epoch": 3.37, "learning_rate": 3.2464518381898536e-05, "loss": 0.2551, "step": 5616000 }, { "epoch": 3.37, "learning_rate": 3.246241841633797e-05, "loss": 0.2577, "step": 5616500 }, { "epoch": 3.37, "learning_rate": 3.246031845077741e-05, "loss": 0.253, "step": 5617000 }, { "epoch": 3.37, "learning_rate": 3.245821848521684e-05, "loss": 0.2551, "step": 5617500 }, { "epoch": 3.37, "learning_rate": 3.245611851965628e-05, "loss": 0.2497, "step": 5618000 }, { "epoch": 3.37, "learning_rate": 3.245402275402684e-05, "loss": 0.2589, "step": 5618500 }, { "epoch": 3.37, "learning_rate": 3.245192278846627e-05, "loss": 0.2579, "step": 5619000 }, { "epoch": 3.37, "learning_rate": 3.2449827022836824e-05, "loss": 0.2539, "step": 5619500 }, { "epoch": 3.37, "learning_rate": 3.244772705727626e-05, "loss": 0.254, "step": 5620000 }, { "epoch": 3.37, "learning_rate": 3.24456270917157e-05, "loss": 0.256, "step": 5620500 }, { "epoch": 3.37, "learning_rate": 3.244352712615513e-05, "loss": 0.2539, "step": 5621000 }, { "epoch": 3.37, "learning_rate": 3.2441427160594565e-05, "loss": 0.2572, "step": 5621500 }, { "epoch": 3.37, "learning_rate": 3.2439327195034005e-05, "loss": 0.254, "step": 5622000 }, { "epoch": 3.37, "learning_rate": 3.243722722947344e-05, "loss": 0.2558, "step": 5622500 }, { "epoch": 3.37, "learning_rate": 3.2435127263912865e-05, "loss": 0.2565, "step": 5623000 }, { "epoch": 3.37, "learning_rate": 3.2433031498283426e-05, "loss": 0.2512, "step": 5623500 }, { "epoch": 3.37, "learning_rate": 3.2430931532722866e-05, "loss": 0.2497, "step": 5624000 }, { "epoch": 3.37, "learning_rate": 3.24288315671623e-05, "loss": 0.2515, "step": 5624500 }, { "epoch": 3.37, "learning_rate": 3.242673160160174e-05, "loss": 0.2547, "step": 5625000 }, { "epoch": 3.37, "learning_rate": 3.242463583597229e-05, "loss": 0.2528, "step": 5625500 }, { "epoch": 3.37, "learning_rate": 3.2422535870411727e-05, "loss": 0.2591, "step": 5626000 }, { "epoch": 3.37, "learning_rate": 3.242043590485116e-05, "loss": 0.2538, "step": 5626500 }, { "epoch": 3.37, "learning_rate": 3.24183359392906e-05, "loss": 0.2521, "step": 5627000 }, { "epoch": 3.37, "learning_rate": 3.2416240173661154e-05, "loss": 0.2555, "step": 5627500 }, { "epoch": 3.37, "learning_rate": 3.241414020810059e-05, "loss": 0.248, "step": 5628000 }, { "epoch": 3.37, "learning_rate": 3.241204024254002e-05, "loss": 0.2464, "step": 5628500 }, { "epoch": 3.37, "learning_rate": 3.2409944476910574e-05, "loss": 0.2562, "step": 5629000 }, { "epoch": 3.38, "learning_rate": 3.2407844511350015e-05, "loss": 0.2563, "step": 5629500 }, { "epoch": 3.38, "learning_rate": 3.240574454578945e-05, "loss": 0.2533, "step": 5630000 }, { "epoch": 3.38, "learning_rate": 3.240364458022888e-05, "loss": 0.2544, "step": 5630500 }, { "epoch": 3.38, "learning_rate": 3.240154461466832e-05, "loss": 0.2546, "step": 5631000 }, { "epoch": 3.38, "learning_rate": 3.2399444649107755e-05, "loss": 0.2536, "step": 5631500 }, { "epoch": 3.38, "learning_rate": 3.2397344683547195e-05, "loss": 0.2529, "step": 5632000 }, { "epoch": 3.38, "learning_rate": 3.239524471798662e-05, "loss": 0.2482, "step": 5632500 }, { "epoch": 3.38, "learning_rate": 3.23931531522883e-05, "loss": 0.2474, "step": 5633000 }, { "epoch": 3.38, "learning_rate": 3.2391053186727736e-05, "loss": 0.2576, "step": 5633500 }, { "epoch": 3.38, "learning_rate": 3.238895322116717e-05, "loss": 0.2518, "step": 5634000 }, { "epoch": 3.38, "learning_rate": 3.238685325560661e-05, "loss": 0.2543, "step": 5634500 }, { "epoch": 3.38, "learning_rate": 3.238475329004604e-05, "loss": 0.2494, "step": 5635000 }, { "epoch": 3.38, "learning_rate": 3.2382653324485477e-05, "loss": 0.2534, "step": 5635500 }, { "epoch": 3.38, "learning_rate": 3.238055335892492e-05, "loss": 0.2573, "step": 5636000 }, { "epoch": 3.38, "learning_rate": 3.237845339336435e-05, "loss": 0.2551, "step": 5636500 }, { "epoch": 3.38, "learning_rate": 3.2376357627734904e-05, "loss": 0.2585, "step": 5637000 }, { "epoch": 3.38, "learning_rate": 3.2374257662174344e-05, "loss": 0.2525, "step": 5637500 }, { "epoch": 3.38, "learning_rate": 3.237215769661378e-05, "loss": 0.2547, "step": 5638000 }, { "epoch": 3.38, "learning_rate": 3.237005773105321e-05, "loss": 0.2546, "step": 5638500 }, { "epoch": 3.38, "learning_rate": 3.2367961965423765e-05, "loss": 0.2578, "step": 5639000 }, { "epoch": 3.38, "learning_rate": 3.236586619979432e-05, "loss": 0.2558, "step": 5639500 }, { "epoch": 3.38, "learning_rate": 3.236376623423376e-05, "loss": 0.2556, "step": 5640000 }, { "epoch": 3.38, "learning_rate": 3.236166626867319e-05, "loss": 0.254, "step": 5640500 }, { "epoch": 3.38, "learning_rate": 3.2359566303112625e-05, "loss": 0.25, "step": 5641000 }, { "epoch": 3.38, "learning_rate": 3.2357466337552066e-05, "loss": 0.2515, "step": 5641500 }, { "epoch": 3.38, "learning_rate": 3.23553663719915e-05, "loss": 0.2541, "step": 5642000 }, { "epoch": 3.38, "learning_rate": 3.235326640643093e-05, "loss": 0.2534, "step": 5642500 }, { "epoch": 3.38, "learning_rate": 3.235116644087037e-05, "loss": 0.2568, "step": 5643000 }, { "epoch": 3.38, "learning_rate": 3.2349070675240926e-05, "loss": 0.2593, "step": 5643500 }, { "epoch": 3.38, "learning_rate": 3.234697070968036e-05, "loss": 0.2544, "step": 5644000 }, { "epoch": 3.38, "learning_rate": 3.23448707441198e-05, "loss": 0.2529, "step": 5644500 }, { "epoch": 3.38, "learning_rate": 3.2342770778559233e-05, "loss": 0.252, "step": 5645000 }, { "epoch": 3.38, "learning_rate": 3.234067501292979e-05, "loss": 0.2592, "step": 5645500 }, { "epoch": 3.39, "learning_rate": 3.233857504736922e-05, "loss": 0.2515, "step": 5646000 }, { "epoch": 3.39, "learning_rate": 3.233647508180866e-05, "loss": 0.2563, "step": 5646500 }, { "epoch": 3.39, "learning_rate": 3.2334375116248094e-05, "loss": 0.2546, "step": 5647000 }, { "epoch": 3.39, "learning_rate": 3.233227935061865e-05, "loss": 0.2517, "step": 5647500 }, { "epoch": 3.39, "learning_rate": 3.233018358498921e-05, "loss": 0.2531, "step": 5648000 }, { "epoch": 3.39, "learning_rate": 3.2328083619428635e-05, "loss": 0.2617, "step": 5648500 }, { "epoch": 3.39, "learning_rate": 3.2325983653868075e-05, "loss": 0.2569, "step": 5649000 }, { "epoch": 3.39, "learning_rate": 3.232388368830751e-05, "loss": 0.2524, "step": 5649500 }, { "epoch": 3.39, "learning_rate": 3.232178372274694e-05, "loss": 0.2438, "step": 5650000 }, { "epoch": 3.39, "learning_rate": 3.231968375718638e-05, "loss": 0.254, "step": 5650500 }, { "epoch": 3.39, "learning_rate": 3.2317583791625816e-05, "loss": 0.2522, "step": 5651000 }, { "epoch": 3.39, "learning_rate": 3.2315483826065256e-05, "loss": 0.2522, "step": 5651500 }, { "epoch": 3.39, "learning_rate": 3.231339226036693e-05, "loss": 0.2586, "step": 5652000 }, { "epoch": 3.39, "learning_rate": 3.231129229480637e-05, "loss": 0.2497, "step": 5652500 }, { "epoch": 3.39, "learning_rate": 3.23091923292458e-05, "loss": 0.2436, "step": 5653000 }, { "epoch": 3.39, "learning_rate": 3.230709236368523e-05, "loss": 0.2547, "step": 5653500 }, { "epoch": 3.39, "learning_rate": 3.230499239812467e-05, "loss": 0.2542, "step": 5654000 }, { "epoch": 3.39, "learning_rate": 3.2302892432564104e-05, "loss": 0.2445, "step": 5654500 }, { "epoch": 3.39, "learning_rate": 3.230079246700354e-05, "loss": 0.2599, "step": 5655000 }, { "epoch": 3.39, "learning_rate": 3.229869250144298e-05, "loss": 0.2583, "step": 5655500 }, { "epoch": 3.39, "learning_rate": 3.229659673581353e-05, "loss": 0.2493, "step": 5656000 }, { "epoch": 3.39, "learning_rate": 3.2294496770252964e-05, "loss": 0.2617, "step": 5656500 }, { "epoch": 3.39, "learning_rate": 3.2292396804692405e-05, "loss": 0.2491, "step": 5657000 }, { "epoch": 3.39, "learning_rate": 3.2290301039062965e-05, "loss": 0.2561, "step": 5657500 }, { "epoch": 3.39, "learning_rate": 3.228820107350239e-05, "loss": 0.2534, "step": 5658000 }, { "epoch": 3.39, "learning_rate": 3.2286101107941825e-05, "loss": 0.2515, "step": 5658500 }, { "epoch": 3.39, "learning_rate": 3.2284001142381265e-05, "loss": 0.2497, "step": 5659000 }, { "epoch": 3.39, "learning_rate": 3.22819011768207e-05, "loss": 0.257, "step": 5659500 }, { "epoch": 3.39, "learning_rate": 3.227980121126013e-05, "loss": 0.2559, "step": 5660000 }, { "epoch": 3.39, "learning_rate": 3.2277705445630686e-05, "loss": 0.2513, "step": 5660500 }, { "epoch": 3.39, "learning_rate": 3.2275605480070126e-05, "loss": 0.2579, "step": 5661000 }, { "epoch": 3.39, "learning_rate": 3.227350551450956e-05, "loss": 0.258, "step": 5661500 }, { "epoch": 3.39, "learning_rate": 3.227140554894899e-05, "loss": 0.2571, "step": 5662000 }, { "epoch": 3.39, "learning_rate": 3.226930978331955e-05, "loss": 0.2479, "step": 5662500 }, { "epoch": 3.4, "learning_rate": 3.226720981775899e-05, "loss": 0.247, "step": 5663000 }, { "epoch": 3.4, "learning_rate": 3.226510985219842e-05, "loss": 0.2564, "step": 5663500 }, { "epoch": 3.4, "learning_rate": 3.226300988663786e-05, "loss": 0.2557, "step": 5664000 }, { "epoch": 3.4, "learning_rate": 3.2260909921077294e-05, "loss": 0.2514, "step": 5664500 }, { "epoch": 3.4, "learning_rate": 3.225880995551673e-05, "loss": 0.2557, "step": 5665000 }, { "epoch": 3.4, "learning_rate": 3.225670998995617e-05, "loss": 0.253, "step": 5665500 }, { "epoch": 3.4, "learning_rate": 3.22546100243956e-05, "loss": 0.2539, "step": 5666000 }, { "epoch": 3.4, "learning_rate": 3.2252510058835035e-05, "loss": 0.2526, "step": 5666500 }, { "epoch": 3.4, "learning_rate": 3.2250410093274475e-05, "loss": 0.2547, "step": 5667000 }, { "epoch": 3.4, "learning_rate": 3.224831012771391e-05, "loss": 0.2526, "step": 5667500 }, { "epoch": 3.4, "learning_rate": 3.224621016215334e-05, "loss": 0.2522, "step": 5668000 }, { "epoch": 3.4, "learning_rate": 3.2244114396523895e-05, "loss": 0.2572, "step": 5668500 }, { "epoch": 3.4, "learning_rate": 3.2242014430963335e-05, "loss": 0.2546, "step": 5669000 }, { "epoch": 3.4, "learning_rate": 3.223991446540277e-05, "loss": 0.2538, "step": 5669500 }, { "epoch": 3.4, "learning_rate": 3.22378144998422e-05, "loss": 0.2513, "step": 5670000 }, { "epoch": 3.4, "learning_rate": 3.2235718734212756e-05, "loss": 0.2496, "step": 5670500 }, { "epoch": 3.4, "learning_rate": 3.2233618768652196e-05, "loss": 0.2495, "step": 5671000 }, { "epoch": 3.4, "learning_rate": 3.223151880309163e-05, "loss": 0.2493, "step": 5671500 }, { "epoch": 3.4, "learning_rate": 3.222941883753106e-05, "loss": 0.2528, "step": 5672000 }, { "epoch": 3.4, "learning_rate": 3.2227323071901623e-05, "loss": 0.2549, "step": 5672500 }, { "epoch": 3.4, "learning_rate": 3.222522310634106e-05, "loss": 0.2524, "step": 5673000 }, { "epoch": 3.4, "learning_rate": 3.222312314078049e-05, "loss": 0.2572, "step": 5673500 }, { "epoch": 3.4, "learning_rate": 3.222102317521993e-05, "loss": 0.2507, "step": 5674000 }, { "epoch": 3.4, "learning_rate": 3.2218927409590484e-05, "loss": 0.2566, "step": 5674500 }, { "epoch": 3.4, "learning_rate": 3.221682744402992e-05, "loss": 0.251, "step": 5675000 }, { "epoch": 3.4, "learning_rate": 3.221472747846935e-05, "loss": 0.2505, "step": 5675500 }, { "epoch": 3.4, "learning_rate": 3.221262751290879e-05, "loss": 0.2568, "step": 5676000 }, { "epoch": 3.4, "learning_rate": 3.2210531747279345e-05, "loss": 0.2528, "step": 5676500 }, { "epoch": 3.4, "learning_rate": 3.220843178171878e-05, "loss": 0.254, "step": 5677000 }, { "epoch": 3.4, "learning_rate": 3.220633181615821e-05, "loss": 0.2546, "step": 5677500 }, { "epoch": 3.4, "learning_rate": 3.220423185059765e-05, "loss": 0.256, "step": 5678000 }, { "epoch": 3.4, "learning_rate": 3.2202136084968206e-05, "loss": 0.2564, "step": 5678500 }, { "epoch": 3.4, "learning_rate": 3.2200040319338766e-05, "loss": 0.2556, "step": 5679000 }, { "epoch": 3.41, "learning_rate": 3.219794035377819e-05, "loss": 0.2572, "step": 5679500 }, { "epoch": 3.41, "learning_rate": 3.219584038821763e-05, "loss": 0.2591, "step": 5680000 }, { "epoch": 3.41, "learning_rate": 3.2193740422657066e-05, "loss": 0.2511, "step": 5680500 }, { "epoch": 3.41, "learning_rate": 3.21916404570965e-05, "loss": 0.2523, "step": 5681000 }, { "epoch": 3.41, "learning_rate": 3.218954049153594e-05, "loss": 0.2498, "step": 5681500 }, { "epoch": 3.41, "learning_rate": 3.2187440525975374e-05, "loss": 0.2477, "step": 5682000 }, { "epoch": 3.41, "learning_rate": 3.218534056041481e-05, "loss": 0.2535, "step": 5682500 }, { "epoch": 3.41, "learning_rate": 3.218324059485425e-05, "loss": 0.2545, "step": 5683000 }, { "epoch": 3.41, "learning_rate": 3.21811448292248e-05, "loss": 0.2541, "step": 5683500 }, { "epoch": 3.41, "learning_rate": 3.2179044863664234e-05, "loss": 0.2563, "step": 5684000 }, { "epoch": 3.41, "learning_rate": 3.217694489810367e-05, "loss": 0.2543, "step": 5684500 }, { "epoch": 3.41, "learning_rate": 3.217484913247423e-05, "loss": 0.2507, "step": 5685000 }, { "epoch": 3.41, "learning_rate": 3.217274916691366e-05, "loss": 0.2527, "step": 5685500 }, { "epoch": 3.41, "learning_rate": 3.2170649201353095e-05, "loss": 0.2577, "step": 5686000 }, { "epoch": 3.41, "learning_rate": 3.2168549235792535e-05, "loss": 0.2555, "step": 5686500 }, { "epoch": 3.41, "learning_rate": 3.216644927023197e-05, "loss": 0.256, "step": 5687000 }, { "epoch": 3.41, "learning_rate": 3.21643493046714e-05, "loss": 0.2438, "step": 5687500 }, { "epoch": 3.41, "learning_rate": 3.216224933911084e-05, "loss": 0.2582, "step": 5688000 }, { "epoch": 3.41, "learning_rate": 3.2160149373550276e-05, "loss": 0.2462, "step": 5688500 }, { "epoch": 3.41, "learning_rate": 3.215805360792083e-05, "loss": 0.2544, "step": 5689000 }, { "epoch": 3.41, "learning_rate": 3.215595364236026e-05, "loss": 0.2506, "step": 5689500 }, { "epoch": 3.41, "learning_rate": 3.2153857876730816e-05, "loss": 0.2584, "step": 5690000 }, { "epoch": 3.41, "learning_rate": 3.215175791117026e-05, "loss": 0.2501, "step": 5690500 }, { "epoch": 3.41, "learning_rate": 3.214965794560969e-05, "loss": 0.2521, "step": 5691000 }, { "epoch": 3.41, "learning_rate": 3.2147557980049124e-05, "loss": 0.2505, "step": 5691500 }, { "epoch": 3.41, "learning_rate": 3.2145458014488564e-05, "loss": 0.2532, "step": 5692000 }, { "epoch": 3.41, "learning_rate": 3.2143358048928e-05, "loss": 0.2511, "step": 5692500 }, { "epoch": 3.41, "learning_rate": 3.214125808336743e-05, "loss": 0.2538, "step": 5693000 }, { "epoch": 3.41, "learning_rate": 3.213916231773799e-05, "loss": 0.2559, "step": 5693500 }, { "epoch": 3.41, "learning_rate": 3.2137062352177425e-05, "loss": 0.2499, "step": 5694000 }, { "epoch": 3.41, "learning_rate": 3.213496238661686e-05, "loss": 0.2575, "step": 5694500 }, { "epoch": 3.41, "learning_rate": 3.21328624210563e-05, "loss": 0.2539, "step": 5695000 }, { "epoch": 3.41, "learning_rate": 3.213076245549573e-05, "loss": 0.247, "step": 5695500 }, { "epoch": 3.41, "learning_rate": 3.2128662489935165e-05, "loss": 0.2517, "step": 5696000 }, { "epoch": 3.42, "learning_rate": 3.2126562524374605e-05, "loss": 0.2526, "step": 5696500 }, { "epoch": 3.42, "learning_rate": 3.212446255881403e-05, "loss": 0.2553, "step": 5697000 }, { "epoch": 3.42, "learning_rate": 3.212236679318459e-05, "loss": 0.2523, "step": 5697500 }, { "epoch": 3.42, "learning_rate": 3.2120271027555146e-05, "loss": 0.2539, "step": 5698000 }, { "epoch": 3.42, "learning_rate": 3.211817106199458e-05, "loss": 0.2585, "step": 5698500 }, { "epoch": 3.42, "learning_rate": 3.211607109643402e-05, "loss": 0.2561, "step": 5699000 }, { "epoch": 3.42, "learning_rate": 3.211397113087345e-05, "loss": 0.2514, "step": 5699500 }, { "epoch": 3.42, "learning_rate": 3.211187116531289e-05, "loss": 0.2485, "step": 5700000 }, { "epoch": 3.42, "eval_loss": 0.2346460372209549, "eval_runtime": 1457.6782, "eval_samples_per_second": 361.342, "eval_steps_per_second": 60.224, "step": 5700000 }, { "epoch": 3.42, "learning_rate": 3.210977119975233e-05, "loss": 0.2563, "step": 5700500 }, { "epoch": 3.42, "learning_rate": 3.210767123419176e-05, "loss": 0.247, "step": 5701000 }, { "epoch": 3.42, "learning_rate": 3.2105571268631194e-05, "loss": 0.2589, "step": 5701500 }, { "epoch": 3.42, "learning_rate": 3.2103475503001754e-05, "loss": 0.2505, "step": 5702000 }, { "epoch": 3.42, "learning_rate": 3.210137553744119e-05, "loss": 0.251, "step": 5702500 }, { "epoch": 3.42, "learning_rate": 3.209927557188062e-05, "loss": 0.2523, "step": 5703000 }, { "epoch": 3.42, "learning_rate": 3.209717560632006e-05, "loss": 0.2495, "step": 5703500 }, { "epoch": 3.42, "learning_rate": 3.2095079840690615e-05, "loss": 0.2518, "step": 5704000 }, { "epoch": 3.42, "learning_rate": 3.209298407506117e-05, "loss": 0.2524, "step": 5704500 }, { "epoch": 3.42, "learning_rate": 3.20908841095006e-05, "loss": 0.2491, "step": 5705000 }, { "epoch": 3.42, "learning_rate": 3.2088784143940035e-05, "loss": 0.2536, "step": 5705500 }, { "epoch": 3.42, "learning_rate": 3.2086684178379476e-05, "loss": 0.2498, "step": 5706000 }, { "epoch": 3.42, "learning_rate": 3.208458421281891e-05, "loss": 0.2596, "step": 5706500 }, { "epoch": 3.42, "learning_rate": 3.208248424725834e-05, "loss": 0.2552, "step": 5707000 }, { "epoch": 3.42, "learning_rate": 3.208038428169778e-05, "loss": 0.2498, "step": 5707500 }, { "epoch": 3.42, "learning_rate": 3.2078284316137216e-05, "loss": 0.2503, "step": 5708000 }, { "epoch": 3.42, "learning_rate": 3.207618855050777e-05, "loss": 0.249, "step": 5708500 }, { "epoch": 3.42, "learning_rate": 3.207408858494721e-05, "loss": 0.2501, "step": 5709000 }, { "epoch": 3.42, "learning_rate": 3.2071988619386643e-05, "loss": 0.253, "step": 5709500 }, { "epoch": 3.42, "learning_rate": 3.206988865382608e-05, "loss": 0.2529, "step": 5710000 }, { "epoch": 3.42, "learning_rate": 3.206779288819663e-05, "loss": 0.2535, "step": 5710500 }, { "epoch": 3.42, "learning_rate": 3.206569292263607e-05, "loss": 0.2566, "step": 5711000 }, { "epoch": 3.42, "learning_rate": 3.2063592957075504e-05, "loss": 0.2512, "step": 5711500 }, { "epoch": 3.42, "learning_rate": 3.206149299151494e-05, "loss": 0.2522, "step": 5712000 }, { "epoch": 3.42, "learning_rate": 3.205939722588549e-05, "loss": 0.2527, "step": 5712500 }, { "epoch": 3.43, "learning_rate": 3.205729726032493e-05, "loss": 0.2536, "step": 5713000 }, { "epoch": 3.43, "learning_rate": 3.2055197294764365e-05, "loss": 0.2485, "step": 5713500 }, { "epoch": 3.43, "learning_rate": 3.20530973292038e-05, "loss": 0.2545, "step": 5714000 }, { "epoch": 3.43, "learning_rate": 3.205100156357436e-05, "loss": 0.2579, "step": 5714500 }, { "epoch": 3.43, "learning_rate": 3.204890159801379e-05, "loss": 0.255, "step": 5715000 }, { "epoch": 3.43, "learning_rate": 3.2046805832384346e-05, "loss": 0.2548, "step": 5715500 }, { "epoch": 3.43, "learning_rate": 3.204470586682378e-05, "loss": 0.2549, "step": 5716000 }, { "epoch": 3.43, "learning_rate": 3.204260590126322e-05, "loss": 0.2526, "step": 5716500 }, { "epoch": 3.43, "learning_rate": 3.204050593570265e-05, "loss": 0.2548, "step": 5717000 }, { "epoch": 3.43, "learning_rate": 3.2038405970142086e-05, "loss": 0.2524, "step": 5717500 }, { "epoch": 3.43, "learning_rate": 3.203630600458153e-05, "loss": 0.2448, "step": 5718000 }, { "epoch": 3.43, "learning_rate": 3.203420603902096e-05, "loss": 0.2446, "step": 5718500 }, { "epoch": 3.43, "learning_rate": 3.2032106073460394e-05, "loss": 0.2562, "step": 5719000 }, { "epoch": 3.43, "learning_rate": 3.203001030783095e-05, "loss": 0.2534, "step": 5719500 }, { "epoch": 3.43, "learning_rate": 3.202791034227039e-05, "loss": 0.2493, "step": 5720000 }, { "epoch": 3.43, "learning_rate": 3.202581037670982e-05, "loss": 0.2512, "step": 5720500 }, { "epoch": 3.43, "learning_rate": 3.2023710411149254e-05, "loss": 0.2518, "step": 5721000 }, { "epoch": 3.43, "learning_rate": 3.2021614645519815e-05, "loss": 0.2556, "step": 5721500 }, { "epoch": 3.43, "learning_rate": 3.201951467995925e-05, "loss": 0.2514, "step": 5722000 }, { "epoch": 3.43, "learning_rate": 3.201741471439868e-05, "loss": 0.2504, "step": 5722500 }, { "epoch": 3.43, "learning_rate": 3.201531474883812e-05, "loss": 0.2454, "step": 5723000 }, { "epoch": 3.43, "learning_rate": 3.2013218983208675e-05, "loss": 0.2518, "step": 5723500 }, { "epoch": 3.43, "learning_rate": 3.201111901764811e-05, "loss": 0.2512, "step": 5724000 }, { "epoch": 3.43, "learning_rate": 3.200901905208754e-05, "loss": 0.2458, "step": 5724500 }, { "epoch": 3.43, "learning_rate": 3.200691908652698e-05, "loss": 0.246, "step": 5725000 }, { "epoch": 3.43, "learning_rate": 3.2004827520828656e-05, "loss": 0.258, "step": 5725500 }, { "epoch": 3.43, "learning_rate": 3.2002727555268096e-05, "loss": 0.2596, "step": 5726000 }, { "epoch": 3.43, "learning_rate": 3.200062758970753e-05, "loss": 0.2515, "step": 5726500 }, { "epoch": 3.43, "learning_rate": 3.199852762414696e-05, "loss": 0.2517, "step": 5727000 }, { "epoch": 3.43, "learning_rate": 3.19964276585864e-05, "loss": 0.2491, "step": 5727500 }, { "epoch": 3.43, "learning_rate": 3.199432769302583e-05, "loss": 0.251, "step": 5728000 }, { "epoch": 3.43, "learning_rate": 3.199222772746527e-05, "loss": 0.2482, "step": 5728500 }, { "epoch": 3.43, "learning_rate": 3.1990127761904704e-05, "loss": 0.2538, "step": 5729000 }, { "epoch": 3.44, "learning_rate": 3.198803199627526e-05, "loss": 0.2496, "step": 5729500 }, { "epoch": 3.44, "learning_rate": 3.198593203071469e-05, "loss": 0.2538, "step": 5730000 }, { "epoch": 3.44, "learning_rate": 3.198383206515413e-05, "loss": 0.2527, "step": 5730500 }, { "epoch": 3.44, "learning_rate": 3.198173629952469e-05, "loss": 0.2544, "step": 5731000 }, { "epoch": 3.44, "learning_rate": 3.1979636333964125e-05, "loss": 0.2528, "step": 5731500 }, { "epoch": 3.44, "learning_rate": 3.197753636840355e-05, "loss": 0.2544, "step": 5732000 }, { "epoch": 3.44, "learning_rate": 3.197543640284299e-05, "loss": 0.2473, "step": 5732500 }, { "epoch": 3.44, "learning_rate": 3.1973336437282425e-05, "loss": 0.2529, "step": 5733000 }, { "epoch": 3.44, "learning_rate": 3.197123647172186e-05, "loss": 0.2526, "step": 5733500 }, { "epoch": 3.44, "learning_rate": 3.19691365061613e-05, "loss": 0.2528, "step": 5734000 }, { "epoch": 3.44, "learning_rate": 3.196703654060073e-05, "loss": 0.2543, "step": 5734500 }, { "epoch": 3.44, "learning_rate": 3.196494497490241e-05, "loss": 0.2547, "step": 5735000 }, { "epoch": 3.44, "learning_rate": 3.1962845009341847e-05, "loss": 0.2507, "step": 5735500 }, { "epoch": 3.44, "learning_rate": 3.196074504378129e-05, "loss": 0.255, "step": 5736000 }, { "epoch": 3.44, "learning_rate": 3.1958645078220713e-05, "loss": 0.252, "step": 5736500 }, { "epoch": 3.44, "learning_rate": 3.195654511266015e-05, "loss": 0.2511, "step": 5737000 }, { "epoch": 3.44, "learning_rate": 3.195444514709959e-05, "loss": 0.2518, "step": 5737500 }, { "epoch": 3.44, "learning_rate": 3.195234518153902e-05, "loss": 0.2515, "step": 5738000 }, { "epoch": 3.44, "learning_rate": 3.1950245215978454e-05, "loss": 0.2549, "step": 5738500 }, { "epoch": 3.44, "learning_rate": 3.194814945034901e-05, "loss": 0.2545, "step": 5739000 }, { "epoch": 3.44, "learning_rate": 3.194604948478845e-05, "loss": 0.2551, "step": 5739500 }, { "epoch": 3.44, "learning_rate": 3.194394951922788e-05, "loss": 0.2557, "step": 5740000 }, { "epoch": 3.44, "learning_rate": 3.1941849553667315e-05, "loss": 0.2517, "step": 5740500 }, { "epoch": 3.44, "learning_rate": 3.1939749588106755e-05, "loss": 0.2506, "step": 5741000 }, { "epoch": 3.44, "learning_rate": 3.193764962254619e-05, "loss": 0.2479, "step": 5741500 }, { "epoch": 3.44, "learning_rate": 3.193554965698563e-05, "loss": 0.2486, "step": 5742000 }, { "epoch": 3.44, "learning_rate": 3.193344969142506e-05, "loss": 0.2494, "step": 5742500 }, { "epoch": 3.44, "learning_rate": 3.193135812572674e-05, "loss": 0.2571, "step": 5743000 }, { "epoch": 3.44, "learning_rate": 3.1929258160166176e-05, "loss": 0.2526, "step": 5743500 }, { "epoch": 3.44, "learning_rate": 3.19271581946056e-05, "loss": 0.2536, "step": 5744000 }, { "epoch": 3.44, "learning_rate": 3.192505822904504e-05, "loss": 0.2532, "step": 5744500 }, { "epoch": 3.44, "learning_rate": 3.1922958263484476e-05, "loss": 0.2536, "step": 5745000 }, { "epoch": 3.44, "learning_rate": 3.192085829792391e-05, "loss": 0.2571, "step": 5745500 }, { "epoch": 3.44, "learning_rate": 3.191876673222559e-05, "loss": 0.2496, "step": 5746000 }, { "epoch": 3.45, "learning_rate": 3.1916666766665024e-05, "loss": 0.2521, "step": 5746500 }, { "epoch": 3.45, "learning_rate": 3.1914566801104464e-05, "loss": 0.2519, "step": 5747000 }, { "epoch": 3.45, "learning_rate": 3.19124668355439e-05, "loss": 0.2454, "step": 5747500 }, { "epoch": 3.45, "learning_rate": 3.191036686998334e-05, "loss": 0.2554, "step": 5748000 }, { "epoch": 3.45, "learning_rate": 3.1908266904422764e-05, "loss": 0.2564, "step": 5748500 }, { "epoch": 3.45, "learning_rate": 3.19061669388622e-05, "loss": 0.2546, "step": 5749000 }, { "epoch": 3.45, "learning_rate": 3.190406697330164e-05, "loss": 0.251, "step": 5749500 }, { "epoch": 3.45, "learning_rate": 3.190196700774107e-05, "loss": 0.2532, "step": 5750000 }, { "epoch": 3.45, "learning_rate": 3.1899867042180505e-05, "loss": 0.2488, "step": 5750500 }, { "epoch": 3.45, "learning_rate": 3.1897767076619945e-05, "loss": 0.2508, "step": 5751000 }, { "epoch": 3.45, "learning_rate": 3.18956713109905e-05, "loss": 0.2496, "step": 5751500 }, { "epoch": 3.45, "learning_rate": 3.189357134542993e-05, "loss": 0.2541, "step": 5752000 }, { "epoch": 3.45, "learning_rate": 3.1891471379869366e-05, "loss": 0.2597, "step": 5752500 }, { "epoch": 3.45, "learning_rate": 3.1889371414308806e-05, "loss": 0.2589, "step": 5753000 }, { "epoch": 3.45, "learning_rate": 3.188727144874824e-05, "loss": 0.2579, "step": 5753500 }, { "epoch": 3.45, "learning_rate": 3.188517568311879e-05, "loss": 0.2525, "step": 5754000 }, { "epoch": 3.45, "learning_rate": 3.188307571755823e-05, "loss": 0.2547, "step": 5754500 }, { "epoch": 3.45, "learning_rate": 3.188097575199767e-05, "loss": 0.2552, "step": 5755000 }, { "epoch": 3.45, "learning_rate": 3.18788757864371e-05, "loss": 0.2504, "step": 5755500 }, { "epoch": 3.45, "learning_rate": 3.187677582087654e-05, "loss": 0.2456, "step": 5756000 }, { "epoch": 3.45, "learning_rate": 3.1874675855315974e-05, "loss": 0.2488, "step": 5756500 }, { "epoch": 3.45, "learning_rate": 3.187257588975541e-05, "loss": 0.2543, "step": 5757000 }, { "epoch": 3.45, "learning_rate": 3.187047592419485e-05, "loss": 0.2555, "step": 5757500 }, { "epoch": 3.45, "learning_rate": 3.186837595863428e-05, "loss": 0.2501, "step": 5758000 }, { "epoch": 3.45, "learning_rate": 3.1866280193004835e-05, "loss": 0.2473, "step": 5758500 }, { "epoch": 3.45, "learning_rate": 3.186418022744427e-05, "loss": 0.2557, "step": 5759000 }, { "epoch": 3.45, "learning_rate": 3.186208026188371e-05, "loss": 0.2543, "step": 5759500 }, { "epoch": 3.45, "learning_rate": 3.185998029632314e-05, "loss": 0.2511, "step": 5760000 }, { "epoch": 3.45, "learning_rate": 3.1857884530693695e-05, "loss": 0.251, "step": 5760500 }, { "epoch": 3.45, "learning_rate": 3.185578456513313e-05, "loss": 0.2529, "step": 5761000 }, { "epoch": 3.45, "learning_rate": 3.185368459957257e-05, "loss": 0.2523, "step": 5761500 }, { "epoch": 3.45, "learning_rate": 3.1851584634012e-05, "loss": 0.2534, "step": 5762000 }, { "epoch": 3.45, "learning_rate": 3.1849488868382556e-05, "loss": 0.2511, "step": 5762500 }, { "epoch": 3.46, "learning_rate": 3.184739310275311e-05, "loss": 0.2579, "step": 5763000 }, { "epoch": 3.46, "learning_rate": 3.184529313719255e-05, "loss": 0.2568, "step": 5763500 }, { "epoch": 3.46, "learning_rate": 3.184319317163198e-05, "loss": 0.2608, "step": 5764000 }, { "epoch": 3.46, "learning_rate": 3.184109320607142e-05, "loss": 0.2608, "step": 5764500 }, { "epoch": 3.46, "learning_rate": 3.183899324051086e-05, "loss": 0.2498, "step": 5765000 }, { "epoch": 3.46, "learning_rate": 3.183689327495029e-05, "loss": 0.251, "step": 5765500 }, { "epoch": 3.46, "learning_rate": 3.1834797509320844e-05, "loss": 0.2566, "step": 5766000 }, { "epoch": 3.46, "learning_rate": 3.183269754376028e-05, "loss": 0.2533, "step": 5766500 }, { "epoch": 3.46, "learning_rate": 3.183059757819972e-05, "loss": 0.2558, "step": 5767000 }, { "epoch": 3.46, "learning_rate": 3.182849761263915e-05, "loss": 0.2533, "step": 5767500 }, { "epoch": 3.46, "learning_rate": 3.1826397647078585e-05, "loss": 0.254, "step": 5768000 }, { "epoch": 3.46, "learning_rate": 3.1824297681518025e-05, "loss": 0.2433, "step": 5768500 }, { "epoch": 3.46, "learning_rate": 3.182220191588858e-05, "loss": 0.2549, "step": 5769000 }, { "epoch": 3.46, "learning_rate": 3.182010195032801e-05, "loss": 0.2512, "step": 5769500 }, { "epoch": 3.46, "learning_rate": 3.181800198476745e-05, "loss": 0.2471, "step": 5770000 }, { "epoch": 3.46, "learning_rate": 3.1815902019206886e-05, "loss": 0.2537, "step": 5770500 }, { "epoch": 3.46, "learning_rate": 3.181380205364632e-05, "loss": 0.2453, "step": 5771000 }, { "epoch": 3.46, "learning_rate": 3.181170208808576e-05, "loss": 0.2512, "step": 5771500 }, { "epoch": 3.46, "learning_rate": 3.180960212252519e-05, "loss": 0.2569, "step": 5772000 }, { "epoch": 3.46, "learning_rate": 3.1807502156964626e-05, "loss": 0.2561, "step": 5772500 }, { "epoch": 3.46, "learning_rate": 3.180540639133518e-05, "loss": 0.2544, "step": 5773000 }, { "epoch": 3.46, "learning_rate": 3.180330642577462e-05, "loss": 0.2525, "step": 5773500 }, { "epoch": 3.46, "learning_rate": 3.1801206460214054e-05, "loss": 0.2518, "step": 5774000 }, { "epoch": 3.46, "learning_rate": 3.179910649465349e-05, "loss": 0.2523, "step": 5774500 }, { "epoch": 3.46, "learning_rate": 3.179701072902404e-05, "loss": 0.2546, "step": 5775000 }, { "epoch": 3.46, "learning_rate": 3.179491076346348e-05, "loss": 0.2493, "step": 5775500 }, { "epoch": 3.46, "learning_rate": 3.1792810797902914e-05, "loss": 0.2569, "step": 5776000 }, { "epoch": 3.46, "learning_rate": 3.179071083234235e-05, "loss": 0.2552, "step": 5776500 }, { "epoch": 3.46, "learning_rate": 3.178861506671291e-05, "loss": 0.2499, "step": 5777000 }, { "epoch": 3.46, "learning_rate": 3.178651510115234e-05, "loss": 0.2532, "step": 5777500 }, { "epoch": 3.46, "learning_rate": 3.1784415135591775e-05, "loss": 0.254, "step": 5778000 }, { "epoch": 3.46, "learning_rate": 3.1782315170031215e-05, "loss": 0.2507, "step": 5778500 }, { "epoch": 3.46, "learning_rate": 3.178021940440177e-05, "loss": 0.2557, "step": 5779000 }, { "epoch": 3.47, "learning_rate": 3.17781194388412e-05, "loss": 0.2558, "step": 5779500 }, { "epoch": 3.47, "learning_rate": 3.1776019473280636e-05, "loss": 0.2551, "step": 5780000 }, { "epoch": 3.47, "learning_rate": 3.177392370765119e-05, "loss": 0.2539, "step": 5780500 }, { "epoch": 3.47, "learning_rate": 3.177182374209063e-05, "loss": 0.2487, "step": 5781000 }, { "epoch": 3.47, "learning_rate": 3.176972797646119e-05, "loss": 0.2493, "step": 5781500 }, { "epoch": 3.47, "learning_rate": 3.1767628010900617e-05, "loss": 0.2536, "step": 5782000 }, { "epoch": 3.47, "learning_rate": 3.176552804534006e-05, "loss": 0.2519, "step": 5782500 }, { "epoch": 3.47, "learning_rate": 3.176342807977949e-05, "loss": 0.2516, "step": 5783000 }, { "epoch": 3.47, "learning_rate": 3.1761328114218924e-05, "loss": 0.2535, "step": 5783500 }, { "epoch": 3.47, "learning_rate": 3.1759228148658364e-05, "loss": 0.2523, "step": 5784000 }, { "epoch": 3.47, "learning_rate": 3.17571281830978e-05, "loss": 0.2558, "step": 5784500 }, { "epoch": 3.47, "learning_rate": 3.175502821753723e-05, "loss": 0.2517, "step": 5785000 }, { "epoch": 3.47, "learning_rate": 3.175292825197667e-05, "loss": 0.254, "step": 5785500 }, { "epoch": 3.47, "learning_rate": 3.1750828286416105e-05, "loss": 0.2538, "step": 5786000 }, { "epoch": 3.47, "learning_rate": 3.174872832085554e-05, "loss": 0.2564, "step": 5786500 }, { "epoch": 3.47, "learning_rate": 3.174662835529498e-05, "loss": 0.257, "step": 5787000 }, { "epoch": 3.47, "learning_rate": 3.174453258966553e-05, "loss": 0.2502, "step": 5787500 }, { "epoch": 3.47, "learning_rate": 3.1742432624104965e-05, "loss": 0.2508, "step": 5788000 }, { "epoch": 3.47, "learning_rate": 3.17403326585444e-05, "loss": 0.2508, "step": 5788500 }, { "epoch": 3.47, "learning_rate": 3.173823269298384e-05, "loss": 0.2551, "step": 5789000 }, { "epoch": 3.47, "learning_rate": 3.173613692735439e-05, "loss": 0.255, "step": 5789500 }, { "epoch": 3.47, "learning_rate": 3.1734036961793826e-05, "loss": 0.252, "step": 5790000 }, { "epoch": 3.47, "learning_rate": 3.173193699623326e-05, "loss": 0.2522, "step": 5790500 }, { "epoch": 3.47, "learning_rate": 3.17298370306727e-05, "loss": 0.2477, "step": 5791000 }, { "epoch": 3.47, "learning_rate": 3.172774126504325e-05, "loss": 0.2565, "step": 5791500 }, { "epoch": 3.47, "learning_rate": 3.172564129948269e-05, "loss": 0.259, "step": 5792000 }, { "epoch": 3.47, "learning_rate": 3.172354133392213e-05, "loss": 0.2484, "step": 5792500 }, { "epoch": 3.47, "learning_rate": 3.172144136836156e-05, "loss": 0.2513, "step": 5793000 }, { "epoch": 3.47, "learning_rate": 3.1719345602732114e-05, "loss": 0.2557, "step": 5793500 }, { "epoch": 3.47, "learning_rate": 3.171724563717155e-05, "loss": 0.2538, "step": 5794000 }, { "epoch": 3.47, "learning_rate": 3.171514567161099e-05, "loss": 0.2538, "step": 5794500 }, { "epoch": 3.47, "learning_rate": 3.171304570605042e-05, "loss": 0.251, "step": 5795000 }, { "epoch": 3.47, "learning_rate": 3.1710949940420975e-05, "loss": 0.2552, "step": 5795500 }, { "epoch": 3.47, "learning_rate": 3.170884997486041e-05, "loss": 0.2553, "step": 5796000 }, { "epoch": 3.48, "learning_rate": 3.170675000929985e-05, "loss": 0.2539, "step": 5796500 }, { "epoch": 3.48, "learning_rate": 3.170465004373928e-05, "loss": 0.2492, "step": 5797000 }, { "epoch": 3.48, "learning_rate": 3.1702554278109835e-05, "loss": 0.2535, "step": 5797500 }, { "epoch": 3.48, "learning_rate": 3.1700454312549276e-05, "loss": 0.2561, "step": 5798000 }, { "epoch": 3.48, "learning_rate": 3.169835434698871e-05, "loss": 0.2535, "step": 5798500 }, { "epoch": 3.48, "learning_rate": 3.169625858135926e-05, "loss": 0.2439, "step": 5799000 }, { "epoch": 3.48, "learning_rate": 3.1694158615798696e-05, "loss": 0.254, "step": 5799500 }, { "epoch": 3.48, "learning_rate": 3.1692058650238136e-05, "loss": 0.2532, "step": 5800000 }, { "epoch": 3.48, "eval_loss": 0.2328924834728241, "eval_runtime": 1461.3477, "eval_samples_per_second": 360.434, "eval_steps_per_second": 60.073, "step": 5800000 }, { "epoch": 3.48, "learning_rate": 3.168995868467757e-05, "loss": 0.2516, "step": 5800500 }, { "epoch": 3.48, "learning_rate": 3.1687858719117e-05, "loss": 0.2539, "step": 5801000 }, { "epoch": 3.48, "learning_rate": 3.1685758753556444e-05, "loss": 0.2489, "step": 5801500 }, { "epoch": 3.48, "learning_rate": 3.168365878799588e-05, "loss": 0.2519, "step": 5802000 }, { "epoch": 3.48, "learning_rate": 3.168155882243531e-05, "loss": 0.2542, "step": 5802500 }, { "epoch": 3.48, "learning_rate": 3.1679463056805864e-05, "loss": 0.253, "step": 5803000 }, { "epoch": 3.48, "learning_rate": 3.1677363091245304e-05, "loss": 0.2529, "step": 5803500 }, { "epoch": 3.48, "learning_rate": 3.167526312568474e-05, "loss": 0.2471, "step": 5804000 }, { "epoch": 3.48, "learning_rate": 3.167316316012417e-05, "loss": 0.2485, "step": 5804500 }, { "epoch": 3.48, "learning_rate": 3.167106739449473e-05, "loss": 0.2475, "step": 5805000 }, { "epoch": 3.48, "learning_rate": 3.1668967428934165e-05, "loss": 0.2475, "step": 5805500 }, { "epoch": 3.48, "learning_rate": 3.16668674633736e-05, "loss": 0.2478, "step": 5806000 }, { "epoch": 3.48, "learning_rate": 3.166476749781304e-05, "loss": 0.2449, "step": 5806500 }, { "epoch": 3.48, "learning_rate": 3.166267173218359e-05, "loss": 0.2554, "step": 5807000 }, { "epoch": 3.48, "learning_rate": 3.1660571766623026e-05, "loss": 0.2568, "step": 5807500 }, { "epoch": 3.48, "learning_rate": 3.165847600099358e-05, "loss": 0.2528, "step": 5808000 }, { "epoch": 3.48, "learning_rate": 3.165637603543301e-05, "loss": 0.2474, "step": 5808500 }, { "epoch": 3.48, "learning_rate": 3.165427606987245e-05, "loss": 0.254, "step": 5809000 }, { "epoch": 3.48, "learning_rate": 3.1652176104311887e-05, "loss": 0.2472, "step": 5809500 }, { "epoch": 3.48, "learning_rate": 3.165007613875132e-05, "loss": 0.2496, "step": 5810000 }, { "epoch": 3.48, "learning_rate": 3.164797617319076e-05, "loss": 0.2535, "step": 5810500 }, { "epoch": 3.48, "learning_rate": 3.1645876207630194e-05, "loss": 0.2521, "step": 5811000 }, { "epoch": 3.48, "learning_rate": 3.164377624206963e-05, "loss": 0.2513, "step": 5811500 }, { "epoch": 3.48, "learning_rate": 3.164168047644019e-05, "loss": 0.2516, "step": 5812000 }, { "epoch": 3.48, "learning_rate": 3.163958051087962e-05, "loss": 0.2515, "step": 5812500 }, { "epoch": 3.49, "learning_rate": 3.1637480545319054e-05, "loss": 0.2543, "step": 5813000 }, { "epoch": 3.49, "learning_rate": 3.1635380579758495e-05, "loss": 0.2502, "step": 5813500 }, { "epoch": 3.49, "learning_rate": 3.163328481412905e-05, "loss": 0.2574, "step": 5814000 }, { "epoch": 3.49, "learning_rate": 3.163118484856848e-05, "loss": 0.2536, "step": 5814500 }, { "epoch": 3.49, "learning_rate": 3.1629084883007915e-05, "loss": 0.2534, "step": 5815000 }, { "epoch": 3.49, "learning_rate": 3.1626984917447355e-05, "loss": 0.2522, "step": 5815500 }, { "epoch": 3.49, "learning_rate": 3.162488915181791e-05, "loss": 0.2555, "step": 5816000 }, { "epoch": 3.49, "learning_rate": 3.162278918625734e-05, "loss": 0.2507, "step": 5816500 }, { "epoch": 3.49, "learning_rate": 3.1620689220696776e-05, "loss": 0.2508, "step": 5817000 }, { "epoch": 3.49, "learning_rate": 3.1618589255136216e-05, "loss": 0.2524, "step": 5817500 }, { "epoch": 3.49, "learning_rate": 3.161648928957565e-05, "loss": 0.2505, "step": 5818000 }, { "epoch": 3.49, "learning_rate": 3.161438932401508e-05, "loss": 0.2437, "step": 5818500 }, { "epoch": 3.49, "learning_rate": 3.161228935845452e-05, "loss": 0.2529, "step": 5819000 }, { "epoch": 3.49, "learning_rate": 3.161018939289396e-05, "loss": 0.2513, "step": 5819500 }, { "epoch": 3.49, "learning_rate": 3.160809362726451e-05, "loss": 0.2516, "step": 5820000 }, { "epoch": 3.49, "learning_rate": 3.160599366170395e-05, "loss": 0.2515, "step": 5820500 }, { "epoch": 3.49, "learning_rate": 3.1603893696143384e-05, "loss": 0.2518, "step": 5821000 }, { "epoch": 3.49, "learning_rate": 3.160179373058282e-05, "loss": 0.2516, "step": 5821500 }, { "epoch": 3.49, "learning_rate": 3.159969796495337e-05, "loss": 0.257, "step": 5822000 }, { "epoch": 3.49, "learning_rate": 3.159759799939281e-05, "loss": 0.2544, "step": 5822500 }, { "epoch": 3.49, "learning_rate": 3.1595498033832245e-05, "loss": 0.2467, "step": 5823000 }, { "epoch": 3.49, "learning_rate": 3.159339806827168e-05, "loss": 0.2559, "step": 5823500 }, { "epoch": 3.49, "learning_rate": 3.159130650257336e-05, "loss": 0.2524, "step": 5824000 }, { "epoch": 3.49, "learning_rate": 3.158920653701279e-05, "loss": 0.2558, "step": 5824500 }, { "epoch": 3.49, "learning_rate": 3.1587106571452226e-05, "loss": 0.2545, "step": 5825000 }, { "epoch": 3.49, "learning_rate": 3.158500660589166e-05, "loss": 0.2541, "step": 5825500 }, { "epoch": 3.49, "learning_rate": 3.15829066403311e-05, "loss": 0.2524, "step": 5826000 }, { "epoch": 3.49, "learning_rate": 3.158080667477053e-05, "loss": 0.2561, "step": 5826500 }, { "epoch": 3.49, "learning_rate": 3.1578706709209966e-05, "loss": 0.251, "step": 5827000 }, { "epoch": 3.49, "learning_rate": 3.1576606743649406e-05, "loss": 0.2486, "step": 5827500 }, { "epoch": 3.49, "learning_rate": 3.157451097801996e-05, "loss": 0.2563, "step": 5828000 }, { "epoch": 3.49, "learning_rate": 3.1572411012459393e-05, "loss": 0.2449, "step": 5828500 }, { "epoch": 3.49, "learning_rate": 3.157031104689883e-05, "loss": 0.2532, "step": 5829000 }, { "epoch": 3.5, "learning_rate": 3.156821108133827e-05, "loss": 0.2483, "step": 5829500 }, { "epoch": 3.5, "learning_rate": 3.156611951563994e-05, "loss": 0.2563, "step": 5830000 }, { "epoch": 3.5, "learning_rate": 3.156401955007938e-05, "loss": 0.2537, "step": 5830500 }, { "epoch": 3.5, "learning_rate": 3.1561919584518815e-05, "loss": 0.2536, "step": 5831000 }, { "epoch": 3.5, "learning_rate": 3.1559819618958255e-05, "loss": 0.2526, "step": 5831500 }, { "epoch": 3.5, "learning_rate": 3.155771965339768e-05, "loss": 0.2467, "step": 5832000 }, { "epoch": 3.5, "learning_rate": 3.1555619687837115e-05, "loss": 0.2449, "step": 5832500 }, { "epoch": 3.5, "learning_rate": 3.1553519722276555e-05, "loss": 0.2553, "step": 5833000 }, { "epoch": 3.5, "learning_rate": 3.155141975671599e-05, "loss": 0.2516, "step": 5833500 }, { "epoch": 3.5, "learning_rate": 3.154932399108655e-05, "loss": 0.2486, "step": 5834000 }, { "epoch": 3.5, "learning_rate": 3.1547224025525976e-05, "loss": 0.2464, "step": 5834500 }, { "epoch": 3.5, "learning_rate": 3.1545124059965416e-05, "loss": 0.249, "step": 5835000 }, { "epoch": 3.5, "learning_rate": 3.154302409440485e-05, "loss": 0.2524, "step": 5835500 }, { "epoch": 3.5, "learning_rate": 3.154092832877541e-05, "loss": 0.2474, "step": 5836000 }, { "epoch": 3.5, "learning_rate": 3.153882836321484e-05, "loss": 0.247, "step": 5836500 }, { "epoch": 3.5, "learning_rate": 3.1536728397654277e-05, "loss": 0.2468, "step": 5837000 }, { "epoch": 3.5, "learning_rate": 3.153462843209371e-05, "loss": 0.2507, "step": 5837500 }, { "epoch": 3.5, "learning_rate": 3.153253266646427e-05, "loss": 0.2508, "step": 5838000 }, { "epoch": 3.5, "learning_rate": 3.153043270090371e-05, "loss": 0.251, "step": 5838500 }, { "epoch": 3.5, "learning_rate": 3.152833273534314e-05, "loss": 0.2499, "step": 5839000 }, { "epoch": 3.5, "learning_rate": 3.152623276978257e-05, "loss": 0.2538, "step": 5839500 }, { "epoch": 3.5, "learning_rate": 3.152413700415313e-05, "loss": 0.2516, "step": 5840000 }, { "epoch": 3.5, "learning_rate": 3.152203703859257e-05, "loss": 0.2479, "step": 5840500 }, { "epoch": 3.5, "learning_rate": 3.1519937073032005e-05, "loss": 0.2558, "step": 5841000 }, { "epoch": 3.5, "learning_rate": 3.151783710747143e-05, "loss": 0.2488, "step": 5841500 }, { "epoch": 3.5, "learning_rate": 3.151573714191087e-05, "loss": 0.2518, "step": 5842000 }, { "epoch": 3.5, "learning_rate": 3.1513637176350305e-05, "loss": 0.2554, "step": 5842500 }, { "epoch": 3.5, "learning_rate": 3.151153721078974e-05, "loss": 0.2474, "step": 5843000 }, { "epoch": 3.5, "learning_rate": 3.150943724522918e-05, "loss": 0.2528, "step": 5843500 }, { "epoch": 3.5, "learning_rate": 3.150734147959973e-05, "loss": 0.2508, "step": 5844000 }, { "epoch": 3.5, "learning_rate": 3.1505241514039166e-05, "loss": 0.2535, "step": 5844500 }, { "epoch": 3.5, "learning_rate": 3.1503141548478606e-05, "loss": 0.2537, "step": 5845000 }, { "epoch": 3.5, "learning_rate": 3.150104158291804e-05, "loss": 0.2505, "step": 5845500 }, { "epoch": 3.5, "learning_rate": 3.14989458172886e-05, "loss": 0.2519, "step": 5846000 }, { "epoch": 3.51, "learning_rate": 3.149684585172803e-05, "loss": 0.251, "step": 5846500 }, { "epoch": 3.51, "learning_rate": 3.149474588616747e-05, "loss": 0.2518, "step": 5847000 }, { "epoch": 3.51, "learning_rate": 3.14926459206069e-05, "loss": 0.2529, "step": 5847500 }, { "epoch": 3.51, "learning_rate": 3.149055015497746e-05, "loss": 0.2503, "step": 5848000 }, { "epoch": 3.51, "learning_rate": 3.1488454389348014e-05, "loss": 0.2494, "step": 5848500 }, { "epoch": 3.51, "learning_rate": 3.148635442378745e-05, "loss": 0.2589, "step": 5849000 }, { "epoch": 3.51, "learning_rate": 3.148425445822689e-05, "loss": 0.2512, "step": 5849500 }, { "epoch": 3.51, "learning_rate": 3.148215449266632e-05, "loss": 0.2592, "step": 5850000 }, { "epoch": 3.51, "learning_rate": 3.1480054527105755e-05, "loss": 0.2497, "step": 5850500 }, { "epoch": 3.51, "learning_rate": 3.147795456154519e-05, "loss": 0.2526, "step": 5851000 }, { "epoch": 3.51, "learning_rate": 3.147585879591575e-05, "loss": 0.2493, "step": 5851500 }, { "epoch": 3.51, "learning_rate": 3.147375883035518e-05, "loss": 0.2528, "step": 5852000 }, { "epoch": 3.51, "learning_rate": 3.147165886479462e-05, "loss": 0.25, "step": 5852500 }, { "epoch": 3.51, "learning_rate": 3.1469558899234056e-05, "loss": 0.2516, "step": 5853000 }, { "epoch": 3.51, "learning_rate": 3.146745893367348e-05, "loss": 0.2518, "step": 5853500 }, { "epoch": 3.51, "learning_rate": 3.146535896811292e-05, "loss": 0.2568, "step": 5854000 }, { "epoch": 3.51, "learning_rate": 3.1463259002552356e-05, "loss": 0.2536, "step": 5854500 }, { "epoch": 3.51, "learning_rate": 3.146115903699179e-05, "loss": 0.2525, "step": 5855000 }, { "epoch": 3.51, "learning_rate": 3.145906327136235e-05, "loss": 0.252, "step": 5855500 }, { "epoch": 3.51, "learning_rate": 3.1456963305801783e-05, "loss": 0.2517, "step": 5856000 }, { "epoch": 3.51, "learning_rate": 3.145486334024122e-05, "loss": 0.257, "step": 5856500 }, { "epoch": 3.51, "learning_rate": 3.145276337468065e-05, "loss": 0.2515, "step": 5857000 }, { "epoch": 3.51, "learning_rate": 3.145066760905121e-05, "loss": 0.248, "step": 5857500 }, { "epoch": 3.51, "learning_rate": 3.144856764349065e-05, "loss": 0.2547, "step": 5858000 }, { "epoch": 3.51, "learning_rate": 3.144646767793008e-05, "loss": 0.2506, "step": 5858500 }, { "epoch": 3.51, "learning_rate": 3.144436771236952e-05, "loss": 0.2432, "step": 5859000 }, { "epoch": 3.51, "learning_rate": 3.144227194674008e-05, "loss": 0.2543, "step": 5859500 }, { "epoch": 3.51, "learning_rate": 3.144017198117951e-05, "loss": 0.2545, "step": 5860000 }, { "epoch": 3.51, "learning_rate": 3.143807201561894e-05, "loss": 0.2425, "step": 5860500 }, { "epoch": 3.51, "learning_rate": 3.143597205005838e-05, "loss": 0.2545, "step": 5861000 }, { "epoch": 3.51, "learning_rate": 3.143387628442894e-05, "loss": 0.254, "step": 5861500 }, { "epoch": 3.51, "learning_rate": 3.143177631886837e-05, "loss": 0.2494, "step": 5862000 }, { "epoch": 3.51, "learning_rate": 3.1429676353307806e-05, "loss": 0.2489, "step": 5862500 }, { "epoch": 3.52, "learning_rate": 3.142757638774724e-05, "loss": 0.2517, "step": 5863000 }, { "epoch": 3.52, "learning_rate": 3.14254806221178e-05, "loss": 0.25, "step": 5863500 }, { "epoch": 3.52, "learning_rate": 3.142338485648835e-05, "loss": 0.2546, "step": 5864000 }, { "epoch": 3.52, "learning_rate": 3.142128489092779e-05, "loss": 0.2558, "step": 5864500 }, { "epoch": 3.52, "learning_rate": 3.141918492536723e-05, "loss": 0.2511, "step": 5865000 }, { "epoch": 3.52, "learning_rate": 3.141708495980666e-05, "loss": 0.2604, "step": 5865500 }, { "epoch": 3.52, "learning_rate": 3.1414984994246094e-05, "loss": 0.2488, "step": 5866000 }, { "epoch": 3.52, "learning_rate": 3.1412885028685534e-05, "loss": 0.2451, "step": 5866500 }, { "epoch": 3.52, "learning_rate": 3.141078506312497e-05, "loss": 0.2451, "step": 5867000 }, { "epoch": 3.52, "learning_rate": 3.14086850975644e-05, "loss": 0.2441, "step": 5867500 }, { "epoch": 3.52, "learning_rate": 3.1406589331934955e-05, "loss": 0.2542, "step": 5868000 }, { "epoch": 3.52, "learning_rate": 3.1404489366374395e-05, "loss": 0.2523, "step": 5868500 }, { "epoch": 3.52, "learning_rate": 3.140238940081383e-05, "loss": 0.2554, "step": 5869000 }, { "epoch": 3.52, "learning_rate": 3.140028943525326e-05, "loss": 0.249, "step": 5869500 }, { "epoch": 3.52, "learning_rate": 3.1398189469692695e-05, "loss": 0.2505, "step": 5870000 }, { "epoch": 3.52, "learning_rate": 3.139608950413213e-05, "loss": 0.2443, "step": 5870500 }, { "epoch": 3.52, "learning_rate": 3.139398953857156e-05, "loss": 0.2607, "step": 5871000 }, { "epoch": 3.52, "learning_rate": 3.139189377294212e-05, "loss": 0.25, "step": 5871500 }, { "epoch": 3.52, "learning_rate": 3.138979380738156e-05, "loss": 0.2555, "step": 5872000 }, { "epoch": 3.52, "learning_rate": 3.138769384182099e-05, "loss": 0.2537, "step": 5872500 }, { "epoch": 3.52, "learning_rate": 3.138559387626043e-05, "loss": 0.2555, "step": 5873000 }, { "epoch": 3.52, "learning_rate": 3.138349391069986e-05, "loss": 0.253, "step": 5873500 }, { "epoch": 3.52, "learning_rate": 3.1381393945139297e-05, "loss": 0.2482, "step": 5874000 }, { "epoch": 3.52, "learning_rate": 3.137929397957874e-05, "loss": 0.2512, "step": 5874500 }, { "epoch": 3.52, "learning_rate": 3.137719401401817e-05, "loss": 0.248, "step": 5875000 }, { "epoch": 3.52, "learning_rate": 3.1375098248388724e-05, "loss": 0.2534, "step": 5875500 }, { "epoch": 3.52, "learning_rate": 3.137299828282816e-05, "loss": 0.2497, "step": 5876000 }, { "epoch": 3.52, "learning_rate": 3.13708983172676e-05, "loss": 0.2553, "step": 5876500 }, { "epoch": 3.52, "learning_rate": 3.136879835170703e-05, "loss": 0.2574, "step": 5877000 }, { "epoch": 3.52, "learning_rate": 3.1366702586077585e-05, "loss": 0.2523, "step": 5877500 }, { "epoch": 3.52, "learning_rate": 3.136460262051702e-05, "loss": 0.2518, "step": 5878000 }, { "epoch": 3.52, "learning_rate": 3.136250265495646e-05, "loss": 0.2514, "step": 5878500 }, { "epoch": 3.52, "learning_rate": 3.136040268939589e-05, "loss": 0.2486, "step": 5879000 }, { "epoch": 3.52, "learning_rate": 3.1358306923766445e-05, "loss": 0.2472, "step": 5879500 }, { "epoch": 3.53, "learning_rate": 3.1356206958205886e-05, "loss": 0.2506, "step": 5880000 }, { "epoch": 3.53, "learning_rate": 3.135410699264532e-05, "loss": 0.2489, "step": 5880500 }, { "epoch": 3.53, "learning_rate": 3.135200702708475e-05, "loss": 0.2533, "step": 5881000 }, { "epoch": 3.53, "learning_rate": 3.134991126145531e-05, "loss": 0.2547, "step": 5881500 }, { "epoch": 3.53, "learning_rate": 3.1347811295894746e-05, "loss": 0.2583, "step": 5882000 }, { "epoch": 3.53, "learning_rate": 3.134571133033418e-05, "loss": 0.248, "step": 5882500 }, { "epoch": 3.53, "learning_rate": 3.134361136477361e-05, "loss": 0.252, "step": 5883000 }, { "epoch": 3.53, "learning_rate": 3.1341515599144174e-05, "loss": 0.2528, "step": 5883500 }, { "epoch": 3.53, "learning_rate": 3.1339415633583614e-05, "loss": 0.2482, "step": 5884000 }, { "epoch": 3.53, "learning_rate": 3.133731566802304e-05, "loss": 0.254, "step": 5884500 }, { "epoch": 3.53, "learning_rate": 3.1335215702462474e-05, "loss": 0.2514, "step": 5885000 }, { "epoch": 3.53, "learning_rate": 3.1333119936833034e-05, "loss": 0.2521, "step": 5885500 }, { "epoch": 3.53, "learning_rate": 3.1331019971272474e-05, "loss": 0.2503, "step": 5886000 }, { "epoch": 3.53, "learning_rate": 3.132892000571191e-05, "loss": 0.2488, "step": 5886500 }, { "epoch": 3.53, "learning_rate": 3.132682004015134e-05, "loss": 0.2515, "step": 5887000 }, { "epoch": 3.53, "learning_rate": 3.13247242745219e-05, "loss": 0.2516, "step": 5887500 }, { "epoch": 3.53, "learning_rate": 3.1322624308961335e-05, "loss": 0.2504, "step": 5888000 }, { "epoch": 3.53, "learning_rate": 3.132052434340077e-05, "loss": 0.2507, "step": 5888500 }, { "epoch": 3.53, "learning_rate": 3.131842437784021e-05, "loss": 0.2474, "step": 5889000 }, { "epoch": 3.53, "learning_rate": 3.131632861221076e-05, "loss": 0.2479, "step": 5889500 }, { "epoch": 3.53, "learning_rate": 3.1314228646650196e-05, "loss": 0.2499, "step": 5890000 }, { "epoch": 3.53, "learning_rate": 3.131212868108963e-05, "loss": 0.2555, "step": 5890500 }, { "epoch": 3.53, "learning_rate": 3.131002871552907e-05, "loss": 0.253, "step": 5891000 }, { "epoch": 3.53, "learning_rate": 3.130793294989962e-05, "loss": 0.2576, "step": 5891500 }, { "epoch": 3.53, "learning_rate": 3.130583298433906e-05, "loss": 0.2522, "step": 5892000 }, { "epoch": 3.53, "learning_rate": 3.130373301877849e-05, "loss": 0.2523, "step": 5892500 }, { "epoch": 3.53, "learning_rate": 3.130163305321793e-05, "loss": 0.2528, "step": 5893000 }, { "epoch": 3.53, "learning_rate": 3.1299537287588484e-05, "loss": 0.247, "step": 5893500 }, { "epoch": 3.53, "learning_rate": 3.129743732202792e-05, "loss": 0.2487, "step": 5894000 }, { "epoch": 3.53, "learning_rate": 3.129533735646736e-05, "loss": 0.2436, "step": 5894500 }, { "epoch": 3.53, "learning_rate": 3.129323739090679e-05, "loss": 0.2586, "step": 5895000 }, { "epoch": 3.53, "learning_rate": 3.1291137425346225e-05, "loss": 0.2449, "step": 5895500 }, { "epoch": 3.53, "learning_rate": 3.128904165971678e-05, "loss": 0.253, "step": 5896000 }, { "epoch": 3.54, "learning_rate": 3.128694169415622e-05, "loss": 0.2521, "step": 5896500 }, { "epoch": 3.54, "learning_rate": 3.128484172859565e-05, "loss": 0.2505, "step": 5897000 }, { "epoch": 3.54, "learning_rate": 3.1282741763035085e-05, "loss": 0.2513, "step": 5897500 }, { "epoch": 3.54, "learning_rate": 3.128064599740564e-05, "loss": 0.2543, "step": 5898000 }, { "epoch": 3.54, "learning_rate": 3.127854603184508e-05, "loss": 0.2511, "step": 5898500 }, { "epoch": 3.54, "learning_rate": 3.127644606628451e-05, "loss": 0.2485, "step": 5899000 }, { "epoch": 3.54, "learning_rate": 3.1274346100723946e-05, "loss": 0.2501, "step": 5899500 }, { "epoch": 3.54, "learning_rate": 3.1272250335094506e-05, "loss": 0.2588, "step": 5900000 }, { "epoch": 3.54, "eval_loss": 0.2330145537853241, "eval_runtime": 1460.116, "eval_samples_per_second": 360.738, "eval_steps_per_second": 60.123, "step": 5900000 }, { "epoch": 3.54, "learning_rate": 3.127015036953394e-05, "loss": 0.2523, "step": 5900500 }, { "epoch": 3.54, "learning_rate": 3.126805040397337e-05, "loss": 0.2525, "step": 5901000 }, { "epoch": 3.54, "learning_rate": 3.1265950438412814e-05, "loss": 0.2468, "step": 5901500 }, { "epoch": 3.54, "learning_rate": 3.126385467278337e-05, "loss": 0.2509, "step": 5902000 }, { "epoch": 3.54, "learning_rate": 3.12617547072228e-05, "loss": 0.2494, "step": 5902500 }, { "epoch": 3.54, "learning_rate": 3.1259654741662234e-05, "loss": 0.2385, "step": 5903000 }, { "epoch": 3.54, "learning_rate": 3.1257554776101674e-05, "loss": 0.2488, "step": 5903500 }, { "epoch": 3.54, "learning_rate": 3.125545901047223e-05, "loss": 0.2482, "step": 5904000 }, { "epoch": 3.54, "learning_rate": 3.125335904491166e-05, "loss": 0.2516, "step": 5904500 }, { "epoch": 3.54, "learning_rate": 3.1251259079351095e-05, "loss": 0.2498, "step": 5905000 }, { "epoch": 3.54, "learning_rate": 3.1249159113790535e-05, "loss": 0.2558, "step": 5905500 }, { "epoch": 3.54, "learning_rate": 3.124706334816109e-05, "loss": 0.2468, "step": 5906000 }, { "epoch": 3.54, "learning_rate": 3.124496338260052e-05, "loss": 0.2508, "step": 5906500 }, { "epoch": 3.54, "learning_rate": 3.124286341703996e-05, "loss": 0.2553, "step": 5907000 }, { "epoch": 3.54, "learning_rate": 3.1240763451479396e-05, "loss": 0.2552, "step": 5907500 }, { "epoch": 3.54, "learning_rate": 3.123866768584995e-05, "loss": 0.2509, "step": 5908000 }, { "epoch": 3.54, "learning_rate": 3.123656772028938e-05, "loss": 0.2461, "step": 5908500 }, { "epoch": 3.54, "learning_rate": 3.123446775472882e-05, "loss": 0.2535, "step": 5909000 }, { "epoch": 3.54, "learning_rate": 3.1232367789168256e-05, "loss": 0.248, "step": 5909500 }, { "epoch": 3.54, "learning_rate": 3.123027202353881e-05, "loss": 0.2505, "step": 5910000 }, { "epoch": 3.54, "learning_rate": 3.1228172057978243e-05, "loss": 0.254, "step": 5910500 }, { "epoch": 3.54, "learning_rate": 3.1226072092417684e-05, "loss": 0.249, "step": 5911000 }, { "epoch": 3.54, "learning_rate": 3.122397212685712e-05, "loss": 0.2518, "step": 5911500 }, { "epoch": 3.54, "learning_rate": 3.122187636122768e-05, "loss": 0.2492, "step": 5912000 }, { "epoch": 3.54, "learning_rate": 3.121977639566711e-05, "loss": 0.25, "step": 5912500 }, { "epoch": 3.55, "learning_rate": 3.1217676430106544e-05, "loss": 0.2538, "step": 5913000 }, { "epoch": 3.55, "learning_rate": 3.1215580664477105e-05, "loss": 0.2501, "step": 5913500 }, { "epoch": 3.55, "learning_rate": 3.121348069891654e-05, "loss": 0.2507, "step": 5914000 }, { "epoch": 3.55, "learning_rate": 3.121138073335598e-05, "loss": 0.2476, "step": 5914500 }, { "epoch": 3.55, "learning_rate": 3.1209280767795405e-05, "loss": 0.2514, "step": 5915000 }, { "epoch": 3.55, "learning_rate": 3.120718080223484e-05, "loss": 0.2444, "step": 5915500 }, { "epoch": 3.55, "learning_rate": 3.120508083667428e-05, "loss": 0.2528, "step": 5916000 }, { "epoch": 3.55, "learning_rate": 3.120298087111371e-05, "loss": 0.2529, "step": 5916500 }, { "epoch": 3.55, "learning_rate": 3.1200880905553146e-05, "loss": 0.2561, "step": 5917000 }, { "epoch": 3.55, "learning_rate": 3.11987851399237e-05, "loss": 0.2496, "step": 5917500 }, { "epoch": 3.55, "learning_rate": 3.119668517436314e-05, "loss": 0.2499, "step": 5918000 }, { "epoch": 3.55, "learning_rate": 3.119458520880257e-05, "loss": 0.2525, "step": 5918500 }, { "epoch": 3.55, "learning_rate": 3.1192485243242006e-05, "loss": 0.25, "step": 5919000 }, { "epoch": 3.55, "learning_rate": 3.119038947761257e-05, "loss": 0.2481, "step": 5919500 }, { "epoch": 3.55, "learning_rate": 3.1188289512052e-05, "loss": 0.2499, "step": 5920000 }, { "epoch": 3.55, "learning_rate": 3.1186189546491434e-05, "loss": 0.255, "step": 5920500 }, { "epoch": 3.55, "learning_rate": 3.1184089580930874e-05, "loss": 0.2521, "step": 5921000 }, { "epoch": 3.55, "learning_rate": 3.118198961537031e-05, "loss": 0.2488, "step": 5921500 }, { "epoch": 3.55, "learning_rate": 3.117989384974086e-05, "loss": 0.2478, "step": 5922000 }, { "epoch": 3.55, "learning_rate": 3.1177793884180294e-05, "loss": 0.2543, "step": 5922500 }, { "epoch": 3.55, "learning_rate": 3.1175693918619735e-05, "loss": 0.2471, "step": 5923000 }, { "epoch": 3.55, "learning_rate": 3.117359395305917e-05, "loss": 0.2489, "step": 5923500 }, { "epoch": 3.55, "learning_rate": 3.117150238736085e-05, "loss": 0.252, "step": 5924000 }, { "epoch": 3.55, "learning_rate": 3.116940242180028e-05, "loss": 0.2566, "step": 5924500 }, { "epoch": 3.55, "learning_rate": 3.1167302456239716e-05, "loss": 0.2525, "step": 5925000 }, { "epoch": 3.55, "learning_rate": 3.1165202490679156e-05, "loss": 0.2606, "step": 5925500 }, { "epoch": 3.55, "learning_rate": 3.116310252511859e-05, "loss": 0.258, "step": 5926000 }, { "epoch": 3.55, "learning_rate": 3.116100255955802e-05, "loss": 0.2515, "step": 5926500 }, { "epoch": 3.55, "learning_rate": 3.1158902593997456e-05, "loss": 0.2534, "step": 5927000 }, { "epoch": 3.55, "learning_rate": 3.115680262843689e-05, "loss": 0.2518, "step": 5927500 }, { "epoch": 3.55, "learning_rate": 3.115470686280745e-05, "loss": 0.255, "step": 5928000 }, { "epoch": 3.55, "learning_rate": 3.115260689724689e-05, "loss": 0.2487, "step": 5928500 }, { "epoch": 3.55, "learning_rate": 3.115050693168632e-05, "loss": 0.254, "step": 5929000 }, { "epoch": 3.55, "learning_rate": 3.114840696612575e-05, "loss": 0.2476, "step": 5929500 }, { "epoch": 3.56, "learning_rate": 3.114631540042743e-05, "loss": 0.2575, "step": 5930000 }, { "epoch": 3.56, "learning_rate": 3.1144215434866864e-05, "loss": 0.2486, "step": 5930500 }, { "epoch": 3.56, "learning_rate": 3.1142115469306305e-05, "loss": 0.2473, "step": 5931000 }, { "epoch": 3.56, "learning_rate": 3.114001550374574e-05, "loss": 0.2498, "step": 5931500 }, { "epoch": 3.56, "learning_rate": 3.113791553818517e-05, "loss": 0.2561, "step": 5932000 }, { "epoch": 3.56, "learning_rate": 3.113581557262461e-05, "loss": 0.2462, "step": 5932500 }, { "epoch": 3.56, "learning_rate": 3.1133715607064045e-05, "loss": 0.2506, "step": 5933000 }, { "epoch": 3.56, "learning_rate": 3.113161564150348e-05, "loss": 0.2495, "step": 5933500 }, { "epoch": 3.56, "learning_rate": 3.112951987587404e-05, "loss": 0.2516, "step": 5934000 }, { "epoch": 3.56, "learning_rate": 3.112741991031347e-05, "loss": 0.2499, "step": 5934500 }, { "epoch": 3.56, "learning_rate": 3.1125319944752906e-05, "loss": 0.2513, "step": 5935000 }, { "epoch": 3.56, "learning_rate": 3.1123219979192346e-05, "loss": 0.248, "step": 5935500 }, { "epoch": 3.56, "learning_rate": 3.11211242135629e-05, "loss": 0.2512, "step": 5936000 }, { "epoch": 3.56, "learning_rate": 3.111902424800233e-05, "loss": 0.2562, "step": 5936500 }, { "epoch": 3.56, "learning_rate": 3.1116924282441767e-05, "loss": 0.254, "step": 5937000 }, { "epoch": 3.56, "learning_rate": 3.111482431688121e-05, "loss": 0.2478, "step": 5937500 }, { "epoch": 3.56, "learning_rate": 3.111272435132064e-05, "loss": 0.2539, "step": 5938000 }, { "epoch": 3.56, "learning_rate": 3.1110628585691194e-05, "loss": 0.25, "step": 5938500 }, { "epoch": 3.56, "learning_rate": 3.110852862013063e-05, "loss": 0.2597, "step": 5939000 }, { "epoch": 3.56, "learning_rate": 3.110642865457007e-05, "loss": 0.2556, "step": 5939500 }, { "epoch": 3.56, "learning_rate": 3.11043286890095e-05, "loss": 0.251, "step": 5940000 }, { "epoch": 3.56, "learning_rate": 3.1102232923380055e-05, "loss": 0.2586, "step": 5940500 }, { "epoch": 3.56, "learning_rate": 3.1100132957819495e-05, "loss": 0.2526, "step": 5941000 }, { "epoch": 3.56, "learning_rate": 3.109803299225893e-05, "loss": 0.2493, "step": 5941500 }, { "epoch": 3.56, "learning_rate": 3.109593302669836e-05, "loss": 0.25, "step": 5942000 }, { "epoch": 3.56, "learning_rate": 3.10938330611378e-05, "loss": 0.2479, "step": 5942500 }, { "epoch": 3.56, "learning_rate": 3.1091733095577235e-05, "loss": 0.2477, "step": 5943000 }, { "epoch": 3.56, "learning_rate": 3.108963313001666e-05, "loss": 0.2574, "step": 5943500 }, { "epoch": 3.56, "learning_rate": 3.10875331644561e-05, "loss": 0.2475, "step": 5944000 }, { "epoch": 3.56, "learning_rate": 3.108543739882666e-05, "loss": 0.2497, "step": 5944500 }, { "epoch": 3.56, "learning_rate": 3.1083337433266096e-05, "loss": 0.2517, "step": 5945000 }, { "epoch": 3.56, "learning_rate": 3.108123746770552e-05, "loss": 0.246, "step": 5945500 }, { "epoch": 3.56, "learning_rate": 3.107913750214496e-05, "loss": 0.2528, "step": 5946000 }, { "epoch": 3.57, "learning_rate": 3.1077041736515523e-05, "loss": 0.2508, "step": 5946500 }, { "epoch": 3.57, "learning_rate": 3.107494177095496e-05, "loss": 0.249, "step": 5947000 }, { "epoch": 3.57, "learning_rate": 3.10728418053944e-05, "loss": 0.2515, "step": 5947500 }, { "epoch": 3.57, "learning_rate": 3.1070741839833824e-05, "loss": 0.2552, "step": 5948000 }, { "epoch": 3.57, "learning_rate": 3.1068646074204384e-05, "loss": 0.2557, "step": 5948500 }, { "epoch": 3.57, "learning_rate": 3.106654610864382e-05, "loss": 0.2508, "step": 5949000 }, { "epoch": 3.57, "learning_rate": 3.106444614308326e-05, "loss": 0.2455, "step": 5949500 }, { "epoch": 3.57, "learning_rate": 3.106234617752269e-05, "loss": 0.2509, "step": 5950000 }, { "epoch": 3.57, "learning_rate": 3.1060250411893245e-05, "loss": 0.2519, "step": 5950500 }, { "epoch": 3.57, "learning_rate": 3.105815044633268e-05, "loss": 0.2542, "step": 5951000 }, { "epoch": 3.57, "learning_rate": 3.105605048077212e-05, "loss": 0.2559, "step": 5951500 }, { "epoch": 3.57, "learning_rate": 3.105395051521155e-05, "loss": 0.2504, "step": 5952000 }, { "epoch": 3.57, "learning_rate": 3.1051854749582106e-05, "loss": 0.2513, "step": 5952500 }, { "epoch": 3.57, "learning_rate": 3.104975478402154e-05, "loss": 0.2526, "step": 5953000 }, { "epoch": 3.57, "learning_rate": 3.104765481846098e-05, "loss": 0.247, "step": 5953500 }, { "epoch": 3.57, "learning_rate": 3.104555905283153e-05, "loss": 0.2562, "step": 5954000 }, { "epoch": 3.57, "learning_rate": 3.1043459087270966e-05, "loss": 0.25, "step": 5954500 }, { "epoch": 3.57, "learning_rate": 3.1041359121710407e-05, "loss": 0.2516, "step": 5955000 }, { "epoch": 3.57, "learning_rate": 3.103925915614984e-05, "loss": 0.2591, "step": 5955500 }, { "epoch": 3.57, "learning_rate": 3.1037159190589274e-05, "loss": 0.2442, "step": 5956000 }, { "epoch": 3.57, "learning_rate": 3.1035059225028714e-05, "loss": 0.2501, "step": 5956500 }, { "epoch": 3.57, "learning_rate": 3.103295925946815e-05, "loss": 0.2525, "step": 5957000 }, { "epoch": 3.57, "learning_rate": 3.1030859293907574e-05, "loss": 0.2469, "step": 5957500 }, { "epoch": 3.57, "learning_rate": 3.1028763528278134e-05, "loss": 0.2572, "step": 5958000 }, { "epoch": 3.57, "learning_rate": 3.1026663562717574e-05, "loss": 0.2453, "step": 5958500 }, { "epoch": 3.57, "learning_rate": 3.102456359715701e-05, "loss": 0.2479, "step": 5959000 }, { "epoch": 3.57, "learning_rate": 3.102246363159644e-05, "loss": 0.2491, "step": 5959500 }, { "epoch": 3.57, "learning_rate": 3.1020367865966995e-05, "loss": 0.2514, "step": 5960000 }, { "epoch": 3.57, "learning_rate": 3.1018267900406435e-05, "loss": 0.2552, "step": 5960500 }, { "epoch": 3.57, "learning_rate": 3.101616793484587e-05, "loss": 0.2519, "step": 5961000 }, { "epoch": 3.57, "learning_rate": 3.101406796928531e-05, "loss": 0.2495, "step": 5961500 }, { "epoch": 3.57, "learning_rate": 3.101197220365586e-05, "loss": 0.2567, "step": 5962000 }, { "epoch": 3.57, "learning_rate": 3.1009872238095296e-05, "loss": 0.2497, "step": 5962500 }, { "epoch": 3.58, "learning_rate": 3.100777227253473e-05, "loss": 0.2535, "step": 5963000 }, { "epoch": 3.58, "learning_rate": 3.100567230697417e-05, "loss": 0.2521, "step": 5963500 }, { "epoch": 3.58, "learning_rate": 3.10035723414136e-05, "loss": 0.2531, "step": 5964000 }, { "epoch": 3.58, "learning_rate": 3.1001472375853037e-05, "loss": 0.2484, "step": 5964500 }, { "epoch": 3.58, "learning_rate": 3.099937241029247e-05, "loss": 0.2569, "step": 5965000 }, { "epoch": 3.58, "learning_rate": 3.0997272444731903e-05, "loss": 0.2507, "step": 5965500 }, { "epoch": 3.58, "learning_rate": 3.0995176679102464e-05, "loss": 0.2544, "step": 5966000 }, { "epoch": 3.58, "learning_rate": 3.09930767135419e-05, "loss": 0.2567, "step": 5966500 }, { "epoch": 3.58, "learning_rate": 3.099098094791246e-05, "loss": 0.2515, "step": 5967000 }, { "epoch": 3.58, "learning_rate": 3.098888098235189e-05, "loss": 0.253, "step": 5967500 }, { "epoch": 3.58, "learning_rate": 3.0986781016791325e-05, "loss": 0.2471, "step": 5968000 }, { "epoch": 3.58, "learning_rate": 3.0984681051230765e-05, "loss": 0.2483, "step": 5968500 }, { "epoch": 3.58, "learning_rate": 3.09825810856702e-05, "loss": 0.256, "step": 5969000 }, { "epoch": 3.58, "learning_rate": 3.0980481120109625e-05, "loss": 0.2464, "step": 5969500 }, { "epoch": 3.58, "learning_rate": 3.0978381154549065e-05, "loss": 0.2554, "step": 5970000 }, { "epoch": 3.58, "learning_rate": 3.09762811889885e-05, "loss": 0.2507, "step": 5970500 }, { "epoch": 3.58, "learning_rate": 3.097418542335906e-05, "loss": 0.2448, "step": 5971000 }, { "epoch": 3.58, "learning_rate": 3.097208965772961e-05, "loss": 0.2473, "step": 5971500 }, { "epoch": 3.58, "learning_rate": 3.0969989692169046e-05, "loss": 0.2545, "step": 5972000 }, { "epoch": 3.58, "learning_rate": 3.0967889726608486e-05, "loss": 0.2453, "step": 5972500 }, { "epoch": 3.58, "learning_rate": 3.096578976104792e-05, "loss": 0.2461, "step": 5973000 }, { "epoch": 3.58, "learning_rate": 3.096368979548735e-05, "loss": 0.2547, "step": 5973500 }, { "epoch": 3.58, "learning_rate": 3.096158982992679e-05, "loss": 0.2452, "step": 5974000 }, { "epoch": 3.58, "learning_rate": 3.095948986436622e-05, "loss": 0.2524, "step": 5974500 }, { "epoch": 3.58, "learning_rate": 3.095738989880566e-05, "loss": 0.2516, "step": 5975000 }, { "epoch": 3.58, "learning_rate": 3.095529413317622e-05, "loss": 0.2458, "step": 5975500 }, { "epoch": 3.58, "learning_rate": 3.0953194167615654e-05, "loss": 0.2541, "step": 5976000 }, { "epoch": 3.58, "learning_rate": 3.095109420205508e-05, "loss": 0.2505, "step": 5976500 }, { "epoch": 3.58, "learning_rate": 3.094899423649452e-05, "loss": 0.2546, "step": 5977000 }, { "epoch": 3.58, "learning_rate": 3.0946894270933954e-05, "loss": 0.2516, "step": 5977500 }, { "epoch": 3.58, "learning_rate": 3.0944798505304515e-05, "loss": 0.2538, "step": 5978000 }, { "epoch": 3.58, "learning_rate": 3.094269853974395e-05, "loss": 0.2471, "step": 5978500 }, { "epoch": 3.58, "learning_rate": 3.094059857418338e-05, "loss": 0.2477, "step": 5979000 }, { "epoch": 3.58, "learning_rate": 3.0938498608622815e-05, "loss": 0.2477, "step": 5979500 }, { "epoch": 3.59, "learning_rate": 3.0936402842993376e-05, "loss": 0.2498, "step": 5980000 }, { "epoch": 3.59, "learning_rate": 3.093430287743281e-05, "loss": 0.2556, "step": 5980500 }, { "epoch": 3.59, "learning_rate": 3.093220291187225e-05, "loss": 0.2509, "step": 5981000 }, { "epoch": 3.59, "learning_rate": 3.09301071462428e-05, "loss": 0.2551, "step": 5981500 }, { "epoch": 3.59, "learning_rate": 3.0928007180682236e-05, "loss": 0.2474, "step": 5982000 }, { "epoch": 3.59, "learning_rate": 3.0925907215121677e-05, "loss": 0.2491, "step": 5982500 }, { "epoch": 3.59, "learning_rate": 3.092380724956111e-05, "loss": 0.2485, "step": 5983000 }, { "epoch": 3.59, "learning_rate": 3.0921707284000543e-05, "loss": 0.2517, "step": 5983500 }, { "epoch": 3.59, "learning_rate": 3.091960731843998e-05, "loss": 0.2524, "step": 5984000 }, { "epoch": 3.59, "learning_rate": 3.091750735287941e-05, "loss": 0.2485, "step": 5984500 }, { "epoch": 3.59, "learning_rate": 3.0915407387318844e-05, "loss": 0.2504, "step": 5985000 }, { "epoch": 3.59, "learning_rate": 3.0913311621689404e-05, "loss": 0.2539, "step": 5985500 }, { "epoch": 3.59, "learning_rate": 3.0911211656128844e-05, "loss": 0.2556, "step": 5986000 }, { "epoch": 3.59, "learning_rate": 3.090911169056827e-05, "loss": 0.2505, "step": 5986500 }, { "epoch": 3.59, "learning_rate": 3.0907011725007705e-05, "loss": 0.2477, "step": 5987000 }, { "epoch": 3.59, "learning_rate": 3.0904915959378265e-05, "loss": 0.2503, "step": 5987500 }, { "epoch": 3.59, "learning_rate": 3.0902820193748825e-05, "loss": 0.2433, "step": 5988000 }, { "epoch": 3.59, "learning_rate": 3.090072022818826e-05, "loss": 0.2555, "step": 5988500 }, { "epoch": 3.59, "learning_rate": 3.089862026262769e-05, "loss": 0.2531, "step": 5989000 }, { "epoch": 3.59, "learning_rate": 3.089652029706713e-05, "loss": 0.2478, "step": 5989500 }, { "epoch": 3.59, "learning_rate": 3.0894420331506566e-05, "loss": 0.2461, "step": 5990000 }, { "epoch": 3.59, "learning_rate": 3.0892320365946e-05, "loss": 0.253, "step": 5990500 }, { "epoch": 3.59, "learning_rate": 3.089022040038543e-05, "loss": 0.2538, "step": 5991000 }, { "epoch": 3.59, "learning_rate": 3.0888120434824866e-05, "loss": 0.2587, "step": 5991500 }, { "epoch": 3.59, "learning_rate": 3.088602886912655e-05, "loss": 0.2578, "step": 5992000 }, { "epoch": 3.59, "learning_rate": 3.088392890356598e-05, "loss": 0.2502, "step": 5992500 }, { "epoch": 3.59, "learning_rate": 3.0881828938005414e-05, "loss": 0.2519, "step": 5993000 }, { "epoch": 3.59, "learning_rate": 3.0879728972444854e-05, "loss": 0.2545, "step": 5993500 }, { "epoch": 3.59, "learning_rate": 3.087762900688429e-05, "loss": 0.2466, "step": 5994000 }, { "epoch": 3.59, "learning_rate": 3.087553324125484e-05, "loss": 0.248, "step": 5994500 }, { "epoch": 3.59, "learning_rate": 3.087343327569428e-05, "loss": 0.2458, "step": 5995000 }, { "epoch": 3.59, "learning_rate": 3.0871333310133715e-05, "loss": 0.2518, "step": 5995500 }, { "epoch": 3.59, "learning_rate": 3.086923334457315e-05, "loss": 0.2513, "step": 5996000 }, { "epoch": 3.6, "learning_rate": 3.086713337901259e-05, "loss": 0.2528, "step": 5996500 }, { "epoch": 3.6, "learning_rate": 3.086503341345202e-05, "loss": 0.2513, "step": 5997000 }, { "epoch": 3.6, "learning_rate": 3.0862933447891455e-05, "loss": 0.2489, "step": 5997500 }, { "epoch": 3.6, "learning_rate": 3.086083348233089e-05, "loss": 0.2464, "step": 5998000 }, { "epoch": 3.6, "learning_rate": 3.085873771670145e-05, "loss": 0.2479, "step": 5998500 }, { "epoch": 3.6, "learning_rate": 3.085663775114088e-05, "loss": 0.2541, "step": 5999000 }, { "epoch": 3.6, "learning_rate": 3.0854537785580316e-05, "loss": 0.2446, "step": 5999500 }, { "epoch": 3.6, "learning_rate": 3.0852437820019756e-05, "loss": 0.2503, "step": 6000000 }, { "epoch": 3.6, "eval_loss": 0.23173731565475464, "eval_runtime": 1453.5442, "eval_samples_per_second": 362.369, "eval_steps_per_second": 60.395, "step": 6000000 }, { "epoch": 3.6, "learning_rate": 3.085034205439031e-05, "loss": 0.2487, "step": 6000500 }, { "epoch": 3.6, "learning_rate": 3.084824208882974e-05, "loss": 0.2473, "step": 6001000 }, { "epoch": 3.6, "learning_rate": 3.084614212326918e-05, "loss": 0.2521, "step": 6001500 }, { "epoch": 3.6, "learning_rate": 3.084404215770862e-05, "loss": 0.2518, "step": 6002000 }, { "epoch": 3.6, "learning_rate": 3.084194639207917e-05, "loss": 0.2503, "step": 6002500 }, { "epoch": 3.6, "learning_rate": 3.0839846426518604e-05, "loss": 0.249, "step": 6003000 }, { "epoch": 3.6, "learning_rate": 3.0837746460958044e-05, "loss": 0.2511, "step": 6003500 }, { "epoch": 3.6, "learning_rate": 3.083564649539748e-05, "loss": 0.2556, "step": 6004000 }, { "epoch": 3.6, "learning_rate": 3.083355072976803e-05, "loss": 0.2515, "step": 6004500 }, { "epoch": 3.6, "learning_rate": 3.0831450764207465e-05, "loss": 0.2474, "step": 6005000 }, { "epoch": 3.6, "learning_rate": 3.0829350798646905e-05, "loss": 0.2515, "step": 6005500 }, { "epoch": 3.6, "learning_rate": 3.082725083308634e-05, "loss": 0.252, "step": 6006000 }, { "epoch": 3.6, "learning_rate": 3.082515506745689e-05, "loss": 0.2522, "step": 6006500 }, { "epoch": 3.6, "learning_rate": 3.0823055101896325e-05, "loss": 0.2487, "step": 6007000 }, { "epoch": 3.6, "learning_rate": 3.0820955136335766e-05, "loss": 0.2464, "step": 6007500 }, { "epoch": 3.6, "learning_rate": 3.08188551707752e-05, "loss": 0.2487, "step": 6008000 }, { "epoch": 3.6, "learning_rate": 3.081675940514575e-05, "loss": 0.2534, "step": 6008500 }, { "epoch": 3.6, "learning_rate": 3.081465943958519e-05, "loss": 0.2518, "step": 6009000 }, { "epoch": 3.6, "learning_rate": 3.0812559474024626e-05, "loss": 0.2525, "step": 6009500 }, { "epoch": 3.6, "learning_rate": 3.081045950846406e-05, "loss": 0.2571, "step": 6010000 }, { "epoch": 3.6, "learning_rate": 3.080836374283461e-05, "loss": 0.2557, "step": 6010500 }, { "epoch": 3.6, "learning_rate": 3.0806263777274054e-05, "loss": 0.2497, "step": 6011000 }, { "epoch": 3.6, "learning_rate": 3.080416381171349e-05, "loss": 0.2443, "step": 6011500 }, { "epoch": 3.6, "learning_rate": 3.080206384615292e-05, "loss": 0.2501, "step": 6012000 }, { "epoch": 3.6, "learning_rate": 3.0799968080523474e-05, "loss": 0.2533, "step": 6012500 }, { "epoch": 3.61, "learning_rate": 3.0797868114962914e-05, "loss": 0.2495, "step": 6013000 }, { "epoch": 3.61, "learning_rate": 3.0795772349333475e-05, "loss": 0.2493, "step": 6013500 }, { "epoch": 3.61, "learning_rate": 3.07936723837729e-05, "loss": 0.2484, "step": 6014000 }, { "epoch": 3.61, "learning_rate": 3.079157241821234e-05, "loss": 0.2498, "step": 6014500 }, { "epoch": 3.61, "learning_rate": 3.0789472452651775e-05, "loss": 0.2519, "step": 6015000 }, { "epoch": 3.61, "learning_rate": 3.078737248709121e-05, "loss": 0.2498, "step": 6015500 }, { "epoch": 3.61, "learning_rate": 3.078527252153065e-05, "loss": 0.2433, "step": 6016000 }, { "epoch": 3.61, "learning_rate": 3.078317255597008e-05, "loss": 0.2479, "step": 6016500 }, { "epoch": 3.61, "learning_rate": 3.0781072590409516e-05, "loss": 0.2483, "step": 6017000 }, { "epoch": 3.61, "learning_rate": 3.077897682478007e-05, "loss": 0.2514, "step": 6017500 }, { "epoch": 3.61, "learning_rate": 3.077687685921951e-05, "loss": 0.2547, "step": 6018000 }, { "epoch": 3.61, "learning_rate": 3.077477689365894e-05, "loss": 0.254, "step": 6018500 }, { "epoch": 3.61, "learning_rate": 3.0772676928098376e-05, "loss": 0.2453, "step": 6019000 }, { "epoch": 3.61, "learning_rate": 3.077058116246893e-05, "loss": 0.247, "step": 6019500 }, { "epoch": 3.61, "learning_rate": 3.076848119690837e-05, "loss": 0.245, "step": 6020000 }, { "epoch": 3.61, "learning_rate": 3.0766381231347804e-05, "loss": 0.2435, "step": 6020500 }, { "epoch": 3.61, "learning_rate": 3.076428126578724e-05, "loss": 0.2488, "step": 6021000 }, { "epoch": 3.61, "learning_rate": 3.07621855001578e-05, "loss": 0.2501, "step": 6021500 }, { "epoch": 3.61, "learning_rate": 3.076008553459723e-05, "loss": 0.2518, "step": 6022000 }, { "epoch": 3.61, "learning_rate": 3.0757985569036664e-05, "loss": 0.2477, "step": 6022500 }, { "epoch": 3.61, "learning_rate": 3.0755885603476105e-05, "loss": 0.256, "step": 6023000 }, { "epoch": 3.61, "learning_rate": 3.075378983784666e-05, "loss": 0.25, "step": 6023500 }, { "epoch": 3.61, "learning_rate": 3.075168987228609e-05, "loss": 0.2541, "step": 6024000 }, { "epoch": 3.61, "learning_rate": 3.0749589906725525e-05, "loss": 0.2494, "step": 6024500 }, { "epoch": 3.61, "learning_rate": 3.0747489941164965e-05, "loss": 0.2497, "step": 6025000 }, { "epoch": 3.61, "learning_rate": 3.0745394175535526e-05, "loss": 0.2458, "step": 6025500 }, { "epoch": 3.61, "learning_rate": 3.074329840990608e-05, "loss": 0.248, "step": 6026000 }, { "epoch": 3.61, "learning_rate": 3.074119844434551e-05, "loss": 0.2559, "step": 6026500 }, { "epoch": 3.61, "learning_rate": 3.0739098478784946e-05, "loss": 0.2505, "step": 6027000 }, { "epoch": 3.61, "learning_rate": 3.0736998513224386e-05, "loss": 0.2506, "step": 6027500 }, { "epoch": 3.61, "learning_rate": 3.073489854766382e-05, "loss": 0.2542, "step": 6028000 }, { "epoch": 3.61, "learning_rate": 3.073279858210325e-05, "loss": 0.2538, "step": 6028500 }, { "epoch": 3.61, "learning_rate": 3.073069861654269e-05, "loss": 0.2574, "step": 6029000 }, { "epoch": 3.61, "learning_rate": 3.072859865098212e-05, "loss": 0.254, "step": 6029500 }, { "epoch": 3.62, "learning_rate": 3.072650288535268e-05, "loss": 0.2493, "step": 6030000 }, { "epoch": 3.62, "learning_rate": 3.072440291979212e-05, "loss": 0.2516, "step": 6030500 }, { "epoch": 3.62, "learning_rate": 3.072230295423155e-05, "loss": 0.2532, "step": 6031000 }, { "epoch": 3.62, "learning_rate": 3.072020718860211e-05, "loss": 0.2478, "step": 6031500 }, { "epoch": 3.62, "learning_rate": 3.071810722304154e-05, "loss": 0.2485, "step": 6032000 }, { "epoch": 3.62, "learning_rate": 3.071600725748098e-05, "loss": 0.2521, "step": 6032500 }, { "epoch": 3.62, "learning_rate": 3.071390729192041e-05, "loss": 0.2485, "step": 6033000 }, { "epoch": 3.62, "learning_rate": 3.071180732635984e-05, "loss": 0.251, "step": 6033500 }, { "epoch": 3.62, "learning_rate": 3.070970736079928e-05, "loss": 0.2564, "step": 6034000 }, { "epoch": 3.62, "learning_rate": 3.0707607395238715e-05, "loss": 0.2492, "step": 6034500 }, { "epoch": 3.62, "learning_rate": 3.070550742967815e-05, "loss": 0.2434, "step": 6035000 }, { "epoch": 3.62, "learning_rate": 3.070341166404871e-05, "loss": 0.2516, "step": 6035500 }, { "epoch": 3.62, "learning_rate": 3.070131169848814e-05, "loss": 0.2524, "step": 6036000 }, { "epoch": 3.62, "learning_rate": 3.0699211732927576e-05, "loss": 0.2492, "step": 6036500 }, { "epoch": 3.62, "learning_rate": 3.0697111767367016e-05, "loss": 0.2509, "step": 6037000 }, { "epoch": 3.62, "learning_rate": 3.069501600173758e-05, "loss": 0.255, "step": 6037500 }, { "epoch": 3.62, "learning_rate": 3.0692916036177003e-05, "loss": 0.2482, "step": 6038000 }, { "epoch": 3.62, "learning_rate": 3.069081607061644e-05, "loss": 0.2548, "step": 6038500 }, { "epoch": 3.62, "learning_rate": 3.068871610505588e-05, "loss": 0.2496, "step": 6039000 }, { "epoch": 3.62, "learning_rate": 3.068662033942644e-05, "loss": 0.2513, "step": 6039500 }, { "epoch": 3.62, "learning_rate": 3.068452037386587e-05, "loss": 0.2484, "step": 6040000 }, { "epoch": 3.62, "learning_rate": 3.06824204083053e-05, "loss": 0.2467, "step": 6040500 }, { "epoch": 3.62, "learning_rate": 3.068032044274474e-05, "loss": 0.2459, "step": 6041000 }, { "epoch": 3.62, "learning_rate": 3.067822047718417e-05, "loss": 0.2503, "step": 6041500 }, { "epoch": 3.62, "learning_rate": 3.067612471155473e-05, "loss": 0.2461, "step": 6042000 }, { "epoch": 3.62, "learning_rate": 3.0674024745994165e-05, "loss": 0.2509, "step": 6042500 }, { "epoch": 3.62, "learning_rate": 3.06719247804336e-05, "loss": 0.2474, "step": 6043000 }, { "epoch": 3.62, "learning_rate": 3.066982481487303e-05, "loss": 0.2554, "step": 6043500 }, { "epoch": 3.62, "learning_rate": 3.066772904924359e-05, "loss": 0.2501, "step": 6044000 }, { "epoch": 3.62, "learning_rate": 3.066562908368303e-05, "loss": 0.251, "step": 6044500 }, { "epoch": 3.62, "learning_rate": 3.066352911812246e-05, "loss": 0.2517, "step": 6045000 }, { "epoch": 3.62, "learning_rate": 3.066142915256189e-05, "loss": 0.2521, "step": 6045500 }, { "epoch": 3.62, "learning_rate": 3.065933338693245e-05, "loss": 0.2425, "step": 6046000 }, { "epoch": 3.63, "learning_rate": 3.065723342137189e-05, "loss": 0.2472, "step": 6046500 }, { "epoch": 3.63, "learning_rate": 3.065513345581133e-05, "loss": 0.2479, "step": 6047000 }, { "epoch": 3.63, "learning_rate": 3.0653033490250754e-05, "loss": 0.252, "step": 6047500 }, { "epoch": 3.63, "learning_rate": 3.0650937724621314e-05, "loss": 0.2571, "step": 6048000 }, { "epoch": 3.63, "learning_rate": 3.0648837759060754e-05, "loss": 0.2455, "step": 6048500 }, { "epoch": 3.63, "learning_rate": 3.064673779350019e-05, "loss": 0.2526, "step": 6049000 }, { "epoch": 3.63, "learning_rate": 3.064463782793962e-05, "loss": 0.2493, "step": 6049500 }, { "epoch": 3.63, "learning_rate": 3.064254206231018e-05, "loss": 0.2507, "step": 6050000 }, { "epoch": 3.63, "learning_rate": 3.0640442096749615e-05, "loss": 0.2573, "step": 6050500 }, { "epoch": 3.63, "learning_rate": 3.063834213118905e-05, "loss": 0.2436, "step": 6051000 }, { "epoch": 3.63, "learning_rate": 3.06362463655596e-05, "loss": 0.2507, "step": 6051500 }, { "epoch": 3.63, "learning_rate": 3.063414639999904e-05, "loss": 0.2478, "step": 6052000 }, { "epoch": 3.63, "learning_rate": 3.0632046434438476e-05, "loss": 0.2479, "step": 6052500 }, { "epoch": 3.63, "learning_rate": 3.062994646887791e-05, "loss": 0.2486, "step": 6053000 }, { "epoch": 3.63, "learning_rate": 3.062784650331735e-05, "loss": 0.2485, "step": 6053500 }, { "epoch": 3.63, "learning_rate": 3.062574653775678e-05, "loss": 0.2481, "step": 6054000 }, { "epoch": 3.63, "learning_rate": 3.0623650772127336e-05, "loss": 0.2484, "step": 6054500 }, { "epoch": 3.63, "learning_rate": 3.062155080656677e-05, "loss": 0.2545, "step": 6055000 }, { "epoch": 3.63, "learning_rate": 3.061945084100621e-05, "loss": 0.2505, "step": 6055500 }, { "epoch": 3.63, "learning_rate": 3.0617350875445643e-05, "loss": 0.2574, "step": 6056000 }, { "epoch": 3.63, "learning_rate": 3.061525090988508e-05, "loss": 0.2519, "step": 6056500 }, { "epoch": 3.63, "learning_rate": 3.061315514425564e-05, "loss": 0.2502, "step": 6057000 }, { "epoch": 3.63, "learning_rate": 3.061105517869507e-05, "loss": 0.2512, "step": 6057500 }, { "epoch": 3.63, "learning_rate": 3.0608955213134504e-05, "loss": 0.2571, "step": 6058000 }, { "epoch": 3.63, "learning_rate": 3.0606855247573944e-05, "loss": 0.2571, "step": 6058500 }, { "epoch": 3.63, "learning_rate": 3.060475528201338e-05, "loss": 0.2525, "step": 6059000 }, { "epoch": 3.63, "learning_rate": 3.0602655316452805e-05, "loss": 0.2528, "step": 6059500 }, { "epoch": 3.63, "learning_rate": 3.0600555350892245e-05, "loss": 0.2465, "step": 6060000 }, { "epoch": 3.63, "learning_rate": 3.059845538533168e-05, "loss": 0.2535, "step": 6060500 }, { "epoch": 3.63, "learning_rate": 3.059635961970224e-05, "loss": 0.2516, "step": 6061000 }, { "epoch": 3.63, "learning_rate": 3.059425965414167e-05, "loss": 0.2541, "step": 6061500 }, { "epoch": 3.63, "learning_rate": 3.0592163888512226e-05, "loss": 0.2515, "step": 6062000 }, { "epoch": 3.63, "learning_rate": 3.0590063922951666e-05, "loss": 0.2511, "step": 6062500 }, { "epoch": 3.64, "learning_rate": 3.05879639573911e-05, "loss": 0.2454, "step": 6063000 }, { "epoch": 3.64, "learning_rate": 3.058586399183053e-05, "loss": 0.2556, "step": 6063500 }, { "epoch": 3.64, "learning_rate": 3.0583764026269966e-05, "loss": 0.2521, "step": 6064000 }, { "epoch": 3.64, "learning_rate": 3.05816640607094e-05, "loss": 0.2523, "step": 6064500 }, { "epoch": 3.64, "learning_rate": 3.057956409514884e-05, "loss": 0.2542, "step": 6065000 }, { "epoch": 3.64, "learning_rate": 3.057746412958827e-05, "loss": 0.2446, "step": 6065500 }, { "epoch": 3.64, "learning_rate": 3.0575368363958834e-05, "loss": 0.2559, "step": 6066000 }, { "epoch": 3.64, "learning_rate": 3.057326839839826e-05, "loss": 0.2492, "step": 6066500 }, { "epoch": 3.64, "learning_rate": 3.05711684328377e-05, "loss": 0.2502, "step": 6067000 }, { "epoch": 3.64, "learning_rate": 3.0569068467277134e-05, "loss": 0.2493, "step": 6067500 }, { "epoch": 3.64, "learning_rate": 3.0566972701647694e-05, "loss": 0.2527, "step": 6068000 }, { "epoch": 3.64, "learning_rate": 3.056487273608713e-05, "loss": 0.2567, "step": 6068500 }, { "epoch": 3.64, "learning_rate": 3.056277277052656e-05, "loss": 0.257, "step": 6069000 }, { "epoch": 3.64, "learning_rate": 3.0560672804965995e-05, "loss": 0.2475, "step": 6069500 }, { "epoch": 3.64, "learning_rate": 3.0558577039336555e-05, "loss": 0.2439, "step": 6070000 }, { "epoch": 3.64, "learning_rate": 3.0556477073775995e-05, "loss": 0.2508, "step": 6070500 }, { "epoch": 3.64, "learning_rate": 3.055437710821543e-05, "loss": 0.2487, "step": 6071000 }, { "epoch": 3.64, "learning_rate": 3.0552277142654856e-05, "loss": 0.2487, "step": 6071500 }, { "epoch": 3.64, "learning_rate": 3.0550181377025416e-05, "loss": 0.252, "step": 6072000 }, { "epoch": 3.64, "learning_rate": 3.0548081411464856e-05, "loss": 0.2478, "step": 6072500 }, { "epoch": 3.64, "learning_rate": 3.054598144590429e-05, "loss": 0.2507, "step": 6073000 }, { "epoch": 3.64, "learning_rate": 3.0543881480343716e-05, "loss": 0.2495, "step": 6073500 }, { "epoch": 3.64, "learning_rate": 3.054178571471428e-05, "loss": 0.2504, "step": 6074000 }, { "epoch": 3.64, "learning_rate": 3.053968574915372e-05, "loss": 0.245, "step": 6074500 }, { "epoch": 3.64, "learning_rate": 3.053758578359315e-05, "loss": 0.2518, "step": 6075000 }, { "epoch": 3.64, "learning_rate": 3.0535485818032584e-05, "loss": 0.2466, "step": 6075500 }, { "epoch": 3.64, "learning_rate": 3.053339005240314e-05, "loss": 0.2447, "step": 6076000 }, { "epoch": 3.64, "learning_rate": 3.053129008684258e-05, "loss": 0.2537, "step": 6076500 }, { "epoch": 3.64, "learning_rate": 3.052919012128201e-05, "loss": 0.2564, "step": 6077000 }, { "epoch": 3.64, "learning_rate": 3.052709015572145e-05, "loss": 0.2473, "step": 6077500 }, { "epoch": 3.64, "learning_rate": 3.0524994390092005e-05, "loss": 0.2507, "step": 6078000 }, { "epoch": 3.64, "learning_rate": 3.052289442453144e-05, "loss": 0.2528, "step": 6078500 }, { "epoch": 3.64, "learning_rate": 3.052079445897087e-05, "loss": 0.2468, "step": 6079000 }, { "epoch": 3.64, "learning_rate": 3.051869449341031e-05, "loss": 0.2473, "step": 6079500 }, { "epoch": 3.65, "learning_rate": 3.0516598727780862e-05, "loss": 0.2483, "step": 6080000 }, { "epoch": 3.65, "learning_rate": 3.05144987622203e-05, "loss": 0.2525, "step": 6080500 }, { "epoch": 3.65, "learning_rate": 3.0512398796659736e-05, "loss": 0.2514, "step": 6081000 }, { "epoch": 3.65, "learning_rate": 3.051029883109917e-05, "loss": 0.2481, "step": 6081500 }, { "epoch": 3.65, "learning_rate": 3.0508203065469726e-05, "loss": 0.2548, "step": 6082000 }, { "epoch": 3.65, "learning_rate": 3.050610729984028e-05, "loss": 0.2551, "step": 6082500 }, { "epoch": 3.65, "learning_rate": 3.0504007334279717e-05, "loss": 0.2496, "step": 6083000 }, { "epoch": 3.65, "learning_rate": 3.050190736871915e-05, "loss": 0.2544, "step": 6083500 }, { "epoch": 3.65, "learning_rate": 3.0499807403158587e-05, "loss": 0.248, "step": 6084000 }, { "epoch": 3.65, "learning_rate": 3.0497707437598024e-05, "loss": 0.2528, "step": 6084500 }, { "epoch": 3.65, "learning_rate": 3.0495607472037457e-05, "loss": 0.2521, "step": 6085000 }, { "epoch": 3.65, "learning_rate": 3.0493507506476894e-05, "loss": 0.2492, "step": 6085500 }, { "epoch": 3.65, "learning_rate": 3.049140754091633e-05, "loss": 0.2491, "step": 6086000 }, { "epoch": 3.65, "learning_rate": 3.0489307575355765e-05, "loss": 0.2467, "step": 6086500 }, { "epoch": 3.65, "learning_rate": 3.0487211809726318e-05, "loss": 0.254, "step": 6087000 }, { "epoch": 3.65, "learning_rate": 3.0485111844165755e-05, "loss": 0.2482, "step": 6087500 }, { "epoch": 3.65, "learning_rate": 3.0483011878605192e-05, "loss": 0.2493, "step": 6088000 }, { "epoch": 3.65, "learning_rate": 3.0480911913044625e-05, "loss": 0.2517, "step": 6088500 }, { "epoch": 3.65, "learning_rate": 3.0478816147415182e-05, "loss": 0.2554, "step": 6089000 }, { "epoch": 3.65, "learning_rate": 3.0476716181854616e-05, "loss": 0.2452, "step": 6089500 }, { "epoch": 3.65, "learning_rate": 3.0474616216294053e-05, "loss": 0.2525, "step": 6090000 }, { "epoch": 3.65, "learning_rate": 3.047251625073349e-05, "loss": 0.2458, "step": 6090500 }, { "epoch": 3.65, "learning_rate": 3.0470420485104043e-05, "loss": 0.2515, "step": 6091000 }, { "epoch": 3.65, "learning_rate": 3.046832051954348e-05, "loss": 0.2488, "step": 6091500 }, { "epoch": 3.65, "learning_rate": 3.0466220553982913e-05, "loss": 0.2506, "step": 6092000 }, { "epoch": 3.65, "learning_rate": 3.046412058842235e-05, "loss": 0.2506, "step": 6092500 }, { "epoch": 3.65, "learning_rate": 3.0462024822792904e-05, "loss": 0.2489, "step": 6093000 }, { "epoch": 3.65, "learning_rate": 3.045992485723234e-05, "loss": 0.2546, "step": 6093500 }, { "epoch": 3.65, "learning_rate": 3.0457824891671774e-05, "loss": 0.2478, "step": 6094000 }, { "epoch": 3.65, "learning_rate": 3.045572492611121e-05, "loss": 0.2534, "step": 6094500 }, { "epoch": 3.65, "learning_rate": 3.0453629160481764e-05, "loss": 0.2551, "step": 6095000 }, { "epoch": 3.65, "learning_rate": 3.04515291949212e-05, "loss": 0.2519, "step": 6095500 }, { "epoch": 3.65, "learning_rate": 3.0449429229360638e-05, "loss": 0.2551, "step": 6096000 }, { "epoch": 3.66, "learning_rate": 3.044732926380007e-05, "loss": 0.2524, "step": 6096500 }, { "epoch": 3.66, "learning_rate": 3.044523349817063e-05, "loss": 0.2563, "step": 6097000 }, { "epoch": 3.66, "learning_rate": 3.0443133532610062e-05, "loss": 0.249, "step": 6097500 }, { "epoch": 3.66, "learning_rate": 3.04410335670495e-05, "loss": 0.2469, "step": 6098000 }, { "epoch": 3.66, "learning_rate": 3.0438933601488936e-05, "loss": 0.2514, "step": 6098500 }, { "epoch": 3.66, "learning_rate": 3.043683783585949e-05, "loss": 0.2467, "step": 6099000 }, { "epoch": 3.66, "learning_rate": 3.0434737870298923e-05, "loss": 0.2511, "step": 6099500 }, { "epoch": 3.66, "learning_rate": 3.043263790473836e-05, "loss": 0.2457, "step": 6100000 }, { "epoch": 3.66, "eval_loss": 0.23238635063171387, "eval_runtime": 1455.0933, "eval_samples_per_second": 361.984, "eval_steps_per_second": 60.331, "step": 6100000 }, { "epoch": 3.66, "learning_rate": 3.0430537939177796e-05, "loss": 0.2532, "step": 6100500 }, { "epoch": 3.66, "learning_rate": 3.0428442173548357e-05, "loss": 0.2395, "step": 6101000 }, { "epoch": 3.66, "learning_rate": 3.0426342207987787e-05, "loss": 0.2533, "step": 6101500 }, { "epoch": 3.66, "learning_rate": 3.042424224242722e-05, "loss": 0.2475, "step": 6102000 }, { "epoch": 3.66, "learning_rate": 3.0422142276866657e-05, "loss": 0.2522, "step": 6102500 }, { "epoch": 3.66, "learning_rate": 3.0420046511237217e-05, "loss": 0.2528, "step": 6103000 }, { "epoch": 3.66, "learning_rate": 3.0417946545676654e-05, "loss": 0.2487, "step": 6103500 }, { "epoch": 3.66, "learning_rate": 3.0415846580116084e-05, "loss": 0.2508, "step": 6104000 }, { "epoch": 3.66, "learning_rate": 3.0413750814486645e-05, "loss": 0.2445, "step": 6104500 }, { "epoch": 3.66, "learning_rate": 3.0411650848926078e-05, "loss": 0.2486, "step": 6105000 }, { "epoch": 3.66, "learning_rate": 3.0409550883365515e-05, "loss": 0.2502, "step": 6105500 }, { "epoch": 3.66, "learning_rate": 3.0407450917804952e-05, "loss": 0.2522, "step": 6106000 }, { "epoch": 3.66, "learning_rate": 3.040535095224438e-05, "loss": 0.2439, "step": 6106500 }, { "epoch": 3.66, "learning_rate": 3.0403250986683815e-05, "loss": 0.2507, "step": 6107000 }, { "epoch": 3.66, "learning_rate": 3.0401151021123252e-05, "loss": 0.2469, "step": 6107500 }, { "epoch": 3.66, "learning_rate": 3.0399051055562686e-05, "loss": 0.2549, "step": 6108000 }, { "epoch": 3.66, "learning_rate": 3.0396955289933243e-05, "loss": 0.2546, "step": 6108500 }, { "epoch": 3.66, "learning_rate": 3.0394855324372676e-05, "loss": 0.2475, "step": 6109000 }, { "epoch": 3.66, "learning_rate": 3.0392755358812113e-05, "loss": 0.2483, "step": 6109500 }, { "epoch": 3.66, "learning_rate": 3.039065539325155e-05, "loss": 0.2501, "step": 6110000 }, { "epoch": 3.66, "learning_rate": 3.038855962762211e-05, "loss": 0.2537, "step": 6110500 }, { "epoch": 3.66, "learning_rate": 3.038645966206154e-05, "loss": 0.2488, "step": 6111000 }, { "epoch": 3.66, "learning_rate": 3.0384359696500974e-05, "loss": 0.2478, "step": 6111500 }, { "epoch": 3.66, "learning_rate": 3.038225973094041e-05, "loss": 0.2486, "step": 6112000 }, { "epoch": 3.66, "learning_rate": 3.038016396531097e-05, "loss": 0.2512, "step": 6112500 }, { "epoch": 3.66, "learning_rate": 3.0378063999750408e-05, "loss": 0.2448, "step": 6113000 }, { "epoch": 3.67, "learning_rate": 3.0375964034189834e-05, "loss": 0.2531, "step": 6113500 }, { "epoch": 3.67, "learning_rate": 3.037386406862927e-05, "loss": 0.251, "step": 6114000 }, { "epoch": 3.67, "learning_rate": 3.037176830299983e-05, "loss": 0.2537, "step": 6114500 }, { "epoch": 3.67, "learning_rate": 3.036966833743927e-05, "loss": 0.2505, "step": 6115000 }, { "epoch": 3.67, "learning_rate": 3.0367568371878705e-05, "loss": 0.2478, "step": 6115500 }, { "epoch": 3.67, "learning_rate": 3.036547260624926e-05, "loss": 0.259, "step": 6116000 }, { "epoch": 3.67, "learning_rate": 3.0363372640688692e-05, "loss": 0.252, "step": 6116500 }, { "epoch": 3.67, "learning_rate": 3.036127267512813e-05, "loss": 0.2519, "step": 6117000 }, { "epoch": 3.67, "learning_rate": 3.0359172709567566e-05, "loss": 0.2501, "step": 6117500 }, { "epoch": 3.67, "learning_rate": 3.035707694393812e-05, "loss": 0.2576, "step": 6118000 }, { "epoch": 3.67, "learning_rate": 3.0354976978377557e-05, "loss": 0.2432, "step": 6118500 }, { "epoch": 3.67, "learning_rate": 3.035287701281699e-05, "loss": 0.2533, "step": 6119000 }, { "epoch": 3.67, "learning_rate": 3.0350777047256427e-05, "loss": 0.2555, "step": 6119500 }, { "epoch": 3.67, "learning_rate": 3.0348677081695864e-05, "loss": 0.2532, "step": 6120000 }, { "epoch": 3.67, "learning_rate": 3.034657711613529e-05, "loss": 0.2489, "step": 6120500 }, { "epoch": 3.67, "learning_rate": 3.0344477150574727e-05, "loss": 0.2494, "step": 6121000 }, { "epoch": 3.67, "learning_rate": 3.0342377185014164e-05, "loss": 0.252, "step": 6121500 }, { "epoch": 3.67, "learning_rate": 3.0340281419384724e-05, "loss": 0.2501, "step": 6122000 }, { "epoch": 3.67, "learning_rate": 3.033818145382416e-05, "loss": 0.2528, "step": 6122500 }, { "epoch": 3.67, "learning_rate": 3.0336081488263588e-05, "loss": 0.2463, "step": 6123000 }, { "epoch": 3.67, "learning_rate": 3.0333981522703025e-05, "loss": 0.2507, "step": 6123500 }, { "epoch": 3.67, "learning_rate": 3.0331885757073585e-05, "loss": 0.2494, "step": 6124000 }, { "epoch": 3.67, "learning_rate": 3.0329785791513022e-05, "loss": 0.2558, "step": 6124500 }, { "epoch": 3.67, "learning_rate": 3.0327685825952455e-05, "loss": 0.2464, "step": 6125000 }, { "epoch": 3.67, "learning_rate": 3.0325585860391885e-05, "loss": 0.244, "step": 6125500 }, { "epoch": 3.67, "learning_rate": 3.0323490094762446e-05, "loss": 0.2453, "step": 6126000 }, { "epoch": 3.67, "learning_rate": 3.0321390129201883e-05, "loss": 0.2519, "step": 6126500 }, { "epoch": 3.67, "learning_rate": 3.031929016364132e-05, "loss": 0.2527, "step": 6127000 }, { "epoch": 3.67, "learning_rate": 3.0317190198080753e-05, "loss": 0.2485, "step": 6127500 }, { "epoch": 3.67, "learning_rate": 3.031509443245131e-05, "loss": 0.2542, "step": 6128000 }, { "epoch": 3.67, "learning_rate": 3.0312998666821864e-05, "loss": 0.2526, "step": 6128500 }, { "epoch": 3.67, "learning_rate": 3.0310898701261297e-05, "loss": 0.2463, "step": 6129000 }, { "epoch": 3.67, "learning_rate": 3.0308798735700734e-05, "loss": 0.2511, "step": 6129500 }, { "epoch": 3.68, "learning_rate": 3.030669877014017e-05, "loss": 0.2437, "step": 6130000 }, { "epoch": 3.68, "learning_rate": 3.0304598804579604e-05, "loss": 0.2447, "step": 6130500 }, { "epoch": 3.68, "learning_rate": 3.030249883901904e-05, "loss": 0.2491, "step": 6131000 }, { "epoch": 3.68, "learning_rate": 3.0300398873458478e-05, "loss": 0.2489, "step": 6131500 }, { "epoch": 3.68, "learning_rate": 3.029829890789791e-05, "loss": 0.2443, "step": 6132000 }, { "epoch": 3.68, "learning_rate": 3.0296203142268468e-05, "loss": 0.2572, "step": 6132500 }, { "epoch": 3.68, "learning_rate": 3.0294103176707902e-05, "loss": 0.2496, "step": 6133000 }, { "epoch": 3.68, "learning_rate": 3.029200321114734e-05, "loss": 0.2442, "step": 6133500 }, { "epoch": 3.68, "learning_rate": 3.0289903245586775e-05, "loss": 0.25, "step": 6134000 }, { "epoch": 3.68, "learning_rate": 3.028780747995733e-05, "loss": 0.2461, "step": 6134500 }, { "epoch": 3.68, "learning_rate": 3.0285707514396766e-05, "loss": 0.2507, "step": 6135000 }, { "epoch": 3.68, "learning_rate": 3.02836075488362e-05, "loss": 0.252, "step": 6135500 }, { "epoch": 3.68, "learning_rate": 3.0281507583275636e-05, "loss": 0.2508, "step": 6136000 }, { "epoch": 3.68, "learning_rate": 3.027941181764619e-05, "loss": 0.2531, "step": 6136500 }, { "epoch": 3.68, "learning_rate": 3.0277311852085627e-05, "loss": 0.2481, "step": 6137000 }, { "epoch": 3.68, "learning_rate": 3.027521188652506e-05, "loss": 0.2497, "step": 6137500 }, { "epoch": 3.68, "learning_rate": 3.0273111920964497e-05, "loss": 0.2521, "step": 6138000 }, { "epoch": 3.68, "learning_rate": 3.027101615533505e-05, "loss": 0.2516, "step": 6138500 }, { "epoch": 3.68, "learning_rate": 3.0268916189774487e-05, "loss": 0.247, "step": 6139000 }, { "epoch": 3.68, "learning_rate": 3.026682042414504e-05, "loss": 0.2464, "step": 6139500 }, { "epoch": 3.68, "learning_rate": 3.0264720458584478e-05, "loss": 0.2492, "step": 6140000 }, { "epoch": 3.68, "learning_rate": 3.0262620493023915e-05, "loss": 0.2497, "step": 6140500 }, { "epoch": 3.68, "learning_rate": 3.0260520527463348e-05, "loss": 0.2521, "step": 6141000 }, { "epoch": 3.68, "learning_rate": 3.0258420561902785e-05, "loss": 0.2458, "step": 6141500 }, { "epoch": 3.68, "learning_rate": 3.0256320596342222e-05, "loss": 0.2523, "step": 6142000 }, { "epoch": 3.68, "learning_rate": 3.0254220630781655e-05, "loss": 0.2529, "step": 6142500 }, { "epoch": 3.68, "learning_rate": 3.0252120665221092e-05, "loss": 0.255, "step": 6143000 }, { "epoch": 3.68, "learning_rate": 3.0250024899591646e-05, "loss": 0.2463, "step": 6143500 }, { "epoch": 3.68, "learning_rate": 3.0247924934031082e-05, "loss": 0.2463, "step": 6144000 }, { "epoch": 3.68, "learning_rate": 3.0245824968470516e-05, "loss": 0.2506, "step": 6144500 }, { "epoch": 3.68, "learning_rate": 3.0243725002909953e-05, "loss": 0.2492, "step": 6145000 }, { "epoch": 3.68, "learning_rate": 3.0241629237280506e-05, "loss": 0.2528, "step": 6145500 }, { "epoch": 3.68, "learning_rate": 3.0239529271719943e-05, "loss": 0.252, "step": 6146000 }, { "epoch": 3.69, "learning_rate": 3.023742930615938e-05, "loss": 0.2429, "step": 6146500 }, { "epoch": 3.69, "learning_rate": 3.0235329340598813e-05, "loss": 0.2516, "step": 6147000 }, { "epoch": 3.69, "learning_rate": 3.023323357496937e-05, "loss": 0.2504, "step": 6147500 }, { "epoch": 3.69, "learning_rate": 3.0231133609408804e-05, "loss": 0.2485, "step": 6148000 }, { "epoch": 3.69, "learning_rate": 3.022903364384824e-05, "loss": 0.2526, "step": 6148500 }, { "epoch": 3.69, "learning_rate": 3.0226933678287678e-05, "loss": 0.2446, "step": 6149000 }, { "epoch": 3.69, "learning_rate": 3.022483791265823e-05, "loss": 0.2493, "step": 6149500 }, { "epoch": 3.69, "learning_rate": 3.022274214702879e-05, "loss": 0.2455, "step": 6150000 }, { "epoch": 3.69, "learning_rate": 3.0220642181468225e-05, "loss": 0.2548, "step": 6150500 }, { "epoch": 3.69, "learning_rate": 3.0218542215907655e-05, "loss": 0.2516, "step": 6151000 }, { "epoch": 3.69, "learning_rate": 3.0216442250347092e-05, "loss": 0.2486, "step": 6151500 }, { "epoch": 3.69, "learning_rate": 3.021434228478653e-05, "loss": 0.2469, "step": 6152000 }, { "epoch": 3.69, "learning_rate": 3.0212242319225962e-05, "loss": 0.257, "step": 6152500 }, { "epoch": 3.69, "learning_rate": 3.02101423536654e-05, "loss": 0.2549, "step": 6153000 }, { "epoch": 3.69, "learning_rate": 3.0208042388104836e-05, "loss": 0.251, "step": 6153500 }, { "epoch": 3.69, "learning_rate": 3.020594242254427e-05, "loss": 0.2534, "step": 6154000 }, { "epoch": 3.69, "learning_rate": 3.0203846656914826e-05, "loss": 0.2561, "step": 6154500 }, { "epoch": 3.69, "learning_rate": 3.020174669135426e-05, "loss": 0.2436, "step": 6155000 }, { "epoch": 3.69, "learning_rate": 3.0199646725793697e-05, "loss": 0.2512, "step": 6155500 }, { "epoch": 3.69, "learning_rate": 3.0197546760233133e-05, "loss": 0.2535, "step": 6156000 }, { "epoch": 3.69, "learning_rate": 3.0195450994603687e-05, "loss": 0.2474, "step": 6156500 }, { "epoch": 3.69, "learning_rate": 3.019335102904312e-05, "loss": 0.2498, "step": 6157000 }, { "epoch": 3.69, "learning_rate": 3.0191251063482557e-05, "loss": 0.2541, "step": 6157500 }, { "epoch": 3.69, "learning_rate": 3.018915529785311e-05, "loss": 0.2465, "step": 6158000 }, { "epoch": 3.69, "learning_rate": 3.0187055332292548e-05, "loss": 0.2551, "step": 6158500 }, { "epoch": 3.69, "learning_rate": 3.0184955366731985e-05, "loss": 0.2442, "step": 6159000 }, { "epoch": 3.69, "learning_rate": 3.0182855401171418e-05, "loss": 0.2549, "step": 6159500 }, { "epoch": 3.69, "learning_rate": 3.0180755435610855e-05, "loss": 0.2466, "step": 6160000 }, { "epoch": 3.69, "learning_rate": 3.0178655470050292e-05, "loss": 0.2514, "step": 6160500 }, { "epoch": 3.69, "learning_rate": 3.0176555504489725e-05, "loss": 0.2473, "step": 6161000 }, { "epoch": 3.69, "learning_rate": 3.0174455538929162e-05, "loss": 0.2489, "step": 6161500 }, { "epoch": 3.69, "learning_rate": 3.0172359773299716e-05, "loss": 0.2507, "step": 6162000 }, { "epoch": 3.69, "learning_rate": 3.0170259807739152e-05, "loss": 0.2483, "step": 6162500 }, { "epoch": 3.69, "learning_rate": 3.016815984217859e-05, "loss": 0.2526, "step": 6163000 }, { "epoch": 3.7, "learning_rate": 3.0166059876618023e-05, "loss": 0.2512, "step": 6163500 }, { "epoch": 3.7, "learning_rate": 3.0163964110988576e-05, "loss": 0.2471, "step": 6164000 }, { "epoch": 3.7, "learning_rate": 3.0161864145428013e-05, "loss": 0.2485, "step": 6164500 }, { "epoch": 3.7, "learning_rate": 3.015976417986745e-05, "loss": 0.2477, "step": 6165000 }, { "epoch": 3.7, "learning_rate": 3.0157664214306884e-05, "loss": 0.2507, "step": 6165500 }, { "epoch": 3.7, "learning_rate": 3.015556424874632e-05, "loss": 0.2462, "step": 6166000 }, { "epoch": 3.7, "learning_rate": 3.0153468483116874e-05, "loss": 0.253, "step": 6166500 }, { "epoch": 3.7, "learning_rate": 3.0151372717487434e-05, "loss": 0.2476, "step": 6167000 }, { "epoch": 3.7, "learning_rate": 3.0149272751926864e-05, "loss": 0.2566, "step": 6167500 }, { "epoch": 3.7, "learning_rate": 3.01471727863663e-05, "loss": 0.2527, "step": 6168000 }, { "epoch": 3.7, "learning_rate": 3.0145072820805738e-05, "loss": 0.2518, "step": 6168500 }, { "epoch": 3.7, "learning_rate": 3.014297285524517e-05, "loss": 0.2433, "step": 6169000 }, { "epoch": 3.7, "learning_rate": 3.014087288968461e-05, "loss": 0.249, "step": 6169500 }, { "epoch": 3.7, "learning_rate": 3.0138772924124045e-05, "loss": 0.2542, "step": 6170000 }, { "epoch": 3.7, "learning_rate": 3.013667295856348e-05, "loss": 0.2467, "step": 6170500 }, { "epoch": 3.7, "learning_rate": 3.0134577192934032e-05, "loss": 0.2468, "step": 6171000 }, { "epoch": 3.7, "learning_rate": 3.013247722737347e-05, "loss": 0.2514, "step": 6171500 }, { "epoch": 3.7, "learning_rate": 3.0130377261812906e-05, "loss": 0.2506, "step": 6172000 }, { "epoch": 3.7, "learning_rate": 3.012828149618346e-05, "loss": 0.2517, "step": 6172500 }, { "epoch": 3.7, "learning_rate": 3.0126181530622896e-05, "loss": 0.2548, "step": 6173000 }, { "epoch": 3.7, "learning_rate": 3.012408156506233e-05, "loss": 0.2522, "step": 6173500 }, { "epoch": 3.7, "learning_rate": 3.0121981599501767e-05, "loss": 0.2493, "step": 6174000 }, { "epoch": 3.7, "learning_rate": 3.0119881633941204e-05, "loss": 0.2442, "step": 6174500 }, { "epoch": 3.7, "learning_rate": 3.0117781668380637e-05, "loss": 0.2536, "step": 6175000 }, { "epoch": 3.7, "learning_rate": 3.0115681702820074e-05, "loss": 0.2522, "step": 6175500 }, { "epoch": 3.7, "learning_rate": 3.011358173725951e-05, "loss": 0.2509, "step": 6176000 }, { "epoch": 3.7, "learning_rate": 3.0111485971630064e-05, "loss": 0.2565, "step": 6176500 }, { "epoch": 3.7, "learning_rate": 3.01093860060695e-05, "loss": 0.2482, "step": 6177000 }, { "epoch": 3.7, "learning_rate": 3.0107286040508935e-05, "loss": 0.2511, "step": 6177500 }, { "epoch": 3.7, "learning_rate": 3.0105190274879488e-05, "loss": 0.2503, "step": 6178000 }, { "epoch": 3.7, "learning_rate": 3.0103090309318925e-05, "loss": 0.2537, "step": 6178500 }, { "epoch": 3.7, "learning_rate": 3.0100990343758362e-05, "loss": 0.2502, "step": 6179000 }, { "epoch": 3.7, "learning_rate": 3.00988903781978e-05, "loss": 0.2547, "step": 6179500 }, { "epoch": 3.71, "learning_rate": 3.0096790412637232e-05, "loss": 0.2497, "step": 6180000 }, { "epoch": 3.71, "learning_rate": 3.009469044707667e-05, "loss": 0.2524, "step": 6180500 }, { "epoch": 3.71, "learning_rate": 3.0092590481516106e-05, "loss": 0.2513, "step": 6181000 }, { "epoch": 3.71, "learning_rate": 3.009049051595554e-05, "loss": 0.2471, "step": 6181500 }, { "epoch": 3.71, "learning_rate": 3.0088394750326093e-05, "loss": 0.2522, "step": 6182000 }, { "epoch": 3.71, "learning_rate": 3.008629478476553e-05, "loss": 0.2529, "step": 6182500 }, { "epoch": 3.71, "learning_rate": 3.0084194819204967e-05, "loss": 0.2545, "step": 6183000 }, { "epoch": 3.71, "learning_rate": 3.00820948536444e-05, "loss": 0.2543, "step": 6183500 }, { "epoch": 3.71, "learning_rate": 3.0079999088014957e-05, "loss": 0.2464, "step": 6184000 }, { "epoch": 3.71, "learning_rate": 3.007789912245439e-05, "loss": 0.256, "step": 6184500 }, { "epoch": 3.71, "learning_rate": 3.0075799156893827e-05, "loss": 0.2494, "step": 6185000 }, { "epoch": 3.71, "learning_rate": 3.0073699191333264e-05, "loss": 0.2522, "step": 6185500 }, { "epoch": 3.71, "learning_rate": 3.0071603425703818e-05, "loss": 0.2487, "step": 6186000 }, { "epoch": 3.71, "learning_rate": 3.0069503460143255e-05, "loss": 0.2453, "step": 6186500 }, { "epoch": 3.71, "learning_rate": 3.0067403494582688e-05, "loss": 0.251, "step": 6187000 }, { "epoch": 3.71, "learning_rate": 3.0065303529022125e-05, "loss": 0.2487, "step": 6187500 }, { "epoch": 3.71, "learning_rate": 3.0063203563461562e-05, "loss": 0.2546, "step": 6188000 }, { "epoch": 3.71, "learning_rate": 3.0061107797832115e-05, "loss": 0.251, "step": 6188500 }, { "epoch": 3.71, "learning_rate": 3.005900783227155e-05, "loss": 0.2497, "step": 6189000 }, { "epoch": 3.71, "learning_rate": 3.0056907866710986e-05, "loss": 0.2457, "step": 6189500 }, { "epoch": 3.71, "learning_rate": 3.0054807901150422e-05, "loss": 0.2517, "step": 6190000 }, { "epoch": 3.71, "learning_rate": 3.0052712135520976e-05, "loss": 0.2479, "step": 6190500 }, { "epoch": 3.71, "learning_rate": 3.0050612169960413e-05, "loss": 0.2452, "step": 6191000 }, { "epoch": 3.71, "learning_rate": 3.0048512204399846e-05, "loss": 0.246, "step": 6191500 }, { "epoch": 3.71, "learning_rate": 3.0046412238839283e-05, "loss": 0.245, "step": 6192000 }, { "epoch": 3.71, "learning_rate": 3.0044316473209837e-05, "loss": 0.2568, "step": 6192500 }, { "epoch": 3.71, "learning_rate": 3.0042216507649274e-05, "loss": 0.2413, "step": 6193000 }, { "epoch": 3.71, "learning_rate": 3.004011654208871e-05, "loss": 0.2434, "step": 6193500 }, { "epoch": 3.71, "learning_rate": 3.0038020776459264e-05, "loss": 0.2625, "step": 6194000 }, { "epoch": 3.71, "learning_rate": 3.0035920810898697e-05, "loss": 0.2457, "step": 6194500 }, { "epoch": 3.71, "learning_rate": 3.0033820845338134e-05, "loss": 0.244, "step": 6195000 }, { "epoch": 3.71, "learning_rate": 3.003172087977757e-05, "loss": 0.2508, "step": 6195500 }, { "epoch": 3.71, "learning_rate": 3.0029620914217005e-05, "loss": 0.2482, "step": 6196000 }, { "epoch": 3.72, "learning_rate": 3.002752094865644e-05, "loss": 0.2536, "step": 6196500 }, { "epoch": 3.72, "learning_rate": 3.002542098309588e-05, "loss": 0.251, "step": 6197000 }, { "epoch": 3.72, "learning_rate": 3.0023321017535312e-05, "loss": 0.2446, "step": 6197500 }, { "epoch": 3.72, "learning_rate": 3.002122525190587e-05, "loss": 0.2531, "step": 6198000 }, { "epoch": 3.72, "learning_rate": 3.0019125286345302e-05, "loss": 0.2547, "step": 6198500 }, { "epoch": 3.72, "learning_rate": 3.001702532078474e-05, "loss": 0.2475, "step": 6199000 }, { "epoch": 3.72, "learning_rate": 3.0014925355224176e-05, "loss": 0.2522, "step": 6199500 }, { "epoch": 3.72, "learning_rate": 3.0012833789525853e-05, "loss": 0.2495, "step": 6200000 }, { "epoch": 3.72, "eval_loss": 0.2311820089817047, "eval_runtime": 1455.3093, "eval_samples_per_second": 361.93, "eval_steps_per_second": 60.322, "step": 6200000 }, { "epoch": 3.72, "learning_rate": 3.001073382396529e-05, "loss": 0.2457, "step": 6200500 }, { "epoch": 3.72, "learning_rate": 3.000863385840472e-05, "loss": 0.2476, "step": 6201000 }, { "epoch": 3.72, "learning_rate": 3.000653809277528e-05, "loss": 0.249, "step": 6201500 }, { "epoch": 3.72, "learning_rate": 3.0004438127214714e-05, "loss": 0.2478, "step": 6202000 }, { "epoch": 3.72, "learning_rate": 3.000233816165415e-05, "loss": 0.2483, "step": 6202500 }, { "epoch": 3.72, "learning_rate": 3.0000238196093587e-05, "loss": 0.2513, "step": 6203000 }, { "epoch": 3.72, "learning_rate": 2.9998138230533017e-05, "loss": 0.2485, "step": 6203500 }, { "epoch": 3.72, "learning_rate": 2.999603826497245e-05, "loss": 0.2499, "step": 6204000 }, { "epoch": 3.72, "learning_rate": 2.9993938299411888e-05, "loss": 0.2465, "step": 6204500 }, { "epoch": 3.72, "learning_rate": 2.9991838333851325e-05, "loss": 0.2523, "step": 6205000 }, { "epoch": 3.72, "learning_rate": 2.9989738368290758e-05, "loss": 0.2494, "step": 6205500 }, { "epoch": 3.72, "learning_rate": 2.9987638402730195e-05, "loss": 0.2468, "step": 6206000 }, { "epoch": 3.72, "learning_rate": 2.9985538437169632e-05, "loss": 0.2459, "step": 6206500 }, { "epoch": 3.72, "learning_rate": 2.9983438471609065e-05, "loss": 0.2487, "step": 6207000 }, { "epoch": 3.72, "learning_rate": 2.9981342705979622e-05, "loss": 0.2516, "step": 6207500 }, { "epoch": 3.72, "learning_rate": 2.9979242740419056e-05, "loss": 0.2506, "step": 6208000 }, { "epoch": 3.72, "learning_rate": 2.9977142774858492e-05, "loss": 0.2523, "step": 6208500 }, { "epoch": 3.72, "learning_rate": 2.997504280929793e-05, "loss": 0.2438, "step": 6209000 }, { "epoch": 3.72, "learning_rate": 2.9972947043668483e-05, "loss": 0.2477, "step": 6209500 }, { "epoch": 3.72, "learning_rate": 2.9970847078107916e-05, "loss": 0.248, "step": 6210000 }, { "epoch": 3.72, "learning_rate": 2.9968747112547353e-05, "loss": 0.2501, "step": 6210500 }, { "epoch": 3.72, "learning_rate": 2.996664714698679e-05, "loss": 0.254, "step": 6211000 }, { "epoch": 3.72, "learning_rate": 2.9964551381357344e-05, "loss": 0.2463, "step": 6211500 }, { "epoch": 3.72, "learning_rate": 2.996245141579678e-05, "loss": 0.247, "step": 6212000 }, { "epoch": 3.72, "learning_rate": 2.9960351450236214e-05, "loss": 0.2458, "step": 6212500 }, { "epoch": 3.72, "learning_rate": 2.995825148467565e-05, "loss": 0.2529, "step": 6213000 }, { "epoch": 3.73, "learning_rate": 2.9956155719046204e-05, "loss": 0.2546, "step": 6213500 }, { "epoch": 3.73, "learning_rate": 2.995405575348564e-05, "loss": 0.2535, "step": 6214000 }, { "epoch": 3.73, "learning_rate": 2.9951955787925078e-05, "loss": 0.2476, "step": 6214500 }, { "epoch": 3.73, "learning_rate": 2.994985582236451e-05, "loss": 0.2451, "step": 6215000 }, { "epoch": 3.73, "learning_rate": 2.9947760056735065e-05, "loss": 0.2476, "step": 6215500 }, { "epoch": 3.73, "learning_rate": 2.9945660091174502e-05, "loss": 0.2493, "step": 6216000 }, { "epoch": 3.73, "learning_rate": 2.994356012561394e-05, "loss": 0.2452, "step": 6216500 }, { "epoch": 3.73, "learning_rate": 2.99414643599845e-05, "loss": 0.2537, "step": 6217000 }, { "epoch": 3.73, "learning_rate": 2.993936439442393e-05, "loss": 0.2557, "step": 6217500 }, { "epoch": 3.73, "learning_rate": 2.9937264428863363e-05, "loss": 0.2425, "step": 6218000 }, { "epoch": 3.73, "learning_rate": 2.99351644633028e-05, "loss": 0.2552, "step": 6218500 }, { "epoch": 3.73, "learning_rate": 2.9933064497742236e-05, "loss": 0.2519, "step": 6219000 }, { "epoch": 3.73, "learning_rate": 2.9930968732112797e-05, "loss": 0.2497, "step": 6219500 }, { "epoch": 3.73, "learning_rate": 2.9928868766552227e-05, "loss": 0.2486, "step": 6220000 }, { "epoch": 3.73, "learning_rate": 2.992676880099166e-05, "loss": 0.25, "step": 6220500 }, { "epoch": 3.73, "learning_rate": 2.9924668835431097e-05, "loss": 0.2539, "step": 6221000 }, { "epoch": 3.73, "learning_rate": 2.9922573069801657e-05, "loss": 0.251, "step": 6221500 }, { "epoch": 3.73, "learning_rate": 2.9920473104241094e-05, "loss": 0.2502, "step": 6222000 }, { "epoch": 3.73, "learning_rate": 2.991837313868052e-05, "loss": 0.2482, "step": 6222500 }, { "epoch": 3.73, "learning_rate": 2.9916273173119958e-05, "loss": 0.2492, "step": 6223000 }, { "epoch": 3.73, "learning_rate": 2.9914173207559395e-05, "loss": 0.2456, "step": 6223500 }, { "epoch": 3.73, "learning_rate": 2.9912073241998828e-05, "loss": 0.2468, "step": 6224000 }, { "epoch": 3.73, "learning_rate": 2.9909973276438265e-05, "loss": 0.2511, "step": 6224500 }, { "epoch": 3.73, "learning_rate": 2.9907873310877702e-05, "loss": 0.2491, "step": 6225000 }, { "epoch": 3.73, "learning_rate": 2.990578174517938e-05, "loss": 0.2556, "step": 6225500 }, { "epoch": 3.73, "learning_rate": 2.9903681779618816e-05, "loss": 0.2534, "step": 6226000 }, { "epoch": 3.73, "learning_rate": 2.9901581814058253e-05, "loss": 0.2558, "step": 6226500 }, { "epoch": 3.73, "learning_rate": 2.9899481848497683e-05, "loss": 0.2514, "step": 6227000 }, { "epoch": 3.73, "learning_rate": 2.9897381882937116e-05, "loss": 0.2508, "step": 6227500 }, { "epoch": 3.73, "learning_rate": 2.9895281917376553e-05, "loss": 0.2462, "step": 6228000 }, { "epoch": 3.73, "learning_rate": 2.989318195181599e-05, "loss": 0.2515, "step": 6228500 }, { "epoch": 3.73, "learning_rate": 2.9891081986255423e-05, "loss": 0.2495, "step": 6229000 }, { "epoch": 3.73, "learning_rate": 2.988898202069486e-05, "loss": 0.2523, "step": 6229500 }, { "epoch": 3.74, "learning_rate": 2.9886886255065414e-05, "loss": 0.2488, "step": 6230000 }, { "epoch": 3.74, "learning_rate": 2.988478628950485e-05, "loss": 0.2472, "step": 6230500 }, { "epoch": 3.74, "learning_rate": 2.9882686323944284e-05, "loss": 0.2505, "step": 6231000 }, { "epoch": 3.74, "learning_rate": 2.988058635838372e-05, "loss": 0.2461, "step": 6231500 }, { "epoch": 3.74, "learning_rate": 2.9878490592754274e-05, "loss": 0.2497, "step": 6232000 }, { "epoch": 3.74, "learning_rate": 2.987639062719371e-05, "loss": 0.2473, "step": 6232500 }, { "epoch": 3.74, "learning_rate": 2.9874290661633148e-05, "loss": 0.2536, "step": 6233000 }, { "epoch": 3.74, "learning_rate": 2.987219069607258e-05, "loss": 0.2505, "step": 6233500 }, { "epoch": 3.74, "learning_rate": 2.9870094930443142e-05, "loss": 0.2543, "step": 6234000 }, { "epoch": 3.74, "learning_rate": 2.9867994964882572e-05, "loss": 0.2489, "step": 6234500 }, { "epoch": 3.74, "learning_rate": 2.986589499932201e-05, "loss": 0.2528, "step": 6235000 }, { "epoch": 3.74, "learning_rate": 2.9863795033761446e-05, "loss": 0.2508, "step": 6235500 }, { "epoch": 3.74, "learning_rate": 2.9861699268132006e-05, "loss": 0.2493, "step": 6236000 }, { "epoch": 3.74, "learning_rate": 2.9859599302571433e-05, "loss": 0.2482, "step": 6236500 }, { "epoch": 3.74, "learning_rate": 2.985749933701087e-05, "loss": 0.2526, "step": 6237000 }, { "epoch": 3.74, "learning_rate": 2.9855399371450306e-05, "loss": 0.2527, "step": 6237500 }, { "epoch": 3.74, "learning_rate": 2.9853303605820867e-05, "loss": 0.245, "step": 6238000 }, { "epoch": 3.74, "learning_rate": 2.9851203640260304e-05, "loss": 0.2482, "step": 6238500 }, { "epoch": 3.74, "learning_rate": 2.984910367469973e-05, "loss": 0.2509, "step": 6239000 }, { "epoch": 3.74, "learning_rate": 2.9847003709139167e-05, "loss": 0.2456, "step": 6239500 }, { "epoch": 3.74, "learning_rate": 2.9844907943509728e-05, "loss": 0.2466, "step": 6240000 }, { "epoch": 3.74, "learning_rate": 2.9842807977949164e-05, "loss": 0.2475, "step": 6240500 }, { "epoch": 3.74, "learning_rate": 2.9840708012388598e-05, "loss": 0.2488, "step": 6241000 }, { "epoch": 3.74, "learning_rate": 2.9838608046828028e-05, "loss": 0.2548, "step": 6241500 }, { "epoch": 3.74, "learning_rate": 2.9836512281198588e-05, "loss": 0.2485, "step": 6242000 }, { "epoch": 3.74, "learning_rate": 2.9834416515569142e-05, "loss": 0.2483, "step": 6242500 }, { "epoch": 3.74, "learning_rate": 2.983231655000858e-05, "loss": 0.2506, "step": 6243000 }, { "epoch": 3.74, "learning_rate": 2.9830216584448016e-05, "loss": 0.2488, "step": 6243500 }, { "epoch": 3.74, "learning_rate": 2.9828116618887452e-05, "loss": 0.2477, "step": 6244000 }, { "epoch": 3.74, "learning_rate": 2.9826016653326886e-05, "loss": 0.2523, "step": 6244500 }, { "epoch": 3.74, "learning_rate": 2.9823916687766323e-05, "loss": 0.2506, "step": 6245000 }, { "epoch": 3.74, "learning_rate": 2.982181672220576e-05, "loss": 0.2491, "step": 6245500 }, { "epoch": 3.74, "learning_rate": 2.9819716756645186e-05, "loss": 0.2413, "step": 6246000 }, { "epoch": 3.75, "learning_rate": 2.9817620991015747e-05, "loss": 0.2469, "step": 6246500 }, { "epoch": 3.75, "learning_rate": 2.9815521025455183e-05, "loss": 0.2486, "step": 6247000 }, { "epoch": 3.75, "learning_rate": 2.981342105989462e-05, "loss": 0.2485, "step": 6247500 }, { "epoch": 3.75, "learning_rate": 2.9811321094334054e-05, "loss": 0.2454, "step": 6248000 }, { "epoch": 3.75, "learning_rate": 2.980922532870461e-05, "loss": 0.248, "step": 6248500 }, { "epoch": 3.75, "learning_rate": 2.9807125363144044e-05, "loss": 0.2438, "step": 6249000 }, { "epoch": 3.75, "learning_rate": 2.980502539758348e-05, "loss": 0.248, "step": 6249500 }, { "epoch": 3.75, "learning_rate": 2.9802925432022918e-05, "loss": 0.2457, "step": 6250000 }, { "epoch": 3.75, "learning_rate": 2.980082966639347e-05, "loss": 0.2449, "step": 6250500 }, { "epoch": 3.75, "learning_rate": 2.9798729700832908e-05, "loss": 0.2404, "step": 6251000 }, { "epoch": 3.75, "learning_rate": 2.9796629735272342e-05, "loss": 0.2529, "step": 6251500 }, { "epoch": 3.75, "learning_rate": 2.979452976971178e-05, "loss": 0.2491, "step": 6252000 }, { "epoch": 3.75, "learning_rate": 2.9792434004082332e-05, "loss": 0.2524, "step": 6252500 }, { "epoch": 3.75, "learning_rate": 2.979033403852177e-05, "loss": 0.2462, "step": 6253000 }, { "epoch": 3.75, "learning_rate": 2.9788234072961202e-05, "loss": 0.2524, "step": 6253500 }, { "epoch": 3.75, "learning_rate": 2.978613410740064e-05, "loss": 0.2489, "step": 6254000 }, { "epoch": 3.75, "learning_rate": 2.9784038341771193e-05, "loss": 0.2517, "step": 6254500 }, { "epoch": 3.75, "learning_rate": 2.978193837621063e-05, "loss": 0.2512, "step": 6255000 }, { "epoch": 3.75, "learning_rate": 2.9779842610581183e-05, "loss": 0.254, "step": 6255500 }, { "epoch": 3.75, "learning_rate": 2.977774264502062e-05, "loss": 0.2463, "step": 6256000 }, { "epoch": 3.75, "learning_rate": 2.977564687939118e-05, "loss": 0.2485, "step": 6256500 }, { "epoch": 3.75, "learning_rate": 2.9773546913830617e-05, "loss": 0.2473, "step": 6257000 }, { "epoch": 3.75, "learning_rate": 2.9771446948270044e-05, "loss": 0.248, "step": 6257500 }, { "epoch": 3.75, "learning_rate": 2.976934698270948e-05, "loss": 0.2479, "step": 6258000 }, { "epoch": 3.75, "learning_rate": 2.9767247017148918e-05, "loss": 0.2457, "step": 6258500 }, { "epoch": 3.75, "learning_rate": 2.976514705158835e-05, "loss": 0.2511, "step": 6259000 }, { "epoch": 3.75, "learning_rate": 2.9763047086027788e-05, "loss": 0.2467, "step": 6259500 }, { "epoch": 3.75, "learning_rate": 2.9760947120467225e-05, "loss": 0.2493, "step": 6260000 }, { "epoch": 3.75, "learning_rate": 2.9758847154906658e-05, "loss": 0.2586, "step": 6260500 }, { "epoch": 3.75, "learning_rate": 2.9756747189346095e-05, "loss": 0.2513, "step": 6261000 }, { "epoch": 3.75, "learning_rate": 2.9754647223785532e-05, "loss": 0.2473, "step": 6261500 }, { "epoch": 3.75, "learning_rate": 2.9752547258224965e-05, "loss": 0.2469, "step": 6262000 }, { "epoch": 3.75, "learning_rate": 2.9750451492595522e-05, "loss": 0.2539, "step": 6262500 }, { "epoch": 3.75, "learning_rate": 2.9748351527034956e-05, "loss": 0.2532, "step": 6263000 }, { "epoch": 3.76, "learning_rate": 2.9746251561474393e-05, "loss": 0.2509, "step": 6263500 }, { "epoch": 3.76, "learning_rate": 2.974415159591383e-05, "loss": 0.2505, "step": 6264000 }, { "epoch": 3.76, "learning_rate": 2.9742055830284383e-05, "loss": 0.248, "step": 6264500 }, { "epoch": 3.76, "learning_rate": 2.973995586472382e-05, "loss": 0.248, "step": 6265000 }, { "epoch": 3.76, "learning_rate": 2.9737855899163253e-05, "loss": 0.2512, "step": 6265500 }, { "epoch": 3.76, "learning_rate": 2.973575593360269e-05, "loss": 0.2476, "step": 6266000 }, { "epoch": 3.76, "learning_rate": 2.9733660167973244e-05, "loss": 0.2496, "step": 6266500 }, { "epoch": 3.76, "learning_rate": 2.973156020241268e-05, "loss": 0.2439, "step": 6267000 }, { "epoch": 3.76, "learning_rate": 2.9729464436783234e-05, "loss": 0.2526, "step": 6267500 }, { "epoch": 3.76, "learning_rate": 2.972736447122267e-05, "loss": 0.2457, "step": 6268000 }, { "epoch": 3.76, "learning_rate": 2.9725264505662105e-05, "loss": 0.2472, "step": 6268500 }, { "epoch": 3.76, "learning_rate": 2.972316454010154e-05, "loss": 0.2468, "step": 6269000 }, { "epoch": 3.76, "learning_rate": 2.9721064574540978e-05, "loss": 0.2483, "step": 6269500 }, { "epoch": 3.76, "learning_rate": 2.9718968808911532e-05, "loss": 0.2513, "step": 6270000 }, { "epoch": 3.76, "learning_rate": 2.971686884335097e-05, "loss": 0.2501, "step": 6270500 }, { "epoch": 3.76, "learning_rate": 2.9714768877790402e-05, "loss": 0.247, "step": 6271000 }, { "epoch": 3.76, "learning_rate": 2.971266891222984e-05, "loss": 0.2457, "step": 6271500 }, { "epoch": 3.76, "learning_rate": 2.9710568946669276e-05, "loss": 0.2443, "step": 6272000 }, { "epoch": 3.76, "learning_rate": 2.970846898110871e-05, "loss": 0.2485, "step": 6272500 }, { "epoch": 3.76, "learning_rate": 2.9706369015548146e-05, "loss": 0.2472, "step": 6273000 }, { "epoch": 3.76, "learning_rate": 2.9704269049987583e-05, "loss": 0.2544, "step": 6273500 }, { "epoch": 3.76, "learning_rate": 2.9702173284358137e-05, "loss": 0.2507, "step": 6274000 }, { "epoch": 3.76, "learning_rate": 2.970007331879757e-05, "loss": 0.2533, "step": 6274500 }, { "epoch": 3.76, "learning_rate": 2.9697973353237007e-05, "loss": 0.2469, "step": 6275000 }, { "epoch": 3.76, "learning_rate": 2.9695873387676444e-05, "loss": 0.2494, "step": 6275500 }, { "epoch": 3.76, "learning_rate": 2.9693773422115877e-05, "loss": 0.2534, "step": 6276000 }, { "epoch": 3.76, "learning_rate": 2.969168185641755e-05, "loss": 0.2482, "step": 6276500 }, { "epoch": 3.76, "learning_rate": 2.9689581890856988e-05, "loss": 0.2501, "step": 6277000 }, { "epoch": 3.76, "learning_rate": 2.9687481925296425e-05, "loss": 0.2504, "step": 6277500 }, { "epoch": 3.76, "learning_rate": 2.9685381959735858e-05, "loss": 0.2517, "step": 6278000 }, { "epoch": 3.76, "learning_rate": 2.9683281994175295e-05, "loss": 0.2517, "step": 6278500 }, { "epoch": 3.76, "learning_rate": 2.9681182028614732e-05, "loss": 0.2492, "step": 6279000 }, { "epoch": 3.76, "learning_rate": 2.9679082063054165e-05, "loss": 0.2493, "step": 6279500 }, { "epoch": 3.77, "learning_rate": 2.9676982097493602e-05, "loss": 0.2499, "step": 6280000 }, { "epoch": 3.77, "learning_rate": 2.9674886331864156e-05, "loss": 0.2507, "step": 6280500 }, { "epoch": 3.77, "learning_rate": 2.9672786366303592e-05, "loss": 0.2516, "step": 6281000 }, { "epoch": 3.77, "learning_rate": 2.9670686400743026e-05, "loss": 0.2499, "step": 6281500 }, { "epoch": 3.77, "learning_rate": 2.9668586435182463e-05, "loss": 0.2539, "step": 6282000 }, { "epoch": 3.77, "learning_rate": 2.9666490669553016e-05, "loss": 0.246, "step": 6282500 }, { "epoch": 3.77, "learning_rate": 2.9664390703992453e-05, "loss": 0.2539, "step": 6283000 }, { "epoch": 3.77, "learning_rate": 2.966229073843189e-05, "loss": 0.2457, "step": 6283500 }, { "epoch": 3.77, "learning_rate": 2.9660190772871324e-05, "loss": 0.2567, "step": 6284000 }, { "epoch": 3.77, "learning_rate": 2.965809500724188e-05, "loss": 0.2486, "step": 6284500 }, { "epoch": 3.77, "learning_rate": 2.9655995041681314e-05, "loss": 0.2502, "step": 6285000 }, { "epoch": 3.77, "learning_rate": 2.965389507612075e-05, "loss": 0.2514, "step": 6285500 }, { "epoch": 3.77, "learning_rate": 2.9651795110560188e-05, "loss": 0.2507, "step": 6286000 }, { "epoch": 3.77, "learning_rate": 2.964969934493074e-05, "loss": 0.2477, "step": 6286500 }, { "epoch": 3.77, "learning_rate": 2.9647599379370175e-05, "loss": 0.2524, "step": 6287000 }, { "epoch": 3.77, "learning_rate": 2.964549941380961e-05, "loss": 0.2452, "step": 6287500 }, { "epoch": 3.77, "learning_rate": 2.964339944824905e-05, "loss": 0.2463, "step": 6288000 }, { "epoch": 3.77, "learning_rate": 2.9641303682619602e-05, "loss": 0.2489, "step": 6288500 }, { "epoch": 3.77, "learning_rate": 2.963920371705904e-05, "loss": 0.2466, "step": 6289000 }, { "epoch": 3.77, "learning_rate": 2.9637103751498472e-05, "loss": 0.2525, "step": 6289500 }, { "epoch": 3.77, "learning_rate": 2.963500378593791e-05, "loss": 0.2562, "step": 6290000 }, { "epoch": 3.77, "learning_rate": 2.9632903820377346e-05, "loss": 0.2499, "step": 6290500 }, { "epoch": 3.77, "learning_rate": 2.96308080547479e-05, "loss": 0.245, "step": 6291000 }, { "epoch": 3.77, "learning_rate": 2.9628708089187336e-05, "loss": 0.2516, "step": 6291500 }, { "epoch": 3.77, "learning_rate": 2.962660812362677e-05, "loss": 0.2508, "step": 6292000 }, { "epoch": 3.77, "learning_rate": 2.9624508158066207e-05, "loss": 0.2521, "step": 6292500 }, { "epoch": 3.77, "learning_rate": 2.962241239243676e-05, "loss": 0.2526, "step": 6293000 }, { "epoch": 3.77, "learning_rate": 2.9620312426876197e-05, "loss": 0.2479, "step": 6293500 }, { "epoch": 3.77, "learning_rate": 2.961821246131563e-05, "loss": 0.2523, "step": 6294000 }, { "epoch": 3.77, "learning_rate": 2.9616112495755067e-05, "loss": 0.2564, "step": 6294500 }, { "epoch": 3.77, "learning_rate": 2.9614016730125628e-05, "loss": 0.2552, "step": 6295000 }, { "epoch": 3.77, "learning_rate": 2.9611916764565058e-05, "loss": 0.2479, "step": 6295500 }, { "epoch": 3.77, "learning_rate": 2.9609816799004495e-05, "loss": 0.2446, "step": 6296000 }, { "epoch": 3.78, "learning_rate": 2.9607716833443928e-05, "loss": 0.2546, "step": 6296500 }, { "epoch": 3.78, "learning_rate": 2.960562106781449e-05, "loss": 0.2545, "step": 6297000 }, { "epoch": 3.78, "learning_rate": 2.9603525302185045e-05, "loss": 0.2552, "step": 6297500 }, { "epoch": 3.78, "learning_rate": 2.960142533662448e-05, "loss": 0.2492, "step": 6298000 }, { "epoch": 3.78, "learning_rate": 2.9599325371063916e-05, "loss": 0.2548, "step": 6298500 }, { "epoch": 3.78, "learning_rate": 2.9597225405503353e-05, "loss": 0.249, "step": 6299000 }, { "epoch": 3.78, "learning_rate": 2.9595125439942786e-05, "loss": 0.2485, "step": 6299500 }, { "epoch": 3.78, "learning_rate": 2.9593025474382223e-05, "loss": 0.2446, "step": 6300000 }, { "epoch": 3.78, "eval_loss": 0.2304326593875885, "eval_runtime": 1454.6341, "eval_samples_per_second": 362.098, "eval_steps_per_second": 60.35, "step": 6300000 }, { "epoch": 3.78, "learning_rate": 2.9590925508821653e-05, "loss": 0.2501, "step": 6300500 }, { "epoch": 3.78, "learning_rate": 2.9588825543261086e-05, "loss": 0.25, "step": 6301000 }, { "epoch": 3.78, "learning_rate": 2.9586733977562767e-05, "loss": 0.2492, "step": 6301500 }, { "epoch": 3.78, "learning_rate": 2.9584634012002204e-05, "loss": 0.2502, "step": 6302000 }, { "epoch": 3.78, "learning_rate": 2.9582534046441637e-05, "loss": 0.2512, "step": 6302500 }, { "epoch": 3.78, "learning_rate": 2.9580434080881074e-05, "loss": 0.25, "step": 6303000 }, { "epoch": 3.78, "learning_rate": 2.957833411532051e-05, "loss": 0.249, "step": 6303500 }, { "epoch": 3.78, "learning_rate": 2.9576234149759944e-05, "loss": 0.2433, "step": 6304000 }, { "epoch": 3.78, "learning_rate": 2.957413418419938e-05, "loss": 0.2514, "step": 6304500 }, { "epoch": 3.78, "learning_rate": 2.957203421863881e-05, "loss": 0.2481, "step": 6305000 }, { "epoch": 3.78, "learning_rate": 2.956993845300937e-05, "loss": 0.2508, "step": 6305500 }, { "epoch": 3.78, "learning_rate": 2.9567842687379925e-05, "loss": 0.2484, "step": 6306000 }, { "epoch": 3.78, "learning_rate": 2.9565742721819362e-05, "loss": 0.2494, "step": 6306500 }, { "epoch": 3.78, "learning_rate": 2.9563642756258795e-05, "loss": 0.2486, "step": 6307000 }, { "epoch": 3.78, "learning_rate": 2.9561546990629352e-05, "loss": 0.2445, "step": 6307500 }, { "epoch": 3.78, "learning_rate": 2.9559447025068786e-05, "loss": 0.2486, "step": 6308000 }, { "epoch": 3.78, "learning_rate": 2.9557347059508223e-05, "loss": 0.2449, "step": 6308500 }, { "epoch": 3.78, "learning_rate": 2.955524709394766e-05, "loss": 0.248, "step": 6309000 }, { "epoch": 3.78, "learning_rate": 2.9553147128387093e-05, "loss": 0.251, "step": 6309500 }, { "epoch": 3.78, "learning_rate": 2.955104716282653e-05, "loss": 0.2397, "step": 6310000 }, { "epoch": 3.78, "learning_rate": 2.9548947197265967e-05, "loss": 0.2485, "step": 6310500 }, { "epoch": 3.78, "learning_rate": 2.95468472317054e-05, "loss": 0.2429, "step": 6311000 }, { "epoch": 3.78, "learning_rate": 2.9544751466075957e-05, "loss": 0.2621, "step": 6311500 }, { "epoch": 3.78, "learning_rate": 2.954265150051539e-05, "loss": 0.2526, "step": 6312000 }, { "epoch": 3.78, "learning_rate": 2.9540551534954827e-05, "loss": 0.2524, "step": 6312500 }, { "epoch": 3.78, "learning_rate": 2.9538451569394264e-05, "loss": 0.252, "step": 6313000 }, { "epoch": 3.79, "learning_rate": 2.9536351603833698e-05, "loss": 0.2506, "step": 6313500 }, { "epoch": 3.79, "learning_rate": 2.9534251638273135e-05, "loss": 0.2497, "step": 6314000 }, { "epoch": 3.79, "learning_rate": 2.9532155872643688e-05, "loss": 0.268, "step": 6314500 }, { "epoch": 3.79, "learning_rate": 2.9530055907083125e-05, "loss": 0.2477, "step": 6315000 }, { "epoch": 3.79, "learning_rate": 2.9527955941522562e-05, "loss": 0.251, "step": 6315500 }, { "epoch": 3.79, "learning_rate": 2.9525855975961995e-05, "loss": 0.2484, "step": 6316000 }, { "epoch": 3.79, "learning_rate": 2.9523756010401432e-05, "loss": 0.2518, "step": 6316500 }, { "epoch": 3.79, "learning_rate": 2.9521656044840862e-05, "loss": 0.2517, "step": 6317000 }, { "epoch": 3.79, "learning_rate": 2.9519556079280296e-05, "loss": 0.2475, "step": 6317500 }, { "epoch": 3.79, "learning_rate": 2.9517456113719733e-05, "loss": 0.251, "step": 6318000 }, { "epoch": 3.79, "learning_rate": 2.9515360348090293e-05, "loss": 0.2506, "step": 6318500 }, { "epoch": 3.79, "learning_rate": 2.951326038252973e-05, "loss": 0.2474, "step": 6319000 }, { "epoch": 3.79, "learning_rate": 2.9511164616900283e-05, "loss": 0.2474, "step": 6319500 }, { "epoch": 3.79, "learning_rate": 2.950906465133972e-05, "loss": 0.2551, "step": 6320000 }, { "epoch": 3.79, "learning_rate": 2.9506968885710274e-05, "loss": 0.2508, "step": 6320500 }, { "epoch": 3.79, "learning_rate": 2.950486892014971e-05, "loss": 0.2549, "step": 6321000 }, { "epoch": 3.79, "learning_rate": 2.9502768954589144e-05, "loss": 0.2548, "step": 6321500 }, { "epoch": 3.79, "learning_rate": 2.950066898902858e-05, "loss": 0.2428, "step": 6322000 }, { "epoch": 3.79, "learning_rate": 2.9498569023468018e-05, "loss": 0.2457, "step": 6322500 }, { "epoch": 3.79, "learning_rate": 2.949646905790745e-05, "loss": 0.2519, "step": 6323000 }, { "epoch": 3.79, "learning_rate": 2.9494369092346888e-05, "loss": 0.2517, "step": 6323500 }, { "epoch": 3.79, "learning_rate": 2.9492269126786318e-05, "loss": 0.2468, "step": 6324000 }, { "epoch": 3.79, "learning_rate": 2.949016916122575e-05, "loss": 0.2541, "step": 6324500 }, { "epoch": 3.79, "learning_rate": 2.948806919566519e-05, "loss": 0.2539, "step": 6325000 }, { "epoch": 3.79, "learning_rate": 2.9485969230104625e-05, "loss": 0.2476, "step": 6325500 }, { "epoch": 3.79, "learning_rate": 2.948386926454406e-05, "loss": 0.244, "step": 6326000 }, { "epoch": 3.79, "learning_rate": 2.9481773498914616e-05, "loss": 0.2473, "step": 6326500 }, { "epoch": 3.79, "learning_rate": 2.947967353335405e-05, "loss": 0.25, "step": 6327000 }, { "epoch": 3.79, "learning_rate": 2.9477573567793486e-05, "loss": 0.2459, "step": 6327500 }, { "epoch": 3.79, "learning_rate": 2.9475473602232923e-05, "loss": 0.2445, "step": 6328000 }, { "epoch": 3.79, "learning_rate": 2.9473377836603483e-05, "loss": 0.2493, "step": 6328500 }, { "epoch": 3.79, "learning_rate": 2.9471277871042913e-05, "loss": 0.249, "step": 6329000 }, { "epoch": 3.79, "learning_rate": 2.9469177905482347e-05, "loss": 0.246, "step": 6329500 }, { "epoch": 3.8, "learning_rate": 2.9467077939921784e-05, "loss": 0.2486, "step": 6330000 }, { "epoch": 3.8, "learning_rate": 2.9464982174292344e-05, "loss": 0.2496, "step": 6330500 }, { "epoch": 3.8, "learning_rate": 2.946288220873178e-05, "loss": 0.2477, "step": 6331000 }, { "epoch": 3.8, "learning_rate": 2.9460782243171208e-05, "loss": 0.2474, "step": 6331500 }, { "epoch": 3.8, "learning_rate": 2.9458682277610644e-05, "loss": 0.2454, "step": 6332000 }, { "epoch": 3.8, "learning_rate": 2.9456586511981205e-05, "loss": 0.2554, "step": 6332500 }, { "epoch": 3.8, "learning_rate": 2.945448654642064e-05, "loss": 0.2501, "step": 6333000 }, { "epoch": 3.8, "learning_rate": 2.945238658086007e-05, "loss": 0.2474, "step": 6333500 }, { "epoch": 3.8, "learning_rate": 2.9450286615299505e-05, "loss": 0.2525, "step": 6334000 }, { "epoch": 3.8, "learning_rate": 2.9448190849670065e-05, "loss": 0.2454, "step": 6334500 }, { "epoch": 3.8, "learning_rate": 2.9446090884109502e-05, "loss": 0.249, "step": 6335000 }, { "epoch": 3.8, "learning_rate": 2.944399091854894e-05, "loss": 0.2476, "step": 6335500 }, { "epoch": 3.8, "learning_rate": 2.9441895152919493e-05, "loss": 0.2514, "step": 6336000 }, { "epoch": 3.8, "learning_rate": 2.9439799387290046e-05, "loss": 0.245, "step": 6336500 }, { "epoch": 3.8, "learning_rate": 2.9437699421729483e-05, "loss": 0.2518, "step": 6337000 }, { "epoch": 3.8, "learning_rate": 2.9435599456168917e-05, "loss": 0.2505, "step": 6337500 }, { "epoch": 3.8, "learning_rate": 2.9433499490608353e-05, "loss": 0.2473, "step": 6338000 }, { "epoch": 3.8, "learning_rate": 2.943139952504779e-05, "loss": 0.2496, "step": 6338500 }, { "epoch": 3.8, "learning_rate": 2.9429299559487224e-05, "loss": 0.2423, "step": 6339000 }, { "epoch": 3.8, "learning_rate": 2.942719959392666e-05, "loss": 0.2528, "step": 6339500 }, { "epoch": 3.8, "learning_rate": 2.9425099628366097e-05, "loss": 0.2424, "step": 6340000 }, { "epoch": 3.8, "learning_rate": 2.942299966280553e-05, "loss": 0.2501, "step": 6340500 }, { "epoch": 3.8, "learning_rate": 2.942089969724496e-05, "loss": 0.2442, "step": 6341000 }, { "epoch": 3.8, "learning_rate": 2.9418799731684398e-05, "loss": 0.2458, "step": 6341500 }, { "epoch": 3.8, "learning_rate": 2.9416699766123835e-05, "loss": 0.247, "step": 6342000 }, { "epoch": 3.8, "learning_rate": 2.9414604000494395e-05, "loss": 0.2523, "step": 6342500 }, { "epoch": 3.8, "learning_rate": 2.9412504034933825e-05, "loss": 0.2479, "step": 6343000 }, { "epoch": 3.8, "learning_rate": 2.941040406937326e-05, "loss": 0.2529, "step": 6343500 }, { "epoch": 3.8, "learning_rate": 2.9408304103812695e-05, "loss": 0.2553, "step": 6344000 }, { "epoch": 3.8, "learning_rate": 2.9406208338183256e-05, "loss": 0.2545, "step": 6344500 }, { "epoch": 3.8, "learning_rate": 2.9404108372622693e-05, "loss": 0.2494, "step": 6345000 }, { "epoch": 3.8, "learning_rate": 2.940200840706212e-05, "loss": 0.2509, "step": 6345500 }, { "epoch": 3.8, "learning_rate": 2.9399908441501556e-05, "loss": 0.2497, "step": 6346000 }, { "epoch": 3.8, "learning_rate": 2.9397812675872116e-05, "loss": 0.2522, "step": 6346500 }, { "epoch": 3.81, "learning_rate": 2.9395712710311553e-05, "loss": 0.2475, "step": 6347000 }, { "epoch": 3.81, "learning_rate": 2.9393616944682107e-05, "loss": 0.2432, "step": 6347500 }, { "epoch": 3.81, "learning_rate": 2.9391516979121544e-05, "loss": 0.2511, "step": 6348000 }, { "epoch": 3.81, "learning_rate": 2.9389417013560977e-05, "loss": 0.2444, "step": 6348500 }, { "epoch": 3.81, "learning_rate": 2.9387317048000414e-05, "loss": 0.2438, "step": 6349000 }, { "epoch": 3.81, "learning_rate": 2.938521708243985e-05, "loss": 0.2457, "step": 6349500 }, { "epoch": 3.81, "learning_rate": 2.9383117116879284e-05, "loss": 0.2504, "step": 6350000 }, { "epoch": 3.81, "learning_rate": 2.9381017151318714e-05, "loss": 0.2454, "step": 6350500 }, { "epoch": 3.81, "learning_rate": 2.937891718575815e-05, "loss": 0.2505, "step": 6351000 }, { "epoch": 3.81, "learning_rate": 2.937682142012871e-05, "loss": 0.2547, "step": 6351500 }, { "epoch": 3.81, "learning_rate": 2.937472145456815e-05, "loss": 0.2513, "step": 6352000 }, { "epoch": 3.81, "learning_rate": 2.9372621489007575e-05, "loss": 0.2443, "step": 6352500 }, { "epoch": 3.81, "learning_rate": 2.9370521523447012e-05, "loss": 0.2434, "step": 6353000 }, { "epoch": 3.81, "learning_rate": 2.9368429957748692e-05, "loss": 0.2586, "step": 6353500 }, { "epoch": 3.81, "learning_rate": 2.9366329992188126e-05, "loss": 0.2458, "step": 6354000 }, { "epoch": 3.81, "learning_rate": 2.9364230026627563e-05, "loss": 0.252, "step": 6354500 }, { "epoch": 3.81, "learning_rate": 2.9362130061067e-05, "loss": 0.2433, "step": 6355000 }, { "epoch": 3.81, "learning_rate": 2.9360034295437553e-05, "loss": 0.25, "step": 6355500 }, { "epoch": 3.81, "learning_rate": 2.935793432987699e-05, "loss": 0.2433, "step": 6356000 }, { "epoch": 3.81, "learning_rate": 2.9355834364316423e-05, "loss": 0.2552, "step": 6356500 }, { "epoch": 3.81, "learning_rate": 2.935373439875586e-05, "loss": 0.2541, "step": 6357000 }, { "epoch": 3.81, "learning_rate": 2.9351634433195297e-05, "loss": 0.2459, "step": 6357500 }, { "epoch": 3.81, "learning_rate": 2.934953446763473e-05, "loss": 0.2465, "step": 6358000 }, { "epoch": 3.81, "learning_rate": 2.9347434502074167e-05, "loss": 0.245, "step": 6358500 }, { "epoch": 3.81, "learning_rate": 2.9345334536513604e-05, "loss": 0.2508, "step": 6359000 }, { "epoch": 3.81, "learning_rate": 2.9343238770884158e-05, "loss": 0.2482, "step": 6359500 }, { "epoch": 3.81, "learning_rate": 2.934114300525471e-05, "loss": 0.2436, "step": 6360000 }, { "epoch": 3.81, "learning_rate": 2.933904303969415e-05, "loss": 0.2465, "step": 6360500 }, { "epoch": 3.81, "learning_rate": 2.9336943074133582e-05, "loss": 0.25, "step": 6361000 }, { "epoch": 3.81, "learning_rate": 2.933484310857302e-05, "loss": 0.25, "step": 6361500 }, { "epoch": 3.81, "learning_rate": 2.9332743143012455e-05, "loss": 0.2499, "step": 6362000 }, { "epoch": 3.81, "learning_rate": 2.933064317745189e-05, "loss": 0.2518, "step": 6362500 }, { "epoch": 3.81, "learning_rate": 2.9328543211891326e-05, "loss": 0.2503, "step": 6363000 }, { "epoch": 3.82, "learning_rate": 2.9326443246330763e-05, "loss": 0.2489, "step": 6363500 }, { "epoch": 3.82, "learning_rate": 2.9324347480701316e-05, "loss": 0.251, "step": 6364000 }, { "epoch": 3.82, "learning_rate": 2.932225171507187e-05, "loss": 0.254, "step": 6364500 }, { "epoch": 3.82, "learning_rate": 2.9320151749511307e-05, "loss": 0.2443, "step": 6365000 }, { "epoch": 3.82, "learning_rate": 2.931805178395074e-05, "loss": 0.2559, "step": 6365500 }, { "epoch": 3.82, "learning_rate": 2.9315951818390177e-05, "loss": 0.2499, "step": 6366000 }, { "epoch": 3.82, "learning_rate": 2.9313851852829614e-05, "loss": 0.2403, "step": 6366500 }, { "epoch": 3.82, "learning_rate": 2.931175188726905e-05, "loss": 0.2416, "step": 6367000 }, { "epoch": 3.82, "learning_rate": 2.9309651921708484e-05, "loss": 0.2459, "step": 6367500 }, { "epoch": 3.82, "learning_rate": 2.930755195614792e-05, "loss": 0.2437, "step": 6368000 }, { "epoch": 3.82, "learning_rate": 2.9305451990587358e-05, "loss": 0.2506, "step": 6368500 }, { "epoch": 3.82, "learning_rate": 2.930335622495791e-05, "loss": 0.2412, "step": 6369000 }, { "epoch": 3.82, "learning_rate": 2.9301256259397345e-05, "loss": 0.2481, "step": 6369500 }, { "epoch": 3.82, "learning_rate": 2.929915629383678e-05, "loss": 0.2446, "step": 6370000 }, { "epoch": 3.82, "learning_rate": 2.929705632827622e-05, "loss": 0.2485, "step": 6370500 }, { "epoch": 3.82, "learning_rate": 2.9294960562646772e-05, "loss": 0.2467, "step": 6371000 }, { "epoch": 3.82, "learning_rate": 2.929286059708621e-05, "loss": 0.247, "step": 6371500 }, { "epoch": 3.82, "learning_rate": 2.9290760631525642e-05, "loss": 0.2545, "step": 6372000 }, { "epoch": 3.82, "learning_rate": 2.928866066596508e-05, "loss": 0.2465, "step": 6372500 }, { "epoch": 3.82, "learning_rate": 2.9286564900335633e-05, "loss": 0.251, "step": 6373000 }, { "epoch": 3.82, "learning_rate": 2.928446493477507e-05, "loss": 0.2507, "step": 6373500 }, { "epoch": 3.82, "learning_rate": 2.9282364969214507e-05, "loss": 0.2471, "step": 6374000 }, { "epoch": 3.82, "learning_rate": 2.928026920358506e-05, "loss": 0.2528, "step": 6374500 }, { "epoch": 3.82, "learning_rate": 2.9278169238024494e-05, "loss": 0.2465, "step": 6375000 }, { "epoch": 3.82, "learning_rate": 2.927606927246393e-05, "loss": 0.2502, "step": 6375500 }, { "epoch": 3.82, "learning_rate": 2.9273969306903367e-05, "loss": 0.2489, "step": 6376000 }, { "epoch": 3.82, "learning_rate": 2.92718693413428e-05, "loss": 0.2482, "step": 6376500 }, { "epoch": 3.82, "learning_rate": 2.9269769375782238e-05, "loss": 0.2456, "step": 6377000 }, { "epoch": 3.82, "learning_rate": 2.9267669410221674e-05, "loss": 0.249, "step": 6377500 }, { "epoch": 3.82, "learning_rate": 2.9265569444661108e-05, "loss": 0.2446, "step": 6378000 }, { "epoch": 3.82, "learning_rate": 2.9263473679031665e-05, "loss": 0.2513, "step": 6378500 }, { "epoch": 3.82, "learning_rate": 2.9261373713471098e-05, "loss": 0.2433, "step": 6379000 }, { "epoch": 3.82, "learning_rate": 2.9259273747910535e-05, "loss": 0.2473, "step": 6379500 }, { "epoch": 3.83, "learning_rate": 2.9257173782349972e-05, "loss": 0.2496, "step": 6380000 }, { "epoch": 3.83, "learning_rate": 2.9255082216651642e-05, "loss": 0.2493, "step": 6380500 }, { "epoch": 3.83, "learning_rate": 2.925298225109108e-05, "loss": 0.252, "step": 6381000 }, { "epoch": 3.83, "learning_rate": 2.9250882285530516e-05, "loss": 0.2475, "step": 6381500 }, { "epoch": 3.83, "learning_rate": 2.924878231996995e-05, "loss": 0.249, "step": 6382000 }, { "epoch": 3.83, "learning_rate": 2.9246682354409386e-05, "loss": 0.2504, "step": 6382500 }, { "epoch": 3.83, "learning_rate": 2.9244582388848823e-05, "loss": 0.2489, "step": 6383000 }, { "epoch": 3.83, "learning_rate": 2.9242482423288257e-05, "loss": 0.2454, "step": 6383500 }, { "epoch": 3.83, "learning_rate": 2.9240382457727693e-05, "loss": 0.2542, "step": 6384000 }, { "epoch": 3.83, "learning_rate": 2.9238286692098247e-05, "loss": 0.2478, "step": 6384500 }, { "epoch": 3.83, "learning_rate": 2.9236186726537684e-05, "loss": 0.2466, "step": 6385000 }, { "epoch": 3.83, "learning_rate": 2.923408676097712e-05, "loss": 0.2524, "step": 6385500 }, { "epoch": 3.83, "learning_rate": 2.9231986795416554e-05, "loss": 0.2468, "step": 6386000 }, { "epoch": 3.83, "learning_rate": 2.922989102978711e-05, "loss": 0.2499, "step": 6386500 }, { "epoch": 3.83, "learning_rate": 2.9227791064226545e-05, "loss": 0.2448, "step": 6387000 }, { "epoch": 3.83, "learning_rate": 2.922569109866598e-05, "loss": 0.2461, "step": 6387500 }, { "epoch": 3.83, "learning_rate": 2.9223591133105418e-05, "loss": 0.2442, "step": 6388000 }, { "epoch": 3.83, "learning_rate": 2.9221495367475972e-05, "loss": 0.2512, "step": 6388500 }, { "epoch": 3.83, "learning_rate": 2.9219395401915405e-05, "loss": 0.2474, "step": 6389000 }, { "epoch": 3.83, "learning_rate": 2.9217295436354842e-05, "loss": 0.2432, "step": 6389500 }, { "epoch": 3.83, "learning_rate": 2.921519547079428e-05, "loss": 0.2531, "step": 6390000 }, { "epoch": 3.83, "learning_rate": 2.9213099705164833e-05, "loss": 0.2506, "step": 6390500 }, { "epoch": 3.83, "learning_rate": 2.921099973960427e-05, "loss": 0.2519, "step": 6391000 }, { "epoch": 3.83, "learning_rate": 2.9208899774043703e-05, "loss": 0.2508, "step": 6391500 }, { "epoch": 3.83, "learning_rate": 2.920679980848314e-05, "loss": 0.2512, "step": 6392000 }, { "epoch": 3.83, "learning_rate": 2.9204704042853693e-05, "loss": 0.2431, "step": 6392500 }, { "epoch": 3.83, "learning_rate": 2.920260407729313e-05, "loss": 0.2476, "step": 6393000 }, { "epoch": 3.83, "learning_rate": 2.9200504111732567e-05, "loss": 0.2457, "step": 6393500 }, { "epoch": 3.83, "learning_rate": 2.9198404146172e-05, "loss": 0.2461, "step": 6394000 }, { "epoch": 3.83, "learning_rate": 2.919630838054256e-05, "loss": 0.2471, "step": 6394500 }, { "epoch": 3.83, "learning_rate": 2.9194212614913114e-05, "loss": 0.2507, "step": 6395000 }, { "epoch": 3.83, "learning_rate": 2.919211264935255e-05, "loss": 0.249, "step": 6395500 }, { "epoch": 3.83, "learning_rate": 2.9190012683791988e-05, "loss": 0.2464, "step": 6396000 }, { "epoch": 3.83, "learning_rate": 2.918791271823142e-05, "loss": 0.252, "step": 6396500 }, { "epoch": 3.84, "learning_rate": 2.918581275267086e-05, "loss": 0.2549, "step": 6397000 }, { "epoch": 3.84, "learning_rate": 2.918371278711029e-05, "loss": 0.2497, "step": 6397500 }, { "epoch": 3.84, "learning_rate": 2.9181612821549725e-05, "loss": 0.2493, "step": 6398000 }, { "epoch": 3.84, "learning_rate": 2.917951285598916e-05, "loss": 0.2433, "step": 6398500 }, { "epoch": 3.84, "learning_rate": 2.917742129029084e-05, "loss": 0.2465, "step": 6399000 }, { "epoch": 3.84, "learning_rate": 2.9175321324730276e-05, "loss": 0.2505, "step": 6399500 }, { "epoch": 3.84, "learning_rate": 2.917322135916971e-05, "loss": 0.2445, "step": 6400000 }, { "epoch": 3.84, "eval_loss": 0.2303137332201004, "eval_runtime": 1454.0686, "eval_samples_per_second": 362.239, "eval_steps_per_second": 60.373, "step": 6400000 }, { "epoch": 3.84, "learning_rate": 2.9171121393609146e-05, "loss": 0.2505, "step": 6400500 }, { "epoch": 3.84, "learning_rate": 2.9169021428048583e-05, "loss": 0.2457, "step": 6401000 }, { "epoch": 3.84, "learning_rate": 2.9166921462488017e-05, "loss": 0.2458, "step": 6401500 }, { "epoch": 3.84, "learning_rate": 2.9164821496927447e-05, "loss": 0.2496, "step": 6402000 }, { "epoch": 3.84, "learning_rate": 2.9162721531366884e-05, "loss": 0.2471, "step": 6402500 }, { "epoch": 3.84, "learning_rate": 2.9160625765737444e-05, "loss": 0.2464, "step": 6403000 }, { "epoch": 3.84, "learning_rate": 2.9158525800176877e-05, "loss": 0.2498, "step": 6403500 }, { "epoch": 3.84, "learning_rate": 2.9156425834616314e-05, "loss": 0.2573, "step": 6404000 }, { "epoch": 3.84, "learning_rate": 2.9154325869055744e-05, "loss": 0.2488, "step": 6404500 }, { "epoch": 3.84, "learning_rate": 2.9152230103426305e-05, "loss": 0.2511, "step": 6405000 }, { "epoch": 3.84, "learning_rate": 2.915013013786574e-05, "loss": 0.2456, "step": 6405500 }, { "epoch": 3.84, "learning_rate": 2.9148030172305175e-05, "loss": 0.2468, "step": 6406000 }, { "epoch": 3.84, "learning_rate": 2.9145930206744612e-05, "loss": 0.2489, "step": 6406500 }, { "epoch": 3.84, "learning_rate": 2.9143834441115165e-05, "loss": 0.2522, "step": 6407000 }, { "epoch": 3.84, "learning_rate": 2.914173867548572e-05, "loss": 0.2454, "step": 6407500 }, { "epoch": 3.84, "learning_rate": 2.9139638709925156e-05, "loss": 0.2497, "step": 6408000 }, { "epoch": 3.84, "learning_rate": 2.9137538744364593e-05, "loss": 0.2475, "step": 6408500 }, { "epoch": 3.84, "learning_rate": 2.9135438778804026e-05, "loss": 0.2526, "step": 6409000 }, { "epoch": 3.84, "learning_rate": 2.9133338813243463e-05, "loss": 0.2529, "step": 6409500 }, { "epoch": 3.84, "learning_rate": 2.91312388476829e-05, "loss": 0.2436, "step": 6410000 }, { "epoch": 3.84, "learning_rate": 2.9129143082053453e-05, "loss": 0.2474, "step": 6410500 }, { "epoch": 3.84, "learning_rate": 2.912704311649289e-05, "loss": 0.2531, "step": 6411000 }, { "epoch": 3.84, "learning_rate": 2.9124943150932324e-05, "loss": 0.2479, "step": 6411500 }, { "epoch": 3.84, "learning_rate": 2.912284318537176e-05, "loss": 0.2462, "step": 6412000 }, { "epoch": 3.84, "learning_rate": 2.9120743219811197e-05, "loss": 0.2487, "step": 6412500 }, { "epoch": 3.84, "learning_rate": 2.911864325425063e-05, "loss": 0.2482, "step": 6413000 }, { "epoch": 3.85, "learning_rate": 2.9116543288690068e-05, "loss": 0.2447, "step": 6413500 }, { "epoch": 3.85, "learning_rate": 2.911444752306062e-05, "loss": 0.2491, "step": 6414000 }, { "epoch": 3.85, "learning_rate": 2.9112347557500058e-05, "loss": 0.2506, "step": 6414500 }, { "epoch": 3.85, "learning_rate": 2.9110247591939495e-05, "loss": 0.2492, "step": 6415000 }, { "epoch": 3.85, "learning_rate": 2.910814762637893e-05, "loss": 0.2519, "step": 6415500 }, { "epoch": 3.85, "learning_rate": 2.9106047660818365e-05, "loss": 0.2489, "step": 6416000 }, { "epoch": 3.85, "learning_rate": 2.9103947695257795e-05, "loss": 0.2485, "step": 6416500 }, { "epoch": 3.85, "learning_rate": 2.910184772969723e-05, "loss": 0.2468, "step": 6417000 }, { "epoch": 3.85, "learning_rate": 2.9099747764136666e-05, "loss": 0.2477, "step": 6417500 }, { "epoch": 3.85, "learning_rate": 2.9097651998507226e-05, "loss": 0.2457, "step": 6418000 }, { "epoch": 3.85, "learning_rate": 2.9095552032946656e-05, "loss": 0.2515, "step": 6418500 }, { "epoch": 3.85, "learning_rate": 2.9093452067386093e-05, "loss": 0.2454, "step": 6419000 }, { "epoch": 3.85, "learning_rate": 2.9091352101825526e-05, "loss": 0.2468, "step": 6419500 }, { "epoch": 3.85, "learning_rate": 2.9089252136264963e-05, "loss": 0.2486, "step": 6420000 }, { "epoch": 3.85, "learning_rate": 2.9087156370635524e-05, "loss": 0.2475, "step": 6420500 }, { "epoch": 3.85, "learning_rate": 2.9085056405074954e-05, "loss": 0.2447, "step": 6421000 }, { "epoch": 3.85, "learning_rate": 2.9082960639445514e-05, "loss": 0.2493, "step": 6421500 }, { "epoch": 3.85, "learning_rate": 2.908086067388495e-05, "loss": 0.2473, "step": 6422000 }, { "epoch": 3.85, "learning_rate": 2.9078760708324384e-05, "loss": 0.2463, "step": 6422500 }, { "epoch": 3.85, "learning_rate": 2.907666074276382e-05, "loss": 0.2476, "step": 6423000 }, { "epoch": 3.85, "learning_rate": 2.907456077720325e-05, "loss": 0.2501, "step": 6423500 }, { "epoch": 3.85, "learning_rate": 2.9072460811642685e-05, "loss": 0.2473, "step": 6424000 }, { "epoch": 3.85, "learning_rate": 2.907036084608212e-05, "loss": 0.2478, "step": 6424500 }, { "epoch": 3.85, "learning_rate": 2.906826088052156e-05, "loss": 0.2503, "step": 6425000 }, { "epoch": 3.85, "learning_rate": 2.906616511489212e-05, "loss": 0.2483, "step": 6425500 }, { "epoch": 3.85, "learning_rate": 2.906406514933155e-05, "loss": 0.253, "step": 6426000 }, { "epoch": 3.85, "learning_rate": 2.9061965183770982e-05, "loss": 0.2458, "step": 6426500 }, { "epoch": 3.85, "learning_rate": 2.905986521821042e-05, "loss": 0.2453, "step": 6427000 }, { "epoch": 3.85, "learning_rate": 2.905776945258098e-05, "loss": 0.2474, "step": 6427500 }, { "epoch": 3.85, "learning_rate": 2.9055669487020416e-05, "loss": 0.2443, "step": 6428000 }, { "epoch": 3.85, "learning_rate": 2.9053569521459846e-05, "loss": 0.2461, "step": 6428500 }, { "epoch": 3.85, "learning_rate": 2.905146955589928e-05, "loss": 0.2487, "step": 6429000 }, { "epoch": 3.85, "learning_rate": 2.904937379026984e-05, "loss": 0.249, "step": 6429500 }, { "epoch": 3.86, "learning_rate": 2.9047273824709277e-05, "loss": 0.25, "step": 6430000 }, { "epoch": 3.86, "learning_rate": 2.9045173859148707e-05, "loss": 0.2494, "step": 6430500 }, { "epoch": 3.86, "learning_rate": 2.9043073893588144e-05, "loss": 0.2471, "step": 6431000 }, { "epoch": 3.86, "learning_rate": 2.904098232788982e-05, "loss": 0.2492, "step": 6431500 }, { "epoch": 3.86, "learning_rate": 2.9038882362329258e-05, "loss": 0.2478, "step": 6432000 }, { "epoch": 3.86, "learning_rate": 2.903678239676869e-05, "loss": 0.2425, "step": 6432500 }, { "epoch": 3.86, "learning_rate": 2.9034682431208128e-05, "loss": 0.2463, "step": 6433000 }, { "epoch": 3.86, "learning_rate": 2.9032582465647565e-05, "loss": 0.2503, "step": 6433500 }, { "epoch": 3.86, "learning_rate": 2.9030482500087e-05, "loss": 0.2535, "step": 6434000 }, { "epoch": 3.86, "learning_rate": 2.9028386734457555e-05, "loss": 0.2444, "step": 6434500 }, { "epoch": 3.86, "learning_rate": 2.902628676889699e-05, "loss": 0.2502, "step": 6435000 }, { "epoch": 3.86, "learning_rate": 2.9024186803336426e-05, "loss": 0.2501, "step": 6435500 }, { "epoch": 3.86, "learning_rate": 2.9022086837775863e-05, "loss": 0.2514, "step": 6436000 }, { "epoch": 3.86, "learning_rate": 2.9019986872215296e-05, "loss": 0.25, "step": 6436500 }, { "epoch": 3.86, "learning_rate": 2.9017886906654733e-05, "loss": 0.2466, "step": 6437000 }, { "epoch": 3.86, "learning_rate": 2.901578694109417e-05, "loss": 0.254, "step": 6437500 }, { "epoch": 3.86, "learning_rate": 2.90136869755336e-05, "loss": 0.2485, "step": 6438000 }, { "epoch": 3.86, "learning_rate": 2.901159120990416e-05, "loss": 0.2489, "step": 6438500 }, { "epoch": 3.86, "learning_rate": 2.9009491244343594e-05, "loss": 0.2463, "step": 6439000 }, { "epoch": 3.86, "learning_rate": 2.900739127878303e-05, "loss": 0.2432, "step": 6439500 }, { "epoch": 3.86, "learning_rate": 2.900529131322246e-05, "loss": 0.2458, "step": 6440000 }, { "epoch": 3.86, "learning_rate": 2.900319554759302e-05, "loss": 0.2522, "step": 6440500 }, { "epoch": 3.86, "learning_rate": 2.9001095582032454e-05, "loss": 0.2469, "step": 6441000 }, { "epoch": 3.86, "learning_rate": 2.899899561647189e-05, "loss": 0.2514, "step": 6441500 }, { "epoch": 3.86, "learning_rate": 2.8996895650911328e-05, "loss": 0.2476, "step": 6442000 }, { "epoch": 3.86, "learning_rate": 2.899479988528188e-05, "loss": 0.2605, "step": 6442500 }, { "epoch": 3.86, "learning_rate": 2.899269991972132e-05, "loss": 0.2525, "step": 6443000 }, { "epoch": 3.86, "learning_rate": 2.8990599954160752e-05, "loss": 0.2576, "step": 6443500 }, { "epoch": 3.86, "learning_rate": 2.898849998860019e-05, "loss": 0.2509, "step": 6444000 }, { "epoch": 3.86, "learning_rate": 2.8986400023039626e-05, "loss": 0.2456, "step": 6444500 }, { "epoch": 3.86, "learning_rate": 2.8984300057479056e-05, "loss": 0.2502, "step": 6445000 }, { "epoch": 3.86, "learning_rate": 2.898220009191849e-05, "loss": 0.2493, "step": 6445500 }, { "epoch": 3.86, "learning_rate": 2.8980100126357926e-05, "loss": 0.2489, "step": 6446000 }, { "epoch": 3.86, "learning_rate": 2.8978004360728486e-05, "loss": 0.2425, "step": 6446500 }, { "epoch": 3.87, "learning_rate": 2.8975904395167923e-05, "loss": 0.2524, "step": 6447000 }, { "epoch": 3.87, "learning_rate": 2.897380442960735e-05, "loss": 0.2549, "step": 6447500 }, { "epoch": 3.87, "learning_rate": 2.897170866397791e-05, "loss": 0.2441, "step": 6448000 }, { "epoch": 3.87, "learning_rate": 2.8969608698417347e-05, "loss": 0.2469, "step": 6448500 }, { "epoch": 3.87, "learning_rate": 2.8967508732856784e-05, "loss": 0.2456, "step": 6449000 }, { "epoch": 3.87, "learning_rate": 2.8965408767296214e-05, "loss": 0.2461, "step": 6449500 }, { "epoch": 3.87, "learning_rate": 2.8963308801735647e-05, "loss": 0.2458, "step": 6450000 }, { "epoch": 3.87, "learning_rate": 2.8961208836175084e-05, "loss": 0.251, "step": 6450500 }, { "epoch": 3.87, "learning_rate": 2.895910887061452e-05, "loss": 0.2426, "step": 6451000 }, { "epoch": 3.87, "learning_rate": 2.8957008905053955e-05, "loss": 0.2463, "step": 6451500 }, { "epoch": 3.87, "learning_rate": 2.895490893949339e-05, "loss": 0.2546, "step": 6452000 }, { "epoch": 3.87, "learning_rate": 2.8952813173863945e-05, "loss": 0.2523, "step": 6452500 }, { "epoch": 3.87, "learning_rate": 2.8950717408234505e-05, "loss": 0.2491, "step": 6453000 }, { "epoch": 3.87, "learning_rate": 2.8948617442673942e-05, "loss": 0.2454, "step": 6453500 }, { "epoch": 3.87, "learning_rate": 2.894651747711338e-05, "loss": 0.2507, "step": 6454000 }, { "epoch": 3.87, "learning_rate": 2.8944417511552806e-05, "loss": 0.2464, "step": 6454500 }, { "epoch": 3.87, "learning_rate": 2.8942317545992243e-05, "loss": 0.2494, "step": 6455000 }, { "epoch": 3.87, "learning_rate": 2.894021758043168e-05, "loss": 0.251, "step": 6455500 }, { "epoch": 3.87, "learning_rate": 2.893812181480224e-05, "loss": 0.2536, "step": 6456000 }, { "epoch": 3.87, "learning_rate": 2.8936021849241673e-05, "loss": 0.25, "step": 6456500 }, { "epoch": 3.87, "learning_rate": 2.8933921883681103e-05, "loss": 0.2514, "step": 6457000 }, { "epoch": 3.87, "learning_rate": 2.893182191812054e-05, "loss": 0.2452, "step": 6457500 }, { "epoch": 3.87, "learning_rate": 2.8929721952559977e-05, "loss": 0.2458, "step": 6458000 }, { "epoch": 3.87, "learning_rate": 2.892762198699941e-05, "loss": 0.2423, "step": 6458500 }, { "epoch": 3.87, "learning_rate": 2.8925522021438847e-05, "loss": 0.2466, "step": 6459000 }, { "epoch": 3.87, "learning_rate": 2.8923422055878284e-05, "loss": 0.2494, "step": 6459500 }, { "epoch": 3.87, "learning_rate": 2.8921326290248838e-05, "loss": 0.2446, "step": 6460000 }, { "epoch": 3.87, "learning_rate": 2.8919226324688275e-05, "loss": 0.2552, "step": 6460500 }, { "epoch": 3.87, "learning_rate": 2.8917126359127708e-05, "loss": 0.2445, "step": 6461000 }, { "epoch": 3.87, "learning_rate": 2.8915026393567145e-05, "loss": 0.2423, "step": 6461500 }, { "epoch": 3.87, "learning_rate": 2.89129306279377e-05, "loss": 0.2399, "step": 6462000 }, { "epoch": 3.87, "learning_rate": 2.891083486230826e-05, "loss": 0.2532, "step": 6462500 }, { "epoch": 3.87, "learning_rate": 2.8908734896747696e-05, "loss": 0.2501, "step": 6463000 }, { "epoch": 3.88, "learning_rate": 2.8906634931187133e-05, "loss": 0.2483, "step": 6463500 }, { "epoch": 3.88, "learning_rate": 2.890453496562656e-05, "loss": 0.2496, "step": 6464000 }, { "epoch": 3.88, "learning_rate": 2.8902435000065996e-05, "loss": 0.2458, "step": 6464500 }, { "epoch": 3.88, "learning_rate": 2.8900335034505433e-05, "loss": 0.2473, "step": 6465000 }, { "epoch": 3.88, "learning_rate": 2.8898235068944866e-05, "loss": 0.2485, "step": 6465500 }, { "epoch": 3.88, "learning_rate": 2.8896135103384303e-05, "loss": 0.2523, "step": 6466000 }, { "epoch": 3.88, "learning_rate": 2.8894043537685984e-05, "loss": 0.2502, "step": 6466500 }, { "epoch": 3.88, "learning_rate": 2.8891943572125417e-05, "loss": 0.2493, "step": 6467000 }, { "epoch": 3.88, "learning_rate": 2.8889843606564854e-05, "loss": 0.2443, "step": 6467500 }, { "epoch": 3.88, "learning_rate": 2.888774364100429e-05, "loss": 0.249, "step": 6468000 }, { "epoch": 3.88, "learning_rate": 2.8885643675443724e-05, "loss": 0.2454, "step": 6468500 }, { "epoch": 3.88, "learning_rate": 2.8883543709883154e-05, "loss": 0.2478, "step": 6469000 }, { "epoch": 3.88, "learning_rate": 2.888144374432259e-05, "loss": 0.2405, "step": 6469500 }, { "epoch": 3.88, "learning_rate": 2.8879343778762028e-05, "loss": 0.2482, "step": 6470000 }, { "epoch": 3.88, "learning_rate": 2.887724801313259e-05, "loss": 0.2479, "step": 6470500 }, { "epoch": 3.88, "learning_rate": 2.8875148047572015e-05, "loss": 0.2464, "step": 6471000 }, { "epoch": 3.88, "learning_rate": 2.8873048082011452e-05, "loss": 0.2487, "step": 6471500 }, { "epoch": 3.88, "learning_rate": 2.8870952316382012e-05, "loss": 0.2489, "step": 6472000 }, { "epoch": 3.88, "learning_rate": 2.886885235082145e-05, "loss": 0.247, "step": 6472500 }, { "epoch": 3.88, "learning_rate": 2.8866752385260883e-05, "loss": 0.2541, "step": 6473000 }, { "epoch": 3.88, "learning_rate": 2.8864652419700313e-05, "loss": 0.248, "step": 6473500 }, { "epoch": 3.88, "learning_rate": 2.886255245413975e-05, "loss": 0.2482, "step": 6474000 }, { "epoch": 3.88, "learning_rate": 2.8860452488579186e-05, "loss": 0.2453, "step": 6474500 }, { "epoch": 3.88, "learning_rate": 2.885835252301862e-05, "loss": 0.2532, "step": 6475000 }, { "epoch": 3.88, "learning_rate": 2.8856252557458057e-05, "loss": 0.2479, "step": 6475500 }, { "epoch": 3.88, "learning_rate": 2.885415679182861e-05, "loss": 0.2447, "step": 6476000 }, { "epoch": 3.88, "learning_rate": 2.8852056826268047e-05, "loss": 0.2484, "step": 6476500 }, { "epoch": 3.88, "learning_rate": 2.8849956860707484e-05, "loss": 0.2505, "step": 6477000 }, { "epoch": 3.88, "learning_rate": 2.8847856895146917e-05, "loss": 0.2462, "step": 6477500 }, { "epoch": 3.88, "learning_rate": 2.8845761129517478e-05, "loss": 0.2464, "step": 6478000 }, { "epoch": 3.88, "learning_rate": 2.8843661163956908e-05, "loss": 0.2463, "step": 6478500 }, { "epoch": 3.88, "learning_rate": 2.8841561198396345e-05, "loss": 0.2477, "step": 6479000 }, { "epoch": 3.88, "learning_rate": 2.8839461232835778e-05, "loss": 0.247, "step": 6479500 }, { "epoch": 3.89, "learning_rate": 2.883736546720634e-05, "loss": 0.251, "step": 6480000 }, { "epoch": 3.89, "learning_rate": 2.883526550164577e-05, "loss": 0.2483, "step": 6480500 }, { "epoch": 3.89, "learning_rate": 2.883316973601633e-05, "loss": 0.2468, "step": 6481000 }, { "epoch": 3.89, "learning_rate": 2.8831069770455766e-05, "loss": 0.2428, "step": 6481500 }, { "epoch": 3.89, "learning_rate": 2.8828969804895203e-05, "loss": 0.2427, "step": 6482000 }, { "epoch": 3.89, "learning_rate": 2.8826869839334636e-05, "loss": 0.2525, "step": 6482500 }, { "epoch": 3.89, "learning_rate": 2.8824769873774066e-05, "loss": 0.2416, "step": 6483000 }, { "epoch": 3.89, "learning_rate": 2.8822669908213503e-05, "loss": 0.2479, "step": 6483500 }, { "epoch": 3.89, "learning_rate": 2.882056994265294e-05, "loss": 0.2453, "step": 6484000 }, { "epoch": 3.89, "learning_rate": 2.8818469977092373e-05, "loss": 0.2459, "step": 6484500 }, { "epoch": 3.89, "learning_rate": 2.8816374211462934e-05, "loss": 0.2469, "step": 6485000 }, { "epoch": 3.89, "learning_rate": 2.8814278445833487e-05, "loss": 0.2463, "step": 6485500 }, { "epoch": 3.89, "learning_rate": 2.8812178480272924e-05, "loss": 0.2472, "step": 6486000 }, { "epoch": 3.89, "learning_rate": 2.881007851471236e-05, "loss": 0.2481, "step": 6486500 }, { "epoch": 3.89, "learning_rate": 2.8807978549151794e-05, "loss": 0.249, "step": 6487000 }, { "epoch": 3.89, "learning_rate": 2.880587858359123e-05, "loss": 0.247, "step": 6487500 }, { "epoch": 3.89, "learning_rate": 2.880377861803066e-05, "loss": 0.2493, "step": 6488000 }, { "epoch": 3.89, "learning_rate": 2.8801678652470098e-05, "loss": 0.2411, "step": 6488500 }, { "epoch": 3.89, "learning_rate": 2.879957868690953e-05, "loss": 0.2477, "step": 6489000 }, { "epoch": 3.89, "learning_rate": 2.8797482921280092e-05, "loss": 0.244, "step": 6489500 }, { "epoch": 3.89, "learning_rate": 2.8795382955719522e-05, "loss": 0.2483, "step": 6490000 }, { "epoch": 3.89, "learning_rate": 2.879328299015896e-05, "loss": 0.248, "step": 6490500 }, { "epoch": 3.89, "learning_rate": 2.879118722452952e-05, "loss": 0.2503, "step": 6491000 }, { "epoch": 3.89, "learning_rate": 2.8789087258968956e-05, "loss": 0.2457, "step": 6491500 }, { "epoch": 3.89, "learning_rate": 2.878698729340839e-05, "loss": 0.2486, "step": 6492000 }, { "epoch": 3.89, "learning_rate": 2.878488732784782e-05, "loss": 0.2438, "step": 6492500 }, { "epoch": 3.89, "learning_rate": 2.8782787362287256e-05, "loss": 0.2386, "step": 6493000 }, { "epoch": 3.89, "learning_rate": 2.878068739672669e-05, "loss": 0.2432, "step": 6493500 }, { "epoch": 3.89, "learning_rate": 2.8778587431166127e-05, "loss": 0.2508, "step": 6494000 }, { "epoch": 3.89, "learning_rate": 2.8776487465605564e-05, "loss": 0.2521, "step": 6494500 }, { "epoch": 3.89, "learning_rate": 2.8774391699976117e-05, "loss": 0.249, "step": 6495000 }, { "epoch": 3.89, "learning_rate": 2.8772291734415554e-05, "loss": 0.249, "step": 6495500 }, { "epoch": 3.89, "learning_rate": 2.8770195968786114e-05, "loss": 0.2441, "step": 6496000 }, { "epoch": 3.89, "learning_rate": 2.8768096003225548e-05, "loss": 0.2462, "step": 6496500 }, { "epoch": 3.9, "learning_rate": 2.8765996037664985e-05, "loss": 0.2442, "step": 6497000 }, { "epoch": 3.9, "learning_rate": 2.8763896072104415e-05, "loss": 0.2516, "step": 6497500 }, { "epoch": 3.9, "learning_rate": 2.8761800306474975e-05, "loss": 0.2488, "step": 6498000 }, { "epoch": 3.9, "learning_rate": 2.8759700340914412e-05, "loss": 0.2446, "step": 6498500 }, { "epoch": 3.9, "learning_rate": 2.8757600375353845e-05, "loss": 0.2491, "step": 6499000 }, { "epoch": 3.9, "learning_rate": 2.8755500409793282e-05, "loss": 0.2499, "step": 6499500 }, { "epoch": 3.9, "learning_rate": 2.8753400444232712e-05, "loss": 0.2533, "step": 6500000 }, { "epoch": 3.9, "eval_loss": 0.22911366820335388, "eval_runtime": 1458.3506, "eval_samples_per_second": 361.175, "eval_steps_per_second": 60.196, "step": 6500000 }, { "epoch": 3.9, "learning_rate": 2.8751300478672146e-05, "loss": 0.2429, "step": 6500500 }, { "epoch": 3.9, "learning_rate": 2.8749200513111583e-05, "loss": 0.2493, "step": 6501000 }, { "epoch": 3.9, "learning_rate": 2.874710054755102e-05, "loss": 0.2503, "step": 6501500 }, { "epoch": 3.9, "learning_rate": 2.8745004781921573e-05, "loss": 0.2491, "step": 6502000 }, { "epoch": 3.9, "learning_rate": 2.874290481636101e-05, "loss": 0.2485, "step": 6502500 }, { "epoch": 3.9, "learning_rate": 2.8740804850800443e-05, "loss": 0.2442, "step": 6503000 }, { "epoch": 3.9, "learning_rate": 2.873870488523988e-05, "loss": 0.2535, "step": 6503500 }, { "epoch": 3.9, "learning_rate": 2.873660911961044e-05, "loss": 0.2465, "step": 6504000 }, { "epoch": 3.9, "learning_rate": 2.873450915404987e-05, "loss": 0.2487, "step": 6504500 }, { "epoch": 3.9, "learning_rate": 2.8732409188489307e-05, "loss": 0.2477, "step": 6505000 }, { "epoch": 3.9, "learning_rate": 2.873030922292874e-05, "loss": 0.2483, "step": 6505500 }, { "epoch": 3.9, "learning_rate": 2.87282134572993e-05, "loss": 0.2521, "step": 6506000 }, { "epoch": 3.9, "learning_rate": 2.8726113491738738e-05, "loss": 0.2518, "step": 6506500 }, { "epoch": 3.9, "learning_rate": 2.8724013526178168e-05, "loss": 0.2437, "step": 6507000 }, { "epoch": 3.9, "learning_rate": 2.87219135606176e-05, "loss": 0.2412, "step": 6507500 }, { "epoch": 3.9, "learning_rate": 2.871981359505704e-05, "loss": 0.2413, "step": 6508000 }, { "epoch": 3.9, "learning_rate": 2.8717713629496475e-05, "loss": 0.2452, "step": 6508500 }, { "epoch": 3.9, "learning_rate": 2.8715613663935912e-05, "loss": 0.2537, "step": 6509000 }, { "epoch": 3.9, "learning_rate": 2.8713513698375346e-05, "loss": 0.24, "step": 6509500 }, { "epoch": 3.9, "learning_rate": 2.87114179327459e-05, "loss": 0.244, "step": 6510000 }, { "epoch": 3.9, "learning_rate": 2.8709317967185336e-05, "loss": 0.2454, "step": 6510500 }, { "epoch": 3.9, "learning_rate": 2.8707218001624773e-05, "loss": 0.2494, "step": 6511000 }, { "epoch": 3.9, "learning_rate": 2.8705118036064206e-05, "loss": 0.2497, "step": 6511500 }, { "epoch": 3.9, "learning_rate": 2.8703018070503643e-05, "loss": 0.2478, "step": 6512000 }, { "epoch": 3.9, "learning_rate": 2.8700926504805324e-05, "loss": 0.2465, "step": 6512500 }, { "epoch": 3.9, "learning_rate": 2.8698826539244757e-05, "loss": 0.2408, "step": 6513000 }, { "epoch": 3.91, "learning_rate": 2.8696726573684194e-05, "loss": 0.2423, "step": 6513500 }, { "epoch": 3.91, "learning_rate": 2.8694626608123624e-05, "loss": 0.2476, "step": 6514000 }, { "epoch": 3.91, "learning_rate": 2.8692530842494184e-05, "loss": 0.2503, "step": 6514500 }, { "epoch": 3.91, "learning_rate": 2.869043087693362e-05, "loss": 0.246, "step": 6515000 }, { "epoch": 3.91, "learning_rate": 2.8688330911373055e-05, "loss": 0.2461, "step": 6515500 }, { "epoch": 3.91, "learning_rate": 2.868623094581249e-05, "loss": 0.2498, "step": 6516000 }, { "epoch": 3.91, "learning_rate": 2.868413098025192e-05, "loss": 0.2467, "step": 6516500 }, { "epoch": 3.91, "learning_rate": 2.8682031014691355e-05, "loss": 0.2468, "step": 6517000 }, { "epoch": 3.91, "learning_rate": 2.8679931049130792e-05, "loss": 0.2523, "step": 6517500 }, { "epoch": 3.91, "learning_rate": 2.867783108357023e-05, "loss": 0.2413, "step": 6518000 }, { "epoch": 3.91, "learning_rate": 2.8675731118009662e-05, "loss": 0.2431, "step": 6518500 }, { "epoch": 3.91, "learning_rate": 2.86736311524491e-05, "loss": 0.2484, "step": 6519000 }, { "epoch": 3.91, "learning_rate": 2.8671531186888536e-05, "loss": 0.2449, "step": 6519500 }, { "epoch": 3.91, "learning_rate": 2.866943122132797e-05, "loss": 0.2458, "step": 6520000 }, { "epoch": 3.91, "learning_rate": 2.8667335455698526e-05, "loss": 0.2523, "step": 6520500 }, { "epoch": 3.91, "learning_rate": 2.866523549013796e-05, "loss": 0.2476, "step": 6521000 }, { "epoch": 3.91, "learning_rate": 2.8663135524577397e-05, "loss": 0.2497, "step": 6521500 }, { "epoch": 3.91, "learning_rate": 2.8661035559016834e-05, "loss": 0.2492, "step": 6522000 }, { "epoch": 3.91, "learning_rate": 2.8658939793387387e-05, "loss": 0.2468, "step": 6522500 }, { "epoch": 3.91, "learning_rate": 2.8656839827826824e-05, "loss": 0.2479, "step": 6523000 }, { "epoch": 3.91, "learning_rate": 2.8654739862266257e-05, "loss": 0.2496, "step": 6523500 }, { "epoch": 3.91, "learning_rate": 2.865264409663681e-05, "loss": 0.256, "step": 6524000 }, { "epoch": 3.91, "learning_rate": 2.8650544131076248e-05, "loss": 0.2467, "step": 6524500 }, { "epoch": 3.91, "learning_rate": 2.8648444165515685e-05, "loss": 0.2454, "step": 6525000 }, { "epoch": 3.91, "learning_rate": 2.8646344199955118e-05, "loss": 0.2491, "step": 6525500 }, { "epoch": 3.91, "learning_rate": 2.8644248434325675e-05, "loss": 0.2452, "step": 6526000 }, { "epoch": 3.91, "learning_rate": 2.8642152668696235e-05, "loss": 0.245, "step": 6526500 }, { "epoch": 3.91, "learning_rate": 2.864005270313567e-05, "loss": 0.2502, "step": 6527000 }, { "epoch": 3.91, "learning_rate": 2.8637952737575106e-05, "loss": 0.2412, "step": 6527500 }, { "epoch": 3.91, "learning_rate": 2.8635852772014543e-05, "loss": 0.2517, "step": 6528000 }, { "epoch": 3.91, "learning_rate": 2.8633752806453973e-05, "loss": 0.2465, "step": 6528500 }, { "epoch": 3.91, "learning_rate": 2.8631652840893406e-05, "loss": 0.2542, "step": 6529000 }, { "epoch": 3.91, "learning_rate": 2.8629552875332843e-05, "loss": 0.2472, "step": 6529500 }, { "epoch": 3.91, "learning_rate": 2.862745290977228e-05, "loss": 0.2548, "step": 6530000 }, { "epoch": 3.92, "learning_rate": 2.8625352944211713e-05, "loss": 0.2441, "step": 6530500 }, { "epoch": 3.92, "learning_rate": 2.862325297865115e-05, "loss": 0.2502, "step": 6531000 }, { "epoch": 3.92, "learning_rate": 2.8621153013090587e-05, "loss": 0.249, "step": 6531500 }, { "epoch": 3.92, "learning_rate": 2.861905304753002e-05, "loss": 0.2386, "step": 6532000 }, { "epoch": 3.92, "learning_rate": 2.8616953081969457e-05, "loss": 0.2431, "step": 6532500 }, { "epoch": 3.92, "learning_rate": 2.861485731634001e-05, "loss": 0.2466, "step": 6533000 }, { "epoch": 3.92, "learning_rate": 2.8612757350779448e-05, "loss": 0.2437, "step": 6533500 }, { "epoch": 3.92, "learning_rate": 2.861065738521888e-05, "loss": 0.2539, "step": 6534000 }, { "epoch": 3.92, "learning_rate": 2.8608557419658318e-05, "loss": 0.2531, "step": 6534500 }, { "epoch": 3.92, "learning_rate": 2.8606457454097755e-05, "loss": 0.2474, "step": 6535000 }, { "epoch": 3.92, "learning_rate": 2.8604357488537192e-05, "loss": 0.2528, "step": 6535500 }, { "epoch": 3.92, "learning_rate": 2.8602257522976625e-05, "loss": 0.2418, "step": 6536000 }, { "epoch": 3.92, "learning_rate": 2.8600157557416055e-05, "loss": 0.2401, "step": 6536500 }, { "epoch": 3.92, "learning_rate": 2.8598061791786616e-05, "loss": 0.2529, "step": 6537000 }, { "epoch": 3.92, "learning_rate": 2.8595961826226052e-05, "loss": 0.2459, "step": 6537500 }, { "epoch": 3.92, "learning_rate": 2.8593866060596606e-05, "loss": 0.2454, "step": 6538000 }, { "epoch": 3.92, "learning_rate": 2.8591766095036043e-05, "loss": 0.2502, "step": 6538500 }, { "epoch": 3.92, "learning_rate": 2.8589666129475476e-05, "loss": 0.2462, "step": 6539000 }, { "epoch": 3.92, "learning_rate": 2.8587566163914913e-05, "loss": 0.2504, "step": 6539500 }, { "epoch": 3.92, "learning_rate": 2.858546619835435e-05, "loss": 0.2453, "step": 6540000 }, { "epoch": 3.92, "learning_rate": 2.8583366232793783e-05, "loss": 0.2488, "step": 6540500 }, { "epoch": 3.92, "learning_rate": 2.8581266267233214e-05, "loss": 0.2525, "step": 6541000 }, { "epoch": 3.92, "learning_rate": 2.857916630167265e-05, "loss": 0.245, "step": 6541500 }, { "epoch": 3.92, "learning_rate": 2.857707053604321e-05, "loss": 0.2424, "step": 6542000 }, { "epoch": 3.92, "learning_rate": 2.8574970570482648e-05, "loss": 0.249, "step": 6542500 }, { "epoch": 3.92, "learning_rate": 2.857287060492208e-05, "loss": 0.2498, "step": 6543000 }, { "epoch": 3.92, "learning_rate": 2.857077063936151e-05, "loss": 0.2387, "step": 6543500 }, { "epoch": 3.92, "learning_rate": 2.8568670673800948e-05, "loss": 0.2442, "step": 6544000 }, { "epoch": 3.92, "learning_rate": 2.856657070824038e-05, "loss": 0.2443, "step": 6544500 }, { "epoch": 3.92, "learning_rate": 2.8564470742679818e-05, "loss": 0.252, "step": 6545000 }, { "epoch": 3.92, "learning_rate": 2.8562370777119255e-05, "loss": 0.2441, "step": 6545500 }, { "epoch": 3.92, "learning_rate": 2.856027501148981e-05, "loss": 0.2457, "step": 6546000 }, { "epoch": 3.92, "learning_rate": 2.8558175045929246e-05, "loss": 0.248, "step": 6546500 }, { "epoch": 3.93, "learning_rate": 2.855607508036868e-05, "loss": 0.2455, "step": 6547000 }, { "epoch": 3.93, "learning_rate": 2.8553975114808116e-05, "loss": 0.2503, "step": 6547500 }, { "epoch": 3.93, "learning_rate": 2.855187934917867e-05, "loss": 0.2472, "step": 6548000 }, { "epoch": 3.93, "learning_rate": 2.8549779383618106e-05, "loss": 0.2523, "step": 6548500 }, { "epoch": 3.93, "learning_rate": 2.8547679418057543e-05, "loss": 0.2483, "step": 6549000 }, { "epoch": 3.93, "learning_rate": 2.8545579452496977e-05, "loss": 0.2454, "step": 6549500 }, { "epoch": 3.93, "learning_rate": 2.8543483686867537e-05, "loss": 0.2422, "step": 6550000 }, { "epoch": 3.93, "learning_rate": 2.8541383721306967e-05, "loss": 0.2513, "step": 6550500 }, { "epoch": 3.93, "learning_rate": 2.8539283755746404e-05, "loss": 0.247, "step": 6551000 }, { "epoch": 3.93, "learning_rate": 2.8537183790185837e-05, "loss": 0.2525, "step": 6551500 }, { "epoch": 3.93, "learning_rate": 2.8535088024556398e-05, "loss": 0.2491, "step": 6552000 }, { "epoch": 3.93, "learning_rate": 2.8532988058995835e-05, "loss": 0.2447, "step": 6552500 }, { "epoch": 3.93, "learning_rate": 2.8530888093435265e-05, "loss": 0.2434, "step": 6553000 }, { "epoch": 3.93, "learning_rate": 2.8528792327805825e-05, "loss": 0.2454, "step": 6553500 }, { "epoch": 3.93, "learning_rate": 2.8526692362245262e-05, "loss": 0.2527, "step": 6554000 }, { "epoch": 3.93, "learning_rate": 2.8524592396684695e-05, "loss": 0.246, "step": 6554500 }, { "epoch": 3.93, "learning_rate": 2.8522492431124132e-05, "loss": 0.25, "step": 6555000 }, { "epoch": 3.93, "learning_rate": 2.8520392465563562e-05, "loss": 0.2473, "step": 6555500 }, { "epoch": 3.93, "learning_rate": 2.8518292500003e-05, "loss": 0.249, "step": 6556000 }, { "epoch": 3.93, "learning_rate": 2.8516192534442432e-05, "loss": 0.2424, "step": 6556500 }, { "epoch": 3.93, "learning_rate": 2.851409256888187e-05, "loss": 0.2435, "step": 6557000 }, { "epoch": 3.93, "learning_rate": 2.8511996803252423e-05, "loss": 0.2502, "step": 6557500 }, { "epoch": 3.93, "learning_rate": 2.850989683769186e-05, "loss": 0.2452, "step": 6558000 }, { "epoch": 3.93, "learning_rate": 2.8507796872131293e-05, "loss": 0.2538, "step": 6558500 }, { "epoch": 3.93, "learning_rate": 2.850569690657073e-05, "loss": 0.2472, "step": 6559000 }, { "epoch": 3.93, "learning_rate": 2.850360114094129e-05, "loss": 0.2485, "step": 6559500 }, { "epoch": 3.93, "learning_rate": 2.850150117538072e-05, "loss": 0.2408, "step": 6560000 }, { "epoch": 3.93, "learning_rate": 2.8499401209820157e-05, "loss": 0.2481, "step": 6560500 }, { "epoch": 3.93, "learning_rate": 2.849730124425959e-05, "loss": 0.2482, "step": 6561000 }, { "epoch": 3.93, "learning_rate": 2.849520547863015e-05, "loss": 0.2482, "step": 6561500 }, { "epoch": 3.93, "learning_rate": 2.8493105513069588e-05, "loss": 0.2442, "step": 6562000 }, { "epoch": 3.93, "learning_rate": 2.8491005547509018e-05, "loss": 0.249, "step": 6562500 }, { "epoch": 3.93, "learning_rate": 2.8488905581948455e-05, "loss": 0.2448, "step": 6563000 }, { "epoch": 3.94, "learning_rate": 2.848680561638789e-05, "loss": 0.2534, "step": 6563500 }, { "epoch": 3.94, "learning_rate": 2.848470985075845e-05, "loss": 0.2469, "step": 6564000 }, { "epoch": 3.94, "learning_rate": 2.8482609885197886e-05, "loss": 0.2452, "step": 6564500 }, { "epoch": 3.94, "learning_rate": 2.8480509919637316e-05, "loss": 0.2464, "step": 6565000 }, { "epoch": 3.94, "learning_rate": 2.847840995407675e-05, "loss": 0.2451, "step": 6565500 }, { "epoch": 3.94, "learning_rate": 2.847631418844731e-05, "loss": 0.2484, "step": 6566000 }, { "epoch": 3.94, "learning_rate": 2.8474214222886746e-05, "loss": 0.2455, "step": 6566500 }, { "epoch": 3.94, "learning_rate": 2.8472114257326183e-05, "loss": 0.2477, "step": 6567000 }, { "epoch": 3.94, "learning_rate": 2.8470014291765613e-05, "loss": 0.2504, "step": 6567500 }, { "epoch": 3.94, "learning_rate": 2.8467918526136174e-05, "loss": 0.2448, "step": 6568000 }, { "epoch": 3.94, "learning_rate": 2.8465818560575607e-05, "loss": 0.2488, "step": 6568500 }, { "epoch": 3.94, "learning_rate": 2.8463718595015044e-05, "loss": 0.2444, "step": 6569000 }, { "epoch": 3.94, "learning_rate": 2.8461618629454474e-05, "loss": 0.2489, "step": 6569500 }, { "epoch": 3.94, "learning_rate": 2.8459522863825034e-05, "loss": 0.2512, "step": 6570000 }, { "epoch": 3.94, "learning_rate": 2.845742289826447e-05, "loss": 0.2472, "step": 6570500 }, { "epoch": 3.94, "learning_rate": 2.8455322932703905e-05, "loss": 0.247, "step": 6571000 }, { "epoch": 3.94, "learning_rate": 2.845322296714334e-05, "loss": 0.245, "step": 6571500 }, { "epoch": 3.94, "learning_rate": 2.8451127201513895e-05, "loss": 0.2521, "step": 6572000 }, { "epoch": 3.94, "learning_rate": 2.8449027235953332e-05, "loss": 0.2528, "step": 6572500 }, { "epoch": 3.94, "learning_rate": 2.8446931470323885e-05, "loss": 0.252, "step": 6573000 }, { "epoch": 3.94, "learning_rate": 2.8444831504763322e-05, "loss": 0.2431, "step": 6573500 }, { "epoch": 3.94, "learning_rate": 2.8442731539202756e-05, "loss": 0.2455, "step": 6574000 }, { "epoch": 3.94, "learning_rate": 2.8440631573642193e-05, "loss": 0.2501, "step": 6574500 }, { "epoch": 3.94, "learning_rate": 2.843853160808163e-05, "loss": 0.2549, "step": 6575000 }, { "epoch": 3.94, "learning_rate": 2.8436435842452183e-05, "loss": 0.2459, "step": 6575500 }, { "epoch": 3.94, "learning_rate": 2.843433587689162e-05, "loss": 0.2411, "step": 6576000 }, { "epoch": 3.94, "learning_rate": 2.8432235911331053e-05, "loss": 0.2474, "step": 6576500 }, { "epoch": 3.94, "learning_rate": 2.843013594577049e-05, "loss": 0.249, "step": 6577000 }, { "epoch": 3.94, "learning_rate": 2.8428035980209927e-05, "loss": 0.2533, "step": 6577500 }, { "epoch": 3.94, "learning_rate": 2.842593601464936e-05, "loss": 0.2465, "step": 6578000 }, { "epoch": 3.94, "learning_rate": 2.8423840249019914e-05, "loss": 0.2499, "step": 6578500 }, { "epoch": 3.94, "learning_rate": 2.842174028345935e-05, "loss": 0.2438, "step": 6579000 }, { "epoch": 3.94, "learning_rate": 2.8419640317898788e-05, "loss": 0.2453, "step": 6579500 }, { "epoch": 3.94, "learning_rate": 2.841754035233822e-05, "loss": 0.2431, "step": 6580000 }, { "epoch": 3.95, "learning_rate": 2.8415440386777658e-05, "loss": 0.2488, "step": 6580500 }, { "epoch": 3.95, "learning_rate": 2.8413340421217095e-05, "loss": 0.2459, "step": 6581000 }, { "epoch": 3.95, "learning_rate": 2.8411240455656525e-05, "loss": 0.2456, "step": 6581500 }, { "epoch": 3.95, "learning_rate": 2.840914049009596e-05, "loss": 0.2485, "step": 6582000 }, { "epoch": 3.95, "learning_rate": 2.840704472446652e-05, "loss": 0.2462, "step": 6582500 }, { "epoch": 3.95, "learning_rate": 2.8404944758905956e-05, "loss": 0.247, "step": 6583000 }, { "epoch": 3.95, "learning_rate": 2.8402844793345392e-05, "loss": 0.2477, "step": 6583500 }, { "epoch": 3.95, "learning_rate": 2.8400744827784823e-05, "loss": 0.2441, "step": 6584000 }, { "epoch": 3.95, "learning_rate": 2.8398644862224256e-05, "loss": 0.2495, "step": 6584500 }, { "epoch": 3.95, "learning_rate": 2.8396549096594816e-05, "loss": 0.2463, "step": 6585000 }, { "epoch": 3.95, "learning_rate": 2.8394449131034253e-05, "loss": 0.2447, "step": 6585500 }, { "epoch": 3.95, "learning_rate": 2.839234916547369e-05, "loss": 0.2509, "step": 6586000 }, { "epoch": 3.95, "learning_rate": 2.8390249199913117e-05, "loss": 0.24, "step": 6586500 }, { "epoch": 3.95, "learning_rate": 2.8388157634214797e-05, "loss": 0.2424, "step": 6587000 }, { "epoch": 3.95, "learning_rate": 2.8386057668654234e-05, "loss": 0.244, "step": 6587500 }, { "epoch": 3.95, "learning_rate": 2.8383957703093667e-05, "loss": 0.2462, "step": 6588000 }, { "epoch": 3.95, "learning_rate": 2.8381857737533104e-05, "loss": 0.2465, "step": 6588500 }, { "epoch": 3.95, "learning_rate": 2.837975777197254e-05, "loss": 0.2523, "step": 6589000 }, { "epoch": 3.95, "learning_rate": 2.8377657806411975e-05, "loss": 0.247, "step": 6589500 }, { "epoch": 3.95, "learning_rate": 2.837555784085141e-05, "loss": 0.247, "step": 6590000 }, { "epoch": 3.95, "learning_rate": 2.837345787529085e-05, "loss": 0.2419, "step": 6590500 }, { "epoch": 3.95, "learning_rate": 2.8371362109661402e-05, "loss": 0.2434, "step": 6591000 }, { "epoch": 3.95, "learning_rate": 2.836926214410084e-05, "loss": 0.2451, "step": 6591500 }, { "epoch": 3.95, "learning_rate": 2.8367162178540272e-05, "loss": 0.2477, "step": 6592000 }, { "epoch": 3.95, "learning_rate": 2.836506221297971e-05, "loss": 0.2461, "step": 6592500 }, { "epoch": 3.95, "learning_rate": 2.8362966447350263e-05, "loss": 0.245, "step": 6593000 }, { "epoch": 3.95, "learning_rate": 2.83608664817897e-05, "loss": 0.2509, "step": 6593500 }, { "epoch": 3.95, "learning_rate": 2.8358766516229136e-05, "loss": 0.2468, "step": 6594000 }, { "epoch": 3.95, "learning_rate": 2.835666655066857e-05, "loss": 0.2424, "step": 6594500 }, { "epoch": 3.95, "learning_rate": 2.8354570785039123e-05, "loss": 0.2478, "step": 6595000 }, { "epoch": 3.95, "learning_rate": 2.835247081947856e-05, "loss": 0.2511, "step": 6595500 }, { "epoch": 3.95, "learning_rate": 2.8350370853917997e-05, "loss": 0.2483, "step": 6596000 }, { "epoch": 3.95, "learning_rate": 2.834827088835743e-05, "loss": 0.2412, "step": 6596500 }, { "epoch": 3.96, "learning_rate": 2.8346175122727987e-05, "loss": 0.2449, "step": 6597000 }, { "epoch": 3.96, "learning_rate": 2.834407515716742e-05, "loss": 0.2543, "step": 6597500 }, { "epoch": 3.96, "learning_rate": 2.8341975191606858e-05, "loss": 0.2444, "step": 6598000 }, { "epoch": 3.96, "learning_rate": 2.8339875226046295e-05, "loss": 0.2472, "step": 6598500 }, { "epoch": 3.96, "learning_rate": 2.8337779460416848e-05, "loss": 0.2471, "step": 6599000 }, { "epoch": 3.96, "learning_rate": 2.8335679494856285e-05, "loss": 0.2458, "step": 6599500 }, { "epoch": 3.96, "learning_rate": 2.833357952929572e-05, "loss": 0.2434, "step": 6600000 }, { "epoch": 3.96, "eval_loss": 0.22884120047092438, "eval_runtime": 1459.2386, "eval_samples_per_second": 360.955, "eval_steps_per_second": 60.159, "step": 6600000 }, { "epoch": 3.96, "learning_rate": 2.8331479563735155e-05, "loss": 0.2434, "step": 6600500 }, { "epoch": 3.96, "learning_rate": 2.8329379598174592e-05, "loss": 0.256, "step": 6601000 }, { "epoch": 3.96, "learning_rate": 2.8327283832545146e-05, "loss": 0.2479, "step": 6601500 }, { "epoch": 3.96, "learning_rate": 2.832518386698458e-05, "loss": 0.2454, "step": 6602000 }, { "epoch": 3.96, "learning_rate": 2.8323083901424016e-05, "loss": 0.242, "step": 6602500 }, { "epoch": 3.96, "learning_rate": 2.8320983935863453e-05, "loss": 0.2423, "step": 6603000 }, { "epoch": 3.96, "learning_rate": 2.8318888170234007e-05, "loss": 0.2582, "step": 6603500 }, { "epoch": 3.96, "learning_rate": 2.8316788204673443e-05, "loss": 0.246, "step": 6604000 }, { "epoch": 3.96, "learning_rate": 2.8314688239112877e-05, "loss": 0.246, "step": 6604500 }, { "epoch": 3.96, "learning_rate": 2.8312588273552314e-05, "loss": 0.2495, "step": 6605000 }, { "epoch": 3.96, "learning_rate": 2.8310492507922867e-05, "loss": 0.2467, "step": 6605500 }, { "epoch": 3.96, "learning_rate": 2.8308392542362304e-05, "loss": 0.2458, "step": 6606000 }, { "epoch": 3.96, "learning_rate": 2.830629257680174e-05, "loss": 0.2499, "step": 6606500 }, { "epoch": 3.96, "learning_rate": 2.8304192611241174e-05, "loss": 0.2419, "step": 6607000 }, { "epoch": 3.96, "learning_rate": 2.8302096845611728e-05, "loss": 0.2422, "step": 6607500 }, { "epoch": 3.96, "learning_rate": 2.8299996880051165e-05, "loss": 0.2507, "step": 6608000 }, { "epoch": 3.96, "learning_rate": 2.82978969144906e-05, "loss": 0.25, "step": 6608500 }, { "epoch": 3.96, "learning_rate": 2.8295796948930035e-05, "loss": 0.2475, "step": 6609000 }, { "epoch": 3.96, "learning_rate": 2.8293701183300592e-05, "loss": 0.2514, "step": 6609500 }, { "epoch": 3.96, "learning_rate": 2.8291601217740026e-05, "loss": 0.2464, "step": 6610000 }, { "epoch": 3.96, "learning_rate": 2.8289501252179462e-05, "loss": 0.2456, "step": 6610500 }, { "epoch": 3.96, "learning_rate": 2.82874012866189e-05, "loss": 0.2438, "step": 6611000 }, { "epoch": 3.96, "learning_rate": 2.8285309720920576e-05, "loss": 0.2487, "step": 6611500 }, { "epoch": 3.96, "learning_rate": 2.8283209755360013e-05, "loss": 0.2455, "step": 6612000 }, { "epoch": 3.96, "learning_rate": 2.828110978979945e-05, "loss": 0.2541, "step": 6612500 }, { "epoch": 3.96, "learning_rate": 2.8279009824238883e-05, "loss": 0.2489, "step": 6613000 }, { "epoch": 3.97, "learning_rate": 2.827690985867832e-05, "loss": 0.2522, "step": 6613500 }, { "epoch": 3.97, "learning_rate": 2.827480989311775e-05, "loss": 0.2514, "step": 6614000 }, { "epoch": 3.97, "learning_rate": 2.8272709927557184e-05, "loss": 0.2455, "step": 6614500 }, { "epoch": 3.97, "learning_rate": 2.827060996199662e-05, "loss": 0.2477, "step": 6615000 }, { "epoch": 3.97, "learning_rate": 2.826851419636718e-05, "loss": 0.2439, "step": 6615500 }, { "epoch": 3.97, "learning_rate": 2.8266414230806618e-05, "loss": 0.2534, "step": 6616000 }, { "epoch": 3.97, "learning_rate": 2.8264314265246048e-05, "loss": 0.2431, "step": 6616500 }, { "epoch": 3.97, "learning_rate": 2.826221429968548e-05, "loss": 0.2461, "step": 6617000 }, { "epoch": 3.97, "learning_rate": 2.8260118534056042e-05, "loss": 0.2487, "step": 6617500 }, { "epoch": 3.97, "learning_rate": 2.825801856849548e-05, "loss": 0.2487, "step": 6618000 }, { "epoch": 3.97, "learning_rate": 2.8255918602934915e-05, "loss": 0.252, "step": 6618500 }, { "epoch": 3.97, "learning_rate": 2.8253818637374346e-05, "loss": 0.2456, "step": 6619000 }, { "epoch": 3.97, "learning_rate": 2.825171867181378e-05, "loss": 0.2472, "step": 6619500 }, { "epoch": 3.97, "learning_rate": 2.8249618706253216e-05, "loss": 0.2498, "step": 6620000 }, { "epoch": 3.97, "learning_rate": 2.8247518740692653e-05, "loss": 0.2501, "step": 6620500 }, { "epoch": 3.97, "learning_rate": 2.8245418775132086e-05, "loss": 0.247, "step": 6621000 }, { "epoch": 3.97, "learning_rate": 2.824332300950264e-05, "loss": 0.2448, "step": 6621500 }, { "epoch": 3.97, "learning_rate": 2.8241223043942077e-05, "loss": 0.2494, "step": 6622000 }, { "epoch": 3.97, "learning_rate": 2.8239123078381513e-05, "loss": 0.2467, "step": 6622500 }, { "epoch": 3.97, "learning_rate": 2.8237023112820947e-05, "loss": 0.2433, "step": 6623000 }, { "epoch": 3.97, "learning_rate": 2.8234927347191504e-05, "loss": 0.247, "step": 6623500 }, { "epoch": 3.97, "learning_rate": 2.8232827381630937e-05, "loss": 0.247, "step": 6624000 }, { "epoch": 3.97, "learning_rate": 2.8230727416070374e-05, "loss": 0.2446, "step": 6624500 }, { "epoch": 3.97, "learning_rate": 2.822862745050981e-05, "loss": 0.2466, "step": 6625000 }, { "epoch": 3.97, "learning_rate": 2.822653168488037e-05, "loss": 0.2448, "step": 6625500 }, { "epoch": 3.97, "learning_rate": 2.82244317193198e-05, "loss": 0.2461, "step": 6626000 }, { "epoch": 3.97, "learning_rate": 2.8222331753759235e-05, "loss": 0.2446, "step": 6626500 }, { "epoch": 3.97, "learning_rate": 2.8220231788198672e-05, "loss": 0.2513, "step": 6627000 }, { "epoch": 3.97, "learning_rate": 2.8218136022569232e-05, "loss": 0.2455, "step": 6627500 }, { "epoch": 3.97, "learning_rate": 2.821603605700867e-05, "loss": 0.249, "step": 6628000 }, { "epoch": 3.97, "learning_rate": 2.8213936091448096e-05, "loss": 0.241, "step": 6628500 }, { "epoch": 3.97, "learning_rate": 2.8211836125887532e-05, "loss": 0.2388, "step": 6629000 }, { "epoch": 3.97, "learning_rate": 2.8209740360258093e-05, "loss": 0.2511, "step": 6629500 }, { "epoch": 3.97, "learning_rate": 2.820764039469753e-05, "loss": 0.2539, "step": 6630000 }, { "epoch": 3.98, "learning_rate": 2.8205540429136963e-05, "loss": 0.2446, "step": 6630500 }, { "epoch": 3.98, "learning_rate": 2.8203440463576393e-05, "loss": 0.248, "step": 6631000 }, { "epoch": 3.98, "learning_rate": 2.8201344697946954e-05, "loss": 0.247, "step": 6631500 }, { "epoch": 3.98, "learning_rate": 2.819924473238639e-05, "loss": 0.2382, "step": 6632000 }, { "epoch": 3.98, "learning_rate": 2.8197144766825827e-05, "loss": 0.2455, "step": 6632500 }, { "epoch": 3.98, "learning_rate": 2.819504480126526e-05, "loss": 0.2465, "step": 6633000 }, { "epoch": 3.98, "learning_rate": 2.8192949035635818e-05, "loss": 0.2473, "step": 6633500 }, { "epoch": 3.98, "learning_rate": 2.819084907007525e-05, "loss": 0.2479, "step": 6634000 }, { "epoch": 3.98, "learning_rate": 2.8188749104514688e-05, "loss": 0.2479, "step": 6634500 }, { "epoch": 3.98, "learning_rate": 2.8186649138954125e-05, "loss": 0.2463, "step": 6635000 }, { "epoch": 3.98, "learning_rate": 2.818455337332468e-05, "loss": 0.2509, "step": 6635500 }, { "epoch": 3.98, "learning_rate": 2.8182453407764112e-05, "loss": 0.254, "step": 6636000 }, { "epoch": 3.98, "learning_rate": 2.818035344220355e-05, "loss": 0.2459, "step": 6636500 }, { "epoch": 3.98, "learning_rate": 2.8178253476642986e-05, "loss": 0.2502, "step": 6637000 }, { "epoch": 3.98, "learning_rate": 2.817615351108242e-05, "loss": 0.245, "step": 6637500 }, { "epoch": 3.98, "learning_rate": 2.817405354552185e-05, "loss": 0.243, "step": 6638000 }, { "epoch": 3.98, "learning_rate": 2.817195777989241e-05, "loss": 0.2484, "step": 6638500 }, { "epoch": 3.98, "learning_rate": 2.8169857814331846e-05, "loss": 0.2496, "step": 6639000 }, { "epoch": 3.98, "learning_rate": 2.8167757848771283e-05, "loss": 0.2465, "step": 6639500 }, { "epoch": 3.98, "learning_rate": 2.8165657883210717e-05, "loss": 0.245, "step": 6640000 }, { "epoch": 3.98, "learning_rate": 2.8163557917650147e-05, "loss": 0.2502, "step": 6640500 }, { "epoch": 3.98, "learning_rate": 2.8161457952089583e-05, "loss": 0.2489, "step": 6641000 }, { "epoch": 3.98, "learning_rate": 2.8159362186460144e-05, "loss": 0.2453, "step": 6641500 }, { "epoch": 3.98, "learning_rate": 2.815726222089958e-05, "loss": 0.2507, "step": 6642000 }, { "epoch": 3.98, "learning_rate": 2.8155162255339014e-05, "loss": 0.2484, "step": 6642500 }, { "epoch": 3.98, "learning_rate": 2.8153062289778444e-05, "loss": 0.2472, "step": 6643000 }, { "epoch": 3.98, "learning_rate": 2.815096232421788e-05, "loss": 0.2487, "step": 6643500 }, { "epoch": 3.98, "learning_rate": 2.814886655858844e-05, "loss": 0.2466, "step": 6644000 }, { "epoch": 3.98, "learning_rate": 2.8146766593027875e-05, "loss": 0.2465, "step": 6644500 }, { "epoch": 3.98, "learning_rate": 2.8144666627467305e-05, "loss": 0.252, "step": 6645000 }, { "epoch": 3.98, "learning_rate": 2.8142566661906742e-05, "loss": 0.2477, "step": 6645500 }, { "epoch": 3.98, "learning_rate": 2.814046669634618e-05, "loss": 0.2501, "step": 6646000 }, { "epoch": 3.98, "learning_rate": 2.8138366730785612e-05, "loss": 0.2497, "step": 6646500 }, { "epoch": 3.99, "learning_rate": 2.813626676522505e-05, "loss": 0.2429, "step": 6647000 }, { "epoch": 3.99, "learning_rate": 2.8134166799664486e-05, "loss": 0.2526, "step": 6647500 }, { "epoch": 3.99, "learning_rate": 2.8132075233966163e-05, "loss": 0.2445, "step": 6648000 }, { "epoch": 3.99, "learning_rate": 2.81299752684056e-05, "loss": 0.2468, "step": 6648500 }, { "epoch": 3.99, "learning_rate": 2.8127875302845037e-05, "loss": 0.2551, "step": 6649000 }, { "epoch": 3.99, "learning_rate": 2.812577533728447e-05, "loss": 0.2525, "step": 6649500 }, { "epoch": 3.99, "learning_rate": 2.81236753717239e-05, "loss": 0.2529, "step": 6650000 }, { "epoch": 3.99, "learning_rate": 2.812157960609446e-05, "loss": 0.2525, "step": 6650500 }, { "epoch": 3.99, "learning_rate": 2.8119479640533897e-05, "loss": 0.2467, "step": 6651000 }, { "epoch": 3.99, "learning_rate": 2.8117379674973334e-05, "loss": 0.2468, "step": 6651500 }, { "epoch": 3.99, "learning_rate": 2.8115279709412768e-05, "loss": 0.2471, "step": 6652000 }, { "epoch": 3.99, "learning_rate": 2.8113179743852198e-05, "loss": 0.2418, "step": 6652500 }, { "epoch": 3.99, "learning_rate": 2.8111079778291635e-05, "loss": 0.2452, "step": 6653000 }, { "epoch": 3.99, "learning_rate": 2.8108979812731068e-05, "loss": 0.2452, "step": 6653500 }, { "epoch": 3.99, "learning_rate": 2.810688404710163e-05, "loss": 0.2409, "step": 6654000 }, { "epoch": 3.99, "learning_rate": 2.810478408154106e-05, "loss": 0.2465, "step": 6654500 }, { "epoch": 3.99, "learning_rate": 2.8102684115980495e-05, "loss": 0.2488, "step": 6655000 }, { "epoch": 3.99, "learning_rate": 2.8100584150419932e-05, "loss": 0.2511, "step": 6655500 }, { "epoch": 3.99, "learning_rate": 2.8098484184859366e-05, "loss": 0.249, "step": 6656000 }, { "epoch": 3.99, "learning_rate": 2.8096384219298802e-05, "loss": 0.253, "step": 6656500 }, { "epoch": 3.99, "learning_rate": 2.8094288453669356e-05, "loss": 0.2466, "step": 6657000 }, { "epoch": 3.99, "learning_rate": 2.8092188488108793e-05, "loss": 0.2482, "step": 6657500 }, { "epoch": 3.99, "learning_rate": 2.809008852254823e-05, "loss": 0.2454, "step": 6658000 }, { "epoch": 3.99, "learning_rate": 2.8087988556987663e-05, "loss": 0.251, "step": 6658500 }, { "epoch": 3.99, "learning_rate": 2.80858885914271e-05, "loss": 0.2456, "step": 6659000 }, { "epoch": 3.99, "learning_rate": 2.8083788625866537e-05, "loss": 0.2476, "step": 6659500 }, { "epoch": 3.99, "learning_rate": 2.808168866030597e-05, "loss": 0.2433, "step": 6660000 }, { "epoch": 3.99, "learning_rate": 2.8079588694745407e-05, "loss": 0.2465, "step": 6660500 }, { "epoch": 3.99, "learning_rate": 2.807749292911596e-05, "loss": 0.2521, "step": 6661000 }, { "epoch": 3.99, "learning_rate": 2.8075392963555398e-05, "loss": 0.2464, "step": 6661500 }, { "epoch": 3.99, "learning_rate": 2.807329719792595e-05, "loss": 0.2491, "step": 6662000 }, { "epoch": 3.99, "learning_rate": 2.8071197232365388e-05, "loss": 0.2457, "step": 6662500 }, { "epoch": 3.99, "learning_rate": 2.806909726680482e-05, "loss": 0.2507, "step": 6663000 }, { "epoch": 4.0, "learning_rate": 2.8066997301244258e-05, "loss": 0.2415, "step": 6663500 }, { "epoch": 4.0, "learning_rate": 2.8064897335683695e-05, "loss": 0.2476, "step": 6664000 }, { "epoch": 4.0, "learning_rate": 2.806279737012313e-05, "loss": 0.2461, "step": 6664500 }, { "epoch": 4.0, "learning_rate": 2.8060697404562565e-05, "loss": 0.2477, "step": 6665000 }, { "epoch": 4.0, "learning_rate": 2.8058597439002002e-05, "loss": 0.2498, "step": 6665500 }, { "epoch": 4.0, "learning_rate": 2.8056501673372556e-05, "loss": 0.2476, "step": 6666000 }, { "epoch": 4.0, "learning_rate": 2.8054401707811993e-05, "loss": 0.2432, "step": 6666500 }, { "epoch": 4.0, "learning_rate": 2.8052305942182546e-05, "loss": 0.2479, "step": 6667000 }, { "epoch": 4.0, "learning_rate": 2.805020597662198e-05, "loss": 0.2467, "step": 6667500 }, { "epoch": 4.0, "learning_rate": 2.8048106011061417e-05, "loss": 0.2478, "step": 6668000 }, { "epoch": 4.0, "learning_rate": 2.8046006045500853e-05, "loss": 0.2424, "step": 6668500 }, { "epoch": 4.0, "learning_rate": 2.8043906079940287e-05, "loss": 0.2506, "step": 6669000 }, { "epoch": 4.0, "learning_rate": 2.8041806114379724e-05, "loss": 0.2503, "step": 6669500 }, { "epoch": 4.0, "learning_rate": 2.803970614881916e-05, "loss": 0.2486, "step": 6670000 }, { "epoch": 4.0, "learning_rate": 2.8037606183258594e-05, "loss": 0.2491, "step": 6670500 }, { "epoch": 4.0, "learning_rate": 2.803550621769803e-05, "loss": 0.2481, "step": 6671000 }, { "epoch": 4.0, "learning_rate": 2.8033410452068584e-05, "loss": 0.2468, "step": 6671500 }, { "epoch": 4.0, "learning_rate": 2.803131048650802e-05, "loss": 0.2411, "step": 6672000 }, { "epoch": 4.0, "learning_rate": 2.8029210520947458e-05, "loss": 0.2404, "step": 6672500 }, { "epoch": 4.0, "learning_rate": 2.802711055538689e-05, "loss": 0.2478, "step": 6673000 }, { "epoch": 4.0, "learning_rate": 2.802501478975745e-05, "loss": 0.2373, "step": 6673500 }, { "epoch": 4.0, "learning_rate": 2.8022914824196882e-05, "loss": 0.2375, "step": 6674000 }, { "epoch": 4.0, "learning_rate": 2.802081485863632e-05, "loss": 0.2338, "step": 6674500 }, { "epoch": 4.0, "learning_rate": 2.8018714893075756e-05, "loss": 0.2421, "step": 6675000 }, { "epoch": 4.0, "learning_rate": 2.801661912744631e-05, "loss": 0.2424, "step": 6675500 }, { "epoch": 4.0, "learning_rate": 2.8014519161885743e-05, "loss": 0.2428, "step": 6676000 }, { "epoch": 4.0, "learning_rate": 2.801241919632518e-05, "loss": 0.2397, "step": 6676500 }, { "epoch": 4.0, "learning_rate": 2.8010319230764616e-05, "loss": 0.2411, "step": 6677000 }, { "epoch": 4.0, "learning_rate": 2.8008227665066294e-05, "loss": 0.2396, "step": 6677500 }, { "epoch": 4.0, "learning_rate": 2.800612769950573e-05, "loss": 0.2399, "step": 6678000 }, { "epoch": 4.0, "learning_rate": 2.800402773394516e-05, "loss": 0.2451, "step": 6678500 }, { "epoch": 4.0, "learning_rate": 2.8001927768384597e-05, "loss": 0.2468, "step": 6679000 }, { "epoch": 4.0, "learning_rate": 2.7999827802824034e-05, "loss": 0.2385, "step": 6679500 }, { "epoch": 4.0, "learning_rate": 2.7997727837263468e-05, "loss": 0.2457, "step": 6680000 }, { "epoch": 4.01, "learning_rate": 2.7995627871702904e-05, "loss": 0.2475, "step": 6680500 }, { "epoch": 4.01, "learning_rate": 2.7993527906142338e-05, "loss": 0.2425, "step": 6681000 }, { "epoch": 4.01, "learning_rate": 2.7991432140512895e-05, "loss": 0.2427, "step": 6681500 }, { "epoch": 4.01, "learning_rate": 2.7989332174952332e-05, "loss": 0.2399, "step": 6682000 }, { "epoch": 4.01, "learning_rate": 2.7987232209391765e-05, "loss": 0.247, "step": 6682500 }, { "epoch": 4.01, "learning_rate": 2.79851322438312e-05, "loss": 0.2425, "step": 6683000 }, { "epoch": 4.01, "learning_rate": 2.798303647820176e-05, "loss": 0.237, "step": 6683500 }, { "epoch": 4.01, "learning_rate": 2.7980936512641192e-05, "loss": 0.2437, "step": 6684000 }, { "epoch": 4.01, "learning_rate": 2.7978836547080626e-05, "loss": 0.243, "step": 6684500 }, { "epoch": 4.01, "learning_rate": 2.7976736581520063e-05, "loss": 0.2353, "step": 6685000 }, { "epoch": 4.01, "learning_rate": 2.797464081589062e-05, "loss": 0.2423, "step": 6685500 }, { "epoch": 4.01, "learning_rate": 2.7972540850330053e-05, "loss": 0.2388, "step": 6686000 }, { "epoch": 4.01, "learning_rate": 2.797044088476949e-05, "loss": 0.2401, "step": 6686500 }, { "epoch": 4.01, "learning_rate": 2.7968340919208923e-05, "loss": 0.2472, "step": 6687000 }, { "epoch": 4.01, "learning_rate": 2.796624095364836e-05, "loss": 0.2418, "step": 6687500 }, { "epoch": 4.01, "learning_rate": 2.7964145188018917e-05, "loss": 0.244, "step": 6688000 }, { "epoch": 4.01, "learning_rate": 2.796204522245835e-05, "loss": 0.2396, "step": 6688500 }, { "epoch": 4.01, "learning_rate": 2.7959945256897788e-05, "loss": 0.236, "step": 6689000 }, { "epoch": 4.01, "learning_rate": 2.795784529133722e-05, "loss": 0.2405, "step": 6689500 }, { "epoch": 4.01, "learning_rate": 2.7955749525707778e-05, "loss": 0.2414, "step": 6690000 }, { "epoch": 4.01, "learning_rate": 2.7953649560147215e-05, "loss": 0.2428, "step": 6690500 }, { "epoch": 4.01, "learning_rate": 2.7951553794517772e-05, "loss": 0.2412, "step": 6691000 }, { "epoch": 4.01, "learning_rate": 2.7949453828957202e-05, "loss": 0.2394, "step": 6691500 }, { "epoch": 4.01, "learning_rate": 2.794735386339664e-05, "loss": 0.2354, "step": 6692000 }, { "epoch": 4.01, "learning_rate": 2.7945253897836076e-05, "loss": 0.2346, "step": 6692500 }, { "epoch": 4.01, "learning_rate": 2.794315393227551e-05, "loss": 0.2445, "step": 6693000 }, { "epoch": 4.01, "learning_rate": 2.7941053966714946e-05, "loss": 0.2417, "step": 6693500 }, { "epoch": 4.01, "learning_rate": 2.793895400115438e-05, "loss": 0.2389, "step": 6694000 }, { "epoch": 4.01, "learning_rate": 2.7936854035593816e-05, "loss": 0.2388, "step": 6694500 }, { "epoch": 4.01, "learning_rate": 2.7934758269964373e-05, "loss": 0.2387, "step": 6695000 }, { "epoch": 4.01, "learning_rate": 2.7932658304403807e-05, "loss": 0.2449, "step": 6695500 }, { "epoch": 4.01, "learning_rate": 2.7930562538774364e-05, "loss": 0.2412, "step": 6696000 }, { "epoch": 4.01, "learning_rate": 2.7928462573213797e-05, "loss": 0.2411, "step": 6696500 }, { "epoch": 4.02, "learning_rate": 2.7926362607653234e-05, "loss": 0.2367, "step": 6697000 }, { "epoch": 4.02, "learning_rate": 2.792426264209267e-05, "loss": 0.2349, "step": 6697500 }, { "epoch": 4.02, "learning_rate": 2.7922162676532104e-05, "loss": 0.2383, "step": 6698000 }, { "epoch": 4.02, "learning_rate": 2.792006271097154e-05, "loss": 0.2435, "step": 6698500 }, { "epoch": 4.02, "learning_rate": 2.7917962745410975e-05, "loss": 0.2422, "step": 6699000 }, { "epoch": 4.02, "learning_rate": 2.7915862779850408e-05, "loss": 0.2423, "step": 6699500 }, { "epoch": 4.02, "learning_rate": 2.791377121415209e-05, "loss": 0.2381, "step": 6700000 }, { "epoch": 4.02, "eval_loss": 0.22862936556339264, "eval_runtime": 1462.8676, "eval_samples_per_second": 360.06, "eval_steps_per_second": 60.01, "step": 6700000 }, { "epoch": 4.02, "learning_rate": 2.7911671248591525e-05, "loss": 0.2376, "step": 6700500 }, { "epoch": 4.02, "learning_rate": 2.790957548296208e-05, "loss": 0.2457, "step": 6701000 }, { "epoch": 4.02, "learning_rate": 2.7907475517401512e-05, "loss": 0.2445, "step": 6701500 }, { "epoch": 4.02, "learning_rate": 2.790537555184095e-05, "loss": 0.2392, "step": 6702000 }, { "epoch": 4.02, "learning_rate": 2.7903275586280386e-05, "loss": 0.2514, "step": 6702500 }, { "epoch": 4.02, "learning_rate": 2.7901175620719823e-05, "loss": 0.2372, "step": 6703000 }, { "epoch": 4.02, "learning_rate": 2.7899075655159253e-05, "loss": 0.2363, "step": 6703500 }, { "epoch": 4.02, "learning_rate": 2.789697568959869e-05, "loss": 0.2435, "step": 6704000 }, { "epoch": 4.02, "learning_rate": 2.7894879923969247e-05, "loss": 0.2456, "step": 6704500 }, { "epoch": 4.02, "learning_rate": 2.7892779958408684e-05, "loss": 0.2311, "step": 6705000 }, { "epoch": 4.02, "learning_rate": 2.7890679992848117e-05, "loss": 0.2421, "step": 6705500 }, { "epoch": 4.02, "learning_rate": 2.788858002728755e-05, "loss": 0.2462, "step": 6706000 }, { "epoch": 4.02, "learning_rate": 2.7886480061726987e-05, "loss": 0.2399, "step": 6706500 }, { "epoch": 4.02, "learning_rate": 2.7884380096166424e-05, "loss": 0.2419, "step": 6707000 }, { "epoch": 4.02, "learning_rate": 2.7882280130605858e-05, "loss": 0.2422, "step": 6707500 }, { "epoch": 4.02, "learning_rate": 2.7880180165045295e-05, "loss": 0.2408, "step": 6708000 }, { "epoch": 4.02, "learning_rate": 2.7878080199484728e-05, "loss": 0.2431, "step": 6708500 }, { "epoch": 4.02, "learning_rate": 2.787598023392416e-05, "loss": 0.2455, "step": 6709000 }, { "epoch": 4.02, "learning_rate": 2.7873880268363598e-05, "loss": 0.2359, "step": 6709500 }, { "epoch": 4.02, "learning_rate": 2.7871780302803035e-05, "loss": 0.2435, "step": 6710000 }, { "epoch": 4.02, "learning_rate": 2.7869684537173592e-05, "loss": 0.2419, "step": 6710500 }, { "epoch": 4.02, "learning_rate": 2.7867584571613026e-05, "loss": 0.2379, "step": 6711000 }, { "epoch": 4.02, "learning_rate": 2.786548460605246e-05, "loss": 0.2409, "step": 6711500 }, { "epoch": 4.02, "learning_rate": 2.7863384640491896e-05, "loss": 0.2364, "step": 6712000 }, { "epoch": 4.02, "learning_rate": 2.7861288874862453e-05, "loss": 0.2435, "step": 6712500 }, { "epoch": 4.02, "learning_rate": 2.785918890930189e-05, "loss": 0.2406, "step": 6713000 }, { "epoch": 4.03, "learning_rate": 2.785708894374132e-05, "loss": 0.2417, "step": 6713500 }, { "epoch": 4.03, "learning_rate": 2.7854988978180757e-05, "loss": 0.245, "step": 6714000 }, { "epoch": 4.03, "learning_rate": 2.7852893212551314e-05, "loss": 0.2426, "step": 6714500 }, { "epoch": 4.03, "learning_rate": 2.785079324699075e-05, "loss": 0.241, "step": 6715000 }, { "epoch": 4.03, "learning_rate": 2.7848693281430184e-05, "loss": 0.2452, "step": 6715500 }, { "epoch": 4.03, "learning_rate": 2.7846593315869617e-05, "loss": 0.239, "step": 6716000 }, { "epoch": 4.03, "learning_rate": 2.7844497550240174e-05, "loss": 0.2432, "step": 6716500 }, { "epoch": 4.03, "learning_rate": 2.784239758467961e-05, "loss": 0.2404, "step": 6717000 }, { "epoch": 4.03, "learning_rate": 2.7840297619119048e-05, "loss": 0.2393, "step": 6717500 }, { "epoch": 4.03, "learning_rate": 2.783819765355848e-05, "loss": 0.241, "step": 6718000 }, { "epoch": 4.03, "learning_rate": 2.783610188792904e-05, "loss": 0.2381, "step": 6718500 }, { "epoch": 4.03, "learning_rate": 2.7834001922368472e-05, "loss": 0.2463, "step": 6719000 }, { "epoch": 4.03, "learning_rate": 2.783190195680791e-05, "loss": 0.241, "step": 6719500 }, { "epoch": 4.03, "learning_rate": 2.7829801991247346e-05, "loss": 0.2434, "step": 6720000 }, { "epoch": 4.03, "learning_rate": 2.7827702025686776e-05, "loss": 0.2447, "step": 6720500 }, { "epoch": 4.03, "learning_rate": 2.7825602060126212e-05, "loss": 0.235, "step": 6721000 }, { "epoch": 4.03, "learning_rate": 2.782350209456565e-05, "loss": 0.2463, "step": 6721500 }, { "epoch": 4.03, "learning_rate": 2.7821402129005083e-05, "loss": 0.2399, "step": 6722000 }, { "epoch": 4.03, "learning_rate": 2.7819306363375643e-05, "loss": 0.2514, "step": 6722500 }, { "epoch": 4.03, "learning_rate": 2.7817206397815073e-05, "loss": 0.2424, "step": 6723000 }, { "epoch": 4.03, "learning_rate": 2.781510643225451e-05, "loss": 0.2411, "step": 6723500 }, { "epoch": 4.03, "learning_rate": 2.7813006466693947e-05, "loss": 0.2395, "step": 6724000 }, { "epoch": 4.03, "learning_rate": 2.7810910701064504e-05, "loss": 0.2401, "step": 6724500 }, { "epoch": 4.03, "learning_rate": 2.7808810735503937e-05, "loss": 0.2405, "step": 6725000 }, { "epoch": 4.03, "learning_rate": 2.780671076994337e-05, "loss": 0.2377, "step": 6725500 }, { "epoch": 4.03, "learning_rate": 2.7804610804382808e-05, "loss": 0.2446, "step": 6726000 }, { "epoch": 4.03, "learning_rate": 2.7802515038753365e-05, "loss": 0.2458, "step": 6726500 }, { "epoch": 4.03, "learning_rate": 2.78004150731928e-05, "loss": 0.2399, "step": 6727000 }, { "epoch": 4.03, "learning_rate": 2.779831510763223e-05, "loss": 0.237, "step": 6727500 }, { "epoch": 4.03, "learning_rate": 2.7796219342002792e-05, "loss": 0.2413, "step": 6728000 }, { "epoch": 4.03, "learning_rate": 2.7794119376442225e-05, "loss": 0.244, "step": 6728500 }, { "epoch": 4.03, "learning_rate": 2.7792019410881662e-05, "loss": 0.2407, "step": 6729000 }, { "epoch": 4.03, "learning_rate": 2.77899194453211e-05, "loss": 0.2422, "step": 6729500 }, { "epoch": 4.03, "learning_rate": 2.778781947976053e-05, "loss": 0.2435, "step": 6730000 }, { "epoch": 4.04, "learning_rate": 2.7785719514199966e-05, "loss": 0.2389, "step": 6730500 }, { "epoch": 4.04, "learning_rate": 2.7783619548639403e-05, "loss": 0.2385, "step": 6731000 }, { "epoch": 4.04, "learning_rate": 2.7781519583078836e-05, "loss": 0.2391, "step": 6731500 }, { "epoch": 4.04, "learning_rate": 2.7779423817449393e-05, "loss": 0.2385, "step": 6732000 }, { "epoch": 4.04, "learning_rate": 2.7777323851888827e-05, "loss": 0.2385, "step": 6732500 }, { "epoch": 4.04, "learning_rate": 2.7775223886328263e-05, "loss": 0.2389, "step": 6733000 }, { "epoch": 4.04, "learning_rate": 2.77731239207677e-05, "loss": 0.2407, "step": 6733500 }, { "epoch": 4.04, "learning_rate": 2.7771032355069377e-05, "loss": 0.2452, "step": 6734000 }, { "epoch": 4.04, "learning_rate": 2.776893238950881e-05, "loss": 0.2405, "step": 6734500 }, { "epoch": 4.04, "learning_rate": 2.7766832423948248e-05, "loss": 0.2457, "step": 6735000 }, { "epoch": 4.04, "learning_rate": 2.776473245838768e-05, "loss": 0.2477, "step": 6735500 }, { "epoch": 4.04, "learning_rate": 2.7762632492827118e-05, "loss": 0.246, "step": 6736000 }, { "epoch": 4.04, "learning_rate": 2.7760532527266555e-05, "loss": 0.2389, "step": 6736500 }, { "epoch": 4.04, "learning_rate": 2.775843676163711e-05, "loss": 0.2421, "step": 6737000 }, { "epoch": 4.04, "learning_rate": 2.7756336796076542e-05, "loss": 0.2432, "step": 6737500 }, { "epoch": 4.04, "learning_rate": 2.775423683051598e-05, "loss": 0.244, "step": 6738000 }, { "epoch": 4.04, "learning_rate": 2.7752136864955416e-05, "loss": 0.2434, "step": 6738500 }, { "epoch": 4.04, "learning_rate": 2.7750036899394852e-05, "loss": 0.2379, "step": 6739000 }, { "epoch": 4.04, "learning_rate": 2.7747936933834283e-05, "loss": 0.2345, "step": 6739500 }, { "epoch": 4.04, "learning_rate": 2.774583696827372e-05, "loss": 0.2411, "step": 6740000 }, { "epoch": 4.04, "learning_rate": 2.7743741202644276e-05, "loss": 0.2437, "step": 6740500 }, { "epoch": 4.04, "learning_rate": 2.7741641237083713e-05, "loss": 0.2354, "step": 6741000 }, { "epoch": 4.04, "learning_rate": 2.7739541271523147e-05, "loss": 0.2367, "step": 6741500 }, { "epoch": 4.04, "learning_rate": 2.773744130596258e-05, "loss": 0.2431, "step": 6742000 }, { "epoch": 4.04, "learning_rate": 2.7735341340402017e-05, "loss": 0.2398, "step": 6742500 }, { "epoch": 4.04, "learning_rate": 2.7733245574772574e-05, "loss": 0.2503, "step": 6743000 }, { "epoch": 4.04, "learning_rate": 2.773114560921201e-05, "loss": 0.2389, "step": 6743500 }, { "epoch": 4.04, "learning_rate": 2.7729045643651444e-05, "loss": 0.2445, "step": 6744000 }, { "epoch": 4.04, "learning_rate": 2.7726945678090878e-05, "loss": 0.2418, "step": 6744500 }, { "epoch": 4.04, "learning_rate": 2.7724845712530315e-05, "loss": 0.2383, "step": 6745000 }, { "epoch": 4.04, "learning_rate": 2.7722745746969748e-05, "loss": 0.2401, "step": 6745500 }, { "epoch": 4.04, "learning_rate": 2.7720645781409185e-05, "loss": 0.2437, "step": 6746000 }, { "epoch": 4.04, "learning_rate": 2.771854581584862e-05, "loss": 0.2349, "step": 6746500 }, { "epoch": 4.05, "learning_rate": 2.7716450050219175e-05, "loss": 0.2406, "step": 6747000 }, { "epoch": 4.05, "learning_rate": 2.7714350084658612e-05, "loss": 0.2496, "step": 6747500 }, { "epoch": 4.05, "learning_rate": 2.7712250119098046e-05, "loss": 0.2451, "step": 6748000 }, { "epoch": 4.05, "learning_rate": 2.7710150153537482e-05, "loss": 0.2415, "step": 6748500 }, { "epoch": 4.05, "learning_rate": 2.7708054387908036e-05, "loss": 0.2365, "step": 6749000 }, { "epoch": 4.05, "learning_rate": 2.7705954422347473e-05, "loss": 0.2353, "step": 6749500 }, { "epoch": 4.05, "learning_rate": 2.770385445678691e-05, "loss": 0.2388, "step": 6750000 }, { "epoch": 4.05, "learning_rate": 2.7701754491226343e-05, "loss": 0.2435, "step": 6750500 }, { "epoch": 4.05, "learning_rate": 2.76996587255969e-05, "loss": 0.2403, "step": 6751000 }, { "epoch": 4.05, "learning_rate": 2.7697558760036334e-05, "loss": 0.2385, "step": 6751500 }, { "epoch": 4.05, "learning_rate": 2.769545879447577e-05, "loss": 0.2399, "step": 6752000 }, { "epoch": 4.05, "learning_rate": 2.7693358828915204e-05, "loss": 0.2383, "step": 6752500 }, { "epoch": 4.05, "learning_rate": 2.7691263063285764e-05, "loss": 0.2407, "step": 6753000 }, { "epoch": 4.05, "learning_rate": 2.7689163097725198e-05, "loss": 0.2461, "step": 6753500 }, { "epoch": 4.05, "learning_rate": 2.768706313216463e-05, "loss": 0.2427, "step": 6754000 }, { "epoch": 4.05, "learning_rate": 2.7684967366535188e-05, "loss": 0.2419, "step": 6754500 }, { "epoch": 4.05, "learning_rate": 2.7682867400974625e-05, "loss": 0.24, "step": 6755000 }, { "epoch": 4.05, "learning_rate": 2.768076743541406e-05, "loss": 0.238, "step": 6755500 }, { "epoch": 4.05, "learning_rate": 2.7678667469853492e-05, "loss": 0.2444, "step": 6756000 }, { "epoch": 4.05, "learning_rate": 2.767656750429293e-05, "loss": 0.2477, "step": 6756500 }, { "epoch": 4.05, "learning_rate": 2.7674467538732366e-05, "loss": 0.2406, "step": 6757000 }, { "epoch": 4.05, "learning_rate": 2.7672375973034043e-05, "loss": 0.2413, "step": 6757500 }, { "epoch": 4.05, "learning_rate": 2.7670276007473476e-05, "loss": 0.2394, "step": 6758000 }, { "epoch": 4.05, "learning_rate": 2.7668176041912913e-05, "loss": 0.2448, "step": 6758500 }, { "epoch": 4.05, "learning_rate": 2.7666076076352346e-05, "loss": 0.2429, "step": 6759000 }, { "epoch": 4.05, "learning_rate": 2.7663976110791783e-05, "loss": 0.2409, "step": 6759500 }, { "epoch": 4.05, "learning_rate": 2.766187614523122e-05, "loss": 0.2453, "step": 6760000 }, { "epoch": 4.05, "learning_rate": 2.7659776179670654e-05, "loss": 0.2406, "step": 6760500 }, { "epoch": 4.05, "learning_rate": 2.7657676214110087e-05, "loss": 0.2458, "step": 6761000 }, { "epoch": 4.05, "learning_rate": 2.7655576248549524e-05, "loss": 0.2372, "step": 6761500 }, { "epoch": 4.05, "learning_rate": 2.7653476282988957e-05, "loss": 0.2417, "step": 6762000 }, { "epoch": 4.05, "learning_rate": 2.7651376317428394e-05, "loss": 0.2492, "step": 6762500 }, { "epoch": 4.05, "learning_rate": 2.764927635186783e-05, "loss": 0.2401, "step": 6763000 }, { "epoch": 4.05, "learning_rate": 2.7647180586238385e-05, "loss": 0.2409, "step": 6763500 }, { "epoch": 4.06, "learning_rate": 2.764508482060894e-05, "loss": 0.2404, "step": 6764000 }, { "epoch": 4.06, "learning_rate": 2.764298485504838e-05, "loss": 0.2439, "step": 6764500 }, { "epoch": 4.06, "learning_rate": 2.7640884889487812e-05, "loss": 0.2421, "step": 6765000 }, { "epoch": 4.06, "learning_rate": 2.7638784923927245e-05, "loss": 0.2436, "step": 6765500 }, { "epoch": 4.06, "learning_rate": 2.7636684958366682e-05, "loss": 0.2307, "step": 6766000 }, { "epoch": 4.06, "learning_rate": 2.7634584992806116e-05, "loss": 0.243, "step": 6766500 }, { "epoch": 4.06, "learning_rate": 2.7632485027245552e-05, "loss": 0.2403, "step": 6767000 }, { "epoch": 4.06, "learning_rate": 2.763038506168499e-05, "loss": 0.2341, "step": 6767500 }, { "epoch": 4.06, "learning_rate": 2.7628289296055543e-05, "loss": 0.238, "step": 6768000 }, { "epoch": 4.06, "learning_rate": 2.762618933049498e-05, "loss": 0.2405, "step": 6768500 }, { "epoch": 4.06, "learning_rate": 2.7624089364934413e-05, "loss": 0.2433, "step": 6769000 }, { "epoch": 4.06, "learning_rate": 2.762198939937385e-05, "loss": 0.245, "step": 6769500 }, { "epoch": 4.06, "learning_rate": 2.7619893633744407e-05, "loss": 0.2414, "step": 6770000 }, { "epoch": 4.06, "learning_rate": 2.761779366818384e-05, "loss": 0.2356, "step": 6770500 }, { "epoch": 4.06, "learning_rate": 2.7615693702623277e-05, "loss": 0.2427, "step": 6771000 }, { "epoch": 4.06, "learning_rate": 2.761359373706271e-05, "loss": 0.2414, "step": 6771500 }, { "epoch": 4.06, "learning_rate": 2.7611497971433268e-05, "loss": 0.242, "step": 6772000 }, { "epoch": 4.06, "learning_rate": 2.7609398005872705e-05, "loss": 0.2405, "step": 6772500 }, { "epoch": 4.06, "learning_rate": 2.7607298040312138e-05, "loss": 0.2366, "step": 6773000 }, { "epoch": 4.06, "learning_rate": 2.760519807475157e-05, "loss": 0.2417, "step": 6773500 }, { "epoch": 4.06, "learning_rate": 2.7603102309122132e-05, "loss": 0.2388, "step": 6774000 }, { "epoch": 4.06, "learning_rate": 2.760100654349269e-05, "loss": 0.2426, "step": 6774500 }, { "epoch": 4.06, "learning_rate": 2.759890657793212e-05, "loss": 0.2427, "step": 6775000 }, { "epoch": 4.06, "learning_rate": 2.759681081230268e-05, "loss": 0.2407, "step": 6775500 }, { "epoch": 4.06, "learning_rate": 2.7594710846742113e-05, "loss": 0.247, "step": 6776000 }, { "epoch": 4.06, "learning_rate": 2.759261088118155e-05, "loss": 0.2419, "step": 6776500 }, { "epoch": 4.06, "learning_rate": 2.7590510915620986e-05, "loss": 0.2394, "step": 6777000 }, { "epoch": 4.06, "learning_rate": 2.7588410950060416e-05, "loss": 0.2408, "step": 6777500 }, { "epoch": 4.06, "learning_rate": 2.7586310984499853e-05, "loss": 0.2433, "step": 6778000 }, { "epoch": 4.06, "learning_rate": 2.758421101893929e-05, "loss": 0.2409, "step": 6778500 }, { "epoch": 4.06, "learning_rate": 2.7582111053378724e-05, "loss": 0.2447, "step": 6779000 }, { "epoch": 4.06, "learning_rate": 2.758001528774928e-05, "loss": 0.2342, "step": 6779500 }, { "epoch": 4.06, "learning_rate": 2.7577915322188714e-05, "loss": 0.2381, "step": 6780000 }, { "epoch": 4.07, "learning_rate": 2.757581535662815e-05, "loss": 0.239, "step": 6780500 }, { "epoch": 4.07, "learning_rate": 2.7573715391067588e-05, "loss": 0.2438, "step": 6781000 }, { "epoch": 4.07, "learning_rate": 2.757161542550702e-05, "loss": 0.2414, "step": 6781500 }, { "epoch": 4.07, "learning_rate": 2.7569515459946458e-05, "loss": 0.2409, "step": 6782000 }, { "epoch": 4.07, "learning_rate": 2.756741549438589e-05, "loss": 0.2421, "step": 6782500 }, { "epoch": 4.07, "learning_rate": 2.7565315528825325e-05, "loss": 0.2425, "step": 6783000 }, { "epoch": 4.07, "learning_rate": 2.7563219763195882e-05, "loss": 0.238, "step": 6783500 }, { "epoch": 4.07, "learning_rate": 2.7561123997566442e-05, "loss": 0.2491, "step": 6784000 }, { "epoch": 4.07, "learning_rate": 2.7559024032005872e-05, "loss": 0.2382, "step": 6784500 }, { "epoch": 4.07, "learning_rate": 2.755692406644531e-05, "loss": 0.2461, "step": 6785000 }, { "epoch": 4.07, "learning_rate": 2.7554824100884746e-05, "loss": 0.2403, "step": 6785500 }, { "epoch": 4.07, "learning_rate": 2.755272413532418e-05, "loss": 0.2392, "step": 6786000 }, { "epoch": 4.07, "learning_rate": 2.7550624169763616e-05, "loss": 0.2455, "step": 6786500 }, { "epoch": 4.07, "learning_rate": 2.754852420420305e-05, "loss": 0.2396, "step": 6787000 }, { "epoch": 4.07, "learning_rate": 2.7546424238642487e-05, "loss": 0.2381, "step": 6787500 }, { "epoch": 4.07, "learning_rate": 2.7544328473013044e-05, "loss": 0.241, "step": 6788000 }, { "epoch": 4.07, "learning_rate": 2.7542228507452477e-05, "loss": 0.2395, "step": 6788500 }, { "epoch": 4.07, "learning_rate": 2.7540128541891914e-05, "loss": 0.2401, "step": 6789000 }, { "epoch": 4.07, "learning_rate": 2.7538028576331347e-05, "loss": 0.2432, "step": 6789500 }, { "epoch": 4.07, "learning_rate": 2.7535932810701904e-05, "loss": 0.2445, "step": 6790000 }, { "epoch": 4.07, "learning_rate": 2.753383284514134e-05, "loss": 0.2394, "step": 6790500 }, { "epoch": 4.07, "learning_rate": 2.7531732879580775e-05, "loss": 0.2387, "step": 6791000 }, { "epoch": 4.07, "learning_rate": 2.752963291402021e-05, "loss": 0.2401, "step": 6791500 }, { "epoch": 4.07, "learning_rate": 2.7527537148390765e-05, "loss": 0.2409, "step": 6792000 }, { "epoch": 4.07, "learning_rate": 2.7525437182830202e-05, "loss": 0.2422, "step": 6792500 }, { "epoch": 4.07, "learning_rate": 2.752334141720076e-05, "loss": 0.2425, "step": 6793000 }, { "epoch": 4.07, "learning_rate": 2.7521241451640196e-05, "loss": 0.2467, "step": 6793500 }, { "epoch": 4.07, "learning_rate": 2.7519141486079626e-05, "loss": 0.2374, "step": 6794000 }, { "epoch": 4.07, "learning_rate": 2.7517041520519063e-05, "loss": 0.2483, "step": 6794500 }, { "epoch": 4.07, "learning_rate": 2.75149415549585e-05, "loss": 0.2416, "step": 6795000 }, { "epoch": 4.07, "learning_rate": 2.7512841589397933e-05, "loss": 0.2414, "step": 6795500 }, { "epoch": 4.07, "learning_rate": 2.751074582376849e-05, "loss": 0.2409, "step": 6796000 }, { "epoch": 4.07, "learning_rate": 2.7508645858207923e-05, "loss": 0.2407, "step": 6796500 }, { "epoch": 4.08, "learning_rate": 2.750654589264736e-05, "loss": 0.241, "step": 6797000 }, { "epoch": 4.08, "learning_rate": 2.7504445927086797e-05, "loss": 0.2508, "step": 6797500 }, { "epoch": 4.08, "learning_rate": 2.750234596152623e-05, "loss": 0.2419, "step": 6798000 }, { "epoch": 4.08, "learning_rate": 2.7500245995965667e-05, "loss": 0.246, "step": 6798500 }, { "epoch": 4.08, "learning_rate": 2.74981460304051e-05, "loss": 0.239, "step": 6799000 }, { "epoch": 4.08, "learning_rate": 2.7496050264775658e-05, "loss": 0.2449, "step": 6799500 }, { "epoch": 4.08, "learning_rate": 2.749395029921509e-05, "loss": 0.2374, "step": 6800000 }, { "epoch": 4.08, "eval_loss": 0.22762024402618408, "eval_runtime": 1458.522, "eval_samples_per_second": 361.133, "eval_steps_per_second": 60.189, "step": 6800000 }, { "epoch": 4.08, "learning_rate": 2.7491850333654528e-05, "loss": 0.2412, "step": 6800500 }, { "epoch": 4.08, "learning_rate": 2.7489750368093965e-05, "loss": 0.2408, "step": 6801000 }, { "epoch": 4.08, "learning_rate": 2.74876504025334e-05, "loss": 0.2446, "step": 6801500 }, { "epoch": 4.08, "learning_rate": 2.7485550436972832e-05, "loss": 0.2395, "step": 6802000 }, { "epoch": 4.08, "learning_rate": 2.748345047141227e-05, "loss": 0.2432, "step": 6802500 }, { "epoch": 4.08, "learning_rate": 2.7481354705782826e-05, "loss": 0.2444, "step": 6803000 }, { "epoch": 4.08, "learning_rate": 2.7479254740222262e-05, "loss": 0.2385, "step": 6803500 }, { "epoch": 4.08, "learning_rate": 2.7477154774661693e-05, "loss": 0.2417, "step": 6804000 }, { "epoch": 4.08, "learning_rate": 2.747505480910113e-05, "loss": 0.2413, "step": 6804500 }, { "epoch": 4.08, "learning_rate": 2.7472959043471686e-05, "loss": 0.2446, "step": 6805000 }, { "epoch": 4.08, "learning_rate": 2.7470859077911123e-05, "loss": 0.2398, "step": 6805500 }, { "epoch": 4.08, "learning_rate": 2.746875911235056e-05, "loss": 0.2373, "step": 6806000 }, { "epoch": 4.08, "learning_rate": 2.746665914678999e-05, "loss": 0.2391, "step": 6806500 }, { "epoch": 4.08, "learning_rate": 2.7464559181229427e-05, "loss": 0.2374, "step": 6807000 }, { "epoch": 4.08, "learning_rate": 2.7462459215668864e-05, "loss": 0.2385, "step": 6807500 }, { "epoch": 4.08, "learning_rate": 2.7460359250108297e-05, "loss": 0.2462, "step": 6808000 }, { "epoch": 4.08, "learning_rate": 2.7458259284547734e-05, "loss": 0.236, "step": 6808500 }, { "epoch": 4.08, "learning_rate": 2.7456163518918288e-05, "loss": 0.2407, "step": 6809000 }, { "epoch": 4.08, "learning_rate": 2.7454063553357725e-05, "loss": 0.2401, "step": 6809500 }, { "epoch": 4.08, "learning_rate": 2.745196358779716e-05, "loss": 0.2439, "step": 6810000 }, { "epoch": 4.08, "learning_rate": 2.7449863622236595e-05, "loss": 0.2345, "step": 6810500 }, { "epoch": 4.08, "learning_rate": 2.744776785660715e-05, "loss": 0.2385, "step": 6811000 }, { "epoch": 4.08, "learning_rate": 2.7445667891046585e-05, "loss": 0.2416, "step": 6811500 }, { "epoch": 4.08, "learning_rate": 2.7443567925486022e-05, "loss": 0.2445, "step": 6812000 }, { "epoch": 4.08, "learning_rate": 2.7441467959925456e-05, "loss": 0.2357, "step": 6812500 }, { "epoch": 4.08, "learning_rate": 2.7439372194296016e-05, "loss": 0.2408, "step": 6813000 }, { "epoch": 4.08, "learning_rate": 2.7437272228735446e-05, "loss": 0.2398, "step": 6813500 }, { "epoch": 4.09, "learning_rate": 2.7435172263174883e-05, "loss": 0.2417, "step": 6814000 }, { "epoch": 4.09, "learning_rate": 2.743307229761432e-05, "loss": 0.244, "step": 6814500 }, { "epoch": 4.09, "learning_rate": 2.7430976531984877e-05, "loss": 0.2475, "step": 6815000 }, { "epoch": 4.09, "learning_rate": 2.742887656642431e-05, "loss": 0.2329, "step": 6815500 }, { "epoch": 4.09, "learning_rate": 2.7426776600863744e-05, "loss": 0.2444, "step": 6816000 }, { "epoch": 4.09, "learning_rate": 2.742467663530318e-05, "loss": 0.2428, "step": 6816500 }, { "epoch": 4.09, "learning_rate": 2.7422585069604858e-05, "loss": 0.245, "step": 6817000 }, { "epoch": 4.09, "learning_rate": 2.7420485104044294e-05, "loss": 0.2409, "step": 6817500 }, { "epoch": 4.09, "learning_rate": 2.7418385138483728e-05, "loss": 0.2391, "step": 6818000 }, { "epoch": 4.09, "learning_rate": 2.7416285172923165e-05, "loss": 0.2423, "step": 6818500 }, { "epoch": 4.09, "learning_rate": 2.7414185207362598e-05, "loss": 0.2403, "step": 6819000 }, { "epoch": 4.09, "learning_rate": 2.7412085241802035e-05, "loss": 0.2366, "step": 6819500 }, { "epoch": 4.09, "learning_rate": 2.7409985276241472e-05, "loss": 0.2387, "step": 6820000 }, { "epoch": 4.09, "learning_rate": 2.7407885310680902e-05, "loss": 0.2451, "step": 6820500 }, { "epoch": 4.09, "learning_rate": 2.740578954505146e-05, "loss": 0.2412, "step": 6821000 }, { "epoch": 4.09, "learning_rate": 2.7403689579490896e-05, "loss": 0.247, "step": 6821500 }, { "epoch": 4.09, "learning_rate": 2.7401589613930333e-05, "loss": 0.237, "step": 6822000 }, { "epoch": 4.09, "learning_rate": 2.7399489648369766e-05, "loss": 0.2458, "step": 6822500 }, { "epoch": 4.09, "learning_rate": 2.7397393882740323e-05, "loss": 0.2402, "step": 6823000 }, { "epoch": 4.09, "learning_rate": 2.7395293917179756e-05, "loss": 0.2487, "step": 6823500 }, { "epoch": 4.09, "learning_rate": 2.7393193951619193e-05, "loss": 0.2383, "step": 6824000 }, { "epoch": 4.09, "learning_rate": 2.739109398605863e-05, "loss": 0.244, "step": 6824500 }, { "epoch": 4.09, "learning_rate": 2.7388998220429184e-05, "loss": 0.2395, "step": 6825000 }, { "epoch": 4.09, "learning_rate": 2.738689825486862e-05, "loss": 0.2419, "step": 6825500 }, { "epoch": 4.09, "learning_rate": 2.7384798289308054e-05, "loss": 0.2397, "step": 6826000 }, { "epoch": 4.09, "learning_rate": 2.738269832374749e-05, "loss": 0.2452, "step": 6826500 }, { "epoch": 4.09, "learning_rate": 2.7380602558118048e-05, "loss": 0.2378, "step": 6827000 }, { "epoch": 4.09, "learning_rate": 2.73785067924886e-05, "loss": 0.2425, "step": 6827500 }, { "epoch": 4.09, "learning_rate": 2.7376406826928038e-05, "loss": 0.2411, "step": 6828000 }, { "epoch": 4.09, "learning_rate": 2.7374311061298595e-05, "loss": 0.2548, "step": 6828500 }, { "epoch": 4.09, "learning_rate": 2.7372211095738032e-05, "loss": 0.2414, "step": 6829000 }, { "epoch": 4.09, "learning_rate": 2.7370111130177462e-05, "loss": 0.2442, "step": 6829500 }, { "epoch": 4.09, "learning_rate": 2.73680111646169e-05, "loss": 0.2485, "step": 6830000 }, { "epoch": 4.1, "learning_rate": 2.7365911199056336e-05, "loss": 0.2391, "step": 6830500 }, { "epoch": 4.1, "learning_rate": 2.736381123349577e-05, "loss": 0.2439, "step": 6831000 }, { "epoch": 4.1, "learning_rate": 2.7361711267935206e-05, "loss": 0.2458, "step": 6831500 }, { "epoch": 4.1, "learning_rate": 2.735961130237464e-05, "loss": 0.238, "step": 6832000 }, { "epoch": 4.1, "learning_rate": 2.7357511336814076e-05, "loss": 0.2444, "step": 6832500 }, { "epoch": 4.1, "learning_rate": 2.735541137125351e-05, "loss": 0.2432, "step": 6833000 }, { "epoch": 4.1, "learning_rate": 2.7353311405692947e-05, "loss": 0.2357, "step": 6833500 }, { "epoch": 4.1, "learning_rate": 2.7351211440132384e-05, "loss": 0.2418, "step": 6834000 }, { "epoch": 4.1, "learning_rate": 2.7349115674502937e-05, "loss": 0.2408, "step": 6834500 }, { "epoch": 4.1, "learning_rate": 2.734701570894237e-05, "loss": 0.2355, "step": 6835000 }, { "epoch": 4.1, "learning_rate": 2.7344915743381807e-05, "loss": 0.2475, "step": 6835500 }, { "epoch": 4.1, "learning_rate": 2.7342815777821244e-05, "loss": 0.2428, "step": 6836000 }, { "epoch": 4.1, "learning_rate": 2.73407200121918e-05, "loss": 0.2477, "step": 6836500 }, { "epoch": 4.1, "learning_rate": 2.7338620046631235e-05, "loss": 0.2441, "step": 6837000 }, { "epoch": 4.1, "learning_rate": 2.7336520081070668e-05, "loss": 0.2416, "step": 6837500 }, { "epoch": 4.1, "learning_rate": 2.7334420115510105e-05, "loss": 0.2416, "step": 6838000 }, { "epoch": 4.1, "learning_rate": 2.7332324349880662e-05, "loss": 0.2495, "step": 6838500 }, { "epoch": 4.1, "learning_rate": 2.7330224384320095e-05, "loss": 0.2452, "step": 6839000 }, { "epoch": 4.1, "learning_rate": 2.7328124418759532e-05, "loss": 0.2403, "step": 6839500 }, { "epoch": 4.1, "learning_rate": 2.7326024453198966e-05, "loss": 0.2397, "step": 6840000 }, { "epoch": 4.1, "learning_rate": 2.7323928687569523e-05, "loss": 0.2454, "step": 6840500 }, { "epoch": 4.1, "learning_rate": 2.732182872200896e-05, "loss": 0.2537, "step": 6841000 }, { "epoch": 4.1, "learning_rate": 2.7319728756448393e-05, "loss": 0.2353, "step": 6841500 }, { "epoch": 4.1, "learning_rate": 2.7317628790887826e-05, "loss": 0.2367, "step": 6842000 }, { "epoch": 4.1, "learning_rate": 2.7315537225189507e-05, "loss": 0.2441, "step": 6842500 }, { "epoch": 4.1, "learning_rate": 2.7313437259628944e-05, "loss": 0.2426, "step": 6843000 }, { "epoch": 4.1, "learning_rate": 2.7311337294068374e-05, "loss": 0.2412, "step": 6843500 }, { "epoch": 4.1, "learning_rate": 2.730923732850781e-05, "loss": 0.2436, "step": 6844000 }, { "epoch": 4.1, "learning_rate": 2.7307137362947248e-05, "loss": 0.2387, "step": 6844500 }, { "epoch": 4.1, "learning_rate": 2.730503739738668e-05, "loss": 0.2421, "step": 6845000 }, { "epoch": 4.1, "learning_rate": 2.7302937431826118e-05, "loss": 0.2453, "step": 6845500 }, { "epoch": 4.1, "learning_rate": 2.7300837466265555e-05, "loss": 0.2471, "step": 6846000 }, { "epoch": 4.1, "learning_rate": 2.7298741700636108e-05, "loss": 0.243, "step": 6846500 }, { "epoch": 4.11, "learning_rate": 2.7296641735075545e-05, "loss": 0.243, "step": 6847000 }, { "epoch": 4.11, "learning_rate": 2.729454176951498e-05, "loss": 0.2482, "step": 6847500 }, { "epoch": 4.11, "learning_rate": 2.7292441803954415e-05, "loss": 0.2449, "step": 6848000 }, { "epoch": 4.11, "learning_rate": 2.729034603832497e-05, "loss": 0.2444, "step": 6848500 }, { "epoch": 4.11, "learning_rate": 2.7288246072764406e-05, "loss": 0.2446, "step": 6849000 }, { "epoch": 4.11, "learning_rate": 2.7286146107203843e-05, "loss": 0.2405, "step": 6849500 }, { "epoch": 4.11, "learning_rate": 2.7284046141643276e-05, "loss": 0.2445, "step": 6850000 }, { "epoch": 4.11, "learning_rate": 2.7281950376013833e-05, "loss": 0.2413, "step": 6850500 }, { "epoch": 4.11, "learning_rate": 2.7279850410453267e-05, "loss": 0.2442, "step": 6851000 }, { "epoch": 4.11, "learning_rate": 2.7277750444892703e-05, "loss": 0.2405, "step": 6851500 }, { "epoch": 4.11, "learning_rate": 2.727565047933214e-05, "loss": 0.2399, "step": 6852000 }, { "epoch": 4.11, "learning_rate": 2.7273554713702697e-05, "loss": 0.2416, "step": 6852500 }, { "epoch": 4.11, "learning_rate": 2.7271454748142127e-05, "loss": 0.2445, "step": 6853000 }, { "epoch": 4.11, "learning_rate": 2.7269354782581564e-05, "loss": 0.2396, "step": 6853500 }, { "epoch": 4.11, "learning_rate": 2.7267254817021e-05, "loss": 0.2398, "step": 6854000 }, { "epoch": 4.11, "learning_rate": 2.7265159051391558e-05, "loss": 0.2402, "step": 6854500 }, { "epoch": 4.11, "learning_rate": 2.7263059085830995e-05, "loss": 0.2397, "step": 6855000 }, { "epoch": 4.11, "learning_rate": 2.7260959120270425e-05, "loss": 0.2408, "step": 6855500 }, { "epoch": 4.11, "learning_rate": 2.7258859154709862e-05, "loss": 0.2495, "step": 6856000 }, { "epoch": 4.11, "learning_rate": 2.725676338908042e-05, "loss": 0.2439, "step": 6856500 }, { "epoch": 4.11, "learning_rate": 2.7254663423519856e-05, "loss": 0.2412, "step": 6857000 }, { "epoch": 4.11, "learning_rate": 2.725256345795929e-05, "loss": 0.2404, "step": 6857500 }, { "epoch": 4.11, "learning_rate": 2.7250463492398722e-05, "loss": 0.2396, "step": 6858000 }, { "epoch": 4.11, "learning_rate": 2.724836772676928e-05, "loss": 0.2397, "step": 6858500 }, { "epoch": 4.11, "learning_rate": 2.7246271961139836e-05, "loss": 0.2409, "step": 6859000 }, { "epoch": 4.11, "learning_rate": 2.7244171995579273e-05, "loss": 0.2408, "step": 6859500 }, { "epoch": 4.11, "learning_rate": 2.7242072030018707e-05, "loss": 0.2456, "step": 6860000 }, { "epoch": 4.11, "learning_rate": 2.723997206445814e-05, "loss": 0.2422, "step": 6860500 }, { "epoch": 4.11, "learning_rate": 2.7237872098897577e-05, "loss": 0.2414, "step": 6861000 }, { "epoch": 4.11, "learning_rate": 2.7235772133337014e-05, "loss": 0.2414, "step": 6861500 }, { "epoch": 4.11, "learning_rate": 2.723367216777645e-05, "loss": 0.2385, "step": 6862000 }, { "epoch": 4.11, "learning_rate": 2.723157220221588e-05, "loss": 0.2407, "step": 6862500 }, { "epoch": 4.11, "learning_rate": 2.7229476436586438e-05, "loss": 0.2398, "step": 6863000 }, { "epoch": 4.11, "learning_rate": 2.7227376471025875e-05, "loss": 0.2416, "step": 6863500 }, { "epoch": 4.12, "learning_rate": 2.722527650546531e-05, "loss": 0.2388, "step": 6864000 }, { "epoch": 4.12, "learning_rate": 2.7223176539904745e-05, "loss": 0.2424, "step": 6864500 }, { "epoch": 4.12, "learning_rate": 2.7221080774275302e-05, "loss": 0.2456, "step": 6865000 }, { "epoch": 4.12, "learning_rate": 2.7218980808714735e-05, "loss": 0.2405, "step": 6865500 }, { "epoch": 4.12, "learning_rate": 2.7216880843154172e-05, "loss": 0.2429, "step": 6866000 }, { "epoch": 4.12, "learning_rate": 2.721478507752473e-05, "loss": 0.2476, "step": 6866500 }, { "epoch": 4.12, "learning_rate": 2.7212685111964163e-05, "loss": 0.2463, "step": 6867000 }, { "epoch": 4.12, "learning_rate": 2.7210585146403596e-05, "loss": 0.244, "step": 6867500 }, { "epoch": 4.12, "learning_rate": 2.7208485180843033e-05, "loss": 0.242, "step": 6868000 }, { "epoch": 4.12, "learning_rate": 2.720638521528247e-05, "loss": 0.2434, "step": 6868500 }, { "epoch": 4.12, "learning_rate": 2.7204285249721907e-05, "loss": 0.2403, "step": 6869000 }, { "epoch": 4.12, "learning_rate": 2.720218528416134e-05, "loss": 0.2439, "step": 6869500 }, { "epoch": 4.12, "learning_rate": 2.7200085318600774e-05, "loss": 0.2371, "step": 6870000 }, { "epoch": 4.12, "learning_rate": 2.719798955297133e-05, "loss": 0.2408, "step": 6870500 }, { "epoch": 4.12, "learning_rate": 2.7195889587410767e-05, "loss": 0.2331, "step": 6871000 }, { "epoch": 4.12, "learning_rate": 2.71937896218502e-05, "loss": 0.2445, "step": 6871500 }, { "epoch": 4.12, "learning_rate": 2.7191689656289634e-05, "loss": 0.2382, "step": 6872000 }, { "epoch": 4.12, "learning_rate": 2.718959389066019e-05, "loss": 0.2471, "step": 6872500 }, { "epoch": 4.12, "learning_rate": 2.7187493925099628e-05, "loss": 0.2415, "step": 6873000 }, { "epoch": 4.12, "learning_rate": 2.7185393959539065e-05, "loss": 0.2393, "step": 6873500 }, { "epoch": 4.12, "learning_rate": 2.71832939939785e-05, "loss": 0.2453, "step": 6874000 }, { "epoch": 4.12, "learning_rate": 2.7181194028417932e-05, "loss": 0.2463, "step": 6874500 }, { "epoch": 4.12, "learning_rate": 2.717909406285737e-05, "loss": 0.2396, "step": 6875000 }, { "epoch": 4.12, "learning_rate": 2.7176994097296802e-05, "loss": 0.2416, "step": 6875500 }, { "epoch": 4.12, "learning_rate": 2.7174898331667362e-05, "loss": 0.2447, "step": 6876000 }, { "epoch": 4.12, "learning_rate": 2.7172798366106796e-05, "loss": 0.2384, "step": 6876500 }, { "epoch": 4.12, "learning_rate": 2.717069840054623e-05, "loss": 0.2401, "step": 6877000 }, { "epoch": 4.12, "learning_rate": 2.7168598434985666e-05, "loss": 0.2466, "step": 6877500 }, { "epoch": 4.12, "learning_rate": 2.71664984694251e-05, "loss": 0.2377, "step": 6878000 }, { "epoch": 4.12, "learning_rate": 2.7164398503864537e-05, "loss": 0.2441, "step": 6878500 }, { "epoch": 4.12, "learning_rate": 2.7162298538303973e-05, "loss": 0.2406, "step": 6879000 }, { "epoch": 4.12, "learning_rate": 2.7160198572743407e-05, "loss": 0.2419, "step": 6879500 }, { "epoch": 4.12, "learning_rate": 2.7158102807113964e-05, "loss": 0.2452, "step": 6880000 }, { "epoch": 4.13, "learning_rate": 2.7156002841553397e-05, "loss": 0.244, "step": 6880500 }, { "epoch": 4.13, "learning_rate": 2.7153902875992834e-05, "loss": 0.2376, "step": 6881000 }, { "epoch": 4.13, "learning_rate": 2.715180291043227e-05, "loss": 0.2397, "step": 6881500 }, { "epoch": 4.13, "learning_rate": 2.7149707144802825e-05, "loss": 0.2389, "step": 6882000 }, { "epoch": 4.13, "learning_rate": 2.7147607179242258e-05, "loss": 0.2365, "step": 6882500 }, { "epoch": 4.13, "learning_rate": 2.7145507213681695e-05, "loss": 0.238, "step": 6883000 }, { "epoch": 4.13, "learning_rate": 2.7143407248121132e-05, "loss": 0.2406, "step": 6883500 }, { "epoch": 4.13, "learning_rate": 2.7141311482491685e-05, "loss": 0.2446, "step": 6884000 }, { "epoch": 4.13, "learning_rate": 2.7139211516931122e-05, "loss": 0.2423, "step": 6884500 }, { "epoch": 4.13, "learning_rate": 2.7137111551370556e-05, "loss": 0.2494, "step": 6885000 }, { "epoch": 4.13, "learning_rate": 2.7135011585809992e-05, "loss": 0.2435, "step": 6885500 }, { "epoch": 4.13, "learning_rate": 2.713291582018055e-05, "loss": 0.2418, "step": 6886000 }, { "epoch": 4.13, "learning_rate": 2.7130815854619983e-05, "loss": 0.2398, "step": 6886500 }, { "epoch": 4.13, "learning_rate": 2.712871588905942e-05, "loss": 0.2378, "step": 6887000 }, { "epoch": 4.13, "learning_rate": 2.7126615923498853e-05, "loss": 0.245, "step": 6887500 }, { "epoch": 4.13, "learning_rate": 2.712452015786941e-05, "loss": 0.2422, "step": 6888000 }, { "epoch": 4.13, "learning_rate": 2.7122420192308847e-05, "loss": 0.2416, "step": 6888500 }, { "epoch": 4.13, "learning_rate": 2.712032022674828e-05, "loss": 0.2381, "step": 6889000 }, { "epoch": 4.13, "learning_rate": 2.7118220261187714e-05, "loss": 0.2377, "step": 6889500 }, { "epoch": 4.13, "learning_rate": 2.7116124495558274e-05, "loss": 0.2412, "step": 6890000 }, { "epoch": 4.13, "learning_rate": 2.7114024529997708e-05, "loss": 0.2408, "step": 6890500 }, { "epoch": 4.13, "learning_rate": 2.7111924564437145e-05, "loss": 0.2385, "step": 6891000 }, { "epoch": 4.13, "learning_rate": 2.7109824598876578e-05, "loss": 0.2446, "step": 6891500 }, { "epoch": 4.13, "learning_rate": 2.7107728833247135e-05, "loss": 0.2419, "step": 6892000 }, { "epoch": 4.13, "learning_rate": 2.710562886768657e-05, "loss": 0.2441, "step": 6892500 }, { "epoch": 4.13, "learning_rate": 2.7103528902126005e-05, "loss": 0.2394, "step": 6893000 }, { "epoch": 4.13, "learning_rate": 2.710142893656544e-05, "loss": 0.2373, "step": 6893500 }, { "epoch": 4.13, "learning_rate": 2.7099333170935996e-05, "loss": 0.2418, "step": 6894000 }, { "epoch": 4.13, "learning_rate": 2.7097233205375433e-05, "loss": 0.2441, "step": 6894500 }, { "epoch": 4.13, "learning_rate": 2.7095133239814866e-05, "loss": 0.245, "step": 6895000 }, { "epoch": 4.13, "learning_rate": 2.7093033274254303e-05, "loss": 0.2415, "step": 6895500 }, { "epoch": 4.13, "learning_rate": 2.7090937508624856e-05, "loss": 0.2452, "step": 6896000 }, { "epoch": 4.13, "learning_rate": 2.7088837543064293e-05, "loss": 0.2423, "step": 6896500 }, { "epoch": 4.14, "learning_rate": 2.708673757750373e-05, "loss": 0.2418, "step": 6897000 }, { "epoch": 4.14, "learning_rate": 2.7084637611943164e-05, "loss": 0.239, "step": 6897500 }, { "epoch": 4.14, "learning_rate": 2.7082541846313717e-05, "loss": 0.2399, "step": 6898000 }, { "epoch": 4.14, "learning_rate": 2.7080441880753154e-05, "loss": 0.2429, "step": 6898500 }, { "epoch": 4.14, "learning_rate": 2.707834191519259e-05, "loss": 0.2356, "step": 6899000 }, { "epoch": 4.14, "learning_rate": 2.7076241949632024e-05, "loss": 0.247, "step": 6899500 }, { "epoch": 4.14, "learning_rate": 2.7074146184002585e-05, "loss": 0.239, "step": 6900000 }, { "epoch": 4.14, "eval_loss": 0.22723785042762756, "eval_runtime": 1511.6506, "eval_samples_per_second": 348.44, "eval_steps_per_second": 58.074, "step": 6900000 }, { "epoch": 4.14, "learning_rate": 2.7072046218442015e-05, "loss": 0.243, "step": 6900500 }, { "epoch": 4.14, "learning_rate": 2.706994625288145e-05, "loss": 0.2411, "step": 6901000 }, { "epoch": 4.14, "learning_rate": 2.706784628732089e-05, "loss": 0.2433, "step": 6901500 }, { "epoch": 4.14, "learning_rate": 2.7065750521691445e-05, "loss": 0.2451, "step": 6902000 }, { "epoch": 4.14, "learning_rate": 2.706365055613088e-05, "loss": 0.2453, "step": 6902500 }, { "epoch": 4.14, "learning_rate": 2.7061550590570312e-05, "loss": 0.2409, "step": 6903000 }, { "epoch": 4.14, "learning_rate": 2.705945062500975e-05, "loss": 0.2446, "step": 6903500 }, { "epoch": 4.14, "learning_rate": 2.7057354859380306e-05, "loss": 0.2494, "step": 6904000 }, { "epoch": 4.14, "learning_rate": 2.7055254893819743e-05, "loss": 0.2406, "step": 6904500 }, { "epoch": 4.14, "learning_rate": 2.7053154928259173e-05, "loss": 0.242, "step": 6905000 }, { "epoch": 4.14, "learning_rate": 2.7051059162629733e-05, "loss": 0.2411, "step": 6905500 }, { "epoch": 4.14, "learning_rate": 2.7048959197069167e-05, "loss": 0.2416, "step": 6906000 }, { "epoch": 4.14, "learning_rate": 2.7046859231508604e-05, "loss": 0.2384, "step": 6906500 }, { "epoch": 4.14, "learning_rate": 2.704475926594804e-05, "loss": 0.2362, "step": 6907000 }, { "epoch": 4.14, "learning_rate": 2.7042663500318594e-05, "loss": 0.2442, "step": 6907500 }, { "epoch": 4.14, "learning_rate": 2.7040563534758028e-05, "loss": 0.2385, "step": 6908000 }, { "epoch": 4.14, "learning_rate": 2.7038463569197464e-05, "loss": 0.2477, "step": 6908500 }, { "epoch": 4.14, "learning_rate": 2.70363636036369e-05, "loss": 0.2419, "step": 6909000 }, { "epoch": 4.14, "learning_rate": 2.7034263638076335e-05, "loss": 0.2381, "step": 6909500 }, { "epoch": 4.14, "learning_rate": 2.7032163672515768e-05, "loss": 0.2416, "step": 6910000 }, { "epoch": 4.14, "learning_rate": 2.7030063706955205e-05, "loss": 0.2417, "step": 6910500 }, { "epoch": 4.14, "learning_rate": 2.7027967941325762e-05, "loss": 0.2468, "step": 6911000 }, { "epoch": 4.14, "learning_rate": 2.70258679757652e-05, "loss": 0.2465, "step": 6911500 }, { "epoch": 4.14, "learning_rate": 2.7023768010204632e-05, "loss": 0.2408, "step": 6912000 }, { "epoch": 4.14, "learning_rate": 2.7021668044644066e-05, "loss": 0.2472, "step": 6912500 }, { "epoch": 4.14, "learning_rate": 2.7019568079083503e-05, "loss": 0.2402, "step": 6913000 }, { "epoch": 4.14, "learning_rate": 2.701746811352294e-05, "loss": 0.2404, "step": 6913500 }, { "epoch": 4.15, "learning_rate": 2.7015372347893496e-05, "loss": 0.2426, "step": 6914000 }, { "epoch": 4.15, "learning_rate": 2.701327238233293e-05, "loss": 0.2412, "step": 6914500 }, { "epoch": 4.15, "learning_rate": 2.7011172416772363e-05, "loss": 0.2464, "step": 6915000 }, { "epoch": 4.15, "learning_rate": 2.70090724512118e-05, "loss": 0.24, "step": 6915500 }, { "epoch": 4.15, "learning_rate": 2.7006972485651234e-05, "loss": 0.2448, "step": 6916000 }, { "epoch": 4.15, "learning_rate": 2.7004876720021794e-05, "loss": 0.244, "step": 6916500 }, { "epoch": 4.15, "learning_rate": 2.7002776754461224e-05, "loss": 0.2405, "step": 6917000 }, { "epoch": 4.15, "learning_rate": 2.700067678890066e-05, "loss": 0.235, "step": 6917500 }, { "epoch": 4.15, "learning_rate": 2.6998576823340098e-05, "loss": 0.2398, "step": 6918000 }, { "epoch": 4.15, "learning_rate": 2.699647685777953e-05, "loss": 0.2458, "step": 6918500 }, { "epoch": 4.15, "learning_rate": 2.6994376892218968e-05, "loss": 0.2379, "step": 6919000 }, { "epoch": 4.15, "learning_rate": 2.6992276926658405e-05, "loss": 0.2481, "step": 6919500 }, { "epoch": 4.15, "learning_rate": 2.6990176961097835e-05, "loss": 0.237, "step": 6920000 }, { "epoch": 4.15, "learning_rate": 2.6988081195468395e-05, "loss": 0.246, "step": 6920500 }, { "epoch": 4.15, "learning_rate": 2.698598122990783e-05, "loss": 0.2418, "step": 6921000 }, { "epoch": 4.15, "learning_rate": 2.6983881264347266e-05, "loss": 0.2414, "step": 6921500 }, { "epoch": 4.15, "learning_rate": 2.698178549871782e-05, "loss": 0.2404, "step": 6922000 }, { "epoch": 4.15, "learning_rate": 2.6979689733088376e-05, "loss": 0.2399, "step": 6922500 }, { "epoch": 4.15, "learning_rate": 2.6977589767527813e-05, "loss": 0.24, "step": 6923000 }, { "epoch": 4.15, "learning_rate": 2.697548980196725e-05, "loss": 0.2362, "step": 6923500 }, { "epoch": 4.15, "learning_rate": 2.6973389836406683e-05, "loss": 0.2361, "step": 6924000 }, { "epoch": 4.15, "learning_rate": 2.6971289870846117e-05, "loss": 0.2383, "step": 6924500 }, { "epoch": 4.15, "learning_rate": 2.6969189905285554e-05, "loss": 0.2421, "step": 6925000 }, { "epoch": 4.15, "learning_rate": 2.6967089939724987e-05, "loss": 0.2399, "step": 6925500 }, { "epoch": 4.15, "learning_rate": 2.6964989974164424e-05, "loss": 0.2409, "step": 6926000 }, { "epoch": 4.15, "learning_rate": 2.696289000860386e-05, "loss": 0.2427, "step": 6926500 }, { "epoch": 4.15, "learning_rate": 2.696079004304329e-05, "loss": 0.2375, "step": 6927000 }, { "epoch": 4.15, "learning_rate": 2.695869427741385e-05, "loss": 0.2441, "step": 6927500 }, { "epoch": 4.15, "learning_rate": 2.6956594311853285e-05, "loss": 0.2419, "step": 6928000 }, { "epoch": 4.15, "learning_rate": 2.695449434629272e-05, "loss": 0.2422, "step": 6928500 }, { "epoch": 4.15, "learning_rate": 2.695239438073216e-05, "loss": 0.2389, "step": 6929000 }, { "epoch": 4.15, "learning_rate": 2.695029441517159e-05, "loss": 0.2406, "step": 6929500 }, { "epoch": 4.15, "learning_rate": 2.6948194449611025e-05, "loss": 0.2412, "step": 6930000 }, { "epoch": 4.16, "learning_rate": 2.6946094484050462e-05, "loss": 0.2399, "step": 6930500 }, { "epoch": 4.16, "learning_rate": 2.694399871842102e-05, "loss": 0.2345, "step": 6931000 }, { "epoch": 4.16, "learning_rate": 2.6941898752860453e-05, "loss": 0.2404, "step": 6931500 }, { "epoch": 4.16, "learning_rate": 2.6939798787299886e-05, "loss": 0.2397, "step": 6932000 }, { "epoch": 4.16, "learning_rate": 2.6937698821739323e-05, "loss": 0.2436, "step": 6932500 }, { "epoch": 4.16, "learning_rate": 2.693559885617876e-05, "loss": 0.2442, "step": 6933000 }, { "epoch": 4.16, "learning_rate": 2.6933498890618193e-05, "loss": 0.2443, "step": 6933500 }, { "epoch": 4.16, "learning_rate": 2.693139892505763e-05, "loss": 0.24, "step": 6934000 }, { "epoch": 4.16, "learning_rate": 2.6929298959497063e-05, "loss": 0.2379, "step": 6934500 }, { "epoch": 4.16, "learning_rate": 2.692720319386762e-05, "loss": 0.2442, "step": 6935000 }, { "epoch": 4.16, "learning_rate": 2.6925103228307054e-05, "loss": 0.2376, "step": 6935500 }, { "epoch": 4.16, "learning_rate": 2.692300326274649e-05, "loss": 0.2362, "step": 6936000 }, { "epoch": 4.16, "learning_rate": 2.6920907497117044e-05, "loss": 0.2384, "step": 6936500 }, { "epoch": 4.16, "learning_rate": 2.691880753155648e-05, "loss": 0.243, "step": 6937000 }, { "epoch": 4.16, "learning_rate": 2.6916707565995918e-05, "loss": 0.2485, "step": 6937500 }, { "epoch": 4.16, "learning_rate": 2.691460760043535e-05, "loss": 0.2469, "step": 6938000 }, { "epoch": 4.16, "learning_rate": 2.6912507634874788e-05, "loss": 0.241, "step": 6938500 }, { "epoch": 4.16, "learning_rate": 2.6910407669314225e-05, "loss": 0.2366, "step": 6939000 }, { "epoch": 4.16, "learning_rate": 2.690830770375366e-05, "loss": 0.24, "step": 6939500 }, { "epoch": 4.16, "learning_rate": 2.6906207738193092e-05, "loss": 0.2371, "step": 6940000 }, { "epoch": 4.16, "learning_rate": 2.690411197256365e-05, "loss": 0.2448, "step": 6940500 }, { "epoch": 4.16, "learning_rate": 2.6902012007003086e-05, "loss": 0.2357, "step": 6941000 }, { "epoch": 4.16, "learning_rate": 2.689991204144252e-05, "loss": 0.2415, "step": 6941500 }, { "epoch": 4.16, "learning_rate": 2.6897812075881953e-05, "loss": 0.2433, "step": 6942000 }, { "epoch": 4.16, "learning_rate": 2.689571211032139e-05, "loss": 0.2435, "step": 6942500 }, { "epoch": 4.16, "learning_rate": 2.6893616344691947e-05, "loss": 0.2357, "step": 6943000 }, { "epoch": 4.16, "learning_rate": 2.6891516379131383e-05, "loss": 0.2432, "step": 6943500 }, { "epoch": 4.16, "learning_rate": 2.6889416413570817e-05, "loss": 0.2402, "step": 6944000 }, { "epoch": 4.16, "learning_rate": 2.688731644801025e-05, "loss": 0.2422, "step": 6944500 }, { "epoch": 4.16, "learning_rate": 2.688522488231193e-05, "loss": 0.2441, "step": 6945000 }, { "epoch": 4.16, "learning_rate": 2.6883124916751368e-05, "loss": 0.2346, "step": 6945500 }, { "epoch": 4.16, "learning_rate": 2.688102915112192e-05, "loss": 0.247, "step": 6946000 }, { "epoch": 4.16, "learning_rate": 2.6878929185561355e-05, "loss": 0.2395, "step": 6946500 }, { "epoch": 4.17, "learning_rate": 2.687682922000079e-05, "loss": 0.2435, "step": 6947000 }, { "epoch": 4.17, "learning_rate": 2.687472925444023e-05, "loss": 0.2384, "step": 6947500 }, { "epoch": 4.17, "learning_rate": 2.6872629288879662e-05, "loss": 0.2427, "step": 6948000 }, { "epoch": 4.17, "learning_rate": 2.6870529323319095e-05, "loss": 0.2411, "step": 6948500 }, { "epoch": 4.17, "learning_rate": 2.6868429357758532e-05, "loss": 0.24, "step": 6949000 }, { "epoch": 4.17, "learning_rate": 2.686632939219797e-05, "loss": 0.2344, "step": 6949500 }, { "epoch": 4.17, "learning_rate": 2.6864229426637402e-05, "loss": 0.2429, "step": 6950000 }, { "epoch": 4.17, "learning_rate": 2.686212946107684e-05, "loss": 0.2473, "step": 6950500 }, { "epoch": 4.17, "learning_rate": 2.6860029495516276e-05, "loss": 0.247, "step": 6951000 }, { "epoch": 4.17, "learning_rate": 2.6857929529955706e-05, "loss": 0.2383, "step": 6951500 }, { "epoch": 4.17, "learning_rate": 2.6855833764326263e-05, "loss": 0.2438, "step": 6952000 }, { "epoch": 4.17, "learning_rate": 2.68537337987657e-05, "loss": 0.2421, "step": 6952500 }, { "epoch": 4.17, "learning_rate": 2.6851633833205137e-05, "loss": 0.2474, "step": 6953000 }, { "epoch": 4.17, "learning_rate": 2.684953386764457e-05, "loss": 0.2404, "step": 6953500 }, { "epoch": 4.17, "learning_rate": 2.6847438102015127e-05, "loss": 0.2401, "step": 6954000 }, { "epoch": 4.17, "learning_rate": 2.684533813645456e-05, "loss": 0.2406, "step": 6954500 }, { "epoch": 4.17, "learning_rate": 2.6843238170893998e-05, "loss": 0.2411, "step": 6955000 }, { "epoch": 4.17, "learning_rate": 2.6841138205333434e-05, "loss": 0.2412, "step": 6955500 }, { "epoch": 4.17, "learning_rate": 2.6839042439703988e-05, "loss": 0.2426, "step": 6956000 }, { "epoch": 4.17, "learning_rate": 2.6836942474143425e-05, "loss": 0.238, "step": 6956500 }, { "epoch": 4.17, "learning_rate": 2.683484250858286e-05, "loss": 0.2419, "step": 6957000 }, { "epoch": 4.17, "learning_rate": 2.6832742543022295e-05, "loss": 0.2396, "step": 6957500 }, { "epoch": 4.17, "learning_rate": 2.6830642577461732e-05, "loss": 0.2469, "step": 6958000 }, { "epoch": 4.17, "learning_rate": 2.6828546811832286e-05, "loss": 0.2401, "step": 6958500 }, { "epoch": 4.17, "learning_rate": 2.682644684627172e-05, "loss": 0.2475, "step": 6959000 }, { "epoch": 4.17, "learning_rate": 2.6824346880711156e-05, "loss": 0.2395, "step": 6959500 }, { "epoch": 4.17, "learning_rate": 2.6822246915150593e-05, "loss": 0.2386, "step": 6960000 }, { "epoch": 4.17, "learning_rate": 2.6820151149521146e-05, "loss": 0.2435, "step": 6960500 }, { "epoch": 4.17, "learning_rate": 2.6818055383891703e-05, "loss": 0.2418, "step": 6961000 }, { "epoch": 4.17, "learning_rate": 2.681595541833114e-05, "loss": 0.2409, "step": 6961500 }, { "epoch": 4.17, "learning_rate": 2.6813855452770574e-05, "loss": 0.243, "step": 6962000 }, { "epoch": 4.17, "learning_rate": 2.681175548721001e-05, "loss": 0.2441, "step": 6962500 }, { "epoch": 4.17, "learning_rate": 2.6809655521649444e-05, "loss": 0.2455, "step": 6963000 }, { "epoch": 4.17, "learning_rate": 2.680755555608888e-05, "loss": 0.2391, "step": 6963500 }, { "epoch": 4.18, "learning_rate": 2.6805455590528314e-05, "loss": 0.2357, "step": 6964000 }, { "epoch": 4.18, "learning_rate": 2.680335562496775e-05, "loss": 0.2359, "step": 6964500 }, { "epoch": 4.18, "learning_rate": 2.6801259859338305e-05, "loss": 0.2482, "step": 6965000 }, { "epoch": 4.18, "learning_rate": 2.679915989377774e-05, "loss": 0.2425, "step": 6965500 }, { "epoch": 4.18, "learning_rate": 2.6797059928217175e-05, "loss": 0.2384, "step": 6966000 }, { "epoch": 4.18, "learning_rate": 2.6794959962656612e-05, "loss": 0.2433, "step": 6966500 }, { "epoch": 4.18, "learning_rate": 2.679285999709605e-05, "loss": 0.2414, "step": 6967000 }, { "epoch": 4.18, "learning_rate": 2.6790764231466602e-05, "loss": 0.2427, "step": 6967500 }, { "epoch": 4.18, "learning_rate": 2.678866426590604e-05, "loss": 0.2438, "step": 6968000 }, { "epoch": 4.18, "learning_rate": 2.6786564300345473e-05, "loss": 0.2376, "step": 6968500 }, { "epoch": 4.18, "learning_rate": 2.678446433478491e-05, "loss": 0.2451, "step": 6969000 }, { "epoch": 4.18, "learning_rate": 2.6782368569155466e-05, "loss": 0.2442, "step": 6969500 }, { "epoch": 4.18, "learning_rate": 2.67802686035949e-05, "loss": 0.2413, "step": 6970000 }, { "epoch": 4.18, "learning_rate": 2.6778168638034337e-05, "loss": 0.2388, "step": 6970500 }, { "epoch": 4.18, "learning_rate": 2.677606867247377e-05, "loss": 0.2378, "step": 6971000 }, { "epoch": 4.18, "learning_rate": 2.6773972906844327e-05, "loss": 0.2426, "step": 6971500 }, { "epoch": 4.18, "learning_rate": 2.6771872941283764e-05, "loss": 0.2392, "step": 6972000 }, { "epoch": 4.18, "learning_rate": 2.6769772975723197e-05, "loss": 0.2451, "step": 6972500 }, { "epoch": 4.18, "learning_rate": 2.676767301016263e-05, "loss": 0.2398, "step": 6973000 }, { "epoch": 4.18, "learning_rate": 2.676557724453319e-05, "loss": 0.2383, "step": 6973500 }, { "epoch": 4.18, "learning_rate": 2.6763477278972625e-05, "loss": 0.2423, "step": 6974000 }, { "epoch": 4.18, "learning_rate": 2.6761377313412058e-05, "loss": 0.2403, "step": 6974500 }, { "epoch": 4.18, "learning_rate": 2.6759277347851495e-05, "loss": 0.2426, "step": 6975000 }, { "epoch": 4.18, "learning_rate": 2.6757181582222052e-05, "loss": 0.2422, "step": 6975500 }, { "epoch": 4.18, "learning_rate": 2.6755081616661485e-05, "loss": 0.2389, "step": 6976000 }, { "epoch": 4.18, "learning_rate": 2.6752981651100922e-05, "loss": 0.2363, "step": 6976500 }, { "epoch": 4.18, "learning_rate": 2.6750881685540356e-05, "loss": 0.238, "step": 6977000 }, { "epoch": 4.18, "learning_rate": 2.6748785919910913e-05, "loss": 0.243, "step": 6977500 }, { "epoch": 4.18, "learning_rate": 2.674668595435035e-05, "loss": 0.2426, "step": 6978000 }, { "epoch": 4.18, "learning_rate": 2.6744585988789783e-05, "loss": 0.238, "step": 6978500 }, { "epoch": 4.18, "learning_rate": 2.674248602322922e-05, "loss": 0.2432, "step": 6979000 }, { "epoch": 4.18, "learning_rate": 2.6740390257599773e-05, "loss": 0.2468, "step": 6979500 }, { "epoch": 4.18, "learning_rate": 2.673829029203921e-05, "loss": 0.2461, "step": 6980000 }, { "epoch": 4.19, "learning_rate": 2.6736190326478647e-05, "loss": 0.2458, "step": 6980500 }, { "epoch": 4.19, "learning_rate": 2.673409036091808e-05, "loss": 0.2446, "step": 6981000 }, { "epoch": 4.19, "learning_rate": 2.6731990395357517e-05, "loss": 0.2398, "step": 6981500 }, { "epoch": 4.19, "learning_rate": 2.672989462972807e-05, "loss": 0.2404, "step": 6982000 }, { "epoch": 4.19, "learning_rate": 2.6727794664167508e-05, "loss": 0.2453, "step": 6982500 }, { "epoch": 4.19, "learning_rate": 2.672569469860694e-05, "loss": 0.2417, "step": 6983000 }, { "epoch": 4.19, "learning_rate": 2.6723594733046378e-05, "loss": 0.2459, "step": 6983500 }, { "epoch": 4.19, "learning_rate": 2.6721494767485815e-05, "loss": 0.2351, "step": 6984000 }, { "epoch": 4.19, "learning_rate": 2.671939480192525e-05, "loss": 0.2398, "step": 6984500 }, { "epoch": 4.19, "learning_rate": 2.6717294836364682e-05, "loss": 0.2523, "step": 6985000 }, { "epoch": 4.19, "learning_rate": 2.671519487080412e-05, "loss": 0.2426, "step": 6985500 }, { "epoch": 4.19, "learning_rate": 2.6713099105174676e-05, "loss": 0.2443, "step": 6986000 }, { "epoch": 4.19, "learning_rate": 2.671099913961411e-05, "loss": 0.243, "step": 6986500 }, { "epoch": 4.19, "learning_rate": 2.6708903373984666e-05, "loss": 0.2414, "step": 6987000 }, { "epoch": 4.19, "learning_rate": 2.6706803408424103e-05, "loss": 0.237, "step": 6987500 }, { "epoch": 4.19, "learning_rate": 2.670470764279466e-05, "loss": 0.2431, "step": 6988000 }, { "epoch": 4.19, "learning_rate": 2.670260767723409e-05, "loss": 0.2427, "step": 6988500 }, { "epoch": 4.19, "learning_rate": 2.6700507711673527e-05, "loss": 0.24, "step": 6989000 }, { "epoch": 4.19, "learning_rate": 2.6698407746112964e-05, "loss": 0.243, "step": 6989500 }, { "epoch": 4.19, "learning_rate": 2.6696307780552397e-05, "loss": 0.2405, "step": 6990000 }, { "epoch": 4.19, "learning_rate": 2.6694207814991834e-05, "loss": 0.2399, "step": 6990500 }, { "epoch": 4.19, "learning_rate": 2.669210784943127e-05, "loss": 0.2396, "step": 6991000 }, { "epoch": 4.19, "learning_rate": 2.6690007883870704e-05, "loss": 0.246, "step": 6991500 }, { "epoch": 4.19, "learning_rate": 2.6687907918310138e-05, "loss": 0.2402, "step": 6992000 }, { "epoch": 4.19, "learning_rate": 2.6685807952749575e-05, "loss": 0.2406, "step": 6992500 }, { "epoch": 4.19, "learning_rate": 2.668370798718901e-05, "loss": 0.2424, "step": 6993000 }, { "epoch": 4.19, "learning_rate": 2.6681608021628445e-05, "loss": 0.2366, "step": 6993500 }, { "epoch": 4.19, "learning_rate": 2.6679512255999e-05, "loss": 0.2406, "step": 6994000 }, { "epoch": 4.19, "learning_rate": 2.6677412290438435e-05, "loss": 0.2383, "step": 6994500 }, { "epoch": 4.19, "learning_rate": 2.6675312324877872e-05, "loss": 0.2411, "step": 6995000 }, { "epoch": 4.19, "learning_rate": 2.667321235931731e-05, "loss": 0.2459, "step": 6995500 }, { "epoch": 4.19, "learning_rate": 2.6671112393756742e-05, "loss": 0.2399, "step": 6996000 }, { "epoch": 4.19, "learning_rate": 2.6669012428196176e-05, "loss": 0.2444, "step": 6996500 }, { "epoch": 4.19, "learning_rate": 2.6666916662566733e-05, "loss": 0.2394, "step": 6997000 }, { "epoch": 4.2, "learning_rate": 2.666481669700617e-05, "loss": 0.2363, "step": 6997500 }, { "epoch": 4.2, "learning_rate": 2.6662716731445603e-05, "loss": 0.2388, "step": 6998000 }, { "epoch": 4.2, "learning_rate": 2.666061676588504e-05, "loss": 0.2446, "step": 6998500 }, { "epoch": 4.2, "learning_rate": 2.6658516800324474e-05, "loss": 0.2453, "step": 6999000 }, { "epoch": 4.2, "learning_rate": 2.665641683476391e-05, "loss": 0.2439, "step": 6999500 }, { "epoch": 4.2, "learning_rate": 2.6654316869203344e-05, "loss": 0.2425, "step": 7000000 }, { "epoch": 4.2, "eval_loss": 0.22679175436496735, "eval_runtime": 1465.8218, "eval_samples_per_second": 359.334, "eval_steps_per_second": 59.889, "step": 7000000 }, { "epoch": 4.2, "learning_rate": 2.665221690364278e-05, "loss": 0.238, "step": 7000500 }, { "epoch": 4.2, "learning_rate": 2.6650121138013338e-05, "loss": 0.2442, "step": 7001000 }, { "epoch": 4.2, "learning_rate": 2.664802117245277e-05, "loss": 0.2395, "step": 7001500 }, { "epoch": 4.2, "learning_rate": 2.6645921206892205e-05, "loss": 0.2382, "step": 7002000 }, { "epoch": 4.2, "learning_rate": 2.664382124133164e-05, "loss": 0.2367, "step": 7002500 }, { "epoch": 4.2, "learning_rate": 2.66417254757022e-05, "loss": 0.2455, "step": 7003000 }, { "epoch": 4.2, "learning_rate": 2.6639625510141632e-05, "loss": 0.2397, "step": 7003500 }, { "epoch": 4.2, "learning_rate": 2.663752554458107e-05, "loss": 0.2381, "step": 7004000 }, { "epoch": 4.2, "learning_rate": 2.6635425579020502e-05, "loss": 0.2448, "step": 7004500 }, { "epoch": 4.2, "learning_rate": 2.663332981339106e-05, "loss": 0.2406, "step": 7005000 }, { "epoch": 4.2, "learning_rate": 2.6631229847830496e-05, "loss": 0.2401, "step": 7005500 }, { "epoch": 4.2, "learning_rate": 2.662913408220105e-05, "loss": 0.244, "step": 7006000 }, { "epoch": 4.2, "learning_rate": 2.6627034116640486e-05, "loss": 0.2497, "step": 7006500 }, { "epoch": 4.2, "learning_rate": 2.6624934151079923e-05, "loss": 0.2441, "step": 7007000 }, { "epoch": 4.2, "learning_rate": 2.6622834185519357e-05, "loss": 0.2385, "step": 7007500 }, { "epoch": 4.2, "learning_rate": 2.6620734219958794e-05, "loss": 0.2397, "step": 7008000 }, { "epoch": 4.2, "learning_rate": 2.6618634254398227e-05, "loss": 0.2363, "step": 7008500 }, { "epoch": 4.2, "learning_rate": 2.661653428883766e-05, "loss": 0.2353, "step": 7009000 }, { "epoch": 4.2, "learning_rate": 2.6614434323277097e-05, "loss": 0.2448, "step": 7009500 }, { "epoch": 4.2, "learning_rate": 2.6612338557647654e-05, "loss": 0.2421, "step": 7010000 }, { "epoch": 4.2, "learning_rate": 2.6610242792018208e-05, "loss": 0.2505, "step": 7010500 }, { "epoch": 4.2, "learning_rate": 2.6608142826457645e-05, "loss": 0.243, "step": 7011000 }, { "epoch": 4.2, "learning_rate": 2.660604286089708e-05, "loss": 0.2397, "step": 7011500 }, { "epoch": 4.2, "learning_rate": 2.6603942895336515e-05, "loss": 0.2414, "step": 7012000 }, { "epoch": 4.2, "learning_rate": 2.6601842929775952e-05, "loss": 0.2428, "step": 7012500 }, { "epoch": 4.2, "learning_rate": 2.659974296421539e-05, "loss": 0.2445, "step": 7013000 }, { "epoch": 4.2, "learning_rate": 2.6597642998654822e-05, "loss": 0.2427, "step": 7013500 }, { "epoch": 4.21, "learning_rate": 2.6595543033094256e-05, "loss": 0.2426, "step": 7014000 }, { "epoch": 4.21, "learning_rate": 2.6593447267464813e-05, "loss": 0.2404, "step": 7014500 }, { "epoch": 4.21, "learning_rate": 2.659134730190425e-05, "loss": 0.2414, "step": 7015000 }, { "epoch": 4.21, "learning_rate": 2.6589247336343683e-05, "loss": 0.2404, "step": 7015500 }, { "epoch": 4.21, "learning_rate": 2.658715157071424e-05, "loss": 0.2342, "step": 7016000 }, { "epoch": 4.21, "learning_rate": 2.6585051605153677e-05, "loss": 0.239, "step": 7016500 }, { "epoch": 4.21, "learning_rate": 2.658295163959311e-05, "loss": 0.2444, "step": 7017000 }, { "epoch": 4.21, "learning_rate": 2.6580851674032547e-05, "loss": 0.2404, "step": 7017500 }, { "epoch": 4.21, "learning_rate": 2.657875170847198e-05, "loss": 0.2409, "step": 7018000 }, { "epoch": 4.21, "learning_rate": 2.6576651742911414e-05, "loss": 0.2401, "step": 7018500 }, { "epoch": 4.21, "learning_rate": 2.657455177735085e-05, "loss": 0.2419, "step": 7019000 }, { "epoch": 4.21, "learning_rate": 2.6572451811790288e-05, "loss": 0.2368, "step": 7019500 }, { "epoch": 4.21, "learning_rate": 2.6570356046160845e-05, "loss": 0.2442, "step": 7020000 }, { "epoch": 4.21, "learning_rate": 2.6568256080600278e-05, "loss": 0.2423, "step": 7020500 }, { "epoch": 4.21, "learning_rate": 2.656615611503971e-05, "loss": 0.2417, "step": 7021000 }, { "epoch": 4.21, "learning_rate": 2.6564056149479148e-05, "loss": 0.2412, "step": 7021500 }, { "epoch": 4.21, "learning_rate": 2.6561956183918585e-05, "loss": 0.2412, "step": 7022000 }, { "epoch": 4.21, "learning_rate": 2.655985621835802e-05, "loss": 0.2435, "step": 7022500 }, { "epoch": 4.21, "learning_rate": 2.6557756252797452e-05, "loss": 0.2391, "step": 7023000 }, { "epoch": 4.21, "learning_rate": 2.655565628723689e-05, "loss": 0.2451, "step": 7023500 }, { "epoch": 4.21, "learning_rate": 2.6553560521607446e-05, "loss": 0.243, "step": 7024000 }, { "epoch": 4.21, "learning_rate": 2.6551460556046883e-05, "loss": 0.2391, "step": 7024500 }, { "epoch": 4.21, "learning_rate": 2.6549360590486316e-05, "loss": 0.2409, "step": 7025000 }, { "epoch": 4.21, "learning_rate": 2.654726482485687e-05, "loss": 0.2466, "step": 7025500 }, { "epoch": 4.21, "learning_rate": 2.6545164859296307e-05, "loss": 0.2388, "step": 7026000 }, { "epoch": 4.21, "learning_rate": 2.6543064893735743e-05, "loss": 0.2394, "step": 7026500 }, { "epoch": 4.21, "learning_rate": 2.6540964928175177e-05, "loss": 0.2401, "step": 7027000 }, { "epoch": 4.21, "learning_rate": 2.6538864962614614e-05, "loss": 0.2398, "step": 7027500 }, { "epoch": 4.21, "learning_rate": 2.6536764997054047e-05, "loss": 0.2418, "step": 7028000 }, { "epoch": 4.21, "learning_rate": 2.6534665031493484e-05, "loss": 0.241, "step": 7028500 }, { "epoch": 4.21, "learning_rate": 2.6532565065932918e-05, "loss": 0.2406, "step": 7029000 }, { "epoch": 4.21, "learning_rate": 2.6530469300303474e-05, "loss": 0.2469, "step": 7029500 }, { "epoch": 4.21, "learning_rate": 2.652836933474291e-05, "loss": 0.2425, "step": 7030000 }, { "epoch": 4.22, "learning_rate": 2.6526269369182345e-05, "loss": 0.2445, "step": 7030500 }, { "epoch": 4.22, "learning_rate": 2.6524169403621778e-05, "loss": 0.2438, "step": 7031000 }, { "epoch": 4.22, "learning_rate": 2.652207363799234e-05, "loss": 0.2373, "step": 7031500 }, { "epoch": 4.22, "learning_rate": 2.6519973672431772e-05, "loss": 0.2386, "step": 7032000 }, { "epoch": 4.22, "learning_rate": 2.6517873706871206e-05, "loss": 0.2386, "step": 7032500 }, { "epoch": 4.22, "learning_rate": 2.6515773741310642e-05, "loss": 0.2375, "step": 7033000 }, { "epoch": 4.22, "learning_rate": 2.65136779756812e-05, "loss": 0.2466, "step": 7033500 }, { "epoch": 4.22, "learning_rate": 2.6511578010120633e-05, "loss": 0.243, "step": 7034000 }, { "epoch": 4.22, "learning_rate": 2.650947804456007e-05, "loss": 0.2373, "step": 7034500 }, { "epoch": 4.22, "learning_rate": 2.6507378078999503e-05, "loss": 0.2489, "step": 7035000 }, { "epoch": 4.22, "learning_rate": 2.650528231337006e-05, "loss": 0.2393, "step": 7035500 }, { "epoch": 4.22, "learning_rate": 2.6503182347809497e-05, "loss": 0.2411, "step": 7036000 }, { "epoch": 4.22, "learning_rate": 2.6501086582180054e-05, "loss": 0.2422, "step": 7036500 }, { "epoch": 4.22, "learning_rate": 2.6498986616619487e-05, "loss": 0.2363, "step": 7037000 }, { "epoch": 4.22, "learning_rate": 2.649688665105892e-05, "loss": 0.2387, "step": 7037500 }, { "epoch": 4.22, "learning_rate": 2.6494786685498358e-05, "loss": 0.2422, "step": 7038000 }, { "epoch": 4.22, "learning_rate": 2.6492690919868915e-05, "loss": 0.2477, "step": 7038500 }, { "epoch": 4.22, "learning_rate": 2.649059095430835e-05, "loss": 0.2408, "step": 7039000 }, { "epoch": 4.22, "learning_rate": 2.648849098874778e-05, "loss": 0.2442, "step": 7039500 }, { "epoch": 4.22, "learning_rate": 2.648639102318722e-05, "loss": 0.239, "step": 7040000 }, { "epoch": 4.22, "learning_rate": 2.6484291057626655e-05, "loss": 0.2471, "step": 7040500 }, { "epoch": 4.22, "learning_rate": 2.648219109206609e-05, "loss": 0.242, "step": 7041000 }, { "epoch": 4.22, "learning_rate": 2.6480091126505526e-05, "loss": 0.243, "step": 7041500 }, { "epoch": 4.22, "learning_rate": 2.647799116094496e-05, "loss": 0.2426, "step": 7042000 }, { "epoch": 4.22, "learning_rate": 2.6475891195384396e-05, "loss": 0.2438, "step": 7042500 }, { "epoch": 4.22, "learning_rate": 2.647379122982383e-05, "loss": 0.2437, "step": 7043000 }, { "epoch": 4.22, "learning_rate": 2.6471691264263266e-05, "loss": 0.2441, "step": 7043500 }, { "epoch": 4.22, "learning_rate": 2.6469591298702703e-05, "loss": 0.2365, "step": 7044000 }, { "epoch": 4.22, "learning_rate": 2.6467495533073257e-05, "loss": 0.2413, "step": 7044500 }, { "epoch": 4.22, "learning_rate": 2.646539556751269e-05, "loss": 0.2386, "step": 7045000 }, { "epoch": 4.22, "learning_rate": 2.6463295601952127e-05, "loss": 0.2369, "step": 7045500 }, { "epoch": 4.22, "learning_rate": 2.6461195636391564e-05, "loss": 0.2488, "step": 7046000 }, { "epoch": 4.22, "learning_rate": 2.645909987076212e-05, "loss": 0.2436, "step": 7046500 }, { "epoch": 4.22, "learning_rate": 2.6456999905201554e-05, "loss": 0.2391, "step": 7047000 }, { "epoch": 4.23, "learning_rate": 2.6454899939640988e-05, "loss": 0.2397, "step": 7047500 }, { "epoch": 4.23, "learning_rate": 2.6452799974080424e-05, "loss": 0.242, "step": 7048000 }, { "epoch": 4.23, "learning_rate": 2.645070000851986e-05, "loss": 0.2412, "step": 7048500 }, { "epoch": 4.23, "learning_rate": 2.6448604242890418e-05, "loss": 0.2414, "step": 7049000 }, { "epoch": 4.23, "learning_rate": 2.6446504277329852e-05, "loss": 0.2416, "step": 7049500 }, { "epoch": 4.23, "learning_rate": 2.6444404311769285e-05, "loss": 0.2417, "step": 7050000 }, { "epoch": 4.23, "learning_rate": 2.6442304346208722e-05, "loss": 0.2442, "step": 7050500 }, { "epoch": 4.23, "learning_rate": 2.644020438064816e-05, "loss": 0.2406, "step": 7051000 }, { "epoch": 4.23, "learning_rate": 2.6438104415087592e-05, "loss": 0.247, "step": 7051500 }, { "epoch": 4.23, "learning_rate": 2.6436004449527026e-05, "loss": 0.2442, "step": 7052000 }, { "epoch": 4.23, "learning_rate": 2.6433904483966463e-05, "loss": 0.2377, "step": 7052500 }, { "epoch": 4.23, "learning_rate": 2.643180871833702e-05, "loss": 0.241, "step": 7053000 }, { "epoch": 4.23, "learning_rate": 2.6429708752776456e-05, "loss": 0.2405, "step": 7053500 }, { "epoch": 4.23, "learning_rate": 2.642760878721589e-05, "loss": 0.2429, "step": 7054000 }, { "epoch": 4.23, "learning_rate": 2.6425508821655323e-05, "loss": 0.2422, "step": 7054500 }, { "epoch": 4.23, "learning_rate": 2.642341305602588e-05, "loss": 0.242, "step": 7055000 }, { "epoch": 4.23, "learning_rate": 2.6421313090465317e-05, "loss": 0.2467, "step": 7055500 }, { "epoch": 4.23, "learning_rate": 2.641921312490475e-05, "loss": 0.2433, "step": 7056000 }, { "epoch": 4.23, "learning_rate": 2.6417113159344187e-05, "loss": 0.2436, "step": 7056500 }, { "epoch": 4.23, "learning_rate": 2.641501739371474e-05, "loss": 0.2405, "step": 7057000 }, { "epoch": 4.23, "learning_rate": 2.6412917428154178e-05, "loss": 0.2467, "step": 7057500 }, { "epoch": 4.23, "learning_rate": 2.6410817462593615e-05, "loss": 0.2449, "step": 7058000 }, { "epoch": 4.23, "learning_rate": 2.6408717497033048e-05, "loss": 0.2338, "step": 7058500 }, { "epoch": 4.23, "learning_rate": 2.6406621731403602e-05, "loss": 0.2352, "step": 7059000 }, { "epoch": 4.23, "learning_rate": 2.640452176584304e-05, "loss": 0.2444, "step": 7059500 }, { "epoch": 4.23, "learning_rate": 2.6402421800282475e-05, "loss": 0.2441, "step": 7060000 }, { "epoch": 4.23, "learning_rate": 2.6400321834721912e-05, "loss": 0.2453, "step": 7060500 }, { "epoch": 4.23, "learning_rate": 2.639822606909247e-05, "loss": 0.2413, "step": 7061000 }, { "epoch": 4.23, "learning_rate": 2.63961261035319e-05, "loss": 0.2372, "step": 7061500 }, { "epoch": 4.23, "learning_rate": 2.6394026137971336e-05, "loss": 0.2401, "step": 7062000 }, { "epoch": 4.23, "learning_rate": 2.6391926172410773e-05, "loss": 0.2403, "step": 7062500 }, { "epoch": 4.23, "learning_rate": 2.638983040678133e-05, "loss": 0.2397, "step": 7063000 }, { "epoch": 4.23, "learning_rate": 2.6387734641151884e-05, "loss": 0.2436, "step": 7063500 }, { "epoch": 4.24, "learning_rate": 2.638563467559132e-05, "loss": 0.2436, "step": 7064000 }, { "epoch": 4.24, "learning_rate": 2.6383534710030754e-05, "loss": 0.2447, "step": 7064500 }, { "epoch": 4.24, "learning_rate": 2.638143474447019e-05, "loss": 0.2406, "step": 7065000 }, { "epoch": 4.24, "learning_rate": 2.6379334778909628e-05, "loss": 0.2448, "step": 7065500 }, { "epoch": 4.24, "learning_rate": 2.637723901328018e-05, "loss": 0.2423, "step": 7066000 }, { "epoch": 4.24, "learning_rate": 2.6375139047719618e-05, "loss": 0.241, "step": 7066500 }, { "epoch": 4.24, "learning_rate": 2.637303908215905e-05, "loss": 0.2451, "step": 7067000 }, { "epoch": 4.24, "learning_rate": 2.6370939116598488e-05, "loss": 0.247, "step": 7067500 }, { "epoch": 4.24, "learning_rate": 2.6368839151037925e-05, "loss": 0.2381, "step": 7068000 }, { "epoch": 4.24, "learning_rate": 2.636674338540848e-05, "loss": 0.246, "step": 7068500 }, { "epoch": 4.24, "learning_rate": 2.6364643419847912e-05, "loss": 0.2402, "step": 7069000 }, { "epoch": 4.24, "learning_rate": 2.636254345428735e-05, "loss": 0.2438, "step": 7069500 }, { "epoch": 4.24, "learning_rate": 2.6360443488726786e-05, "loss": 0.2424, "step": 7070000 }, { "epoch": 4.24, "learning_rate": 2.6358343523166223e-05, "loss": 0.2362, "step": 7070500 }, { "epoch": 4.24, "learning_rate": 2.6356243557605653e-05, "loss": 0.242, "step": 7071000 }, { "epoch": 4.24, "learning_rate": 2.635414359204509e-05, "loss": 0.2422, "step": 7071500 }, { "epoch": 4.24, "learning_rate": 2.6352043626484526e-05, "loss": 0.2416, "step": 7072000 }, { "epoch": 4.24, "learning_rate": 2.6349947860855083e-05, "loss": 0.2462, "step": 7072500 }, { "epoch": 4.24, "learning_rate": 2.6347852095225637e-05, "loss": 0.2419, "step": 7073000 }, { "epoch": 4.24, "learning_rate": 2.6345752129665074e-05, "loss": 0.2415, "step": 7073500 }, { "epoch": 4.24, "learning_rate": 2.6343652164104507e-05, "loss": 0.2432, "step": 7074000 }, { "epoch": 4.24, "learning_rate": 2.6341552198543944e-05, "loss": 0.2423, "step": 7074500 }, { "epoch": 4.24, "learning_rate": 2.633945223298338e-05, "loss": 0.2441, "step": 7075000 }, { "epoch": 4.24, "learning_rate": 2.633735226742281e-05, "loss": 0.2414, "step": 7075500 }, { "epoch": 4.24, "learning_rate": 2.633525650179337e-05, "loss": 0.2467, "step": 7076000 }, { "epoch": 4.24, "learning_rate": 2.6333156536232805e-05, "loss": 0.2379, "step": 7076500 }, { "epoch": 4.24, "learning_rate": 2.6331056570672242e-05, "loss": 0.2341, "step": 7077000 }, { "epoch": 4.24, "learning_rate": 2.632895660511168e-05, "loss": 0.2405, "step": 7077500 }, { "epoch": 4.24, "learning_rate": 2.632685663955111e-05, "loss": 0.2394, "step": 7078000 }, { "epoch": 4.24, "learning_rate": 2.6324756673990546e-05, "loss": 0.242, "step": 7078500 }, { "epoch": 4.24, "learning_rate": 2.6322656708429982e-05, "loss": 0.2451, "step": 7079000 }, { "epoch": 4.24, "learning_rate": 2.6320556742869416e-05, "loss": 0.2467, "step": 7079500 }, { "epoch": 4.24, "learning_rate": 2.6318460977239973e-05, "loss": 0.2415, "step": 7080000 }, { "epoch": 4.25, "learning_rate": 2.6316361011679406e-05, "loss": 0.2414, "step": 7080500 }, { "epoch": 4.25, "learning_rate": 2.6314261046118843e-05, "loss": 0.2413, "step": 7081000 }, { "epoch": 4.25, "learning_rate": 2.631216108055828e-05, "loss": 0.2447, "step": 7081500 }, { "epoch": 4.25, "learning_rate": 2.6310065314928837e-05, "loss": 0.2425, "step": 7082000 }, { "epoch": 4.25, "learning_rate": 2.630796954929939e-05, "loss": 0.249, "step": 7082500 }, { "epoch": 4.25, "learning_rate": 2.6305869583738827e-05, "loss": 0.2392, "step": 7083000 }, { "epoch": 4.25, "learning_rate": 2.630376961817826e-05, "loss": 0.2431, "step": 7083500 }, { "epoch": 4.25, "learning_rate": 2.6301669652617698e-05, "loss": 0.2374, "step": 7084000 }, { "epoch": 4.25, "learning_rate": 2.6299569687057134e-05, "loss": 0.2428, "step": 7084500 }, { "epoch": 4.25, "learning_rate": 2.6297469721496565e-05, "loss": 0.2471, "step": 7085000 }, { "epoch": 4.25, "learning_rate": 2.6295369755936e-05, "loss": 0.2426, "step": 7085500 }, { "epoch": 4.25, "learning_rate": 2.6293269790375438e-05, "loss": 0.2368, "step": 7086000 }, { "epoch": 4.25, "learning_rate": 2.6291174024745995e-05, "loss": 0.242, "step": 7086500 }, { "epoch": 4.25, "learning_rate": 2.628907405918543e-05, "loss": 0.2437, "step": 7087000 }, { "epoch": 4.25, "learning_rate": 2.6286974093624862e-05, "loss": 0.246, "step": 7087500 }, { "epoch": 4.25, "learning_rate": 2.62848741280643e-05, "loss": 0.2374, "step": 7088000 }, { "epoch": 4.25, "learning_rate": 2.6282778362434856e-05, "loss": 0.2427, "step": 7088500 }, { "epoch": 4.25, "learning_rate": 2.6280678396874293e-05, "loss": 0.2383, "step": 7089000 }, { "epoch": 4.25, "learning_rate": 2.6278578431313726e-05, "loss": 0.2407, "step": 7089500 }, { "epoch": 4.25, "learning_rate": 2.6276482665684283e-05, "loss": 0.2425, "step": 7090000 }, { "epoch": 4.25, "learning_rate": 2.6274382700123717e-05, "loss": 0.2371, "step": 7090500 }, { "epoch": 4.25, "learning_rate": 2.6272282734563154e-05, "loss": 0.2499, "step": 7091000 }, { "epoch": 4.25, "learning_rate": 2.627018276900259e-05, "loss": 0.236, "step": 7091500 }, { "epoch": 4.25, "learning_rate": 2.6268082803442024e-05, "loss": 0.2353, "step": 7092000 }, { "epoch": 4.25, "learning_rate": 2.6265982837881457e-05, "loss": 0.2379, "step": 7092500 }, { "epoch": 4.25, "learning_rate": 2.6263882872320894e-05, "loss": 0.2431, "step": 7093000 }, { "epoch": 4.25, "learning_rate": 2.6261782906760328e-05, "loss": 0.246, "step": 7093500 }, { "epoch": 4.25, "learning_rate": 2.6259687141130885e-05, "loss": 0.2448, "step": 7094000 }, { "epoch": 4.25, "learning_rate": 2.6257587175570318e-05, "loss": 0.2388, "step": 7094500 }, { "epoch": 4.25, "learning_rate": 2.6255487210009755e-05, "loss": 0.2391, "step": 7095000 }, { "epoch": 4.25, "learning_rate": 2.6253387244449192e-05, "loss": 0.2376, "step": 7095500 }, { "epoch": 4.25, "learning_rate": 2.6251287278888625e-05, "loss": 0.2392, "step": 7096000 }, { "epoch": 4.25, "learning_rate": 2.6249187313328062e-05, "loss": 0.2465, "step": 7096500 }, { "epoch": 4.25, "learning_rate": 2.62470873477675e-05, "loss": 0.2454, "step": 7097000 }, { "epoch": 4.26, "learning_rate": 2.624498738220693e-05, "loss": 0.2426, "step": 7097500 }, { "epoch": 4.26, "learning_rate": 2.6242891616577486e-05, "loss": 0.2479, "step": 7098000 }, { "epoch": 4.26, "learning_rate": 2.6240791651016923e-05, "loss": 0.2497, "step": 7098500 }, { "epoch": 4.26, "learning_rate": 2.623869168545636e-05, "loss": 0.2421, "step": 7099000 }, { "epoch": 4.26, "learning_rate": 2.6236591719895796e-05, "loss": 0.2417, "step": 7099500 }, { "epoch": 4.26, "learning_rate": 2.623449595426635e-05, "loss": 0.2362, "step": 7100000 }, { "epoch": 4.26, "eval_loss": 0.2266959547996521, "eval_runtime": 1456.1619, "eval_samples_per_second": 361.718, "eval_steps_per_second": 60.287, "step": 7100000 }, { "epoch": 4.26, "learning_rate": 2.6232395988705783e-05, "loss": 0.2438, "step": 7100500 }, { "epoch": 4.26, "learning_rate": 2.623029602314522e-05, "loss": 0.2421, "step": 7101000 }, { "epoch": 4.26, "learning_rate": 2.6228196057584657e-05, "loss": 0.2412, "step": 7101500 }, { "epoch": 4.26, "learning_rate": 2.622610029195521e-05, "loss": 0.2426, "step": 7102000 }, { "epoch": 4.26, "learning_rate": 2.6224000326394648e-05, "loss": 0.2398, "step": 7102500 }, { "epoch": 4.26, "learning_rate": 2.622190036083408e-05, "loss": 0.2416, "step": 7103000 }, { "epoch": 4.26, "learning_rate": 2.6219800395273518e-05, "loss": 0.2454, "step": 7103500 }, { "epoch": 4.26, "learning_rate": 2.621770462964407e-05, "loss": 0.2452, "step": 7104000 }, { "epoch": 4.26, "learning_rate": 2.621560466408351e-05, "loss": 0.2444, "step": 7104500 }, { "epoch": 4.26, "learning_rate": 2.6213504698522942e-05, "loss": 0.2382, "step": 7105000 }, { "epoch": 4.26, "learning_rate": 2.6211408932893502e-05, "loss": 0.2391, "step": 7105500 }, { "epoch": 4.26, "learning_rate": 2.6209313167264056e-05, "loss": 0.2511, "step": 7106000 }, { "epoch": 4.26, "learning_rate": 2.620721320170349e-05, "loss": 0.246, "step": 7106500 }, { "epoch": 4.26, "learning_rate": 2.6205113236142926e-05, "loss": 0.2424, "step": 7107000 }, { "epoch": 4.26, "learning_rate": 2.6203013270582363e-05, "loss": 0.2474, "step": 7107500 }, { "epoch": 4.26, "learning_rate": 2.6200913305021796e-05, "loss": 0.2412, "step": 7108000 }, { "epoch": 4.26, "learning_rate": 2.6198813339461233e-05, "loss": 0.2366, "step": 7108500 }, { "epoch": 4.26, "learning_rate": 2.6196713373900667e-05, "loss": 0.2503, "step": 7109000 }, { "epoch": 4.26, "learning_rate": 2.6194613408340103e-05, "loss": 0.2349, "step": 7109500 }, { "epoch": 4.26, "learning_rate": 2.6192513442779537e-05, "loss": 0.2371, "step": 7110000 }, { "epoch": 4.26, "learning_rate": 2.6190413477218974e-05, "loss": 0.2463, "step": 7110500 }, { "epoch": 4.26, "learning_rate": 2.618831351165841e-05, "loss": 0.2364, "step": 7111000 }, { "epoch": 4.26, "learning_rate": 2.618621354609784e-05, "loss": 0.2411, "step": 7111500 }, { "epoch": 4.26, "learning_rate": 2.61841177804684e-05, "loss": 0.2334, "step": 7112000 }, { "epoch": 4.26, "learning_rate": 2.6182017814907834e-05, "loss": 0.2445, "step": 7112500 }, { "epoch": 4.26, "learning_rate": 2.617991784934727e-05, "loss": 0.2458, "step": 7113000 }, { "epoch": 4.26, "learning_rate": 2.6177817883786708e-05, "loss": 0.2416, "step": 7113500 }, { "epoch": 4.27, "learning_rate": 2.6175722118157262e-05, "loss": 0.2384, "step": 7114000 }, { "epoch": 4.27, "learning_rate": 2.6173622152596695e-05, "loss": 0.244, "step": 7114500 }, { "epoch": 4.27, "learning_rate": 2.6171522187036132e-05, "loss": 0.2392, "step": 7115000 }, { "epoch": 4.27, "learning_rate": 2.616942222147557e-05, "loss": 0.2396, "step": 7115500 }, { "epoch": 4.27, "learning_rate": 2.6167326455846122e-05, "loss": 0.2401, "step": 7116000 }, { "epoch": 4.27, "learning_rate": 2.616522649028556e-05, "loss": 0.2416, "step": 7116500 }, { "epoch": 4.27, "learning_rate": 2.6163130724656116e-05, "loss": 0.2403, "step": 7117000 }, { "epoch": 4.27, "learning_rate": 2.616103075909555e-05, "loss": 0.2419, "step": 7117500 }, { "epoch": 4.27, "learning_rate": 2.6158930793534987e-05, "loss": 0.239, "step": 7118000 }, { "epoch": 4.27, "learning_rate": 2.615683082797442e-05, "loss": 0.2348, "step": 7118500 }, { "epoch": 4.27, "learning_rate": 2.6154730862413857e-05, "loss": 0.2411, "step": 7119000 }, { "epoch": 4.27, "learning_rate": 2.615263089685329e-05, "loss": 0.242, "step": 7119500 }, { "epoch": 4.27, "learning_rate": 2.6150530931292727e-05, "loss": 0.2399, "step": 7120000 }, { "epoch": 4.27, "learning_rate": 2.6148430965732164e-05, "loss": 0.2408, "step": 7120500 }, { "epoch": 4.27, "learning_rate": 2.6146335200102718e-05, "loss": 0.2399, "step": 7121000 }, { "epoch": 4.27, "learning_rate": 2.614423523454215e-05, "loss": 0.2459, "step": 7121500 }, { "epoch": 4.27, "learning_rate": 2.6142135268981588e-05, "loss": 0.2405, "step": 7122000 }, { "epoch": 4.27, "learning_rate": 2.6140035303421025e-05, "loss": 0.2442, "step": 7122500 }, { "epoch": 4.27, "learning_rate": 2.6137939537791582e-05, "loss": 0.2376, "step": 7123000 }, { "epoch": 4.27, "learning_rate": 2.6135843772162135e-05, "loss": 0.2438, "step": 7123500 }, { "epoch": 4.27, "learning_rate": 2.6133743806601572e-05, "loss": 0.2389, "step": 7124000 }, { "epoch": 4.27, "learning_rate": 2.6131643841041006e-05, "loss": 0.2418, "step": 7124500 }, { "epoch": 4.27, "learning_rate": 2.6129543875480442e-05, "loss": 0.2438, "step": 7125000 }, { "epoch": 4.27, "learning_rate": 2.6127443909919876e-05, "loss": 0.2374, "step": 7125500 }, { "epoch": 4.27, "learning_rate": 2.6125348144290433e-05, "loss": 0.2413, "step": 7126000 }, { "epoch": 4.27, "learning_rate": 2.612325237866099e-05, "loss": 0.247, "step": 7126500 }, { "epoch": 4.27, "learning_rate": 2.6121152413100427e-05, "loss": 0.2412, "step": 7127000 }, { "epoch": 4.27, "learning_rate": 2.611905244753986e-05, "loss": 0.2398, "step": 7127500 }, { "epoch": 4.27, "learning_rate": 2.6116952481979294e-05, "loss": 0.2463, "step": 7128000 }, { "epoch": 4.27, "learning_rate": 2.611485251641873e-05, "loss": 0.2431, "step": 7128500 }, { "epoch": 4.27, "learning_rate": 2.6112752550858167e-05, "loss": 0.2465, "step": 7129000 }, { "epoch": 4.27, "learning_rate": 2.61106525852976e-05, "loss": 0.2377, "step": 7129500 }, { "epoch": 4.27, "learning_rate": 2.6108552619737038e-05, "loss": 0.2397, "step": 7130000 }, { "epoch": 4.28, "learning_rate": 2.610645265417647e-05, "loss": 0.2352, "step": 7130500 }, { "epoch": 4.28, "learning_rate": 2.6104352688615905e-05, "loss": 0.2401, "step": 7131000 }, { "epoch": 4.28, "learning_rate": 2.610225272305534e-05, "loss": 0.2469, "step": 7131500 }, { "epoch": 4.28, "learning_rate": 2.61001569574259e-05, "loss": 0.2425, "step": 7132000 }, { "epoch": 4.28, "learning_rate": 2.6098056991865335e-05, "loss": 0.2389, "step": 7132500 }, { "epoch": 4.28, "learning_rate": 2.609595702630477e-05, "loss": 0.2416, "step": 7133000 }, { "epoch": 4.28, "learning_rate": 2.6093857060744202e-05, "loss": 0.2475, "step": 7133500 }, { "epoch": 4.28, "learning_rate": 2.609175709518364e-05, "loss": 0.2387, "step": 7134000 }, { "epoch": 4.28, "learning_rate": 2.6089657129623076e-05, "loss": 0.2428, "step": 7134500 }, { "epoch": 4.28, "learning_rate": 2.608755716406251e-05, "loss": 0.245, "step": 7135000 }, { "epoch": 4.28, "learning_rate": 2.6085457198501943e-05, "loss": 0.2394, "step": 7135500 }, { "epoch": 4.28, "learning_rate": 2.60833614328725e-05, "loss": 0.2436, "step": 7136000 }, { "epoch": 4.28, "learning_rate": 2.6081261467311937e-05, "loss": 0.2435, "step": 7136500 }, { "epoch": 4.28, "learning_rate": 2.607916150175137e-05, "loss": 0.2376, "step": 7137000 }, { "epoch": 4.28, "learning_rate": 2.6077061536190807e-05, "loss": 0.2423, "step": 7137500 }, { "epoch": 4.28, "learning_rate": 2.607496577056136e-05, "loss": 0.2389, "step": 7138000 }, { "epoch": 4.28, "learning_rate": 2.6072870004931917e-05, "loss": 0.2457, "step": 7138500 }, { "epoch": 4.28, "learning_rate": 2.6070770039371354e-05, "loss": 0.2456, "step": 7139000 }, { "epoch": 4.28, "learning_rate": 2.606867007381079e-05, "loss": 0.2399, "step": 7139500 }, { "epoch": 4.28, "learning_rate": 2.6066570108250225e-05, "loss": 0.2421, "step": 7140000 }, { "epoch": 4.28, "learning_rate": 2.6064470142689658e-05, "loss": 0.2428, "step": 7140500 }, { "epoch": 4.28, "learning_rate": 2.6062370177129095e-05, "loss": 0.2395, "step": 7141000 }, { "epoch": 4.28, "learning_rate": 2.6060270211568532e-05, "loss": 0.2407, "step": 7141500 }, { "epoch": 4.28, "learning_rate": 2.6058170246007965e-05, "loss": 0.2436, "step": 7142000 }, { "epoch": 4.28, "learning_rate": 2.605607448037852e-05, "loss": 0.247, "step": 7142500 }, { "epoch": 4.28, "learning_rate": 2.605397871474908e-05, "loss": 0.2369, "step": 7143000 }, { "epoch": 4.28, "learning_rate": 2.6051878749188513e-05, "loss": 0.2387, "step": 7143500 }, { "epoch": 4.28, "learning_rate": 2.604977878362795e-05, "loss": 0.2371, "step": 7144000 }, { "epoch": 4.28, "learning_rate": 2.6047678818067383e-05, "loss": 0.2408, "step": 7144500 }, { "epoch": 4.28, "learning_rate": 2.6045578852506816e-05, "loss": 0.24, "step": 7145000 }, { "epoch": 4.28, "learning_rate": 2.6043483086877373e-05, "loss": 0.2428, "step": 7145500 }, { "epoch": 4.28, "learning_rate": 2.604138312131681e-05, "loss": 0.2391, "step": 7146000 }, { "epoch": 4.28, "learning_rate": 2.6039283155756247e-05, "loss": 0.2463, "step": 7146500 }, { "epoch": 4.28, "learning_rate": 2.603718319019568e-05, "loss": 0.2373, "step": 7147000 }, { "epoch": 4.29, "learning_rate": 2.6035083224635114e-05, "loss": 0.2429, "step": 7147500 }, { "epoch": 4.29, "learning_rate": 2.603298325907455e-05, "loss": 0.2448, "step": 7148000 }, { "epoch": 4.29, "learning_rate": 2.6030883293513988e-05, "loss": 0.2436, "step": 7148500 }, { "epoch": 4.29, "learning_rate": 2.6028787527884545e-05, "loss": 0.239, "step": 7149000 }, { "epoch": 4.29, "learning_rate": 2.6026687562323975e-05, "loss": 0.2451, "step": 7149500 }, { "epoch": 4.29, "learning_rate": 2.602458759676341e-05, "loss": 0.2396, "step": 7150000 }, { "epoch": 4.29, "learning_rate": 2.602248763120285e-05, "loss": 0.2431, "step": 7150500 }, { "epoch": 4.29, "learning_rate": 2.6020391865573405e-05, "loss": 0.2416, "step": 7151000 }, { "epoch": 4.29, "learning_rate": 2.6018291900012842e-05, "loss": 0.2429, "step": 7151500 }, { "epoch": 4.29, "learning_rate": 2.6016196134383396e-05, "loss": 0.2432, "step": 7152000 }, { "epoch": 4.29, "learning_rate": 2.601409616882283e-05, "loss": 0.237, "step": 7152500 }, { "epoch": 4.29, "learning_rate": 2.6011996203262266e-05, "loss": 0.2394, "step": 7153000 }, { "epoch": 4.29, "learning_rate": 2.6009896237701703e-05, "loss": 0.2384, "step": 7153500 }, { "epoch": 4.29, "learning_rate": 2.600779627214114e-05, "loss": 0.2483, "step": 7154000 }, { "epoch": 4.29, "learning_rate": 2.600569630658057e-05, "loss": 0.2434, "step": 7154500 }, { "epoch": 4.29, "learning_rate": 2.6003596341020007e-05, "loss": 0.2341, "step": 7155000 }, { "epoch": 4.29, "learning_rate": 2.6001496375459443e-05, "loss": 0.2414, "step": 7155500 }, { "epoch": 4.29, "learning_rate": 2.5999396409898877e-05, "loss": 0.2393, "step": 7156000 }, { "epoch": 4.29, "learning_rate": 2.5997296444338314e-05, "loss": 0.2429, "step": 7156500 }, { "epoch": 4.29, "learning_rate": 2.5995196478777747e-05, "loss": 0.2433, "step": 7157000 }, { "epoch": 4.29, "learning_rate": 2.599309651321718e-05, "loss": 0.2446, "step": 7157500 }, { "epoch": 4.29, "learning_rate": 2.599100074758774e-05, "loss": 0.2378, "step": 7158000 }, { "epoch": 4.29, "learning_rate": 2.5988900782027174e-05, "loss": 0.2395, "step": 7158500 }, { "epoch": 4.29, "learning_rate": 2.598680081646661e-05, "loss": 0.2387, "step": 7159000 }, { "epoch": 4.29, "learning_rate": 2.5984700850906045e-05, "loss": 0.2443, "step": 7159500 }, { "epoch": 4.29, "learning_rate": 2.5982605085276602e-05, "loss": 0.2438, "step": 7160000 }, { "epoch": 4.29, "learning_rate": 2.5980505119716035e-05, "loss": 0.241, "step": 7160500 }, { "epoch": 4.29, "learning_rate": 2.5978405154155472e-05, "loss": 0.2438, "step": 7161000 }, { "epoch": 4.29, "learning_rate": 2.597630518859491e-05, "loss": 0.2391, "step": 7161500 }, { "epoch": 4.29, "learning_rate": 2.5974205223034342e-05, "loss": 0.2436, "step": 7162000 }, { "epoch": 4.29, "learning_rate": 2.59721094574049e-05, "loss": 0.2427, "step": 7162500 }, { "epoch": 4.29, "learning_rate": 2.5970009491844333e-05, "loss": 0.2448, "step": 7163000 }, { "epoch": 4.29, "learning_rate": 2.596791372621489e-05, "loss": 0.2418, "step": 7163500 }, { "epoch": 4.3, "learning_rate": 2.5965813760654323e-05, "loss": 0.2463, "step": 7164000 }, { "epoch": 4.3, "learning_rate": 2.596371379509376e-05, "loss": 0.2481, "step": 7164500 }, { "epoch": 4.3, "learning_rate": 2.5961613829533197e-05, "loss": 0.2366, "step": 7165000 }, { "epoch": 4.3, "learning_rate": 2.595951386397263e-05, "loss": 0.2385, "step": 7165500 }, { "epoch": 4.3, "learning_rate": 2.5957418098343184e-05, "loss": 0.2395, "step": 7166000 }, { "epoch": 4.3, "learning_rate": 2.595531813278262e-05, "loss": 0.2438, "step": 7166500 }, { "epoch": 4.3, "learning_rate": 2.5953218167222058e-05, "loss": 0.2348, "step": 7167000 }, { "epoch": 4.3, "learning_rate": 2.595111820166149e-05, "loss": 0.2421, "step": 7167500 }, { "epoch": 4.3, "learning_rate": 2.5949018236100928e-05, "loss": 0.238, "step": 7168000 }, { "epoch": 4.3, "learning_rate": 2.5946918270540365e-05, "loss": 0.243, "step": 7168500 }, { "epoch": 4.3, "learning_rate": 2.5944818304979798e-05, "loss": 0.2428, "step": 7169000 }, { "epoch": 4.3, "learning_rate": 2.5942722539350355e-05, "loss": 0.2441, "step": 7169500 }, { "epoch": 4.3, "learning_rate": 2.594062257378979e-05, "loss": 0.2398, "step": 7170000 }, { "epoch": 4.3, "learning_rate": 2.5938522608229226e-05, "loss": 0.2374, "step": 7170500 }, { "epoch": 4.3, "learning_rate": 2.5936422642668662e-05, "loss": 0.2411, "step": 7171000 }, { "epoch": 4.3, "learning_rate": 2.5934322677108092e-05, "loss": 0.2377, "step": 7171500 }, { "epoch": 4.3, "learning_rate": 2.593222271154753e-05, "loss": 0.2379, "step": 7172000 }, { "epoch": 4.3, "learning_rate": 2.5930122745986966e-05, "loss": 0.2382, "step": 7172500 }, { "epoch": 4.3, "learning_rate": 2.59280227804264e-05, "loss": 0.2402, "step": 7173000 }, { "epoch": 4.3, "learning_rate": 2.5925927014796957e-05, "loss": 0.2428, "step": 7173500 }, { "epoch": 4.3, "learning_rate": 2.592382704923639e-05, "loss": 0.2413, "step": 7174000 }, { "epoch": 4.3, "learning_rate": 2.5921727083675827e-05, "loss": 0.2462, "step": 7174500 }, { "epoch": 4.3, "learning_rate": 2.5919627118115264e-05, "loss": 0.2375, "step": 7175000 }, { "epoch": 4.3, "learning_rate": 2.591753135248582e-05, "loss": 0.2433, "step": 7175500 }, { "epoch": 4.3, "learning_rate": 2.5915431386925254e-05, "loss": 0.2406, "step": 7176000 }, { "epoch": 4.3, "learning_rate": 2.5913331421364688e-05, "loss": 0.2426, "step": 7176500 }, { "epoch": 4.3, "learning_rate": 2.5911231455804124e-05, "loss": 0.2351, "step": 7177000 }, { "epoch": 4.3, "learning_rate": 2.590913569017468e-05, "loss": 0.2382, "step": 7177500 }, { "epoch": 4.3, "learning_rate": 2.5907035724614118e-05, "loss": 0.2385, "step": 7178000 }, { "epoch": 4.3, "learning_rate": 2.590493575905355e-05, "loss": 0.2365, "step": 7178500 }, { "epoch": 4.3, "learning_rate": 2.5902835793492985e-05, "loss": 0.243, "step": 7179000 }, { "epoch": 4.3, "learning_rate": 2.5900740027863542e-05, "loss": 0.2409, "step": 7179500 }, { "epoch": 4.3, "learning_rate": 2.589864006230298e-05, "loss": 0.2407, "step": 7180000 }, { "epoch": 4.31, "learning_rate": 2.5896540096742416e-05, "loss": 0.2419, "step": 7180500 }, { "epoch": 4.31, "learning_rate": 2.5894440131181846e-05, "loss": 0.2419, "step": 7181000 }, { "epoch": 4.31, "learning_rate": 2.5892344365552403e-05, "loss": 0.2458, "step": 7181500 }, { "epoch": 4.31, "learning_rate": 2.589024439999184e-05, "loss": 0.2356, "step": 7182000 }, { "epoch": 4.31, "learning_rate": 2.5888144434431277e-05, "loss": 0.2412, "step": 7182500 }, { "epoch": 4.31, "learning_rate": 2.588604446887071e-05, "loss": 0.2407, "step": 7183000 }, { "epoch": 4.31, "learning_rate": 2.5883948703241267e-05, "loss": 0.2411, "step": 7183500 }, { "epoch": 4.31, "learning_rate": 2.58818487376807e-05, "loss": 0.2427, "step": 7184000 }, { "epoch": 4.31, "learning_rate": 2.5879752972051257e-05, "loss": 0.2422, "step": 7184500 }, { "epoch": 4.31, "learning_rate": 2.587765300649069e-05, "loss": 0.2391, "step": 7185000 }, { "epoch": 4.31, "learning_rate": 2.5875553040930128e-05, "loss": 0.2363, "step": 7185500 }, { "epoch": 4.31, "learning_rate": 2.5873453075369565e-05, "loss": 0.2447, "step": 7186000 }, { "epoch": 4.31, "learning_rate": 2.5871353109808998e-05, "loss": 0.2401, "step": 7186500 }, { "epoch": 4.31, "learning_rate": 2.5869253144248435e-05, "loss": 0.2441, "step": 7187000 }, { "epoch": 4.31, "learning_rate": 2.5867153178687872e-05, "loss": 0.2445, "step": 7187500 }, { "epoch": 4.31, "learning_rate": 2.5865053213127302e-05, "loss": 0.2455, "step": 7188000 }, { "epoch": 4.31, "learning_rate": 2.586295744749786e-05, "loss": 0.2425, "step": 7188500 }, { "epoch": 4.31, "learning_rate": 2.5860857481937296e-05, "loss": 0.2418, "step": 7189000 }, { "epoch": 4.31, "learning_rate": 2.5858757516376732e-05, "loss": 0.2428, "step": 7189500 }, { "epoch": 4.31, "learning_rate": 2.5856661750747286e-05, "loss": 0.2429, "step": 7190000 }, { "epoch": 4.31, "learning_rate": 2.5854561785186723e-05, "loss": 0.2426, "step": 7190500 }, { "epoch": 4.31, "learning_rate": 2.585246601955728e-05, "loss": 0.246, "step": 7191000 }, { "epoch": 4.31, "learning_rate": 2.5850366053996713e-05, "loss": 0.2434, "step": 7191500 }, { "epoch": 4.31, "learning_rate": 2.584826608843615e-05, "loss": 0.2408, "step": 7192000 }, { "epoch": 4.31, "learning_rate": 2.5846166122875584e-05, "loss": 0.2432, "step": 7192500 }, { "epoch": 4.31, "learning_rate": 2.584406615731502e-05, "loss": 0.2338, "step": 7193000 }, { "epoch": 4.31, "learning_rate": 2.5841966191754454e-05, "loss": 0.2465, "step": 7193500 }, { "epoch": 4.31, "learning_rate": 2.583986622619389e-05, "loss": 0.2402, "step": 7194000 }, { "epoch": 4.31, "learning_rate": 2.5837766260633328e-05, "loss": 0.2448, "step": 7194500 }, { "epoch": 4.31, "learning_rate": 2.5835666295072758e-05, "loss": 0.2403, "step": 7195000 }, { "epoch": 4.31, "learning_rate": 2.5833566329512194e-05, "loss": 0.2394, "step": 7195500 }, { "epoch": 4.31, "learning_rate": 2.583146636395163e-05, "loss": 0.2468, "step": 7196000 }, { "epoch": 4.31, "learning_rate": 2.5829366398391065e-05, "loss": 0.2397, "step": 7196500 }, { "epoch": 4.31, "learning_rate": 2.5827270632761625e-05, "loss": 0.2377, "step": 7197000 }, { "epoch": 4.32, "learning_rate": 2.5825170667201055e-05, "loss": 0.2381, "step": 7197500 }, { "epoch": 4.32, "learning_rate": 2.5823074901571612e-05, "loss": 0.2386, "step": 7198000 }, { "epoch": 4.32, "learning_rate": 2.582097493601105e-05, "loss": 0.2392, "step": 7198500 }, { "epoch": 4.32, "learning_rate": 2.5818874970450486e-05, "loss": 0.2402, "step": 7199000 }, { "epoch": 4.32, "learning_rate": 2.581677500488992e-05, "loss": 0.2444, "step": 7199500 }, { "epoch": 4.32, "learning_rate": 2.5814675039329353e-05, "loss": 0.2388, "step": 7200000 }, { "epoch": 4.32, "eval_loss": 0.22556926310062408, "eval_runtime": 1456.6725, "eval_samples_per_second": 361.591, "eval_steps_per_second": 60.265, "step": 7200000 }, { "epoch": 4.32, "learning_rate": 2.581257507376879e-05, "loss": 0.2364, "step": 7200500 }, { "epoch": 4.32, "learning_rate": 2.5810475108208226e-05, "loss": 0.2369, "step": 7201000 }, { "epoch": 4.32, "learning_rate": 2.580837514264766e-05, "loss": 0.2397, "step": 7201500 }, { "epoch": 4.32, "learning_rate": 2.5806279377018217e-05, "loss": 0.2372, "step": 7202000 }, { "epoch": 4.32, "learning_rate": 2.580417941145765e-05, "loss": 0.2461, "step": 7202500 }, { "epoch": 4.32, "learning_rate": 2.5802079445897087e-05, "loss": 0.2427, "step": 7203000 }, { "epoch": 4.32, "learning_rate": 2.579997948033652e-05, "loss": 0.2425, "step": 7203500 }, { "epoch": 4.32, "learning_rate": 2.579788371470708e-05, "loss": 0.2377, "step": 7204000 }, { "epoch": 4.32, "learning_rate": 2.579578374914651e-05, "loss": 0.2342, "step": 7204500 }, { "epoch": 4.32, "learning_rate": 2.5793683783585948e-05, "loss": 0.2404, "step": 7205000 }, { "epoch": 4.32, "learning_rate": 2.5791583818025385e-05, "loss": 0.2489, "step": 7205500 }, { "epoch": 4.32, "learning_rate": 2.5789483852464818e-05, "loss": 0.2373, "step": 7206000 }, { "epoch": 4.32, "learning_rate": 2.5787388086835375e-05, "loss": 0.2313, "step": 7206500 }, { "epoch": 4.32, "learning_rate": 2.578528812127481e-05, "loss": 0.2401, "step": 7207000 }, { "epoch": 4.32, "learning_rate": 2.5783188155714246e-05, "loss": 0.2423, "step": 7207500 }, { "epoch": 4.32, "learning_rate": 2.5781088190153682e-05, "loss": 0.2432, "step": 7208000 }, { "epoch": 4.32, "learning_rate": 2.577899242452424e-05, "loss": 0.2426, "step": 7208500 }, { "epoch": 4.32, "learning_rate": 2.5776896658894793e-05, "loss": 0.2387, "step": 7209000 }, { "epoch": 4.32, "learning_rate": 2.577479669333423e-05, "loss": 0.2428, "step": 7209500 }, { "epoch": 4.32, "learning_rate": 2.5772696727773663e-05, "loss": 0.2377, "step": 7210000 }, { "epoch": 4.32, "learning_rate": 2.57705967622131e-05, "loss": 0.2378, "step": 7210500 }, { "epoch": 4.32, "learning_rate": 2.5768496796652537e-05, "loss": 0.2464, "step": 7211000 }, { "epoch": 4.32, "learning_rate": 2.576639683109197e-05, "loss": 0.2427, "step": 7211500 }, { "epoch": 4.32, "learning_rate": 2.5764296865531404e-05, "loss": 0.2414, "step": 7212000 }, { "epoch": 4.32, "learning_rate": 2.576219689997084e-05, "loss": 0.2404, "step": 7212500 }, { "epoch": 4.32, "learning_rate": 2.5760101134341398e-05, "loss": 0.2418, "step": 7213000 }, { "epoch": 4.32, "learning_rate": 2.575800116878083e-05, "loss": 0.2414, "step": 7213500 }, { "epoch": 4.33, "learning_rate": 2.5755901203220265e-05, "loss": 0.2427, "step": 7214000 }, { "epoch": 4.33, "learning_rate": 2.57538012376597e-05, "loss": 0.2421, "step": 7214500 }, { "epoch": 4.33, "learning_rate": 2.575170547203026e-05, "loss": 0.2395, "step": 7215000 }, { "epoch": 4.33, "learning_rate": 2.5749605506469695e-05, "loss": 0.2417, "step": 7215500 }, { "epoch": 4.33, "learning_rate": 2.574750974084025e-05, "loss": 0.2429, "step": 7216000 }, { "epoch": 4.33, "learning_rate": 2.5745409775279686e-05, "loss": 0.2367, "step": 7216500 }, { "epoch": 4.33, "learning_rate": 2.574330980971912e-05, "loss": 0.2414, "step": 7217000 }, { "epoch": 4.33, "learning_rate": 2.5741209844158556e-05, "loss": 0.243, "step": 7217500 }, { "epoch": 4.33, "learning_rate": 2.5739109878597993e-05, "loss": 0.2397, "step": 7218000 }, { "epoch": 4.33, "learning_rate": 2.5737009913037426e-05, "loss": 0.2415, "step": 7218500 }, { "epoch": 4.33, "learning_rate": 2.573490994747686e-05, "loss": 0.2439, "step": 7219000 }, { "epoch": 4.33, "learning_rate": 2.5732809981916297e-05, "loss": 0.2385, "step": 7219500 }, { "epoch": 4.33, "learning_rate": 2.5730714216286854e-05, "loss": 0.2312, "step": 7220000 }, { "epoch": 4.33, "learning_rate": 2.5728614250726287e-05, "loss": 0.2431, "step": 7220500 }, { "epoch": 4.33, "learning_rate": 2.5726514285165724e-05, "loss": 0.2394, "step": 7221000 }, { "epoch": 4.33, "learning_rate": 2.5724414319605157e-05, "loss": 0.2366, "step": 7221500 }, { "epoch": 4.33, "learning_rate": 2.5722318553975714e-05, "loss": 0.2425, "step": 7222000 }, { "epoch": 4.33, "learning_rate": 2.572021858841515e-05, "loss": 0.2371, "step": 7222500 }, { "epoch": 4.33, "learning_rate": 2.5718118622854585e-05, "loss": 0.2423, "step": 7223000 }, { "epoch": 4.33, "learning_rate": 2.571601865729402e-05, "loss": 0.2405, "step": 7223500 }, { "epoch": 4.33, "learning_rate": 2.5713922891664575e-05, "loss": 0.236, "step": 7224000 }, { "epoch": 4.33, "learning_rate": 2.5711822926104012e-05, "loss": 0.2384, "step": 7224500 }, { "epoch": 4.33, "learning_rate": 2.570972296054345e-05, "loss": 0.2457, "step": 7225000 }, { "epoch": 4.33, "learning_rate": 2.5707622994982882e-05, "loss": 0.2388, "step": 7225500 }, { "epoch": 4.33, "learning_rate": 2.5705527229353436e-05, "loss": 0.2434, "step": 7226000 }, { "epoch": 4.33, "learning_rate": 2.5703427263792873e-05, "loss": 0.2413, "step": 7226500 }, { "epoch": 4.33, "learning_rate": 2.570132729823231e-05, "loss": 0.2493, "step": 7227000 }, { "epoch": 4.33, "learning_rate": 2.5699227332671743e-05, "loss": 0.2377, "step": 7227500 }, { "epoch": 4.33, "learning_rate": 2.56971315670423e-05, "loss": 0.2442, "step": 7228000 }, { "epoch": 4.33, "learning_rate": 2.5695031601481733e-05, "loss": 0.2462, "step": 7228500 }, { "epoch": 4.33, "learning_rate": 2.569293163592117e-05, "loss": 0.2373, "step": 7229000 }, { "epoch": 4.33, "learning_rate": 2.5690831670360607e-05, "loss": 0.2408, "step": 7229500 }, { "epoch": 4.33, "learning_rate": 2.568874010466228e-05, "loss": 0.2456, "step": 7230000 }, { "epoch": 4.33, "learning_rate": 2.5686640139101718e-05, "loss": 0.2398, "step": 7230500 }, { "epoch": 4.34, "learning_rate": 2.5684540173541154e-05, "loss": 0.2464, "step": 7231000 }, { "epoch": 4.34, "learning_rate": 2.5682440207980588e-05, "loss": 0.2422, "step": 7231500 }, { "epoch": 4.34, "learning_rate": 2.5680340242420025e-05, "loss": 0.2373, "step": 7232000 }, { "epoch": 4.34, "learning_rate": 2.567824027685946e-05, "loss": 0.2406, "step": 7232500 }, { "epoch": 4.34, "learning_rate": 2.567614031129889e-05, "loss": 0.2392, "step": 7233000 }, { "epoch": 4.34, "learning_rate": 2.567404034573833e-05, "loss": 0.2453, "step": 7233500 }, { "epoch": 4.34, "learning_rate": 2.5671944580108885e-05, "loss": 0.2337, "step": 7234000 }, { "epoch": 4.34, "learning_rate": 2.5669844614548322e-05, "loss": 0.2366, "step": 7234500 }, { "epoch": 4.34, "learning_rate": 2.566774464898776e-05, "loss": 0.2414, "step": 7235000 }, { "epoch": 4.34, "learning_rate": 2.566564468342719e-05, "loss": 0.2405, "step": 7235500 }, { "epoch": 4.34, "learning_rate": 2.5663548917797746e-05, "loss": 0.2418, "step": 7236000 }, { "epoch": 4.34, "learning_rate": 2.5661448952237183e-05, "loss": 0.2392, "step": 7236500 }, { "epoch": 4.34, "learning_rate": 2.565934898667662e-05, "loss": 0.2349, "step": 7237000 }, { "epoch": 4.34, "learning_rate": 2.5657249021116053e-05, "loss": 0.2396, "step": 7237500 }, { "epoch": 4.34, "learning_rate": 2.5655149055555487e-05, "loss": 0.2396, "step": 7238000 }, { "epoch": 4.34, "learning_rate": 2.5653053289926044e-05, "loss": 0.2452, "step": 7238500 }, { "epoch": 4.34, "learning_rate": 2.565095332436548e-05, "loss": 0.2368, "step": 7239000 }, { "epoch": 4.34, "learning_rate": 2.5648853358804917e-05, "loss": 0.2326, "step": 7239500 }, { "epoch": 4.34, "learning_rate": 2.5646753393244347e-05, "loss": 0.2464, "step": 7240000 }, { "epoch": 4.34, "learning_rate": 2.5644657627614908e-05, "loss": 0.2359, "step": 7240500 }, { "epoch": 4.34, "learning_rate": 2.564255766205434e-05, "loss": 0.2368, "step": 7241000 }, { "epoch": 4.34, "learning_rate": 2.5640457696493778e-05, "loss": 0.2426, "step": 7241500 }, { "epoch": 4.34, "learning_rate": 2.5638357730933215e-05, "loss": 0.246, "step": 7242000 }, { "epoch": 4.34, "learning_rate": 2.563626616523489e-05, "loss": 0.2438, "step": 7242500 }, { "epoch": 4.34, "learning_rate": 2.5634166199674326e-05, "loss": 0.2399, "step": 7243000 }, { "epoch": 4.34, "learning_rate": 2.5632066234113762e-05, "loss": 0.2419, "step": 7243500 }, { "epoch": 4.34, "learning_rate": 2.5629970468484316e-05, "loss": 0.2426, "step": 7244000 }, { "epoch": 4.34, "learning_rate": 2.562787050292375e-05, "loss": 0.2411, "step": 7244500 }, { "epoch": 4.34, "learning_rate": 2.5625770537363186e-05, "loss": 0.237, "step": 7245000 }, { "epoch": 4.34, "learning_rate": 2.5623670571802623e-05, "loss": 0.2348, "step": 7245500 }, { "epoch": 4.34, "learning_rate": 2.5621570606242057e-05, "loss": 0.2405, "step": 7246000 }, { "epoch": 4.34, "learning_rate": 2.5619470640681493e-05, "loss": 0.2386, "step": 7246500 }, { "epoch": 4.34, "learning_rate": 2.5617370675120927e-05, "loss": 0.2422, "step": 7247000 }, { "epoch": 4.35, "learning_rate": 2.5615270709560364e-05, "loss": 0.2398, "step": 7247500 }, { "epoch": 4.35, "learning_rate": 2.5613170743999797e-05, "loss": 0.2415, "step": 7248000 }, { "epoch": 4.35, "learning_rate": 2.5611070778439234e-05, "loss": 0.2369, "step": 7248500 }, { "epoch": 4.35, "learning_rate": 2.560897081287867e-05, "loss": 0.2388, "step": 7249000 }, { "epoch": 4.35, "learning_rate": 2.56068708473181e-05, "loss": 0.2431, "step": 7249500 }, { "epoch": 4.35, "learning_rate": 2.5604775081688658e-05, "loss": 0.2394, "step": 7250000 }, { "epoch": 4.35, "learning_rate": 2.5602675116128095e-05, "loss": 0.2378, "step": 7250500 }, { "epoch": 4.35, "learning_rate": 2.560057515056753e-05, "loss": 0.2403, "step": 7251000 }, { "epoch": 4.35, "learning_rate": 2.559847518500697e-05, "loss": 0.2393, "step": 7251500 }, { "epoch": 4.35, "learning_rate": 2.55963752194464e-05, "loss": 0.2407, "step": 7252000 }, { "epoch": 4.35, "learning_rate": 2.5594275253885835e-05, "loss": 0.2396, "step": 7252500 }, { "epoch": 4.35, "learning_rate": 2.5592179488256392e-05, "loss": 0.2469, "step": 7253000 }, { "epoch": 4.35, "learning_rate": 2.559007952269583e-05, "loss": 0.2431, "step": 7253500 }, { "epoch": 4.35, "learning_rate": 2.5587979557135263e-05, "loss": 0.2421, "step": 7254000 }, { "epoch": 4.35, "learning_rate": 2.5585879591574696e-05, "loss": 0.2443, "step": 7254500 }, { "epoch": 4.35, "learning_rate": 2.5583779626014133e-05, "loss": 0.242, "step": 7255000 }, { "epoch": 4.35, "learning_rate": 2.558167966045357e-05, "loss": 0.2431, "step": 7255500 }, { "epoch": 4.35, "learning_rate": 2.5579579694893003e-05, "loss": 0.2376, "step": 7256000 }, { "epoch": 4.35, "learning_rate": 2.557747972933244e-05, "loss": 0.2378, "step": 7256500 }, { "epoch": 4.35, "learning_rate": 2.5575388163634114e-05, "loss": 0.2358, "step": 7257000 }, { "epoch": 4.35, "learning_rate": 2.557328819807355e-05, "loss": 0.2432, "step": 7257500 }, { "epoch": 4.35, "learning_rate": 2.5571188232512987e-05, "loss": 0.2429, "step": 7258000 }, { "epoch": 4.35, "learning_rate": 2.5569088266952424e-05, "loss": 0.2398, "step": 7258500 }, { "epoch": 4.35, "learning_rate": 2.5566988301391854e-05, "loss": 0.2422, "step": 7259000 }, { "epoch": 4.35, "learning_rate": 2.556488833583129e-05, "loss": 0.2421, "step": 7259500 }, { "epoch": 4.35, "learning_rate": 2.5562788370270728e-05, "loss": 0.2403, "step": 7260000 }, { "epoch": 4.35, "learning_rate": 2.556068840471016e-05, "loss": 0.2368, "step": 7260500 }, { "epoch": 4.35, "learning_rate": 2.555859263908072e-05, "loss": 0.2399, "step": 7261000 }, { "epoch": 4.35, "learning_rate": 2.5556492673520152e-05, "loss": 0.2412, "step": 7261500 }, { "epoch": 4.35, "learning_rate": 2.555439270795959e-05, "loss": 0.2456, "step": 7262000 }, { "epoch": 4.35, "learning_rate": 2.5552292742399026e-05, "loss": 0.2413, "step": 7262500 }, { "epoch": 4.35, "learning_rate": 2.5550196976769583e-05, "loss": 0.2464, "step": 7263000 }, { "epoch": 4.35, "learning_rate": 2.5548097011209016e-05, "loss": 0.2391, "step": 7263500 }, { "epoch": 4.36, "learning_rate": 2.554599704564845e-05, "loss": 0.2398, "step": 7264000 }, { "epoch": 4.36, "learning_rate": 2.5543897080087886e-05, "loss": 0.2386, "step": 7264500 }, { "epoch": 4.36, "learning_rate": 2.5541801314458443e-05, "loss": 0.2375, "step": 7265000 }, { "epoch": 4.36, "learning_rate": 2.553970134889788e-05, "loss": 0.243, "step": 7265500 }, { "epoch": 4.36, "learning_rate": 2.5537601383337314e-05, "loss": 0.244, "step": 7266000 }, { "epoch": 4.36, "learning_rate": 2.5535501417776747e-05, "loss": 0.2391, "step": 7266500 }, { "epoch": 4.36, "learning_rate": 2.5533405652147304e-05, "loss": 0.2508, "step": 7267000 }, { "epoch": 4.36, "learning_rate": 2.553130568658674e-05, "loss": 0.2427, "step": 7267500 }, { "epoch": 4.36, "learning_rate": 2.5529205721026174e-05, "loss": 0.2401, "step": 7268000 }, { "epoch": 4.36, "learning_rate": 2.5527105755465608e-05, "loss": 0.2385, "step": 7268500 }, { "epoch": 4.36, "learning_rate": 2.5525009989836165e-05, "loss": 0.2391, "step": 7269000 }, { "epoch": 4.36, "learning_rate": 2.55229100242756e-05, "loss": 0.2397, "step": 7269500 }, { "epoch": 4.36, "learning_rate": 2.552081005871504e-05, "loss": 0.2477, "step": 7270000 }, { "epoch": 4.36, "learning_rate": 2.5518710093154472e-05, "loss": 0.2424, "step": 7270500 }, { "epoch": 4.36, "learning_rate": 2.551661432752503e-05, "loss": 0.2406, "step": 7271000 }, { "epoch": 4.36, "learning_rate": 2.5514514361964462e-05, "loss": 0.2387, "step": 7271500 }, { "epoch": 4.36, "learning_rate": 2.55124143964039e-05, "loss": 0.2462, "step": 7272000 }, { "epoch": 4.36, "learning_rate": 2.5510314430843336e-05, "loss": 0.2456, "step": 7272500 }, { "epoch": 4.36, "learning_rate": 2.550821446528277e-05, "loss": 0.2444, "step": 7273000 }, { "epoch": 4.36, "learning_rate": 2.5506118699653323e-05, "loss": 0.2384, "step": 7273500 }, { "epoch": 4.36, "learning_rate": 2.550401873409276e-05, "loss": 0.236, "step": 7274000 }, { "epoch": 4.36, "learning_rate": 2.5501918768532197e-05, "loss": 0.2406, "step": 7274500 }, { "epoch": 4.36, "learning_rate": 2.549981880297163e-05, "loss": 0.2375, "step": 7275000 }, { "epoch": 4.36, "learning_rate": 2.5497718837411067e-05, "loss": 0.2416, "step": 7275500 }, { "epoch": 4.36, "learning_rate": 2.549562307178162e-05, "loss": 0.2375, "step": 7276000 }, { "epoch": 4.36, "learning_rate": 2.5493523106221058e-05, "loss": 0.2508, "step": 7276500 }, { "epoch": 4.36, "learning_rate": 2.5491423140660494e-05, "loss": 0.2399, "step": 7277000 }, { "epoch": 4.36, "learning_rate": 2.5489323175099928e-05, "loss": 0.239, "step": 7277500 }, { "epoch": 4.36, "learning_rate": 2.548722320953936e-05, "loss": 0.2409, "step": 7278000 }, { "epoch": 4.36, "learning_rate": 2.5485123243978798e-05, "loss": 0.2452, "step": 7278500 }, { "epoch": 4.36, "learning_rate": 2.548302327841823e-05, "loss": 0.2362, "step": 7279000 }, { "epoch": 4.36, "learning_rate": 2.548092331285767e-05, "loss": 0.2407, "step": 7279500 }, { "epoch": 4.36, "learning_rate": 2.5478827547228225e-05, "loss": 0.2437, "step": 7280000 }, { "epoch": 4.36, "learning_rate": 2.547672758166766e-05, "loss": 0.2378, "step": 7280500 }, { "epoch": 4.37, "learning_rate": 2.5474631816038216e-05, "loss": 0.2384, "step": 7281000 }, { "epoch": 4.37, "learning_rate": 2.5472531850477653e-05, "loss": 0.236, "step": 7281500 }, { "epoch": 4.37, "learning_rate": 2.5470431884917086e-05, "loss": 0.2432, "step": 7282000 }, { "epoch": 4.37, "learning_rate": 2.5468331919356523e-05, "loss": 0.2398, "step": 7282500 }, { "epoch": 4.37, "learning_rate": 2.5466231953795956e-05, "loss": 0.2409, "step": 7283000 }, { "epoch": 4.37, "learning_rate": 2.5464136188166513e-05, "loss": 0.254, "step": 7283500 }, { "epoch": 4.37, "learning_rate": 2.546203622260595e-05, "loss": 0.2387, "step": 7284000 }, { "epoch": 4.37, "learning_rate": 2.5459936257045384e-05, "loss": 0.2426, "step": 7284500 }, { "epoch": 4.37, "learning_rate": 2.545783629148482e-05, "loss": 0.2423, "step": 7285000 }, { "epoch": 4.37, "learning_rate": 2.5455736325924254e-05, "loss": 0.2442, "step": 7285500 }, { "epoch": 4.37, "learning_rate": 2.5453636360363687e-05, "loss": 0.2395, "step": 7286000 }, { "epoch": 4.37, "learning_rate": 2.5451536394803124e-05, "loss": 0.2408, "step": 7286500 }, { "epoch": 4.37, "learning_rate": 2.544943642924256e-05, "loss": 0.2381, "step": 7287000 }, { "epoch": 4.37, "learning_rate": 2.5447340663613115e-05, "loss": 0.2393, "step": 7287500 }, { "epoch": 4.37, "learning_rate": 2.544524069805255e-05, "loss": 0.2425, "step": 7288000 }, { "epoch": 4.37, "learning_rate": 2.5443140732491985e-05, "loss": 0.2384, "step": 7288500 }, { "epoch": 4.37, "learning_rate": 2.5441040766931422e-05, "loss": 0.2425, "step": 7289000 }, { "epoch": 4.37, "learning_rate": 2.543894500130198e-05, "loss": 0.2374, "step": 7289500 }, { "epoch": 4.37, "learning_rate": 2.5436845035741412e-05, "loss": 0.2411, "step": 7290000 }, { "epoch": 4.37, "learning_rate": 2.543474507018085e-05, "loss": 0.2389, "step": 7290500 }, { "epoch": 4.37, "learning_rate": 2.5432645104620283e-05, "loss": 0.2385, "step": 7291000 }, { "epoch": 4.37, "learning_rate": 2.543054933899084e-05, "loss": 0.2395, "step": 7291500 }, { "epoch": 4.37, "learning_rate": 2.5428453573361397e-05, "loss": 0.2425, "step": 7292000 }, { "epoch": 4.37, "learning_rate": 2.542635360780083e-05, "loss": 0.2425, "step": 7292500 }, { "epoch": 4.37, "learning_rate": 2.5424253642240267e-05, "loss": 0.2399, "step": 7293000 }, { "epoch": 4.37, "learning_rate": 2.5422153676679704e-05, "loss": 0.2407, "step": 7293500 }, { "epoch": 4.37, "learning_rate": 2.5420053711119137e-05, "loss": 0.246, "step": 7294000 }, { "epoch": 4.37, "learning_rate": 2.5417953745558574e-05, "loss": 0.2384, "step": 7294500 }, { "epoch": 4.37, "learning_rate": 2.5415853779998007e-05, "loss": 0.2369, "step": 7295000 }, { "epoch": 4.37, "learning_rate": 2.541375381443744e-05, "loss": 0.2431, "step": 7295500 }, { "epoch": 4.37, "learning_rate": 2.5411658048807998e-05, "loss": 0.2393, "step": 7296000 }, { "epoch": 4.37, "learning_rate": 2.5409558083247435e-05, "loss": 0.2395, "step": 7296500 }, { "epoch": 4.37, "learning_rate": 2.540745811768687e-05, "loss": 0.2412, "step": 7297000 }, { "epoch": 4.38, "learning_rate": 2.5405358152126305e-05, "loss": 0.2384, "step": 7297500 }, { "epoch": 4.38, "learning_rate": 2.5403262386496862e-05, "loss": 0.2382, "step": 7298000 }, { "epoch": 4.38, "learning_rate": 2.5401162420936295e-05, "loss": 0.2438, "step": 7298500 }, { "epoch": 4.38, "learning_rate": 2.5399062455375732e-05, "loss": 0.2342, "step": 7299000 }, { "epoch": 4.38, "learning_rate": 2.5396962489815166e-05, "loss": 0.2389, "step": 7299500 }, { "epoch": 4.38, "learning_rate": 2.5394866724185723e-05, "loss": 0.2411, "step": 7300000 }, { "epoch": 4.38, "eval_loss": 0.225030317902565, "eval_runtime": 1454.9683, "eval_samples_per_second": 362.015, "eval_steps_per_second": 60.336, "step": 7300000 }, { "epoch": 4.38, "learning_rate": 2.539276675862516e-05, "loss": 0.2388, "step": 7300500 }, { "epoch": 4.38, "learning_rate": 2.5390666793064593e-05, "loss": 0.2454, "step": 7301000 }, { "epoch": 4.38, "learning_rate": 2.538856682750403e-05, "loss": 0.2374, "step": 7301500 }, { "epoch": 4.38, "learning_rate": 2.5386471061874583e-05, "loss": 0.2413, "step": 7302000 }, { "epoch": 4.38, "learning_rate": 2.538437109631402e-05, "loss": 0.2419, "step": 7302500 }, { "epoch": 4.38, "learning_rate": 2.5382271130753457e-05, "loss": 0.2448, "step": 7303000 }, { "epoch": 4.38, "learning_rate": 2.538017116519289e-05, "loss": 0.2424, "step": 7303500 }, { "epoch": 4.38, "learning_rate": 2.5378075399563444e-05, "loss": 0.2439, "step": 7304000 }, { "epoch": 4.38, "learning_rate": 2.537597543400288e-05, "loss": 0.2456, "step": 7304500 }, { "epoch": 4.38, "learning_rate": 2.5373875468442318e-05, "loss": 0.2411, "step": 7305000 }, { "epoch": 4.38, "learning_rate": 2.537177550288175e-05, "loss": 0.2349, "step": 7305500 }, { "epoch": 4.38, "learning_rate": 2.536967973725231e-05, "loss": 0.2434, "step": 7306000 }, { "epoch": 4.38, "learning_rate": 2.5367579771691742e-05, "loss": 0.2406, "step": 7306500 }, { "epoch": 4.38, "learning_rate": 2.536547980613118e-05, "loss": 0.2413, "step": 7307000 }, { "epoch": 4.38, "learning_rate": 2.5363379840570615e-05, "loss": 0.2511, "step": 7307500 }, { "epoch": 4.38, "learning_rate": 2.5361284074941172e-05, "loss": 0.2459, "step": 7308000 }, { "epoch": 4.38, "learning_rate": 2.5359184109380606e-05, "loss": 0.2462, "step": 7308500 }, { "epoch": 4.38, "learning_rate": 2.535708414382004e-05, "loss": 0.2461, "step": 7309000 }, { "epoch": 4.38, "learning_rate": 2.5354984178259476e-05, "loss": 0.2444, "step": 7309500 }, { "epoch": 4.38, "learning_rate": 2.5352888412630033e-05, "loss": 0.2394, "step": 7310000 }, { "epoch": 4.38, "learning_rate": 2.535078844706947e-05, "loss": 0.2449, "step": 7310500 }, { "epoch": 4.38, "learning_rate": 2.53486884815089e-05, "loss": 0.2435, "step": 7311000 }, { "epoch": 4.38, "learning_rate": 2.5346588515948337e-05, "loss": 0.2408, "step": 7311500 }, { "epoch": 4.38, "learning_rate": 2.5344492750318894e-05, "loss": 0.2413, "step": 7312000 }, { "epoch": 4.38, "learning_rate": 2.534239278475833e-05, "loss": 0.2375, "step": 7312500 }, { "epoch": 4.38, "learning_rate": 2.5340292819197768e-05, "loss": 0.2429, "step": 7313000 }, { "epoch": 4.38, "learning_rate": 2.5338192853637198e-05, "loss": 0.2382, "step": 7313500 }, { "epoch": 4.39, "learning_rate": 2.5336097088007755e-05, "loss": 0.2415, "step": 7314000 }, { "epoch": 4.39, "learning_rate": 2.533399712244719e-05, "loss": 0.2309, "step": 7314500 }, { "epoch": 4.39, "learning_rate": 2.5331897156886628e-05, "loss": 0.2382, "step": 7315000 }, { "epoch": 4.39, "learning_rate": 2.5329797191326062e-05, "loss": 0.2338, "step": 7315500 }, { "epoch": 4.39, "learning_rate": 2.532770142569662e-05, "loss": 0.2372, "step": 7316000 }, { "epoch": 4.39, "learning_rate": 2.5325601460136052e-05, "loss": 0.2365, "step": 7316500 }, { "epoch": 4.39, "learning_rate": 2.532350569450661e-05, "loss": 0.2385, "step": 7317000 }, { "epoch": 4.39, "learning_rate": 2.5321405728946046e-05, "loss": 0.2358, "step": 7317500 }, { "epoch": 4.39, "learning_rate": 2.531930576338548e-05, "loss": 0.2433, "step": 7318000 }, { "epoch": 4.39, "learning_rate": 2.5317205797824916e-05, "loss": 0.2371, "step": 7318500 }, { "epoch": 4.39, "learning_rate": 2.531510583226435e-05, "loss": 0.2424, "step": 7319000 }, { "epoch": 4.39, "learning_rate": 2.5313010066634907e-05, "loss": 0.2424, "step": 7319500 }, { "epoch": 4.39, "learning_rate": 2.5310910101074344e-05, "loss": 0.2418, "step": 7320000 }, { "epoch": 4.39, "learning_rate": 2.5308810135513777e-05, "loss": 0.2424, "step": 7320500 }, { "epoch": 4.39, "learning_rate": 2.530671016995321e-05, "loss": 0.2423, "step": 7321000 }, { "epoch": 4.39, "learning_rate": 2.5304610204392647e-05, "loss": 0.2431, "step": 7321500 }, { "epoch": 4.39, "learning_rate": 2.5302510238832084e-05, "loss": 0.2385, "step": 7322000 }, { "epoch": 4.39, "learning_rate": 2.5300410273271518e-05, "loss": 0.2414, "step": 7322500 }, { "epoch": 4.39, "learning_rate": 2.5298314507642075e-05, "loss": 0.2476, "step": 7323000 }, { "epoch": 4.39, "learning_rate": 2.5296214542081508e-05, "loss": 0.2403, "step": 7323500 }, { "epoch": 4.39, "learning_rate": 2.5294114576520945e-05, "loss": 0.244, "step": 7324000 }, { "epoch": 4.39, "learning_rate": 2.5292014610960382e-05, "loss": 0.2362, "step": 7324500 }, { "epoch": 4.39, "learning_rate": 2.5289914645399815e-05, "loss": 0.2371, "step": 7325000 }, { "epoch": 4.39, "learning_rate": 2.528781467983925e-05, "loss": 0.2425, "step": 7325500 }, { "epoch": 4.39, "learning_rate": 2.5285714714278686e-05, "loss": 0.2383, "step": 7326000 }, { "epoch": 4.39, "learning_rate": 2.528361474871812e-05, "loss": 0.2398, "step": 7326500 }, { "epoch": 4.39, "learning_rate": 2.528151898308868e-05, "loss": 0.239, "step": 7327000 }, { "epoch": 4.39, "learning_rate": 2.5279419017528113e-05, "loss": 0.2388, "step": 7327500 }, { "epoch": 4.39, "learning_rate": 2.5277319051967546e-05, "loss": 0.2406, "step": 7328000 }, { "epoch": 4.39, "learning_rate": 2.5275219086406983e-05, "loss": 0.2436, "step": 7328500 }, { "epoch": 4.39, "learning_rate": 2.527312332077754e-05, "loss": 0.2362, "step": 7329000 }, { "epoch": 4.39, "learning_rate": 2.5271023355216974e-05, "loss": 0.2378, "step": 7329500 }, { "epoch": 4.39, "learning_rate": 2.526892338965641e-05, "loss": 0.2441, "step": 7330000 }, { "epoch": 4.39, "learning_rate": 2.5266823424095844e-05, "loss": 0.2366, "step": 7330500 }, { "epoch": 4.4, "learning_rate": 2.52647276584664e-05, "loss": 0.239, "step": 7331000 }, { "epoch": 4.4, "learning_rate": 2.5262627692905838e-05, "loss": 0.2406, "step": 7331500 }, { "epoch": 4.4, "learning_rate": 2.526052772734527e-05, "loss": 0.2406, "step": 7332000 }, { "epoch": 4.4, "learning_rate": 2.5258431961715828e-05, "loss": 0.2386, "step": 7332500 }, { "epoch": 4.4, "learning_rate": 2.525633199615526e-05, "loss": 0.2396, "step": 7333000 }, { "epoch": 4.4, "learning_rate": 2.52542320305947e-05, "loss": 0.2378, "step": 7333500 }, { "epoch": 4.4, "learning_rate": 2.5252132065034135e-05, "loss": 0.2413, "step": 7334000 }, { "epoch": 4.4, "learning_rate": 2.525003209947357e-05, "loss": 0.2466, "step": 7334500 }, { "epoch": 4.4, "learning_rate": 2.5247932133913002e-05, "loss": 0.2421, "step": 7335000 }, { "epoch": 4.4, "learning_rate": 2.524583216835244e-05, "loss": 0.2431, "step": 7335500 }, { "epoch": 4.4, "learning_rate": 2.5243732202791872e-05, "loss": 0.2444, "step": 7336000 }, { "epoch": 4.4, "learning_rate": 2.524163643716243e-05, "loss": 0.2426, "step": 7336500 }, { "epoch": 4.4, "learning_rate": 2.5239536471601866e-05, "loss": 0.239, "step": 7337000 }, { "epoch": 4.4, "learning_rate": 2.52374365060413e-05, "loss": 0.2449, "step": 7337500 }, { "epoch": 4.4, "learning_rate": 2.5235336540480737e-05, "loss": 0.2393, "step": 7338000 }, { "epoch": 4.4, "learning_rate": 2.5233240774851293e-05, "loss": 0.2389, "step": 7338500 }, { "epoch": 4.4, "learning_rate": 2.5231140809290727e-05, "loss": 0.2403, "step": 7339000 }, { "epoch": 4.4, "learning_rate": 2.5229040843730164e-05, "loss": 0.239, "step": 7339500 }, { "epoch": 4.4, "learning_rate": 2.5226940878169597e-05, "loss": 0.2421, "step": 7340000 }, { "epoch": 4.4, "learning_rate": 2.5224845112540154e-05, "loss": 0.2413, "step": 7340500 }, { "epoch": 4.4, "learning_rate": 2.522274514697959e-05, "loss": 0.2398, "step": 7341000 }, { "epoch": 4.4, "learning_rate": 2.5220645181419025e-05, "loss": 0.2431, "step": 7341500 }, { "epoch": 4.4, "learning_rate": 2.5218545215858458e-05, "loss": 0.2392, "step": 7342000 }, { "epoch": 4.4, "learning_rate": 2.5216445250297895e-05, "loss": 0.2427, "step": 7342500 }, { "epoch": 4.4, "learning_rate": 2.5214345284737328e-05, "loss": 0.2443, "step": 7343000 }, { "epoch": 4.4, "learning_rate": 2.5212245319176765e-05, "loss": 0.2359, "step": 7343500 }, { "epoch": 4.4, "learning_rate": 2.5210145353616202e-05, "loss": 0.2397, "step": 7344000 }, { "epoch": 4.4, "learning_rate": 2.5208049587986756e-05, "loss": 0.2455, "step": 7344500 }, { "epoch": 4.4, "learning_rate": 2.5205953822357313e-05, "loss": 0.2411, "step": 7345000 }, { "epoch": 4.4, "learning_rate": 2.520385385679675e-05, "loss": 0.24, "step": 7345500 }, { "epoch": 4.4, "learning_rate": 2.5201753891236183e-05, "loss": 0.2405, "step": 7346000 }, { "epoch": 4.4, "learning_rate": 2.519965392567562e-05, "loss": 0.2383, "step": 7346500 }, { "epoch": 4.4, "learning_rate": 2.5197553960115053e-05, "loss": 0.2337, "step": 7347000 }, { "epoch": 4.41, "learning_rate": 2.5195453994554487e-05, "loss": 0.2406, "step": 7347500 }, { "epoch": 4.41, "learning_rate": 2.5193354028993923e-05, "loss": 0.2367, "step": 7348000 }, { "epoch": 4.41, "learning_rate": 2.519125406343336e-05, "loss": 0.2427, "step": 7348500 }, { "epoch": 4.41, "learning_rate": 2.5189158297803917e-05, "loss": 0.2395, "step": 7349000 }, { "epoch": 4.41, "learning_rate": 2.518705833224335e-05, "loss": 0.2435, "step": 7349500 }, { "epoch": 4.41, "learning_rate": 2.5184958366682784e-05, "loss": 0.2369, "step": 7350000 }, { "epoch": 4.41, "learning_rate": 2.518285840112222e-05, "loss": 0.2402, "step": 7350500 }, { "epoch": 4.41, "learning_rate": 2.5180762635492778e-05, "loss": 0.2408, "step": 7351000 }, { "epoch": 4.41, "learning_rate": 2.517866266993221e-05, "loss": 0.2403, "step": 7351500 }, { "epoch": 4.41, "learning_rate": 2.5176562704371648e-05, "loss": 0.2438, "step": 7352000 }, { "epoch": 4.41, "learning_rate": 2.5174462738811082e-05, "loss": 0.2346, "step": 7352500 }, { "epoch": 4.41, "learning_rate": 2.517236697318164e-05, "loss": 0.2386, "step": 7353000 }, { "epoch": 4.41, "learning_rate": 2.5170267007621076e-05, "loss": 0.2371, "step": 7353500 }, { "epoch": 4.41, "learning_rate": 2.516816704206051e-05, "loss": 0.2413, "step": 7354000 }, { "epoch": 4.41, "learning_rate": 2.5166067076499946e-05, "loss": 0.2371, "step": 7354500 }, { "epoch": 4.41, "learning_rate": 2.5163971310870503e-05, "loss": 0.2413, "step": 7355000 }, { "epoch": 4.41, "learning_rate": 2.5161871345309936e-05, "loss": 0.2382, "step": 7355500 }, { "epoch": 4.41, "learning_rate": 2.5159771379749373e-05, "loss": 0.2385, "step": 7356000 }, { "epoch": 4.41, "learning_rate": 2.5157671414188807e-05, "loss": 0.2382, "step": 7356500 }, { "epoch": 4.41, "learning_rate": 2.5155575648559364e-05, "loss": 0.2445, "step": 7357000 }, { "epoch": 4.41, "learning_rate": 2.51534756829988e-05, "loss": 0.239, "step": 7357500 }, { "epoch": 4.41, "learning_rate": 2.5151375717438234e-05, "loss": 0.2367, "step": 7358000 }, { "epoch": 4.41, "learning_rate": 2.514927575187767e-05, "loss": 0.2402, "step": 7358500 }, { "epoch": 4.41, "learning_rate": 2.5147184186179344e-05, "loss": 0.2396, "step": 7359000 }, { "epoch": 4.41, "learning_rate": 2.514508422061878e-05, "loss": 0.2425, "step": 7359500 }, { "epoch": 4.41, "learning_rate": 2.5142984255058218e-05, "loss": 0.2342, "step": 7360000 }, { "epoch": 4.41, "learning_rate": 2.5140884289497655e-05, "loss": 0.2453, "step": 7360500 }, { "epoch": 4.41, "learning_rate": 2.5138784323937085e-05, "loss": 0.2424, "step": 7361000 }, { "epoch": 4.41, "learning_rate": 2.5136684358376522e-05, "loss": 0.2354, "step": 7361500 }, { "epoch": 4.41, "learning_rate": 2.513458439281596e-05, "loss": 0.2399, "step": 7362000 }, { "epoch": 4.41, "learning_rate": 2.5132484427255392e-05, "loss": 0.2436, "step": 7362500 }, { "epoch": 4.41, "learning_rate": 2.513038866162595e-05, "loss": 0.2401, "step": 7363000 }, { "epoch": 4.41, "learning_rate": 2.5128288696065383e-05, "loss": 0.2426, "step": 7363500 }, { "epoch": 4.42, "learning_rate": 2.512618873050482e-05, "loss": 0.2382, "step": 7364000 }, { "epoch": 4.42, "learning_rate": 2.5124088764944256e-05, "loss": 0.2415, "step": 7364500 }, { "epoch": 4.42, "learning_rate": 2.512198879938369e-05, "loss": 0.2431, "step": 7365000 }, { "epoch": 4.42, "learning_rate": 2.5119893033754243e-05, "loss": 0.2399, "step": 7365500 }, { "epoch": 4.42, "learning_rate": 2.511779306819368e-05, "loss": 0.2401, "step": 7366000 }, { "epoch": 4.42, "learning_rate": 2.5115697302564237e-05, "loss": 0.2418, "step": 7366500 }, { "epoch": 4.42, "learning_rate": 2.5113597337003674e-05, "loss": 0.235, "step": 7367000 }, { "epoch": 4.42, "learning_rate": 2.511149737144311e-05, "loss": 0.2414, "step": 7367500 }, { "epoch": 4.42, "learning_rate": 2.510939740588254e-05, "loss": 0.2383, "step": 7368000 }, { "epoch": 4.42, "learning_rate": 2.5107297440321978e-05, "loss": 0.2403, "step": 7368500 }, { "epoch": 4.42, "learning_rate": 2.5105197474761415e-05, "loss": 0.2486, "step": 7369000 }, { "epoch": 4.42, "learning_rate": 2.5103097509200848e-05, "loss": 0.2462, "step": 7369500 }, { "epoch": 4.42, "learning_rate": 2.5100997543640285e-05, "loss": 0.2446, "step": 7370000 }, { "epoch": 4.42, "learning_rate": 2.509890177801084e-05, "loss": 0.2393, "step": 7370500 }, { "epoch": 4.42, "learning_rate": 2.5096801812450275e-05, "loss": 0.2428, "step": 7371000 }, { "epoch": 4.42, "learning_rate": 2.5094701846889712e-05, "loss": 0.24, "step": 7371500 }, { "epoch": 4.42, "learning_rate": 2.509260608126027e-05, "loss": 0.2407, "step": 7372000 }, { "epoch": 4.42, "learning_rate": 2.5090506115699703e-05, "loss": 0.2407, "step": 7372500 }, { "epoch": 4.42, "learning_rate": 2.5088406150139136e-05, "loss": 0.2351, "step": 7373000 }, { "epoch": 4.42, "learning_rate": 2.5086306184578573e-05, "loss": 0.2338, "step": 7373500 }, { "epoch": 4.42, "learning_rate": 2.5084206219018006e-05, "loss": 0.2414, "step": 7374000 }, { "epoch": 4.42, "learning_rate": 2.5082106253457443e-05, "loss": 0.2385, "step": 7374500 }, { "epoch": 4.42, "learning_rate": 2.508000628789688e-05, "loss": 0.2355, "step": 7375000 }, { "epoch": 4.42, "learning_rate": 2.5077906322336313e-05, "loss": 0.2366, "step": 7375500 }, { "epoch": 4.42, "learning_rate": 2.507581055670687e-05, "loss": 0.2382, "step": 7376000 }, { "epoch": 4.42, "learning_rate": 2.5073710591146304e-05, "loss": 0.2381, "step": 7376500 }, { "epoch": 4.42, "learning_rate": 2.507161062558574e-05, "loss": 0.2422, "step": 7377000 }, { "epoch": 4.42, "learning_rate": 2.5069510660025178e-05, "loss": 0.2363, "step": 7377500 }, { "epoch": 4.42, "learning_rate": 2.506741489439573e-05, "loss": 0.2435, "step": 7378000 }, { "epoch": 4.42, "learning_rate": 2.5065319128766288e-05, "loss": 0.2352, "step": 7378500 }, { "epoch": 4.42, "learning_rate": 2.5063219163205725e-05, "loss": 0.2451, "step": 7379000 }, { "epoch": 4.42, "learning_rate": 2.506111919764516e-05, "loss": 0.2424, "step": 7379500 }, { "epoch": 4.42, "learning_rate": 2.5059019232084592e-05, "loss": 0.238, "step": 7380000 }, { "epoch": 4.42, "learning_rate": 2.505691926652403e-05, "loss": 0.2447, "step": 7380500 }, { "epoch": 4.43, "learning_rate": 2.5054819300963462e-05, "loss": 0.2437, "step": 7381000 }, { "epoch": 4.43, "learning_rate": 2.50527193354029e-05, "loss": 0.2441, "step": 7381500 }, { "epoch": 4.43, "learning_rate": 2.5050619369842336e-05, "loss": 0.2402, "step": 7382000 }, { "epoch": 4.43, "learning_rate": 2.504852360421289e-05, "loss": 0.2442, "step": 7382500 }, { "epoch": 4.43, "learning_rate": 2.5046423638652326e-05, "loss": 0.2344, "step": 7383000 }, { "epoch": 4.43, "learning_rate": 2.504432367309176e-05, "loss": 0.242, "step": 7383500 }, { "epoch": 4.43, "learning_rate": 2.5042223707531197e-05, "loss": 0.247, "step": 7384000 }, { "epoch": 4.43, "learning_rate": 2.5040123741970633e-05, "loss": 0.24, "step": 7384500 }, { "epoch": 4.43, "learning_rate": 2.5038023776410064e-05, "loss": 0.2394, "step": 7385000 }, { "epoch": 4.43, "learning_rate": 2.50359238108495e-05, "loss": 0.2408, "step": 7385500 }, { "epoch": 4.43, "learning_rate": 2.5033823845288937e-05, "loss": 0.2439, "step": 7386000 }, { "epoch": 4.43, "learning_rate": 2.5031728079659494e-05, "loss": 0.2358, "step": 7386500 }, { "epoch": 4.43, "learning_rate": 2.502962811409893e-05, "loss": 0.248, "step": 7387000 }, { "epoch": 4.43, "learning_rate": 2.502752814853836e-05, "loss": 0.2364, "step": 7387500 }, { "epoch": 4.43, "learning_rate": 2.5025428182977798e-05, "loss": 0.2408, "step": 7388000 }, { "epoch": 4.43, "learning_rate": 2.5023328217417235e-05, "loss": 0.2367, "step": 7388500 }, { "epoch": 4.43, "learning_rate": 2.5021232451787792e-05, "loss": 0.2424, "step": 7389000 }, { "epoch": 4.43, "learning_rate": 2.5019132486227225e-05, "loss": 0.2424, "step": 7389500 }, { "epoch": 4.43, "learning_rate": 2.5017036720597782e-05, "loss": 0.2363, "step": 7390000 }, { "epoch": 4.43, "learning_rate": 2.5014936755037216e-05, "loss": 0.2438, "step": 7390500 }, { "epoch": 4.43, "learning_rate": 2.5012836789476653e-05, "loss": 0.2383, "step": 7391000 }, { "epoch": 4.43, "learning_rate": 2.501073682391609e-05, "loss": 0.2405, "step": 7391500 }, { "epoch": 4.43, "learning_rate": 2.5008641058286643e-05, "loss": 0.2407, "step": 7392000 }, { "epoch": 4.43, "learning_rate": 2.500654109272608e-05, "loss": 0.2422, "step": 7392500 }, { "epoch": 4.43, "learning_rate": 2.5004441127165513e-05, "loss": 0.2342, "step": 7393000 }, { "epoch": 4.43, "learning_rate": 2.500234116160495e-05, "loss": 0.2333, "step": 7393500 }, { "epoch": 4.43, "learning_rate": 2.5000245395975507e-05, "loss": 0.2382, "step": 7394000 }, { "epoch": 4.43, "learning_rate": 2.499814543041494e-05, "loss": 0.24, "step": 7394500 }, { "epoch": 4.43, "learning_rate": 2.4996045464854374e-05, "loss": 0.2441, "step": 7395000 }, { "epoch": 4.43, "learning_rate": 2.499394549929381e-05, "loss": 0.2424, "step": 7395500 }, { "epoch": 4.43, "learning_rate": 2.4991845533733248e-05, "loss": 0.2417, "step": 7396000 }, { "epoch": 4.43, "learning_rate": 2.4989745568172685e-05, "loss": 0.2391, "step": 7396500 }, { "epoch": 4.43, "learning_rate": 2.4987645602612115e-05, "loss": 0.2381, "step": 7397000 }, { "epoch": 4.44, "learning_rate": 2.498554563705155e-05, "loss": 0.2452, "step": 7397500 }, { "epoch": 4.44, "learning_rate": 2.498344987142211e-05, "loss": 0.2394, "step": 7398000 }, { "epoch": 4.44, "learning_rate": 2.4981349905861545e-05, "loss": 0.2406, "step": 7398500 }, { "epoch": 4.44, "learning_rate": 2.497924994030098e-05, "loss": 0.244, "step": 7399000 }, { "epoch": 4.44, "learning_rate": 2.4977149974740412e-05, "loss": 0.2422, "step": 7399500 }, { "epoch": 4.44, "learning_rate": 2.497505420911097e-05, "loss": 0.2387, "step": 7400000 }, { "epoch": 4.44, "eval_loss": 0.22435419261455536, "eval_runtime": 1453.381, "eval_samples_per_second": 362.41, "eval_steps_per_second": 60.402, "step": 7400000 }, { "epoch": 4.44, "learning_rate": 2.4972954243550406e-05, "loss": 0.2385, "step": 7400500 }, { "epoch": 4.44, "learning_rate": 2.4970854277989843e-05, "loss": 0.2377, "step": 7401000 }, { "epoch": 4.44, "learning_rate": 2.4968754312429276e-05, "loss": 0.2453, "step": 7401500 }, { "epoch": 4.44, "learning_rate": 2.496665854679983e-05, "loss": 0.2431, "step": 7402000 }, { "epoch": 4.44, "learning_rate": 2.496456278117039e-05, "loss": 0.2473, "step": 7402500 }, { "epoch": 4.44, "learning_rate": 2.4962462815609824e-05, "loss": 0.2373, "step": 7403000 }, { "epoch": 4.44, "learning_rate": 2.496036285004926e-05, "loss": 0.2375, "step": 7403500 }, { "epoch": 4.44, "learning_rate": 2.4958262884488694e-05, "loss": 0.2406, "step": 7404000 }, { "epoch": 4.44, "learning_rate": 2.4956162918928127e-05, "loss": 0.241, "step": 7404500 }, { "epoch": 4.44, "learning_rate": 2.4954062953367564e-05, "loss": 0.243, "step": 7405000 }, { "epoch": 4.44, "learning_rate": 2.4951962987807e-05, "loss": 0.2373, "step": 7405500 }, { "epoch": 4.44, "learning_rate": 2.4949867222177555e-05, "loss": 0.2402, "step": 7406000 }, { "epoch": 4.44, "learning_rate": 2.494776725661699e-05, "loss": 0.2423, "step": 7406500 }, { "epoch": 4.44, "learning_rate": 2.4945667291056425e-05, "loss": 0.2402, "step": 7407000 }, { "epoch": 4.44, "learning_rate": 2.4943567325495862e-05, "loss": 0.2384, "step": 7407500 }, { "epoch": 4.44, "learning_rate": 2.49414673599353e-05, "loss": 0.2369, "step": 7408000 }, { "epoch": 4.44, "learning_rate": 2.4939367394374732e-05, "loss": 0.2379, "step": 7408500 }, { "epoch": 4.44, "learning_rate": 2.4937267428814166e-05, "loss": 0.2385, "step": 7409000 }, { "epoch": 4.44, "learning_rate": 2.4935167463253602e-05, "loss": 0.2455, "step": 7409500 }, { "epoch": 4.44, "learning_rate": 2.493307169762416e-05, "loss": 0.2433, "step": 7410000 }, { "epoch": 4.44, "learning_rate": 2.4930971732063596e-05, "loss": 0.2396, "step": 7410500 }, { "epoch": 4.44, "learning_rate": 2.492887596643415e-05, "loss": 0.2339, "step": 7411000 }, { "epoch": 4.44, "learning_rate": 2.4926776000873583e-05, "loss": 0.2395, "step": 7411500 }, { "epoch": 4.44, "learning_rate": 2.492467603531302e-05, "loss": 0.2348, "step": 7412000 }, { "epoch": 4.44, "learning_rate": 2.4922576069752457e-05, "loss": 0.2381, "step": 7412500 }, { "epoch": 4.44, "learning_rate": 2.492047610419189e-05, "loss": 0.2437, "step": 7413000 }, { "epoch": 4.44, "learning_rate": 2.4918376138631324e-05, "loss": 0.2383, "step": 7413500 }, { "epoch": 4.44, "learning_rate": 2.491627617307076e-05, "loss": 0.2438, "step": 7414000 }, { "epoch": 4.45, "learning_rate": 2.4914176207510198e-05, "loss": 0.2464, "step": 7414500 }, { "epoch": 4.45, "learning_rate": 2.4912080441880755e-05, "loss": 0.236, "step": 7415000 }, { "epoch": 4.45, "learning_rate": 2.4909980476320188e-05, "loss": 0.2463, "step": 7415500 }, { "epoch": 4.45, "learning_rate": 2.490788051075962e-05, "loss": 0.2392, "step": 7416000 }, { "epoch": 4.45, "learning_rate": 2.490578054519906e-05, "loss": 0.2365, "step": 7416500 }, { "epoch": 4.45, "learning_rate": 2.4903680579638492e-05, "loss": 0.2365, "step": 7417000 }, { "epoch": 4.45, "learning_rate": 2.4901584814009052e-05, "loss": 0.236, "step": 7417500 }, { "epoch": 4.45, "learning_rate": 2.4899484848448486e-05, "loss": 0.2396, "step": 7418000 }, { "epoch": 4.45, "learning_rate": 2.489738488288792e-05, "loss": 0.2293, "step": 7418500 }, { "epoch": 4.45, "learning_rate": 2.4895284917327356e-05, "loss": 0.2348, "step": 7419000 }, { "epoch": 4.45, "learning_rate": 2.4893189151697913e-05, "loss": 0.237, "step": 7419500 }, { "epoch": 4.45, "learning_rate": 2.489109338606847e-05, "loss": 0.2421, "step": 7420000 }, { "epoch": 4.45, "learning_rate": 2.4888993420507903e-05, "loss": 0.2445, "step": 7420500 }, { "epoch": 4.45, "learning_rate": 2.4886893454947337e-05, "loss": 0.2386, "step": 7421000 }, { "epoch": 4.45, "learning_rate": 2.4884793489386774e-05, "loss": 0.2383, "step": 7421500 }, { "epoch": 4.45, "learning_rate": 2.488269352382621e-05, "loss": 0.2419, "step": 7422000 }, { "epoch": 4.45, "learning_rate": 2.4880593558265644e-05, "loss": 0.243, "step": 7422500 }, { "epoch": 4.45, "learning_rate": 2.4878493592705077e-05, "loss": 0.241, "step": 7423000 }, { "epoch": 4.45, "learning_rate": 2.4876393627144514e-05, "loss": 0.2391, "step": 7423500 }, { "epoch": 4.45, "learning_rate": 2.487430206144619e-05, "loss": 0.2442, "step": 7424000 }, { "epoch": 4.45, "learning_rate": 2.4872202095885628e-05, "loss": 0.2391, "step": 7424500 }, { "epoch": 4.45, "learning_rate": 2.487010213032506e-05, "loss": 0.2375, "step": 7425000 }, { "epoch": 4.45, "learning_rate": 2.4868002164764495e-05, "loss": 0.2393, "step": 7425500 }, { "epoch": 4.45, "learning_rate": 2.4865902199203932e-05, "loss": 0.2447, "step": 7426000 }, { "epoch": 4.45, "learning_rate": 2.486380643357449e-05, "loss": 0.2492, "step": 7426500 }, { "epoch": 4.45, "learning_rate": 2.4861706468013926e-05, "loss": 0.242, "step": 7427000 }, { "epoch": 4.45, "learning_rate": 2.485960650245336e-05, "loss": 0.2348, "step": 7427500 }, { "epoch": 4.45, "learning_rate": 2.4857506536892793e-05, "loss": 0.2388, "step": 7428000 }, { "epoch": 4.45, "learning_rate": 2.485540657133223e-05, "loss": 0.2434, "step": 7428500 }, { "epoch": 4.45, "learning_rate": 2.4853306605771666e-05, "loss": 0.2436, "step": 7429000 }, { "epoch": 4.45, "learning_rate": 2.48512066402111e-05, "loss": 0.2408, "step": 7429500 }, { "epoch": 4.45, "learning_rate": 2.4849106674650537e-05, "loss": 0.2421, "step": 7430000 }, { "epoch": 4.45, "learning_rate": 2.484701090902109e-05, "loss": 0.2376, "step": 7430500 }, { "epoch": 4.46, "learning_rate": 2.4844910943460527e-05, "loss": 0.2421, "step": 7431000 }, { "epoch": 4.46, "learning_rate": 2.4842810977899964e-05, "loss": 0.2406, "step": 7431500 }, { "epoch": 4.46, "learning_rate": 2.4840711012339397e-05, "loss": 0.2479, "step": 7432000 }, { "epoch": 4.46, "learning_rate": 2.483861524670995e-05, "loss": 0.243, "step": 7432500 }, { "epoch": 4.46, "learning_rate": 2.4836515281149388e-05, "loss": 0.2398, "step": 7433000 }, { "epoch": 4.46, "learning_rate": 2.4834415315588825e-05, "loss": 0.2384, "step": 7433500 }, { "epoch": 4.46, "learning_rate": 2.4832315350028258e-05, "loss": 0.2411, "step": 7434000 }, { "epoch": 4.46, "learning_rate": 2.483021958439882e-05, "loss": 0.2413, "step": 7434500 }, { "epoch": 4.46, "learning_rate": 2.482811961883825e-05, "loss": 0.2423, "step": 7435000 }, { "epoch": 4.46, "learning_rate": 2.4826023853208805e-05, "loss": 0.245, "step": 7435500 }, { "epoch": 4.46, "learning_rate": 2.4823923887648242e-05, "loss": 0.2379, "step": 7436000 }, { "epoch": 4.46, "learning_rate": 2.482182392208768e-05, "loss": 0.2382, "step": 7436500 }, { "epoch": 4.46, "learning_rate": 2.4819723956527113e-05, "loss": 0.2368, "step": 7437000 }, { "epoch": 4.46, "learning_rate": 2.4817623990966546e-05, "loss": 0.2408, "step": 7437500 }, { "epoch": 4.46, "learning_rate": 2.4815524025405983e-05, "loss": 0.2379, "step": 7438000 }, { "epoch": 4.46, "learning_rate": 2.481342405984542e-05, "loss": 0.2347, "step": 7438500 }, { "epoch": 4.46, "learning_rate": 2.4811324094284853e-05, "loss": 0.2334, "step": 7439000 }, { "epoch": 4.46, "learning_rate": 2.4809228328655407e-05, "loss": 0.2418, "step": 7439500 }, { "epoch": 4.46, "learning_rate": 2.4807128363094844e-05, "loss": 0.242, "step": 7440000 }, { "epoch": 4.46, "learning_rate": 2.480502839753428e-05, "loss": 0.2419, "step": 7440500 }, { "epoch": 4.46, "learning_rate": 2.4802928431973714e-05, "loss": 0.2354, "step": 7441000 }, { "epoch": 4.46, "learning_rate": 2.480082846641315e-05, "loss": 0.2376, "step": 7441500 }, { "epoch": 4.46, "learning_rate": 2.4798728500852588e-05, "loss": 0.2402, "step": 7442000 }, { "epoch": 4.46, "learning_rate": 2.479662853529202e-05, "loss": 0.2344, "step": 7442500 }, { "epoch": 4.46, "learning_rate": 2.4794528569731455e-05, "loss": 0.2351, "step": 7443000 }, { "epoch": 4.46, "learning_rate": 2.479243280410201e-05, "loss": 0.246, "step": 7443500 }, { "epoch": 4.46, "learning_rate": 2.479033703847257e-05, "loss": 0.238, "step": 7444000 }, { "epoch": 4.46, "learning_rate": 2.4788237072912002e-05, "loss": 0.2416, "step": 7444500 }, { "epoch": 4.46, "learning_rate": 2.478613710735144e-05, "loss": 0.2362, "step": 7445000 }, { "epoch": 4.46, "learning_rate": 2.4784037141790876e-05, "loss": 0.2355, "step": 7445500 }, { "epoch": 4.46, "learning_rate": 2.478193717623031e-05, "loss": 0.2436, "step": 7446000 }, { "epoch": 4.46, "learning_rate": 2.4779837210669746e-05, "loss": 0.2359, "step": 7446500 }, { "epoch": 4.46, "learning_rate": 2.477773724510918e-05, "loss": 0.2341, "step": 7447000 }, { "epoch": 4.47, "learning_rate": 2.4775637279548613e-05, "loss": 0.2373, "step": 7447500 }, { "epoch": 4.47, "learning_rate": 2.477354151391917e-05, "loss": 0.2337, "step": 7448000 }, { "epoch": 4.47, "learning_rate": 2.4771441548358607e-05, "loss": 0.2401, "step": 7448500 }, { "epoch": 4.47, "learning_rate": 2.4769341582798044e-05, "loss": 0.2404, "step": 7449000 }, { "epoch": 4.47, "learning_rate": 2.4767241617237477e-05, "loss": 0.2408, "step": 7449500 }, { "epoch": 4.47, "learning_rate": 2.4765145851608034e-05, "loss": 0.2431, "step": 7450000 }, { "epoch": 4.47, "learning_rate": 2.4763045886047467e-05, "loss": 0.2394, "step": 7450500 }, { "epoch": 4.47, "learning_rate": 2.4760945920486904e-05, "loss": 0.2368, "step": 7451000 }, { "epoch": 4.47, "learning_rate": 2.475884595492634e-05, "loss": 0.2394, "step": 7451500 }, { "epoch": 4.47, "learning_rate": 2.4756750189296895e-05, "loss": 0.2413, "step": 7452000 }, { "epoch": 4.47, "learning_rate": 2.475465022373633e-05, "loss": 0.2388, "step": 7452500 }, { "epoch": 4.47, "learning_rate": 2.4752550258175765e-05, "loss": 0.2416, "step": 7453000 }, { "epoch": 4.47, "learning_rate": 2.4750450292615202e-05, "loss": 0.2432, "step": 7453500 }, { "epoch": 4.47, "learning_rate": 2.4748354526985755e-05, "loss": 0.2447, "step": 7454000 }, { "epoch": 4.47, "learning_rate": 2.4746254561425192e-05, "loss": 0.2426, "step": 7454500 }, { "epoch": 4.47, "learning_rate": 2.474415459586463e-05, "loss": 0.2356, "step": 7455000 }, { "epoch": 4.47, "learning_rate": 2.4742054630304063e-05, "loss": 0.2431, "step": 7455500 }, { "epoch": 4.47, "learning_rate": 2.4739958864674616e-05, "loss": 0.2357, "step": 7456000 }, { "epoch": 4.47, "learning_rate": 2.4737858899114053e-05, "loss": 0.2399, "step": 7456500 }, { "epoch": 4.47, "learning_rate": 2.473575893355349e-05, "loss": 0.2377, "step": 7457000 }, { "epoch": 4.47, "learning_rate": 2.4733658967992923e-05, "loss": 0.2416, "step": 7457500 }, { "epoch": 4.47, "learning_rate": 2.4731563202363484e-05, "loss": 0.2405, "step": 7458000 }, { "epoch": 4.47, "learning_rate": 2.4729463236802914e-05, "loss": 0.2398, "step": 7458500 }, { "epoch": 4.47, "learning_rate": 2.472736327124235e-05, "loss": 0.2422, "step": 7459000 }, { "epoch": 4.47, "learning_rate": 2.4725267505612908e-05, "loss": 0.241, "step": 7459500 }, { "epoch": 4.47, "learning_rate": 2.4723167540052344e-05, "loss": 0.237, "step": 7460000 }, { "epoch": 4.47, "learning_rate": 2.4721067574491778e-05, "loss": 0.2416, "step": 7460500 }, { "epoch": 4.47, "learning_rate": 2.471896760893121e-05, "loss": 0.2413, "step": 7461000 }, { "epoch": 4.47, "learning_rate": 2.4716867643370648e-05, "loss": 0.2372, "step": 7461500 }, { "epoch": 4.47, "learning_rate": 2.4714767677810085e-05, "loss": 0.2386, "step": 7462000 }, { "epoch": 4.47, "learning_rate": 2.471266771224952e-05, "loss": 0.239, "step": 7462500 }, { "epoch": 4.47, "learning_rate": 2.4710567746688955e-05, "loss": 0.236, "step": 7463000 }, { "epoch": 4.47, "learning_rate": 2.470847198105951e-05, "loss": 0.2436, "step": 7463500 }, { "epoch": 4.47, "learning_rate": 2.4706372015498946e-05, "loss": 0.2411, "step": 7464000 }, { "epoch": 4.48, "learning_rate": 2.470427204993838e-05, "loss": 0.2417, "step": 7464500 }, { "epoch": 4.48, "learning_rate": 2.470217628430894e-05, "loss": 0.2399, "step": 7465000 }, { "epoch": 4.48, "learning_rate": 2.4700076318748373e-05, "loss": 0.2344, "step": 7465500 }, { "epoch": 4.48, "learning_rate": 2.4697976353187806e-05, "loss": 0.241, "step": 7466000 }, { "epoch": 4.48, "learning_rate": 2.4695876387627243e-05, "loss": 0.2411, "step": 7466500 }, { "epoch": 4.48, "learning_rate": 2.4693776422066677e-05, "loss": 0.2369, "step": 7467000 }, { "epoch": 4.48, "learning_rate": 2.4691676456506114e-05, "loss": 0.2399, "step": 7467500 }, { "epoch": 4.48, "learning_rate": 2.468957649094555e-05, "loss": 0.2374, "step": 7468000 }, { "epoch": 4.48, "learning_rate": 2.468747652538498e-05, "loss": 0.2399, "step": 7468500 }, { "epoch": 4.48, "learning_rate": 2.468538075975554e-05, "loss": 0.2437, "step": 7469000 }, { "epoch": 4.48, "learning_rate": 2.4683280794194974e-05, "loss": 0.2395, "step": 7469500 }, { "epoch": 4.48, "learning_rate": 2.468118082863441e-05, "loss": 0.2428, "step": 7470000 }, { "epoch": 4.48, "learning_rate": 2.4679085063004965e-05, "loss": 0.2386, "step": 7470500 }, { "epoch": 4.48, "learning_rate": 2.46769850974444e-05, "loss": 0.2374, "step": 7471000 }, { "epoch": 4.48, "learning_rate": 2.4674885131883835e-05, "loss": 0.2377, "step": 7471500 }, { "epoch": 4.48, "learning_rate": 2.4672785166323272e-05, "loss": 0.2402, "step": 7472000 }, { "epoch": 4.48, "learning_rate": 2.467068520076271e-05, "loss": 0.2401, "step": 7472500 }, { "epoch": 4.48, "learning_rate": 2.4668585235202142e-05, "loss": 0.2412, "step": 7473000 }, { "epoch": 4.48, "learning_rate": 2.46664894695727e-05, "loss": 0.2355, "step": 7473500 }, { "epoch": 4.48, "learning_rate": 2.4664389504012133e-05, "loss": 0.2377, "step": 7474000 }, { "epoch": 4.48, "learning_rate": 2.466228953845157e-05, "loss": 0.2435, "step": 7474500 }, { "epoch": 4.48, "learning_rate": 2.4660189572891006e-05, "loss": 0.2358, "step": 7475000 }, { "epoch": 4.48, "learning_rate": 2.4658089607330436e-05, "loss": 0.2393, "step": 7475500 }, { "epoch": 4.48, "learning_rate": 2.4655989641769873e-05, "loss": 0.2415, "step": 7476000 }, { "epoch": 4.48, "learning_rate": 2.465388967620931e-05, "loss": 0.2374, "step": 7476500 }, { "epoch": 4.48, "learning_rate": 2.4651789710648744e-05, "loss": 0.2379, "step": 7477000 }, { "epoch": 4.48, "learning_rate": 2.4649693945019304e-05, "loss": 0.2503, "step": 7477500 }, { "epoch": 4.48, "learning_rate": 2.4647593979458734e-05, "loss": 0.2381, "step": 7478000 }, { "epoch": 4.48, "learning_rate": 2.464549821382929e-05, "loss": 0.2363, "step": 7478500 }, { "epoch": 4.48, "learning_rate": 2.4643398248268728e-05, "loss": 0.2427, "step": 7479000 }, { "epoch": 4.48, "learning_rate": 2.4641298282708165e-05, "loss": 0.2358, "step": 7479500 }, { "epoch": 4.48, "learning_rate": 2.4639198317147598e-05, "loss": 0.2423, "step": 7480000 }, { "epoch": 4.48, "learning_rate": 2.463709835158703e-05, "loss": 0.241, "step": 7480500 }, { "epoch": 4.49, "learning_rate": 2.463499838602647e-05, "loss": 0.2426, "step": 7481000 }, { "epoch": 4.49, "learning_rate": 2.4632902620397025e-05, "loss": 0.24, "step": 7481500 }, { "epoch": 4.49, "learning_rate": 2.4630802654836462e-05, "loss": 0.2407, "step": 7482000 }, { "epoch": 4.49, "learning_rate": 2.4628702689275896e-05, "loss": 0.2405, "step": 7482500 }, { "epoch": 4.49, "learning_rate": 2.462660272371533e-05, "loss": 0.2406, "step": 7483000 }, { "epoch": 4.49, "learning_rate": 2.4624502758154766e-05, "loss": 0.2419, "step": 7483500 }, { "epoch": 4.49, "learning_rate": 2.4622402792594203e-05, "loss": 0.2404, "step": 7484000 }, { "epoch": 4.49, "learning_rate": 2.4620302827033636e-05, "loss": 0.2395, "step": 7484500 }, { "epoch": 4.49, "learning_rate": 2.461820706140419e-05, "loss": 0.2417, "step": 7485000 }, { "epoch": 4.49, "learning_rate": 2.4616107095843627e-05, "loss": 0.2432, "step": 7485500 }, { "epoch": 4.49, "learning_rate": 2.4614007130283064e-05, "loss": 0.2343, "step": 7486000 }, { "epoch": 4.49, "learning_rate": 2.4611907164722497e-05, "loss": 0.2405, "step": 7486500 }, { "epoch": 4.49, "learning_rate": 2.4609807199161934e-05, "loss": 0.2419, "step": 7487000 }, { "epoch": 4.49, "learning_rate": 2.460770723360137e-05, "loss": 0.2386, "step": 7487500 }, { "epoch": 4.49, "learning_rate": 2.4605607268040804e-05, "loss": 0.2386, "step": 7488000 }, { "epoch": 4.49, "learning_rate": 2.4603507302480238e-05, "loss": 0.2336, "step": 7488500 }, { "epoch": 4.49, "learning_rate": 2.4601411536850795e-05, "loss": 0.2408, "step": 7489000 }, { "epoch": 4.49, "learning_rate": 2.459931157129023e-05, "loss": 0.2344, "step": 7489500 }, { "epoch": 4.49, "learning_rate": 2.4597211605729668e-05, "loss": 0.2349, "step": 7490000 }, { "epoch": 4.49, "learning_rate": 2.45951116401691e-05, "loss": 0.2393, "step": 7490500 }, { "epoch": 4.49, "learning_rate": 2.459301587453966e-05, "loss": 0.2437, "step": 7491000 }, { "epoch": 4.49, "learning_rate": 2.4590915908979092e-05, "loss": 0.2463, "step": 7491500 }, { "epoch": 4.49, "learning_rate": 2.458881594341853e-05, "loss": 0.241, "step": 7492000 }, { "epoch": 4.49, "learning_rate": 2.4586715977857962e-05, "loss": 0.2409, "step": 7492500 }, { "epoch": 4.49, "learning_rate": 2.458462021222852e-05, "loss": 0.2371, "step": 7493000 }, { "epoch": 4.49, "learning_rate": 2.4582520246667953e-05, "loss": 0.2432, "step": 7493500 }, { "epoch": 4.49, "learning_rate": 2.458042028110739e-05, "loss": 0.2465, "step": 7494000 }, { "epoch": 4.49, "learning_rate": 2.4578320315546827e-05, "loss": 0.2386, "step": 7494500 }, { "epoch": 4.49, "learning_rate": 2.457622454991738e-05, "loss": 0.2415, "step": 7495000 }, { "epoch": 4.49, "learning_rate": 2.4574124584356817e-05, "loss": 0.2378, "step": 7495500 }, { "epoch": 4.49, "learning_rate": 2.457202461879625e-05, "loss": 0.2351, "step": 7496000 }, { "epoch": 4.49, "learning_rate": 2.4569924653235687e-05, "loss": 0.2398, "step": 7496500 }, { "epoch": 4.49, "learning_rate": 2.456782888760624e-05, "loss": 0.2398, "step": 7497000 }, { "epoch": 4.5, "learning_rate": 2.4565728922045678e-05, "loss": 0.2429, "step": 7497500 }, { "epoch": 4.5, "learning_rate": 2.4563628956485115e-05, "loss": 0.2414, "step": 7498000 }, { "epoch": 4.5, "learning_rate": 2.4561528990924548e-05, "loss": 0.2384, "step": 7498500 }, { "epoch": 4.5, "learning_rate": 2.4559433225295105e-05, "loss": 0.2386, "step": 7499000 }, { "epoch": 4.5, "learning_rate": 2.455733325973454e-05, "loss": 0.2438, "step": 7499500 }, { "epoch": 4.5, "learning_rate": 2.4555233294173975e-05, "loss": 0.2439, "step": 7500000 }, { "epoch": 4.5, "eval_loss": 0.22402136027812958, "eval_runtime": 1460.9126, "eval_samples_per_second": 360.542, "eval_steps_per_second": 60.091, "step": 7500000 }, { "epoch": 4.5, "learning_rate": 2.455313332861341e-05, "loss": 0.2393, "step": 7500500 }, { "epoch": 4.5, "learning_rate": 2.455103756298397e-05, "loss": 0.2407, "step": 7501000 }, { "epoch": 4.5, "learning_rate": 2.4548937597423403e-05, "loss": 0.2302, "step": 7501500 }, { "epoch": 4.5, "learning_rate": 2.4546837631862836e-05, "loss": 0.2395, "step": 7502000 }, { "epoch": 4.5, "learning_rate": 2.4544737666302273e-05, "loss": 0.2402, "step": 7502500 }, { "epoch": 4.5, "learning_rate": 2.454264190067283e-05, "loss": 0.2333, "step": 7503000 }, { "epoch": 4.5, "learning_rate": 2.4540541935112263e-05, "loss": 0.2366, "step": 7503500 }, { "epoch": 4.5, "learning_rate": 2.45384419695517e-05, "loss": 0.2395, "step": 7504000 }, { "epoch": 4.5, "learning_rate": 2.4536342003991134e-05, "loss": 0.2416, "step": 7504500 }, { "epoch": 4.5, "learning_rate": 2.453424623836169e-05, "loss": 0.235, "step": 7505000 }, { "epoch": 4.5, "learning_rate": 2.4532146272801127e-05, "loss": 0.2338, "step": 7505500 }, { "epoch": 4.5, "learning_rate": 2.453004630724056e-05, "loss": 0.2393, "step": 7506000 }, { "epoch": 4.5, "learning_rate": 2.4527946341679994e-05, "loss": 0.2383, "step": 7506500 }, { "epoch": 4.5, "learning_rate": 2.452585057605055e-05, "loss": 0.2451, "step": 7507000 }, { "epoch": 4.5, "learning_rate": 2.4523754810421108e-05, "loss": 0.2348, "step": 7507500 }, { "epoch": 4.5, "learning_rate": 2.4521654844860545e-05, "loss": 0.2417, "step": 7508000 }, { "epoch": 4.5, "learning_rate": 2.451955487929998e-05, "loss": 0.2395, "step": 7508500 }, { "epoch": 4.5, "learning_rate": 2.4517454913739412e-05, "loss": 0.2401, "step": 7509000 }, { "epoch": 4.5, "learning_rate": 2.451535494817885e-05, "loss": 0.2382, "step": 7509500 }, { "epoch": 4.5, "learning_rate": 2.4513254982618286e-05, "loss": 0.2367, "step": 7510000 }, { "epoch": 4.5, "learning_rate": 2.451115501705772e-05, "loss": 0.235, "step": 7510500 }, { "epoch": 4.5, "learning_rate": 2.4509055051497156e-05, "loss": 0.2468, "step": 7511000 }, { "epoch": 4.5, "learning_rate": 2.450695928586771e-05, "loss": 0.2386, "step": 7511500 }, { "epoch": 4.5, "learning_rate": 2.4504859320307146e-05, "loss": 0.2431, "step": 7512000 }, { "epoch": 4.5, "learning_rate": 2.4502759354746583e-05, "loss": 0.2339, "step": 7512500 }, { "epoch": 4.5, "learning_rate": 2.4500659389186017e-05, "loss": 0.239, "step": 7513000 }, { "epoch": 4.5, "learning_rate": 2.4498563623556574e-05, "loss": 0.2416, "step": 7513500 }, { "epoch": 4.5, "learning_rate": 2.4496463657996007e-05, "loss": 0.2461, "step": 7514000 }, { "epoch": 4.51, "learning_rate": 2.4494363692435444e-05, "loss": 0.2419, "step": 7514500 }, { "epoch": 4.51, "learning_rate": 2.4492267926806e-05, "loss": 0.2387, "step": 7515000 }, { "epoch": 4.51, "learning_rate": 2.4490167961245438e-05, "loss": 0.2356, "step": 7515500 }, { "epoch": 4.51, "learning_rate": 2.4488067995684868e-05, "loss": 0.2422, "step": 7516000 }, { "epoch": 4.51, "learning_rate": 2.4485972230055428e-05, "loss": 0.242, "step": 7516500 }, { "epoch": 4.51, "learning_rate": 2.4483872264494862e-05, "loss": 0.2433, "step": 7517000 }, { "epoch": 4.51, "learning_rate": 2.44817722989343e-05, "loss": 0.2455, "step": 7517500 }, { "epoch": 4.51, "learning_rate": 2.4479672333373732e-05, "loss": 0.2416, "step": 7518000 }, { "epoch": 4.51, "learning_rate": 2.4477572367813165e-05, "loss": 0.2396, "step": 7518500 }, { "epoch": 4.51, "learning_rate": 2.4475472402252602e-05, "loss": 0.2367, "step": 7519000 }, { "epoch": 4.51, "learning_rate": 2.447337243669204e-05, "loss": 0.2419, "step": 7519500 }, { "epoch": 4.51, "learning_rate": 2.4471272471131473e-05, "loss": 0.2391, "step": 7520000 }, { "epoch": 4.51, "learning_rate": 2.446917250557091e-05, "loss": 0.2455, "step": 7520500 }, { "epoch": 4.51, "learning_rate": 2.4467072540010343e-05, "loss": 0.2506, "step": 7521000 }, { "epoch": 4.51, "learning_rate": 2.4464972574449776e-05, "loss": 0.2391, "step": 7521500 }, { "epoch": 4.51, "learning_rate": 2.4462872608889213e-05, "loss": 0.2364, "step": 7522000 }, { "epoch": 4.51, "learning_rate": 2.446077684325977e-05, "loss": 0.2345, "step": 7522500 }, { "epoch": 4.51, "learning_rate": 2.4458676877699207e-05, "loss": 0.236, "step": 7523000 }, { "epoch": 4.51, "learning_rate": 2.445657691213864e-05, "loss": 0.2436, "step": 7523500 }, { "epoch": 4.51, "learning_rate": 2.4454476946578074e-05, "loss": 0.2427, "step": 7524000 }, { "epoch": 4.51, "learning_rate": 2.445238118094863e-05, "loss": 0.2478, "step": 7524500 }, { "epoch": 4.51, "learning_rate": 2.4450281215388068e-05, "loss": 0.2353, "step": 7525000 }, { "epoch": 4.51, "learning_rate": 2.44481812498275e-05, "loss": 0.2379, "step": 7525500 }, { "epoch": 4.51, "learning_rate": 2.4446081284266938e-05, "loss": 0.2383, "step": 7526000 }, { "epoch": 4.51, "learning_rate": 2.4443985518637495e-05, "loss": 0.2385, "step": 7526500 }, { "epoch": 4.51, "learning_rate": 2.444188555307693e-05, "loss": 0.2451, "step": 7527000 }, { "epoch": 4.51, "learning_rate": 2.4439785587516365e-05, "loss": 0.2401, "step": 7527500 }, { "epoch": 4.51, "learning_rate": 2.44376856219558e-05, "loss": 0.2455, "step": 7528000 }, { "epoch": 4.51, "learning_rate": 2.4435585656395232e-05, "loss": 0.2394, "step": 7528500 }, { "epoch": 4.51, "learning_rate": 2.443348569083467e-05, "loss": 0.2381, "step": 7529000 }, { "epoch": 4.51, "learning_rate": 2.4431385725274106e-05, "loss": 0.244, "step": 7529500 }, { "epoch": 4.51, "learning_rate": 2.4429285759713543e-05, "loss": 0.2391, "step": 7530000 }, { "epoch": 4.51, "learning_rate": 2.4427189994084096e-05, "loss": 0.2404, "step": 7530500 }, { "epoch": 4.52, "learning_rate": 2.442509002852353e-05, "loss": 0.2401, "step": 7531000 }, { "epoch": 4.52, "learning_rate": 2.4422990062962967e-05, "loss": 0.2415, "step": 7531500 }, { "epoch": 4.52, "learning_rate": 2.4420894297333524e-05, "loss": 0.2344, "step": 7532000 }, { "epoch": 4.52, "learning_rate": 2.441879433177296e-05, "loss": 0.2427, "step": 7532500 }, { "epoch": 4.52, "learning_rate": 2.4416694366212394e-05, "loss": 0.2376, "step": 7533000 }, { "epoch": 4.52, "learning_rate": 2.4414594400651827e-05, "loss": 0.2382, "step": 7533500 }, { "epoch": 4.52, "learning_rate": 2.4412494435091264e-05, "loss": 0.2346, "step": 7534000 }, { "epoch": 4.52, "learning_rate": 2.44103944695307e-05, "loss": 0.2464, "step": 7534500 }, { "epoch": 4.52, "learning_rate": 2.4408294503970135e-05, "loss": 0.2376, "step": 7535000 }, { "epoch": 4.52, "learning_rate": 2.4406194538409568e-05, "loss": 0.2448, "step": 7535500 }, { "epoch": 4.52, "learning_rate": 2.4404098772780125e-05, "loss": 0.2389, "step": 7536000 }, { "epoch": 4.52, "learning_rate": 2.4401998807219562e-05, "loss": 0.2391, "step": 7536500 }, { "epoch": 4.52, "learning_rate": 2.4399898841659e-05, "loss": 0.2356, "step": 7537000 }, { "epoch": 4.52, "learning_rate": 2.4397798876098432e-05, "loss": 0.2397, "step": 7537500 }, { "epoch": 4.52, "learning_rate": 2.4395703110468986e-05, "loss": 0.2366, "step": 7538000 }, { "epoch": 4.52, "learning_rate": 2.4393603144908423e-05, "loss": 0.2328, "step": 7538500 }, { "epoch": 4.52, "learning_rate": 2.439150317934786e-05, "loss": 0.2392, "step": 7539000 }, { "epoch": 4.52, "learning_rate": 2.4389403213787293e-05, "loss": 0.2365, "step": 7539500 }, { "epoch": 4.52, "learning_rate": 2.438730744815785e-05, "loss": 0.2399, "step": 7540000 }, { "epoch": 4.52, "learning_rate": 2.4385207482597283e-05, "loss": 0.2403, "step": 7540500 }, { "epoch": 4.52, "learning_rate": 2.438310751703672e-05, "loss": 0.2432, "step": 7541000 }, { "epoch": 4.52, "learning_rate": 2.4381007551476157e-05, "loss": 0.2333, "step": 7541500 }, { "epoch": 4.52, "learning_rate": 2.4378911785846714e-05, "loss": 0.2373, "step": 7542000 }, { "epoch": 4.52, "learning_rate": 2.4376811820286147e-05, "loss": 0.2308, "step": 7542500 }, { "epoch": 4.52, "learning_rate": 2.437471185472558e-05, "loss": 0.2328, "step": 7543000 }, { "epoch": 4.52, "learning_rate": 2.4372611889165018e-05, "loss": 0.2353, "step": 7543500 }, { "epoch": 4.52, "learning_rate": 2.4370516123535575e-05, "loss": 0.2349, "step": 7544000 }, { "epoch": 4.52, "learning_rate": 2.4368420357906128e-05, "loss": 0.2454, "step": 7544500 }, { "epoch": 4.52, "learning_rate": 2.4366320392345565e-05, "loss": 0.2473, "step": 7545000 }, { "epoch": 4.52, "learning_rate": 2.4364220426785002e-05, "loss": 0.2394, "step": 7545500 }, { "epoch": 4.52, "learning_rate": 2.4362120461224435e-05, "loss": 0.2343, "step": 7546000 }, { "epoch": 4.52, "learning_rate": 2.4360020495663872e-05, "loss": 0.2421, "step": 7546500 }, { "epoch": 4.52, "learning_rate": 2.4357920530103306e-05, "loss": 0.2431, "step": 7547000 }, { "epoch": 4.53, "learning_rate": 2.435582056454274e-05, "loss": 0.2376, "step": 7547500 }, { "epoch": 4.53, "learning_rate": 2.4353724798913296e-05, "loss": 0.2379, "step": 7548000 }, { "epoch": 4.53, "learning_rate": 2.4351624833352733e-05, "loss": 0.2352, "step": 7548500 }, { "epoch": 4.53, "learning_rate": 2.434952486779217e-05, "loss": 0.2357, "step": 7549000 }, { "epoch": 4.53, "learning_rate": 2.4347424902231603e-05, "loss": 0.236, "step": 7549500 }, { "epoch": 4.53, "learning_rate": 2.4345324936671037e-05, "loss": 0.2386, "step": 7550000 }, { "epoch": 4.53, "learning_rate": 2.4343224971110474e-05, "loss": 0.2412, "step": 7550500 }, { "epoch": 4.53, "learning_rate": 2.434112500554991e-05, "loss": 0.2348, "step": 7551000 }, { "epoch": 4.53, "learning_rate": 2.4339025039989344e-05, "loss": 0.2376, "step": 7551500 }, { "epoch": 4.53, "learning_rate": 2.4336929274359897e-05, "loss": 0.2394, "step": 7552000 }, { "epoch": 4.53, "learning_rate": 2.4334829308799334e-05, "loss": 0.2419, "step": 7552500 }, { "epoch": 4.53, "learning_rate": 2.433272934323877e-05, "loss": 0.2377, "step": 7553000 }, { "epoch": 4.53, "learning_rate": 2.4330629377678205e-05, "loss": 0.2427, "step": 7553500 }, { "epoch": 4.53, "learning_rate": 2.432852941211764e-05, "loss": 0.2395, "step": 7554000 }, { "epoch": 4.53, "learning_rate": 2.4326429446557075e-05, "loss": 0.2375, "step": 7554500 }, { "epoch": 4.53, "learning_rate": 2.4324329480996512e-05, "loss": 0.2401, "step": 7555000 }, { "epoch": 4.53, "learning_rate": 2.4322229515435945e-05, "loss": 0.2331, "step": 7555500 }, { "epoch": 4.53, "learning_rate": 2.4320133749806502e-05, "loss": 0.2392, "step": 7556000 }, { "epoch": 4.53, "learning_rate": 2.431803378424594e-05, "loss": 0.2373, "step": 7556500 }, { "epoch": 4.53, "learning_rate": 2.4315938018616493e-05, "loss": 0.2409, "step": 7557000 }, { "epoch": 4.53, "learning_rate": 2.431383805305593e-05, "loss": 0.2336, "step": 7557500 }, { "epoch": 4.53, "learning_rate": 2.4311738087495366e-05, "loss": 0.2401, "step": 7558000 }, { "epoch": 4.53, "learning_rate": 2.43096381219348e-05, "loss": 0.2357, "step": 7558500 }, { "epoch": 4.53, "learning_rate": 2.4307538156374237e-05, "loss": 0.2391, "step": 7559000 }, { "epoch": 4.53, "learning_rate": 2.430543819081367e-05, "loss": 0.2415, "step": 7559500 }, { "epoch": 4.53, "learning_rate": 2.4303338225253104e-05, "loss": 0.2407, "step": 7560000 }, { "epoch": 4.53, "learning_rate": 2.430123825969254e-05, "loss": 0.2427, "step": 7560500 }, { "epoch": 4.53, "learning_rate": 2.4299142494063097e-05, "loss": 0.2378, "step": 7561000 }, { "epoch": 4.53, "learning_rate": 2.4297042528502534e-05, "loss": 0.2386, "step": 7561500 }, { "epoch": 4.53, "learning_rate": 2.4294946762873088e-05, "loss": 0.2339, "step": 7562000 }, { "epoch": 4.53, "learning_rate": 2.4292846797312525e-05, "loss": 0.2381, "step": 7562500 }, { "epoch": 4.53, "learning_rate": 2.4290746831751958e-05, "loss": 0.2388, "step": 7563000 }, { "epoch": 4.53, "learning_rate": 2.4288646866191395e-05, "loss": 0.2405, "step": 7563500 }, { "epoch": 4.53, "learning_rate": 2.428654690063083e-05, "loss": 0.243, "step": 7564000 }, { "epoch": 4.54, "learning_rate": 2.4284446935070262e-05, "loss": 0.2424, "step": 7564500 }, { "epoch": 4.54, "learning_rate": 2.4282351169440822e-05, "loss": 0.2407, "step": 7565000 }, { "epoch": 4.54, "learning_rate": 2.4280251203880256e-05, "loss": 0.2423, "step": 7565500 }, { "epoch": 4.54, "learning_rate": 2.4278151238319693e-05, "loss": 0.2386, "step": 7566000 }, { "epoch": 4.54, "learning_rate": 2.4276051272759126e-05, "loss": 0.2409, "step": 7566500 }, { "epoch": 4.54, "learning_rate": 2.427395130719856e-05, "loss": 0.2403, "step": 7567000 }, { "epoch": 4.54, "learning_rate": 2.4271851341637996e-05, "loss": 0.2356, "step": 7567500 }, { "epoch": 4.54, "learning_rate": 2.4269751376077433e-05, "loss": 0.2338, "step": 7568000 }, { "epoch": 4.54, "learning_rate": 2.4267651410516867e-05, "loss": 0.24, "step": 7568500 }, { "epoch": 4.54, "learning_rate": 2.4265555644887424e-05, "loss": 0.2356, "step": 7569000 }, { "epoch": 4.54, "learning_rate": 2.4263455679326857e-05, "loss": 0.2421, "step": 7569500 }, { "epoch": 4.54, "learning_rate": 2.4261355713766294e-05, "loss": 0.2417, "step": 7570000 }, { "epoch": 4.54, "learning_rate": 2.425925574820573e-05, "loss": 0.2431, "step": 7570500 }, { "epoch": 4.54, "learning_rate": 2.4257159982576288e-05, "loss": 0.2382, "step": 7571000 }, { "epoch": 4.54, "learning_rate": 2.4255060017015718e-05, "loss": 0.2414, "step": 7571500 }, { "epoch": 4.54, "learning_rate": 2.4252960051455155e-05, "loss": 0.2429, "step": 7572000 }, { "epoch": 4.54, "learning_rate": 2.425086008589459e-05, "loss": 0.2347, "step": 7572500 }, { "epoch": 4.54, "learning_rate": 2.424876432026515e-05, "loss": 0.2449, "step": 7573000 }, { "epoch": 4.54, "learning_rate": 2.4246668554635702e-05, "loss": 0.2305, "step": 7573500 }, { "epoch": 4.54, "learning_rate": 2.424456858907514e-05, "loss": 0.2395, "step": 7574000 }, { "epoch": 4.54, "learning_rate": 2.4242468623514572e-05, "loss": 0.2385, "step": 7574500 }, { "epoch": 4.54, "learning_rate": 2.424036865795401e-05, "loss": 0.2436, "step": 7575000 }, { "epoch": 4.54, "learning_rate": 2.4238268692393446e-05, "loss": 0.2428, "step": 7575500 }, { "epoch": 4.54, "learning_rate": 2.423616872683288e-05, "loss": 0.2393, "step": 7576000 }, { "epoch": 4.54, "learning_rate": 2.4234068761272313e-05, "loss": 0.2424, "step": 7576500 }, { "epoch": 4.54, "learning_rate": 2.423197299564287e-05, "loss": 0.2389, "step": 7577000 }, { "epoch": 4.54, "learning_rate": 2.4229873030082307e-05, "loss": 0.2408, "step": 7577500 }, { "epoch": 4.54, "learning_rate": 2.4227773064521744e-05, "loss": 0.2404, "step": 7578000 }, { "epoch": 4.54, "learning_rate": 2.4225673098961177e-05, "loss": 0.2408, "step": 7578500 }, { "epoch": 4.54, "learning_rate": 2.422357313340061e-05, "loss": 0.2387, "step": 7579000 }, { "epoch": 4.54, "learning_rate": 2.4221473167840047e-05, "loss": 0.2392, "step": 7579500 }, { "epoch": 4.54, "learning_rate": 2.4219373202279484e-05, "loss": 0.2382, "step": 7580000 }, { "epoch": 4.54, "learning_rate": 2.4217273236718918e-05, "loss": 0.2441, "step": 7580500 }, { "epoch": 4.55, "learning_rate": 2.4215181671020595e-05, "loss": 0.2412, "step": 7581000 }, { "epoch": 4.55, "learning_rate": 2.421308170546003e-05, "loss": 0.2403, "step": 7581500 }, { "epoch": 4.55, "learning_rate": 2.4210981739899465e-05, "loss": 0.2372, "step": 7582000 }, { "epoch": 4.55, "learning_rate": 2.4208881774338902e-05, "loss": 0.2329, "step": 7582500 }, { "epoch": 4.55, "learning_rate": 2.420678180877834e-05, "loss": 0.2318, "step": 7583000 }, { "epoch": 4.55, "learning_rate": 2.420468184321777e-05, "loss": 0.2436, "step": 7583500 }, { "epoch": 4.55, "learning_rate": 2.4202586077588326e-05, "loss": 0.2363, "step": 7584000 }, { "epoch": 4.55, "learning_rate": 2.4200486112027763e-05, "loss": 0.2411, "step": 7584500 }, { "epoch": 4.55, "learning_rate": 2.41983861464672e-05, "loss": 0.2412, "step": 7585000 }, { "epoch": 4.55, "learning_rate": 2.4196286180906633e-05, "loss": 0.2367, "step": 7585500 }, { "epoch": 4.55, "learning_rate": 2.4194186215346066e-05, "loss": 0.2395, "step": 7586000 }, { "epoch": 4.55, "learning_rate": 2.4192090449716623e-05, "loss": 0.241, "step": 7586500 }, { "epoch": 4.55, "learning_rate": 2.418999048415606e-05, "loss": 0.241, "step": 7587000 }, { "epoch": 4.55, "learning_rate": 2.4187894718526614e-05, "loss": 0.2391, "step": 7587500 }, { "epoch": 4.55, "learning_rate": 2.418579475296605e-05, "loss": 0.2369, "step": 7588000 }, { "epoch": 4.55, "learning_rate": 2.4183694787405487e-05, "loss": 0.2398, "step": 7588500 }, { "epoch": 4.55, "learning_rate": 2.418159482184492e-05, "loss": 0.2367, "step": 7589000 }, { "epoch": 4.55, "learning_rate": 2.4179494856284358e-05, "loss": 0.2384, "step": 7589500 }, { "epoch": 4.55, "learning_rate": 2.4177394890723795e-05, "loss": 0.2373, "step": 7590000 }, { "epoch": 4.55, "learning_rate": 2.4175294925163225e-05, "loss": 0.2363, "step": 7590500 }, { "epoch": 4.55, "learning_rate": 2.417319495960266e-05, "loss": 0.248, "step": 7591000 }, { "epoch": 4.55, "learning_rate": 2.41710949940421e-05, "loss": 0.2377, "step": 7591500 }, { "epoch": 4.55, "learning_rate": 2.4168995028481532e-05, "loss": 0.2366, "step": 7592000 }, { "epoch": 4.55, "learning_rate": 2.416689506292097e-05, "loss": 0.2406, "step": 7592500 }, { "epoch": 4.55, "learning_rate": 2.4164795097360402e-05, "loss": 0.2409, "step": 7593000 }, { "epoch": 4.55, "learning_rate": 2.416269933173096e-05, "loss": 0.2404, "step": 7593500 }, { "epoch": 4.55, "learning_rate": 2.4160599366170396e-05, "loss": 0.2381, "step": 7594000 }, { "epoch": 4.55, "learning_rate": 2.415849940060983e-05, "loss": 0.235, "step": 7594500 }, { "epoch": 4.55, "learning_rate": 2.4156399435049266e-05, "loss": 0.2378, "step": 7595000 }, { "epoch": 4.55, "learning_rate": 2.415430366941982e-05, "loss": 0.2352, "step": 7595500 }, { "epoch": 4.55, "learning_rate": 2.4152203703859257e-05, "loss": 0.2394, "step": 7596000 }, { "epoch": 4.55, "learning_rate": 2.415010373829869e-05, "loss": 0.2402, "step": 7596500 }, { "epoch": 4.55, "learning_rate": 2.4148003772738127e-05, "loss": 0.2387, "step": 7597000 }, { "epoch": 4.56, "learning_rate": 2.414590800710868e-05, "loss": 0.2411, "step": 7597500 }, { "epoch": 4.56, "learning_rate": 2.4143808041548117e-05, "loss": 0.2379, "step": 7598000 }, { "epoch": 4.56, "learning_rate": 2.4141708075987554e-05, "loss": 0.2462, "step": 7598500 }, { "epoch": 4.56, "learning_rate": 2.4139608110426988e-05, "loss": 0.2451, "step": 7599000 }, { "epoch": 4.56, "learning_rate": 2.4137512344797545e-05, "loss": 0.241, "step": 7599500 }, { "epoch": 4.56, "learning_rate": 2.4135412379236978e-05, "loss": 0.2307, "step": 7600000 }, { "epoch": 4.56, "eval_loss": 0.22410638630390167, "eval_runtime": 1460.22, "eval_samples_per_second": 360.713, "eval_steps_per_second": 60.119, "step": 7600000 }, { "epoch": 4.56, "learning_rate": 2.4133316613607535e-05, "loss": 0.2366, "step": 7600500 }, { "epoch": 4.56, "learning_rate": 2.4131216648046972e-05, "loss": 0.241, "step": 7601000 }, { "epoch": 4.56, "learning_rate": 2.412911668248641e-05, "loss": 0.2353, "step": 7601500 }, { "epoch": 4.56, "learning_rate": 2.4127016716925842e-05, "loss": 0.2366, "step": 7602000 }, { "epoch": 4.56, "learning_rate": 2.4124916751365276e-05, "loss": 0.2395, "step": 7602500 }, { "epoch": 4.56, "learning_rate": 2.4122816785804713e-05, "loss": 0.2443, "step": 7603000 }, { "epoch": 4.56, "learning_rate": 2.4120716820244146e-05, "loss": 0.2415, "step": 7603500 }, { "epoch": 4.56, "learning_rate": 2.4118616854683583e-05, "loss": 0.2346, "step": 7604000 }, { "epoch": 4.56, "learning_rate": 2.411652108905414e-05, "loss": 0.2443, "step": 7604500 }, { "epoch": 4.56, "learning_rate": 2.4114421123493573e-05, "loss": 0.237, "step": 7605000 }, { "epoch": 4.56, "learning_rate": 2.411232115793301e-05, "loss": 0.2401, "step": 7605500 }, { "epoch": 4.56, "learning_rate": 2.4110221192372444e-05, "loss": 0.2415, "step": 7606000 }, { "epoch": 4.56, "learning_rate": 2.4108125426743e-05, "loss": 0.2336, "step": 7606500 }, { "epoch": 4.56, "learning_rate": 2.4106025461182434e-05, "loss": 0.2463, "step": 7607000 }, { "epoch": 4.56, "learning_rate": 2.410392549562187e-05, "loss": 0.2415, "step": 7607500 }, { "epoch": 4.56, "learning_rate": 2.4101829729992428e-05, "loss": 0.2494, "step": 7608000 }, { "epoch": 4.56, "learning_rate": 2.4099729764431865e-05, "loss": 0.2371, "step": 7608500 }, { "epoch": 4.56, "learning_rate": 2.4097629798871298e-05, "loss": 0.2339, "step": 7609000 }, { "epoch": 4.56, "learning_rate": 2.409552983331073e-05, "loss": 0.2415, "step": 7609500 }, { "epoch": 4.56, "learning_rate": 2.409342986775017e-05, "loss": 0.2453, "step": 7610000 }, { "epoch": 4.56, "learning_rate": 2.4091329902189602e-05, "loss": 0.2408, "step": 7610500 }, { "epoch": 4.56, "learning_rate": 2.408922993662904e-05, "loss": 0.2431, "step": 7611000 }, { "epoch": 4.56, "learning_rate": 2.4087129971068476e-05, "loss": 0.2366, "step": 7611500 }, { "epoch": 4.56, "learning_rate": 2.408503420543903e-05, "loss": 0.24, "step": 7612000 }, { "epoch": 4.56, "learning_rate": 2.4082934239878466e-05, "loss": 0.2417, "step": 7612500 }, { "epoch": 4.56, "learning_rate": 2.40808342743179e-05, "loss": 0.2361, "step": 7613000 }, { "epoch": 4.56, "learning_rate": 2.4078738508688456e-05, "loss": 0.2373, "step": 7613500 }, { "epoch": 4.56, "learning_rate": 2.4076638543127893e-05, "loss": 0.2397, "step": 7614000 }, { "epoch": 4.57, "learning_rate": 2.4074538577567327e-05, "loss": 0.2376, "step": 7614500 }, { "epoch": 4.57, "learning_rate": 2.4072438612006764e-05, "loss": 0.2405, "step": 7615000 }, { "epoch": 4.57, "learning_rate": 2.4070338646446197e-05, "loss": 0.2359, "step": 7615500 }, { "epoch": 4.57, "learning_rate": 2.4068238680885634e-05, "loss": 0.2414, "step": 7616000 }, { "epoch": 4.57, "learning_rate": 2.406613871532507e-05, "loss": 0.2371, "step": 7616500 }, { "epoch": 4.57, "learning_rate": 2.40640387497645e-05, "loss": 0.2349, "step": 7617000 }, { "epoch": 4.57, "learning_rate": 2.406194298413506e-05, "loss": 0.2379, "step": 7617500 }, { "epoch": 4.57, "learning_rate": 2.4059843018574495e-05, "loss": 0.2413, "step": 7618000 }, { "epoch": 4.57, "learning_rate": 2.405774305301393e-05, "loss": 0.243, "step": 7618500 }, { "epoch": 4.57, "learning_rate": 2.4055643087453368e-05, "loss": 0.2376, "step": 7619000 }, { "epoch": 4.57, "learning_rate": 2.4053547321823922e-05, "loss": 0.2426, "step": 7619500 }, { "epoch": 4.57, "learning_rate": 2.4051447356263355e-05, "loss": 0.2365, "step": 7620000 }, { "epoch": 4.57, "learning_rate": 2.4049347390702792e-05, "loss": 0.2446, "step": 7620500 }, { "epoch": 4.57, "learning_rate": 2.404724742514223e-05, "loss": 0.2417, "step": 7621000 }, { "epoch": 4.57, "learning_rate": 2.4045147459581662e-05, "loss": 0.2383, "step": 7621500 }, { "epoch": 4.57, "learning_rate": 2.4043047494021096e-05, "loss": 0.2369, "step": 7622000 }, { "epoch": 4.57, "learning_rate": 2.4040947528460533e-05, "loss": 0.2389, "step": 7622500 }, { "epoch": 4.57, "learning_rate": 2.403884756289997e-05, "loss": 0.237, "step": 7623000 }, { "epoch": 4.57, "learning_rate": 2.4036751797270527e-05, "loss": 0.2406, "step": 7623500 }, { "epoch": 4.57, "learning_rate": 2.4034651831709957e-05, "loss": 0.2371, "step": 7624000 }, { "epoch": 4.57, "learning_rate": 2.4032551866149394e-05, "loss": 0.2402, "step": 7624500 }, { "epoch": 4.57, "learning_rate": 2.403045190058883e-05, "loss": 0.24, "step": 7625000 }, { "epoch": 4.57, "learning_rate": 2.4028356134959387e-05, "loss": 0.2448, "step": 7625500 }, { "epoch": 4.57, "learning_rate": 2.4026256169398824e-05, "loss": 0.2398, "step": 7626000 }, { "epoch": 4.57, "learning_rate": 2.4024156203838254e-05, "loss": 0.2375, "step": 7626500 }, { "epoch": 4.57, "learning_rate": 2.402205623827769e-05, "loss": 0.2385, "step": 7627000 }, { "epoch": 4.57, "learning_rate": 2.4019960472648248e-05, "loss": 0.2406, "step": 7627500 }, { "epoch": 4.57, "learning_rate": 2.4017860507087685e-05, "loss": 0.2327, "step": 7628000 }, { "epoch": 4.57, "learning_rate": 2.401576054152712e-05, "loss": 0.2392, "step": 7628500 }, { "epoch": 4.57, "learning_rate": 2.4013660575966552e-05, "loss": 0.2382, "step": 7629000 }, { "epoch": 4.57, "learning_rate": 2.401156481033711e-05, "loss": 0.2379, "step": 7629500 }, { "epoch": 4.57, "learning_rate": 2.4009464844776546e-05, "loss": 0.2395, "step": 7630000 }, { "epoch": 4.57, "learning_rate": 2.4007369079147103e-05, "loss": 0.2388, "step": 7630500 }, { "epoch": 4.58, "learning_rate": 2.4005269113586536e-05, "loss": 0.2317, "step": 7631000 }, { "epoch": 4.58, "learning_rate": 2.4003169148025973e-05, "loss": 0.2409, "step": 7631500 }, { "epoch": 4.58, "learning_rate": 2.4001069182465406e-05, "loss": 0.2375, "step": 7632000 }, { "epoch": 4.58, "learning_rate": 2.3998969216904843e-05, "loss": 0.2366, "step": 7632500 }, { "epoch": 4.58, "learning_rate": 2.399686925134428e-05, "loss": 0.2364, "step": 7633000 }, { "epoch": 4.58, "learning_rate": 2.399476928578371e-05, "loss": 0.2374, "step": 7633500 }, { "epoch": 4.58, "learning_rate": 2.3992669320223147e-05, "loss": 0.2361, "step": 7634000 }, { "epoch": 4.58, "learning_rate": 2.3990573554593704e-05, "loss": 0.2369, "step": 7634500 }, { "epoch": 4.58, "learning_rate": 2.398847358903314e-05, "loss": 0.2391, "step": 7635000 }, { "epoch": 4.58, "learning_rate": 2.3986373623472574e-05, "loss": 0.2416, "step": 7635500 }, { "epoch": 4.58, "learning_rate": 2.3984273657912008e-05, "loss": 0.2424, "step": 7636000 }, { "epoch": 4.58, "learning_rate": 2.3982177892282565e-05, "loss": 0.2355, "step": 7636500 }, { "epoch": 4.58, "learning_rate": 2.3980077926722e-05, "loss": 0.2402, "step": 7637000 }, { "epoch": 4.58, "learning_rate": 2.397797796116144e-05, "loss": 0.2381, "step": 7637500 }, { "epoch": 4.58, "learning_rate": 2.3975877995600872e-05, "loss": 0.2497, "step": 7638000 }, { "epoch": 4.58, "learning_rate": 2.397378222997143e-05, "loss": 0.2373, "step": 7638500 }, { "epoch": 4.58, "learning_rate": 2.3971682264410862e-05, "loss": 0.2442, "step": 7639000 }, { "epoch": 4.58, "learning_rate": 2.39695822988503e-05, "loss": 0.2368, "step": 7639500 }, { "epoch": 4.58, "learning_rate": 2.3967482333289736e-05, "loss": 0.2374, "step": 7640000 }, { "epoch": 4.58, "learning_rate": 2.396538656766029e-05, "loss": 0.2394, "step": 7640500 }, { "epoch": 4.58, "learning_rate": 2.3963286602099723e-05, "loss": 0.2396, "step": 7641000 }, { "epoch": 4.58, "learning_rate": 2.396118663653916e-05, "loss": 0.2396, "step": 7641500 }, { "epoch": 4.58, "learning_rate": 2.3959086670978597e-05, "loss": 0.2401, "step": 7642000 }, { "epoch": 4.58, "learning_rate": 2.3956990905349154e-05, "loss": 0.2353, "step": 7642500 }, { "epoch": 4.58, "learning_rate": 2.3954890939788587e-05, "loss": 0.2406, "step": 7643000 }, { "epoch": 4.58, "learning_rate": 2.3952795174159144e-05, "loss": 0.2351, "step": 7643500 }, { "epoch": 4.58, "learning_rate": 2.3950695208598577e-05, "loss": 0.2375, "step": 7644000 }, { "epoch": 4.58, "learning_rate": 2.3948595243038014e-05, "loss": 0.2355, "step": 7644500 }, { "epoch": 4.58, "learning_rate": 2.394649527747745e-05, "loss": 0.2349, "step": 7645000 }, { "epoch": 4.58, "learning_rate": 2.3944395311916885e-05, "loss": 0.2368, "step": 7645500 }, { "epoch": 4.58, "learning_rate": 2.3942295346356318e-05, "loss": 0.2335, "step": 7646000 }, { "epoch": 4.58, "learning_rate": 2.3940195380795755e-05, "loss": 0.2465, "step": 7646500 }, { "epoch": 4.58, "learning_rate": 2.3938095415235192e-05, "loss": 0.2406, "step": 7647000 }, { "epoch": 4.58, "learning_rate": 2.3935999649605745e-05, "loss": 0.2385, "step": 7647500 }, { "epoch": 4.59, "learning_rate": 2.393389968404518e-05, "loss": 0.2407, "step": 7648000 }, { "epoch": 4.59, "learning_rate": 2.393180391841574e-05, "loss": 0.2343, "step": 7648500 }, { "epoch": 4.59, "learning_rate": 2.3929703952855173e-05, "loss": 0.2369, "step": 7649000 }, { "epoch": 4.59, "learning_rate": 2.392760398729461e-05, "loss": 0.2349, "step": 7649500 }, { "epoch": 4.59, "learning_rate": 2.3925504021734043e-05, "loss": 0.2465, "step": 7650000 }, { "epoch": 4.59, "learning_rate": 2.3923404056173476e-05, "loss": 0.2364, "step": 7650500 }, { "epoch": 4.59, "learning_rate": 2.3921304090612913e-05, "loss": 0.2417, "step": 7651000 }, { "epoch": 4.59, "learning_rate": 2.391920412505235e-05, "loss": 0.2373, "step": 7651500 }, { "epoch": 4.59, "learning_rate": 2.3917104159491784e-05, "loss": 0.239, "step": 7652000 }, { "epoch": 4.59, "learning_rate": 2.391500839386234e-05, "loss": 0.2311, "step": 7652500 }, { "epoch": 4.59, "learning_rate": 2.3912908428301774e-05, "loss": 0.2398, "step": 7653000 }, { "epoch": 4.59, "learning_rate": 2.391080846274121e-05, "loss": 0.2407, "step": 7653500 }, { "epoch": 4.59, "learning_rate": 2.3908708497180648e-05, "loss": 0.2388, "step": 7654000 }, { "epoch": 4.59, "learning_rate": 2.3906612731551205e-05, "loss": 0.2327, "step": 7654500 }, { "epoch": 4.59, "learning_rate": 2.3904512765990635e-05, "loss": 0.2392, "step": 7655000 }, { "epoch": 4.59, "learning_rate": 2.390241280043007e-05, "loss": 0.2423, "step": 7655500 }, { "epoch": 4.59, "learning_rate": 2.390031283486951e-05, "loss": 0.2429, "step": 7656000 }, { "epoch": 4.59, "learning_rate": 2.3898217069240065e-05, "loss": 0.2355, "step": 7656500 }, { "epoch": 4.59, "learning_rate": 2.38961171036795e-05, "loss": 0.2329, "step": 7657000 }, { "epoch": 4.59, "learning_rate": 2.3894017138118932e-05, "loss": 0.24, "step": 7657500 }, { "epoch": 4.59, "learning_rate": 2.389191717255837e-05, "loss": 0.2365, "step": 7658000 }, { "epoch": 4.59, "learning_rate": 2.3889821406928926e-05, "loss": 0.2401, "step": 7658500 }, { "epoch": 4.59, "learning_rate": 2.3887721441368363e-05, "loss": 0.2428, "step": 7659000 }, { "epoch": 4.59, "learning_rate": 2.3885621475807796e-05, "loss": 0.2385, "step": 7659500 }, { "epoch": 4.59, "learning_rate": 2.388352151024723e-05, "loss": 0.2367, "step": 7660000 }, { "epoch": 4.59, "learning_rate": 2.3881425744617787e-05, "loss": 0.2362, "step": 7660500 }, { "epoch": 4.59, "learning_rate": 2.3879325779057224e-05, "loss": 0.24, "step": 7661000 }, { "epoch": 4.59, "learning_rate": 2.387722581349666e-05, "loss": 0.2381, "step": 7661500 }, { "epoch": 4.59, "learning_rate": 2.387512584793609e-05, "loss": 0.2397, "step": 7662000 }, { "epoch": 4.59, "learning_rate": 2.387303008230665e-05, "loss": 0.2374, "step": 7662500 }, { "epoch": 4.59, "learning_rate": 2.3870930116746084e-05, "loss": 0.2368, "step": 7663000 }, { "epoch": 4.59, "learning_rate": 2.386883015118552e-05, "loss": 0.2417, "step": 7663500 }, { "epoch": 4.59, "learning_rate": 2.3866730185624958e-05, "loss": 0.2378, "step": 7664000 }, { "epoch": 4.6, "learning_rate": 2.3864630220064388e-05, "loss": 0.2389, "step": 7664500 }, { "epoch": 4.6, "learning_rate": 2.3862530254503825e-05, "loss": 0.2366, "step": 7665000 }, { "epoch": 4.6, "learning_rate": 2.3860430288943262e-05, "loss": 0.2364, "step": 7665500 }, { "epoch": 4.6, "learning_rate": 2.3858330323382695e-05, "loss": 0.2398, "step": 7666000 }, { "epoch": 4.6, "learning_rate": 2.3856234557753252e-05, "loss": 0.2364, "step": 7666500 }, { "epoch": 4.6, "learning_rate": 2.3854134592192686e-05, "loss": 0.2391, "step": 7667000 }, { "epoch": 4.6, "learning_rate": 2.3852038826563243e-05, "loss": 0.2394, "step": 7667500 }, { "epoch": 4.6, "learning_rate": 2.384993886100268e-05, "loss": 0.2432, "step": 7668000 }, { "epoch": 4.6, "learning_rate": 2.3847838895442116e-05, "loss": 0.2406, "step": 7668500 }, { "epoch": 4.6, "learning_rate": 2.384573892988155e-05, "loss": 0.2353, "step": 7669000 }, { "epoch": 4.6, "learning_rate": 2.3843638964320983e-05, "loss": 0.2407, "step": 7669500 }, { "epoch": 4.6, "learning_rate": 2.384153899876042e-05, "loss": 0.2404, "step": 7670000 }, { "epoch": 4.6, "learning_rate": 2.3839439033199857e-05, "loss": 0.2345, "step": 7670500 }, { "epoch": 4.6, "learning_rate": 2.3837343267570414e-05, "loss": 0.2383, "step": 7671000 }, { "epoch": 4.6, "learning_rate": 2.3835243302009844e-05, "loss": 0.239, "step": 7671500 }, { "epoch": 4.6, "learning_rate": 2.383314333644928e-05, "loss": 0.2377, "step": 7672000 }, { "epoch": 4.6, "learning_rate": 2.3831043370888718e-05, "loss": 0.2432, "step": 7672500 }, { "epoch": 4.6, "learning_rate": 2.382894340532815e-05, "loss": 0.2443, "step": 7673000 }, { "epoch": 4.6, "learning_rate": 2.3826843439767588e-05, "loss": 0.2429, "step": 7673500 }, { "epoch": 4.6, "learning_rate": 2.3824743474207025e-05, "loss": 0.2357, "step": 7674000 }, { "epoch": 4.6, "learning_rate": 2.382264770857758e-05, "loss": 0.2358, "step": 7674500 }, { "epoch": 4.6, "learning_rate": 2.3820547743017015e-05, "loss": 0.2363, "step": 7675000 }, { "epoch": 4.6, "learning_rate": 2.381844777745645e-05, "loss": 0.2371, "step": 7675500 }, { "epoch": 4.6, "learning_rate": 2.3816347811895886e-05, "loss": 0.243, "step": 7676000 }, { "epoch": 4.6, "learning_rate": 2.381424784633532e-05, "loss": 0.2401, "step": 7676500 }, { "epoch": 4.6, "learning_rate": 2.3812147880774753e-05, "loss": 0.2338, "step": 7677000 }, { "epoch": 4.6, "learning_rate": 2.3810052115145313e-05, "loss": 0.2363, "step": 7677500 }, { "epoch": 4.6, "learning_rate": 2.3807952149584746e-05, "loss": 0.2361, "step": 7678000 }, { "epoch": 4.6, "learning_rate": 2.3805852184024183e-05, "loss": 0.2379, "step": 7678500 }, { "epoch": 4.6, "learning_rate": 2.3803752218463617e-05, "loss": 0.2388, "step": 7679000 }, { "epoch": 4.6, "learning_rate": 2.380165225290305e-05, "loss": 0.2353, "step": 7679500 }, { "epoch": 4.6, "learning_rate": 2.3799552287342487e-05, "loss": 0.2401, "step": 7680000 }, { "epoch": 4.6, "learning_rate": 2.3797452321781924e-05, "loss": 0.2397, "step": 7680500 }, { "epoch": 4.61, "learning_rate": 2.3795352356221357e-05, "loss": 0.2349, "step": 7681000 }, { "epoch": 4.61, "learning_rate": 2.3793256590591914e-05, "loss": 0.2365, "step": 7681500 }, { "epoch": 4.61, "learning_rate": 2.3791156625031348e-05, "loss": 0.2361, "step": 7682000 }, { "epoch": 4.61, "learning_rate": 2.3789056659470785e-05, "loss": 0.2414, "step": 7682500 }, { "epoch": 4.61, "learning_rate": 2.378695669391022e-05, "loss": 0.2387, "step": 7683000 }, { "epoch": 4.61, "learning_rate": 2.378486092828078e-05, "loss": 0.2335, "step": 7683500 }, { "epoch": 4.61, "learning_rate": 2.378276096272021e-05, "loss": 0.2339, "step": 7684000 }, { "epoch": 4.61, "learning_rate": 2.3780660997159645e-05, "loss": 0.2387, "step": 7684500 }, { "epoch": 4.61, "learning_rate": 2.3778561031599082e-05, "loss": 0.2315, "step": 7685000 }, { "epoch": 4.61, "learning_rate": 2.377646526596964e-05, "loss": 0.2486, "step": 7685500 }, { "epoch": 4.61, "learning_rate": 2.3774365300409073e-05, "loss": 0.2387, "step": 7686000 }, { "epoch": 4.61, "learning_rate": 2.3772265334848506e-05, "loss": 0.2359, "step": 7686500 }, { "epoch": 4.61, "learning_rate": 2.3770165369287943e-05, "loss": 0.2362, "step": 7687000 }, { "epoch": 4.61, "learning_rate": 2.37680696036585e-05, "loss": 0.2413, "step": 7687500 }, { "epoch": 4.61, "learning_rate": 2.3765969638097937e-05, "loss": 0.2381, "step": 7688000 }, { "epoch": 4.61, "learning_rate": 2.376386967253737e-05, "loss": 0.2409, "step": 7688500 }, { "epoch": 4.61, "learning_rate": 2.3761769706976804e-05, "loss": 0.2412, "step": 7689000 }, { "epoch": 4.61, "learning_rate": 2.375967394134736e-05, "loss": 0.2308, "step": 7689500 }, { "epoch": 4.61, "learning_rate": 2.3757573975786797e-05, "loss": 0.2415, "step": 7690000 }, { "epoch": 4.61, "learning_rate": 2.3755474010226234e-05, "loss": 0.2404, "step": 7690500 }, { "epoch": 4.61, "learning_rate": 2.3753374044665664e-05, "loss": 0.2373, "step": 7691000 }, { "epoch": 4.61, "learning_rate": 2.3751278279036225e-05, "loss": 0.2423, "step": 7691500 }, { "epoch": 4.61, "learning_rate": 2.3749178313475658e-05, "loss": 0.235, "step": 7692000 }, { "epoch": 4.61, "learning_rate": 2.3747078347915095e-05, "loss": 0.2388, "step": 7692500 }, { "epoch": 4.61, "learning_rate": 2.3744978382354532e-05, "loss": 0.2406, "step": 7693000 }, { "epoch": 4.61, "learning_rate": 2.3742882616725085e-05, "loss": 0.2374, "step": 7693500 }, { "epoch": 4.61, "learning_rate": 2.374078265116452e-05, "loss": 0.2444, "step": 7694000 }, { "epoch": 4.61, "learning_rate": 2.3738682685603956e-05, "loss": 0.2411, "step": 7694500 }, { "epoch": 4.61, "learning_rate": 2.3736582720043393e-05, "loss": 0.2409, "step": 7695000 }, { "epoch": 4.61, "learning_rate": 2.3734486954413946e-05, "loss": 0.2342, "step": 7695500 }, { "epoch": 4.61, "learning_rate": 2.3732386988853383e-05, "loss": 0.2353, "step": 7696000 }, { "epoch": 4.61, "learning_rate": 2.3730287023292816e-05, "loss": 0.2317, "step": 7696500 }, { "epoch": 4.61, "learning_rate": 2.3728187057732253e-05, "loss": 0.2406, "step": 7697000 }, { "epoch": 4.61, "learning_rate": 2.372608709217169e-05, "loss": 0.2358, "step": 7697500 }, { "epoch": 4.62, "learning_rate": 2.372398712661112e-05, "loss": 0.2396, "step": 7698000 }, { "epoch": 4.62, "learning_rate": 2.3721887161050557e-05, "loss": 0.2384, "step": 7698500 }, { "epoch": 4.62, "learning_rate": 2.3719787195489994e-05, "loss": 0.2374, "step": 7699000 }, { "epoch": 4.62, "learning_rate": 2.371769142986055e-05, "loss": 0.2451, "step": 7699500 }, { "epoch": 4.62, "learning_rate": 2.3715591464299988e-05, "loss": 0.2421, "step": 7700000 }, { "epoch": 4.62, "eval_loss": 0.22230121493339539, "eval_runtime": 1460.6685, "eval_samples_per_second": 360.602, "eval_steps_per_second": 60.101, "step": 7700000 }, { "epoch": 4.62, "learning_rate": 2.3713491498739418e-05, "loss": 0.2366, "step": 7700500 }, { "epoch": 4.62, "learning_rate": 2.3711395733109975e-05, "loss": 0.2356, "step": 7701000 }, { "epoch": 4.62, "learning_rate": 2.370929576754941e-05, "loss": 0.236, "step": 7701500 }, { "epoch": 4.62, "learning_rate": 2.370719580198885e-05, "loss": 0.2393, "step": 7702000 }, { "epoch": 4.62, "learning_rate": 2.3705095836428285e-05, "loss": 0.2373, "step": 7702500 }, { "epoch": 4.62, "learning_rate": 2.3702995870867715e-05, "loss": 0.2402, "step": 7703000 }, { "epoch": 4.62, "learning_rate": 2.3700895905307152e-05, "loss": 0.2366, "step": 7703500 }, { "epoch": 4.62, "learning_rate": 2.369880013967771e-05, "loss": 0.2421, "step": 7704000 }, { "epoch": 4.62, "learning_rate": 2.3696700174117146e-05, "loss": 0.2378, "step": 7704500 }, { "epoch": 4.62, "learning_rate": 2.369460020855658e-05, "loss": 0.2411, "step": 7705000 }, { "epoch": 4.62, "learning_rate": 2.3692500242996013e-05, "loss": 0.2356, "step": 7705500 }, { "epoch": 4.62, "learning_rate": 2.369040027743545e-05, "loss": 0.2381, "step": 7706000 }, { "epoch": 4.62, "learning_rate": 2.3688304511806007e-05, "loss": 0.2368, "step": 7706500 }, { "epoch": 4.62, "learning_rate": 2.3686204546245444e-05, "loss": 0.2333, "step": 7707000 }, { "epoch": 4.62, "learning_rate": 2.3684104580684874e-05, "loss": 0.2374, "step": 7707500 }, { "epoch": 4.62, "learning_rate": 2.368200461512431e-05, "loss": 0.2398, "step": 7708000 }, { "epoch": 4.62, "learning_rate": 2.3679904649563747e-05, "loss": 0.2383, "step": 7708500 }, { "epoch": 4.62, "learning_rate": 2.367780468400318e-05, "loss": 0.2423, "step": 7709000 }, { "epoch": 4.62, "learning_rate": 2.3675704718442618e-05, "loss": 0.2364, "step": 7709500 }, { "epoch": 4.62, "learning_rate": 2.3673604752882054e-05, "loss": 0.2355, "step": 7710000 }, { "epoch": 4.62, "learning_rate": 2.3671513187183728e-05, "loss": 0.2421, "step": 7710500 }, { "epoch": 4.62, "learning_rate": 2.3669413221623165e-05, "loss": 0.2395, "step": 7711000 }, { "epoch": 4.62, "learning_rate": 2.3667313256062602e-05, "loss": 0.2377, "step": 7711500 }, { "epoch": 4.62, "learning_rate": 2.3665213290502035e-05, "loss": 0.2442, "step": 7712000 }, { "epoch": 4.62, "learning_rate": 2.366311332494147e-05, "loss": 0.2391, "step": 7712500 }, { "epoch": 4.62, "learning_rate": 2.3661013359380906e-05, "loss": 0.2403, "step": 7713000 }, { "epoch": 4.62, "learning_rate": 2.3658913393820342e-05, "loss": 0.2432, "step": 7713500 }, { "epoch": 4.62, "learning_rate": 2.3656813428259776e-05, "loss": 0.2422, "step": 7714000 }, { "epoch": 4.63, "learning_rate": 2.3654717662630333e-05, "loss": 0.2358, "step": 7714500 }, { "epoch": 4.63, "learning_rate": 2.3652617697069766e-05, "loss": 0.2379, "step": 7715000 }, { "epoch": 4.63, "learning_rate": 2.3650517731509203e-05, "loss": 0.238, "step": 7715500 }, { "epoch": 4.63, "learning_rate": 2.3648417765948637e-05, "loss": 0.2392, "step": 7716000 }, { "epoch": 4.63, "learning_rate": 2.3646322000319197e-05, "loss": 0.2357, "step": 7716500 }, { "epoch": 4.63, "learning_rate": 2.3644222034758627e-05, "loss": 0.2359, "step": 7717000 }, { "epoch": 4.63, "learning_rate": 2.3642122069198064e-05, "loss": 0.2372, "step": 7717500 }, { "epoch": 4.63, "learning_rate": 2.36400221036375e-05, "loss": 0.2433, "step": 7718000 }, { "epoch": 4.63, "learning_rate": 2.3637926338008058e-05, "loss": 0.2322, "step": 7718500 }, { "epoch": 4.63, "learning_rate": 2.363583057237861e-05, "loss": 0.2367, "step": 7719000 }, { "epoch": 4.63, "learning_rate": 2.3633730606818048e-05, "loss": 0.2403, "step": 7719500 }, { "epoch": 4.63, "learning_rate": 2.363163064125748e-05, "loss": 0.2385, "step": 7720000 }, { "epoch": 4.63, "learning_rate": 2.362953067569692e-05, "loss": 0.2413, "step": 7720500 }, { "epoch": 4.63, "learning_rate": 2.3627430710136355e-05, "loss": 0.2405, "step": 7721000 }, { "epoch": 4.63, "learning_rate": 2.362533074457579e-05, "loss": 0.2363, "step": 7721500 }, { "epoch": 4.63, "learning_rate": 2.3623230779015222e-05, "loss": 0.2432, "step": 7722000 }, { "epoch": 4.63, "learning_rate": 2.362113081345466e-05, "loss": 0.2406, "step": 7722500 }, { "epoch": 4.63, "learning_rate": 2.3619035047825216e-05, "loss": 0.2397, "step": 7723000 }, { "epoch": 4.63, "learning_rate": 2.3616935082264653e-05, "loss": 0.2411, "step": 7723500 }, { "epoch": 4.63, "learning_rate": 2.3614835116704086e-05, "loss": 0.2409, "step": 7724000 }, { "epoch": 4.63, "learning_rate": 2.361273515114352e-05, "loss": 0.2414, "step": 7724500 }, { "epoch": 4.63, "learning_rate": 2.3610639385514077e-05, "loss": 0.2349, "step": 7725000 }, { "epoch": 4.63, "learning_rate": 2.3608539419953514e-05, "loss": 0.2392, "step": 7725500 }, { "epoch": 4.63, "learning_rate": 2.3606439454392947e-05, "loss": 0.2414, "step": 7726000 }, { "epoch": 4.63, "learning_rate": 2.360433948883238e-05, "loss": 0.2385, "step": 7726500 }, { "epoch": 4.63, "learning_rate": 2.3602243723202937e-05, "loss": 0.2392, "step": 7727000 }, { "epoch": 4.63, "learning_rate": 2.3600143757642374e-05, "loss": 0.2413, "step": 7727500 }, { "epoch": 4.63, "learning_rate": 2.359804379208181e-05, "loss": 0.2354, "step": 7728000 }, { "epoch": 4.63, "learning_rate": 2.3595943826521245e-05, "loss": 0.2354, "step": 7728500 }, { "epoch": 4.63, "learning_rate": 2.3593843860960678e-05, "loss": 0.2358, "step": 7729000 }, { "epoch": 4.63, "learning_rate": 2.3591743895400115e-05, "loss": 0.2377, "step": 7729500 }, { "epoch": 4.63, "learning_rate": 2.358964392983955e-05, "loss": 0.2339, "step": 7730000 }, { "epoch": 4.63, "learning_rate": 2.3587543964278985e-05, "loss": 0.2392, "step": 7730500 }, { "epoch": 4.64, "learning_rate": 2.3585448198649542e-05, "loss": 0.2379, "step": 7731000 }, { "epoch": 4.64, "learning_rate": 2.3583348233088976e-05, "loss": 0.2362, "step": 7731500 }, { "epoch": 4.64, "learning_rate": 2.3581248267528413e-05, "loss": 0.2408, "step": 7732000 }, { "epoch": 4.64, "learning_rate": 2.357915250189897e-05, "loss": 0.2409, "step": 7732500 }, { "epoch": 4.64, "learning_rate": 2.3577052536338403e-05, "loss": 0.2342, "step": 7733000 }, { "epoch": 4.64, "learning_rate": 2.357495257077784e-05, "loss": 0.2313, "step": 7733500 }, { "epoch": 4.64, "learning_rate": 2.3572852605217273e-05, "loss": 0.2423, "step": 7734000 }, { "epoch": 4.64, "learning_rate": 2.357075263965671e-05, "loss": 0.2446, "step": 7734500 }, { "epoch": 4.64, "learning_rate": 2.3568656874027267e-05, "loss": 0.2411, "step": 7735000 }, { "epoch": 4.64, "learning_rate": 2.35665569084667e-05, "loss": 0.2357, "step": 7735500 }, { "epoch": 4.64, "learning_rate": 2.3564456942906134e-05, "loss": 0.2342, "step": 7736000 }, { "epoch": 4.64, "learning_rate": 2.356235697734557e-05, "loss": 0.2381, "step": 7736500 }, { "epoch": 4.64, "learning_rate": 2.3560257011785004e-05, "loss": 0.2379, "step": 7737000 }, { "epoch": 4.64, "learning_rate": 2.355815704622444e-05, "loss": 0.2398, "step": 7737500 }, { "epoch": 4.64, "learning_rate": 2.3556061280594998e-05, "loss": 0.2396, "step": 7738000 }, { "epoch": 4.64, "learning_rate": 2.355396131503443e-05, "loss": 0.2384, "step": 7738500 }, { "epoch": 4.64, "learning_rate": 2.355186554940499e-05, "loss": 0.2401, "step": 7739000 }, { "epoch": 4.64, "learning_rate": 2.3549765583844425e-05, "loss": 0.2407, "step": 7739500 }, { "epoch": 4.64, "learning_rate": 2.354766561828386e-05, "loss": 0.2346, "step": 7740000 }, { "epoch": 4.64, "learning_rate": 2.3545565652723296e-05, "loss": 0.2364, "step": 7740500 }, { "epoch": 4.64, "learning_rate": 2.354346568716273e-05, "loss": 0.2424, "step": 7741000 }, { "epoch": 4.64, "learning_rate": 2.3541365721602166e-05, "loss": 0.2382, "step": 7741500 }, { "epoch": 4.64, "learning_rate": 2.35392657560416e-05, "loss": 0.2428, "step": 7742000 }, { "epoch": 4.64, "learning_rate": 2.3537165790481036e-05, "loss": 0.2328, "step": 7742500 }, { "epoch": 4.64, "learning_rate": 2.3535065824920473e-05, "loss": 0.2378, "step": 7743000 }, { "epoch": 4.64, "learning_rate": 2.3532965859359907e-05, "loss": 0.2401, "step": 7743500 }, { "epoch": 4.64, "learning_rate": 2.353086589379934e-05, "loss": 0.2305, "step": 7744000 }, { "epoch": 4.64, "learning_rate": 2.3528765928238777e-05, "loss": 0.2369, "step": 7744500 }, { "epoch": 4.64, "learning_rate": 2.3526670162609334e-05, "loss": 0.2389, "step": 7745000 }, { "epoch": 4.64, "learning_rate": 2.352457019704877e-05, "loss": 0.2362, "step": 7745500 }, { "epoch": 4.64, "learning_rate": 2.3522474431419324e-05, "loss": 0.2376, "step": 7746000 }, { "epoch": 4.64, "learning_rate": 2.3520374465858758e-05, "loss": 0.2398, "step": 7746500 }, { "epoch": 4.64, "learning_rate": 2.3518274500298195e-05, "loss": 0.2284, "step": 7747000 }, { "epoch": 4.64, "learning_rate": 2.351617453473763e-05, "loss": 0.2327, "step": 7747500 }, { "epoch": 4.65, "learning_rate": 2.3514074569177065e-05, "loss": 0.2382, "step": 7748000 }, { "epoch": 4.65, "learning_rate": 2.35119746036165e-05, "loss": 0.2411, "step": 7748500 }, { "epoch": 4.65, "learning_rate": 2.3509874638055935e-05, "loss": 0.2362, "step": 7749000 }, { "epoch": 4.65, "learning_rate": 2.3507774672495372e-05, "loss": 0.2359, "step": 7749500 }, { "epoch": 4.65, "learning_rate": 2.350567890686593e-05, "loss": 0.237, "step": 7750000 }, { "epoch": 4.65, "learning_rate": 2.3503578941305362e-05, "loss": 0.2383, "step": 7750500 }, { "epoch": 4.65, "learning_rate": 2.3501478975744796e-05, "loss": 0.2368, "step": 7751000 }, { "epoch": 4.65, "learning_rate": 2.3499379010184233e-05, "loss": 0.2434, "step": 7751500 }, { "epoch": 4.65, "learning_rate": 2.3497279044623666e-05, "loss": 0.2383, "step": 7752000 }, { "epoch": 4.65, "learning_rate": 2.3495179079063103e-05, "loss": 0.2392, "step": 7752500 }, { "epoch": 4.65, "learning_rate": 2.349308331343366e-05, "loss": 0.2393, "step": 7753000 }, { "epoch": 4.65, "learning_rate": 2.3490983347873094e-05, "loss": 0.2367, "step": 7753500 }, { "epoch": 4.65, "learning_rate": 2.348888338231253e-05, "loss": 0.2398, "step": 7754000 }, { "epoch": 4.65, "learning_rate": 2.3486783416751964e-05, "loss": 0.2415, "step": 7754500 }, { "epoch": 4.65, "learning_rate": 2.34846834511914e-05, "loss": 0.2409, "step": 7755000 }, { "epoch": 4.65, "learning_rate": 2.3482583485630838e-05, "loss": 0.2349, "step": 7755500 }, { "epoch": 4.65, "learning_rate": 2.348048772000139e-05, "loss": 0.2388, "step": 7756000 }, { "epoch": 4.65, "learning_rate": 2.3478387754440828e-05, "loss": 0.234, "step": 7756500 }, { "epoch": 4.65, "learning_rate": 2.347628778888026e-05, "loss": 0.2373, "step": 7757000 }, { "epoch": 4.65, "learning_rate": 2.3474187823319698e-05, "loss": 0.2402, "step": 7757500 }, { "epoch": 4.65, "learning_rate": 2.3472087857759135e-05, "loss": 0.2396, "step": 7758000 }, { "epoch": 4.65, "learning_rate": 2.3469987892198565e-05, "loss": 0.2398, "step": 7758500 }, { "epoch": 4.65, "learning_rate": 2.3467887926638002e-05, "loss": 0.2354, "step": 7759000 }, { "epoch": 4.65, "learning_rate": 2.346578796107744e-05, "loss": 0.2334, "step": 7759500 }, { "epoch": 4.65, "learning_rate": 2.3463692195447996e-05, "loss": 0.2353, "step": 7760000 }, { "epoch": 4.65, "learning_rate": 2.3461592229887433e-05, "loss": 0.2328, "step": 7760500 }, { "epoch": 4.65, "learning_rate": 2.3459492264326863e-05, "loss": 0.2395, "step": 7761000 }, { "epoch": 4.65, "learning_rate": 2.34573922987663e-05, "loss": 0.2427, "step": 7761500 }, { "epoch": 4.65, "learning_rate": 2.3455296533136857e-05, "loss": 0.2365, "step": 7762000 }, { "epoch": 4.65, "learning_rate": 2.3453196567576293e-05, "loss": 0.2425, "step": 7762500 }, { "epoch": 4.65, "learning_rate": 2.3451096602015723e-05, "loss": 0.2345, "step": 7763000 }, { "epoch": 4.65, "learning_rate": 2.344899663645516e-05, "loss": 0.2461, "step": 7763500 }, { "epoch": 4.65, "learning_rate": 2.3446900870825717e-05, "loss": 0.2347, "step": 7764000 }, { "epoch": 4.66, "learning_rate": 2.3444800905265154e-05, "loss": 0.238, "step": 7764500 }, { "epoch": 4.66, "learning_rate": 2.344270093970459e-05, "loss": 0.2351, "step": 7765000 }, { "epoch": 4.66, "learning_rate": 2.344060097414402e-05, "loss": 0.242, "step": 7765500 }, { "epoch": 4.66, "learning_rate": 2.3438505208514578e-05, "loss": 0.2403, "step": 7766000 }, { "epoch": 4.66, "learning_rate": 2.3436405242954015e-05, "loss": 0.2374, "step": 7766500 }, { "epoch": 4.66, "learning_rate": 2.343430527739345e-05, "loss": 0.2389, "step": 7767000 }, { "epoch": 4.66, "learning_rate": 2.343220531183289e-05, "loss": 0.2402, "step": 7767500 }, { "epoch": 4.66, "learning_rate": 2.3430109546203442e-05, "loss": 0.2373, "step": 7768000 }, { "epoch": 4.66, "learning_rate": 2.3428009580642876e-05, "loss": 0.2401, "step": 7768500 }, { "epoch": 4.66, "learning_rate": 2.3425909615082312e-05, "loss": 0.237, "step": 7769000 }, { "epoch": 4.66, "learning_rate": 2.342380964952175e-05, "loss": 0.243, "step": 7769500 }, { "epoch": 4.66, "learning_rate": 2.3421713883892303e-05, "loss": 0.2385, "step": 7770000 }, { "epoch": 4.66, "learning_rate": 2.341961391833174e-05, "loss": 0.2388, "step": 7770500 }, { "epoch": 4.66, "learning_rate": 2.3417518152702297e-05, "loss": 0.2356, "step": 7771000 }, { "epoch": 4.66, "learning_rate": 2.341541818714173e-05, "loss": 0.239, "step": 7771500 }, { "epoch": 4.66, "learning_rate": 2.3413318221581167e-05, "loss": 0.239, "step": 7772000 }, { "epoch": 4.66, "learning_rate": 2.34112182560206e-05, "loss": 0.2326, "step": 7772500 }, { "epoch": 4.66, "learning_rate": 2.3409118290460034e-05, "loss": 0.2345, "step": 7773000 }, { "epoch": 4.66, "learning_rate": 2.340701832489947e-05, "loss": 0.2365, "step": 7773500 }, { "epoch": 4.66, "learning_rate": 2.3404918359338908e-05, "loss": 0.2347, "step": 7774000 }, { "epoch": 4.66, "learning_rate": 2.3402818393778344e-05, "loss": 0.2403, "step": 7774500 }, { "epoch": 4.66, "learning_rate": 2.3400722628148898e-05, "loss": 0.2403, "step": 7775000 }, { "epoch": 4.66, "learning_rate": 2.339862266258833e-05, "loss": 0.2405, "step": 7775500 }, { "epoch": 4.66, "learning_rate": 2.3396522697027768e-05, "loss": 0.2373, "step": 7776000 }, { "epoch": 4.66, "learning_rate": 2.3394422731467205e-05, "loss": 0.2363, "step": 7776500 }, { "epoch": 4.66, "learning_rate": 2.339232696583776e-05, "loss": 0.2394, "step": 7777000 }, { "epoch": 4.66, "learning_rate": 2.3390231200208316e-05, "loss": 0.2415, "step": 7777500 }, { "epoch": 4.66, "learning_rate": 2.3388131234647753e-05, "loss": 0.2424, "step": 7778000 }, { "epoch": 4.66, "learning_rate": 2.3386031269087186e-05, "loss": 0.2403, "step": 7778500 }, { "epoch": 4.66, "learning_rate": 2.3383931303526623e-05, "loss": 0.2333, "step": 7779000 }, { "epoch": 4.66, "learning_rate": 2.3381831337966056e-05, "loss": 0.2371, "step": 7779500 }, { "epoch": 4.66, "learning_rate": 2.3379731372405493e-05, "loss": 0.2381, "step": 7780000 }, { "epoch": 4.66, "learning_rate": 2.3377631406844927e-05, "loss": 0.2318, "step": 7780500 }, { "epoch": 4.67, "learning_rate": 2.3375531441284363e-05, "loss": 0.2402, "step": 7781000 }, { "epoch": 4.67, "learning_rate": 2.33734314757238e-05, "loss": 0.2436, "step": 7781500 }, { "epoch": 4.67, "learning_rate": 2.3371339910025474e-05, "loss": 0.2409, "step": 7782000 }, { "epoch": 4.67, "learning_rate": 2.336923994446491e-05, "loss": 0.2394, "step": 7782500 }, { "epoch": 4.67, "learning_rate": 2.3367139978904348e-05, "loss": 0.2362, "step": 7783000 }, { "epoch": 4.67, "learning_rate": 2.336504001334378e-05, "loss": 0.2358, "step": 7783500 }, { "epoch": 4.67, "learning_rate": 2.3362940047783218e-05, "loss": 0.2381, "step": 7784000 }, { "epoch": 4.67, "learning_rate": 2.336084008222265e-05, "loss": 0.2359, "step": 7784500 }, { "epoch": 4.67, "learning_rate": 2.335874431659321e-05, "loss": 0.2346, "step": 7785000 }, { "epoch": 4.67, "learning_rate": 2.3356644351032642e-05, "loss": 0.2396, "step": 7785500 }, { "epoch": 4.67, "learning_rate": 2.335454438547208e-05, "loss": 0.2384, "step": 7786000 }, { "epoch": 4.67, "learning_rate": 2.3352444419911512e-05, "loss": 0.2394, "step": 7786500 }, { "epoch": 4.67, "learning_rate": 2.335034445435095e-05, "loss": 0.2404, "step": 7787000 }, { "epoch": 4.67, "learning_rate": 2.3348244488790382e-05, "loss": 0.242, "step": 7787500 }, { "epoch": 4.67, "learning_rate": 2.334614452322982e-05, "loss": 0.2385, "step": 7788000 }, { "epoch": 4.67, "learning_rate": 2.3344044557669256e-05, "loss": 0.2365, "step": 7788500 }, { "epoch": 4.67, "learning_rate": 2.334194879203981e-05, "loss": 0.2411, "step": 7789000 }, { "epoch": 4.67, "learning_rate": 2.3339848826479243e-05, "loss": 0.2361, "step": 7789500 }, { "epoch": 4.67, "learning_rate": 2.333774886091868e-05, "loss": 0.2368, "step": 7790000 }, { "epoch": 4.67, "learning_rate": 2.3335648895358117e-05, "loss": 0.2414, "step": 7790500 }, { "epoch": 4.67, "learning_rate": 2.3333553129728674e-05, "loss": 0.2374, "step": 7791000 }, { "epoch": 4.67, "learning_rate": 2.3331453164168107e-05, "loss": 0.2368, "step": 7791500 }, { "epoch": 4.67, "learning_rate": 2.332935319860754e-05, "loss": 0.2399, "step": 7792000 }, { "epoch": 4.67, "learning_rate": 2.3327253233046978e-05, "loss": 0.2374, "step": 7792500 }, { "epoch": 4.67, "learning_rate": 2.3325157467417535e-05, "loss": 0.2383, "step": 7793000 }, { "epoch": 4.67, "learning_rate": 2.332305750185697e-05, "loss": 0.2402, "step": 7793500 }, { "epoch": 4.67, "learning_rate": 2.3320957536296405e-05, "loss": 0.2384, "step": 7794000 }, { "epoch": 4.67, "learning_rate": 2.331885757073584e-05, "loss": 0.2376, "step": 7794500 }, { "epoch": 4.67, "learning_rate": 2.3316761805106395e-05, "loss": 0.2349, "step": 7795000 }, { "epoch": 4.67, "learning_rate": 2.3314661839545832e-05, "loss": 0.2403, "step": 7795500 }, { "epoch": 4.67, "learning_rate": 2.3312561873985266e-05, "loss": 0.2402, "step": 7796000 }, { "epoch": 4.67, "learning_rate": 2.33104619084247e-05, "loss": 0.2373, "step": 7796500 }, { "epoch": 4.67, "learning_rate": 2.330836614279526e-05, "loss": 0.2349, "step": 7797000 }, { "epoch": 4.67, "learning_rate": 2.3306266177234693e-05, "loss": 0.2385, "step": 7797500 }, { "epoch": 4.68, "learning_rate": 2.330416621167413e-05, "loss": 0.2377, "step": 7798000 }, { "epoch": 4.68, "learning_rate": 2.3302070446044683e-05, "loss": 0.2368, "step": 7798500 }, { "epoch": 4.68, "learning_rate": 2.329997048048412e-05, "loss": 0.2419, "step": 7799000 }, { "epoch": 4.68, "learning_rate": 2.3297870514923554e-05, "loss": 0.2409, "step": 7799500 }, { "epoch": 4.68, "learning_rate": 2.329577054936299e-05, "loss": 0.2371, "step": 7800000 }, { "epoch": 4.68, "eval_loss": 0.22194808721542358, "eval_runtime": 1458.8951, "eval_samples_per_second": 361.04, "eval_steps_per_second": 60.174, "step": 7800000 }, { "epoch": 4.68, "learning_rate": 2.3293670583802427e-05, "loss": 0.2363, "step": 7800500 }, { "epoch": 4.68, "learning_rate": 2.329157061824186e-05, "loss": 0.2412, "step": 7801000 }, { "epoch": 4.68, "learning_rate": 2.3289470652681294e-05, "loss": 0.2336, "step": 7801500 }, { "epoch": 4.68, "learning_rate": 2.328737068712073e-05, "loss": 0.2363, "step": 7802000 }, { "epoch": 4.68, "learning_rate": 2.3285274921491288e-05, "loss": 0.2339, "step": 7802500 }, { "epoch": 4.68, "learning_rate": 2.3283174955930725e-05, "loss": 0.2369, "step": 7803000 }, { "epoch": 4.68, "learning_rate": 2.3281074990370155e-05, "loss": 0.2426, "step": 7803500 }, { "epoch": 4.68, "learning_rate": 2.3278975024809592e-05, "loss": 0.2356, "step": 7804000 }, { "epoch": 4.68, "learning_rate": 2.327687925918015e-05, "loss": 0.2349, "step": 7804500 }, { "epoch": 4.68, "learning_rate": 2.3274779293619586e-05, "loss": 0.2402, "step": 7805000 }, { "epoch": 4.68, "learning_rate": 2.327267932805902e-05, "loss": 0.2426, "step": 7805500 }, { "epoch": 4.68, "learning_rate": 2.3270579362498453e-05, "loss": 0.237, "step": 7806000 }, { "epoch": 4.68, "learning_rate": 2.326848359686901e-05, "loss": 0.2372, "step": 7806500 }, { "epoch": 4.68, "learning_rate": 2.3266383631308446e-05, "loss": 0.2442, "step": 7807000 }, { "epoch": 4.68, "learning_rate": 2.3264283665747883e-05, "loss": 0.2384, "step": 7807500 }, { "epoch": 4.68, "learning_rate": 2.3262183700187317e-05, "loss": 0.2415, "step": 7808000 }, { "epoch": 4.68, "learning_rate": 2.3260087934557874e-05, "loss": 0.2391, "step": 7808500 }, { "epoch": 4.68, "learning_rate": 2.3257987968997307e-05, "loss": 0.2457, "step": 7809000 }, { "epoch": 4.68, "learning_rate": 2.3255888003436744e-05, "loss": 0.2385, "step": 7809500 }, { "epoch": 4.68, "learning_rate": 2.325378803787618e-05, "loss": 0.2341, "step": 7810000 }, { "epoch": 4.68, "learning_rate": 2.3251692272246734e-05, "loss": 0.2368, "step": 7810500 }, { "epoch": 4.68, "learning_rate": 2.324959230668617e-05, "loss": 0.2421, "step": 7811000 }, { "epoch": 4.68, "learning_rate": 2.3247496541056728e-05, "loss": 0.2341, "step": 7811500 }, { "epoch": 4.68, "learning_rate": 2.324539657549616e-05, "loss": 0.2359, "step": 7812000 }, { "epoch": 4.68, "learning_rate": 2.3243296609935595e-05, "loss": 0.2327, "step": 7812500 }, { "epoch": 4.68, "learning_rate": 2.3241196644375032e-05, "loss": 0.2357, "step": 7813000 }, { "epoch": 4.68, "learning_rate": 2.3239096678814465e-05, "loss": 0.2372, "step": 7813500 }, { "epoch": 4.68, "learning_rate": 2.3236996713253902e-05, "loss": 0.2392, "step": 7814000 }, { "epoch": 4.69, "learning_rate": 2.323489674769334e-05, "loss": 0.2323, "step": 7814500 }, { "epoch": 4.69, "learning_rate": 2.3232796782132773e-05, "loss": 0.235, "step": 7815000 }, { "epoch": 4.69, "learning_rate": 2.3230696816572206e-05, "loss": 0.2369, "step": 7815500 }, { "epoch": 4.69, "learning_rate": 2.3228601050942763e-05, "loss": 0.2333, "step": 7816000 }, { "epoch": 4.69, "learning_rate": 2.32265010853822e-05, "loss": 0.2387, "step": 7816500 }, { "epoch": 4.69, "learning_rate": 2.3224401119821637e-05, "loss": 0.2382, "step": 7817000 }, { "epoch": 4.69, "learning_rate": 2.3222301154261067e-05, "loss": 0.2358, "step": 7817500 }, { "epoch": 4.69, "learning_rate": 2.3220205388631627e-05, "loss": 0.2383, "step": 7818000 }, { "epoch": 4.69, "learning_rate": 2.321810542307106e-05, "loss": 0.2387, "step": 7818500 }, { "epoch": 4.69, "learning_rate": 2.3216005457510497e-05, "loss": 0.2396, "step": 7819000 }, { "epoch": 4.69, "learning_rate": 2.3213905491949934e-05, "loss": 0.2421, "step": 7819500 }, { "epoch": 4.69, "learning_rate": 2.3211809726320488e-05, "loss": 0.2399, "step": 7820000 }, { "epoch": 4.69, "learning_rate": 2.320970976075992e-05, "loss": 0.2391, "step": 7820500 }, { "epoch": 4.69, "learning_rate": 2.3207609795199358e-05, "loss": 0.2389, "step": 7821000 }, { "epoch": 4.69, "learning_rate": 2.3205509829638795e-05, "loss": 0.2314, "step": 7821500 }, { "epoch": 4.69, "learning_rate": 2.320341406400935e-05, "loss": 0.2378, "step": 7822000 }, { "epoch": 4.69, "learning_rate": 2.3201314098448785e-05, "loss": 0.2367, "step": 7822500 }, { "epoch": 4.69, "learning_rate": 2.319921413288822e-05, "loss": 0.2344, "step": 7823000 }, { "epoch": 4.69, "learning_rate": 2.3197114167327656e-05, "loss": 0.2432, "step": 7823500 }, { "epoch": 4.69, "learning_rate": 2.3195018401698213e-05, "loss": 0.233, "step": 7824000 }, { "epoch": 4.69, "learning_rate": 2.3192922636068766e-05, "loss": 0.2345, "step": 7824500 }, { "epoch": 4.69, "learning_rate": 2.3190822670508203e-05, "loss": 0.2412, "step": 7825000 }, { "epoch": 4.69, "learning_rate": 2.318872270494764e-05, "loss": 0.2337, "step": 7825500 }, { "epoch": 4.69, "learning_rate": 2.3186622739387073e-05, "loss": 0.2382, "step": 7826000 }, { "epoch": 4.69, "learning_rate": 2.318452277382651e-05, "loss": 0.2308, "step": 7826500 }, { "epoch": 4.69, "learning_rate": 2.3182422808265944e-05, "loss": 0.2367, "step": 7827000 }, { "epoch": 4.69, "learning_rate": 2.3180322842705377e-05, "loss": 0.2406, "step": 7827500 }, { "epoch": 4.69, "learning_rate": 2.3178222877144814e-05, "loss": 0.2404, "step": 7828000 }, { "epoch": 4.69, "learning_rate": 2.317612711151537e-05, "loss": 0.233, "step": 7828500 }, { "epoch": 4.69, "learning_rate": 2.3174027145954804e-05, "loss": 0.2354, "step": 7829000 }, { "epoch": 4.69, "learning_rate": 2.317192718039424e-05, "loss": 0.2407, "step": 7829500 }, { "epoch": 4.69, "learning_rate": 2.3169827214833675e-05, "loss": 0.2343, "step": 7830000 }, { "epoch": 4.69, "learning_rate": 2.316773144920423e-05, "loss": 0.2403, "step": 7830500 }, { "epoch": 4.7, "learning_rate": 2.316563148364367e-05, "loss": 0.2437, "step": 7831000 }, { "epoch": 4.7, "learning_rate": 2.3163531518083102e-05, "loss": 0.2404, "step": 7831500 }, { "epoch": 4.7, "learning_rate": 2.316143575245366e-05, "loss": 0.2371, "step": 7832000 }, { "epoch": 4.7, "learning_rate": 2.3159335786893096e-05, "loss": 0.2429, "step": 7832500 }, { "epoch": 4.7, "learning_rate": 2.315723582133253e-05, "loss": 0.2362, "step": 7833000 }, { "epoch": 4.7, "learning_rate": 2.3155135855771966e-05, "loss": 0.2359, "step": 7833500 }, { "epoch": 4.7, "learning_rate": 2.31530358902114e-05, "loss": 0.2392, "step": 7834000 }, { "epoch": 4.7, "learning_rate": 2.3150935924650833e-05, "loss": 0.2377, "step": 7834500 }, { "epoch": 4.7, "learning_rate": 2.314883595909027e-05, "loss": 0.2395, "step": 7835000 }, { "epoch": 4.7, "learning_rate": 2.3146735993529707e-05, "loss": 0.2349, "step": 7835500 }, { "epoch": 4.7, "learning_rate": 2.3144640227900264e-05, "loss": 0.2336, "step": 7836000 }, { "epoch": 4.7, "learning_rate": 2.3142540262339697e-05, "loss": 0.2414, "step": 7836500 }, { "epoch": 4.7, "learning_rate": 2.314044029677913e-05, "loss": 0.2382, "step": 7837000 }, { "epoch": 4.7, "learning_rate": 2.3138344531149688e-05, "loss": 0.238, "step": 7837500 }, { "epoch": 4.7, "learning_rate": 2.3136244565589124e-05, "loss": 0.2388, "step": 7838000 }, { "epoch": 4.7, "learning_rate": 2.3134144600028558e-05, "loss": 0.2361, "step": 7838500 }, { "epoch": 4.7, "learning_rate": 2.3132044634467995e-05, "loss": 0.2376, "step": 7839000 }, { "epoch": 4.7, "learning_rate": 2.3129944668907428e-05, "loss": 0.2363, "step": 7839500 }, { "epoch": 4.7, "learning_rate": 2.3127844703346865e-05, "loss": 0.2401, "step": 7840000 }, { "epoch": 4.7, "learning_rate": 2.3125744737786302e-05, "loss": 0.2395, "step": 7840500 }, { "epoch": 4.7, "learning_rate": 2.3123644772225735e-05, "loss": 0.2344, "step": 7841000 }, { "epoch": 4.7, "learning_rate": 2.3121549006596292e-05, "loss": 0.2406, "step": 7841500 }, { "epoch": 4.7, "learning_rate": 2.3119449041035726e-05, "loss": 0.2377, "step": 7842000 }, { "epoch": 4.7, "learning_rate": 2.3117349075475163e-05, "loss": 0.2372, "step": 7842500 }, { "epoch": 4.7, "learning_rate": 2.31152491099146e-05, "loss": 0.2391, "step": 7843000 }, { "epoch": 4.7, "learning_rate": 2.3113153344285153e-05, "loss": 0.2302, "step": 7843500 }, { "epoch": 4.7, "learning_rate": 2.3111053378724586e-05, "loss": 0.2389, "step": 7844000 }, { "epoch": 4.7, "learning_rate": 2.3108953413164023e-05, "loss": 0.2398, "step": 7844500 }, { "epoch": 4.7, "learning_rate": 2.310685344760346e-05, "loss": 0.2395, "step": 7845000 }, { "epoch": 4.7, "learning_rate": 2.3104757681974017e-05, "loss": 0.2369, "step": 7845500 }, { "epoch": 4.7, "learning_rate": 2.310265771641345e-05, "loss": 0.2361, "step": 7846000 }, { "epoch": 4.7, "learning_rate": 2.3100561950784008e-05, "loss": 0.2332, "step": 7846500 }, { "epoch": 4.7, "learning_rate": 2.309846198522344e-05, "loss": 0.2397, "step": 7847000 }, { "epoch": 4.7, "learning_rate": 2.3096362019662878e-05, "loss": 0.2357, "step": 7847500 }, { "epoch": 4.71, "learning_rate": 2.3094262054102315e-05, "loss": 0.2376, "step": 7848000 }, { "epoch": 4.71, "learning_rate": 2.3092162088541748e-05, "loss": 0.2384, "step": 7848500 }, { "epoch": 4.71, "learning_rate": 2.309006212298118e-05, "loss": 0.2365, "step": 7849000 }, { "epoch": 4.71, "learning_rate": 2.308796215742062e-05, "loss": 0.2308, "step": 7849500 }, { "epoch": 4.71, "learning_rate": 2.3085862191860055e-05, "loss": 0.2398, "step": 7850000 }, { "epoch": 4.71, "learning_rate": 2.308376642623061e-05, "loss": 0.2358, "step": 7850500 }, { "epoch": 4.71, "learning_rate": 2.3081666460670042e-05, "loss": 0.2442, "step": 7851000 }, { "epoch": 4.71, "learning_rate": 2.307956649510948e-05, "loss": 0.2394, "step": 7851500 }, { "epoch": 4.71, "learning_rate": 2.3077466529548916e-05, "loss": 0.2432, "step": 7852000 }, { "epoch": 4.71, "learning_rate": 2.3075370763919473e-05, "loss": 0.2361, "step": 7852500 }, { "epoch": 4.71, "learning_rate": 2.3073270798358906e-05, "loss": 0.2361, "step": 7853000 }, { "epoch": 4.71, "learning_rate": 2.307117083279834e-05, "loss": 0.2379, "step": 7853500 }, { "epoch": 4.71, "learning_rate": 2.3069070867237777e-05, "loss": 0.2364, "step": 7854000 }, { "epoch": 4.71, "learning_rate": 2.3066975101608334e-05, "loss": 0.2368, "step": 7854500 }, { "epoch": 4.71, "learning_rate": 2.306487513604777e-05, "loss": 0.238, "step": 7855000 }, { "epoch": 4.71, "learning_rate": 2.3062775170487204e-05, "loss": 0.2339, "step": 7855500 }, { "epoch": 4.71, "learning_rate": 2.3060675204926637e-05, "loss": 0.2376, "step": 7856000 }, { "epoch": 4.71, "learning_rate": 2.3058579439297194e-05, "loss": 0.2462, "step": 7856500 }, { "epoch": 4.71, "learning_rate": 2.305647947373663e-05, "loss": 0.2389, "step": 7857000 }, { "epoch": 4.71, "learning_rate": 2.3054379508176068e-05, "loss": 0.239, "step": 7857500 }, { "epoch": 4.71, "learning_rate": 2.3052279542615498e-05, "loss": 0.2369, "step": 7858000 }, { "epoch": 4.71, "learning_rate": 2.305018377698606e-05, "loss": 0.2351, "step": 7858500 }, { "epoch": 4.71, "learning_rate": 2.3048083811425492e-05, "loss": 0.234, "step": 7859000 }, { "epoch": 4.71, "learning_rate": 2.304598384586493e-05, "loss": 0.2362, "step": 7859500 }, { "epoch": 4.71, "learning_rate": 2.3043888080235482e-05, "loss": 0.2328, "step": 7860000 }, { "epoch": 4.71, "learning_rate": 2.304178811467492e-05, "loss": 0.2413, "step": 7860500 }, { "epoch": 4.71, "learning_rate": 2.3039688149114353e-05, "loss": 0.2357, "step": 7861000 }, { "epoch": 4.71, "learning_rate": 2.303758818355379e-05, "loss": 0.2373, "step": 7861500 }, { "epoch": 4.71, "learning_rate": 2.3035488217993226e-05, "loss": 0.2329, "step": 7862000 }, { "epoch": 4.71, "learning_rate": 2.303338825243266e-05, "loss": 0.2403, "step": 7862500 }, { "epoch": 4.71, "learning_rate": 2.3031292486803217e-05, "loss": 0.2359, "step": 7863000 }, { "epoch": 4.71, "learning_rate": 2.302919252124265e-05, "loss": 0.2356, "step": 7863500 }, { "epoch": 4.71, "learning_rate": 2.3027092555682087e-05, "loss": 0.2334, "step": 7864000 }, { "epoch": 4.72, "learning_rate": 2.3024992590121524e-05, "loss": 0.2393, "step": 7864500 }, { "epoch": 4.72, "learning_rate": 2.3022892624560954e-05, "loss": 0.24, "step": 7865000 }, { "epoch": 4.72, "learning_rate": 2.302079265900039e-05, "loss": 0.2365, "step": 7865500 }, { "epoch": 4.72, "learning_rate": 2.3018692693439828e-05, "loss": 0.2389, "step": 7866000 }, { "epoch": 4.72, "learning_rate": 2.301659272787926e-05, "loss": 0.2326, "step": 7866500 }, { "epoch": 4.72, "learning_rate": 2.301449696224982e-05, "loss": 0.237, "step": 7867000 }, { "epoch": 4.72, "learning_rate": 2.301239699668925e-05, "loss": 0.2418, "step": 7867500 }, { "epoch": 4.72, "learning_rate": 2.301029703112869e-05, "loss": 0.2403, "step": 7868000 }, { "epoch": 4.72, "learning_rate": 2.3008197065568125e-05, "loss": 0.2397, "step": 7868500 }, { "epoch": 4.72, "learning_rate": 2.3006101299938682e-05, "loss": 0.2399, "step": 7869000 }, { "epoch": 4.72, "learning_rate": 2.3004001334378116e-05, "loss": 0.2392, "step": 7869500 }, { "epoch": 4.72, "learning_rate": 2.300190136881755e-05, "loss": 0.2374, "step": 7870000 }, { "epoch": 4.72, "learning_rate": 2.2999801403256986e-05, "loss": 0.2351, "step": 7870500 }, { "epoch": 4.72, "learning_rate": 2.2997705637627543e-05, "loss": 0.2411, "step": 7871000 }, { "epoch": 4.72, "learning_rate": 2.299560567206698e-05, "loss": 0.2424, "step": 7871500 }, { "epoch": 4.72, "learning_rate": 2.299350570650641e-05, "loss": 0.2375, "step": 7872000 }, { "epoch": 4.72, "learning_rate": 2.2991405740945847e-05, "loss": 0.2398, "step": 7872500 }, { "epoch": 4.72, "learning_rate": 2.2989305775385284e-05, "loss": 0.2395, "step": 7873000 }, { "epoch": 4.72, "learning_rate": 2.2987205809824717e-05, "loss": 0.2346, "step": 7873500 }, { "epoch": 4.72, "learning_rate": 2.2985105844264154e-05, "loss": 0.2452, "step": 7874000 }, { "epoch": 4.72, "learning_rate": 2.298300587870359e-05, "loss": 0.2392, "step": 7874500 }, { "epoch": 4.72, "learning_rate": 2.2980910113074144e-05, "loss": 0.2376, "step": 7875000 }, { "epoch": 4.72, "learning_rate": 2.297881014751358e-05, "loss": 0.2399, "step": 7875500 }, { "epoch": 4.72, "learning_rate": 2.2976710181953015e-05, "loss": 0.2403, "step": 7876000 }, { "epoch": 4.72, "learning_rate": 2.297461021639245e-05, "loss": 0.2396, "step": 7876500 }, { "epoch": 4.72, "learning_rate": 2.2972514450763005e-05, "loss": 0.2383, "step": 7877000 }, { "epoch": 4.72, "learning_rate": 2.2970414485202442e-05, "loss": 0.2334, "step": 7877500 }, { "epoch": 4.72, "learning_rate": 2.296831451964188e-05, "loss": 0.237, "step": 7878000 }, { "epoch": 4.72, "learning_rate": 2.2966214554081312e-05, "loss": 0.2371, "step": 7878500 }, { "epoch": 4.72, "learning_rate": 2.296411878845187e-05, "loss": 0.236, "step": 7879000 }, { "epoch": 4.72, "learning_rate": 2.2962018822891303e-05, "loss": 0.2351, "step": 7879500 }, { "epoch": 4.72, "learning_rate": 2.295991885733074e-05, "loss": 0.2334, "step": 7880000 }, { "epoch": 4.72, "learning_rate": 2.2957818891770176e-05, "loss": 0.2429, "step": 7880500 }, { "epoch": 4.72, "learning_rate": 2.2955723126140733e-05, "loss": 0.237, "step": 7881000 }, { "epoch": 4.73, "learning_rate": 2.2953623160580163e-05, "loss": 0.2412, "step": 7881500 }, { "epoch": 4.73, "learning_rate": 2.29515231950196e-05, "loss": 0.2354, "step": 7882000 }, { "epoch": 4.73, "learning_rate": 2.2949427429390157e-05, "loss": 0.2385, "step": 7882500 }, { "epoch": 4.73, "learning_rate": 2.2947327463829594e-05, "loss": 0.233, "step": 7883000 }, { "epoch": 4.73, "learning_rate": 2.294522749826903e-05, "loss": 0.2342, "step": 7883500 }, { "epoch": 4.73, "learning_rate": 2.294312753270846e-05, "loss": 0.2432, "step": 7884000 }, { "epoch": 4.73, "learning_rate": 2.2941027567147898e-05, "loss": 0.2388, "step": 7884500 }, { "epoch": 4.73, "learning_rate": 2.2938927601587335e-05, "loss": 0.2346, "step": 7885000 }, { "epoch": 4.73, "learning_rate": 2.2936827636026768e-05, "loss": 0.2361, "step": 7885500 }, { "epoch": 4.73, "learning_rate": 2.2934731870397325e-05, "loss": 0.2387, "step": 7886000 }, { "epoch": 4.73, "learning_rate": 2.293263190483676e-05, "loss": 0.2365, "step": 7886500 }, { "epoch": 4.73, "learning_rate": 2.2930531939276195e-05, "loss": 0.2352, "step": 7887000 }, { "epoch": 4.73, "learning_rate": 2.2928431973715632e-05, "loss": 0.234, "step": 7887500 }, { "epoch": 4.73, "learning_rate": 2.2926332008155066e-05, "loss": 0.2366, "step": 7888000 }, { "epoch": 4.73, "learning_rate": 2.2924232042594503e-05, "loss": 0.2413, "step": 7888500 }, { "epoch": 4.73, "learning_rate": 2.2922132077033936e-05, "loss": 0.2383, "step": 7889000 }, { "epoch": 4.73, "learning_rate": 2.292003211147337e-05, "loss": 0.238, "step": 7889500 }, { "epoch": 4.73, "learning_rate": 2.2917936345843926e-05, "loss": 0.2419, "step": 7890000 }, { "epoch": 4.73, "learning_rate": 2.2915836380283363e-05, "loss": 0.235, "step": 7890500 }, { "epoch": 4.73, "learning_rate": 2.29137364147228e-05, "loss": 0.2423, "step": 7891000 }, { "epoch": 4.73, "learning_rate": 2.2911636449162234e-05, "loss": 0.2429, "step": 7891500 }, { "epoch": 4.73, "learning_rate": 2.290954068353279e-05, "loss": 0.2363, "step": 7892000 }, { "epoch": 4.73, "learning_rate": 2.2907440717972224e-05, "loss": 0.234, "step": 7892500 }, { "epoch": 4.73, "learning_rate": 2.290534075241166e-05, "loss": 0.2376, "step": 7893000 }, { "epoch": 4.73, "learning_rate": 2.2903240786851098e-05, "loss": 0.2362, "step": 7893500 }, { "epoch": 4.73, "learning_rate": 2.290114502122165e-05, "loss": 0.2371, "step": 7894000 }, { "epoch": 4.73, "learning_rate": 2.2899045055661088e-05, "loss": 0.2391, "step": 7894500 }, { "epoch": 4.73, "learning_rate": 2.289694509010052e-05, "loss": 0.2364, "step": 7895000 }, { "epoch": 4.73, "learning_rate": 2.289484932447108e-05, "loss": 0.2334, "step": 7895500 }, { "epoch": 4.73, "learning_rate": 2.2892749358910512e-05, "loss": 0.236, "step": 7896000 }, { "epoch": 4.73, "learning_rate": 2.289064939334995e-05, "loss": 0.2402, "step": 7896500 }, { "epoch": 4.73, "learning_rate": 2.2888549427789382e-05, "loss": 0.2392, "step": 7897000 }, { "epoch": 4.73, "learning_rate": 2.288644946222882e-05, "loss": 0.2331, "step": 7897500 }, { "epoch": 4.74, "learning_rate": 2.2884353696599376e-05, "loss": 0.2371, "step": 7898000 }, { "epoch": 4.74, "learning_rate": 2.288225373103881e-05, "loss": 0.2368, "step": 7898500 }, { "epoch": 4.74, "learning_rate": 2.2880153765478246e-05, "loss": 0.2388, "step": 7899000 }, { "epoch": 4.74, "learning_rate": 2.287805379991768e-05, "loss": 0.236, "step": 7899500 }, { "epoch": 4.74, "learning_rate": 2.2875953834357117e-05, "loss": 0.2383, "step": 7900000 }, { "epoch": 4.74, "eval_loss": 0.22186195850372314, "eval_runtime": 1459.9349, "eval_samples_per_second": 360.783, "eval_steps_per_second": 60.131, "step": 7900000 }, { "epoch": 4.74, "learning_rate": 2.2873853868796554e-05, "loss": 0.246, "step": 7900500 }, { "epoch": 4.74, "learning_rate": 2.2871758103167107e-05, "loss": 0.2367, "step": 7901000 }, { "epoch": 4.74, "learning_rate": 2.2869658137606544e-05, "loss": 0.2366, "step": 7901500 }, { "epoch": 4.74, "learning_rate": 2.2867558172045977e-05, "loss": 0.2404, "step": 7902000 }, { "epoch": 4.74, "learning_rate": 2.2865458206485414e-05, "loss": 0.2395, "step": 7902500 }, { "epoch": 4.74, "learning_rate": 2.286335824092485e-05, "loss": 0.2374, "step": 7903000 }, { "epoch": 4.74, "learning_rate": 2.286125827536428e-05, "loss": 0.2413, "step": 7903500 }, { "epoch": 4.74, "learning_rate": 2.2859158309803718e-05, "loss": 0.2346, "step": 7904000 }, { "epoch": 4.74, "learning_rate": 2.2857058344243155e-05, "loss": 0.235, "step": 7904500 }, { "epoch": 4.74, "learning_rate": 2.2854962578613712e-05, "loss": 0.23, "step": 7905000 }, { "epoch": 4.74, "learning_rate": 2.2852862613053145e-05, "loss": 0.2398, "step": 7905500 }, { "epoch": 4.74, "learning_rate": 2.285076264749258e-05, "loss": 0.2388, "step": 7906000 }, { "epoch": 4.74, "learning_rate": 2.2848662681932016e-05, "loss": 0.2381, "step": 7906500 }, { "epoch": 4.74, "learning_rate": 2.2846571116233693e-05, "loss": 0.2444, "step": 7907000 }, { "epoch": 4.74, "learning_rate": 2.284447115067313e-05, "loss": 0.2412, "step": 7907500 }, { "epoch": 4.74, "learning_rate": 2.2842371185112563e-05, "loss": 0.2313, "step": 7908000 }, { "epoch": 4.74, "learning_rate": 2.2840271219552e-05, "loss": 0.2351, "step": 7908500 }, { "epoch": 4.74, "learning_rate": 2.2838171253991433e-05, "loss": 0.2425, "step": 7909000 }, { "epoch": 4.74, "learning_rate": 2.283607128843087e-05, "loss": 0.2329, "step": 7909500 }, { "epoch": 4.74, "learning_rate": 2.2833971322870307e-05, "loss": 0.234, "step": 7910000 }, { "epoch": 4.74, "learning_rate": 2.283187555724086e-05, "loss": 0.2356, "step": 7910500 }, { "epoch": 4.74, "learning_rate": 2.2829775591680294e-05, "loss": 0.2369, "step": 7911000 }, { "epoch": 4.74, "learning_rate": 2.282767562611973e-05, "loss": 0.2369, "step": 7911500 }, { "epoch": 4.74, "learning_rate": 2.2825575660559168e-05, "loss": 0.2348, "step": 7912000 }, { "epoch": 4.74, "learning_rate": 2.2823475694998605e-05, "loss": 0.2397, "step": 7912500 }, { "epoch": 4.74, "learning_rate": 2.2821375729438035e-05, "loss": 0.2375, "step": 7913000 }, { "epoch": 4.74, "learning_rate": 2.281927576387747e-05, "loss": 0.2382, "step": 7913500 }, { "epoch": 4.74, "learning_rate": 2.281717579831691e-05, "loss": 0.2378, "step": 7914000 }, { "epoch": 4.75, "learning_rate": 2.2815080032687465e-05, "loss": 0.2352, "step": 7914500 }, { "epoch": 4.75, "learning_rate": 2.28129800671269e-05, "loss": 0.2346, "step": 7915000 }, { "epoch": 4.75, "learning_rate": 2.2810880101566332e-05, "loss": 0.2355, "step": 7915500 }, { "epoch": 4.75, "learning_rate": 2.280878013600577e-05, "loss": 0.2393, "step": 7916000 }, { "epoch": 4.75, "learning_rate": 2.2806684370376326e-05, "loss": 0.2426, "step": 7916500 }, { "epoch": 4.75, "learning_rate": 2.2804584404815763e-05, "loss": 0.2287, "step": 7917000 }, { "epoch": 4.75, "learning_rate": 2.2802484439255196e-05, "loss": 0.2344, "step": 7917500 }, { "epoch": 4.75, "learning_rate": 2.280038447369463e-05, "loss": 0.2385, "step": 7918000 }, { "epoch": 4.75, "learning_rate": 2.2798288708065187e-05, "loss": 0.2373, "step": 7918500 }, { "epoch": 4.75, "learning_rate": 2.2796188742504624e-05, "loss": 0.237, "step": 7919000 }, { "epoch": 4.75, "learning_rate": 2.279409297687518e-05, "loss": 0.2424, "step": 7919500 }, { "epoch": 4.75, "learning_rate": 2.2791993011314614e-05, "loss": 0.2306, "step": 7920000 }, { "epoch": 4.75, "learning_rate": 2.2789893045754048e-05, "loss": 0.2363, "step": 7920500 }, { "epoch": 4.75, "learning_rate": 2.2787793080193484e-05, "loss": 0.2376, "step": 7921000 }, { "epoch": 4.75, "learning_rate": 2.278569311463292e-05, "loss": 0.2409, "step": 7921500 }, { "epoch": 4.75, "learning_rate": 2.2783593149072355e-05, "loss": 0.2394, "step": 7922000 }, { "epoch": 4.75, "learning_rate": 2.2781493183511788e-05, "loss": 0.2423, "step": 7922500 }, { "epoch": 4.75, "learning_rate": 2.2779393217951225e-05, "loss": 0.2404, "step": 7923000 }, { "epoch": 4.75, "learning_rate": 2.2777297452321782e-05, "loss": 0.2366, "step": 7923500 }, { "epoch": 4.75, "learning_rate": 2.277519748676122e-05, "loss": 0.2322, "step": 7924000 }, { "epoch": 4.75, "learning_rate": 2.2773097521200652e-05, "loss": 0.2316, "step": 7924500 }, { "epoch": 4.75, "learning_rate": 2.2770997555640086e-05, "loss": 0.2324, "step": 7925000 }, { "epoch": 4.75, "learning_rate": 2.2768901790010643e-05, "loss": 0.2305, "step": 7925500 }, { "epoch": 4.75, "learning_rate": 2.276680182445008e-05, "loss": 0.2407, "step": 7926000 }, { "epoch": 4.75, "learning_rate": 2.2764706058820637e-05, "loss": 0.2382, "step": 7926500 }, { "epoch": 4.75, "learning_rate": 2.276260609326007e-05, "loss": 0.241, "step": 7927000 }, { "epoch": 4.75, "learning_rate": 2.2760506127699503e-05, "loss": 0.2334, "step": 7927500 }, { "epoch": 4.75, "learning_rate": 2.275840616213894e-05, "loss": 0.2362, "step": 7928000 }, { "epoch": 4.75, "learning_rate": 2.2756306196578377e-05, "loss": 0.2323, "step": 7928500 }, { "epoch": 4.75, "learning_rate": 2.275420623101781e-05, "loss": 0.2359, "step": 7929000 }, { "epoch": 4.75, "learning_rate": 2.2752106265457244e-05, "loss": 0.243, "step": 7929500 }, { "epoch": 4.75, "learning_rate": 2.275000629989668e-05, "loss": 0.2366, "step": 7930000 }, { "epoch": 4.75, "learning_rate": 2.2747910534267238e-05, "loss": 0.2378, "step": 7930500 }, { "epoch": 4.75, "learning_rate": 2.2745810568706675e-05, "loss": 0.2347, "step": 7931000 }, { "epoch": 4.76, "learning_rate": 2.2743710603146108e-05, "loss": 0.2367, "step": 7931500 }, { "epoch": 4.76, "learning_rate": 2.274161063758554e-05, "loss": 0.2339, "step": 7932000 }, { "epoch": 4.76, "learning_rate": 2.27395148719561e-05, "loss": 0.237, "step": 7932500 }, { "epoch": 4.76, "learning_rate": 2.2737414906395535e-05, "loss": 0.241, "step": 7933000 }, { "epoch": 4.76, "learning_rate": 2.2735314940834972e-05, "loss": 0.2378, "step": 7933500 }, { "epoch": 4.76, "learning_rate": 2.2733214975274406e-05, "loss": 0.23, "step": 7934000 }, { "epoch": 4.76, "learning_rate": 2.273111500971384e-05, "loss": 0.2344, "step": 7934500 }, { "epoch": 4.76, "learning_rate": 2.2729015044153276e-05, "loss": 0.2369, "step": 7935000 }, { "epoch": 4.76, "learning_rate": 2.2726919278523833e-05, "loss": 0.2394, "step": 7935500 }, { "epoch": 4.76, "learning_rate": 2.2724819312963266e-05, "loss": 0.2343, "step": 7936000 }, { "epoch": 4.76, "learning_rate": 2.2722719347402703e-05, "loss": 0.2374, "step": 7936500 }, { "epoch": 4.76, "learning_rate": 2.2720619381842137e-05, "loss": 0.2365, "step": 7937000 }, { "epoch": 4.76, "learning_rate": 2.2718519416281574e-05, "loss": 0.2421, "step": 7937500 }, { "epoch": 4.76, "learning_rate": 2.2716419450721007e-05, "loss": 0.2338, "step": 7938000 }, { "epoch": 4.76, "learning_rate": 2.2714319485160444e-05, "loss": 0.2384, "step": 7938500 }, { "epoch": 4.76, "learning_rate": 2.271221951959988e-05, "loss": 0.2342, "step": 7939000 }, { "epoch": 4.76, "learning_rate": 2.2710123753970434e-05, "loss": 0.239, "step": 7939500 }, { "epoch": 4.76, "learning_rate": 2.270802798834099e-05, "loss": 0.2343, "step": 7940000 }, { "epoch": 4.76, "learning_rate": 2.2705928022780428e-05, "loss": 0.2353, "step": 7940500 }, { "epoch": 4.76, "learning_rate": 2.270382805721986e-05, "loss": 0.2403, "step": 7941000 }, { "epoch": 4.76, "learning_rate": 2.2701728091659295e-05, "loss": 0.2421, "step": 7941500 }, { "epoch": 4.76, "learning_rate": 2.2699628126098732e-05, "loss": 0.2322, "step": 7942000 }, { "epoch": 4.76, "learning_rate": 2.2697528160538165e-05, "loss": 0.234, "step": 7942500 }, { "epoch": 4.76, "learning_rate": 2.2695428194977602e-05, "loss": 0.2352, "step": 7943000 }, { "epoch": 4.76, "learning_rate": 2.269332822941704e-05, "loss": 0.2395, "step": 7943500 }, { "epoch": 4.76, "learning_rate": 2.2691232463787593e-05, "loss": 0.2341, "step": 7944000 }, { "epoch": 4.76, "learning_rate": 2.268913249822703e-05, "loss": 0.2408, "step": 7944500 }, { "epoch": 4.76, "learning_rate": 2.2687032532666463e-05, "loss": 0.2331, "step": 7945000 }, { "epoch": 4.76, "learning_rate": 2.268493676703702e-05, "loss": 0.2348, "step": 7945500 }, { "epoch": 4.76, "learning_rate": 2.2682836801476457e-05, "loss": 0.2376, "step": 7946000 }, { "epoch": 4.76, "learning_rate": 2.268073683591589e-05, "loss": 0.2321, "step": 7946500 }, { "epoch": 4.76, "learning_rate": 2.2678636870355324e-05, "loss": 0.2329, "step": 7947000 }, { "epoch": 4.76, "learning_rate": 2.267653690479476e-05, "loss": 0.2313, "step": 7947500 }, { "epoch": 4.77, "learning_rate": 2.2674436939234197e-05, "loss": 0.2317, "step": 7948000 }, { "epoch": 4.77, "learning_rate": 2.2672341173604754e-05, "loss": 0.2404, "step": 7948500 }, { "epoch": 4.77, "learning_rate": 2.2670241208044188e-05, "loss": 0.2382, "step": 7949000 }, { "epoch": 4.77, "learning_rate": 2.266814124248362e-05, "loss": 0.2381, "step": 7949500 }, { "epoch": 4.77, "learning_rate": 2.2666041276923058e-05, "loss": 0.2366, "step": 7950000 }, { "epoch": 4.77, "learning_rate": 2.2663941311362495e-05, "loss": 0.2352, "step": 7950500 }, { "epoch": 4.77, "learning_rate": 2.266184134580193e-05, "loss": 0.237, "step": 7951000 }, { "epoch": 4.77, "learning_rate": 2.2659741380241362e-05, "loss": 0.2399, "step": 7951500 }, { "epoch": 4.77, "learning_rate": 2.26576414146808e-05, "loss": 0.233, "step": 7952000 }, { "epoch": 4.77, "learning_rate": 2.2655545649051356e-05, "loss": 0.2347, "step": 7952500 }, { "epoch": 4.77, "learning_rate": 2.2653445683490793e-05, "loss": 0.2318, "step": 7953000 }, { "epoch": 4.77, "learning_rate": 2.2651345717930226e-05, "loss": 0.238, "step": 7953500 }, { "epoch": 4.77, "learning_rate": 2.264924575236966e-05, "loss": 0.2315, "step": 7954000 }, { "epoch": 4.77, "learning_rate": 2.2647149986740216e-05, "loss": 0.2367, "step": 7954500 }, { "epoch": 4.77, "learning_rate": 2.2645050021179653e-05, "loss": 0.2409, "step": 7955000 }, { "epoch": 4.77, "learning_rate": 2.264295005561909e-05, "loss": 0.2408, "step": 7955500 }, { "epoch": 4.77, "learning_rate": 2.2640850090058524e-05, "loss": 0.2363, "step": 7956000 }, { "epoch": 4.77, "learning_rate": 2.2638754324429077e-05, "loss": 0.2338, "step": 7956500 }, { "epoch": 4.77, "learning_rate": 2.2636658558799634e-05, "loss": 0.2389, "step": 7957000 }, { "epoch": 4.77, "learning_rate": 2.263455859323907e-05, "loss": 0.24, "step": 7957500 }, { "epoch": 4.77, "learning_rate": 2.2632458627678508e-05, "loss": 0.2329, "step": 7958000 }, { "epoch": 4.77, "learning_rate": 2.263035866211794e-05, "loss": 0.24, "step": 7958500 }, { "epoch": 4.77, "learning_rate": 2.2628258696557375e-05, "loss": 0.2354, "step": 7959000 }, { "epoch": 4.77, "learning_rate": 2.262615873099681e-05, "loss": 0.2343, "step": 7959500 }, { "epoch": 4.77, "learning_rate": 2.262405876543625e-05, "loss": 0.2352, "step": 7960000 }, { "epoch": 4.77, "learning_rate": 2.2621958799875682e-05, "loss": 0.2322, "step": 7960500 }, { "epoch": 4.77, "learning_rate": 2.2619863034246235e-05, "loss": 0.2371, "step": 7961000 }, { "epoch": 4.77, "learning_rate": 2.2617763068685672e-05, "loss": 0.2393, "step": 7961500 }, { "epoch": 4.77, "learning_rate": 2.261566730305623e-05, "loss": 0.2326, "step": 7962000 }, { "epoch": 4.77, "learning_rate": 2.2613567337495666e-05, "loss": 0.2323, "step": 7962500 }, { "epoch": 4.77, "learning_rate": 2.26114673719351e-05, "loss": 0.2329, "step": 7963000 }, { "epoch": 4.77, "learning_rate": 2.2609367406374533e-05, "loss": 0.2456, "step": 7963500 }, { "epoch": 4.77, "learning_rate": 2.260726744081397e-05, "loss": 0.2325, "step": 7964000 }, { "epoch": 4.78, "learning_rate": 2.2605167475253407e-05, "loss": 0.2383, "step": 7964500 }, { "epoch": 4.78, "learning_rate": 2.260306750969284e-05, "loss": 0.2304, "step": 7965000 }, { "epoch": 4.78, "learning_rate": 2.2600967544132277e-05, "loss": 0.2363, "step": 7965500 }, { "epoch": 4.78, "learning_rate": 2.259887177850283e-05, "loss": 0.2447, "step": 7966000 }, { "epoch": 4.78, "learning_rate": 2.2596771812942267e-05, "loss": 0.2371, "step": 7966500 }, { "epoch": 4.78, "learning_rate": 2.2594671847381704e-05, "loss": 0.2297, "step": 7967000 }, { "epoch": 4.78, "learning_rate": 2.2592571881821138e-05, "loss": 0.2365, "step": 7967500 }, { "epoch": 4.78, "learning_rate": 2.2590476116191695e-05, "loss": 0.2336, "step": 7968000 }, { "epoch": 4.78, "learning_rate": 2.2588376150631128e-05, "loss": 0.2423, "step": 7968500 }, { "epoch": 4.78, "learning_rate": 2.2586276185070565e-05, "loss": 0.2338, "step": 7969000 }, { "epoch": 4.78, "learning_rate": 2.2584176219510002e-05, "loss": 0.2378, "step": 7969500 }, { "epoch": 4.78, "learning_rate": 2.2582080453880555e-05, "loss": 0.2348, "step": 7970000 }, { "epoch": 4.78, "learning_rate": 2.257998048831999e-05, "loss": 0.2377, "step": 7970500 }, { "epoch": 4.78, "learning_rate": 2.2577880522759426e-05, "loss": 0.2345, "step": 7971000 }, { "epoch": 4.78, "learning_rate": 2.2575780557198863e-05, "loss": 0.2309, "step": 7971500 }, { "epoch": 4.78, "learning_rate": 2.257368479156942e-05, "loss": 0.234, "step": 7972000 }, { "epoch": 4.78, "learning_rate": 2.2571584826008853e-05, "loss": 0.2355, "step": 7972500 }, { "epoch": 4.78, "learning_rate": 2.2569484860448286e-05, "loss": 0.2378, "step": 7973000 }, { "epoch": 4.78, "learning_rate": 2.2567384894887723e-05, "loss": 0.2372, "step": 7973500 }, { "epoch": 4.78, "learning_rate": 2.256528912925828e-05, "loss": 0.2351, "step": 7974000 }, { "epoch": 4.78, "learning_rate": 2.2563189163697717e-05, "loss": 0.2349, "step": 7974500 }, { "epoch": 4.78, "learning_rate": 2.256108919813715e-05, "loss": 0.2333, "step": 7975000 }, { "epoch": 4.78, "learning_rate": 2.2558989232576584e-05, "loss": 0.2374, "step": 7975500 }, { "epoch": 4.78, "learning_rate": 2.255689346694714e-05, "loss": 0.2384, "step": 7976000 }, { "epoch": 4.78, "learning_rate": 2.2554793501386578e-05, "loss": 0.2349, "step": 7976500 }, { "epoch": 4.78, "learning_rate": 2.2552693535826015e-05, "loss": 0.2361, "step": 7977000 }, { "epoch": 4.78, "learning_rate": 2.2550593570265445e-05, "loss": 0.2323, "step": 7977500 }, { "epoch": 4.78, "learning_rate": 2.2548497804636005e-05, "loss": 0.2383, "step": 7978000 }, { "epoch": 4.78, "learning_rate": 2.254639783907544e-05, "loss": 0.2404, "step": 7978500 }, { "epoch": 4.78, "learning_rate": 2.2544297873514875e-05, "loss": 0.2341, "step": 7979000 }, { "epoch": 4.78, "learning_rate": 2.254219790795431e-05, "loss": 0.242, "step": 7979500 }, { "epoch": 4.78, "learning_rate": 2.2540102142324866e-05, "loss": 0.2404, "step": 7980000 }, { "epoch": 4.78, "learning_rate": 2.25380021767643e-05, "loss": 0.2346, "step": 7980500 }, { "epoch": 4.78, "learning_rate": 2.253590641113486e-05, "loss": 0.2348, "step": 7981000 }, { "epoch": 4.79, "learning_rate": 2.2533806445574293e-05, "loss": 0.2345, "step": 7981500 }, { "epoch": 4.79, "learning_rate": 2.2531706480013727e-05, "loss": 0.2398, "step": 7982000 }, { "epoch": 4.79, "learning_rate": 2.2529606514453163e-05, "loss": 0.2358, "step": 7982500 }, { "epoch": 4.79, "learning_rate": 2.2527506548892597e-05, "loss": 0.236, "step": 7983000 }, { "epoch": 4.79, "learning_rate": 2.2525406583332034e-05, "loss": 0.2343, "step": 7983500 }, { "epoch": 4.79, "learning_rate": 2.252330661777147e-05, "loss": 0.2374, "step": 7984000 }, { "epoch": 4.79, "learning_rate": 2.25212066522109e-05, "loss": 0.2275, "step": 7984500 }, { "epoch": 4.79, "learning_rate": 2.251911088658146e-05, "loss": 0.2367, "step": 7985000 }, { "epoch": 4.79, "learning_rate": 2.2517010921020894e-05, "loss": 0.2383, "step": 7985500 }, { "epoch": 4.79, "learning_rate": 2.251491095546033e-05, "loss": 0.2387, "step": 7986000 }, { "epoch": 4.79, "learning_rate": 2.2512810989899768e-05, "loss": 0.238, "step": 7986500 }, { "epoch": 4.79, "learning_rate": 2.2510711024339198e-05, "loss": 0.2327, "step": 7987000 }, { "epoch": 4.79, "learning_rate": 2.2508611058778635e-05, "loss": 0.2362, "step": 7987500 }, { "epoch": 4.79, "learning_rate": 2.2506511093218072e-05, "loss": 0.236, "step": 7988000 }, { "epoch": 4.79, "learning_rate": 2.2504411127657505e-05, "loss": 0.2372, "step": 7988500 }, { "epoch": 4.79, "learning_rate": 2.2502315362028062e-05, "loss": 0.2313, "step": 7989000 }, { "epoch": 4.79, "learning_rate": 2.2500215396467496e-05, "loss": 0.2362, "step": 7989500 }, { "epoch": 4.79, "learning_rate": 2.2498115430906933e-05, "loss": 0.2319, "step": 7990000 }, { "epoch": 4.79, "learning_rate": 2.249601546534637e-05, "loss": 0.2346, "step": 7990500 }, { "epoch": 4.79, "learning_rate": 2.2493919699716926e-05, "loss": 0.2404, "step": 7991000 }, { "epoch": 4.79, "learning_rate": 2.2491819734156357e-05, "loss": 0.2337, "step": 7991500 }, { "epoch": 4.79, "learning_rate": 2.2489719768595793e-05, "loss": 0.2371, "step": 7992000 }, { "epoch": 4.79, "learning_rate": 2.248762400296635e-05, "loss": 0.2377, "step": 7992500 }, { "epoch": 4.79, "learning_rate": 2.2485524037405787e-05, "loss": 0.2381, "step": 7993000 }, { "epoch": 4.79, "learning_rate": 2.2483424071845224e-05, "loss": 0.2412, "step": 7993500 }, { "epoch": 4.79, "learning_rate": 2.2481324106284654e-05, "loss": 0.2331, "step": 7994000 }, { "epoch": 4.79, "learning_rate": 2.247922414072409e-05, "loss": 0.2406, "step": 7994500 }, { "epoch": 4.79, "learning_rate": 2.2477124175163528e-05, "loss": 0.2402, "step": 7995000 }, { "epoch": 4.79, "learning_rate": 2.247502420960296e-05, "loss": 0.2358, "step": 7995500 }, { "epoch": 4.79, "learning_rate": 2.2472924244042398e-05, "loss": 0.2359, "step": 7996000 }, { "epoch": 4.79, "learning_rate": 2.247082847841295e-05, "loss": 0.2385, "step": 7996500 }, { "epoch": 4.79, "learning_rate": 2.246872851285239e-05, "loss": 0.2376, "step": 7997000 }, { "epoch": 4.79, "learning_rate": 2.2466628547291825e-05, "loss": 0.2379, "step": 7997500 }, { "epoch": 4.8, "learning_rate": 2.246452858173126e-05, "loss": 0.242, "step": 7998000 }, { "epoch": 4.8, "learning_rate": 2.2462432816101816e-05, "loss": 0.2403, "step": 7998500 }, { "epoch": 4.8, "learning_rate": 2.246033285054125e-05, "loss": 0.2382, "step": 7999000 }, { "epoch": 4.8, "learning_rate": 2.2458232884980686e-05, "loss": 0.2355, "step": 7999500 }, { "epoch": 4.8, "learning_rate": 2.245613291942012e-05, "loss": 0.2351, "step": 8000000 }, { "epoch": 4.8, "eval_loss": 0.22045443952083588, "eval_runtime": 1460.9757, "eval_samples_per_second": 360.526, "eval_steps_per_second": 60.088, "step": 8000000 }, { "epoch": 4.8, "learning_rate": 2.245403715379068e-05, "loss": 0.2349, "step": 8000500 }, { "epoch": 4.8, "learning_rate": 2.245193718823011e-05, "loss": 0.2334, "step": 8001000 }, { "epoch": 4.8, "learning_rate": 2.2449837222669547e-05, "loss": 0.233, "step": 8001500 }, { "epoch": 4.8, "learning_rate": 2.2447737257108984e-05, "loss": 0.2398, "step": 8002000 }, { "epoch": 4.8, "learning_rate": 2.244564149147954e-05, "loss": 0.2384, "step": 8002500 }, { "epoch": 4.8, "learning_rate": 2.2443545725850094e-05, "loss": 0.2357, "step": 8003000 }, { "epoch": 4.8, "learning_rate": 2.244144576028953e-05, "loss": 0.2383, "step": 8003500 }, { "epoch": 4.8, "learning_rate": 2.2439345794728965e-05, "loss": 0.2308, "step": 8004000 }, { "epoch": 4.8, "learning_rate": 2.24372458291684e-05, "loss": 0.2348, "step": 8004500 }, { "epoch": 4.8, "learning_rate": 2.243515006353896e-05, "loss": 0.2387, "step": 8005000 }, { "epoch": 4.8, "learning_rate": 2.2433050097978392e-05, "loss": 0.2368, "step": 8005500 }, { "epoch": 4.8, "learning_rate": 2.243095013241783e-05, "loss": 0.2364, "step": 8006000 }, { "epoch": 4.8, "learning_rate": 2.2428850166857262e-05, "loss": 0.2326, "step": 8006500 }, { "epoch": 4.8, "learning_rate": 2.24267502012967e-05, "loss": 0.242, "step": 8007000 }, { "epoch": 4.8, "learning_rate": 2.2424650235736136e-05, "loss": 0.2391, "step": 8007500 }, { "epoch": 4.8, "learning_rate": 2.242255027017557e-05, "loss": 0.2369, "step": 8008000 }, { "epoch": 4.8, "learning_rate": 2.2420450304615003e-05, "loss": 0.2348, "step": 8008500 }, { "epoch": 4.8, "learning_rate": 2.241835453898556e-05, "loss": 0.2351, "step": 8009000 }, { "epoch": 4.8, "learning_rate": 2.2416254573424997e-05, "loss": 0.2376, "step": 8009500 }, { "epoch": 4.8, "learning_rate": 2.2414158807795553e-05, "loss": 0.2371, "step": 8010000 }, { "epoch": 4.8, "learning_rate": 2.2412058842234987e-05, "loss": 0.2376, "step": 8010500 }, { "epoch": 4.8, "learning_rate": 2.240995887667442e-05, "loss": 0.2339, "step": 8011000 }, { "epoch": 4.8, "learning_rate": 2.2407858911113857e-05, "loss": 0.2371, "step": 8011500 }, { "epoch": 4.8, "learning_rate": 2.2405763145484414e-05, "loss": 0.2293, "step": 8012000 }, { "epoch": 4.8, "learning_rate": 2.2403663179923848e-05, "loss": 0.2388, "step": 8012500 }, { "epoch": 4.8, "learning_rate": 2.2401563214363285e-05, "loss": 0.2363, "step": 8013000 }, { "epoch": 4.8, "learning_rate": 2.2399463248802718e-05, "loss": 0.2367, "step": 8013500 }, { "epoch": 4.8, "learning_rate": 2.2397363283242155e-05, "loss": 0.2418, "step": 8014000 }, { "epoch": 4.81, "learning_rate": 2.2395267517612712e-05, "loss": 0.2395, "step": 8014500 }, { "epoch": 4.81, "learning_rate": 2.2393167552052145e-05, "loss": 0.2375, "step": 8015000 }, { "epoch": 4.81, "learning_rate": 2.239106758649158e-05, "loss": 0.2398, "step": 8015500 }, { "epoch": 4.81, "learning_rate": 2.2388967620931016e-05, "loss": 0.2379, "step": 8016000 }, { "epoch": 4.81, "learning_rate": 2.2386867655370452e-05, "loss": 0.2358, "step": 8016500 }, { "epoch": 4.81, "learning_rate": 2.238476768980989e-05, "loss": 0.242, "step": 8017000 }, { "epoch": 4.81, "learning_rate": 2.2382667724249323e-05, "loss": 0.2395, "step": 8017500 }, { "epoch": 4.81, "learning_rate": 2.2380567758688756e-05, "loss": 0.2412, "step": 8018000 }, { "epoch": 4.81, "learning_rate": 2.2378471993059313e-05, "loss": 0.2284, "step": 8018500 }, { "epoch": 4.81, "learning_rate": 2.237637202749875e-05, "loss": 0.2301, "step": 8019000 }, { "epoch": 4.81, "learning_rate": 2.2374272061938183e-05, "loss": 0.2358, "step": 8019500 }, { "epoch": 4.81, "learning_rate": 2.237217209637762e-05, "loss": 0.2371, "step": 8020000 }, { "epoch": 4.81, "learning_rate": 2.2370076330748174e-05, "loss": 0.2341, "step": 8020500 }, { "epoch": 4.81, "learning_rate": 2.236797636518761e-05, "loss": 0.2453, "step": 8021000 }, { "epoch": 4.81, "learning_rate": 2.2365876399627048e-05, "loss": 0.2372, "step": 8021500 }, { "epoch": 4.81, "learning_rate": 2.236377643406648e-05, "loss": 0.2358, "step": 8022000 }, { "epoch": 4.81, "learning_rate": 2.2361680668437038e-05, "loss": 0.2361, "step": 8022500 }, { "epoch": 4.81, "learning_rate": 2.2359584902807595e-05, "loss": 0.2356, "step": 8023000 }, { "epoch": 4.81, "learning_rate": 2.235748493724703e-05, "loss": 0.2351, "step": 8023500 }, { "epoch": 4.81, "learning_rate": 2.2355384971686465e-05, "loss": 0.2395, "step": 8024000 }, { "epoch": 4.81, "learning_rate": 2.23532850061259e-05, "loss": 0.237, "step": 8024500 }, { "epoch": 4.81, "learning_rate": 2.2351185040565332e-05, "loss": 0.2321, "step": 8025000 }, { "epoch": 4.81, "learning_rate": 2.234908507500477e-05, "loss": 0.2443, "step": 8025500 }, { "epoch": 4.81, "learning_rate": 2.2346985109444206e-05, "loss": 0.2353, "step": 8026000 }, { "epoch": 4.81, "learning_rate": 2.234488514388364e-05, "loss": 0.2286, "step": 8026500 }, { "epoch": 4.81, "learning_rate": 2.2342789378254196e-05, "loss": 0.2347, "step": 8027000 }, { "epoch": 4.81, "learning_rate": 2.234068941269363e-05, "loss": 0.2374, "step": 8027500 }, { "epoch": 4.81, "learning_rate": 2.2338589447133067e-05, "loss": 0.24, "step": 8028000 }, { "epoch": 4.81, "learning_rate": 2.2336489481572503e-05, "loss": 0.2355, "step": 8028500 }, { "epoch": 4.81, "learning_rate": 2.233439371594306e-05, "loss": 0.2386, "step": 8029000 }, { "epoch": 4.81, "learning_rate": 2.2332293750382494e-05, "loss": 0.2329, "step": 8029500 }, { "epoch": 4.81, "learning_rate": 2.2330193784821927e-05, "loss": 0.2341, "step": 8030000 }, { "epoch": 4.81, "learning_rate": 2.2328093819261364e-05, "loss": 0.2356, "step": 8030500 }, { "epoch": 4.81, "learning_rate": 2.232599805363192e-05, "loss": 0.2382, "step": 8031000 }, { "epoch": 4.82, "learning_rate": 2.2323898088071358e-05, "loss": 0.2426, "step": 8031500 }, { "epoch": 4.82, "learning_rate": 2.2321798122510788e-05, "loss": 0.2428, "step": 8032000 }, { "epoch": 4.82, "learning_rate": 2.2319698156950225e-05, "loss": 0.2397, "step": 8032500 }, { "epoch": 4.82, "learning_rate": 2.2317598191389662e-05, "loss": 0.234, "step": 8033000 }, { "epoch": 4.82, "learning_rate": 2.231550242576022e-05, "loss": 0.2383, "step": 8033500 }, { "epoch": 4.82, "learning_rate": 2.2313402460199652e-05, "loss": 0.2311, "step": 8034000 }, { "epoch": 4.82, "learning_rate": 2.2311302494639086e-05, "loss": 0.2375, "step": 8034500 }, { "epoch": 4.82, "learning_rate": 2.2309202529078522e-05, "loss": 0.2346, "step": 8035000 }, { "epoch": 4.82, "learning_rate": 2.230710676344908e-05, "loss": 0.2414, "step": 8035500 }, { "epoch": 4.82, "learning_rate": 2.2305006797888516e-05, "loss": 0.2397, "step": 8036000 }, { "epoch": 4.82, "learning_rate": 2.230290683232795e-05, "loss": 0.2349, "step": 8036500 }, { "epoch": 4.82, "learning_rate": 2.2300806866767383e-05, "loss": 0.2369, "step": 8037000 }, { "epoch": 4.82, "learning_rate": 2.229871110113794e-05, "loss": 0.2413, "step": 8037500 }, { "epoch": 4.82, "learning_rate": 2.2296611135577377e-05, "loss": 0.2348, "step": 8038000 }, { "epoch": 4.82, "learning_rate": 2.2294511170016814e-05, "loss": 0.2363, "step": 8038500 }, { "epoch": 4.82, "learning_rate": 2.2292411204456244e-05, "loss": 0.2346, "step": 8039000 }, { "epoch": 4.82, "learning_rate": 2.2290315438826804e-05, "loss": 0.2341, "step": 8039500 }, { "epoch": 4.82, "learning_rate": 2.2288215473266238e-05, "loss": 0.2372, "step": 8040000 }, { "epoch": 4.82, "learning_rate": 2.2286119707636795e-05, "loss": 0.2379, "step": 8040500 }, { "epoch": 4.82, "learning_rate": 2.2284019742076228e-05, "loss": 0.237, "step": 8041000 }, { "epoch": 4.82, "learning_rate": 2.2281919776515665e-05, "loss": 0.2313, "step": 8041500 }, { "epoch": 4.82, "learning_rate": 2.22798198109551e-05, "loss": 0.2371, "step": 8042000 }, { "epoch": 4.82, "learning_rate": 2.2277719845394535e-05, "loss": 0.2422, "step": 8042500 }, { "epoch": 4.82, "learning_rate": 2.2275619879833972e-05, "loss": 0.2347, "step": 8043000 }, { "epoch": 4.82, "learning_rate": 2.2273519914273406e-05, "loss": 0.2342, "step": 8043500 }, { "epoch": 4.82, "learning_rate": 2.227141994871284e-05, "loss": 0.2307, "step": 8044000 }, { "epoch": 4.82, "learning_rate": 2.2269324183083396e-05, "loss": 0.2369, "step": 8044500 }, { "epoch": 4.82, "learning_rate": 2.2267224217522833e-05, "loss": 0.2364, "step": 8045000 }, { "epoch": 4.82, "learning_rate": 2.226512425196227e-05, "loss": 0.2413, "step": 8045500 }, { "epoch": 4.82, "learning_rate": 2.22630242864017e-05, "loss": 0.2371, "step": 8046000 }, { "epoch": 4.82, "learning_rate": 2.226092852077226e-05, "loss": 0.2382, "step": 8046500 }, { "epoch": 4.82, "learning_rate": 2.2258832755142817e-05, "loss": 0.2325, "step": 8047000 }, { "epoch": 4.82, "learning_rate": 2.225673278958225e-05, "loss": 0.2376, "step": 8047500 }, { "epoch": 4.83, "learning_rate": 2.2254632824021684e-05, "loss": 0.2348, "step": 8048000 }, { "epoch": 4.83, "learning_rate": 2.225253285846112e-05, "loss": 0.2369, "step": 8048500 }, { "epoch": 4.83, "learning_rate": 2.2250432892900554e-05, "loss": 0.2428, "step": 8049000 }, { "epoch": 4.83, "learning_rate": 2.224833292733999e-05, "loss": 0.2397, "step": 8049500 }, { "epoch": 4.83, "learning_rate": 2.2246232961779428e-05, "loss": 0.2341, "step": 8050000 }, { "epoch": 4.83, "learning_rate": 2.224413719614998e-05, "loss": 0.2345, "step": 8050500 }, { "epoch": 4.83, "learning_rate": 2.224203723058942e-05, "loss": 0.2386, "step": 8051000 }, { "epoch": 4.83, "learning_rate": 2.2239937265028852e-05, "loss": 0.2395, "step": 8051500 }, { "epoch": 4.83, "learning_rate": 2.223783729946829e-05, "loss": 0.2314, "step": 8052000 }, { "epoch": 4.83, "learning_rate": 2.2235737333907726e-05, "loss": 0.2376, "step": 8052500 }, { "epoch": 4.83, "learning_rate": 2.223363736834716e-05, "loss": 0.2391, "step": 8053000 }, { "epoch": 4.83, "learning_rate": 2.2231537402786593e-05, "loss": 0.2369, "step": 8053500 }, { "epoch": 4.83, "learning_rate": 2.222943743722603e-05, "loss": 0.2342, "step": 8054000 }, { "epoch": 4.83, "learning_rate": 2.2227341671596586e-05, "loss": 0.2418, "step": 8054500 }, { "epoch": 4.83, "learning_rate": 2.2225241706036023e-05, "loss": 0.2326, "step": 8055000 }, { "epoch": 4.83, "learning_rate": 2.2223141740475453e-05, "loss": 0.2313, "step": 8055500 }, { "epoch": 4.83, "learning_rate": 2.222104597484601e-05, "loss": 0.2379, "step": 8056000 }, { "epoch": 4.83, "learning_rate": 2.2218946009285447e-05, "loss": 0.2394, "step": 8056500 }, { "epoch": 4.83, "learning_rate": 2.2216846043724884e-05, "loss": 0.2381, "step": 8057000 }, { "epoch": 4.83, "learning_rate": 2.2214746078164317e-05, "loss": 0.2394, "step": 8057500 }, { "epoch": 4.83, "learning_rate": 2.221264611260375e-05, "loss": 0.236, "step": 8058000 }, { "epoch": 4.83, "learning_rate": 2.2210546147043188e-05, "loss": 0.2347, "step": 8058500 }, { "epoch": 4.83, "learning_rate": 2.2208450381413745e-05, "loss": 0.2315, "step": 8059000 }, { "epoch": 4.83, "learning_rate": 2.220635041585318e-05, "loss": 0.2452, "step": 8059500 }, { "epoch": 4.83, "learning_rate": 2.2204250450292615e-05, "loss": 0.2421, "step": 8060000 }, { "epoch": 4.83, "learning_rate": 2.220215048473205e-05, "loss": 0.2309, "step": 8060500 }, { "epoch": 4.83, "learning_rate": 2.2200050519171485e-05, "loss": 0.235, "step": 8061000 }, { "epoch": 4.83, "learning_rate": 2.2197950553610922e-05, "loss": 0.2337, "step": 8061500 }, { "epoch": 4.83, "learning_rate": 2.2195850588050356e-05, "loss": 0.2364, "step": 8062000 }, { "epoch": 4.83, "learning_rate": 2.2193750622489792e-05, "loss": 0.2315, "step": 8062500 }, { "epoch": 4.83, "learning_rate": 2.2191654856860346e-05, "loss": 0.2384, "step": 8063000 }, { "epoch": 4.83, "learning_rate": 2.2189554891299783e-05, "loss": 0.2314, "step": 8063500 }, { "epoch": 4.83, "learning_rate": 2.2187454925739216e-05, "loss": 0.2421, "step": 8064000 }, { "epoch": 4.83, "learning_rate": 2.2185354960178653e-05, "loss": 0.2348, "step": 8064500 }, { "epoch": 4.84, "learning_rate": 2.2183259194549207e-05, "loss": 0.2341, "step": 8065000 }, { "epoch": 4.84, "learning_rate": 2.2181159228988644e-05, "loss": 0.2365, "step": 8065500 }, { "epoch": 4.84, "learning_rate": 2.217905926342808e-05, "loss": 0.2399, "step": 8066000 }, { "epoch": 4.84, "learning_rate": 2.2176959297867514e-05, "loss": 0.2399, "step": 8066500 }, { "epoch": 4.84, "learning_rate": 2.217486353223807e-05, "loss": 0.2346, "step": 8067000 }, { "epoch": 4.84, "learning_rate": 2.2172763566677504e-05, "loss": 0.2383, "step": 8067500 }, { "epoch": 4.84, "learning_rate": 2.217066360111694e-05, "loss": 0.2352, "step": 8068000 }, { "epoch": 4.84, "learning_rate": 2.2168563635556378e-05, "loss": 0.2348, "step": 8068500 }, { "epoch": 4.84, "learning_rate": 2.2166467869926935e-05, "loss": 0.2373, "step": 8069000 }, { "epoch": 4.84, "learning_rate": 2.216436790436637e-05, "loss": 0.2382, "step": 8069500 }, { "epoch": 4.84, "learning_rate": 2.2162272138736922e-05, "loss": 0.2338, "step": 8070000 }, { "epoch": 4.84, "learning_rate": 2.216017217317636e-05, "loss": 0.2347, "step": 8070500 }, { "epoch": 4.84, "learning_rate": 2.2158072207615796e-05, "loss": 0.2364, "step": 8071000 }, { "epoch": 4.84, "learning_rate": 2.2155972242055233e-05, "loss": 0.2363, "step": 8071500 }, { "epoch": 4.84, "learning_rate": 2.2153876476425786e-05, "loss": 0.2373, "step": 8072000 }, { "epoch": 4.84, "learning_rate": 2.215177651086522e-05, "loss": 0.2352, "step": 8072500 }, { "epoch": 4.84, "learning_rate": 2.2149676545304656e-05, "loss": 0.238, "step": 8073000 }, { "epoch": 4.84, "learning_rate": 2.2147576579744093e-05, "loss": 0.2307, "step": 8073500 }, { "epoch": 4.84, "learning_rate": 2.2145476614183527e-05, "loss": 0.2371, "step": 8074000 }, { "epoch": 4.84, "learning_rate": 2.214337664862296e-05, "loss": 0.2337, "step": 8074500 }, { "epoch": 4.84, "learning_rate": 2.2141276683062397e-05, "loss": 0.233, "step": 8075000 }, { "epoch": 4.84, "learning_rate": 2.2139176717501834e-05, "loss": 0.2405, "step": 8075500 }, { "epoch": 4.84, "learning_rate": 2.213708515180351e-05, "loss": 0.2437, "step": 8076000 }, { "epoch": 4.84, "learning_rate": 2.2134985186242944e-05, "loss": 0.2398, "step": 8076500 }, { "epoch": 4.84, "learning_rate": 2.2132885220682378e-05, "loss": 0.2385, "step": 8077000 }, { "epoch": 4.84, "learning_rate": 2.2130785255121815e-05, "loss": 0.2322, "step": 8077500 }, { "epoch": 4.84, "learning_rate": 2.212868528956125e-05, "loss": 0.2383, "step": 8078000 }, { "epoch": 4.84, "learning_rate": 2.212658952393181e-05, "loss": 0.2324, "step": 8078500 }, { "epoch": 4.84, "learning_rate": 2.2124489558371242e-05, "loss": 0.2391, "step": 8079000 }, { "epoch": 4.84, "learning_rate": 2.2122389592810675e-05, "loss": 0.2376, "step": 8079500 }, { "epoch": 4.84, "learning_rate": 2.2120289627250112e-05, "loss": 0.2341, "step": 8080000 }, { "epoch": 4.84, "learning_rate": 2.211818966168955e-05, "loss": 0.2366, "step": 8080500 }, { "epoch": 4.84, "learning_rate": 2.2116089696128983e-05, "loss": 0.2337, "step": 8081000 }, { "epoch": 4.85, "learning_rate": 2.211399393049954e-05, "loss": 0.234, "step": 8081500 }, { "epoch": 4.85, "learning_rate": 2.2111893964938973e-05, "loss": 0.2368, "step": 8082000 }, { "epoch": 4.85, "learning_rate": 2.210979399937841e-05, "loss": 0.2352, "step": 8082500 }, { "epoch": 4.85, "learning_rate": 2.2107694033817847e-05, "loss": 0.2362, "step": 8083000 }, { "epoch": 4.85, "learning_rate": 2.2105598268188404e-05, "loss": 0.2363, "step": 8083500 }, { "epoch": 4.85, "learning_rate": 2.2103498302627837e-05, "loss": 0.238, "step": 8084000 }, { "epoch": 4.85, "learning_rate": 2.210139833706727e-05, "loss": 0.2305, "step": 8084500 }, { "epoch": 4.85, "learning_rate": 2.2099298371506707e-05, "loss": 0.2341, "step": 8085000 }, { "epoch": 4.85, "learning_rate": 2.2097198405946144e-05, "loss": 0.2395, "step": 8085500 }, { "epoch": 4.85, "learning_rate": 2.2095098440385578e-05, "loss": 0.239, "step": 8086000 }, { "epoch": 4.85, "learning_rate": 2.209299847482501e-05, "loss": 0.235, "step": 8086500 }, { "epoch": 4.85, "learning_rate": 2.2090898509264448e-05, "loss": 0.2342, "step": 8087000 }, { "epoch": 4.85, "learning_rate": 2.2088802743635005e-05, "loss": 0.2404, "step": 8087500 }, { "epoch": 4.85, "learning_rate": 2.208670277807444e-05, "loss": 0.2385, "step": 8088000 }, { "epoch": 4.85, "learning_rate": 2.2084602812513875e-05, "loss": 0.2358, "step": 8088500 }, { "epoch": 4.85, "learning_rate": 2.208250284695331e-05, "loss": 0.2332, "step": 8089000 }, { "epoch": 4.85, "learning_rate": 2.2080407081323866e-05, "loss": 0.2346, "step": 8089500 }, { "epoch": 4.85, "learning_rate": 2.2078307115763303e-05, "loss": 0.2349, "step": 8090000 }, { "epoch": 4.85, "learning_rate": 2.2076207150202736e-05, "loss": 0.239, "step": 8090500 }, { "epoch": 4.85, "learning_rate": 2.2074107184642173e-05, "loss": 0.2386, "step": 8091000 }, { "epoch": 4.85, "learning_rate": 2.2072007219081606e-05, "loss": 0.2376, "step": 8091500 }, { "epoch": 4.85, "learning_rate": 2.206990725352104e-05, "loss": 0.2365, "step": 8092000 }, { "epoch": 4.85, "learning_rate": 2.2067807287960477e-05, "loss": 0.2386, "step": 8092500 }, { "epoch": 4.85, "learning_rate": 2.2065707322399913e-05, "loss": 0.2362, "step": 8093000 }, { "epoch": 4.85, "learning_rate": 2.206361155677047e-05, "loss": 0.2313, "step": 8093500 }, { "epoch": 4.85, "learning_rate": 2.2061511591209904e-05, "loss": 0.2308, "step": 8094000 }, { "epoch": 4.85, "learning_rate": 2.2059411625649337e-05, "loss": 0.2385, "step": 8094500 }, { "epoch": 4.85, "learning_rate": 2.2057311660088774e-05, "loss": 0.2367, "step": 8095000 }, { "epoch": 4.85, "learning_rate": 2.205521589445933e-05, "loss": 0.2331, "step": 8095500 }, { "epoch": 4.85, "learning_rate": 2.2053115928898765e-05, "loss": 0.2346, "step": 8096000 }, { "epoch": 4.85, "learning_rate": 2.20510159633382e-05, "loss": 0.2371, "step": 8096500 }, { "epoch": 4.85, "learning_rate": 2.2048915997777635e-05, "loss": 0.2347, "step": 8097000 }, { "epoch": 4.85, "learning_rate": 2.2046820232148192e-05, "loss": 0.2366, "step": 8097500 }, { "epoch": 4.86, "learning_rate": 2.204472026658763e-05, "loss": 0.2399, "step": 8098000 }, { "epoch": 4.86, "learning_rate": 2.2042620301027062e-05, "loss": 0.2376, "step": 8098500 }, { "epoch": 4.86, "learning_rate": 2.2040520335466496e-05, "loss": 0.2386, "step": 8099000 }, { "epoch": 4.86, "learning_rate": 2.2038424569837056e-05, "loss": 0.2387, "step": 8099500 }, { "epoch": 4.86, "learning_rate": 2.203632460427649e-05, "loss": 0.2306, "step": 8100000 }, { "epoch": 4.86, "eval_loss": 0.2206544727087021, "eval_runtime": 1459.1546, "eval_samples_per_second": 360.976, "eval_steps_per_second": 60.163, "step": 8100000 }, { "epoch": 4.86, "learning_rate": 2.2034224638715926e-05, "loss": 0.2324, "step": 8100500 }, { "epoch": 4.86, "learning_rate": 2.203212467315536e-05, "loss": 0.237, "step": 8101000 }, { "epoch": 4.86, "learning_rate": 2.2030028907525917e-05, "loss": 0.2329, "step": 8101500 }, { "epoch": 4.86, "learning_rate": 2.2027933141896474e-05, "loss": 0.2405, "step": 8102000 }, { "epoch": 4.86, "learning_rate": 2.202583317633591e-05, "loss": 0.2376, "step": 8102500 }, { "epoch": 4.86, "learning_rate": 2.202373321077534e-05, "loss": 0.2359, "step": 8103000 }, { "epoch": 4.86, "learning_rate": 2.2021633245214777e-05, "loss": 0.241, "step": 8103500 }, { "epoch": 4.86, "learning_rate": 2.2019533279654214e-05, "loss": 0.2366, "step": 8104000 }, { "epoch": 4.86, "learning_rate": 2.2017433314093648e-05, "loss": 0.2353, "step": 8104500 }, { "epoch": 4.86, "learning_rate": 2.2015333348533085e-05, "loss": 0.2423, "step": 8105000 }, { "epoch": 4.86, "learning_rate": 2.2013233382972518e-05, "loss": 0.2329, "step": 8105500 }, { "epoch": 4.86, "learning_rate": 2.2011137617343075e-05, "loss": 0.2369, "step": 8106000 }, { "epoch": 4.86, "learning_rate": 2.2009037651782512e-05, "loss": 0.2344, "step": 8106500 }, { "epoch": 4.86, "learning_rate": 2.2006937686221945e-05, "loss": 0.2327, "step": 8107000 }, { "epoch": 4.86, "learning_rate": 2.2004837720661382e-05, "loss": 0.2376, "step": 8107500 }, { "epoch": 4.86, "learning_rate": 2.2002741955031936e-05, "loss": 0.2383, "step": 8108000 }, { "epoch": 4.86, "learning_rate": 2.2000641989471373e-05, "loss": 0.2374, "step": 8108500 }, { "epoch": 4.86, "learning_rate": 2.1998542023910806e-05, "loss": 0.2342, "step": 8109000 }, { "epoch": 4.86, "learning_rate": 2.1996442058350243e-05, "loss": 0.2363, "step": 8109500 }, { "epoch": 4.86, "learning_rate": 2.1994346292720797e-05, "loss": 0.2373, "step": 8110000 }, { "epoch": 4.86, "learning_rate": 2.1992246327160233e-05, "loss": 0.2332, "step": 8110500 }, { "epoch": 4.86, "learning_rate": 2.199014636159967e-05, "loss": 0.2357, "step": 8111000 }, { "epoch": 4.86, "learning_rate": 2.1988046396039104e-05, "loss": 0.2361, "step": 8111500 }, { "epoch": 4.86, "learning_rate": 2.198595063040966e-05, "loss": 0.2347, "step": 8112000 }, { "epoch": 4.86, "learning_rate": 2.1983850664849094e-05, "loss": 0.2337, "step": 8112500 }, { "epoch": 4.86, "learning_rate": 2.198175069928853e-05, "loss": 0.2345, "step": 8113000 }, { "epoch": 4.86, "learning_rate": 2.1979650733727968e-05, "loss": 0.2372, "step": 8113500 }, { "epoch": 4.86, "learning_rate": 2.1977554968098525e-05, "loss": 0.2347, "step": 8114000 }, { "epoch": 4.86, "learning_rate": 2.1975455002537958e-05, "loss": 0.2314, "step": 8114500 }, { "epoch": 4.87, "learning_rate": 2.197335503697739e-05, "loss": 0.2362, "step": 8115000 }, { "epoch": 4.87, "learning_rate": 2.197125507141683e-05, "loss": 0.233, "step": 8115500 }, { "epoch": 4.87, "learning_rate": 2.1969155105856262e-05, "loss": 0.2405, "step": 8116000 }, { "epoch": 4.87, "learning_rate": 2.1967059340226822e-05, "loss": 0.2472, "step": 8116500 }, { "epoch": 4.87, "learning_rate": 2.1964963574597376e-05, "loss": 0.2456, "step": 8117000 }, { "epoch": 4.87, "learning_rate": 2.196286360903681e-05, "loss": 0.2373, "step": 8117500 }, { "epoch": 4.87, "learning_rate": 2.1960763643476246e-05, "loss": 0.2347, "step": 8118000 }, { "epoch": 4.87, "learning_rate": 2.1958663677915683e-05, "loss": 0.2365, "step": 8118500 }, { "epoch": 4.87, "learning_rate": 2.1956563712355117e-05, "loss": 0.2343, "step": 8119000 }, { "epoch": 4.87, "learning_rate": 2.195446374679455e-05, "loss": 0.2343, "step": 8119500 }, { "epoch": 4.87, "learning_rate": 2.1952367981165107e-05, "loss": 0.2368, "step": 8120000 }, { "epoch": 4.87, "learning_rate": 2.1950268015604544e-05, "loss": 0.2355, "step": 8120500 }, { "epoch": 4.87, "learning_rate": 2.194816805004398e-05, "loss": 0.2399, "step": 8121000 }, { "epoch": 4.87, "learning_rate": 2.1946068084483414e-05, "loss": 0.2333, "step": 8121500 }, { "epoch": 4.87, "learning_rate": 2.1943968118922848e-05, "loss": 0.2433, "step": 8122000 }, { "epoch": 4.87, "learning_rate": 2.1941868153362284e-05, "loss": 0.2296, "step": 8122500 }, { "epoch": 4.87, "learning_rate": 2.193976818780172e-05, "loss": 0.2385, "step": 8123000 }, { "epoch": 4.87, "learning_rate": 2.1937668222241155e-05, "loss": 0.2399, "step": 8123500 }, { "epoch": 4.87, "learning_rate": 2.193557245661171e-05, "loss": 0.238, "step": 8124000 }, { "epoch": 4.87, "learning_rate": 2.1933472491051145e-05, "loss": 0.2403, "step": 8124500 }, { "epoch": 4.87, "learning_rate": 2.1931372525490582e-05, "loss": 0.2375, "step": 8125000 }, { "epoch": 4.87, "learning_rate": 2.1929272559930015e-05, "loss": 0.2422, "step": 8125500 }, { "epoch": 4.87, "learning_rate": 2.1927172594369452e-05, "loss": 0.2404, "step": 8126000 }, { "epoch": 4.87, "learning_rate": 2.192507262880889e-05, "loss": 0.2405, "step": 8126500 }, { "epoch": 4.87, "learning_rate": 2.1922976863179443e-05, "loss": 0.2338, "step": 8127000 }, { "epoch": 4.87, "learning_rate": 2.192087689761888e-05, "loss": 0.2359, "step": 8127500 }, { "epoch": 4.87, "learning_rate": 2.1918776932058313e-05, "loss": 0.2379, "step": 8128000 }, { "epoch": 4.87, "learning_rate": 2.191667696649775e-05, "loss": 0.2327, "step": 8128500 }, { "epoch": 4.87, "learning_rate": 2.1914577000937187e-05, "loss": 0.233, "step": 8129000 }, { "epoch": 4.87, "learning_rate": 2.1912477035376617e-05, "loss": 0.237, "step": 8129500 }, { "epoch": 4.87, "learning_rate": 2.1910377069816054e-05, "loss": 0.2336, "step": 8130000 }, { "epoch": 4.87, "learning_rate": 2.190828130418661e-05, "loss": 0.233, "step": 8130500 }, { "epoch": 4.87, "learning_rate": 2.1906181338626047e-05, "loss": 0.2289, "step": 8131000 }, { "epoch": 4.88, "learning_rate": 2.1904081373065484e-05, "loss": 0.2352, "step": 8131500 }, { "epoch": 4.88, "learning_rate": 2.1901985607436038e-05, "loss": 0.2304, "step": 8132000 }, { "epoch": 4.88, "learning_rate": 2.189988564187547e-05, "loss": 0.2388, "step": 8132500 }, { "epoch": 4.88, "learning_rate": 2.1897785676314908e-05, "loss": 0.2342, "step": 8133000 }, { "epoch": 4.88, "learning_rate": 2.1895685710754345e-05, "loss": 0.235, "step": 8133500 }, { "epoch": 4.88, "learning_rate": 2.189358574519378e-05, "loss": 0.2413, "step": 8134000 }, { "epoch": 4.88, "learning_rate": 2.1891485779633212e-05, "loss": 0.2308, "step": 8134500 }, { "epoch": 4.88, "learning_rate": 2.188938581407265e-05, "loss": 0.2348, "step": 8135000 }, { "epoch": 4.88, "learning_rate": 2.1887285848512086e-05, "loss": 0.2407, "step": 8135500 }, { "epoch": 4.88, "learning_rate": 2.1885190082882643e-05, "loss": 0.2377, "step": 8136000 }, { "epoch": 4.88, "learning_rate": 2.1883090117322073e-05, "loss": 0.2422, "step": 8136500 }, { "epoch": 4.88, "learning_rate": 2.188099015176151e-05, "loss": 0.2356, "step": 8137000 }, { "epoch": 4.88, "learning_rate": 2.1878890186200946e-05, "loss": 0.2365, "step": 8137500 }, { "epoch": 4.88, "learning_rate": 2.187679022064038e-05, "loss": 0.2339, "step": 8138000 }, { "epoch": 4.88, "learning_rate": 2.1874690255079817e-05, "loss": 0.2337, "step": 8138500 }, { "epoch": 4.88, "learning_rate": 2.1872590289519253e-05, "loss": 0.2366, "step": 8139000 }, { "epoch": 4.88, "learning_rate": 2.1870490323958687e-05, "loss": 0.2362, "step": 8139500 }, { "epoch": 4.88, "learning_rate": 2.1868394558329244e-05, "loss": 0.2425, "step": 8140000 }, { "epoch": 4.88, "learning_rate": 2.1866294592768677e-05, "loss": 0.2346, "step": 8140500 }, { "epoch": 4.88, "learning_rate": 2.1864194627208114e-05, "loss": 0.2377, "step": 8141000 }, { "epoch": 4.88, "learning_rate": 2.186209466164755e-05, "loss": 0.2359, "step": 8141500 }, { "epoch": 4.88, "learning_rate": 2.185999469608698e-05, "loss": 0.24, "step": 8142000 }, { "epoch": 4.88, "learning_rate": 2.1857894730526418e-05, "loss": 0.2357, "step": 8142500 }, { "epoch": 4.88, "learning_rate": 2.1855794764965855e-05, "loss": 0.2368, "step": 8143000 }, { "epoch": 4.88, "learning_rate": 2.185369479940529e-05, "loss": 0.2395, "step": 8143500 }, { "epoch": 4.88, "learning_rate": 2.1851599033775845e-05, "loss": 0.2396, "step": 8144000 }, { "epoch": 4.88, "learning_rate": 2.1849503268146402e-05, "loss": 0.2377, "step": 8144500 }, { "epoch": 4.88, "learning_rate": 2.1847403302585836e-05, "loss": 0.2361, "step": 8145000 }, { "epoch": 4.88, "learning_rate": 2.1845303337025273e-05, "loss": 0.2273, "step": 8145500 }, { "epoch": 4.88, "learning_rate": 2.184320337146471e-05, "loss": 0.2423, "step": 8146000 }, { "epoch": 4.88, "learning_rate": 2.1841103405904143e-05, "loss": 0.2378, "step": 8146500 }, { "epoch": 4.88, "learning_rate": 2.1839003440343576e-05, "loss": 0.2339, "step": 8147000 }, { "epoch": 4.88, "learning_rate": 2.1836903474783013e-05, "loss": 0.2405, "step": 8147500 }, { "epoch": 4.89, "learning_rate": 2.183480350922245e-05, "loss": 0.2365, "step": 8148000 }, { "epoch": 4.89, "learning_rate": 2.1832707743593007e-05, "loss": 0.2426, "step": 8148500 }, { "epoch": 4.89, "learning_rate": 2.1830607778032437e-05, "loss": 0.2323, "step": 8149000 }, { "epoch": 4.89, "learning_rate": 2.1828507812471874e-05, "loss": 0.2375, "step": 8149500 }, { "epoch": 4.89, "learning_rate": 2.182640784691131e-05, "loss": 0.239, "step": 8150000 }, { "epoch": 4.89, "learning_rate": 2.1824312081281868e-05, "loss": 0.2341, "step": 8150500 }, { "epoch": 4.89, "learning_rate": 2.1822212115721305e-05, "loss": 0.2414, "step": 8151000 }, { "epoch": 4.89, "learning_rate": 2.1820112150160735e-05, "loss": 0.2283, "step": 8151500 }, { "epoch": 4.89, "learning_rate": 2.181801638453129e-05, "loss": 0.232, "step": 8152000 }, { "epoch": 4.89, "learning_rate": 2.181591641897073e-05, "loss": 0.2383, "step": 8152500 }, { "epoch": 4.89, "learning_rate": 2.1813816453410165e-05, "loss": 0.2326, "step": 8153000 }, { "epoch": 4.89, "learning_rate": 2.1811716487849602e-05, "loss": 0.2391, "step": 8153500 }, { "epoch": 4.89, "learning_rate": 2.1809616522289032e-05, "loss": 0.2433, "step": 8154000 }, { "epoch": 4.89, "learning_rate": 2.180751655672847e-05, "loss": 0.2314, "step": 8154500 }, { "epoch": 4.89, "learning_rate": 2.1805416591167906e-05, "loss": 0.2326, "step": 8155000 }, { "epoch": 4.89, "learning_rate": 2.180331662560734e-05, "loss": 0.2361, "step": 8155500 }, { "epoch": 4.89, "learning_rate": 2.1801216660046776e-05, "loss": 0.236, "step": 8156000 }, { "epoch": 4.89, "learning_rate": 2.179912089441733e-05, "loss": 0.2339, "step": 8156500 }, { "epoch": 4.89, "learning_rate": 2.1797025128787887e-05, "loss": 0.2446, "step": 8157000 }, { "epoch": 4.89, "learning_rate": 2.1794925163227324e-05, "loss": 0.2328, "step": 8157500 }, { "epoch": 4.89, "learning_rate": 2.179282519766676e-05, "loss": 0.2328, "step": 8158000 }, { "epoch": 4.89, "learning_rate": 2.179072523210619e-05, "loss": 0.2389, "step": 8158500 }, { "epoch": 4.89, "learning_rate": 2.1788625266545627e-05, "loss": 0.2346, "step": 8159000 }, { "epoch": 4.89, "learning_rate": 2.1786529500916184e-05, "loss": 0.2316, "step": 8159500 }, { "epoch": 4.89, "learning_rate": 2.178442953535562e-05, "loss": 0.2366, "step": 8160000 }, { "epoch": 4.89, "learning_rate": 2.1782329569795058e-05, "loss": 0.2337, "step": 8160500 }, { "epoch": 4.89, "learning_rate": 2.1780229604234488e-05, "loss": 0.2334, "step": 8161000 }, { "epoch": 4.89, "learning_rate": 2.1778129638673925e-05, "loss": 0.2332, "step": 8161500 }, { "epoch": 4.89, "learning_rate": 2.1776029673113362e-05, "loss": 0.2351, "step": 8162000 }, { "epoch": 4.89, "learning_rate": 2.1773929707552795e-05, "loss": 0.2351, "step": 8162500 }, { "epoch": 4.89, "learning_rate": 2.1771829741992232e-05, "loss": 0.2348, "step": 8163000 }, { "epoch": 4.89, "learning_rate": 2.1769733976362786e-05, "loss": 0.2366, "step": 8163500 }, { "epoch": 4.89, "learning_rate": 2.1767634010802222e-05, "loss": 0.2355, "step": 8164000 }, { "epoch": 4.89, "learning_rate": 2.176553404524166e-05, "loss": 0.237, "step": 8164500 }, { "epoch": 4.9, "learning_rate": 2.1763438279612216e-05, "loss": 0.2406, "step": 8165000 }, { "epoch": 4.9, "learning_rate": 2.1761338314051646e-05, "loss": 0.2363, "step": 8165500 }, { "epoch": 4.9, "learning_rate": 2.1759238348491083e-05, "loss": 0.2329, "step": 8166000 }, { "epoch": 4.9, "learning_rate": 2.175713838293052e-05, "loss": 0.2316, "step": 8166500 }, { "epoch": 4.9, "learning_rate": 2.1755038417369953e-05, "loss": 0.2356, "step": 8167000 }, { "epoch": 4.9, "learning_rate": 2.175293845180939e-05, "loss": 0.2397, "step": 8167500 }, { "epoch": 4.9, "learning_rate": 2.1750838486248827e-05, "loss": 0.2391, "step": 8168000 }, { "epoch": 4.9, "learning_rate": 2.174873852068826e-05, "loss": 0.2355, "step": 8168500 }, { "epoch": 4.9, "learning_rate": 2.1746642755058818e-05, "loss": 0.2318, "step": 8169000 }, { "epoch": 4.9, "learning_rate": 2.174454278949825e-05, "loss": 0.2371, "step": 8169500 }, { "epoch": 4.9, "learning_rate": 2.1742442823937688e-05, "loss": 0.2354, "step": 8170000 }, { "epoch": 4.9, "learning_rate": 2.1740342858377125e-05, "loss": 0.226, "step": 8170500 }, { "epoch": 4.9, "learning_rate": 2.173824709274768e-05, "loss": 0.2415, "step": 8171000 }, { "epoch": 4.9, "learning_rate": 2.1736147127187115e-05, "loss": 0.2386, "step": 8171500 }, { "epoch": 4.9, "learning_rate": 2.1734051361557672e-05, "loss": 0.2388, "step": 8172000 }, { "epoch": 4.9, "learning_rate": 2.1731951395997106e-05, "loss": 0.2364, "step": 8172500 }, { "epoch": 4.9, "learning_rate": 2.172985143043654e-05, "loss": 0.2362, "step": 8173000 }, { "epoch": 4.9, "learning_rate": 2.1727751464875976e-05, "loss": 0.2346, "step": 8173500 }, { "epoch": 4.9, "learning_rate": 2.172565149931541e-05, "loss": 0.2316, "step": 8174000 }, { "epoch": 4.9, "learning_rate": 2.1723551533754846e-05, "loss": 0.2336, "step": 8174500 }, { "epoch": 4.9, "learning_rate": 2.17214557681254e-05, "loss": 0.229, "step": 8175000 }, { "epoch": 4.9, "learning_rate": 2.1719355802564837e-05, "loss": 0.2379, "step": 8175500 }, { "epoch": 4.9, "learning_rate": 2.1717255837004273e-05, "loss": 0.2394, "step": 8176000 }, { "epoch": 4.9, "learning_rate": 2.1715155871443707e-05, "loss": 0.2372, "step": 8176500 }, { "epoch": 4.9, "learning_rate": 2.1713055905883144e-05, "loss": 0.2383, "step": 8177000 }, { "epoch": 4.9, "learning_rate": 2.171095594032258e-05, "loss": 0.2375, "step": 8177500 }, { "epoch": 4.9, "learning_rate": 2.170885597476201e-05, "loss": 0.2388, "step": 8178000 }, { "epoch": 4.9, "learning_rate": 2.1706756009201448e-05, "loss": 0.2391, "step": 8178500 }, { "epoch": 4.9, "learning_rate": 2.1704660243572005e-05, "loss": 0.2395, "step": 8179000 }, { "epoch": 4.9, "learning_rate": 2.170256027801144e-05, "loss": 0.233, "step": 8179500 }, { "epoch": 4.9, "learning_rate": 2.1700460312450878e-05, "loss": 0.2321, "step": 8180000 }, { "epoch": 4.9, "learning_rate": 2.1698364546821432e-05, "loss": 0.2389, "step": 8180500 }, { "epoch": 4.9, "learning_rate": 2.1696264581260865e-05, "loss": 0.2371, "step": 8181000 }, { "epoch": 4.91, "learning_rate": 2.1694164615700302e-05, "loss": 0.2335, "step": 8181500 }, { "epoch": 4.91, "learning_rate": 2.169206465013974e-05, "loss": 0.2305, "step": 8182000 }, { "epoch": 4.91, "learning_rate": 2.1689964684579172e-05, "loss": 0.2319, "step": 8182500 }, { "epoch": 4.91, "learning_rate": 2.1687864719018606e-05, "loss": 0.2303, "step": 8183000 }, { "epoch": 4.91, "learning_rate": 2.1685768953389163e-05, "loss": 0.2306, "step": 8183500 }, { "epoch": 4.91, "learning_rate": 2.16836689878286e-05, "loss": 0.2347, "step": 8184000 }, { "epoch": 4.91, "learning_rate": 2.1681569022268037e-05, "loss": 0.2342, "step": 8184500 }, { "epoch": 4.91, "learning_rate": 2.1679469056707467e-05, "loss": 0.2318, "step": 8185000 }, { "epoch": 4.91, "learning_rate": 2.1677373291078027e-05, "loss": 0.2301, "step": 8185500 }, { "epoch": 4.91, "learning_rate": 2.167527332551746e-05, "loss": 0.2352, "step": 8186000 }, { "epoch": 4.91, "learning_rate": 2.1673173359956897e-05, "loss": 0.2402, "step": 8186500 }, { "epoch": 4.91, "learning_rate": 2.1671073394396334e-05, "loss": 0.2351, "step": 8187000 }, { "epoch": 4.91, "learning_rate": 2.1668973428835764e-05, "loss": 0.2366, "step": 8187500 }, { "epoch": 4.91, "learning_rate": 2.16668734632752e-05, "loss": 0.2326, "step": 8188000 }, { "epoch": 4.91, "learning_rate": 2.1664773497714638e-05, "loss": 0.2365, "step": 8188500 }, { "epoch": 4.91, "learning_rate": 2.166267353215407e-05, "loss": 0.2325, "step": 8189000 }, { "epoch": 4.91, "learning_rate": 2.166057776652463e-05, "loss": 0.2387, "step": 8189500 }, { "epoch": 4.91, "learning_rate": 2.1658482000895185e-05, "loss": 0.2285, "step": 8190000 }, { "epoch": 4.91, "learning_rate": 2.165638203533462e-05, "loss": 0.234, "step": 8190500 }, { "epoch": 4.91, "learning_rate": 2.1654282069774056e-05, "loss": 0.2386, "step": 8191000 }, { "epoch": 4.91, "learning_rate": 2.1652182104213492e-05, "loss": 0.2345, "step": 8191500 }, { "epoch": 4.91, "learning_rate": 2.1650082138652926e-05, "loss": 0.2361, "step": 8192000 }, { "epoch": 4.91, "learning_rate": 2.164798217309236e-05, "loss": 0.2362, "step": 8192500 }, { "epoch": 4.91, "learning_rate": 2.1645886407462916e-05, "loss": 0.2337, "step": 8193000 }, { "epoch": 4.91, "learning_rate": 2.1643786441902353e-05, "loss": 0.2357, "step": 8193500 }, { "epoch": 4.91, "learning_rate": 2.164168647634179e-05, "loss": 0.2379, "step": 8194000 }, { "epoch": 4.91, "learning_rate": 2.163958651078122e-05, "loss": 0.2336, "step": 8194500 }, { "epoch": 4.91, "learning_rate": 2.1637486545220657e-05, "loss": 0.236, "step": 8195000 }, { "epoch": 4.91, "learning_rate": 2.1635386579660094e-05, "loss": 0.2422, "step": 8195500 }, { "epoch": 4.91, "learning_rate": 2.163329081403065e-05, "loss": 0.2376, "step": 8196000 }, { "epoch": 4.91, "learning_rate": 2.1631190848470088e-05, "loss": 0.2391, "step": 8196500 }, { "epoch": 4.91, "learning_rate": 2.1629090882909518e-05, "loss": 0.2465, "step": 8197000 }, { "epoch": 4.91, "learning_rate": 2.1626990917348954e-05, "loss": 0.2337, "step": 8197500 }, { "epoch": 4.92, "learning_rate": 2.162489095178839e-05, "loss": 0.2354, "step": 8198000 }, { "epoch": 4.92, "learning_rate": 2.1622790986227825e-05, "loss": 0.2328, "step": 8198500 }, { "epoch": 4.92, "learning_rate": 2.1620695220598382e-05, "loss": 0.2351, "step": 8199000 }, { "epoch": 4.92, "learning_rate": 2.1618595255037815e-05, "loss": 0.2386, "step": 8199500 }, { "epoch": 4.92, "learning_rate": 2.1616495289477252e-05, "loss": 0.2371, "step": 8200000 }, { "epoch": 4.92, "eval_loss": 0.21934321522712708, "eval_runtime": 1456.8266, "eval_samples_per_second": 361.553, "eval_steps_per_second": 60.259, "step": 8200000 }, { "epoch": 4.92, "learning_rate": 2.161439532391669e-05, "loss": 0.243, "step": 8200500 }, { "epoch": 4.92, "learning_rate": 2.1612295358356122e-05, "loss": 0.234, "step": 8201000 }, { "epoch": 4.92, "learning_rate": 2.161019539279556e-05, "loss": 0.2335, "step": 8201500 }, { "epoch": 4.92, "learning_rate": 2.1608095427234993e-05, "loss": 0.2367, "step": 8202000 }, { "epoch": 4.92, "learning_rate": 2.1605995461674426e-05, "loss": 0.2365, "step": 8202500 }, { "epoch": 4.92, "learning_rate": 2.1603895496113863e-05, "loss": 0.2293, "step": 8203000 }, { "epoch": 4.92, "learning_rate": 2.160179973048442e-05, "loss": 0.234, "step": 8203500 }, { "epoch": 4.92, "learning_rate": 2.1599699764923857e-05, "loss": 0.2338, "step": 8204000 }, { "epoch": 4.92, "learning_rate": 2.159759979936329e-05, "loss": 0.2347, "step": 8204500 }, { "epoch": 4.92, "learning_rate": 2.1595499833802724e-05, "loss": 0.2379, "step": 8205000 }, { "epoch": 4.92, "learning_rate": 2.159340406817328e-05, "loss": 0.2344, "step": 8205500 }, { "epoch": 4.92, "learning_rate": 2.1591304102612717e-05, "loss": 0.2301, "step": 8206000 }, { "epoch": 4.92, "learning_rate": 2.1589204137052154e-05, "loss": 0.2344, "step": 8206500 }, { "epoch": 4.92, "learning_rate": 2.1587104171491584e-05, "loss": 0.2346, "step": 8207000 }, { "epoch": 4.92, "learning_rate": 2.1585008405862145e-05, "loss": 0.2349, "step": 8207500 }, { "epoch": 4.92, "learning_rate": 2.1582908440301578e-05, "loss": 0.2373, "step": 8208000 }, { "epoch": 4.92, "learning_rate": 2.1580808474741015e-05, "loss": 0.2345, "step": 8208500 }, { "epoch": 4.92, "learning_rate": 2.1578708509180452e-05, "loss": 0.2344, "step": 8209000 }, { "epoch": 4.92, "learning_rate": 2.1576616943482126e-05, "loss": 0.2382, "step": 8209500 }, { "epoch": 4.92, "learning_rate": 2.1574516977921562e-05, "loss": 0.2337, "step": 8210000 }, { "epoch": 4.92, "learning_rate": 2.1572417012361e-05, "loss": 0.2287, "step": 8210500 }, { "epoch": 4.92, "learning_rate": 2.1570317046800433e-05, "loss": 0.238, "step": 8211000 }, { "epoch": 4.92, "learning_rate": 2.1568217081239866e-05, "loss": 0.2372, "step": 8211500 }, { "epoch": 4.92, "learning_rate": 2.1566117115679303e-05, "loss": 0.237, "step": 8212000 }, { "epoch": 4.92, "learning_rate": 2.1564017150118737e-05, "loss": 0.2339, "step": 8212500 }, { "epoch": 4.92, "learning_rate": 2.1561917184558173e-05, "loss": 0.2302, "step": 8213000 }, { "epoch": 4.92, "learning_rate": 2.1559821418928727e-05, "loss": 0.2342, "step": 8213500 }, { "epoch": 4.92, "learning_rate": 2.1557721453368164e-05, "loss": 0.2317, "step": 8214000 }, { "epoch": 4.92, "learning_rate": 2.155562568773872e-05, "loss": 0.225, "step": 8214500 }, { "epoch": 4.93, "learning_rate": 2.1553525722178158e-05, "loss": 0.2357, "step": 8215000 }, { "epoch": 4.93, "learning_rate": 2.155142575661759e-05, "loss": 0.2317, "step": 8215500 }, { "epoch": 4.93, "learning_rate": 2.1549325791057025e-05, "loss": 0.2389, "step": 8216000 }, { "epoch": 4.93, "learning_rate": 2.154722582549646e-05, "loss": 0.235, "step": 8216500 }, { "epoch": 4.93, "learning_rate": 2.1545125859935895e-05, "loss": 0.236, "step": 8217000 }, { "epoch": 4.93, "learning_rate": 2.1543030094306455e-05, "loss": 0.2366, "step": 8217500 }, { "epoch": 4.93, "learning_rate": 2.154093012874589e-05, "loss": 0.2322, "step": 8218000 }, { "epoch": 4.93, "learning_rate": 2.1538830163185322e-05, "loss": 0.2375, "step": 8218500 }, { "epoch": 4.93, "learning_rate": 2.153673019762476e-05, "loss": 0.2355, "step": 8219000 }, { "epoch": 4.93, "learning_rate": 2.1534630232064192e-05, "loss": 0.2374, "step": 8219500 }, { "epoch": 4.93, "learning_rate": 2.153253446643475e-05, "loss": 0.2441, "step": 8220000 }, { "epoch": 4.93, "learning_rate": 2.1530438700805306e-05, "loss": 0.244, "step": 8220500 }, { "epoch": 4.93, "learning_rate": 2.152833873524474e-05, "loss": 0.2427, "step": 8221000 }, { "epoch": 4.93, "learning_rate": 2.1526238769684177e-05, "loss": 0.2367, "step": 8221500 }, { "epoch": 4.93, "learning_rate": 2.1524138804123613e-05, "loss": 0.235, "step": 8222000 }, { "epoch": 4.93, "learning_rate": 2.1522038838563047e-05, "loss": 0.2342, "step": 8222500 }, { "epoch": 4.93, "learning_rate": 2.151993887300248e-05, "loss": 0.2359, "step": 8223000 }, { "epoch": 4.93, "learning_rate": 2.1517838907441917e-05, "loss": 0.235, "step": 8223500 }, { "epoch": 4.93, "learning_rate": 2.151573894188135e-05, "loss": 0.235, "step": 8224000 }, { "epoch": 4.93, "learning_rate": 2.1513638976320788e-05, "loss": 0.2384, "step": 8224500 }, { "epoch": 4.93, "learning_rate": 2.1511539010760224e-05, "loss": 0.237, "step": 8225000 }, { "epoch": 4.93, "learning_rate": 2.150943904519966e-05, "loss": 0.2353, "step": 8225500 }, { "epoch": 4.93, "learning_rate": 2.150733907963909e-05, "loss": 0.2355, "step": 8226000 }, { "epoch": 4.93, "learning_rate": 2.1505243314009648e-05, "loss": 0.2385, "step": 8226500 }, { "epoch": 4.93, "learning_rate": 2.1503143348449085e-05, "loss": 0.2364, "step": 8227000 }, { "epoch": 4.93, "learning_rate": 2.1501043382888522e-05, "loss": 0.2384, "step": 8227500 }, { "epoch": 4.93, "learning_rate": 2.1498943417327955e-05, "loss": 0.2394, "step": 8228000 }, { "epoch": 4.93, "learning_rate": 2.1496847651698512e-05, "loss": 0.2339, "step": 8228500 }, { "epoch": 4.93, "learning_rate": 2.1494747686137946e-05, "loss": 0.2313, "step": 8229000 }, { "epoch": 4.93, "learning_rate": 2.1492647720577383e-05, "loss": 0.2341, "step": 8229500 }, { "epoch": 4.93, "learning_rate": 2.149054775501682e-05, "loss": 0.2337, "step": 8230000 }, { "epoch": 4.93, "learning_rate": 2.1488451989387373e-05, "loss": 0.238, "step": 8230500 }, { "epoch": 4.93, "learning_rate": 2.148635202382681e-05, "loss": 0.2379, "step": 8231000 }, { "epoch": 4.94, "learning_rate": 2.1484252058266243e-05, "loss": 0.2311, "step": 8231500 }, { "epoch": 4.94, "learning_rate": 2.148215209270568e-05, "loss": 0.2367, "step": 8232000 }, { "epoch": 4.94, "learning_rate": 2.1480056327076237e-05, "loss": 0.2323, "step": 8232500 }, { "epoch": 4.94, "learning_rate": 2.147795636151567e-05, "loss": 0.2375, "step": 8233000 }, { "epoch": 4.94, "learning_rate": 2.1475856395955104e-05, "loss": 0.2348, "step": 8233500 }, { "epoch": 4.94, "learning_rate": 2.147375643039454e-05, "loss": 0.2352, "step": 8234000 }, { "epoch": 4.94, "learning_rate": 2.1471660664765098e-05, "loss": 0.2315, "step": 8234500 }, { "epoch": 4.94, "learning_rate": 2.146956489913565e-05, "loss": 0.2409, "step": 8235000 }, { "epoch": 4.94, "learning_rate": 2.146746493357509e-05, "loss": 0.2403, "step": 8235500 }, { "epoch": 4.94, "learning_rate": 2.1465364968014525e-05, "loss": 0.2314, "step": 8236000 }, { "epoch": 4.94, "learning_rate": 2.146326500245396e-05, "loss": 0.236, "step": 8236500 }, { "epoch": 4.94, "learning_rate": 2.1461165036893396e-05, "loss": 0.2342, "step": 8237000 }, { "epoch": 4.94, "learning_rate": 2.145906507133283e-05, "loss": 0.2353, "step": 8237500 }, { "epoch": 4.94, "learning_rate": 2.1456965105772266e-05, "loss": 0.2335, "step": 8238000 }, { "epoch": 4.94, "learning_rate": 2.14548651402117e-05, "loss": 0.2334, "step": 8238500 }, { "epoch": 4.94, "learning_rate": 2.1452765174651136e-05, "loss": 0.2316, "step": 8239000 }, { "epoch": 4.94, "learning_rate": 2.1450665209090573e-05, "loss": 0.2395, "step": 8239500 }, { "epoch": 4.94, "learning_rate": 2.1448565243530006e-05, "loss": 0.2376, "step": 8240000 }, { "epoch": 4.94, "learning_rate": 2.144646527796944e-05, "loss": 0.2325, "step": 8240500 }, { "epoch": 4.94, "learning_rate": 2.1444369512339997e-05, "loss": 0.2356, "step": 8241000 }, { "epoch": 4.94, "learning_rate": 2.1442269546779434e-05, "loss": 0.2354, "step": 8241500 }, { "epoch": 4.94, "learning_rate": 2.1440169581218867e-05, "loss": 0.234, "step": 8242000 }, { "epoch": 4.94, "learning_rate": 2.14380696156583e-05, "loss": 0.239, "step": 8242500 }, { "epoch": 4.94, "learning_rate": 2.1435969650097737e-05, "loss": 0.2373, "step": 8243000 }, { "epoch": 4.94, "learning_rate": 2.1433869684537174e-05, "loss": 0.2372, "step": 8243500 }, { "epoch": 4.94, "learning_rate": 2.1431769718976608e-05, "loss": 0.2343, "step": 8244000 }, { "epoch": 4.94, "learning_rate": 2.1429669753416045e-05, "loss": 0.2426, "step": 8244500 }, { "epoch": 4.94, "learning_rate": 2.1427573987786598e-05, "loss": 0.2373, "step": 8245000 }, { "epoch": 4.94, "learning_rate": 2.1425478222157155e-05, "loss": 0.2329, "step": 8245500 }, { "epoch": 4.94, "learning_rate": 2.1423378256596592e-05, "loss": 0.2387, "step": 8246000 }, { "epoch": 4.94, "learning_rate": 2.142127829103603e-05, "loss": 0.2395, "step": 8246500 }, { "epoch": 4.94, "learning_rate": 2.1419178325475462e-05, "loss": 0.2374, "step": 8247000 }, { "epoch": 4.94, "learning_rate": 2.1417078359914896e-05, "loss": 0.2381, "step": 8247500 }, { "epoch": 4.95, "learning_rate": 2.1414978394354333e-05, "loss": 0.2345, "step": 8248000 }, { "epoch": 4.95, "learning_rate": 2.1412878428793766e-05, "loss": 0.2362, "step": 8248500 }, { "epoch": 4.95, "learning_rate": 2.1410778463233203e-05, "loss": 0.2372, "step": 8249000 }, { "epoch": 4.95, "learning_rate": 2.140868269760376e-05, "loss": 0.2369, "step": 8249500 }, { "epoch": 4.95, "learning_rate": 2.1406586931974313e-05, "loss": 0.2354, "step": 8250000 }, { "epoch": 4.95, "learning_rate": 2.140448696641375e-05, "loss": 0.2383, "step": 8250500 }, { "epoch": 4.95, "learning_rate": 2.1402387000853187e-05, "loss": 0.2357, "step": 8251000 }, { "epoch": 4.95, "learning_rate": 2.140028703529262e-05, "loss": 0.2373, "step": 8251500 }, { "epoch": 4.95, "learning_rate": 2.1398187069732054e-05, "loss": 0.2333, "step": 8252000 }, { "epoch": 4.95, "learning_rate": 2.139608710417149e-05, "loss": 0.2279, "step": 8252500 }, { "epoch": 4.95, "learning_rate": 2.1393987138610924e-05, "loss": 0.2352, "step": 8253000 }, { "epoch": 4.95, "learning_rate": 2.1391891372981485e-05, "loss": 0.2352, "step": 8253500 }, { "epoch": 4.95, "learning_rate": 2.1389791407420918e-05, "loss": 0.2394, "step": 8254000 }, { "epoch": 4.95, "learning_rate": 2.138769144186035e-05, "loss": 0.2313, "step": 8254500 }, { "epoch": 4.95, "learning_rate": 2.138559147629979e-05, "loss": 0.2329, "step": 8255000 }, { "epoch": 4.95, "learning_rate": 2.1383491510739222e-05, "loss": 0.2325, "step": 8255500 }, { "epoch": 4.95, "learning_rate": 2.138139154517866e-05, "loss": 0.2343, "step": 8256000 }, { "epoch": 4.95, "learning_rate": 2.1379291579618096e-05, "loss": 0.236, "step": 8256500 }, { "epoch": 4.95, "learning_rate": 2.137719581398865e-05, "loss": 0.2325, "step": 8257000 }, { "epoch": 4.95, "learning_rate": 2.1375095848428086e-05, "loss": 0.2356, "step": 8257500 }, { "epoch": 4.95, "learning_rate": 2.137299588286752e-05, "loss": 0.2411, "step": 8258000 }, { "epoch": 4.95, "learning_rate": 2.1370895917306956e-05, "loss": 0.2416, "step": 8258500 }, { "epoch": 4.95, "learning_rate": 2.1368795951746393e-05, "loss": 0.2342, "step": 8259000 }, { "epoch": 4.95, "learning_rate": 2.1366695986185827e-05, "loss": 0.2358, "step": 8259500 }, { "epoch": 4.95, "learning_rate": 2.136459602062526e-05, "loss": 0.2332, "step": 8260000 }, { "epoch": 4.95, "learning_rate": 2.1362496055064697e-05, "loss": 0.2393, "step": 8260500 }, { "epoch": 4.95, "learning_rate": 2.1360400289435254e-05, "loss": 0.2323, "step": 8261000 }, { "epoch": 4.95, "learning_rate": 2.135830032387469e-05, "loss": 0.2369, "step": 8261500 }, { "epoch": 4.95, "learning_rate": 2.135620035831412e-05, "loss": 0.2366, "step": 8262000 }, { "epoch": 4.95, "learning_rate": 2.1354100392753558e-05, "loss": 0.2357, "step": 8262500 }, { "epoch": 4.95, "learning_rate": 2.1352008827055235e-05, "loss": 0.2362, "step": 8263000 }, { "epoch": 4.95, "learning_rate": 2.134990886149467e-05, "loss": 0.2417, "step": 8263500 }, { "epoch": 4.95, "learning_rate": 2.1347808895934105e-05, "loss": 0.2391, "step": 8264000 }, { "epoch": 4.95, "learning_rate": 2.1345708930373542e-05, "loss": 0.235, "step": 8264500 }, { "epoch": 4.96, "learning_rate": 2.1343608964812975e-05, "loss": 0.2409, "step": 8265000 }, { "epoch": 4.96, "learning_rate": 2.1341508999252412e-05, "loss": 0.2347, "step": 8265500 }, { "epoch": 4.96, "learning_rate": 2.133941323362297e-05, "loss": 0.2391, "step": 8266000 }, { "epoch": 4.96, "learning_rate": 2.1337313268062403e-05, "loss": 0.2287, "step": 8266500 }, { "epoch": 4.96, "learning_rate": 2.133521330250184e-05, "loss": 0.2323, "step": 8267000 }, { "epoch": 4.96, "learning_rate": 2.1333113336941273e-05, "loss": 0.2356, "step": 8267500 }, { "epoch": 4.96, "learning_rate": 2.133101337138071e-05, "loss": 0.2355, "step": 8268000 }, { "epoch": 4.96, "learning_rate": 2.1328913405820147e-05, "loss": 0.2343, "step": 8268500 }, { "epoch": 4.96, "learning_rate": 2.132681344025958e-05, "loss": 0.2361, "step": 8269000 }, { "epoch": 4.96, "learning_rate": 2.1324717674630134e-05, "loss": 0.2323, "step": 8269500 }, { "epoch": 4.96, "learning_rate": 2.132261770906957e-05, "loss": 0.2335, "step": 8270000 }, { "epoch": 4.96, "learning_rate": 2.1320517743509007e-05, "loss": 0.2335, "step": 8270500 }, { "epoch": 4.96, "learning_rate": 2.131841777794844e-05, "loss": 0.2388, "step": 8271000 }, { "epoch": 4.96, "learning_rate": 2.1316317812387874e-05, "loss": 0.2343, "step": 8271500 }, { "epoch": 4.96, "learning_rate": 2.131421784682731e-05, "loss": 0.2388, "step": 8272000 }, { "epoch": 4.96, "learning_rate": 2.1312117881266748e-05, "loss": 0.2362, "step": 8272500 }, { "epoch": 4.96, "learning_rate": 2.131001791570618e-05, "loss": 0.2369, "step": 8273000 }, { "epoch": 4.96, "learning_rate": 2.130792215007674e-05, "loss": 0.2379, "step": 8273500 }, { "epoch": 4.96, "learning_rate": 2.1305822184516172e-05, "loss": 0.2282, "step": 8274000 }, { "epoch": 4.96, "learning_rate": 2.130372221895561e-05, "loss": 0.2331, "step": 8274500 }, { "epoch": 4.96, "learning_rate": 2.1301622253395042e-05, "loss": 0.2367, "step": 8275000 }, { "epoch": 4.96, "learning_rate": 2.1299526487765603e-05, "loss": 0.2343, "step": 8275500 }, { "epoch": 4.96, "learning_rate": 2.1297426522205036e-05, "loss": 0.236, "step": 8276000 }, { "epoch": 4.96, "learning_rate": 2.129533075657559e-05, "loss": 0.2358, "step": 8276500 }, { "epoch": 4.96, "learning_rate": 2.1293230791015026e-05, "loss": 0.2378, "step": 8277000 }, { "epoch": 4.96, "learning_rate": 2.1291130825454463e-05, "loss": 0.2304, "step": 8277500 }, { "epoch": 4.96, "learning_rate": 2.1289030859893897e-05, "loss": 0.2348, "step": 8278000 }, { "epoch": 4.96, "learning_rate": 2.1286935094264454e-05, "loss": 0.2353, "step": 8278500 }, { "epoch": 4.96, "learning_rate": 2.1284835128703887e-05, "loss": 0.2338, "step": 8279000 }, { "epoch": 4.96, "learning_rate": 2.1282735163143324e-05, "loss": 0.231, "step": 8279500 }, { "epoch": 4.96, "learning_rate": 2.128063519758276e-05, "loss": 0.245, "step": 8280000 }, { "epoch": 4.96, "learning_rate": 2.1278535232022194e-05, "loss": 0.2368, "step": 8280500 }, { "epoch": 4.96, "learning_rate": 2.1276435266461628e-05, "loss": 0.2365, "step": 8281000 }, { "epoch": 4.97, "learning_rate": 2.1274335300901065e-05, "loss": 0.2429, "step": 8281500 }, { "epoch": 4.97, "learning_rate": 2.1272235335340498e-05, "loss": 0.2384, "step": 8282000 }, { "epoch": 4.97, "learning_rate": 2.127014376964218e-05, "loss": 0.2306, "step": 8282500 }, { "epoch": 4.97, "learning_rate": 2.1268043804081612e-05, "loss": 0.2325, "step": 8283000 }, { "epoch": 4.97, "learning_rate": 2.1265943838521046e-05, "loss": 0.2352, "step": 8283500 }, { "epoch": 4.97, "learning_rate": 2.1263843872960482e-05, "loss": 0.2337, "step": 8284000 }, { "epoch": 4.97, "learning_rate": 2.126174390739992e-05, "loss": 0.2361, "step": 8284500 }, { "epoch": 4.97, "learning_rate": 2.1259643941839353e-05, "loss": 0.2312, "step": 8285000 }, { "epoch": 4.97, "learning_rate": 2.125754397627879e-05, "loss": 0.2353, "step": 8285500 }, { "epoch": 4.97, "learning_rate": 2.1255444010718223e-05, "loss": 0.23, "step": 8286000 }, { "epoch": 4.97, "learning_rate": 2.125334824508878e-05, "loss": 0.2335, "step": 8286500 }, { "epoch": 4.97, "learning_rate": 2.1251248279528217e-05, "loss": 0.2406, "step": 8287000 }, { "epoch": 4.97, "learning_rate": 2.124914831396765e-05, "loss": 0.2415, "step": 8287500 }, { "epoch": 4.97, "learning_rate": 2.1247052548338207e-05, "loss": 0.2363, "step": 8288000 }, { "epoch": 4.97, "learning_rate": 2.124495258277764e-05, "loss": 0.2312, "step": 8288500 }, { "epoch": 4.97, "learning_rate": 2.1242852617217077e-05, "loss": 0.2347, "step": 8289000 }, { "epoch": 4.97, "learning_rate": 2.1240752651656514e-05, "loss": 0.2334, "step": 8289500 }, { "epoch": 4.97, "learning_rate": 2.123865688602707e-05, "loss": 0.2404, "step": 8290000 }, { "epoch": 4.97, "learning_rate": 2.12365569204665e-05, "loss": 0.2326, "step": 8290500 }, { "epoch": 4.97, "learning_rate": 2.1234456954905938e-05, "loss": 0.2351, "step": 8291000 }, { "epoch": 4.97, "learning_rate": 2.1232356989345375e-05, "loss": 0.2394, "step": 8291500 }, { "epoch": 4.97, "learning_rate": 2.123025702378481e-05, "loss": 0.2308, "step": 8292000 }, { "epoch": 4.97, "learning_rate": 2.1228157058224245e-05, "loss": 0.2351, "step": 8292500 }, { "epoch": 4.97, "learning_rate": 2.122605709266368e-05, "loss": 0.2332, "step": 8293000 }, { "epoch": 4.97, "learning_rate": 2.1223957127103116e-05, "loss": 0.2321, "step": 8293500 }, { "epoch": 4.97, "learning_rate": 2.122185716154255e-05, "loss": 0.2355, "step": 8294000 }, { "epoch": 4.97, "learning_rate": 2.1219757195981986e-05, "loss": 0.233, "step": 8294500 }, { "epoch": 4.97, "learning_rate": 2.1217657230421423e-05, "loss": 0.2326, "step": 8295000 }, { "epoch": 4.97, "learning_rate": 2.1215557264860856e-05, "loss": 0.2369, "step": 8295500 }, { "epoch": 4.97, "learning_rate": 2.1213461499231413e-05, "loss": 0.239, "step": 8296000 }, { "epoch": 4.97, "learning_rate": 2.1211361533670847e-05, "loss": 0.2299, "step": 8296500 }, { "epoch": 4.97, "learning_rate": 2.1209261568110284e-05, "loss": 0.2357, "step": 8297000 }, { "epoch": 4.97, "learning_rate": 2.120716580248084e-05, "loss": 0.2421, "step": 8297500 }, { "epoch": 4.97, "learning_rate": 2.1205065836920274e-05, "loss": 0.2361, "step": 8298000 }, { "epoch": 4.98, "learning_rate": 2.1202965871359707e-05, "loss": 0.2339, "step": 8298500 }, { "epoch": 4.98, "learning_rate": 2.1200865905799144e-05, "loss": 0.2328, "step": 8299000 }, { "epoch": 4.98, "learning_rate": 2.119876594023858e-05, "loss": 0.2411, "step": 8299500 }, { "epoch": 4.98, "learning_rate": 2.1196665974678015e-05, "loss": 0.2343, "step": 8300000 }, { "epoch": 4.98, "eval_loss": 0.21912431716918945, "eval_runtime": 1456.2721, "eval_samples_per_second": 361.691, "eval_steps_per_second": 60.282, "step": 8300000 }, { "epoch": 4.98, "learning_rate": 2.1194566009117448e-05, "loss": 0.239, "step": 8300500 }, { "epoch": 4.98, "learning_rate": 2.1192466043556885e-05, "loss": 0.237, "step": 8301000 }, { "epoch": 4.98, "learning_rate": 2.1190370277927442e-05, "loss": 0.2288, "step": 8301500 }, { "epoch": 4.98, "learning_rate": 2.1188274512298e-05, "loss": 0.2393, "step": 8302000 }, { "epoch": 4.98, "learning_rate": 2.1186174546737432e-05, "loss": 0.2281, "step": 8302500 }, { "epoch": 4.98, "learning_rate": 2.118407458117687e-05, "loss": 0.2325, "step": 8303000 }, { "epoch": 4.98, "learning_rate": 2.1181974615616303e-05, "loss": 0.2345, "step": 8303500 }, { "epoch": 4.98, "learning_rate": 2.117987884998686e-05, "loss": 0.2375, "step": 8304000 }, { "epoch": 4.98, "learning_rate": 2.1177778884426296e-05, "loss": 0.2368, "step": 8304500 }, { "epoch": 4.98, "learning_rate": 2.117567891886573e-05, "loss": 0.2411, "step": 8305000 }, { "epoch": 4.98, "learning_rate": 2.1173578953305163e-05, "loss": 0.2346, "step": 8305500 }, { "epoch": 4.98, "learning_rate": 2.11714789877446e-05, "loss": 0.2385, "step": 8306000 }, { "epoch": 4.98, "learning_rate": 2.1169379022184037e-05, "loss": 0.2347, "step": 8306500 }, { "epoch": 4.98, "learning_rate": 2.116727905662347e-05, "loss": 0.2324, "step": 8307000 }, { "epoch": 4.98, "learning_rate": 2.1165179091062907e-05, "loss": 0.2292, "step": 8307500 }, { "epoch": 4.98, "learning_rate": 2.116308332543346e-05, "loss": 0.2348, "step": 8308000 }, { "epoch": 4.98, "learning_rate": 2.1160983359872898e-05, "loss": 0.2393, "step": 8308500 }, { "epoch": 4.98, "learning_rate": 2.1158883394312335e-05, "loss": 0.2368, "step": 8309000 }, { "epoch": 4.98, "learning_rate": 2.1156783428751768e-05, "loss": 0.2325, "step": 8309500 }, { "epoch": 4.98, "learning_rate": 2.1154687663122325e-05, "loss": 0.2344, "step": 8310000 }, { "epoch": 4.98, "learning_rate": 2.1152591897492882e-05, "loss": 0.2335, "step": 8310500 }, { "epoch": 4.98, "learning_rate": 2.1150491931932315e-05, "loss": 0.2341, "step": 8311000 }, { "epoch": 4.98, "learning_rate": 2.1148391966371752e-05, "loss": 0.2347, "step": 8311500 }, { "epoch": 4.98, "learning_rate": 2.1146292000811186e-05, "loss": 0.2349, "step": 8312000 }, { "epoch": 4.98, "learning_rate": 2.1144196235181743e-05, "loss": 0.236, "step": 8312500 }, { "epoch": 4.98, "learning_rate": 2.114209626962118e-05, "loss": 0.2371, "step": 8313000 }, { "epoch": 4.98, "learning_rate": 2.1139996304060613e-05, "loss": 0.2343, "step": 8313500 }, { "epoch": 4.98, "learning_rate": 2.113789633850005e-05, "loss": 0.2317, "step": 8314000 }, { "epoch": 4.98, "learning_rate": 2.1135796372939483e-05, "loss": 0.2375, "step": 8314500 }, { "epoch": 4.99, "learning_rate": 2.1133696407378917e-05, "loss": 0.233, "step": 8315000 }, { "epoch": 4.99, "learning_rate": 2.1131600641749474e-05, "loss": 0.2385, "step": 8315500 }, { "epoch": 4.99, "learning_rate": 2.112950067618891e-05, "loss": 0.2327, "step": 8316000 }, { "epoch": 4.99, "learning_rate": 2.1127400710628347e-05, "loss": 0.2292, "step": 8316500 }, { "epoch": 4.99, "learning_rate": 2.112530074506778e-05, "loss": 0.2348, "step": 8317000 }, { "epoch": 4.99, "learning_rate": 2.1123200779507214e-05, "loss": 0.2345, "step": 8317500 }, { "epoch": 4.99, "learning_rate": 2.112110081394665e-05, "loss": 0.2293, "step": 8318000 }, { "epoch": 4.99, "learning_rate": 2.1119005048317208e-05, "loss": 0.2314, "step": 8318500 }, { "epoch": 4.99, "learning_rate": 2.1116905082756645e-05, "loss": 0.2376, "step": 8319000 }, { "epoch": 4.99, "learning_rate": 2.1114805117196075e-05, "loss": 0.2375, "step": 8319500 }, { "epoch": 4.99, "learning_rate": 2.1112705151635512e-05, "loss": 0.2358, "step": 8320000 }, { "epoch": 4.99, "learning_rate": 2.111060518607495e-05, "loss": 0.2369, "step": 8320500 }, { "epoch": 4.99, "learning_rate": 2.1108505220514382e-05, "loss": 0.2329, "step": 8321000 }, { "epoch": 4.99, "learning_rate": 2.110640525495382e-05, "loss": 0.2275, "step": 8321500 }, { "epoch": 4.99, "learning_rate": 2.1104305289393253e-05, "loss": 0.232, "step": 8322000 }, { "epoch": 4.99, "learning_rate": 2.110220952376381e-05, "loss": 0.2309, "step": 8322500 }, { "epoch": 4.99, "learning_rate": 2.1100109558203246e-05, "loss": 0.2369, "step": 8323000 }, { "epoch": 4.99, "learning_rate": 2.109800959264268e-05, "loss": 0.2391, "step": 8323500 }, { "epoch": 4.99, "learning_rate": 2.1095909627082117e-05, "loss": 0.2354, "step": 8324000 }, { "epoch": 4.99, "learning_rate": 2.109381386145267e-05, "loss": 0.2367, "step": 8324500 }, { "epoch": 4.99, "learning_rate": 2.1091713895892107e-05, "loss": 0.2349, "step": 8325000 }, { "epoch": 4.99, "learning_rate": 2.1089613930331544e-05, "loss": 0.2345, "step": 8325500 }, { "epoch": 4.99, "learning_rate": 2.1087513964770977e-05, "loss": 0.2341, "step": 8326000 }, { "epoch": 4.99, "learning_rate": 2.108541819914153e-05, "loss": 0.2319, "step": 8326500 }, { "epoch": 4.99, "learning_rate": 2.1083318233580968e-05, "loss": 0.2347, "step": 8327000 }, { "epoch": 4.99, "learning_rate": 2.1081218268020405e-05, "loss": 0.2307, "step": 8327500 }, { "epoch": 4.99, "learning_rate": 2.1079118302459838e-05, "loss": 0.2335, "step": 8328000 }, { "epoch": 4.99, "learning_rate": 2.10770225368304e-05, "loss": 0.2366, "step": 8328500 }, { "epoch": 4.99, "learning_rate": 2.107492257126983e-05, "loss": 0.2382, "step": 8329000 }, { "epoch": 4.99, "learning_rate": 2.1072822605709265e-05, "loss": 0.2404, "step": 8329500 }, { "epoch": 4.99, "learning_rate": 2.1070722640148702e-05, "loss": 0.2353, "step": 8330000 }, { "epoch": 4.99, "learning_rate": 2.106862687451926e-05, "loss": 0.2334, "step": 8330500 }, { "epoch": 4.99, "learning_rate": 2.1066526908958693e-05, "loss": 0.2328, "step": 8331000 }, { "epoch": 5.0, "learning_rate": 2.1064426943398126e-05, "loss": 0.2358, "step": 8331500 }, { "epoch": 5.0, "learning_rate": 2.1062326977837563e-05, "loss": 0.2311, "step": 8332000 }, { "epoch": 5.0, "learning_rate": 2.106023121220812e-05, "loss": 0.2324, "step": 8332500 }, { "epoch": 5.0, "learning_rate": 2.1058131246647557e-05, "loss": 0.2323, "step": 8333000 }, { "epoch": 5.0, "learning_rate": 2.1056031281086987e-05, "loss": 0.2418, "step": 8333500 }, { "epoch": 5.0, "learning_rate": 2.1053931315526424e-05, "loss": 0.2348, "step": 8334000 }, { "epoch": 5.0, "learning_rate": 2.105183554989698e-05, "loss": 0.2326, "step": 8334500 }, { "epoch": 5.0, "learning_rate": 2.1049735584336417e-05, "loss": 0.2356, "step": 8335000 }, { "epoch": 5.0, "learning_rate": 2.1047635618775854e-05, "loss": 0.232, "step": 8335500 }, { "epoch": 5.0, "learning_rate": 2.1045535653215284e-05, "loss": 0.2313, "step": 8336000 }, { "epoch": 5.0, "learning_rate": 2.104343988758584e-05, "loss": 0.2339, "step": 8336500 }, { "epoch": 5.0, "learning_rate": 2.1041339922025278e-05, "loss": 0.2385, "step": 8337000 }, { "epoch": 5.0, "learning_rate": 2.1039239956464715e-05, "loss": 0.2307, "step": 8337500 }, { "epoch": 5.0, "learning_rate": 2.1037139990904152e-05, "loss": 0.2359, "step": 8338000 }, { "epoch": 5.0, "learning_rate": 2.1035048425205826e-05, "loss": 0.2343, "step": 8338500 }, { "epoch": 5.0, "learning_rate": 2.1032948459645262e-05, "loss": 0.2354, "step": 8339000 }, { "epoch": 5.0, "learning_rate": 2.1030848494084696e-05, "loss": 0.2336, "step": 8339500 }, { "epoch": 5.0, "learning_rate": 2.1028748528524133e-05, "loss": 0.2282, "step": 8340000 }, { "epoch": 5.0, "learning_rate": 2.1026648562963566e-05, "loss": 0.2248, "step": 8340500 }, { "epoch": 5.0, "learning_rate": 2.1024548597403003e-05, "loss": 0.2274, "step": 8341000 }, { "epoch": 5.0, "learning_rate": 2.102245283177356e-05, "loss": 0.2362, "step": 8341500 }, { "epoch": 5.0, "learning_rate": 2.1020352866212993e-05, "loss": 0.2284, "step": 8342000 }, { "epoch": 5.0, "learning_rate": 2.101825290065243e-05, "loss": 0.2278, "step": 8342500 }, { "epoch": 5.0, "learning_rate": 2.1016152935091864e-05, "loss": 0.2274, "step": 8343000 }, { "epoch": 5.0, "learning_rate": 2.1014052969531297e-05, "loss": 0.2254, "step": 8343500 }, { "epoch": 5.0, "learning_rate": 2.1011953003970734e-05, "loss": 0.227, "step": 8344000 }, { "epoch": 5.0, "learning_rate": 2.100985303841017e-05, "loss": 0.2339, "step": 8344500 }, { "epoch": 5.0, "learning_rate": 2.1007757272780725e-05, "loss": 0.2289, "step": 8345000 }, { "epoch": 5.0, "learning_rate": 2.100565730722016e-05, "loss": 0.2288, "step": 8345500 }, { "epoch": 5.0, "learning_rate": 2.1003557341659595e-05, "loss": 0.2326, "step": 8346000 }, { "epoch": 5.0, "learning_rate": 2.100145737609903e-05, "loss": 0.233, "step": 8346500 }, { "epoch": 5.0, "learning_rate": 2.099935741053847e-05, "loss": 0.2292, "step": 8347000 }, { "epoch": 5.0, "learning_rate": 2.0997257444977902e-05, "loss": 0.2315, "step": 8347500 }, { "epoch": 5.0, "learning_rate": 2.099516167934846e-05, "loss": 0.2309, "step": 8348000 }, { "epoch": 5.01, "learning_rate": 2.0993061713787892e-05, "loss": 0.2273, "step": 8348500 }, { "epoch": 5.01, "learning_rate": 2.099096174822733e-05, "loss": 0.2245, "step": 8349000 }, { "epoch": 5.01, "learning_rate": 2.0988861782666766e-05, "loss": 0.2264, "step": 8349500 }, { "epoch": 5.01, "learning_rate": 2.09867618171062e-05, "loss": 0.2285, "step": 8350000 }, { "epoch": 5.01, "learning_rate": 2.0984661851545633e-05, "loss": 0.2287, "step": 8350500 }, { "epoch": 5.01, "learning_rate": 2.098256188598507e-05, "loss": 0.2254, "step": 8351000 }, { "epoch": 5.01, "learning_rate": 2.0980461920424503e-05, "loss": 0.2306, "step": 8351500 }, { "epoch": 5.01, "learning_rate": 2.0978366154795064e-05, "loss": 0.2275, "step": 8352000 }, { "epoch": 5.01, "learning_rate": 2.0976266189234494e-05, "loss": 0.2263, "step": 8352500 }, { "epoch": 5.01, "learning_rate": 2.097416622367393e-05, "loss": 0.2276, "step": 8353000 }, { "epoch": 5.01, "learning_rate": 2.0972070458044488e-05, "loss": 0.2428, "step": 8353500 }, { "epoch": 5.01, "learning_rate": 2.0969970492483924e-05, "loss": 0.2309, "step": 8354000 }, { "epoch": 5.01, "learning_rate": 2.0967870526923358e-05, "loss": 0.2263, "step": 8354500 }, { "epoch": 5.01, "learning_rate": 2.096577056136279e-05, "loss": 0.2242, "step": 8355000 }, { "epoch": 5.01, "learning_rate": 2.0963670595802228e-05, "loss": 0.2289, "step": 8355500 }, { "epoch": 5.01, "learning_rate": 2.0961570630241665e-05, "loss": 0.2293, "step": 8356000 }, { "epoch": 5.01, "learning_rate": 2.09594706646811e-05, "loss": 0.2281, "step": 8356500 }, { "epoch": 5.01, "learning_rate": 2.0957370699120535e-05, "loss": 0.23, "step": 8357000 }, { "epoch": 5.01, "learning_rate": 2.095527493349109e-05, "loss": 0.2257, "step": 8357500 }, { "epoch": 5.01, "learning_rate": 2.0953174967930526e-05, "loss": 0.2258, "step": 8358000 }, { "epoch": 5.01, "learning_rate": 2.095107500236996e-05, "loss": 0.228, "step": 8358500 }, { "epoch": 5.01, "learning_rate": 2.0948975036809396e-05, "loss": 0.234, "step": 8359000 }, { "epoch": 5.01, "learning_rate": 2.0946879271179953e-05, "loss": 0.2339, "step": 8359500 }, { "epoch": 5.01, "learning_rate": 2.0944779305619386e-05, "loss": 0.2297, "step": 8360000 }, { "epoch": 5.01, "learning_rate": 2.0942679340058823e-05, "loss": 0.2293, "step": 8360500 }, { "epoch": 5.01, "learning_rate": 2.0940579374498257e-05, "loss": 0.2307, "step": 8361000 }, { "epoch": 5.01, "learning_rate": 2.0938479408937694e-05, "loss": 0.2295, "step": 8361500 }, { "epoch": 5.01, "learning_rate": 2.0936383643308247e-05, "loss": 0.2314, "step": 8362000 }, { "epoch": 5.01, "learning_rate": 2.0934283677747684e-05, "loss": 0.2399, "step": 8362500 }, { "epoch": 5.01, "learning_rate": 2.093218371218712e-05, "loss": 0.2325, "step": 8363000 }, { "epoch": 5.01, "learning_rate": 2.0930083746626554e-05, "loss": 0.2282, "step": 8363500 }, { "epoch": 5.01, "learning_rate": 2.092798378106599e-05, "loss": 0.2265, "step": 8364000 }, { "epoch": 5.01, "learning_rate": 2.0925883815505428e-05, "loss": 0.227, "step": 8364500 }, { "epoch": 5.02, "learning_rate": 2.0923783849944858e-05, "loss": 0.2354, "step": 8365000 }, { "epoch": 5.02, "learning_rate": 2.0921683884384295e-05, "loss": 0.2318, "step": 8365500 }, { "epoch": 5.02, "learning_rate": 2.0919588118754852e-05, "loss": 0.2256, "step": 8366000 }, { "epoch": 5.02, "learning_rate": 2.091748815319429e-05, "loss": 0.2307, "step": 8366500 }, { "epoch": 5.02, "learning_rate": 2.0915388187633722e-05, "loss": 0.228, "step": 8367000 }, { "epoch": 5.02, "learning_rate": 2.0913288222073156e-05, "loss": 0.227, "step": 8367500 }, { "epoch": 5.02, "learning_rate": 2.0911192456443713e-05, "loss": 0.2274, "step": 8368000 }, { "epoch": 5.02, "learning_rate": 2.090909249088315e-05, "loss": 0.2258, "step": 8368500 }, { "epoch": 5.02, "learning_rate": 2.0906992525322586e-05, "loss": 0.2308, "step": 8369000 }, { "epoch": 5.02, "learning_rate": 2.090489255976202e-05, "loss": 0.2319, "step": 8369500 }, { "epoch": 5.02, "learning_rate": 2.0902796794132577e-05, "loss": 0.2349, "step": 8370000 }, { "epoch": 5.02, "learning_rate": 2.0900701028503134e-05, "loss": 0.228, "step": 8370500 }, { "epoch": 5.02, "learning_rate": 2.0898601062942567e-05, "loss": 0.2296, "step": 8371000 }, { "epoch": 5.02, "learning_rate": 2.0896501097382004e-05, "loss": 0.2272, "step": 8371500 }, { "epoch": 5.02, "learning_rate": 2.0894401131821438e-05, "loss": 0.232, "step": 8372000 }, { "epoch": 5.02, "learning_rate": 2.089230116626087e-05, "loss": 0.2308, "step": 8372500 }, { "epoch": 5.02, "learning_rate": 2.0890201200700308e-05, "loss": 0.2288, "step": 8373000 }, { "epoch": 5.02, "learning_rate": 2.0888101235139745e-05, "loss": 0.2288, "step": 8373500 }, { "epoch": 5.02, "learning_rate": 2.088600126957918e-05, "loss": 0.223, "step": 8374000 }, { "epoch": 5.02, "learning_rate": 2.0883905503949735e-05, "loss": 0.2249, "step": 8374500 }, { "epoch": 5.02, "learning_rate": 2.088180553838917e-05, "loss": 0.236, "step": 8375000 }, { "epoch": 5.02, "learning_rate": 2.0879705572828605e-05, "loss": 0.2316, "step": 8375500 }, { "epoch": 5.02, "learning_rate": 2.0877605607268042e-05, "loss": 0.2267, "step": 8376000 }, { "epoch": 5.02, "learning_rate": 2.0875509841638596e-05, "loss": 0.2271, "step": 8376500 }, { "epoch": 5.02, "learning_rate": 2.0873409876078033e-05, "loss": 0.2255, "step": 8377000 }, { "epoch": 5.02, "learning_rate": 2.0871309910517466e-05, "loss": 0.2266, "step": 8377500 }, { "epoch": 5.02, "learning_rate": 2.0869209944956903e-05, "loss": 0.2265, "step": 8378000 }, { "epoch": 5.02, "learning_rate": 2.086711417932746e-05, "loss": 0.2234, "step": 8378500 }, { "epoch": 5.02, "learning_rate": 2.0865014213766893e-05, "loss": 0.2289, "step": 8379000 }, { "epoch": 5.02, "learning_rate": 2.0862914248206327e-05, "loss": 0.2294, "step": 8379500 }, { "epoch": 5.02, "learning_rate": 2.0860814282645764e-05, "loss": 0.2252, "step": 8380000 }, { "epoch": 5.02, "learning_rate": 2.085871851701632e-05, "loss": 0.2328, "step": 8380500 }, { "epoch": 5.02, "learning_rate": 2.0856618551455757e-05, "loss": 0.2315, "step": 8381000 }, { "epoch": 5.03, "learning_rate": 2.085451858589519e-05, "loss": 0.2279, "step": 8381500 }, { "epoch": 5.03, "learning_rate": 2.0852418620334624e-05, "loss": 0.2278, "step": 8382000 }, { "epoch": 5.03, "learning_rate": 2.085031865477406e-05, "loss": 0.2359, "step": 8382500 }, { "epoch": 5.03, "learning_rate": 2.0848222889144618e-05, "loss": 0.2292, "step": 8383000 }, { "epoch": 5.03, "learning_rate": 2.084612292358405e-05, "loss": 0.2271, "step": 8383500 }, { "epoch": 5.03, "learning_rate": 2.084402295802349e-05, "loss": 0.2228, "step": 8384000 }, { "epoch": 5.03, "learning_rate": 2.0841922992462922e-05, "loss": 0.2307, "step": 8384500 }, { "epoch": 5.03, "learning_rate": 2.083982302690236e-05, "loss": 0.2267, "step": 8385000 }, { "epoch": 5.03, "learning_rate": 2.0837723061341796e-05, "loss": 0.2273, "step": 8385500 }, { "epoch": 5.03, "learning_rate": 2.083562309578123e-05, "loss": 0.2297, "step": 8386000 }, { "epoch": 5.03, "learning_rate": 2.0833523130220663e-05, "loss": 0.2289, "step": 8386500 }, { "epoch": 5.03, "learning_rate": 2.083142736459122e-05, "loss": 0.2268, "step": 8387000 }, { "epoch": 5.03, "learning_rate": 2.0829327399030656e-05, "loss": 0.2322, "step": 8387500 }, { "epoch": 5.03, "learning_rate": 2.0827227433470093e-05, "loss": 0.2308, "step": 8388000 }, { "epoch": 5.03, "learning_rate": 2.0825131667840647e-05, "loss": 0.2281, "step": 8388500 }, { "epoch": 5.03, "learning_rate": 2.082303170228008e-05, "loss": 0.2318, "step": 8389000 }, { "epoch": 5.03, "learning_rate": 2.0820931736719517e-05, "loss": 0.2335, "step": 8389500 }, { "epoch": 5.03, "learning_rate": 2.0818831771158954e-05, "loss": 0.2306, "step": 8390000 }, { "epoch": 5.03, "learning_rate": 2.0816731805598387e-05, "loss": 0.2296, "step": 8390500 }, { "epoch": 5.03, "learning_rate": 2.081463184003782e-05, "loss": 0.2323, "step": 8391000 }, { "epoch": 5.03, "learning_rate": 2.0812531874477258e-05, "loss": 0.2277, "step": 8391500 }, { "epoch": 5.03, "learning_rate": 2.0810431908916695e-05, "loss": 0.2242, "step": 8392000 }, { "epoch": 5.03, "learning_rate": 2.080833614328725e-05, "loss": 0.2294, "step": 8392500 }, { "epoch": 5.03, "learning_rate": 2.0806236177726685e-05, "loss": 0.2256, "step": 8393000 }, { "epoch": 5.03, "learning_rate": 2.080413621216612e-05, "loss": 0.2318, "step": 8393500 }, { "epoch": 5.03, "learning_rate": 2.0802036246605555e-05, "loss": 0.2296, "step": 8394000 }, { "epoch": 5.03, "learning_rate": 2.0799940480976112e-05, "loss": 0.2292, "step": 8394500 }, { "epoch": 5.03, "learning_rate": 2.079784051541555e-05, "loss": 0.2251, "step": 8395000 }, { "epoch": 5.03, "learning_rate": 2.0795744749786103e-05, "loss": 0.2242, "step": 8395500 }, { "epoch": 5.03, "learning_rate": 2.0793644784225536e-05, "loss": 0.2301, "step": 8396000 }, { "epoch": 5.03, "learning_rate": 2.0791544818664973e-05, "loss": 0.2294, "step": 8396500 }, { "epoch": 5.03, "learning_rate": 2.078944485310441e-05, "loss": 0.2295, "step": 8397000 }, { "epoch": 5.03, "learning_rate": 2.0787344887543843e-05, "loss": 0.2367, "step": 8397500 }, { "epoch": 5.03, "learning_rate": 2.078524492198328e-05, "loss": 0.2283, "step": 8398000 }, { "epoch": 5.04, "learning_rate": 2.0783144956422714e-05, "loss": 0.2264, "step": 8398500 }, { "epoch": 5.04, "learning_rate": 2.078104499086215e-05, "loss": 0.2273, "step": 8399000 }, { "epoch": 5.04, "learning_rate": 2.0778949225232707e-05, "loss": 0.2316, "step": 8399500 }, { "epoch": 5.04, "learning_rate": 2.0776853459603264e-05, "loss": 0.2282, "step": 8400000 }, { "epoch": 5.04, "eval_loss": 0.2189430147409439, "eval_runtime": 1457.5589, "eval_samples_per_second": 361.371, "eval_steps_per_second": 60.229, "step": 8400000 }, { "epoch": 5.04, "learning_rate": 2.0774757693973818e-05, "loss": 0.2328, "step": 8400500 }, { "epoch": 5.04, "learning_rate": 2.0772657728413255e-05, "loss": 0.2265, "step": 8401000 }, { "epoch": 5.04, "learning_rate": 2.0770557762852688e-05, "loss": 0.2247, "step": 8401500 }, { "epoch": 5.04, "learning_rate": 2.0768457797292125e-05, "loss": 0.231, "step": 8402000 }, { "epoch": 5.04, "learning_rate": 2.0766357831731562e-05, "loss": 0.2287, "step": 8402500 }, { "epoch": 5.04, "learning_rate": 2.0764257866170992e-05, "loss": 0.2294, "step": 8403000 }, { "epoch": 5.04, "learning_rate": 2.076215790061043e-05, "loss": 0.2268, "step": 8403500 }, { "epoch": 5.04, "learning_rate": 2.0760057935049866e-05, "loss": 0.2229, "step": 8404000 }, { "epoch": 5.04, "learning_rate": 2.07579579694893e-05, "loss": 0.2314, "step": 8404500 }, { "epoch": 5.04, "learning_rate": 2.0755858003928736e-05, "loss": 0.2268, "step": 8405000 }, { "epoch": 5.04, "learning_rate": 2.075375803836817e-05, "loss": 0.2336, "step": 8405500 }, { "epoch": 5.04, "learning_rate": 2.0751658072807606e-05, "loss": 0.2298, "step": 8406000 }, { "epoch": 5.04, "learning_rate": 2.0749562307178163e-05, "loss": 0.2332, "step": 8406500 }, { "epoch": 5.04, "learning_rate": 2.0747462341617597e-05, "loss": 0.2296, "step": 8407000 }, { "epoch": 5.04, "learning_rate": 2.0745362376057034e-05, "loss": 0.229, "step": 8407500 }, { "epoch": 5.04, "learning_rate": 2.0743262410496467e-05, "loss": 0.2302, "step": 8408000 }, { "epoch": 5.04, "learning_rate": 2.0741170844798144e-05, "loss": 0.2318, "step": 8408500 }, { "epoch": 5.04, "learning_rate": 2.073907087923758e-05, "loss": 0.2302, "step": 8409000 }, { "epoch": 5.04, "learning_rate": 2.0736970913677018e-05, "loss": 0.226, "step": 8409500 }, { "epoch": 5.04, "learning_rate": 2.0734870948116448e-05, "loss": 0.2231, "step": 8410000 }, { "epoch": 5.04, "learning_rate": 2.0732770982555885e-05, "loss": 0.2282, "step": 8410500 }, { "epoch": 5.04, "learning_rate": 2.0730675216926442e-05, "loss": 0.2304, "step": 8411000 }, { "epoch": 5.04, "learning_rate": 2.072857525136588e-05, "loss": 0.2256, "step": 8411500 }, { "epoch": 5.04, "learning_rate": 2.0726475285805315e-05, "loss": 0.2284, "step": 8412000 }, { "epoch": 5.04, "learning_rate": 2.0724375320244746e-05, "loss": 0.2324, "step": 8412500 }, { "epoch": 5.04, "learning_rate": 2.0722275354684182e-05, "loss": 0.2277, "step": 8413000 }, { "epoch": 5.04, "learning_rate": 2.072017958905474e-05, "loss": 0.2259, "step": 8413500 }, { "epoch": 5.04, "learning_rate": 2.0718079623494176e-05, "loss": 0.2255, "step": 8414000 }, { "epoch": 5.04, "learning_rate": 2.071597965793361e-05, "loss": 0.2262, "step": 8414500 }, { "epoch": 5.05, "learning_rate": 2.0713879692373043e-05, "loss": 0.2295, "step": 8415000 }, { "epoch": 5.05, "learning_rate": 2.071177972681248e-05, "loss": 0.2252, "step": 8415500 }, { "epoch": 5.05, "learning_rate": 2.0709679761251917e-05, "loss": 0.2284, "step": 8416000 }, { "epoch": 5.05, "learning_rate": 2.070757979569135e-05, "loss": 0.2326, "step": 8416500 }, { "epoch": 5.05, "learning_rate": 2.0705479830130787e-05, "loss": 0.223, "step": 8417000 }, { "epoch": 5.05, "learning_rate": 2.070338406450134e-05, "loss": 0.228, "step": 8417500 }, { "epoch": 5.05, "learning_rate": 2.0701284098940777e-05, "loss": 0.2336, "step": 8418000 }, { "epoch": 5.05, "learning_rate": 2.069918413338021e-05, "loss": 0.2306, "step": 8418500 }, { "epoch": 5.05, "learning_rate": 2.0697084167819648e-05, "loss": 0.2269, "step": 8419000 }, { "epoch": 5.05, "learning_rate": 2.06949884021902e-05, "loss": 0.2297, "step": 8419500 }, { "epoch": 5.05, "learning_rate": 2.0692888436629638e-05, "loss": 0.2325, "step": 8420000 }, { "epoch": 5.05, "learning_rate": 2.0690788471069075e-05, "loss": 0.2242, "step": 8420500 }, { "epoch": 5.05, "learning_rate": 2.068868850550851e-05, "loss": 0.2316, "step": 8421000 }, { "epoch": 5.05, "learning_rate": 2.0686592739879065e-05, "loss": 0.2315, "step": 8421500 }, { "epoch": 5.05, "learning_rate": 2.06844927743185e-05, "loss": 0.2314, "step": 8422000 }, { "epoch": 5.05, "learning_rate": 2.0682392808757936e-05, "loss": 0.2354, "step": 8422500 }, { "epoch": 5.05, "learning_rate": 2.0680292843197373e-05, "loss": 0.2327, "step": 8423000 }, { "epoch": 5.05, "learning_rate": 2.067819707756793e-05, "loss": 0.231, "step": 8423500 }, { "epoch": 5.05, "learning_rate": 2.067609711200736e-05, "loss": 0.228, "step": 8424000 }, { "epoch": 5.05, "learning_rate": 2.0673997146446797e-05, "loss": 0.2328, "step": 8424500 }, { "epoch": 5.05, "learning_rate": 2.0671897180886233e-05, "loss": 0.2258, "step": 8425000 }, { "epoch": 5.05, "learning_rate": 2.066980141525679e-05, "loss": 0.2338, "step": 8425500 }, { "epoch": 5.05, "learning_rate": 2.0667701449696227e-05, "loss": 0.2291, "step": 8426000 }, { "epoch": 5.05, "learning_rate": 2.066560568406678e-05, "loss": 0.2252, "step": 8426500 }, { "epoch": 5.05, "learning_rate": 2.0663505718506214e-05, "loss": 0.2291, "step": 8427000 }, { "epoch": 5.05, "learning_rate": 2.066140575294565e-05, "loss": 0.2309, "step": 8427500 }, { "epoch": 5.05, "learning_rate": 2.0659305787385088e-05, "loss": 0.2274, "step": 8428000 }, { "epoch": 5.05, "learning_rate": 2.065720582182452e-05, "loss": 0.2279, "step": 8428500 }, { "epoch": 5.05, "learning_rate": 2.0655105856263955e-05, "loss": 0.2246, "step": 8429000 }, { "epoch": 5.05, "learning_rate": 2.065300589070339e-05, "loss": 0.2279, "step": 8429500 }, { "epoch": 5.05, "learning_rate": 2.065090592514283e-05, "loss": 0.2308, "step": 8430000 }, { "epoch": 5.05, "learning_rate": 2.0648810159513385e-05, "loss": 0.2291, "step": 8430500 }, { "epoch": 5.05, "learning_rate": 2.064671019395282e-05, "loss": 0.2307, "step": 8431000 }, { "epoch": 5.06, "learning_rate": 2.0644610228392252e-05, "loss": 0.2306, "step": 8431500 }, { "epoch": 5.06, "learning_rate": 2.064251026283169e-05, "loss": 0.2303, "step": 8432000 }, { "epoch": 5.06, "learning_rate": 2.0640414497202246e-05, "loss": 0.2311, "step": 8432500 }, { "epoch": 5.06, "learning_rate": 2.0638314531641683e-05, "loss": 0.2287, "step": 8433000 }, { "epoch": 5.06, "learning_rate": 2.0636214566081117e-05, "loss": 0.2313, "step": 8433500 }, { "epoch": 5.06, "learning_rate": 2.063411460052055e-05, "loss": 0.235, "step": 8434000 }, { "epoch": 5.06, "learning_rate": 2.0632018834891107e-05, "loss": 0.2332, "step": 8434500 }, { "epoch": 5.06, "learning_rate": 2.0629918869330544e-05, "loss": 0.2322, "step": 8435000 }, { "epoch": 5.06, "learning_rate": 2.062781890376998e-05, "loss": 0.2318, "step": 8435500 }, { "epoch": 5.06, "learning_rate": 2.062571893820941e-05, "loss": 0.2327, "step": 8436000 }, { "epoch": 5.06, "learning_rate": 2.0623623172579968e-05, "loss": 0.2276, "step": 8436500 }, { "epoch": 5.06, "learning_rate": 2.0621523207019405e-05, "loss": 0.2311, "step": 8437000 }, { "epoch": 5.06, "learning_rate": 2.061942324145884e-05, "loss": 0.2326, "step": 8437500 }, { "epoch": 5.06, "learning_rate": 2.0617323275898275e-05, "loss": 0.2271, "step": 8438000 }, { "epoch": 5.06, "learning_rate": 2.0615227510268832e-05, "loss": 0.2302, "step": 8438500 }, { "epoch": 5.06, "learning_rate": 2.061313174463939e-05, "loss": 0.2279, "step": 8439000 }, { "epoch": 5.06, "learning_rate": 2.0611031779078822e-05, "loss": 0.2253, "step": 8439500 }, { "epoch": 5.06, "learning_rate": 2.060893181351826e-05, "loss": 0.2344, "step": 8440000 }, { "epoch": 5.06, "learning_rate": 2.0606831847957693e-05, "loss": 0.2286, "step": 8440500 }, { "epoch": 5.06, "learning_rate": 2.060473608232825e-05, "loss": 0.2329, "step": 8441000 }, { "epoch": 5.06, "learning_rate": 2.0602636116767686e-05, "loss": 0.2355, "step": 8441500 }, { "epoch": 5.06, "learning_rate": 2.060053615120712e-05, "loss": 0.2288, "step": 8442000 }, { "epoch": 5.06, "learning_rate": 2.0598436185646557e-05, "loss": 0.2296, "step": 8442500 }, { "epoch": 5.06, "learning_rate": 2.059633622008599e-05, "loss": 0.2283, "step": 8443000 }, { "epoch": 5.06, "learning_rate": 2.0594236254525424e-05, "loss": 0.2285, "step": 8443500 }, { "epoch": 5.06, "learning_rate": 2.059213628896486e-05, "loss": 0.2368, "step": 8444000 }, { "epoch": 5.06, "learning_rate": 2.0590036323404297e-05, "loss": 0.2284, "step": 8444500 }, { "epoch": 5.06, "learning_rate": 2.0587940557774854e-05, "loss": 0.2257, "step": 8445000 }, { "epoch": 5.06, "learning_rate": 2.0585840592214288e-05, "loss": 0.2326, "step": 8445500 }, { "epoch": 5.06, "learning_rate": 2.058374062665372e-05, "loss": 0.2262, "step": 8446000 }, { "epoch": 5.06, "learning_rate": 2.0581640661093158e-05, "loss": 0.2331, "step": 8446500 }, { "epoch": 5.06, "learning_rate": 2.0579544895463715e-05, "loss": 0.2319, "step": 8447000 }, { "epoch": 5.06, "learning_rate": 2.057744492990315e-05, "loss": 0.233, "step": 8447500 }, { "epoch": 5.06, "learning_rate": 2.0575344964342585e-05, "loss": 0.2243, "step": 8448000 }, { "epoch": 5.07, "learning_rate": 2.057324499878202e-05, "loss": 0.2269, "step": 8448500 }, { "epoch": 5.07, "learning_rate": 2.0571149233152576e-05, "loss": 0.2302, "step": 8449000 }, { "epoch": 5.07, "learning_rate": 2.0569049267592013e-05, "loss": 0.2258, "step": 8449500 }, { "epoch": 5.07, "learning_rate": 2.0566949302031446e-05, "loss": 0.236, "step": 8450000 }, { "epoch": 5.07, "learning_rate": 2.056484933647088e-05, "loss": 0.233, "step": 8450500 }, { "epoch": 5.07, "learning_rate": 2.0562749370910316e-05, "loss": 0.2249, "step": 8451000 }, { "epoch": 5.07, "learning_rate": 2.0560649405349753e-05, "loss": 0.226, "step": 8451500 }, { "epoch": 5.07, "learning_rate": 2.0558549439789187e-05, "loss": 0.2269, "step": 8452000 }, { "epoch": 5.07, "learning_rate": 2.0556453674159744e-05, "loss": 0.2345, "step": 8452500 }, { "epoch": 5.07, "learning_rate": 2.0554353708599177e-05, "loss": 0.2321, "step": 8453000 }, { "epoch": 5.07, "learning_rate": 2.0552253743038614e-05, "loss": 0.2273, "step": 8453500 }, { "epoch": 5.07, "learning_rate": 2.055015377747805e-05, "loss": 0.2254, "step": 8454000 }, { "epoch": 5.07, "learning_rate": 2.0548053811917484e-05, "loss": 0.2261, "step": 8454500 }, { "epoch": 5.07, "learning_rate": 2.054595804628804e-05, "loss": 0.2324, "step": 8455000 }, { "epoch": 5.07, "learning_rate": 2.0543858080727475e-05, "loss": 0.2259, "step": 8455500 }, { "epoch": 5.07, "learning_rate": 2.054175811516691e-05, "loss": 0.2319, "step": 8456000 }, { "epoch": 5.07, "learning_rate": 2.0539658149606348e-05, "loss": 0.2279, "step": 8456500 }, { "epoch": 5.07, "learning_rate": 2.0537558184045782e-05, "loss": 0.2281, "step": 8457000 }, { "epoch": 5.07, "learning_rate": 2.0535462418416335e-05, "loss": 0.2347, "step": 8457500 }, { "epoch": 5.07, "learning_rate": 2.0533362452855772e-05, "loss": 0.2335, "step": 8458000 }, { "epoch": 5.07, "learning_rate": 2.053126248729521e-05, "loss": 0.2212, "step": 8458500 }, { "epoch": 5.07, "learning_rate": 2.0529166721665766e-05, "loss": 0.234, "step": 8459000 }, { "epoch": 5.07, "learning_rate": 2.05270667561052e-05, "loss": 0.2323, "step": 8459500 }, { "epoch": 5.07, "learning_rate": 2.0524966790544633e-05, "loss": 0.2271, "step": 8460000 }, { "epoch": 5.07, "learning_rate": 2.052286682498407e-05, "loss": 0.2281, "step": 8460500 }, { "epoch": 5.07, "learning_rate": 2.0520766859423507e-05, "loss": 0.2192, "step": 8461000 }, { "epoch": 5.07, "learning_rate": 2.051866689386294e-05, "loss": 0.2265, "step": 8461500 }, { "epoch": 5.07, "learning_rate": 2.0516566928302377e-05, "loss": 0.2317, "step": 8462000 }, { "epoch": 5.07, "learning_rate": 2.051446696274181e-05, "loss": 0.2297, "step": 8462500 }, { "epoch": 5.07, "learning_rate": 2.0512366997181244e-05, "loss": 0.2269, "step": 8463000 }, { "epoch": 5.07, "learning_rate": 2.051026703162068e-05, "loss": 0.2264, "step": 8463500 }, { "epoch": 5.07, "learning_rate": 2.0508167066060117e-05, "loss": 0.2307, "step": 8464000 }, { "epoch": 5.07, "learning_rate": 2.050606710049955e-05, "loss": 0.2309, "step": 8464500 }, { "epoch": 5.08, "learning_rate": 2.0503971334870108e-05, "loss": 0.2279, "step": 8465000 }, { "epoch": 5.08, "learning_rate": 2.0501875569240665e-05, "loss": 0.233, "step": 8465500 }, { "epoch": 5.08, "learning_rate": 2.04997756036801e-05, "loss": 0.2356, "step": 8466000 }, { "epoch": 5.08, "learning_rate": 2.0497675638119535e-05, "loss": 0.2228, "step": 8466500 }, { "epoch": 5.08, "learning_rate": 2.049557567255897e-05, "loss": 0.226, "step": 8467000 }, { "epoch": 5.08, "learning_rate": 2.0493475706998405e-05, "loss": 0.2297, "step": 8467500 }, { "epoch": 5.08, "learning_rate": 2.049137574143784e-05, "loss": 0.2194, "step": 8468000 }, { "epoch": 5.08, "learning_rate": 2.0489275775877276e-05, "loss": 0.2239, "step": 8468500 }, { "epoch": 5.08, "learning_rate": 2.0487175810316713e-05, "loss": 0.2249, "step": 8469000 }, { "epoch": 5.08, "learning_rate": 2.0485080044687266e-05, "loss": 0.235, "step": 8469500 }, { "epoch": 5.08, "learning_rate": 2.04829800791267e-05, "loss": 0.2291, "step": 8470000 }, { "epoch": 5.08, "learning_rate": 2.0480880113566137e-05, "loss": 0.2317, "step": 8470500 }, { "epoch": 5.08, "learning_rate": 2.0478780148005573e-05, "loss": 0.2245, "step": 8471000 }, { "epoch": 5.08, "learning_rate": 2.047668438237613e-05, "loss": 0.2282, "step": 8471500 }, { "epoch": 5.08, "learning_rate": 2.0474584416815564e-05, "loss": 0.2242, "step": 8472000 }, { "epoch": 5.08, "learning_rate": 2.0472484451254997e-05, "loss": 0.2282, "step": 8472500 }, { "epoch": 5.08, "learning_rate": 2.0470384485694434e-05, "loss": 0.2282, "step": 8473000 }, { "epoch": 5.08, "learning_rate": 2.046828452013387e-05, "loss": 0.2271, "step": 8473500 }, { "epoch": 5.08, "learning_rate": 2.0466184554573304e-05, "loss": 0.2276, "step": 8474000 }, { "epoch": 5.08, "learning_rate": 2.0464084589012738e-05, "loss": 0.2392, "step": 8474500 }, { "epoch": 5.08, "learning_rate": 2.0461984623452175e-05, "loss": 0.2205, "step": 8475000 }, { "epoch": 5.08, "learning_rate": 2.045988885782273e-05, "loss": 0.2285, "step": 8475500 }, { "epoch": 5.08, "learning_rate": 2.045778889226217e-05, "loss": 0.2264, "step": 8476000 }, { "epoch": 5.08, "learning_rate": 2.0455688926701602e-05, "loss": 0.227, "step": 8476500 }, { "epoch": 5.08, "learning_rate": 2.0453588961141035e-05, "loss": 0.2235, "step": 8477000 }, { "epoch": 5.08, "learning_rate": 2.0451493195511592e-05, "loss": 0.231, "step": 8477500 }, { "epoch": 5.08, "learning_rate": 2.044939322995103e-05, "loss": 0.2245, "step": 8478000 }, { "epoch": 5.08, "learning_rate": 2.0447293264390466e-05, "loss": 0.2273, "step": 8478500 }, { "epoch": 5.08, "learning_rate": 2.04451932988299e-05, "loss": 0.2293, "step": 8479000 }, { "epoch": 5.08, "learning_rate": 2.0443097533200453e-05, "loss": 0.2249, "step": 8479500 }, { "epoch": 5.08, "learning_rate": 2.044099756763989e-05, "loss": 0.2331, "step": 8480000 }, { "epoch": 5.08, "learning_rate": 2.0438897602079327e-05, "loss": 0.2324, "step": 8480500 }, { "epoch": 5.08, "learning_rate": 2.043679763651876e-05, "loss": 0.2281, "step": 8481000 }, { "epoch": 5.09, "learning_rate": 2.0434706070820437e-05, "loss": 0.2326, "step": 8481500 }, { "epoch": 5.09, "learning_rate": 2.0432606105259874e-05, "loss": 0.2303, "step": 8482000 }, { "epoch": 5.09, "learning_rate": 2.0430506139699308e-05, "loss": 0.2281, "step": 8482500 }, { "epoch": 5.09, "learning_rate": 2.0428406174138745e-05, "loss": 0.2262, "step": 8483000 }, { "epoch": 5.09, "learning_rate": 2.042630620857818e-05, "loss": 0.231, "step": 8483500 }, { "epoch": 5.09, "learning_rate": 2.0424206243017615e-05, "loss": 0.2321, "step": 8484000 }, { "epoch": 5.09, "learning_rate": 2.0422106277457048e-05, "loss": 0.2311, "step": 8484500 }, { "epoch": 5.09, "learning_rate": 2.0420006311896485e-05, "loss": 0.2269, "step": 8485000 }, { "epoch": 5.09, "learning_rate": 2.0417906346335922e-05, "loss": 0.2299, "step": 8485500 }, { "epoch": 5.09, "learning_rate": 2.0415810580706476e-05, "loss": 0.2263, "step": 8486000 }, { "epoch": 5.09, "learning_rate": 2.041371061514591e-05, "loss": 0.2361, "step": 8486500 }, { "epoch": 5.09, "learning_rate": 2.0411610649585346e-05, "loss": 0.2294, "step": 8487000 }, { "epoch": 5.09, "learning_rate": 2.0409510684024783e-05, "loss": 0.2301, "step": 8487500 }, { "epoch": 5.09, "learning_rate": 2.0407410718464216e-05, "loss": 0.2321, "step": 8488000 }, { "epoch": 5.09, "learning_rate": 2.0405310752903653e-05, "loss": 0.2299, "step": 8488500 }, { "epoch": 5.09, "learning_rate": 2.0403210787343086e-05, "loss": 0.2302, "step": 8489000 }, { "epoch": 5.09, "learning_rate": 2.0401115021713643e-05, "loss": 0.23, "step": 8489500 }, { "epoch": 5.09, "learning_rate": 2.039901505615308e-05, "loss": 0.2287, "step": 8490000 }, { "epoch": 5.09, "learning_rate": 2.0396919290523637e-05, "loss": 0.2232, "step": 8490500 }, { "epoch": 5.09, "learning_rate": 2.039481932496307e-05, "loss": 0.235, "step": 8491000 }, { "epoch": 5.09, "learning_rate": 2.0392719359402504e-05, "loss": 0.2236, "step": 8491500 }, { "epoch": 5.09, "learning_rate": 2.039061939384194e-05, "loss": 0.2298, "step": 8492000 }, { "epoch": 5.09, "learning_rate": 2.0388519428281378e-05, "loss": 0.2315, "step": 8492500 }, { "epoch": 5.09, "learning_rate": 2.038641946272081e-05, "loss": 0.2272, "step": 8493000 }, { "epoch": 5.09, "learning_rate": 2.0384319497160245e-05, "loss": 0.2287, "step": 8493500 }, { "epoch": 5.09, "learning_rate": 2.038221953159968e-05, "loss": 0.2358, "step": 8494000 }, { "epoch": 5.09, "learning_rate": 2.0380119566039115e-05, "loss": 0.2346, "step": 8494500 }, { "epoch": 5.09, "learning_rate": 2.0378019600478552e-05, "loss": 0.2264, "step": 8495000 }, { "epoch": 5.09, "learning_rate": 2.037591963491799e-05, "loss": 0.228, "step": 8495500 }, { "epoch": 5.09, "learning_rate": 2.0373819669357422e-05, "loss": 0.2311, "step": 8496000 }, { "epoch": 5.09, "learning_rate": 2.037172390372798e-05, "loss": 0.2278, "step": 8496500 }, { "epoch": 5.09, "learning_rate": 2.0369623938167413e-05, "loss": 0.2288, "step": 8497000 }, { "epoch": 5.09, "learning_rate": 2.036752397260685e-05, "loss": 0.2262, "step": 8497500 }, { "epoch": 5.09, "learning_rate": 2.0365428206977406e-05, "loss": 0.2306, "step": 8498000 }, { "epoch": 5.1, "learning_rate": 2.036332824141684e-05, "loss": 0.2334, "step": 8498500 }, { "epoch": 5.1, "learning_rate": 2.0361228275856273e-05, "loss": 0.2281, "step": 8499000 }, { "epoch": 5.1, "learning_rate": 2.035912831029571e-05, "loss": 0.2323, "step": 8499500 }, { "epoch": 5.1, "learning_rate": 2.0357028344735147e-05, "loss": 0.2207, "step": 8500000 }, { "epoch": 5.1, "eval_loss": 0.2186298370361328, "eval_runtime": 1453.1972, "eval_samples_per_second": 362.456, "eval_steps_per_second": 60.41, "step": 8500000 }, { "epoch": 5.1, "learning_rate": 2.035492837917458e-05, "loss": 0.2312, "step": 8500500 }, { "epoch": 5.1, "learning_rate": 2.0352828413614014e-05, "loss": 0.2353, "step": 8501000 }, { "epoch": 5.1, "learning_rate": 2.035072844805345e-05, "loss": 0.228, "step": 8501500 }, { "epoch": 5.1, "learning_rate": 2.0348632682424008e-05, "loss": 0.2242, "step": 8502000 }, { "epoch": 5.1, "learning_rate": 2.0346532716863445e-05, "loss": 0.2322, "step": 8502500 }, { "epoch": 5.1, "learning_rate": 2.0344432751302878e-05, "loss": 0.226, "step": 8503000 }, { "epoch": 5.1, "learning_rate": 2.0342336985673435e-05, "loss": 0.2304, "step": 8503500 }, { "epoch": 5.1, "learning_rate": 2.034023702011287e-05, "loss": 0.2329, "step": 8504000 }, { "epoch": 5.1, "learning_rate": 2.0338137054552305e-05, "loss": 0.2303, "step": 8504500 }, { "epoch": 5.1, "learning_rate": 2.0336037088991742e-05, "loss": 0.2309, "step": 8505000 }, { "epoch": 5.1, "learning_rate": 2.0333937123431176e-05, "loss": 0.2286, "step": 8505500 }, { "epoch": 5.1, "learning_rate": 2.033183715787061e-05, "loss": 0.2363, "step": 8506000 }, { "epoch": 5.1, "learning_rate": 2.0329741392241166e-05, "loss": 0.2262, "step": 8506500 }, { "epoch": 5.1, "learning_rate": 2.0327641426680603e-05, "loss": 0.232, "step": 8507000 }, { "epoch": 5.1, "learning_rate": 2.032554146112004e-05, "loss": 0.2258, "step": 8507500 }, { "epoch": 5.1, "learning_rate": 2.0323441495559473e-05, "loss": 0.2265, "step": 8508000 }, { "epoch": 5.1, "learning_rate": 2.0321341529998907e-05, "loss": 0.2356, "step": 8508500 }, { "epoch": 5.1, "learning_rate": 2.0319241564438344e-05, "loss": 0.2269, "step": 8509000 }, { "epoch": 5.1, "learning_rate": 2.03171457988089e-05, "loss": 0.2266, "step": 8509500 }, { "epoch": 5.1, "learning_rate": 2.0315045833248334e-05, "loss": 0.2323, "step": 8510000 }, { "epoch": 5.1, "learning_rate": 2.031294586768777e-05, "loss": 0.2316, "step": 8510500 }, { "epoch": 5.1, "learning_rate": 2.0310845902127204e-05, "loss": 0.226, "step": 8511000 }, { "epoch": 5.1, "learning_rate": 2.030874593656664e-05, "loss": 0.2218, "step": 8511500 }, { "epoch": 5.1, "learning_rate": 2.0306645971006075e-05, "loss": 0.2279, "step": 8512000 }, { "epoch": 5.1, "learning_rate": 2.030454600544551e-05, "loss": 0.2305, "step": 8512500 }, { "epoch": 5.1, "learning_rate": 2.0302450239816065e-05, "loss": 0.2312, "step": 8513000 }, { "epoch": 5.1, "learning_rate": 2.0300350274255502e-05, "loss": 0.2289, "step": 8513500 }, { "epoch": 5.1, "learning_rate": 2.0298250308694935e-05, "loss": 0.228, "step": 8514000 }, { "epoch": 5.1, "learning_rate": 2.0296150343134372e-05, "loss": 0.2302, "step": 8514500 }, { "epoch": 5.11, "learning_rate": 2.029405037757381e-05, "loss": 0.2304, "step": 8515000 }, { "epoch": 5.11, "learning_rate": 2.0291950412013242e-05, "loss": 0.2311, "step": 8515500 }, { "epoch": 5.11, "learning_rate": 2.0289850446452676e-05, "loss": 0.2376, "step": 8516000 }, { "epoch": 5.11, "learning_rate": 2.0287750480892113e-05, "loss": 0.2257, "step": 8516500 }, { "epoch": 5.11, "learning_rate": 2.028565471526267e-05, "loss": 0.2338, "step": 8517000 }, { "epoch": 5.11, "learning_rate": 2.0283554749702107e-05, "loss": 0.2307, "step": 8517500 }, { "epoch": 5.11, "learning_rate": 2.028145478414154e-05, "loss": 0.2235, "step": 8518000 }, { "epoch": 5.11, "learning_rate": 2.0279354818580974e-05, "loss": 0.2274, "step": 8518500 }, { "epoch": 5.11, "learning_rate": 2.027725905295153e-05, "loss": 0.2263, "step": 8519000 }, { "epoch": 5.11, "learning_rate": 2.0275159087390967e-05, "loss": 0.2279, "step": 8519500 }, { "epoch": 5.11, "learning_rate": 2.0273059121830404e-05, "loss": 0.2291, "step": 8520000 }, { "epoch": 5.11, "learning_rate": 2.0270959156269834e-05, "loss": 0.2336, "step": 8520500 }, { "epoch": 5.11, "learning_rate": 2.026886339064039e-05, "loss": 0.2274, "step": 8521000 }, { "epoch": 5.11, "learning_rate": 2.0266763425079828e-05, "loss": 0.2224, "step": 8521500 }, { "epoch": 5.11, "learning_rate": 2.0264663459519265e-05, "loss": 0.2298, "step": 8522000 }, { "epoch": 5.11, "learning_rate": 2.02625634939587e-05, "loss": 0.226, "step": 8522500 }, { "epoch": 5.11, "learning_rate": 2.0260463528398132e-05, "loss": 0.2233, "step": 8523000 }, { "epoch": 5.11, "learning_rate": 2.025836356283757e-05, "loss": 0.2324, "step": 8523500 }, { "epoch": 5.11, "learning_rate": 2.0256263597277006e-05, "loss": 0.225, "step": 8524000 }, { "epoch": 5.11, "learning_rate": 2.025416363171644e-05, "loss": 0.2354, "step": 8524500 }, { "epoch": 5.11, "learning_rate": 2.0252067866086996e-05, "loss": 0.2308, "step": 8525000 }, { "epoch": 5.11, "learning_rate": 2.024996790052643e-05, "loss": 0.23, "step": 8525500 }, { "epoch": 5.11, "learning_rate": 2.0247867934965866e-05, "loss": 0.2294, "step": 8526000 }, { "epoch": 5.11, "learning_rate": 2.0245772169336423e-05, "loss": 0.2304, "step": 8526500 }, { "epoch": 5.11, "learning_rate": 2.024367220377586e-05, "loss": 0.2273, "step": 8527000 }, { "epoch": 5.11, "learning_rate": 2.0241572238215294e-05, "loss": 0.2248, "step": 8527500 }, { "epoch": 5.11, "learning_rate": 2.0239472272654727e-05, "loss": 0.2226, "step": 8528000 }, { "epoch": 5.11, "learning_rate": 2.0237372307094164e-05, "loss": 0.2328, "step": 8528500 }, { "epoch": 5.11, "learning_rate": 2.0235272341533597e-05, "loss": 0.2284, "step": 8529000 }, { "epoch": 5.11, "learning_rate": 2.0233172375973034e-05, "loss": 0.229, "step": 8529500 }, { "epoch": 5.11, "learning_rate": 2.023107241041247e-05, "loss": 0.2329, "step": 8530000 }, { "epoch": 5.11, "learning_rate": 2.0228976644783025e-05, "loss": 0.2325, "step": 8530500 }, { "epoch": 5.11, "learning_rate": 2.022687667922246e-05, "loss": 0.2295, "step": 8531000 }, { "epoch": 5.11, "learning_rate": 2.0224776713661895e-05, "loss": 0.2284, "step": 8531500 }, { "epoch": 5.12, "learning_rate": 2.0222676748101332e-05, "loss": 0.2337, "step": 8532000 }, { "epoch": 5.12, "learning_rate": 2.022057678254077e-05, "loss": 0.2264, "step": 8532500 }, { "epoch": 5.12, "learning_rate": 2.02184768169802e-05, "loss": 0.225, "step": 8533000 }, { "epoch": 5.12, "learning_rate": 2.021638105135076e-05, "loss": 0.2284, "step": 8533500 }, { "epoch": 5.12, "learning_rate": 2.0214281085790192e-05, "loss": 0.2313, "step": 8534000 }, { "epoch": 5.12, "learning_rate": 2.021218112022963e-05, "loss": 0.2322, "step": 8534500 }, { "epoch": 5.12, "learning_rate": 2.0210081154669066e-05, "loss": 0.2306, "step": 8535000 }, { "epoch": 5.12, "learning_rate": 2.0207981189108496e-05, "loss": 0.2305, "step": 8535500 }, { "epoch": 5.12, "learning_rate": 2.0205881223547933e-05, "loss": 0.2282, "step": 8536000 }, { "epoch": 5.12, "learning_rate": 2.020378125798737e-05, "loss": 0.2301, "step": 8536500 }, { "epoch": 5.12, "learning_rate": 2.0201681292426803e-05, "loss": 0.2227, "step": 8537000 }, { "epoch": 5.12, "learning_rate": 2.019958132686624e-05, "loss": 0.2306, "step": 8537500 }, { "epoch": 5.12, "learning_rate": 2.0197485561236794e-05, "loss": 0.2345, "step": 8538000 }, { "epoch": 5.12, "learning_rate": 2.019538559567623e-05, "loss": 0.2252, "step": 8538500 }, { "epoch": 5.12, "learning_rate": 2.0193285630115667e-05, "loss": 0.2339, "step": 8539000 }, { "epoch": 5.12, "learning_rate": 2.0191189864486224e-05, "loss": 0.2325, "step": 8539500 }, { "epoch": 5.12, "learning_rate": 2.0189089898925654e-05, "loss": 0.2304, "step": 8540000 }, { "epoch": 5.12, "learning_rate": 2.018698993336509e-05, "loss": 0.235, "step": 8540500 }, { "epoch": 5.12, "learning_rate": 2.0184889967804528e-05, "loss": 0.2271, "step": 8541000 }, { "epoch": 5.12, "learning_rate": 2.018279000224396e-05, "loss": 0.2317, "step": 8541500 }, { "epoch": 5.12, "learning_rate": 2.01806900366834e-05, "loss": 0.2309, "step": 8542000 }, { "epoch": 5.12, "learning_rate": 2.0178594271053952e-05, "loss": 0.2343, "step": 8542500 }, { "epoch": 5.12, "learning_rate": 2.017649430549339e-05, "loss": 0.2268, "step": 8543000 }, { "epoch": 5.12, "learning_rate": 2.0174394339932826e-05, "loss": 0.2251, "step": 8543500 }, { "epoch": 5.12, "learning_rate": 2.017229437437226e-05, "loss": 0.23, "step": 8544000 }, { "epoch": 5.12, "learning_rate": 2.0170194408811696e-05, "loss": 0.2268, "step": 8544500 }, { "epoch": 5.12, "learning_rate": 2.0168094443251133e-05, "loss": 0.2279, "step": 8545000 }, { "epoch": 5.12, "learning_rate": 2.0165994477690563e-05, "loss": 0.2308, "step": 8545500 }, { "epoch": 5.12, "learning_rate": 2.016389451213e-05, "loss": 0.2283, "step": 8546000 }, { "epoch": 5.12, "learning_rate": 2.0161798746500557e-05, "loss": 0.233, "step": 8546500 }, { "epoch": 5.12, "learning_rate": 2.0159698780939994e-05, "loss": 0.2297, "step": 8547000 }, { "epoch": 5.12, "learning_rate": 2.0157598815379427e-05, "loss": 0.2264, "step": 8547500 }, { "epoch": 5.12, "learning_rate": 2.0155503049749984e-05, "loss": 0.2282, "step": 8548000 }, { "epoch": 5.13, "learning_rate": 2.0153403084189418e-05, "loss": 0.226, "step": 8548500 }, { "epoch": 5.13, "learning_rate": 2.0151303118628854e-05, "loss": 0.2298, "step": 8549000 }, { "epoch": 5.13, "learning_rate": 2.014920315306829e-05, "loss": 0.2294, "step": 8549500 }, { "epoch": 5.13, "learning_rate": 2.0147103187507725e-05, "loss": 0.2317, "step": 8550000 }, { "epoch": 5.13, "learning_rate": 2.0145003221947158e-05, "loss": 0.2311, "step": 8550500 }, { "epoch": 5.13, "learning_rate": 2.0142903256386595e-05, "loss": 0.2289, "step": 8551000 }, { "epoch": 5.13, "learning_rate": 2.0140807490757152e-05, "loss": 0.2314, "step": 8551500 }, { "epoch": 5.13, "learning_rate": 2.013870752519659e-05, "loss": 0.227, "step": 8552000 }, { "epoch": 5.13, "learning_rate": 2.013660755963602e-05, "loss": 0.2305, "step": 8552500 }, { "epoch": 5.13, "learning_rate": 2.0134507594075456e-05, "loss": 0.2227, "step": 8553000 }, { "epoch": 5.13, "learning_rate": 2.0132407628514893e-05, "loss": 0.2246, "step": 8553500 }, { "epoch": 5.13, "learning_rate": 2.013030766295433e-05, "loss": 0.227, "step": 8554000 }, { "epoch": 5.13, "learning_rate": 2.0128207697393763e-05, "loss": 0.234, "step": 8554500 }, { "epoch": 5.13, "learning_rate": 2.0126107731833196e-05, "loss": 0.2266, "step": 8555000 }, { "epoch": 5.13, "learning_rate": 2.0124011966203753e-05, "loss": 0.2317, "step": 8555500 }, { "epoch": 5.13, "learning_rate": 2.012191200064319e-05, "loss": 0.2319, "step": 8556000 }, { "epoch": 5.13, "learning_rate": 2.0119812035082624e-05, "loss": 0.226, "step": 8556500 }, { "epoch": 5.13, "learning_rate": 2.011771206952206e-05, "loss": 0.2281, "step": 8557000 }, { "epoch": 5.13, "learning_rate": 2.0115616303892614e-05, "loss": 0.2316, "step": 8557500 }, { "epoch": 5.13, "learning_rate": 2.011351633833205e-05, "loss": 0.2305, "step": 8558000 }, { "epoch": 5.13, "learning_rate": 2.0111416372771488e-05, "loss": 0.2244, "step": 8558500 }, { "epoch": 5.13, "learning_rate": 2.010931640721092e-05, "loss": 0.2306, "step": 8559000 }, { "epoch": 5.13, "learning_rate": 2.0107220641581475e-05, "loss": 0.2303, "step": 8559500 }, { "epoch": 5.13, "learning_rate": 2.010512067602091e-05, "loss": 0.2247, "step": 8560000 }, { "epoch": 5.13, "learning_rate": 2.010302071046035e-05, "loss": 0.2256, "step": 8560500 }, { "epoch": 5.13, "learning_rate": 2.0100920744899785e-05, "loss": 0.2311, "step": 8561000 }, { "epoch": 5.13, "learning_rate": 2.0098824979270342e-05, "loss": 0.2307, "step": 8561500 }, { "epoch": 5.13, "learning_rate": 2.0096725013709772e-05, "loss": 0.2253, "step": 8562000 }, { "epoch": 5.13, "learning_rate": 2.009462504814921e-05, "loss": 0.2331, "step": 8562500 }, { "epoch": 5.13, "learning_rate": 2.0092525082588646e-05, "loss": 0.2328, "step": 8563000 }, { "epoch": 5.13, "learning_rate": 2.009042511702808e-05, "loss": 0.2296, "step": 8563500 }, { "epoch": 5.13, "learning_rate": 2.0088325151467516e-05, "loss": 0.2302, "step": 8564000 }, { "epoch": 5.13, "learning_rate": 2.008622518590695e-05, "loss": 0.2343, "step": 8564500 }, { "epoch": 5.14, "learning_rate": 2.0084129420277507e-05, "loss": 0.2324, "step": 8565000 }, { "epoch": 5.14, "learning_rate": 2.0082029454716944e-05, "loss": 0.2302, "step": 8565500 }, { "epoch": 5.14, "learning_rate": 2.0079929489156377e-05, "loss": 0.2314, "step": 8566000 }, { "epoch": 5.14, "learning_rate": 2.0077829523595814e-05, "loss": 0.2282, "step": 8566500 }, { "epoch": 5.14, "learning_rate": 2.0075733757966367e-05, "loss": 0.2267, "step": 8567000 }, { "epoch": 5.14, "learning_rate": 2.0073633792405804e-05, "loss": 0.2301, "step": 8567500 }, { "epoch": 5.14, "learning_rate": 2.007153382684524e-05, "loss": 0.227, "step": 8568000 }, { "epoch": 5.14, "learning_rate": 2.0069433861284675e-05, "loss": 0.2305, "step": 8568500 }, { "epoch": 5.14, "learning_rate": 2.006733389572411e-05, "loss": 0.2289, "step": 8569000 }, { "epoch": 5.14, "learning_rate": 2.0065233930163545e-05, "loss": 0.2296, "step": 8569500 }, { "epoch": 5.14, "learning_rate": 2.006313396460298e-05, "loss": 0.2342, "step": 8570000 }, { "epoch": 5.14, "learning_rate": 2.0061033999042415e-05, "loss": 0.2295, "step": 8570500 }, { "epoch": 5.14, "learning_rate": 2.0058938233412972e-05, "loss": 0.2324, "step": 8571000 }, { "epoch": 5.14, "learning_rate": 2.0056842467783526e-05, "loss": 0.2279, "step": 8571500 }, { "epoch": 5.14, "learning_rate": 2.0054742502222963e-05, "loss": 0.2284, "step": 8572000 }, { "epoch": 5.14, "learning_rate": 2.00526425366624e-05, "loss": 0.2294, "step": 8572500 }, { "epoch": 5.14, "learning_rate": 2.0050542571101833e-05, "loss": 0.2314, "step": 8573000 }, { "epoch": 5.14, "learning_rate": 2.004844260554127e-05, "loss": 0.2348, "step": 8573500 }, { "epoch": 5.14, "learning_rate": 2.0046342639980707e-05, "loss": 0.2234, "step": 8574000 }, { "epoch": 5.14, "learning_rate": 2.0044242674420137e-05, "loss": 0.2282, "step": 8574500 }, { "epoch": 5.14, "learning_rate": 2.0042142708859574e-05, "loss": 0.2272, "step": 8575000 }, { "epoch": 5.14, "learning_rate": 2.004004274329901e-05, "loss": 0.2291, "step": 8575500 }, { "epoch": 5.14, "learning_rate": 2.0037946977669567e-05, "loss": 0.2343, "step": 8576000 }, { "epoch": 5.14, "learning_rate": 2.0035847012109e-05, "loss": 0.2344, "step": 8576500 }, { "epoch": 5.14, "learning_rate": 2.0033747046548434e-05, "loss": 0.2295, "step": 8577000 }, { "epoch": 5.14, "learning_rate": 2.003164708098787e-05, "loss": 0.2381, "step": 8577500 }, { "epoch": 5.14, "learning_rate": 2.0029547115427308e-05, "loss": 0.2268, "step": 8578000 }, { "epoch": 5.14, "learning_rate": 2.002744714986674e-05, "loss": 0.2291, "step": 8578500 }, { "epoch": 5.14, "learning_rate": 2.0025347184306178e-05, "loss": 0.2227, "step": 8579000 }, { "epoch": 5.14, "learning_rate": 2.0023251418676732e-05, "loss": 0.2293, "step": 8579500 }, { "epoch": 5.14, "learning_rate": 2.002115145311617e-05, "loss": 0.2319, "step": 8580000 }, { "epoch": 5.14, "learning_rate": 2.0019051487555606e-05, "loss": 0.2274, "step": 8580500 }, { "epoch": 5.14, "learning_rate": 2.001695152199504e-05, "loss": 0.2267, "step": 8581000 }, { "epoch": 5.14, "learning_rate": 2.0014851556434476e-05, "loss": 0.2282, "step": 8581500 }, { "epoch": 5.15, "learning_rate": 2.001275159087391e-05, "loss": 0.236, "step": 8582000 }, { "epoch": 5.15, "learning_rate": 2.0010651625313343e-05, "loss": 0.2302, "step": 8582500 }, { "epoch": 5.15, "learning_rate": 2.000855165975278e-05, "loss": 0.2286, "step": 8583000 }, { "epoch": 5.15, "learning_rate": 2.0006455894123337e-05, "loss": 0.2224, "step": 8583500 }, { "epoch": 5.15, "learning_rate": 2.000435592856277e-05, "loss": 0.2315, "step": 8584000 }, { "epoch": 5.15, "learning_rate": 2.0002255963002207e-05, "loss": 0.2296, "step": 8584500 }, { "epoch": 5.15, "learning_rate": 2.000015599744164e-05, "loss": 0.2297, "step": 8585000 }, { "epoch": 5.15, "learning_rate": 1.9998060231812197e-05, "loss": 0.2279, "step": 8585500 }, { "epoch": 5.15, "learning_rate": 1.9995960266251634e-05, "loss": 0.2351, "step": 8586000 }, { "epoch": 5.15, "learning_rate": 1.9993860300691068e-05, "loss": 0.2288, "step": 8586500 }, { "epoch": 5.15, "learning_rate": 1.9991764535061625e-05, "loss": 0.2285, "step": 8587000 }, { "epoch": 5.15, "learning_rate": 1.998966456950106e-05, "loss": 0.2277, "step": 8587500 }, { "epoch": 5.15, "learning_rate": 1.9987564603940495e-05, "loss": 0.2269, "step": 8588000 }, { "epoch": 5.15, "learning_rate": 1.9985464638379932e-05, "loss": 0.2335, "step": 8588500 }, { "epoch": 5.15, "learning_rate": 1.9983364672819365e-05, "loss": 0.2262, "step": 8589000 }, { "epoch": 5.15, "learning_rate": 1.99812647072588e-05, "loss": 0.2282, "step": 8589500 }, { "epoch": 5.15, "learning_rate": 1.9979164741698235e-05, "loss": 0.2321, "step": 8590000 }, { "epoch": 5.15, "learning_rate": 1.9977064776137672e-05, "loss": 0.2266, "step": 8590500 }, { "epoch": 5.15, "learning_rate": 1.997496901050823e-05, "loss": 0.2307, "step": 8591000 }, { "epoch": 5.15, "learning_rate": 1.9972869044947663e-05, "loss": 0.2312, "step": 8591500 }, { "epoch": 5.15, "learning_rate": 1.9970769079387096e-05, "loss": 0.234, "step": 8592000 }, { "epoch": 5.15, "learning_rate": 1.9968669113826533e-05, "loss": 0.227, "step": 8592500 }, { "epoch": 5.15, "learning_rate": 1.996657334819709e-05, "loss": 0.2295, "step": 8593000 }, { "epoch": 5.15, "learning_rate": 1.9964473382636523e-05, "loss": 0.2335, "step": 8593500 }, { "epoch": 5.15, "learning_rate": 1.996237341707596e-05, "loss": 0.2272, "step": 8594000 }, { "epoch": 5.15, "learning_rate": 1.9960273451515394e-05, "loss": 0.2255, "step": 8594500 }, { "epoch": 5.15, "learning_rate": 1.995817768588595e-05, "loss": 0.2283, "step": 8595000 }, { "epoch": 5.15, "learning_rate": 1.9956077720325388e-05, "loss": 0.2354, "step": 8595500 }, { "epoch": 5.15, "learning_rate": 1.995397775476482e-05, "loss": 0.2278, "step": 8596000 }, { "epoch": 5.15, "learning_rate": 1.9951877789204255e-05, "loss": 0.2263, "step": 8596500 }, { "epoch": 5.15, "learning_rate": 1.9949782023574815e-05, "loss": 0.2291, "step": 8597000 }, { "epoch": 5.15, "learning_rate": 1.994768205801425e-05, "loss": 0.2268, "step": 8597500 }, { "epoch": 5.15, "learning_rate": 1.9945582092453685e-05, "loss": 0.2287, "step": 8598000 }, { "epoch": 5.16, "learning_rate": 1.994348212689312e-05, "loss": 0.2214, "step": 8598500 }, { "epoch": 5.16, "learning_rate": 1.9941386361263676e-05, "loss": 0.2325, "step": 8599000 }, { "epoch": 5.16, "learning_rate": 1.993928639570311e-05, "loss": 0.2341, "step": 8599500 }, { "epoch": 5.16, "learning_rate": 1.9937186430142546e-05, "loss": 0.2224, "step": 8600000 }, { "epoch": 5.16, "eval_loss": 0.21899452805519104, "eval_runtime": 1453.4452, "eval_samples_per_second": 362.394, "eval_steps_per_second": 60.399, "step": 8600000 }, { "epoch": 5.16, "learning_rate": 1.9935086464581983e-05, "loss": 0.2317, "step": 8600500 }, { "epoch": 5.16, "learning_rate": 1.9932990698952536e-05, "loss": 0.2254, "step": 8601000 }, { "epoch": 5.16, "learning_rate": 1.9930890733391973e-05, "loss": 0.2285, "step": 8601500 }, { "epoch": 5.16, "learning_rate": 1.992879496776253e-05, "loss": 0.2268, "step": 8602000 }, { "epoch": 5.16, "learning_rate": 1.9926695002201964e-05, "loss": 0.2278, "step": 8602500 }, { "epoch": 5.16, "learning_rate": 1.9924595036641397e-05, "loss": 0.2356, "step": 8603000 }, { "epoch": 5.16, "learning_rate": 1.9922495071080834e-05, "loss": 0.2293, "step": 8603500 }, { "epoch": 5.16, "learning_rate": 1.992039510552027e-05, "loss": 0.2247, "step": 8604000 }, { "epoch": 5.16, "learning_rate": 1.9918299339890828e-05, "loss": 0.2408, "step": 8604500 }, { "epoch": 5.16, "learning_rate": 1.9916199374330258e-05, "loss": 0.2285, "step": 8605000 }, { "epoch": 5.16, "learning_rate": 1.9914099408769695e-05, "loss": 0.2294, "step": 8605500 }, { "epoch": 5.16, "learning_rate": 1.991199944320913e-05, "loss": 0.2288, "step": 8606000 }, { "epoch": 5.16, "learning_rate": 1.9909899477648565e-05, "loss": 0.2297, "step": 8606500 }, { "epoch": 5.16, "learning_rate": 1.9907799512088002e-05, "loss": 0.2344, "step": 8607000 }, { "epoch": 5.16, "learning_rate": 1.990569954652744e-05, "loss": 0.2303, "step": 8607500 }, { "epoch": 5.16, "learning_rate": 1.9903603780897992e-05, "loss": 0.2315, "step": 8608000 }, { "epoch": 5.16, "learning_rate": 1.990150381533743e-05, "loss": 0.2341, "step": 8608500 }, { "epoch": 5.16, "learning_rate": 1.9899403849776863e-05, "loss": 0.2301, "step": 8609000 }, { "epoch": 5.16, "learning_rate": 1.98973038842163e-05, "loss": 0.2342, "step": 8609500 }, { "epoch": 5.16, "learning_rate": 1.9895203918655736e-05, "loss": 0.229, "step": 8610000 }, { "epoch": 5.16, "learning_rate": 1.9893103953095166e-05, "loss": 0.2282, "step": 8610500 }, { "epoch": 5.16, "learning_rate": 1.9891003987534603e-05, "loss": 0.2312, "step": 8611000 }, { "epoch": 5.16, "learning_rate": 1.988890402197404e-05, "loss": 0.2326, "step": 8611500 }, { "epoch": 5.16, "learning_rate": 1.9886808256344597e-05, "loss": 0.2312, "step": 8612000 }, { "epoch": 5.16, "learning_rate": 1.9884708290784034e-05, "loss": 0.2252, "step": 8612500 }, { "epoch": 5.16, "learning_rate": 1.9882608325223464e-05, "loss": 0.2223, "step": 8613000 }, { "epoch": 5.16, "learning_rate": 1.98805083596629e-05, "loss": 0.2266, "step": 8613500 }, { "epoch": 5.16, "learning_rate": 1.9878408394102338e-05, "loss": 0.2287, "step": 8614000 }, { "epoch": 5.16, "learning_rate": 1.987630842854177e-05, "loss": 0.2313, "step": 8614500 }, { "epoch": 5.17, "learning_rate": 1.9874208462981208e-05, "loss": 0.2294, "step": 8615000 }, { "epoch": 5.17, "learning_rate": 1.987210849742064e-05, "loss": 0.2292, "step": 8615500 }, { "epoch": 5.17, "learning_rate": 1.9870012731791198e-05, "loss": 0.233, "step": 8616000 }, { "epoch": 5.17, "learning_rate": 1.9867916966161755e-05, "loss": 0.2332, "step": 8616500 }, { "epoch": 5.17, "learning_rate": 1.9865817000601192e-05, "loss": 0.2311, "step": 8617000 }, { "epoch": 5.17, "learning_rate": 1.9863717035040622e-05, "loss": 0.2319, "step": 8617500 }, { "epoch": 5.17, "learning_rate": 1.986161706948006e-05, "loss": 0.2314, "step": 8618000 }, { "epoch": 5.17, "learning_rate": 1.9859517103919496e-05, "loss": 0.2297, "step": 8618500 }, { "epoch": 5.17, "learning_rate": 1.9857417138358933e-05, "loss": 0.2298, "step": 8619000 }, { "epoch": 5.17, "learning_rate": 1.9855317172798366e-05, "loss": 0.2262, "step": 8619500 }, { "epoch": 5.17, "learning_rate": 1.9853217207237803e-05, "loss": 0.2299, "step": 8620000 }, { "epoch": 5.17, "learning_rate": 1.9851125641539477e-05, "loss": 0.2316, "step": 8620500 }, { "epoch": 5.17, "learning_rate": 1.9849025675978914e-05, "loss": 0.2318, "step": 8621000 }, { "epoch": 5.17, "learning_rate": 1.984692571041835e-05, "loss": 0.2331, "step": 8621500 }, { "epoch": 5.17, "learning_rate": 1.9844825744857784e-05, "loss": 0.2312, "step": 8622000 }, { "epoch": 5.17, "learning_rate": 1.9842725779297217e-05, "loss": 0.2312, "step": 8622500 }, { "epoch": 5.17, "learning_rate": 1.9840625813736654e-05, "loss": 0.2267, "step": 8623000 }, { "epoch": 5.17, "learning_rate": 1.983852584817609e-05, "loss": 0.2314, "step": 8623500 }, { "epoch": 5.17, "learning_rate": 1.9836425882615524e-05, "loss": 0.2295, "step": 8624000 }, { "epoch": 5.17, "learning_rate": 1.9834330116986078e-05, "loss": 0.2258, "step": 8624500 }, { "epoch": 5.17, "learning_rate": 1.9832230151425515e-05, "loss": 0.227, "step": 8625000 }, { "epoch": 5.17, "learning_rate": 1.9830130185864952e-05, "loss": 0.2229, "step": 8625500 }, { "epoch": 5.17, "learning_rate": 1.982803442023551e-05, "loss": 0.2289, "step": 8626000 }, { "epoch": 5.17, "learning_rate": 1.9825934454674946e-05, "loss": 0.2324, "step": 8626500 }, { "epoch": 5.17, "learning_rate": 1.9823834489114376e-05, "loss": 0.2254, "step": 8627000 }, { "epoch": 5.17, "learning_rate": 1.9821734523553812e-05, "loss": 0.2326, "step": 8627500 }, { "epoch": 5.17, "learning_rate": 1.981963455799325e-05, "loss": 0.2323, "step": 8628000 }, { "epoch": 5.17, "learning_rate": 1.9817534592432683e-05, "loss": 0.2257, "step": 8628500 }, { "epoch": 5.17, "learning_rate": 1.9815438826803243e-05, "loss": 0.2321, "step": 8629000 }, { "epoch": 5.17, "learning_rate": 1.9813338861242673e-05, "loss": 0.2271, "step": 8629500 }, { "epoch": 5.17, "learning_rate": 1.981123889568211e-05, "loss": 0.2301, "step": 8630000 }, { "epoch": 5.17, "learning_rate": 1.9809138930121547e-05, "loss": 0.2401, "step": 8630500 }, { "epoch": 5.17, "learning_rate": 1.980703896456098e-05, "loss": 0.2256, "step": 8631000 }, { "epoch": 5.17, "learning_rate": 1.9804938999000417e-05, "loss": 0.2307, "step": 8631500 }, { "epoch": 5.18, "learning_rate": 1.980283903343985e-05, "loss": 0.2301, "step": 8632000 }, { "epoch": 5.18, "learning_rate": 1.9800739067879284e-05, "loss": 0.2224, "step": 8632500 }, { "epoch": 5.18, "learning_rate": 1.9798647502180965e-05, "loss": 0.2301, "step": 8633000 }, { "epoch": 5.18, "learning_rate": 1.97965475366204e-05, "loss": 0.2344, "step": 8633500 }, { "epoch": 5.18, "learning_rate": 1.979444757105983e-05, "loss": 0.2371, "step": 8634000 }, { "epoch": 5.18, "learning_rate": 1.979234760549927e-05, "loss": 0.2339, "step": 8634500 }, { "epoch": 5.18, "learning_rate": 1.9790247639938705e-05, "loss": 0.2343, "step": 8635000 }, { "epoch": 5.18, "learning_rate": 1.978814767437814e-05, "loss": 0.2305, "step": 8635500 }, { "epoch": 5.18, "learning_rate": 1.9786047708817575e-05, "loss": 0.2293, "step": 8636000 }, { "epoch": 5.18, "learning_rate": 1.9783947743257012e-05, "loss": 0.2307, "step": 8636500 }, { "epoch": 5.18, "learning_rate": 1.9781851977627566e-05, "loss": 0.2339, "step": 8637000 }, { "epoch": 5.18, "learning_rate": 1.9779752012067003e-05, "loss": 0.2294, "step": 8637500 }, { "epoch": 5.18, "learning_rate": 1.9777652046506436e-05, "loss": 0.2284, "step": 8638000 }, { "epoch": 5.18, "learning_rate": 1.9775556280876993e-05, "loss": 0.2381, "step": 8638500 }, { "epoch": 5.18, "learning_rate": 1.9773456315316427e-05, "loss": 0.2284, "step": 8639000 }, { "epoch": 5.18, "learning_rate": 1.9771356349755863e-05, "loss": 0.2336, "step": 8639500 }, { "epoch": 5.18, "learning_rate": 1.97692563841953e-05, "loss": 0.2312, "step": 8640000 }, { "epoch": 5.18, "learning_rate": 1.9767160618565857e-05, "loss": 0.2297, "step": 8640500 }, { "epoch": 5.18, "learning_rate": 1.976506065300529e-05, "loss": 0.229, "step": 8641000 }, { "epoch": 5.18, "learning_rate": 1.9762960687444724e-05, "loss": 0.2297, "step": 8641500 }, { "epoch": 5.18, "learning_rate": 1.976086072188416e-05, "loss": 0.2311, "step": 8642000 }, { "epoch": 5.18, "learning_rate": 1.9758760756323595e-05, "loss": 0.2311, "step": 8642500 }, { "epoch": 5.18, "learning_rate": 1.975666079076303e-05, "loss": 0.2265, "step": 8643000 }, { "epoch": 5.18, "learning_rate": 1.9754560825202468e-05, "loss": 0.2273, "step": 8643500 }, { "epoch": 5.18, "learning_rate": 1.97524608596419e-05, "loss": 0.2263, "step": 8644000 }, { "epoch": 5.18, "learning_rate": 1.975036509401246e-05, "loss": 0.2279, "step": 8644500 }, { "epoch": 5.18, "learning_rate": 1.9748265128451892e-05, "loss": 0.2301, "step": 8645000 }, { "epoch": 5.18, "learning_rate": 1.974616516289133e-05, "loss": 0.2276, "step": 8645500 }, { "epoch": 5.18, "learning_rate": 1.9744065197330766e-05, "loss": 0.2245, "step": 8646000 }, { "epoch": 5.18, "learning_rate": 1.974197363163244e-05, "loss": 0.2334, "step": 8646500 }, { "epoch": 5.18, "learning_rate": 1.9739873666071876e-05, "loss": 0.2307, "step": 8647000 }, { "epoch": 5.18, "learning_rate": 1.9737773700511313e-05, "loss": 0.2302, "step": 8647500 }, { "epoch": 5.18, "learning_rate": 1.9735673734950747e-05, "loss": 0.2241, "step": 8648000 }, { "epoch": 5.19, "learning_rate": 1.973357376939018e-05, "loss": 0.2249, "step": 8648500 }, { "epoch": 5.19, "learning_rate": 1.9731473803829617e-05, "loss": 0.2298, "step": 8649000 }, { "epoch": 5.19, "learning_rate": 1.972937383826905e-05, "loss": 0.2356, "step": 8649500 }, { "epoch": 5.19, "learning_rate": 1.9727273872708487e-05, "loss": 0.232, "step": 8650000 }, { "epoch": 5.19, "learning_rate": 1.9725178107079044e-05, "loss": 0.2337, "step": 8650500 }, { "epoch": 5.19, "learning_rate": 1.9723078141518478e-05, "loss": 0.2241, "step": 8651000 }, { "epoch": 5.19, "learning_rate": 1.9720978175957915e-05, "loss": 0.2322, "step": 8651500 }, { "epoch": 5.19, "learning_rate": 1.9718878210397348e-05, "loss": 0.2322, "step": 8652000 }, { "epoch": 5.19, "learning_rate": 1.9716782444767905e-05, "loss": 0.2321, "step": 8652500 }, { "epoch": 5.19, "learning_rate": 1.9714682479207342e-05, "loss": 0.2296, "step": 8653000 }, { "epoch": 5.19, "learning_rate": 1.9712586713577895e-05, "loss": 0.2333, "step": 8653500 }, { "epoch": 5.19, "learning_rate": 1.9710486748017332e-05, "loss": 0.2267, "step": 8654000 }, { "epoch": 5.19, "learning_rate": 1.970838678245677e-05, "loss": 0.2263, "step": 8654500 }, { "epoch": 5.19, "learning_rate": 1.9706286816896203e-05, "loss": 0.2316, "step": 8655000 }, { "epoch": 5.19, "learning_rate": 1.9704186851335636e-05, "loss": 0.232, "step": 8655500 }, { "epoch": 5.19, "learning_rate": 1.9702086885775073e-05, "loss": 0.2293, "step": 8656000 }, { "epoch": 5.19, "learning_rate": 1.9699986920214506e-05, "loss": 0.2266, "step": 8656500 }, { "epoch": 5.19, "learning_rate": 1.9697886954653943e-05, "loss": 0.2265, "step": 8657000 }, { "epoch": 5.19, "learning_rate": 1.96957911890245e-05, "loss": 0.2334, "step": 8657500 }, { "epoch": 5.19, "learning_rate": 1.9693691223463934e-05, "loss": 0.2262, "step": 8658000 }, { "epoch": 5.19, "learning_rate": 1.969159125790337e-05, "loss": 0.2273, "step": 8658500 }, { "epoch": 5.19, "learning_rate": 1.9689491292342804e-05, "loss": 0.229, "step": 8659000 }, { "epoch": 5.19, "learning_rate": 1.968739552671336e-05, "loss": 0.2282, "step": 8659500 }, { "epoch": 5.19, "learning_rate": 1.9685295561152798e-05, "loss": 0.2307, "step": 8660000 }, { "epoch": 5.19, "learning_rate": 1.968319559559223e-05, "loss": 0.2285, "step": 8660500 }, { "epoch": 5.19, "learning_rate": 1.9681095630031668e-05, "loss": 0.2279, "step": 8661000 }, { "epoch": 5.19, "learning_rate": 1.9678999864402225e-05, "loss": 0.2292, "step": 8661500 }, { "epoch": 5.19, "learning_rate": 1.967689989884166e-05, "loss": 0.2287, "step": 8662000 }, { "epoch": 5.19, "learning_rate": 1.9674799933281095e-05, "loss": 0.224, "step": 8662500 }, { "epoch": 5.19, "learning_rate": 1.967269996772053e-05, "loss": 0.2334, "step": 8663000 }, { "epoch": 5.19, "learning_rate": 1.9670604202091086e-05, "loss": 0.2304, "step": 8663500 }, { "epoch": 5.19, "learning_rate": 1.9668508436461643e-05, "loss": 0.2327, "step": 8664000 }, { "epoch": 5.19, "learning_rate": 1.966640847090108e-05, "loss": 0.2324, "step": 8664500 }, { "epoch": 5.2, "learning_rate": 1.966430850534051e-05, "loss": 0.2272, "step": 8665000 }, { "epoch": 5.2, "learning_rate": 1.9662208539779946e-05, "loss": 0.2245, "step": 8665500 }, { "epoch": 5.2, "learning_rate": 1.9660108574219383e-05, "loss": 0.2269, "step": 8666000 }, { "epoch": 5.2, "learning_rate": 1.9658008608658817e-05, "loss": 0.23, "step": 8666500 }, { "epoch": 5.2, "learning_rate": 1.9655908643098254e-05, "loss": 0.2312, "step": 8667000 }, { "epoch": 5.2, "learning_rate": 1.9653808677537687e-05, "loss": 0.237, "step": 8667500 }, { "epoch": 5.2, "learning_rate": 1.9651712911908244e-05, "loss": 0.2307, "step": 8668000 }, { "epoch": 5.2, "learning_rate": 1.96496171462788e-05, "loss": 0.2293, "step": 8668500 }, { "epoch": 5.2, "learning_rate": 1.9647517180718238e-05, "loss": 0.2301, "step": 8669000 }, { "epoch": 5.2, "learning_rate": 1.964541721515767e-05, "loss": 0.2299, "step": 8669500 }, { "epoch": 5.2, "learning_rate": 1.9643317249597105e-05, "loss": 0.2325, "step": 8670000 }, { "epoch": 5.2, "learning_rate": 1.964121728403654e-05, "loss": 0.233, "step": 8670500 }, { "epoch": 5.2, "learning_rate": 1.963911731847598e-05, "loss": 0.2306, "step": 8671000 }, { "epoch": 5.2, "learning_rate": 1.9637021552846535e-05, "loss": 0.2297, "step": 8671500 }, { "epoch": 5.2, "learning_rate": 1.9634921587285965e-05, "loss": 0.2244, "step": 8672000 }, { "epoch": 5.2, "learning_rate": 1.9632821621725402e-05, "loss": 0.2332, "step": 8672500 }, { "epoch": 5.2, "learning_rate": 1.963072165616484e-05, "loss": 0.2253, "step": 8673000 }, { "epoch": 5.2, "learning_rate": 1.9628625890535396e-05, "loss": 0.2318, "step": 8673500 }, { "epoch": 5.2, "learning_rate": 1.9626525924974833e-05, "loss": 0.228, "step": 8674000 }, { "epoch": 5.2, "learning_rate": 1.9624425959414263e-05, "loss": 0.2289, "step": 8674500 }, { "epoch": 5.2, "learning_rate": 1.96223259938537e-05, "loss": 0.229, "step": 8675000 }, { "epoch": 5.2, "learning_rate": 1.9620226028293137e-05, "loss": 0.228, "step": 8675500 }, { "epoch": 5.2, "learning_rate": 1.961812606273257e-05, "loss": 0.2307, "step": 8676000 }, { "epoch": 5.2, "learning_rate": 1.9616026097172007e-05, "loss": 0.2299, "step": 8676500 }, { "epoch": 5.2, "learning_rate": 1.961392613161144e-05, "loss": 0.2278, "step": 8677000 }, { "epoch": 5.2, "learning_rate": 1.9611826166050877e-05, "loss": 0.2266, "step": 8677500 }, { "epoch": 5.2, "learning_rate": 1.9609734600352554e-05, "loss": 0.2317, "step": 8678000 }, { "epoch": 5.2, "learning_rate": 1.960763463479199e-05, "loss": 0.2275, "step": 8678500 }, { "epoch": 5.2, "learning_rate": 1.960553466923142e-05, "loss": 0.2276, "step": 8679000 }, { "epoch": 5.2, "learning_rate": 1.9603434703670858e-05, "loss": 0.228, "step": 8679500 }, { "epoch": 5.2, "learning_rate": 1.9601334738110295e-05, "loss": 0.2296, "step": 8680000 }, { "epoch": 5.2, "learning_rate": 1.9599234772549732e-05, "loss": 0.2306, "step": 8680500 }, { "epoch": 5.2, "learning_rate": 1.959713900692029e-05, "loss": 0.2259, "step": 8681000 }, { "epoch": 5.2, "learning_rate": 1.959503904135972e-05, "loss": 0.2288, "step": 8681500 }, { "epoch": 5.21, "learning_rate": 1.9592939075799156e-05, "loss": 0.2266, "step": 8682000 }, { "epoch": 5.21, "learning_rate": 1.9590839110238593e-05, "loss": 0.2287, "step": 8682500 }, { "epoch": 5.21, "learning_rate": 1.9588739144678026e-05, "loss": 0.2272, "step": 8683000 }, { "epoch": 5.21, "learning_rate": 1.9586639179117463e-05, "loss": 0.231, "step": 8683500 }, { "epoch": 5.21, "learning_rate": 1.95845392135569e-05, "loss": 0.2308, "step": 8684000 }, { "epoch": 5.21, "learning_rate": 1.9582439247996333e-05, "loss": 0.2285, "step": 8684500 }, { "epoch": 5.21, "learning_rate": 1.958034348236689e-05, "loss": 0.2256, "step": 8685000 }, { "epoch": 5.21, "learning_rate": 1.9578243516806324e-05, "loss": 0.2308, "step": 8685500 }, { "epoch": 5.21, "learning_rate": 1.957614355124576e-05, "loss": 0.2361, "step": 8686000 }, { "epoch": 5.21, "learning_rate": 1.9574043585685194e-05, "loss": 0.225, "step": 8686500 }, { "epoch": 5.21, "learning_rate": 1.957195201998687e-05, "loss": 0.2308, "step": 8687000 }, { "epoch": 5.21, "learning_rate": 1.9569852054426308e-05, "loss": 0.2333, "step": 8687500 }, { "epoch": 5.21, "learning_rate": 1.9567752088865745e-05, "loss": 0.2296, "step": 8688000 }, { "epoch": 5.21, "learning_rate": 1.9565652123305175e-05, "loss": 0.2241, "step": 8688500 }, { "epoch": 5.21, "learning_rate": 1.956355215774461e-05, "loss": 0.2289, "step": 8689000 }, { "epoch": 5.21, "learning_rate": 1.956145219218405e-05, "loss": 0.2265, "step": 8689500 }, { "epoch": 5.21, "learning_rate": 1.9559356426554605e-05, "loss": 0.2254, "step": 8690000 }, { "epoch": 5.21, "learning_rate": 1.9557256460994042e-05, "loss": 0.2224, "step": 8690500 }, { "epoch": 5.21, "learning_rate": 1.9555156495433472e-05, "loss": 0.2261, "step": 8691000 }, { "epoch": 5.21, "learning_rate": 1.955305652987291e-05, "loss": 0.2328, "step": 8691500 }, { "epoch": 5.21, "learning_rate": 1.9550956564312346e-05, "loss": 0.224, "step": 8692000 }, { "epoch": 5.21, "learning_rate": 1.954885659875178e-05, "loss": 0.2274, "step": 8692500 }, { "epoch": 5.21, "learning_rate": 1.9546756633191216e-05, "loss": 0.2241, "step": 8693000 }, { "epoch": 5.21, "learning_rate": 1.9544656667630653e-05, "loss": 0.2358, "step": 8693500 }, { "epoch": 5.21, "learning_rate": 1.9542560902001207e-05, "loss": 0.2265, "step": 8694000 }, { "epoch": 5.21, "learning_rate": 1.9540460936440644e-05, "loss": 0.2299, "step": 8694500 }, { "epoch": 5.21, "learning_rate": 1.9538360970880077e-05, "loss": 0.2299, "step": 8695000 }, { "epoch": 5.21, "learning_rate": 1.9536261005319514e-05, "loss": 0.2327, "step": 8695500 }, { "epoch": 5.21, "learning_rate": 1.9534165239690067e-05, "loss": 0.2424, "step": 8696000 }, { "epoch": 5.21, "learning_rate": 1.9532065274129504e-05, "loss": 0.2286, "step": 8696500 }, { "epoch": 5.21, "learning_rate": 1.9529965308568938e-05, "loss": 0.2217, "step": 8697000 }, { "epoch": 5.21, "learning_rate": 1.9527869542939498e-05, "loss": 0.234, "step": 8697500 }, { "epoch": 5.21, "learning_rate": 1.9525769577378928e-05, "loss": 0.2347, "step": 8698000 }, { "epoch": 5.22, "learning_rate": 1.9523669611818365e-05, "loss": 0.2279, "step": 8698500 }, { "epoch": 5.22, "learning_rate": 1.9521569646257802e-05, "loss": 0.2315, "step": 8699000 }, { "epoch": 5.22, "learning_rate": 1.9519469680697235e-05, "loss": 0.2278, "step": 8699500 }, { "epoch": 5.22, "learning_rate": 1.9517369715136672e-05, "loss": 0.226, "step": 8700000 }, { "epoch": 5.22, "eval_loss": 0.21748991310596466, "eval_runtime": 1458.2187, "eval_samples_per_second": 361.208, "eval_steps_per_second": 60.202, "step": 8700000 }, { "epoch": 5.22, "learning_rate": 1.951526974957611e-05, "loss": 0.2245, "step": 8700500 }, { "epoch": 5.22, "learning_rate": 1.951316978401554e-05, "loss": 0.2374, "step": 8701000 }, { "epoch": 5.22, "learning_rate": 1.95110740183861e-05, "loss": 0.2302, "step": 8701500 }, { "epoch": 5.22, "learning_rate": 1.9508974052825533e-05, "loss": 0.2266, "step": 8702000 }, { "epoch": 5.22, "learning_rate": 1.950687408726497e-05, "loss": 0.2254, "step": 8702500 }, { "epoch": 5.22, "learning_rate": 1.9504774121704407e-05, "loss": 0.2284, "step": 8703000 }, { "epoch": 5.22, "learning_rate": 1.950267835607496e-05, "loss": 0.2323, "step": 8703500 }, { "epoch": 5.22, "learning_rate": 1.9500582590445517e-05, "loss": 0.2277, "step": 8704000 }, { "epoch": 5.22, "learning_rate": 1.9498482624884954e-05, "loss": 0.2307, "step": 8704500 }, { "epoch": 5.22, "learning_rate": 1.9496382659324387e-05, "loss": 0.2312, "step": 8705000 }, { "epoch": 5.22, "learning_rate": 1.949428269376382e-05, "loss": 0.2297, "step": 8705500 }, { "epoch": 5.22, "learning_rate": 1.9492182728203258e-05, "loss": 0.2274, "step": 8706000 }, { "epoch": 5.22, "learning_rate": 1.949008276264269e-05, "loss": 0.2261, "step": 8706500 }, { "epoch": 5.22, "learning_rate": 1.9487982797082128e-05, "loss": 0.2272, "step": 8707000 }, { "epoch": 5.22, "learning_rate": 1.9485882831521565e-05, "loss": 0.226, "step": 8707500 }, { "epoch": 5.22, "learning_rate": 1.948378706589212e-05, "loss": 0.2268, "step": 8708000 }, { "epoch": 5.22, "learning_rate": 1.9481687100331555e-05, "loss": 0.2312, "step": 8708500 }, { "epoch": 5.22, "learning_rate": 1.947958713477099e-05, "loss": 0.2267, "step": 8709000 }, { "epoch": 5.22, "learning_rate": 1.9477487169210426e-05, "loss": 0.2308, "step": 8709500 }, { "epoch": 5.22, "learning_rate": 1.947539140358098e-05, "loss": 0.2278, "step": 8710000 }, { "epoch": 5.22, "learning_rate": 1.9473291438020416e-05, "loss": 0.226, "step": 8710500 }, { "epoch": 5.22, "learning_rate": 1.947119147245985e-05, "loss": 0.2277, "step": 8711000 }, { "epoch": 5.22, "learning_rate": 1.946909570683041e-05, "loss": 0.2287, "step": 8711500 }, { "epoch": 5.22, "learning_rate": 1.9466995741269843e-05, "loss": 0.2343, "step": 8712000 }, { "epoch": 5.22, "learning_rate": 1.9464895775709277e-05, "loss": 0.2261, "step": 8712500 }, { "epoch": 5.22, "learning_rate": 1.9462795810148714e-05, "loss": 0.2289, "step": 8713000 }, { "epoch": 5.22, "learning_rate": 1.9460695844588147e-05, "loss": 0.2323, "step": 8713500 }, { "epoch": 5.22, "learning_rate": 1.9458600078958704e-05, "loss": 0.2289, "step": 8714000 }, { "epoch": 5.22, "learning_rate": 1.945650011339814e-05, "loss": 0.2275, "step": 8714500 }, { "epoch": 5.22, "learning_rate": 1.9454400147837574e-05, "loss": 0.2289, "step": 8715000 }, { "epoch": 5.23, "learning_rate": 1.945230018227701e-05, "loss": 0.2329, "step": 8715500 }, { "epoch": 5.23, "learning_rate": 1.9450200216716445e-05, "loss": 0.2308, "step": 8716000 }, { "epoch": 5.23, "learning_rate": 1.944810025115588e-05, "loss": 0.2274, "step": 8716500 }, { "epoch": 5.23, "learning_rate": 1.944600448552644e-05, "loss": 0.2342, "step": 8717000 }, { "epoch": 5.23, "learning_rate": 1.9443904519965872e-05, "loss": 0.2319, "step": 8717500 }, { "epoch": 5.23, "learning_rate": 1.9441804554405305e-05, "loss": 0.2237, "step": 8718000 }, { "epoch": 5.23, "learning_rate": 1.9439704588844742e-05, "loss": 0.2266, "step": 8718500 }, { "epoch": 5.23, "learning_rate": 1.943760462328418e-05, "loss": 0.2339, "step": 8719000 }, { "epoch": 5.23, "learning_rate": 1.9435508857654733e-05, "loss": 0.2289, "step": 8719500 }, { "epoch": 5.23, "learning_rate": 1.943340889209417e-05, "loss": 0.2294, "step": 8720000 }, { "epoch": 5.23, "learning_rate": 1.9431308926533603e-05, "loss": 0.2303, "step": 8720500 }, { "epoch": 5.23, "learning_rate": 1.942920896097304e-05, "loss": 0.2314, "step": 8721000 }, { "epoch": 5.23, "learning_rate": 1.9427108995412477e-05, "loss": 0.2297, "step": 8721500 }, { "epoch": 5.23, "learning_rate": 1.942501322978303e-05, "loss": 0.2262, "step": 8722000 }, { "epoch": 5.23, "learning_rate": 1.9422913264222467e-05, "loss": 0.2298, "step": 8722500 }, { "epoch": 5.23, "learning_rate": 1.94208132986619e-05, "loss": 0.2345, "step": 8723000 }, { "epoch": 5.23, "learning_rate": 1.9418713333101337e-05, "loss": 0.2256, "step": 8723500 }, { "epoch": 5.23, "learning_rate": 1.9416613367540774e-05, "loss": 0.227, "step": 8724000 }, { "epoch": 5.23, "learning_rate": 1.9414513401980208e-05, "loss": 0.2268, "step": 8724500 }, { "epoch": 5.23, "learning_rate": 1.941241343641964e-05, "loss": 0.2342, "step": 8725000 }, { "epoch": 5.23, "learning_rate": 1.9410313470859078e-05, "loss": 0.2326, "step": 8725500 }, { "epoch": 5.23, "learning_rate": 1.9408217705229635e-05, "loss": 0.2301, "step": 8726000 }, { "epoch": 5.23, "learning_rate": 1.9406117739669072e-05, "loss": 0.2378, "step": 8726500 }, { "epoch": 5.23, "learning_rate": 1.9404021974039625e-05, "loss": 0.2284, "step": 8727000 }, { "epoch": 5.23, "learning_rate": 1.940192200847906e-05, "loss": 0.2271, "step": 8727500 }, { "epoch": 5.23, "learning_rate": 1.9399822042918496e-05, "loss": 0.2299, "step": 8728000 }, { "epoch": 5.23, "learning_rate": 1.9397722077357933e-05, "loss": 0.2242, "step": 8728500 }, { "epoch": 5.23, "learning_rate": 1.9395622111797366e-05, "loss": 0.2294, "step": 8729000 }, { "epoch": 5.23, "learning_rate": 1.93935221462368e-05, "loss": 0.2274, "step": 8729500 }, { "epoch": 5.23, "learning_rate": 1.9391422180676236e-05, "loss": 0.2301, "step": 8730000 }, { "epoch": 5.23, "learning_rate": 1.9389322215115673e-05, "loss": 0.2236, "step": 8730500 }, { "epoch": 5.23, "learning_rate": 1.938723064941735e-05, "loss": 0.2313, "step": 8731000 }, { "epoch": 5.23, "learning_rate": 1.9385130683856784e-05, "loss": 0.2272, "step": 8731500 }, { "epoch": 5.24, "learning_rate": 1.9383030718296217e-05, "loss": 0.2273, "step": 8732000 }, { "epoch": 5.24, "learning_rate": 1.9380930752735654e-05, "loss": 0.2366, "step": 8732500 }, { "epoch": 5.24, "learning_rate": 1.937883498710621e-05, "loss": 0.226, "step": 8733000 }, { "epoch": 5.24, "learning_rate": 1.9376735021545648e-05, "loss": 0.2295, "step": 8733500 }, { "epoch": 5.24, "learning_rate": 1.937463505598508e-05, "loss": 0.2278, "step": 8734000 }, { "epoch": 5.24, "learning_rate": 1.9372535090424515e-05, "loss": 0.2278, "step": 8734500 }, { "epoch": 5.24, "learning_rate": 1.937043512486395e-05, "loss": 0.2326, "step": 8735000 }, { "epoch": 5.24, "learning_rate": 1.936833515930339e-05, "loss": 0.2319, "step": 8735500 }, { "epoch": 5.24, "learning_rate": 1.9366239393673945e-05, "loss": 0.2328, "step": 8736000 }, { "epoch": 5.24, "learning_rate": 1.936413942811338e-05, "loss": 0.2261, "step": 8736500 }, { "epoch": 5.24, "learning_rate": 1.9362039462552812e-05, "loss": 0.2301, "step": 8737000 }, { "epoch": 5.24, "learning_rate": 1.935993949699225e-05, "loss": 0.2313, "step": 8737500 }, { "epoch": 5.24, "learning_rate": 1.9357839531431686e-05, "loss": 0.227, "step": 8738000 }, { "epoch": 5.24, "learning_rate": 1.935573956587112e-05, "loss": 0.2295, "step": 8738500 }, { "epoch": 5.24, "learning_rate": 1.9353643800241676e-05, "loss": 0.2315, "step": 8739000 }, { "epoch": 5.24, "learning_rate": 1.935154383468111e-05, "loss": 0.228, "step": 8739500 }, { "epoch": 5.24, "learning_rate": 1.9349443869120547e-05, "loss": 0.2251, "step": 8740000 }, { "epoch": 5.24, "learning_rate": 1.9347343903559984e-05, "loss": 0.2229, "step": 8740500 }, { "epoch": 5.24, "learning_rate": 1.9345243937999417e-05, "loss": 0.2309, "step": 8741000 }, { "epoch": 5.24, "learning_rate": 1.934314817236997e-05, "loss": 0.2295, "step": 8741500 }, { "epoch": 5.24, "learning_rate": 1.9341048206809407e-05, "loss": 0.2319, "step": 8742000 }, { "epoch": 5.24, "learning_rate": 1.9338948241248844e-05, "loss": 0.2305, "step": 8742500 }, { "epoch": 5.24, "learning_rate": 1.9336848275688278e-05, "loss": 0.2242, "step": 8743000 }, { "epoch": 5.24, "learning_rate": 1.9334748310127715e-05, "loss": 0.2258, "step": 8743500 }, { "epoch": 5.24, "learning_rate": 1.9332652544498268e-05, "loss": 0.2326, "step": 8744000 }, { "epoch": 5.24, "learning_rate": 1.9330552578937705e-05, "loss": 0.2345, "step": 8744500 }, { "epoch": 5.24, "learning_rate": 1.9328452613377142e-05, "loss": 0.2253, "step": 8745000 }, { "epoch": 5.24, "learning_rate": 1.9326352647816575e-05, "loss": 0.2292, "step": 8745500 }, { "epoch": 5.24, "learning_rate": 1.932425268225601e-05, "loss": 0.2306, "step": 8746000 }, { "epoch": 5.24, "learning_rate": 1.9322152716695446e-05, "loss": 0.2297, "step": 8746500 }, { "epoch": 5.24, "learning_rate": 1.932005275113488e-05, "loss": 0.2239, "step": 8747000 }, { "epoch": 5.24, "learning_rate": 1.9317952785574316e-05, "loss": 0.2307, "step": 8747500 }, { "epoch": 5.24, "learning_rate": 1.9315857019944873e-05, "loss": 0.224, "step": 8748000 }, { "epoch": 5.25, "learning_rate": 1.9313757054384306e-05, "loss": 0.232, "step": 8748500 }, { "epoch": 5.25, "learning_rate": 1.9311657088823743e-05, "loss": 0.2252, "step": 8749000 }, { "epoch": 5.25, "learning_rate": 1.93095613231943e-05, "loss": 0.2357, "step": 8749500 }, { "epoch": 5.25, "learning_rate": 1.9307461357633734e-05, "loss": 0.2261, "step": 8750000 }, { "epoch": 5.25, "learning_rate": 1.930536559200429e-05, "loss": 0.2375, "step": 8750500 }, { "epoch": 5.25, "learning_rate": 1.9303265626443724e-05, "loss": 0.2288, "step": 8751000 }, { "epoch": 5.25, "learning_rate": 1.930116566088316e-05, "loss": 0.2313, "step": 8751500 }, { "epoch": 5.25, "learning_rate": 1.9299065695322598e-05, "loss": 0.2255, "step": 8752000 }, { "epoch": 5.25, "learning_rate": 1.929696572976203e-05, "loss": 0.2325, "step": 8752500 }, { "epoch": 5.25, "learning_rate": 1.9294865764201468e-05, "loss": 0.2309, "step": 8753000 }, { "epoch": 5.25, "learning_rate": 1.92927657986409e-05, "loss": 0.2284, "step": 8753500 }, { "epoch": 5.25, "learning_rate": 1.9290665833080335e-05, "loss": 0.2311, "step": 8754000 }, { "epoch": 5.25, "learning_rate": 1.9288565867519772e-05, "loss": 0.2235, "step": 8754500 }, { "epoch": 5.25, "learning_rate": 1.928646590195921e-05, "loss": 0.2261, "step": 8755000 }, { "epoch": 5.25, "learning_rate": 1.9284365936398646e-05, "loss": 0.2301, "step": 8755500 }, { "epoch": 5.25, "learning_rate": 1.9282265970838076e-05, "loss": 0.2247, "step": 8756000 }, { "epoch": 5.25, "learning_rate": 1.9280170205208633e-05, "loss": 0.2247, "step": 8756500 }, { "epoch": 5.25, "learning_rate": 1.927807023964807e-05, "loss": 0.2322, "step": 8757000 }, { "epoch": 5.25, "learning_rate": 1.9275970274087506e-05, "loss": 0.2295, "step": 8757500 }, { "epoch": 5.25, "learning_rate": 1.927387450845806e-05, "loss": 0.2321, "step": 8758000 }, { "epoch": 5.25, "learning_rate": 1.9271774542897497e-05, "loss": 0.2299, "step": 8758500 }, { "epoch": 5.25, "learning_rate": 1.926967457733693e-05, "loss": 0.2307, "step": 8759000 }, { "epoch": 5.25, "learning_rate": 1.9267574611776367e-05, "loss": 0.2303, "step": 8759500 }, { "epoch": 5.25, "learning_rate": 1.9265474646215804e-05, "loss": 0.2294, "step": 8760000 }, { "epoch": 5.25, "learning_rate": 1.9263374680655237e-05, "loss": 0.2278, "step": 8760500 }, { "epoch": 5.25, "learning_rate": 1.926127471509467e-05, "loss": 0.2288, "step": 8761000 }, { "epoch": 5.25, "learning_rate": 1.9259174749534108e-05, "loss": 0.2301, "step": 8761500 }, { "epoch": 5.25, "learning_rate": 1.9257078983904665e-05, "loss": 0.2315, "step": 8762000 }, { "epoch": 5.25, "learning_rate": 1.92549790183441e-05, "loss": 0.2283, "step": 8762500 }, { "epoch": 5.25, "learning_rate": 1.9252879052783535e-05, "loss": 0.2358, "step": 8763000 }, { "epoch": 5.25, "learning_rate": 1.925077908722297e-05, "loss": 0.2324, "step": 8763500 }, { "epoch": 5.25, "learning_rate": 1.9248683321593525e-05, "loss": 0.2266, "step": 8764000 }, { "epoch": 5.25, "learning_rate": 1.9246583356032962e-05, "loss": 0.23, "step": 8764500 }, { "epoch": 5.25, "learning_rate": 1.9244483390472396e-05, "loss": 0.2253, "step": 8765000 }, { "epoch": 5.26, "learning_rate": 1.924238342491183e-05, "loss": 0.2314, "step": 8765500 }, { "epoch": 5.26, "learning_rate": 1.9240287659282386e-05, "loss": 0.2264, "step": 8766000 }, { "epoch": 5.26, "learning_rate": 1.9238187693721823e-05, "loss": 0.2288, "step": 8766500 }, { "epoch": 5.26, "learning_rate": 1.923608772816126e-05, "loss": 0.2247, "step": 8767000 }, { "epoch": 5.26, "learning_rate": 1.9233987762600693e-05, "loss": 0.2251, "step": 8767500 }, { "epoch": 5.26, "learning_rate": 1.923189199697125e-05, "loss": 0.2257, "step": 8768000 }, { "epoch": 5.26, "learning_rate": 1.9229792031410684e-05, "loss": 0.2275, "step": 8768500 }, { "epoch": 5.26, "learning_rate": 1.922769206585012e-05, "loss": 0.2248, "step": 8769000 }, { "epoch": 5.26, "learning_rate": 1.9225592100289557e-05, "loss": 0.229, "step": 8769500 }, { "epoch": 5.26, "learning_rate": 1.922349633466011e-05, "loss": 0.227, "step": 8770000 }, { "epoch": 5.26, "learning_rate": 1.9221400569030668e-05, "loss": 0.2327, "step": 8770500 }, { "epoch": 5.26, "learning_rate": 1.9219300603470105e-05, "loss": 0.232, "step": 8771000 }, { "epoch": 5.26, "learning_rate": 1.9217200637909538e-05, "loss": 0.233, "step": 8771500 }, { "epoch": 5.26, "learning_rate": 1.9215100672348975e-05, "loss": 0.226, "step": 8772000 }, { "epoch": 5.26, "learning_rate": 1.921300070678841e-05, "loss": 0.2365, "step": 8772500 }, { "epoch": 5.26, "learning_rate": 1.9210900741227842e-05, "loss": 0.2326, "step": 8773000 }, { "epoch": 5.26, "learning_rate": 1.920880077566728e-05, "loss": 0.2274, "step": 8773500 }, { "epoch": 5.26, "learning_rate": 1.9206700810106716e-05, "loss": 0.2284, "step": 8774000 }, { "epoch": 5.26, "learning_rate": 1.9204605044477273e-05, "loss": 0.2332, "step": 8774500 }, { "epoch": 5.26, "learning_rate": 1.9202505078916706e-05, "loss": 0.232, "step": 8775000 }, { "epoch": 5.26, "learning_rate": 1.920040511335614e-05, "loss": 0.2264, "step": 8775500 }, { "epoch": 5.26, "learning_rate": 1.9198305147795576e-05, "loss": 0.2268, "step": 8776000 }, { "epoch": 5.26, "learning_rate": 1.9196205182235013e-05, "loss": 0.2291, "step": 8776500 }, { "epoch": 5.26, "learning_rate": 1.9194109416605567e-05, "loss": 0.2322, "step": 8777000 }, { "epoch": 5.26, "learning_rate": 1.9192009451045e-05, "loss": 0.2315, "step": 8777500 }, { "epoch": 5.26, "learning_rate": 1.9189909485484437e-05, "loss": 0.2323, "step": 8778000 }, { "epoch": 5.26, "learning_rate": 1.9187809519923874e-05, "loss": 0.2332, "step": 8778500 }, { "epoch": 5.26, "learning_rate": 1.9185709554363307e-05, "loss": 0.2217, "step": 8779000 }, { "epoch": 5.26, "learning_rate": 1.9183609588802744e-05, "loss": 0.2329, "step": 8779500 }, { "epoch": 5.26, "learning_rate": 1.9181509623242178e-05, "loss": 0.232, "step": 8780000 }, { "epoch": 5.26, "learning_rate": 1.9179409657681615e-05, "loss": 0.2214, "step": 8780500 }, { "epoch": 5.26, "learning_rate": 1.917731809198329e-05, "loss": 0.2319, "step": 8781000 }, { "epoch": 5.26, "learning_rate": 1.917521812642273e-05, "loss": 0.2298, "step": 8781500 }, { "epoch": 5.27, "learning_rate": 1.9173118160862162e-05, "loss": 0.228, "step": 8782000 }, { "epoch": 5.27, "learning_rate": 1.9171018195301595e-05, "loss": 0.2328, "step": 8782500 }, { "epoch": 5.27, "learning_rate": 1.9168918229741032e-05, "loss": 0.2327, "step": 8783000 }, { "epoch": 5.27, "learning_rate": 1.916681826418047e-05, "loss": 0.2297, "step": 8783500 }, { "epoch": 5.27, "learning_rate": 1.9164718298619903e-05, "loss": 0.226, "step": 8784000 }, { "epoch": 5.27, "learning_rate": 1.916261833305934e-05, "loss": 0.2304, "step": 8784500 }, { "epoch": 5.27, "learning_rate": 1.9160522567429893e-05, "loss": 0.2307, "step": 8785000 }, { "epoch": 5.27, "learning_rate": 1.915842260186933e-05, "loss": 0.2307, "step": 8785500 }, { "epoch": 5.27, "learning_rate": 1.9156322636308763e-05, "loss": 0.2247, "step": 8786000 }, { "epoch": 5.27, "learning_rate": 1.9154226870679324e-05, "loss": 0.2329, "step": 8786500 }, { "epoch": 5.27, "learning_rate": 1.9152126905118754e-05, "loss": 0.229, "step": 8787000 }, { "epoch": 5.27, "learning_rate": 1.915002693955819e-05, "loss": 0.2309, "step": 8787500 }, { "epoch": 5.27, "learning_rate": 1.9147926973997627e-05, "loss": 0.2252, "step": 8788000 }, { "epoch": 5.27, "learning_rate": 1.914582700843706e-05, "loss": 0.2313, "step": 8788500 }, { "epoch": 5.27, "learning_rate": 1.9143727042876498e-05, "loss": 0.2241, "step": 8789000 }, { "epoch": 5.27, "learning_rate": 1.914162707731593e-05, "loss": 0.2244, "step": 8789500 }, { "epoch": 5.27, "learning_rate": 1.9139531311686488e-05, "loss": 0.2294, "step": 8790000 }, { "epoch": 5.27, "learning_rate": 1.9137431346125925e-05, "loss": 0.2277, "step": 8790500 }, { "epoch": 5.27, "learning_rate": 1.913533138056536e-05, "loss": 0.2226, "step": 8791000 }, { "epoch": 5.27, "learning_rate": 1.9133231415004795e-05, "loss": 0.2329, "step": 8791500 }, { "epoch": 5.27, "learning_rate": 1.913113144944423e-05, "loss": 0.2251, "step": 8792000 }, { "epoch": 5.27, "learning_rate": 1.9129031483883662e-05, "loss": 0.229, "step": 8792500 }, { "epoch": 5.27, "learning_rate": 1.912693571825422e-05, "loss": 0.228, "step": 8793000 }, { "epoch": 5.27, "learning_rate": 1.9124835752693656e-05, "loss": 0.2288, "step": 8793500 }, { "epoch": 5.27, "learning_rate": 1.9122735787133093e-05, "loss": 0.2277, "step": 8794000 }, { "epoch": 5.27, "learning_rate": 1.9120635821572526e-05, "loss": 0.2268, "step": 8794500 }, { "epoch": 5.27, "learning_rate": 1.911853585601196e-05, "loss": 0.2325, "step": 8795000 }, { "epoch": 5.27, "learning_rate": 1.9116440090382517e-05, "loss": 0.2231, "step": 8795500 }, { "epoch": 5.27, "learning_rate": 1.9114340124821954e-05, "loss": 0.2359, "step": 8796000 }, { "epoch": 5.27, "learning_rate": 1.9112240159261387e-05, "loss": 0.2299, "step": 8796500 }, { "epoch": 5.27, "learning_rate": 1.911014019370082e-05, "loss": 0.2343, "step": 8797000 }, { "epoch": 5.27, "learning_rate": 1.910804442807138e-05, "loss": 0.2294, "step": 8797500 }, { "epoch": 5.27, "learning_rate": 1.9105944462510814e-05, "loss": 0.2289, "step": 8798000 }, { "epoch": 5.28, "learning_rate": 1.910384449695025e-05, "loss": 0.2271, "step": 8798500 }, { "epoch": 5.28, "learning_rate": 1.9101744531389685e-05, "loss": 0.2339, "step": 8799000 }, { "epoch": 5.28, "learning_rate": 1.9099644565829118e-05, "loss": 0.229, "step": 8799500 }, { "epoch": 5.28, "learning_rate": 1.9097544600268555e-05, "loss": 0.2235, "step": 8800000 }, { "epoch": 5.28, "eval_loss": 0.21679942309856415, "eval_runtime": 1460.3289, "eval_samples_per_second": 360.686, "eval_steps_per_second": 60.115, "step": 8800000 }, { "epoch": 5.28, "learning_rate": 1.9095444634707992e-05, "loss": 0.2303, "step": 8800500 }, { "epoch": 5.28, "learning_rate": 1.9093344669147425e-05, "loss": 0.2304, "step": 8801000 }, { "epoch": 5.28, "learning_rate": 1.9091248903517982e-05, "loss": 0.2287, "step": 8801500 }, { "epoch": 5.28, "learning_rate": 1.9089148937957416e-05, "loss": 0.226, "step": 8802000 }, { "epoch": 5.28, "learning_rate": 1.9087048972396852e-05, "loss": 0.2255, "step": 8802500 }, { "epoch": 5.28, "learning_rate": 1.908494900683629e-05, "loss": 0.2303, "step": 8803000 }, { "epoch": 5.28, "learning_rate": 1.9082853241206846e-05, "loss": 0.2303, "step": 8803500 }, { "epoch": 5.28, "learning_rate": 1.90807574755774e-05, "loss": 0.2262, "step": 8804000 }, { "epoch": 5.28, "learning_rate": 1.9078657510016837e-05, "loss": 0.2286, "step": 8804500 }, { "epoch": 5.28, "learning_rate": 1.907655754445627e-05, "loss": 0.2308, "step": 8805000 }, { "epoch": 5.28, "learning_rate": 1.9074457578895707e-05, "loss": 0.2328, "step": 8805500 }, { "epoch": 5.28, "learning_rate": 1.907235761333514e-05, "loss": 0.2289, "step": 8806000 }, { "epoch": 5.28, "learning_rate": 1.9070257647774574e-05, "loss": 0.2244, "step": 8806500 }, { "epoch": 5.28, "learning_rate": 1.906815768221401e-05, "loss": 0.2284, "step": 8807000 }, { "epoch": 5.28, "learning_rate": 1.9066061916584568e-05, "loss": 0.2286, "step": 8807500 }, { "epoch": 5.28, "learning_rate": 1.9063961951024005e-05, "loss": 0.2293, "step": 8808000 }, { "epoch": 5.28, "learning_rate": 1.9061861985463438e-05, "loss": 0.2312, "step": 8808500 }, { "epoch": 5.28, "learning_rate": 1.905976201990287e-05, "loss": 0.2271, "step": 8809000 }, { "epoch": 5.28, "learning_rate": 1.905766205434231e-05, "loss": 0.2234, "step": 8809500 }, { "epoch": 5.28, "learning_rate": 1.9055566288712865e-05, "loss": 0.2299, "step": 8810000 }, { "epoch": 5.28, "learning_rate": 1.9053466323152302e-05, "loss": 0.2325, "step": 8810500 }, { "epoch": 5.28, "learning_rate": 1.9051366357591736e-05, "loss": 0.2284, "step": 8811000 }, { "epoch": 5.28, "learning_rate": 1.904926639203117e-05, "loss": 0.228, "step": 8811500 }, { "epoch": 5.28, "learning_rate": 1.9047166426470606e-05, "loss": 0.233, "step": 8812000 }, { "epoch": 5.28, "learning_rate": 1.9045066460910043e-05, "loss": 0.2264, "step": 8812500 }, { "epoch": 5.28, "learning_rate": 1.9042966495349476e-05, "loss": 0.2282, "step": 8813000 }, { "epoch": 5.28, "learning_rate": 1.904086652978891e-05, "loss": 0.2216, "step": 8813500 }, { "epoch": 5.28, "learning_rate": 1.9038770764159467e-05, "loss": 0.2298, "step": 8814000 }, { "epoch": 5.28, "learning_rate": 1.9036670798598903e-05, "loss": 0.2327, "step": 8814500 }, { "epoch": 5.28, "learning_rate": 1.9034570833038337e-05, "loss": 0.2265, "step": 8815000 }, { "epoch": 5.29, "learning_rate": 1.9032470867477774e-05, "loss": 0.2361, "step": 8815500 }, { "epoch": 5.29, "learning_rate": 1.9030375101848327e-05, "loss": 0.2299, "step": 8816000 }, { "epoch": 5.29, "learning_rate": 1.9028275136287764e-05, "loss": 0.2237, "step": 8816500 }, { "epoch": 5.29, "learning_rate": 1.90261751707272e-05, "loss": 0.2327, "step": 8817000 }, { "epoch": 5.29, "learning_rate": 1.9024075205166635e-05, "loss": 0.2332, "step": 8817500 }, { "epoch": 5.29, "learning_rate": 1.902197943953719e-05, "loss": 0.2291, "step": 8818000 }, { "epoch": 5.29, "learning_rate": 1.9019879473976625e-05, "loss": 0.2299, "step": 8818500 }, { "epoch": 5.29, "learning_rate": 1.9017779508416062e-05, "loss": 0.2285, "step": 8819000 }, { "epoch": 5.29, "learning_rate": 1.90156795428555e-05, "loss": 0.2315, "step": 8819500 }, { "epoch": 5.29, "learning_rate": 1.9013583777226056e-05, "loss": 0.23, "step": 8820000 }, { "epoch": 5.29, "learning_rate": 1.9011483811665486e-05, "loss": 0.228, "step": 8820500 }, { "epoch": 5.29, "learning_rate": 1.9009383846104923e-05, "loss": 0.2268, "step": 8821000 }, { "epoch": 5.29, "learning_rate": 1.900728388054436e-05, "loss": 0.2296, "step": 8821500 }, { "epoch": 5.29, "learning_rate": 1.9005188114914916e-05, "loss": 0.2265, "step": 8822000 }, { "epoch": 5.29, "learning_rate": 1.900309234928547e-05, "loss": 0.2333, "step": 8822500 }, { "epoch": 5.29, "learning_rate": 1.9000992383724907e-05, "loss": 0.2223, "step": 8823000 }, { "epoch": 5.29, "learning_rate": 1.899889241816434e-05, "loss": 0.2256, "step": 8823500 }, { "epoch": 5.29, "learning_rate": 1.8996792452603777e-05, "loss": 0.2262, "step": 8824000 }, { "epoch": 5.29, "learning_rate": 1.8994696686974334e-05, "loss": 0.2314, "step": 8824500 }, { "epoch": 5.29, "learning_rate": 1.8992596721413767e-05, "loss": 0.2279, "step": 8825000 }, { "epoch": 5.29, "learning_rate": 1.8990496755853204e-05, "loss": 0.2267, "step": 8825500 }, { "epoch": 5.29, "learning_rate": 1.8988396790292638e-05, "loss": 0.2272, "step": 8826000 }, { "epoch": 5.29, "learning_rate": 1.8986296824732075e-05, "loss": 0.2347, "step": 8826500 }, { "epoch": 5.29, "learning_rate": 1.898419685917151e-05, "loss": 0.23, "step": 8827000 }, { "epoch": 5.29, "learning_rate": 1.898209689361094e-05, "loss": 0.23, "step": 8827500 }, { "epoch": 5.29, "learning_rate": 1.897999692805038e-05, "loss": 0.2254, "step": 8828000 }, { "epoch": 5.29, "learning_rate": 1.8977901162420935e-05, "loss": 0.225, "step": 8828500 }, { "epoch": 5.29, "learning_rate": 1.8975801196860372e-05, "loss": 0.2262, "step": 8829000 }, { "epoch": 5.29, "learning_rate": 1.897370123129981e-05, "loss": 0.2233, "step": 8829500 }, { "epoch": 5.29, "learning_rate": 1.897160126573924e-05, "loss": 0.2287, "step": 8830000 }, { "epoch": 5.29, "learning_rate": 1.8969501300178676e-05, "loss": 0.2274, "step": 8830500 }, { "epoch": 5.29, "learning_rate": 1.8967401334618113e-05, "loss": 0.2353, "step": 8831000 }, { "epoch": 5.29, "learning_rate": 1.8965301369057546e-05, "loss": 0.23, "step": 8831500 }, { "epoch": 5.3, "learning_rate": 1.8963201403496983e-05, "loss": 0.2281, "step": 8832000 }, { "epoch": 5.3, "learning_rate": 1.8961105637867537e-05, "loss": 0.2264, "step": 8832500 }, { "epoch": 5.3, "learning_rate": 1.8959005672306974e-05, "loss": 0.2261, "step": 8833000 }, { "epoch": 5.3, "learning_rate": 1.895690570674641e-05, "loss": 0.2327, "step": 8833500 }, { "epoch": 5.3, "learning_rate": 1.8954805741185844e-05, "loss": 0.2243, "step": 8834000 }, { "epoch": 5.3, "learning_rate": 1.89527099755564e-05, "loss": 0.2308, "step": 8834500 }, { "epoch": 5.3, "learning_rate": 1.8950610009995834e-05, "loss": 0.2293, "step": 8835000 }, { "epoch": 5.3, "learning_rate": 1.894851004443527e-05, "loss": 0.2264, "step": 8835500 }, { "epoch": 5.3, "learning_rate": 1.8946410078874705e-05, "loss": 0.2299, "step": 8836000 }, { "epoch": 5.3, "learning_rate": 1.8944318513176385e-05, "loss": 0.225, "step": 8836500 }, { "epoch": 5.3, "learning_rate": 1.894221854761582e-05, "loss": 0.2314, "step": 8837000 }, { "epoch": 5.3, "learning_rate": 1.8940118582055252e-05, "loss": 0.2333, "step": 8837500 }, { "epoch": 5.3, "learning_rate": 1.893801861649469e-05, "loss": 0.2353, "step": 8838000 }, { "epoch": 5.3, "learning_rate": 1.8935918650934126e-05, "loss": 0.2281, "step": 8838500 }, { "epoch": 5.3, "learning_rate": 1.893381868537356e-05, "loss": 0.2312, "step": 8839000 }, { "epoch": 5.3, "learning_rate": 1.8931722919744116e-05, "loss": 0.229, "step": 8839500 }, { "epoch": 5.3, "learning_rate": 1.892962295418355e-05, "loss": 0.2306, "step": 8840000 }, { "epoch": 5.3, "learning_rate": 1.8927522988622986e-05, "loss": 0.2249, "step": 8840500 }, { "epoch": 5.3, "learning_rate": 1.8925423023062423e-05, "loss": 0.2298, "step": 8841000 }, { "epoch": 5.3, "learning_rate": 1.8923323057501857e-05, "loss": 0.2344, "step": 8841500 }, { "epoch": 5.3, "learning_rate": 1.8921227291872414e-05, "loss": 0.2376, "step": 8842000 }, { "epoch": 5.3, "learning_rate": 1.8919127326311847e-05, "loss": 0.2316, "step": 8842500 }, { "epoch": 5.3, "learning_rate": 1.8917027360751284e-05, "loss": 0.2255, "step": 8843000 }, { "epoch": 5.3, "learning_rate": 1.891492739519072e-05, "loss": 0.2348, "step": 8843500 }, { "epoch": 5.3, "learning_rate": 1.8912827429630154e-05, "loss": 0.2316, "step": 8844000 }, { "epoch": 5.3, "learning_rate": 1.8910731664000708e-05, "loss": 0.2318, "step": 8844500 }, { "epoch": 5.3, "learning_rate": 1.8908631698440145e-05, "loss": 0.2316, "step": 8845000 }, { "epoch": 5.3, "learning_rate": 1.890653173287958e-05, "loss": 0.2281, "step": 8845500 }, { "epoch": 5.3, "learning_rate": 1.890443176731902e-05, "loss": 0.2275, "step": 8846000 }, { "epoch": 5.3, "learning_rate": 1.890233180175845e-05, "loss": 0.2288, "step": 8846500 }, { "epoch": 5.3, "learning_rate": 1.8900231836197885e-05, "loss": 0.2289, "step": 8847000 }, { "epoch": 5.3, "learning_rate": 1.8898131870637322e-05, "loss": 0.2307, "step": 8847500 }, { "epoch": 5.3, "learning_rate": 1.8896031905076756e-05, "loss": 0.2307, "step": 8848000 }, { "epoch": 5.31, "learning_rate": 1.8893936139447313e-05, "loss": 0.2232, "step": 8848500 }, { "epoch": 5.31, "learning_rate": 1.8891836173886746e-05, "loss": 0.2326, "step": 8849000 }, { "epoch": 5.31, "learning_rate": 1.8889736208326183e-05, "loss": 0.2232, "step": 8849500 }, { "epoch": 5.31, "learning_rate": 1.888763624276562e-05, "loss": 0.2327, "step": 8850000 }, { "epoch": 5.31, "learning_rate": 1.8885540477136177e-05, "loss": 0.2277, "step": 8850500 }, { "epoch": 5.31, "learning_rate": 1.888344471150673e-05, "loss": 0.2293, "step": 8851000 }, { "epoch": 5.31, "learning_rate": 1.8881344745946164e-05, "loss": 0.2313, "step": 8851500 }, { "epoch": 5.31, "learning_rate": 1.88792447803856e-05, "loss": 0.2328, "step": 8852000 }, { "epoch": 5.31, "learning_rate": 1.8877144814825037e-05, "loss": 0.2353, "step": 8852500 }, { "epoch": 5.31, "learning_rate": 1.8875044849264474e-05, "loss": 0.2309, "step": 8853000 }, { "epoch": 5.31, "learning_rate": 1.8872944883703908e-05, "loss": 0.2293, "step": 8853500 }, { "epoch": 5.31, "learning_rate": 1.887084491814334e-05, "loss": 0.2354, "step": 8854000 }, { "epoch": 5.31, "learning_rate": 1.8868749152513898e-05, "loss": 0.2297, "step": 8854500 }, { "epoch": 5.31, "learning_rate": 1.8866649186953335e-05, "loss": 0.226, "step": 8855000 }, { "epoch": 5.31, "learning_rate": 1.886454922139277e-05, "loss": 0.2231, "step": 8855500 }, { "epoch": 5.31, "learning_rate": 1.8862449255832205e-05, "loss": 0.2244, "step": 8856000 }, { "epoch": 5.31, "learning_rate": 1.886034929027164e-05, "loss": 0.232, "step": 8856500 }, { "epoch": 5.31, "learning_rate": 1.8858249324711076e-05, "loss": 0.2274, "step": 8857000 }, { "epoch": 5.31, "learning_rate": 1.8856153559081633e-05, "loss": 0.2282, "step": 8857500 }, { "epoch": 5.31, "learning_rate": 1.8854053593521066e-05, "loss": 0.2321, "step": 8858000 }, { "epoch": 5.31, "learning_rate": 1.88519536279605e-05, "loss": 0.2328, "step": 8858500 }, { "epoch": 5.31, "learning_rate": 1.8849853662399936e-05, "loss": 0.2338, "step": 8859000 }, { "epoch": 5.31, "learning_rate": 1.8847757896770493e-05, "loss": 0.2305, "step": 8859500 }, { "epoch": 5.31, "learning_rate": 1.884565793120993e-05, "loss": 0.2273, "step": 8860000 }, { "epoch": 5.31, "learning_rate": 1.8843557965649364e-05, "loss": 0.229, "step": 8860500 }, { "epoch": 5.31, "learning_rate": 1.8841458000088797e-05, "loss": 0.2278, "step": 8861000 }, { "epoch": 5.31, "learning_rate": 1.8839358034528234e-05, "loss": 0.227, "step": 8861500 }, { "epoch": 5.31, "learning_rate": 1.8837258068967667e-05, "loss": 0.2258, "step": 8862000 }, { "epoch": 5.31, "learning_rate": 1.8835158103407104e-05, "loss": 0.2319, "step": 8862500 }, { "epoch": 5.31, "learning_rate": 1.883305813784654e-05, "loss": 0.2293, "step": 8863000 }, { "epoch": 5.31, "learning_rate": 1.8830962372217095e-05, "loss": 0.2246, "step": 8863500 }, { "epoch": 5.31, "learning_rate": 1.882886240665653e-05, "loss": 0.2272, "step": 8864000 }, { "epoch": 5.31, "learning_rate": 1.8826762441095965e-05, "loss": 0.2295, "step": 8864500 }, { "epoch": 5.31, "learning_rate": 1.8824662475535402e-05, "loss": 0.2263, "step": 8865000 }, { "epoch": 5.32, "learning_rate": 1.882256670990596e-05, "loss": 0.2246, "step": 8865500 }, { "epoch": 5.32, "learning_rate": 1.8820470944276512e-05, "loss": 0.2264, "step": 8866000 }, { "epoch": 5.32, "learning_rate": 1.881837517864707e-05, "loss": 0.2287, "step": 8866500 }, { "epoch": 5.32, "learning_rate": 1.8816275213086506e-05, "loss": 0.2265, "step": 8867000 }, { "epoch": 5.32, "learning_rate": 1.8814175247525943e-05, "loss": 0.2352, "step": 8867500 }, { "epoch": 5.32, "learning_rate": 1.8812075281965373e-05, "loss": 0.234, "step": 8868000 }, { "epoch": 5.32, "learning_rate": 1.880997531640481e-05, "loss": 0.2293, "step": 8868500 }, { "epoch": 5.32, "learning_rate": 1.8807875350844247e-05, "loss": 0.2331, "step": 8869000 }, { "epoch": 5.32, "learning_rate": 1.880577538528368e-05, "loss": 0.2258, "step": 8869500 }, { "epoch": 5.32, "learning_rate": 1.8803675419723117e-05, "loss": 0.2221, "step": 8870000 }, { "epoch": 5.32, "learning_rate": 1.880157545416255e-05, "loss": 0.2207, "step": 8870500 }, { "epoch": 5.32, "learning_rate": 1.8799475488601987e-05, "loss": 0.2289, "step": 8871000 }, { "epoch": 5.32, "learning_rate": 1.879737552304142e-05, "loss": 0.2305, "step": 8871500 }, { "epoch": 5.32, "learning_rate": 1.8795275557480858e-05, "loss": 0.2255, "step": 8872000 }, { "epoch": 5.32, "learning_rate": 1.8793179791851415e-05, "loss": 0.2287, "step": 8872500 }, { "epoch": 5.32, "learning_rate": 1.8791079826290848e-05, "loss": 0.2262, "step": 8873000 }, { "epoch": 5.32, "learning_rate": 1.878897986073028e-05, "loss": 0.2296, "step": 8873500 }, { "epoch": 5.32, "learning_rate": 1.878687989516972e-05, "loss": 0.2317, "step": 8874000 }, { "epoch": 5.32, "learning_rate": 1.8784784129540275e-05, "loss": 0.2283, "step": 8874500 }, { "epoch": 5.32, "learning_rate": 1.878268836391083e-05, "loss": 0.234, "step": 8875000 }, { "epoch": 5.32, "learning_rate": 1.8780588398350266e-05, "loss": 0.2247, "step": 8875500 }, { "epoch": 5.32, "learning_rate": 1.8778488432789703e-05, "loss": 0.2316, "step": 8876000 }, { "epoch": 5.32, "learning_rate": 1.877639266716026e-05, "loss": 0.2296, "step": 8876500 }, { "epoch": 5.32, "learning_rate": 1.8774292701599696e-05, "loss": 0.2305, "step": 8877000 }, { "epoch": 5.32, "learning_rate": 1.8772192736039127e-05, "loss": 0.2295, "step": 8877500 }, { "epoch": 5.32, "learning_rate": 1.8770092770478563e-05, "loss": 0.2304, "step": 8878000 }, { "epoch": 5.32, "learning_rate": 1.8767992804918e-05, "loss": 0.2304, "step": 8878500 }, { "epoch": 5.32, "learning_rate": 1.8765892839357434e-05, "loss": 0.2262, "step": 8879000 }, { "epoch": 5.32, "learning_rate": 1.876379287379687e-05, "loss": 0.231, "step": 8879500 }, { "epoch": 5.32, "learning_rate": 1.8761692908236304e-05, "loss": 0.2314, "step": 8880000 }, { "epoch": 5.32, "learning_rate": 1.8759592942675737e-05, "loss": 0.2259, "step": 8880500 }, { "epoch": 5.32, "learning_rate": 1.8757492977115174e-05, "loss": 0.2256, "step": 8881000 }, { "epoch": 5.32, "learning_rate": 1.875539301155461e-05, "loss": 0.2251, "step": 8881500 }, { "epoch": 5.33, "learning_rate": 1.8753297245925168e-05, "loss": 0.2246, "step": 8882000 }, { "epoch": 5.33, "learning_rate": 1.87511972803646e-05, "loss": 0.2307, "step": 8882500 }, { "epoch": 5.33, "learning_rate": 1.8749097314804035e-05, "loss": 0.2357, "step": 8883000 }, { "epoch": 5.33, "learning_rate": 1.8746997349243472e-05, "loss": 0.2313, "step": 8883500 }, { "epoch": 5.33, "learning_rate": 1.874490158361403e-05, "loss": 0.2289, "step": 8884000 }, { "epoch": 5.33, "learning_rate": 1.8742801618053466e-05, "loss": 0.2342, "step": 8884500 }, { "epoch": 5.33, "learning_rate": 1.87407016524929e-05, "loss": 0.2288, "step": 8885000 }, { "epoch": 5.33, "learning_rate": 1.8738601686932333e-05, "loss": 0.2272, "step": 8885500 }, { "epoch": 5.33, "learning_rate": 1.873650172137177e-05, "loss": 0.2274, "step": 8886000 }, { "epoch": 5.33, "learning_rate": 1.8734401755811206e-05, "loss": 0.2271, "step": 8886500 }, { "epoch": 5.33, "learning_rate": 1.873230179025064e-05, "loss": 0.2276, "step": 8887000 }, { "epoch": 5.33, "learning_rate": 1.8730206024621193e-05, "loss": 0.2297, "step": 8887500 }, { "epoch": 5.33, "learning_rate": 1.872810605906063e-05, "loss": 0.2292, "step": 8888000 }, { "epoch": 5.33, "learning_rate": 1.8726006093500067e-05, "loss": 0.2331, "step": 8888500 }, { "epoch": 5.33, "learning_rate": 1.8723906127939504e-05, "loss": 0.2318, "step": 8889000 }, { "epoch": 5.33, "learning_rate": 1.8721806162378937e-05, "loss": 0.2258, "step": 8889500 }, { "epoch": 5.33, "learning_rate": 1.871970619681837e-05, "loss": 0.2272, "step": 8890000 }, { "epoch": 5.33, "learning_rate": 1.8717606231257808e-05, "loss": 0.231, "step": 8890500 }, { "epoch": 5.33, "learning_rate": 1.871550626569724e-05, "loss": 0.2304, "step": 8891000 }, { "epoch": 5.33, "learning_rate": 1.8713410500067798e-05, "loss": 0.2279, "step": 8891500 }, { "epoch": 5.33, "learning_rate": 1.8711310534507235e-05, "loss": 0.2299, "step": 8892000 }, { "epoch": 5.33, "learning_rate": 1.870921056894667e-05, "loss": 0.2281, "step": 8892500 }, { "epoch": 5.33, "learning_rate": 1.8707114803317225e-05, "loss": 0.2322, "step": 8893000 }, { "epoch": 5.33, "learning_rate": 1.8705014837756662e-05, "loss": 0.2231, "step": 8893500 }, { "epoch": 5.33, "learning_rate": 1.8702914872196096e-05, "loss": 0.2283, "step": 8894000 }, { "epoch": 5.33, "learning_rate": 1.8700814906635532e-05, "loss": 0.2231, "step": 8894500 }, { "epoch": 5.33, "learning_rate": 1.8698714941074966e-05, "loss": 0.2264, "step": 8895000 }, { "epoch": 5.33, "learning_rate": 1.86966149755144e-05, "loss": 0.2316, "step": 8895500 }, { "epoch": 5.33, "learning_rate": 1.8694515009953836e-05, "loss": 0.226, "step": 8896000 }, { "epoch": 5.33, "learning_rate": 1.8692419244324393e-05, "loss": 0.2269, "step": 8896500 }, { "epoch": 5.33, "learning_rate": 1.8690319278763827e-05, "loss": 0.2278, "step": 8897000 }, { "epoch": 5.33, "learning_rate": 1.8688219313203263e-05, "loss": 0.2282, "step": 8897500 }, { "epoch": 5.33, "learning_rate": 1.8686119347642697e-05, "loss": 0.2335, "step": 8898000 }, { "epoch": 5.34, "learning_rate": 1.8684019382082134e-05, "loss": 0.2322, "step": 8898500 }, { "epoch": 5.34, "learning_rate": 1.868191941652157e-05, "loss": 0.234, "step": 8899000 }, { "epoch": 5.34, "learning_rate": 1.8679819450961004e-05, "loss": 0.2312, "step": 8899500 }, { "epoch": 5.34, "learning_rate": 1.8677719485400438e-05, "loss": 0.229, "step": 8900000 }, { "epoch": 5.34, "eval_loss": 0.21580925583839417, "eval_runtime": 1461.805, "eval_samples_per_second": 360.322, "eval_steps_per_second": 60.054, "step": 8900000 }, { "epoch": 5.34, "learning_rate": 1.8675623719770995e-05, "loss": 0.2272, "step": 8900500 }, { "epoch": 5.34, "learning_rate": 1.867352375421043e-05, "loss": 0.233, "step": 8901000 }, { "epoch": 5.34, "learning_rate": 1.8671423788649868e-05, "loss": 0.2243, "step": 8901500 }, { "epoch": 5.34, "learning_rate": 1.86693238230893e-05, "loss": 0.2254, "step": 8902000 }, { "epoch": 5.34, "learning_rate": 1.8667228057459855e-05, "loss": 0.2273, "step": 8902500 }, { "epoch": 5.34, "learning_rate": 1.8665128091899292e-05, "loss": 0.2311, "step": 8903000 }, { "epoch": 5.34, "learning_rate": 1.866303232626985e-05, "loss": 0.2314, "step": 8903500 }, { "epoch": 5.34, "learning_rate": 1.8660932360709286e-05, "loss": 0.2317, "step": 8904000 }, { "epoch": 5.34, "learning_rate": 1.865883239514872e-05, "loss": 0.2296, "step": 8904500 }, { "epoch": 5.34, "learning_rate": 1.8656732429588153e-05, "loss": 0.2298, "step": 8905000 }, { "epoch": 5.34, "learning_rate": 1.865463246402759e-05, "loss": 0.2245, "step": 8905500 }, { "epoch": 5.34, "learning_rate": 1.8652536698398147e-05, "loss": 0.2303, "step": 8906000 }, { "epoch": 5.34, "learning_rate": 1.865043673283758e-05, "loss": 0.2288, "step": 8906500 }, { "epoch": 5.34, "learning_rate": 1.8648336767277017e-05, "loss": 0.2295, "step": 8907000 }, { "epoch": 5.34, "learning_rate": 1.864623680171645e-05, "loss": 0.2277, "step": 8907500 }, { "epoch": 5.34, "learning_rate": 1.8644136836155887e-05, "loss": 0.228, "step": 8908000 }, { "epoch": 5.34, "learning_rate": 1.8642036870595324e-05, "loss": 0.2296, "step": 8908500 }, { "epoch": 5.34, "learning_rate": 1.8639936905034758e-05, "loss": 0.2277, "step": 8909000 }, { "epoch": 5.34, "learning_rate": 1.863783693947419e-05, "loss": 0.2277, "step": 8909500 }, { "epoch": 5.34, "learning_rate": 1.8635741173844748e-05, "loss": 0.2302, "step": 8910000 }, { "epoch": 5.34, "learning_rate": 1.8633641208284185e-05, "loss": 0.2312, "step": 8910500 }, { "epoch": 5.34, "learning_rate": 1.8631541242723618e-05, "loss": 0.2249, "step": 8911000 }, { "epoch": 5.34, "learning_rate": 1.8629441277163055e-05, "loss": 0.2299, "step": 8911500 }, { "epoch": 5.34, "learning_rate": 1.8627349711464732e-05, "loss": 0.2288, "step": 8912000 }, { "epoch": 5.34, "learning_rate": 1.8625249745904166e-05, "loss": 0.2278, "step": 8912500 }, { "epoch": 5.34, "learning_rate": 1.8623149780343603e-05, "loss": 0.2224, "step": 8913000 }, { "epoch": 5.34, "learning_rate": 1.862104981478304e-05, "loss": 0.2333, "step": 8913500 }, { "epoch": 5.34, "learning_rate": 1.8618949849222473e-05, "loss": 0.2314, "step": 8914000 }, { "epoch": 5.34, "learning_rate": 1.8616849883661906e-05, "loss": 0.2225, "step": 8914500 }, { "epoch": 5.34, "learning_rate": 1.8614749918101343e-05, "loss": 0.2258, "step": 8915000 }, { "epoch": 5.35, "learning_rate": 1.86126541524719e-05, "loss": 0.2275, "step": 8915500 }, { "epoch": 5.35, "learning_rate": 1.8610554186911334e-05, "loss": 0.23, "step": 8916000 }, { "epoch": 5.35, "learning_rate": 1.8608454221350767e-05, "loss": 0.227, "step": 8916500 }, { "epoch": 5.35, "learning_rate": 1.8606354255790204e-05, "loss": 0.2308, "step": 8917000 }, { "epoch": 5.35, "learning_rate": 1.860425429022964e-05, "loss": 0.2276, "step": 8917500 }, { "epoch": 5.35, "learning_rate": 1.8602154324669078e-05, "loss": 0.2273, "step": 8918000 }, { "epoch": 5.35, "learning_rate": 1.860005435910851e-05, "loss": 0.2281, "step": 8918500 }, { "epoch": 5.35, "learning_rate": 1.8597954393547944e-05, "loss": 0.2292, "step": 8919000 }, { "epoch": 5.35, "learning_rate": 1.85958586279185e-05, "loss": 0.2283, "step": 8919500 }, { "epoch": 5.35, "learning_rate": 1.8593758662357938e-05, "loss": 0.2305, "step": 8920000 }, { "epoch": 5.35, "learning_rate": 1.8591662896728495e-05, "loss": 0.2275, "step": 8920500 }, { "epoch": 5.35, "learning_rate": 1.858956293116793e-05, "loss": 0.2318, "step": 8921000 }, { "epoch": 5.35, "learning_rate": 1.8587462965607362e-05, "loss": 0.2238, "step": 8921500 }, { "epoch": 5.35, "learning_rate": 1.85853630000468e-05, "loss": 0.2283, "step": 8922000 }, { "epoch": 5.35, "learning_rate": 1.8583263034486236e-05, "loss": 0.2253, "step": 8922500 }, { "epoch": 5.35, "learning_rate": 1.858116306892567e-05, "loss": 0.2256, "step": 8923000 }, { "epoch": 5.35, "learning_rate": 1.8579063103365106e-05, "loss": 0.2301, "step": 8923500 }, { "epoch": 5.35, "learning_rate": 1.857696733773566e-05, "loss": 0.2275, "step": 8924000 }, { "epoch": 5.35, "learning_rate": 1.8574867372175097e-05, "loss": 0.2232, "step": 8924500 }, { "epoch": 5.35, "learning_rate": 1.8572767406614533e-05, "loss": 0.2298, "step": 8925000 }, { "epoch": 5.35, "learning_rate": 1.8570667441053967e-05, "loss": 0.2254, "step": 8925500 }, { "epoch": 5.35, "learning_rate": 1.85685674754934e-05, "loss": 0.2295, "step": 8926000 }, { "epoch": 5.35, "learning_rate": 1.8566467509932837e-05, "loss": 0.2291, "step": 8926500 }, { "epoch": 5.35, "learning_rate": 1.856436754437227e-05, "loss": 0.232, "step": 8927000 }, { "epoch": 5.35, "learning_rate": 1.8562267578811707e-05, "loss": 0.2319, "step": 8927500 }, { "epoch": 5.35, "learning_rate": 1.8560171813182264e-05, "loss": 0.2308, "step": 8928000 }, { "epoch": 5.35, "learning_rate": 1.8558071847621698e-05, "loss": 0.2308, "step": 8928500 }, { "epoch": 5.35, "learning_rate": 1.8555971882061135e-05, "loss": 0.2288, "step": 8929000 }, { "epoch": 5.35, "learning_rate": 1.8553871916500568e-05, "loss": 0.23, "step": 8929500 }, { "epoch": 5.35, "learning_rate": 1.855178035080225e-05, "loss": 0.2305, "step": 8930000 }, { "epoch": 5.35, "learning_rate": 1.854968038524168e-05, "loss": 0.2269, "step": 8930500 }, { "epoch": 5.35, "learning_rate": 1.8547580419681116e-05, "loss": 0.2268, "step": 8931000 }, { "epoch": 5.35, "learning_rate": 1.8545480454120552e-05, "loss": 0.2321, "step": 8931500 }, { "epoch": 5.36, "learning_rate": 1.854338048855999e-05, "loss": 0.2331, "step": 8932000 }, { "epoch": 5.36, "learning_rate": 1.8541280522999423e-05, "loss": 0.2296, "step": 8932500 }, { "epoch": 5.36, "learning_rate": 1.853918055743886e-05, "loss": 0.2275, "step": 8933000 }, { "epoch": 5.36, "learning_rate": 1.8537084791809413e-05, "loss": 0.2263, "step": 8933500 }, { "epoch": 5.36, "learning_rate": 1.853498482624885e-05, "loss": 0.2307, "step": 8934000 }, { "epoch": 5.36, "learning_rate": 1.8532884860688283e-05, "loss": 0.2234, "step": 8934500 }, { "epoch": 5.36, "learning_rate": 1.853078489512772e-05, "loss": 0.2317, "step": 8935000 }, { "epoch": 5.36, "learning_rate": 1.8528684929567154e-05, "loss": 0.227, "step": 8935500 }, { "epoch": 5.36, "learning_rate": 1.852658496400659e-05, "loss": 0.2279, "step": 8936000 }, { "epoch": 5.36, "learning_rate": 1.8524489198377148e-05, "loss": 0.2296, "step": 8936500 }, { "epoch": 5.36, "learning_rate": 1.852238923281658e-05, "loss": 0.2307, "step": 8937000 }, { "epoch": 5.36, "learning_rate": 1.8520289267256018e-05, "loss": 0.2254, "step": 8937500 }, { "epoch": 5.36, "learning_rate": 1.851818930169545e-05, "loss": 0.2234, "step": 8938000 }, { "epoch": 5.36, "learning_rate": 1.8516089336134885e-05, "loss": 0.2314, "step": 8938500 }, { "epoch": 5.36, "learning_rate": 1.8513993570505445e-05, "loss": 0.2331, "step": 8939000 }, { "epoch": 5.36, "learning_rate": 1.851189360494488e-05, "loss": 0.2315, "step": 8939500 }, { "epoch": 5.36, "learning_rate": 1.8509793639384315e-05, "loss": 0.2264, "step": 8940000 }, { "epoch": 5.36, "learning_rate": 1.850769367382375e-05, "loss": 0.23, "step": 8940500 }, { "epoch": 5.36, "learning_rate": 1.8505593708263182e-05, "loss": 0.2297, "step": 8941000 }, { "epoch": 5.36, "learning_rate": 1.850349374270262e-05, "loss": 0.2293, "step": 8941500 }, { "epoch": 5.36, "learning_rate": 1.8501393777142056e-05, "loss": 0.2255, "step": 8942000 }, { "epoch": 5.36, "learning_rate": 1.849929381158149e-05, "loss": 0.2272, "step": 8942500 }, { "epoch": 5.36, "learning_rate": 1.8497198045952047e-05, "loss": 0.2328, "step": 8943000 }, { "epoch": 5.36, "learning_rate": 1.8495102280322603e-05, "loss": 0.2324, "step": 8943500 }, { "epoch": 5.36, "learning_rate": 1.8493002314762037e-05, "loss": 0.2305, "step": 8944000 }, { "epoch": 5.36, "learning_rate": 1.8490902349201474e-05, "loss": 0.2283, "step": 8944500 }, { "epoch": 5.36, "learning_rate": 1.8488802383640907e-05, "loss": 0.232, "step": 8945000 }, { "epoch": 5.36, "learning_rate": 1.848670241808034e-05, "loss": 0.2309, "step": 8945500 }, { "epoch": 5.36, "learning_rate": 1.8484602452519778e-05, "loss": 0.2294, "step": 8946000 }, { "epoch": 5.36, "learning_rate": 1.8482502486959214e-05, "loss": 0.2238, "step": 8946500 }, { "epoch": 5.36, "learning_rate": 1.8480402521398648e-05, "loss": 0.2296, "step": 8947000 }, { "epoch": 5.36, "learning_rate": 1.8478306755769205e-05, "loss": 0.2256, "step": 8947500 }, { "epoch": 5.36, "learning_rate": 1.8476210990139762e-05, "loss": 0.231, "step": 8948000 }, { "epoch": 5.36, "learning_rate": 1.8474111024579195e-05, "loss": 0.2344, "step": 8948500 }, { "epoch": 5.37, "learning_rate": 1.8472011059018632e-05, "loss": 0.2247, "step": 8949000 }, { "epoch": 5.37, "learning_rate": 1.846991109345807e-05, "loss": 0.2231, "step": 8949500 }, { "epoch": 5.37, "learning_rate": 1.8467811127897502e-05, "loss": 0.2297, "step": 8950000 }, { "epoch": 5.37, "learning_rate": 1.846571536226806e-05, "loss": 0.229, "step": 8950500 }, { "epoch": 5.37, "learning_rate": 1.8463615396707493e-05, "loss": 0.2254, "step": 8951000 }, { "epoch": 5.37, "learning_rate": 1.846151543114693e-05, "loss": 0.2299, "step": 8951500 }, { "epoch": 5.37, "learning_rate": 1.8459415465586367e-05, "loss": 0.2294, "step": 8952000 }, { "epoch": 5.37, "learning_rate": 1.8457315500025797e-05, "loss": 0.2305, "step": 8952500 }, { "epoch": 5.37, "learning_rate": 1.8455215534465233e-05, "loss": 0.2307, "step": 8953000 }, { "epoch": 5.37, "learning_rate": 1.845311556890467e-05, "loss": 0.2325, "step": 8953500 }, { "epoch": 5.37, "learning_rate": 1.8451015603344107e-05, "loss": 0.2293, "step": 8954000 }, { "epoch": 5.37, "learning_rate": 1.844891983771466e-05, "loss": 0.2291, "step": 8954500 }, { "epoch": 5.37, "learning_rate": 1.8446819872154094e-05, "loss": 0.2284, "step": 8955000 }, { "epoch": 5.37, "learning_rate": 1.844471990659353e-05, "loss": 0.2273, "step": 8955500 }, { "epoch": 5.37, "learning_rate": 1.8442624140964088e-05, "loss": 0.2342, "step": 8956000 }, { "epoch": 5.37, "learning_rate": 1.8440524175403525e-05, "loss": 0.2266, "step": 8956500 }, { "epoch": 5.37, "learning_rate": 1.8438424209842958e-05, "loss": 0.2292, "step": 8957000 }, { "epoch": 5.37, "learning_rate": 1.8436324244282392e-05, "loss": 0.2288, "step": 8957500 }, { "epoch": 5.37, "learning_rate": 1.843422427872183e-05, "loss": 0.2265, "step": 8958000 }, { "epoch": 5.37, "learning_rate": 1.8432124313161265e-05, "loss": 0.2329, "step": 8958500 }, { "epoch": 5.37, "learning_rate": 1.8430028547531822e-05, "loss": 0.2358, "step": 8959000 }, { "epoch": 5.37, "learning_rate": 1.8427928581971252e-05, "loss": 0.2278, "step": 8959500 }, { "epoch": 5.37, "learning_rate": 1.842582861641069e-05, "loss": 0.2319, "step": 8960000 }, { "epoch": 5.37, "learning_rate": 1.8423728650850126e-05, "loss": 0.2285, "step": 8960500 }, { "epoch": 5.37, "learning_rate": 1.8421628685289563e-05, "loss": 0.2308, "step": 8961000 }, { "epoch": 5.37, "learning_rate": 1.841953291966012e-05, "loss": 0.2267, "step": 8961500 }, { "epoch": 5.37, "learning_rate": 1.841743295409955e-05, "loss": 0.2308, "step": 8962000 }, { "epoch": 5.37, "learning_rate": 1.8415332988538987e-05, "loss": 0.2285, "step": 8962500 }, { "epoch": 5.37, "learning_rate": 1.8413233022978424e-05, "loss": 0.2303, "step": 8963000 }, { "epoch": 5.37, "learning_rate": 1.8411133057417857e-05, "loss": 0.2278, "step": 8963500 }, { "epoch": 5.37, "learning_rate": 1.8409033091857294e-05, "loss": 0.2324, "step": 8964000 }, { "epoch": 5.37, "learning_rate": 1.8406937326227848e-05, "loss": 0.2291, "step": 8964500 }, { "epoch": 5.37, "learning_rate": 1.8404837360667284e-05, "loss": 0.23, "step": 8965000 }, { "epoch": 5.38, "learning_rate": 1.840273739510672e-05, "loss": 0.2264, "step": 8965500 }, { "epoch": 5.38, "learning_rate": 1.8400637429546155e-05, "loss": 0.2272, "step": 8966000 }, { "epoch": 5.38, "learning_rate": 1.839853746398559e-05, "loss": 0.2308, "step": 8966500 }, { "epoch": 5.38, "learning_rate": 1.8396437498425025e-05, "loss": 0.2285, "step": 8967000 }, { "epoch": 5.38, "learning_rate": 1.839433753286446e-05, "loss": 0.2267, "step": 8967500 }, { "epoch": 5.38, "learning_rate": 1.8392237567303895e-05, "loss": 0.2312, "step": 8968000 }, { "epoch": 5.38, "learning_rate": 1.8390141801674452e-05, "loss": 0.2275, "step": 8968500 }, { "epoch": 5.38, "learning_rate": 1.838804183611389e-05, "loss": 0.2288, "step": 8969000 }, { "epoch": 5.38, "learning_rate": 1.8385946070484443e-05, "loss": 0.2253, "step": 8969500 }, { "epoch": 5.38, "learning_rate": 1.838384610492388e-05, "loss": 0.2248, "step": 8970000 }, { "epoch": 5.38, "learning_rate": 1.8381746139363313e-05, "loss": 0.2229, "step": 8970500 }, { "epoch": 5.38, "learning_rate": 1.837964617380275e-05, "loss": 0.2274, "step": 8971000 }, { "epoch": 5.38, "learning_rate": 1.8377546208242187e-05, "loss": 0.2311, "step": 8971500 }, { "epoch": 5.38, "learning_rate": 1.837544624268162e-05, "loss": 0.2249, "step": 8972000 }, { "epoch": 5.38, "learning_rate": 1.8373346277121054e-05, "loss": 0.231, "step": 8972500 }, { "epoch": 5.38, "learning_rate": 1.837124631156049e-05, "loss": 0.2264, "step": 8973000 }, { "epoch": 5.38, "learning_rate": 1.8369150545931047e-05, "loss": 0.2283, "step": 8973500 }, { "epoch": 5.38, "learning_rate": 1.836705058037048e-05, "loss": 0.2263, "step": 8974000 }, { "epoch": 5.38, "learning_rate": 1.8364950614809914e-05, "loss": 0.229, "step": 8974500 }, { "epoch": 5.38, "learning_rate": 1.836285064924935e-05, "loss": 0.2307, "step": 8975000 }, { "epoch": 5.38, "learning_rate": 1.8360754883619908e-05, "loss": 0.2281, "step": 8975500 }, { "epoch": 5.38, "learning_rate": 1.8358659117990462e-05, "loss": 0.2262, "step": 8976000 }, { "epoch": 5.38, "learning_rate": 1.83565591524299e-05, "loss": 0.2318, "step": 8976500 }, { "epoch": 5.38, "learning_rate": 1.8354459186869335e-05, "loss": 0.234, "step": 8977000 }, { "epoch": 5.38, "learning_rate": 1.835235922130877e-05, "loss": 0.2276, "step": 8977500 }, { "epoch": 5.38, "learning_rate": 1.8350259255748206e-05, "loss": 0.2312, "step": 8978000 }, { "epoch": 5.38, "learning_rate": 1.8348159290187643e-05, "loss": 0.2253, "step": 8978500 }, { "epoch": 5.38, "learning_rate": 1.8346059324627076e-05, "loss": 0.2231, "step": 8979000 }, { "epoch": 5.38, "learning_rate": 1.834395935906651e-05, "loss": 0.2294, "step": 8979500 }, { "epoch": 5.38, "learning_rate": 1.8341863593437067e-05, "loss": 0.2254, "step": 8980000 }, { "epoch": 5.38, "learning_rate": 1.8339763627876503e-05, "loss": 0.2235, "step": 8980500 }, { "epoch": 5.38, "learning_rate": 1.833766366231594e-05, "loss": 0.2283, "step": 8981000 }, { "epoch": 5.38, "learning_rate": 1.833556369675537e-05, "loss": 0.2348, "step": 8981500 }, { "epoch": 5.39, "learning_rate": 1.833346793112593e-05, "loss": 0.2275, "step": 8982000 }, { "epoch": 5.39, "learning_rate": 1.8331367965565364e-05, "loss": 0.2275, "step": 8982500 }, { "epoch": 5.39, "learning_rate": 1.83292680000048e-05, "loss": 0.2313, "step": 8983000 }, { "epoch": 5.39, "learning_rate": 1.8327168034444234e-05, "loss": 0.2316, "step": 8983500 }, { "epoch": 5.39, "learning_rate": 1.832507646874591e-05, "loss": 0.2267, "step": 8984000 }, { "epoch": 5.39, "learning_rate": 1.832297650318535e-05, "loss": 0.2271, "step": 8984500 }, { "epoch": 5.39, "learning_rate": 1.8320876537624785e-05, "loss": 0.2326, "step": 8985000 }, { "epoch": 5.39, "learning_rate": 1.8318776572064215e-05, "loss": 0.2259, "step": 8985500 }, { "epoch": 5.39, "learning_rate": 1.8316676606503652e-05, "loss": 0.2275, "step": 8986000 }, { "epoch": 5.39, "learning_rate": 1.831458084087421e-05, "loss": 0.2301, "step": 8986500 }, { "epoch": 5.39, "learning_rate": 1.8312480875313646e-05, "loss": 0.2261, "step": 8987000 }, { "epoch": 5.39, "learning_rate": 1.831038090975308e-05, "loss": 0.2295, "step": 8987500 }, { "epoch": 5.39, "learning_rate": 1.8308280944192513e-05, "loss": 0.2307, "step": 8988000 }, { "epoch": 5.39, "learning_rate": 1.830618097863195e-05, "loss": 0.2301, "step": 8988500 }, { "epoch": 5.39, "learning_rate": 1.8304081013071387e-05, "loss": 0.2257, "step": 8989000 }, { "epoch": 5.39, "learning_rate": 1.830198104751082e-05, "loss": 0.2254, "step": 8989500 }, { "epoch": 5.39, "learning_rate": 1.8299881081950257e-05, "loss": 0.227, "step": 8990000 }, { "epoch": 5.39, "learning_rate": 1.829778531632081e-05, "loss": 0.2321, "step": 8990500 }, { "epoch": 5.39, "learning_rate": 1.8295685350760247e-05, "loss": 0.2282, "step": 8991000 }, { "epoch": 5.39, "learning_rate": 1.829358538519968e-05, "loss": 0.2271, "step": 8991500 }, { "epoch": 5.39, "learning_rate": 1.8291485419639118e-05, "loss": 0.2308, "step": 8992000 }, { "epoch": 5.39, "learning_rate": 1.8289385454078554e-05, "loss": 0.2253, "step": 8992500 }, { "epoch": 5.39, "learning_rate": 1.828728548851799e-05, "loss": 0.2289, "step": 8993000 }, { "epoch": 5.39, "learning_rate": 1.828518552295742e-05, "loss": 0.2378, "step": 8993500 }, { "epoch": 5.39, "learning_rate": 1.8283085557396858e-05, "loss": 0.2324, "step": 8994000 }, { "epoch": 5.39, "learning_rate": 1.8280989791767415e-05, "loss": 0.2264, "step": 8994500 }, { "epoch": 5.39, "learning_rate": 1.8278889826206852e-05, "loss": 0.2257, "step": 8995000 }, { "epoch": 5.39, "learning_rate": 1.8276789860646282e-05, "loss": 0.2248, "step": 8995500 }, { "epoch": 5.39, "learning_rate": 1.827468989508572e-05, "loss": 0.2272, "step": 8996000 }, { "epoch": 5.39, "learning_rate": 1.82725983293874e-05, "loss": 0.238, "step": 8996500 }, { "epoch": 5.39, "learning_rate": 1.8270498363826833e-05, "loss": 0.2258, "step": 8997000 }, { "epoch": 5.39, "learning_rate": 1.8268398398266266e-05, "loss": 0.225, "step": 8997500 }, { "epoch": 5.39, "learning_rate": 1.8266298432705703e-05, "loss": 0.2239, "step": 8998000 }, { "epoch": 5.39, "learning_rate": 1.8264198467145137e-05, "loss": 0.2353, "step": 8998500 }, { "epoch": 5.4, "learning_rate": 1.8262098501584573e-05, "loss": 0.2305, "step": 8999000 }, { "epoch": 5.4, "learning_rate": 1.826000273595513e-05, "loss": 0.2297, "step": 8999500 }, { "epoch": 5.4, "learning_rate": 1.8257902770394564e-05, "loss": 0.2279, "step": 9000000 }, { "epoch": 5.4, "eval_loss": 0.21521437168121338, "eval_runtime": 1529.0739, "eval_samples_per_second": 344.47, "eval_steps_per_second": 57.412, "step": 9000000 }, { "epoch": 5.4, "learning_rate": 1.8255802804834e-05, "loss": 0.2258, "step": 9000500 }, { "epoch": 5.4, "learning_rate": 1.8253702839273434e-05, "loss": 0.2265, "step": 9001000 }, { "epoch": 5.4, "learning_rate": 1.825160287371287e-05, "loss": 0.2331, "step": 9001500 }, { "epoch": 5.4, "learning_rate": 1.8249502908152308e-05, "loss": 0.2268, "step": 9002000 }, { "epoch": 5.4, "learning_rate": 1.824740294259174e-05, "loss": 0.2293, "step": 9002500 }, { "epoch": 5.4, "learning_rate": 1.8245302977031175e-05, "loss": 0.2288, "step": 9003000 }, { "epoch": 5.4, "learning_rate": 1.8243207211401732e-05, "loss": 0.2319, "step": 9003500 }, { "epoch": 5.4, "learning_rate": 1.824110724584117e-05, "loss": 0.2285, "step": 9004000 }, { "epoch": 5.4, "learning_rate": 1.8239007280280605e-05, "loss": 0.2268, "step": 9004500 }, { "epoch": 5.4, "learning_rate": 1.8236907314720035e-05, "loss": 0.229, "step": 9005000 }, { "epoch": 5.4, "learning_rate": 1.8234811549090596e-05, "loss": 0.2276, "step": 9005500 }, { "epoch": 5.4, "learning_rate": 1.823271158353003e-05, "loss": 0.228, "step": 9006000 }, { "epoch": 5.4, "learning_rate": 1.8230611617969466e-05, "loss": 0.2271, "step": 9006500 }, { "epoch": 5.4, "learning_rate": 1.8228511652408903e-05, "loss": 0.2314, "step": 9007000 }, { "epoch": 5.4, "learning_rate": 1.8226415886779457e-05, "loss": 0.2298, "step": 9007500 }, { "epoch": 5.4, "learning_rate": 1.822431592121889e-05, "loss": 0.234, "step": 9008000 }, { "epoch": 5.4, "learning_rate": 1.8222215955658327e-05, "loss": 0.2277, "step": 9008500 }, { "epoch": 5.4, "learning_rate": 1.8220115990097764e-05, "loss": 0.2288, "step": 9009000 }, { "epoch": 5.4, "learning_rate": 1.8218020224468317e-05, "loss": 0.2235, "step": 9009500 }, { "epoch": 5.4, "learning_rate": 1.8215920258907754e-05, "loss": 0.2274, "step": 9010000 }, { "epoch": 5.4, "learning_rate": 1.8213820293347188e-05, "loss": 0.2265, "step": 9010500 }, { "epoch": 5.4, "learning_rate": 1.8211720327786624e-05, "loss": 0.2247, "step": 9011000 }, { "epoch": 5.4, "learning_rate": 1.820962456215718e-05, "loss": 0.2355, "step": 9011500 }, { "epoch": 5.4, "learning_rate": 1.8207524596596615e-05, "loss": 0.2282, "step": 9012000 }, { "epoch": 5.4, "learning_rate": 1.8205428830967172e-05, "loss": 0.2274, "step": 9012500 }, { "epoch": 5.4, "learning_rate": 1.820332886540661e-05, "loss": 0.2292, "step": 9013000 }, { "epoch": 5.4, "learning_rate": 1.8201228899846042e-05, "loss": 0.2267, "step": 9013500 }, { "epoch": 5.4, "learning_rate": 1.819912893428548e-05, "loss": 0.2261, "step": 9014000 }, { "epoch": 5.4, "learning_rate": 1.8197028968724912e-05, "loss": 0.2235, "step": 9014500 }, { "epoch": 5.4, "learning_rate": 1.8194929003164346e-05, "loss": 0.2281, "step": 9015000 }, { "epoch": 5.41, "learning_rate": 1.8192833237534906e-05, "loss": 0.2279, "step": 9015500 }, { "epoch": 5.41, "learning_rate": 1.819073327197434e-05, "loss": 0.2273, "step": 9016000 }, { "epoch": 5.41, "learning_rate": 1.8188633306413773e-05, "loss": 0.2315, "step": 9016500 }, { "epoch": 5.41, "learning_rate": 1.818653334085321e-05, "loss": 0.2347, "step": 9017000 }, { "epoch": 5.41, "learning_rate": 1.8184433375292643e-05, "loss": 0.2273, "step": 9017500 }, { "epoch": 5.41, "learning_rate": 1.818233340973208e-05, "loss": 0.2255, "step": 9018000 }, { "epoch": 5.41, "learning_rate": 1.8180233444171517e-05, "loss": 0.222, "step": 9018500 }, { "epoch": 5.41, "learning_rate": 1.817813347861095e-05, "loss": 0.2222, "step": 9019000 }, { "epoch": 5.41, "learning_rate": 1.8176037712981508e-05, "loss": 0.2269, "step": 9019500 }, { "epoch": 5.41, "learning_rate": 1.817393774742094e-05, "loss": 0.2315, "step": 9020000 }, { "epoch": 5.41, "learning_rate": 1.8171837781860378e-05, "loss": 0.2257, "step": 9020500 }, { "epoch": 5.41, "learning_rate": 1.8169737816299815e-05, "loss": 0.2249, "step": 9021000 }, { "epoch": 5.41, "learning_rate": 1.816764205067037e-05, "loss": 0.2227, "step": 9021500 }, { "epoch": 5.41, "learning_rate": 1.8165542085109802e-05, "loss": 0.2301, "step": 9022000 }, { "epoch": 5.41, "learning_rate": 1.816344211954924e-05, "loss": 0.2256, "step": 9022500 }, { "epoch": 5.41, "learning_rate": 1.8161342153988675e-05, "loss": 0.2313, "step": 9023000 }, { "epoch": 5.41, "learning_rate": 1.815924218842811e-05, "loss": 0.2276, "step": 9023500 }, { "epoch": 5.41, "learning_rate": 1.8157142222867542e-05, "loss": 0.2227, "step": 9024000 }, { "epoch": 5.41, "learning_rate": 1.815504225730698e-05, "loss": 0.2236, "step": 9024500 }, { "epoch": 5.41, "learning_rate": 1.8152942291746416e-05, "loss": 0.231, "step": 9025000 }, { "epoch": 5.41, "learning_rate": 1.8150846526116973e-05, "loss": 0.2296, "step": 9025500 }, { "epoch": 5.41, "learning_rate": 1.814875076048753e-05, "loss": 0.2324, "step": 9026000 }, { "epoch": 5.41, "learning_rate": 1.8146650794926963e-05, "loss": 0.2367, "step": 9026500 }, { "epoch": 5.41, "learning_rate": 1.8144550829366397e-05, "loss": 0.2305, "step": 9027000 }, { "epoch": 5.41, "learning_rate": 1.8142455063736954e-05, "loss": 0.2273, "step": 9027500 }, { "epoch": 5.41, "learning_rate": 1.814035509817639e-05, "loss": 0.2297, "step": 9028000 }, { "epoch": 5.41, "learning_rate": 1.8138255132615824e-05, "loss": 0.2315, "step": 9028500 }, { "epoch": 5.41, "learning_rate": 1.8136155167055258e-05, "loss": 0.2241, "step": 9029000 }, { "epoch": 5.41, "learning_rate": 1.8134055201494695e-05, "loss": 0.2317, "step": 9029500 }, { "epoch": 5.41, "learning_rate": 1.813195523593413e-05, "loss": 0.2221, "step": 9030000 }, { "epoch": 5.41, "learning_rate": 1.8129855270373565e-05, "loss": 0.226, "step": 9030500 }, { "epoch": 5.41, "learning_rate": 1.8127755304813e-05, "loss": 0.2321, "step": 9031000 }, { "epoch": 5.41, "learning_rate": 1.8125655339252435e-05, "loss": 0.2258, "step": 9031500 }, { "epoch": 5.42, "learning_rate": 1.8123555373691872e-05, "loss": 0.2299, "step": 9032000 }, { "epoch": 5.42, "learning_rate": 1.8121455408131305e-05, "loss": 0.2321, "step": 9032500 }, { "epoch": 5.42, "learning_rate": 1.8119355442570742e-05, "loss": 0.2315, "step": 9033000 }, { "epoch": 5.42, "learning_rate": 1.81172596769413e-05, "loss": 0.2269, "step": 9033500 }, { "epoch": 5.42, "learning_rate": 1.8115159711380733e-05, "loss": 0.2273, "step": 9034000 }, { "epoch": 5.42, "learning_rate": 1.8113059745820166e-05, "loss": 0.2333, "step": 9034500 }, { "epoch": 5.42, "learning_rate": 1.8110959780259603e-05, "loss": 0.2299, "step": 9035000 }, { "epoch": 5.42, "learning_rate": 1.810886401463016e-05, "loss": 0.2261, "step": 9035500 }, { "epoch": 5.42, "learning_rate": 1.8106764049069593e-05, "loss": 0.2321, "step": 9036000 }, { "epoch": 5.42, "learning_rate": 1.810466408350903e-05, "loss": 0.2278, "step": 9036500 }, { "epoch": 5.42, "learning_rate": 1.8102568317879587e-05, "loss": 0.2277, "step": 9037000 }, { "epoch": 5.42, "learning_rate": 1.810046835231902e-05, "loss": 0.2274, "step": 9037500 }, { "epoch": 5.42, "learning_rate": 1.8098368386758458e-05, "loss": 0.2254, "step": 9038000 }, { "epoch": 5.42, "learning_rate": 1.809626842119789e-05, "loss": 0.2327, "step": 9038500 }, { "epoch": 5.42, "learning_rate": 1.8094168455637328e-05, "loss": 0.2269, "step": 9039000 }, { "epoch": 5.42, "learning_rate": 1.809206849007676e-05, "loss": 0.2268, "step": 9039500 }, { "epoch": 5.42, "learning_rate": 1.8089968524516198e-05, "loss": 0.2321, "step": 9040000 }, { "epoch": 5.42, "learning_rate": 1.8087868558955635e-05, "loss": 0.229, "step": 9040500 }, { "epoch": 5.42, "learning_rate": 1.808577279332619e-05, "loss": 0.2264, "step": 9041000 }, { "epoch": 5.42, "learning_rate": 1.8083672827765625e-05, "loss": 0.2319, "step": 9041500 }, { "epoch": 5.42, "learning_rate": 1.808157286220506e-05, "loss": 0.2259, "step": 9042000 }, { "epoch": 5.42, "learning_rate": 1.8079477096575616e-05, "loss": 0.2233, "step": 9042500 }, { "epoch": 5.42, "learning_rate": 1.8077377131015053e-05, "loss": 0.2272, "step": 9043000 }, { "epoch": 5.42, "learning_rate": 1.8075277165454486e-05, "loss": 0.2331, "step": 9043500 }, { "epoch": 5.42, "learning_rate": 1.807317719989392e-05, "loss": 0.2285, "step": 9044000 }, { "epoch": 5.42, "learning_rate": 1.8071077234333356e-05, "loss": 0.2274, "step": 9044500 }, { "epoch": 5.42, "learning_rate": 1.8068977268772793e-05, "loss": 0.2299, "step": 9045000 }, { "epoch": 5.42, "learning_rate": 1.8066881503143347e-05, "loss": 0.2284, "step": 9045500 }, { "epoch": 5.42, "learning_rate": 1.8064781537582784e-05, "loss": 0.2231, "step": 9046000 }, { "epoch": 5.42, "learning_rate": 1.8062681572022217e-05, "loss": 0.2361, "step": 9046500 }, { "epoch": 5.42, "learning_rate": 1.8060581606461654e-05, "loss": 0.2281, "step": 9047000 }, { "epoch": 5.42, "learning_rate": 1.805848164090109e-05, "loss": 0.2299, "step": 9047500 }, { "epoch": 5.42, "learning_rate": 1.8056381675340524e-05, "loss": 0.2305, "step": 9048000 }, { "epoch": 5.42, "learning_rate": 1.8054281709779958e-05, "loss": 0.2271, "step": 9048500 }, { "epoch": 5.43, "learning_rate": 1.8052181744219395e-05, "loss": 0.2318, "step": 9049000 }, { "epoch": 5.43, "learning_rate": 1.8050090178521072e-05, "loss": 0.2333, "step": 9049500 }, { "epoch": 5.43, "learning_rate": 1.804799021296051e-05, "loss": 0.2263, "step": 9050000 }, { "epoch": 5.43, "learning_rate": 1.8045890247399942e-05, "loss": 0.2229, "step": 9050500 }, { "epoch": 5.43, "learning_rate": 1.8043790281839375e-05, "loss": 0.2227, "step": 9051000 }, { "epoch": 5.43, "learning_rate": 1.8041690316278812e-05, "loss": 0.2307, "step": 9051500 }, { "epoch": 5.43, "learning_rate": 1.803959035071825e-05, "loss": 0.2247, "step": 9052000 }, { "epoch": 5.43, "learning_rate": 1.8037494585088806e-05, "loss": 0.2316, "step": 9052500 }, { "epoch": 5.43, "learning_rate": 1.803539461952824e-05, "loss": 0.2245, "step": 9053000 }, { "epoch": 5.43, "learning_rate": 1.8033294653967673e-05, "loss": 0.2296, "step": 9053500 }, { "epoch": 5.43, "learning_rate": 1.803119468840711e-05, "loss": 0.2297, "step": 9054000 }, { "epoch": 5.43, "learning_rate": 1.8029094722846547e-05, "loss": 0.2264, "step": 9054500 }, { "epoch": 5.43, "learning_rate": 1.80269989572171e-05, "loss": 0.2374, "step": 9055000 }, { "epoch": 5.43, "learning_rate": 1.8024898991656537e-05, "loss": 0.2251, "step": 9055500 }, { "epoch": 5.43, "learning_rate": 1.802279902609597e-05, "loss": 0.2301, "step": 9056000 }, { "epoch": 5.43, "learning_rate": 1.8020699060535407e-05, "loss": 0.2275, "step": 9056500 }, { "epoch": 5.43, "learning_rate": 1.8018599094974844e-05, "loss": 0.2279, "step": 9057000 }, { "epoch": 5.43, "learning_rate": 1.8016503329345398e-05, "loss": 0.2298, "step": 9057500 }, { "epoch": 5.43, "learning_rate": 1.801440336378483e-05, "loss": 0.2202, "step": 9058000 }, { "epoch": 5.43, "learning_rate": 1.8012303398224268e-05, "loss": 0.2289, "step": 9058500 }, { "epoch": 5.43, "learning_rate": 1.8010203432663705e-05, "loss": 0.2293, "step": 9059000 }, { "epoch": 5.43, "learning_rate": 1.800810346710314e-05, "loss": 0.2326, "step": 9059500 }, { "epoch": 5.43, "learning_rate": 1.8006003501542575e-05, "loss": 0.2301, "step": 9060000 }, { "epoch": 5.43, "learning_rate": 1.800390353598201e-05, "loss": 0.2299, "step": 9060500 }, { "epoch": 5.43, "learning_rate": 1.8001803570421446e-05, "loss": 0.2237, "step": 9061000 }, { "epoch": 5.43, "learning_rate": 1.7999707804792003e-05, "loss": 0.2234, "step": 9061500 }, { "epoch": 5.43, "learning_rate": 1.7997607839231436e-05, "loss": 0.2308, "step": 9062000 }, { "epoch": 5.43, "learning_rate": 1.7995507873670873e-05, "loss": 0.2276, "step": 9062500 }, { "epoch": 5.43, "learning_rate": 1.7993407908110306e-05, "loss": 0.2249, "step": 9063000 }, { "epoch": 5.43, "learning_rate": 1.7991312142480863e-05, "loss": 0.2263, "step": 9063500 }, { "epoch": 5.43, "learning_rate": 1.79892121769203e-05, "loss": 0.2327, "step": 9064000 }, { "epoch": 5.43, "learning_rate": 1.7987116411290857e-05, "loss": 0.2255, "step": 9064500 }, { "epoch": 5.43, "learning_rate": 1.7985016445730287e-05, "loss": 0.2286, "step": 9065000 }, { "epoch": 5.44, "learning_rate": 1.7982916480169724e-05, "loss": 0.2277, "step": 9065500 }, { "epoch": 5.44, "learning_rate": 1.798081651460916e-05, "loss": 0.2311, "step": 9066000 }, { "epoch": 5.44, "learning_rate": 1.7978716549048594e-05, "loss": 0.2342, "step": 9066500 }, { "epoch": 5.44, "learning_rate": 1.797662078341915e-05, "loss": 0.2257, "step": 9067000 }, { "epoch": 5.44, "learning_rate": 1.7974520817858585e-05, "loss": 0.2362, "step": 9067500 }, { "epoch": 5.44, "learning_rate": 1.797242085229802e-05, "loss": 0.2253, "step": 9068000 }, { "epoch": 5.44, "learning_rate": 1.797032088673746e-05, "loss": 0.2251, "step": 9068500 }, { "epoch": 5.44, "learning_rate": 1.7968220921176892e-05, "loss": 0.225, "step": 9069000 }, { "epoch": 5.44, "learning_rate": 1.796612095561633e-05, "loss": 0.2311, "step": 9069500 }, { "epoch": 5.44, "learning_rate": 1.7964025189986882e-05, "loss": 0.234, "step": 9070000 }, { "epoch": 5.44, "learning_rate": 1.796192522442632e-05, "loss": 0.232, "step": 9070500 }, { "epoch": 5.44, "learning_rate": 1.7959825258865756e-05, "loss": 0.2296, "step": 9071000 }, { "epoch": 5.44, "learning_rate": 1.795772529330519e-05, "loss": 0.2269, "step": 9071500 }, { "epoch": 5.44, "learning_rate": 1.7955625327744626e-05, "loss": 0.2295, "step": 9072000 }, { "epoch": 5.44, "learning_rate": 1.795352536218406e-05, "loss": 0.2281, "step": 9072500 }, { "epoch": 5.44, "learning_rate": 1.7951425396623493e-05, "loss": 0.2273, "step": 9073000 }, { "epoch": 5.44, "learning_rate": 1.794932543106293e-05, "loss": 0.2288, "step": 9073500 }, { "epoch": 5.44, "learning_rate": 1.7947229665433487e-05, "loss": 0.2258, "step": 9074000 }, { "epoch": 5.44, "learning_rate": 1.794512969987292e-05, "loss": 0.2301, "step": 9074500 }, { "epoch": 5.44, "learning_rate": 1.7943029734312357e-05, "loss": 0.2228, "step": 9075000 }, { "epoch": 5.44, "learning_rate": 1.794092976875179e-05, "loss": 0.2296, "step": 9075500 }, { "epoch": 5.44, "learning_rate": 1.7938834003122348e-05, "loss": 0.2334, "step": 9076000 }, { "epoch": 5.44, "learning_rate": 1.7936734037561785e-05, "loss": 0.2307, "step": 9076500 }, { "epoch": 5.44, "learning_rate": 1.7934634072001218e-05, "loss": 0.2269, "step": 9077000 }, { "epoch": 5.44, "learning_rate": 1.7932534106440655e-05, "loss": 0.2317, "step": 9077500 }, { "epoch": 5.44, "learning_rate": 1.7930438340811212e-05, "loss": 0.2345, "step": 9078000 }, { "epoch": 5.44, "learning_rate": 1.7928338375250645e-05, "loss": 0.2296, "step": 9078500 }, { "epoch": 5.44, "learning_rate": 1.79262426096212e-05, "loss": 0.2251, "step": 9079000 }, { "epoch": 5.44, "learning_rate": 1.7924142644060636e-05, "loss": 0.2232, "step": 9079500 }, { "epoch": 5.44, "learning_rate": 1.7922042678500073e-05, "loss": 0.2293, "step": 9080000 }, { "epoch": 5.44, "learning_rate": 1.791994271293951e-05, "loss": 0.2249, "step": 9080500 }, { "epoch": 5.44, "learning_rate": 1.7917842747378943e-05, "loss": 0.223, "step": 9081000 }, { "epoch": 5.44, "learning_rate": 1.791574278181838e-05, "loss": 0.2254, "step": 9081500 }, { "epoch": 5.45, "learning_rate": 1.7913647016188933e-05, "loss": 0.2245, "step": 9082000 }, { "epoch": 5.45, "learning_rate": 1.791154705062837e-05, "loss": 0.2283, "step": 9082500 }, { "epoch": 5.45, "learning_rate": 1.7909447085067804e-05, "loss": 0.2267, "step": 9083000 }, { "epoch": 5.45, "learning_rate": 1.790734711950724e-05, "loss": 0.233, "step": 9083500 }, { "epoch": 5.45, "learning_rate": 1.7905251353877794e-05, "loss": 0.23, "step": 9084000 }, { "epoch": 5.45, "learning_rate": 1.790315138831723e-05, "loss": 0.2241, "step": 9084500 }, { "epoch": 5.45, "learning_rate": 1.7901051422756668e-05, "loss": 0.2323, "step": 9085000 }, { "epoch": 5.45, "learning_rate": 1.78989514571961e-05, "loss": 0.2278, "step": 9085500 }, { "epoch": 5.45, "learning_rate": 1.7896851491635538e-05, "loss": 0.2267, "step": 9086000 }, { "epoch": 5.45, "learning_rate": 1.789475152607497e-05, "loss": 0.2257, "step": 9086500 }, { "epoch": 5.45, "learning_rate": 1.7892651560514405e-05, "loss": 0.2252, "step": 9087000 }, { "epoch": 5.45, "learning_rate": 1.7890551594953842e-05, "loss": 0.2248, "step": 9087500 }, { "epoch": 5.45, "learning_rate": 1.78884558293244e-05, "loss": 0.2227, "step": 9088000 }, { "epoch": 5.45, "learning_rate": 1.7886355863763836e-05, "loss": 0.2269, "step": 9088500 }, { "epoch": 5.45, "learning_rate": 1.788425589820327e-05, "loss": 0.2266, "step": 9089000 }, { "epoch": 5.45, "learning_rate": 1.7882155932642703e-05, "loss": 0.2309, "step": 9089500 }, { "epoch": 5.45, "learning_rate": 1.788006016701326e-05, "loss": 0.2276, "step": 9090000 }, { "epoch": 5.45, "learning_rate": 1.7877960201452696e-05, "loss": 0.2281, "step": 9090500 }, { "epoch": 5.45, "learning_rate": 1.7875860235892133e-05, "loss": 0.2266, "step": 9091000 }, { "epoch": 5.45, "learning_rate": 1.7873760270331567e-05, "loss": 0.2256, "step": 9091500 }, { "epoch": 5.45, "learning_rate": 1.7871668704633244e-05, "loss": 0.2307, "step": 9092000 }, { "epoch": 5.45, "learning_rate": 1.786956873907268e-05, "loss": 0.2278, "step": 9092500 }, { "epoch": 5.45, "learning_rate": 1.7867468773512114e-05, "loss": 0.2243, "step": 9093000 }, { "epoch": 5.45, "learning_rate": 1.786537300788267e-05, "loss": 0.2276, "step": 9093500 }, { "epoch": 5.45, "learning_rate": 1.7863273042322105e-05, "loss": 0.2276, "step": 9094000 }, { "epoch": 5.45, "learning_rate": 1.786117307676154e-05, "loss": 0.2303, "step": 9094500 }, { "epoch": 5.45, "learning_rate": 1.7859073111200978e-05, "loss": 0.2303, "step": 9095000 }, { "epoch": 5.45, "learning_rate": 1.7856973145640412e-05, "loss": 0.2303, "step": 9095500 }, { "epoch": 5.45, "learning_rate": 1.7854873180079845e-05, "loss": 0.2273, "step": 9096000 }, { "epoch": 5.45, "learning_rate": 1.7852773214519282e-05, "loss": 0.23, "step": 9096500 }, { "epoch": 5.45, "learning_rate": 1.7850673248958715e-05, "loss": 0.2254, "step": 9097000 }, { "epoch": 5.45, "learning_rate": 1.7848573283398152e-05, "loss": 0.2275, "step": 9097500 }, { "epoch": 5.45, "learning_rate": 1.784647331783759e-05, "loss": 0.2322, "step": 9098000 }, { "epoch": 5.45, "learning_rate": 1.7844377552208143e-05, "loss": 0.2332, "step": 9098500 }, { "epoch": 5.46, "learning_rate": 1.784227758664758e-05, "loss": 0.2306, "step": 9099000 }, { "epoch": 5.46, "learning_rate": 1.7840177621087013e-05, "loss": 0.2278, "step": 9099500 }, { "epoch": 5.46, "learning_rate": 1.783807765552645e-05, "loss": 0.2259, "step": 9100000 }, { "epoch": 5.46, "eval_loss": 0.21511751413345337, "eval_runtime": 1449.9249, "eval_samples_per_second": 363.274, "eval_steps_per_second": 60.546, "step": 9100000 }, { "epoch": 5.46, "learning_rate": 1.7835977689965887e-05, "loss": 0.2273, "step": 9100500 }, { "epoch": 5.46, "learning_rate": 1.783388192433644e-05, "loss": 0.2306, "step": 9101000 }, { "epoch": 5.46, "learning_rate": 1.7831781958775877e-05, "loss": 0.225, "step": 9101500 }, { "epoch": 5.46, "learning_rate": 1.782968199321531e-05, "loss": 0.2299, "step": 9102000 }, { "epoch": 5.46, "learning_rate": 1.7827582027654747e-05, "loss": 0.2315, "step": 9102500 }, { "epoch": 5.46, "learning_rate": 1.7825482062094184e-05, "loss": 0.2282, "step": 9103000 }, { "epoch": 5.46, "learning_rate": 1.7823382096533614e-05, "loss": 0.2315, "step": 9103500 }, { "epoch": 5.46, "learning_rate": 1.782128213097305e-05, "loss": 0.2259, "step": 9104000 }, { "epoch": 5.46, "learning_rate": 1.7819182165412488e-05, "loss": 0.2274, "step": 9104500 }, { "epoch": 5.46, "learning_rate": 1.7817086399783045e-05, "loss": 0.223, "step": 9105000 }, { "epoch": 5.46, "learning_rate": 1.78149906341536e-05, "loss": 0.228, "step": 9105500 }, { "epoch": 5.46, "learning_rate": 1.7812890668593035e-05, "loss": 0.2286, "step": 9106000 }, { "epoch": 5.46, "learning_rate": 1.781079070303247e-05, "loss": 0.2232, "step": 9106500 }, { "epoch": 5.46, "learning_rate": 1.7808690737471906e-05, "loss": 0.2358, "step": 9107000 }, { "epoch": 5.46, "learning_rate": 1.7806590771911343e-05, "loss": 0.2331, "step": 9107500 }, { "epoch": 5.46, "learning_rate": 1.7804490806350773e-05, "loss": 0.2283, "step": 9108000 }, { "epoch": 5.46, "learning_rate": 1.7802395040721333e-05, "loss": 0.2254, "step": 9108500 }, { "epoch": 5.46, "learning_rate": 1.7800295075160767e-05, "loss": 0.2336, "step": 9109000 }, { "epoch": 5.46, "learning_rate": 1.7798195109600203e-05, "loss": 0.2315, "step": 9109500 }, { "epoch": 5.46, "learning_rate": 1.779609514403964e-05, "loss": 0.2304, "step": 9110000 }, { "epoch": 5.46, "learning_rate": 1.779399517847907e-05, "loss": 0.2268, "step": 9110500 }, { "epoch": 5.46, "learning_rate": 1.7791899412849627e-05, "loss": 0.2255, "step": 9111000 }, { "epoch": 5.46, "learning_rate": 1.7789799447289064e-05, "loss": 0.2329, "step": 9111500 }, { "epoch": 5.46, "learning_rate": 1.77876994817285e-05, "loss": 0.2311, "step": 9112000 }, { "epoch": 5.46, "learning_rate": 1.7785599516167934e-05, "loss": 0.2259, "step": 9112500 }, { "epoch": 5.46, "learning_rate": 1.7783499550607368e-05, "loss": 0.2255, "step": 9113000 }, { "epoch": 5.46, "learning_rate": 1.7781399585046805e-05, "loss": 0.224, "step": 9113500 }, { "epoch": 5.46, "learning_rate": 1.777929961948624e-05, "loss": 0.225, "step": 9114000 }, { "epoch": 5.46, "learning_rate": 1.7777199653925675e-05, "loss": 0.2244, "step": 9114500 }, { "epoch": 5.46, "learning_rate": 1.7775108088227352e-05, "loss": 0.2271, "step": 9115000 }, { "epoch": 5.47, "learning_rate": 1.777300812266679e-05, "loss": 0.2306, "step": 9115500 }, { "epoch": 5.47, "learning_rate": 1.7770908157106222e-05, "loss": 0.2242, "step": 9116000 }, { "epoch": 5.47, "learning_rate": 1.776880819154566e-05, "loss": 0.2276, "step": 9116500 }, { "epoch": 5.47, "learning_rate": 1.7766708225985096e-05, "loss": 0.2274, "step": 9117000 }, { "epoch": 5.47, "learning_rate": 1.7764608260424526e-05, "loss": 0.2203, "step": 9117500 }, { "epoch": 5.47, "learning_rate": 1.7762508294863963e-05, "loss": 0.2292, "step": 9118000 }, { "epoch": 5.47, "learning_rate": 1.77604083293034e-05, "loss": 0.2261, "step": 9118500 }, { "epoch": 5.47, "learning_rate": 1.7758312563673957e-05, "loss": 0.2293, "step": 9119000 }, { "epoch": 5.47, "learning_rate": 1.7756212598113394e-05, "loss": 0.2256, "step": 9119500 }, { "epoch": 5.47, "learning_rate": 1.7754112632552824e-05, "loss": 0.2269, "step": 9120000 }, { "epoch": 5.47, "learning_rate": 1.775201686692338e-05, "loss": 0.2277, "step": 9120500 }, { "epoch": 5.47, "learning_rate": 1.7749916901362818e-05, "loss": 0.2291, "step": 9121000 }, { "epoch": 5.47, "learning_rate": 1.7747816935802254e-05, "loss": 0.223, "step": 9121500 }, { "epoch": 5.47, "learning_rate": 1.7745716970241688e-05, "loss": 0.2231, "step": 9122000 }, { "epoch": 5.47, "learning_rate": 1.774361700468112e-05, "loss": 0.2293, "step": 9122500 }, { "epoch": 5.47, "learning_rate": 1.7741517039120558e-05, "loss": 0.2327, "step": 9123000 }, { "epoch": 5.47, "learning_rate": 1.7739421273491115e-05, "loss": 0.2258, "step": 9123500 }, { "epoch": 5.47, "learning_rate": 1.7737321307930552e-05, "loss": 0.2222, "step": 9124000 }, { "epoch": 5.47, "learning_rate": 1.7735221342369982e-05, "loss": 0.2267, "step": 9124500 }, { "epoch": 5.47, "learning_rate": 1.773312137680942e-05, "loss": 0.2302, "step": 9125000 }, { "epoch": 5.47, "learning_rate": 1.7731021411248856e-05, "loss": 0.2231, "step": 9125500 }, { "epoch": 5.47, "learning_rate": 1.772892144568829e-05, "loss": 0.2268, "step": 9126000 }, { "epoch": 5.47, "learning_rate": 1.7726821480127726e-05, "loss": 0.2262, "step": 9126500 }, { "epoch": 5.47, "learning_rate": 1.7724721514567163e-05, "loss": 0.2301, "step": 9127000 }, { "epoch": 5.47, "learning_rate": 1.7722625748937716e-05, "loss": 0.2298, "step": 9127500 }, { "epoch": 5.47, "learning_rate": 1.7720525783377153e-05, "loss": 0.2282, "step": 9128000 }, { "epoch": 5.47, "learning_rate": 1.771843001774771e-05, "loss": 0.2295, "step": 9128500 }, { "epoch": 5.47, "learning_rate": 1.7716330052187144e-05, "loss": 0.2307, "step": 9129000 }, { "epoch": 5.47, "learning_rate": 1.7714230086626577e-05, "loss": 0.2307, "step": 9129500 }, { "epoch": 5.47, "learning_rate": 1.7712130121066014e-05, "loss": 0.2339, "step": 9130000 }, { "epoch": 5.47, "learning_rate": 1.771003015550545e-05, "loss": 0.2276, "step": 9130500 }, { "epoch": 5.47, "learning_rate": 1.7707930189944884e-05, "loss": 0.2267, "step": 9131000 }, { "epoch": 5.47, "learning_rate": 1.770583022438432e-05, "loss": 0.2292, "step": 9131500 }, { "epoch": 5.48, "learning_rate": 1.7703730258823758e-05, "loss": 0.2311, "step": 9132000 }, { "epoch": 5.48, "learning_rate": 1.770163449319431e-05, "loss": 0.228, "step": 9132500 }, { "epoch": 5.48, "learning_rate": 1.769953872756487e-05, "loss": 0.2258, "step": 9133000 }, { "epoch": 5.48, "learning_rate": 1.7697438762004305e-05, "loss": 0.224, "step": 9133500 }, { "epoch": 5.48, "learning_rate": 1.769533879644374e-05, "loss": 0.2286, "step": 9134000 }, { "epoch": 5.48, "learning_rate": 1.7693238830883172e-05, "loss": 0.2299, "step": 9134500 }, { "epoch": 5.48, "learning_rate": 1.769113886532261e-05, "loss": 0.2308, "step": 9135000 }, { "epoch": 5.48, "learning_rate": 1.7689043099693166e-05, "loss": 0.2294, "step": 9135500 }, { "epoch": 5.48, "learning_rate": 1.76869431341326e-05, "loss": 0.2296, "step": 9136000 }, { "epoch": 5.48, "learning_rate": 1.7684843168572033e-05, "loss": 0.2298, "step": 9136500 }, { "epoch": 5.48, "learning_rate": 1.768274320301147e-05, "loss": 0.2322, "step": 9137000 }, { "epoch": 5.48, "learning_rate": 1.7680647437382027e-05, "loss": 0.2317, "step": 9137500 }, { "epoch": 5.48, "learning_rate": 1.7678547471821464e-05, "loss": 0.2266, "step": 9138000 }, { "epoch": 5.48, "learning_rate": 1.7676447506260897e-05, "loss": 0.2275, "step": 9138500 }, { "epoch": 5.48, "learning_rate": 1.767434754070033e-05, "loss": 0.2268, "step": 9139000 }, { "epoch": 5.48, "learning_rate": 1.7672247575139767e-05, "loss": 0.2252, "step": 9139500 }, { "epoch": 5.48, "learning_rate": 1.7670151809510324e-05, "loss": 0.2265, "step": 9140000 }, { "epoch": 5.48, "learning_rate": 1.766805184394976e-05, "loss": 0.2288, "step": 9140500 }, { "epoch": 5.48, "learning_rate": 1.7665951878389195e-05, "loss": 0.23, "step": 9141000 }, { "epoch": 5.48, "learning_rate": 1.7663851912828628e-05, "loss": 0.2267, "step": 9141500 }, { "epoch": 5.48, "learning_rate": 1.7661751947268065e-05, "loss": 0.2269, "step": 9142000 }, { "epoch": 5.48, "learning_rate": 1.76596519817075e-05, "loss": 0.2277, "step": 9142500 }, { "epoch": 5.48, "learning_rate": 1.7657556216078055e-05, "loss": 0.2315, "step": 9143000 }, { "epoch": 5.48, "learning_rate": 1.7655456250517492e-05, "loss": 0.23, "step": 9143500 }, { "epoch": 5.48, "learning_rate": 1.7653356284956926e-05, "loss": 0.2275, "step": 9144000 }, { "epoch": 5.48, "learning_rate": 1.7651256319396363e-05, "loss": 0.225, "step": 9144500 }, { "epoch": 5.48, "learning_rate": 1.7649156353835796e-05, "loss": 0.2325, "step": 9145000 }, { "epoch": 5.48, "learning_rate": 1.7647060588206353e-05, "loss": 0.2258, "step": 9145500 }, { "epoch": 5.48, "learning_rate": 1.7644960622645787e-05, "loss": 0.235, "step": 9146000 }, { "epoch": 5.48, "learning_rate": 1.7642860657085223e-05, "loss": 0.2264, "step": 9146500 }, { "epoch": 5.48, "learning_rate": 1.7640760691524657e-05, "loss": 0.2273, "step": 9147000 }, { "epoch": 5.48, "learning_rate": 1.7638660725964094e-05, "loss": 0.2279, "step": 9147500 }, { "epoch": 5.48, "learning_rate": 1.763656076040353e-05, "loss": 0.2265, "step": 9148000 }, { "epoch": 5.48, "learning_rate": 1.7634460794842964e-05, "loss": 0.2252, "step": 9148500 }, { "epoch": 5.49, "learning_rate": 1.7632360829282397e-05, "loss": 0.2267, "step": 9149000 }, { "epoch": 5.49, "learning_rate": 1.7630265063652954e-05, "loss": 0.2243, "step": 9149500 }, { "epoch": 5.49, "learning_rate": 1.762816509809239e-05, "loss": 0.2263, "step": 9150000 }, { "epoch": 5.49, "learning_rate": 1.7626065132531828e-05, "loss": 0.2296, "step": 9150500 }, { "epoch": 5.49, "learning_rate": 1.762396516697126e-05, "loss": 0.2269, "step": 9151000 }, { "epoch": 5.49, "learning_rate": 1.762186940134182e-05, "loss": 0.2216, "step": 9151500 }, { "epoch": 5.49, "learning_rate": 1.7619769435781252e-05, "loss": 0.226, "step": 9152000 }, { "epoch": 5.49, "learning_rate": 1.761766947022069e-05, "loss": 0.2274, "step": 9152500 }, { "epoch": 5.49, "learning_rate": 1.7615573704591246e-05, "loss": 0.2239, "step": 9153000 }, { "epoch": 5.49, "learning_rate": 1.761347373903068e-05, "loss": 0.2231, "step": 9153500 }, { "epoch": 5.49, "learning_rate": 1.7611373773470113e-05, "loss": 0.2274, "step": 9154000 }, { "epoch": 5.49, "learning_rate": 1.760927380790955e-05, "loss": 0.2315, "step": 9154500 }, { "epoch": 5.49, "learning_rate": 1.7607173842348986e-05, "loss": 0.2288, "step": 9155000 }, { "epoch": 5.49, "learning_rate": 1.7605073876788423e-05, "loss": 0.2282, "step": 9155500 }, { "epoch": 5.49, "learning_rate": 1.7602973911227853e-05, "loss": 0.2308, "step": 9156000 }, { "epoch": 5.49, "learning_rate": 1.760087394566729e-05, "loss": 0.2291, "step": 9156500 }, { "epoch": 5.49, "learning_rate": 1.7598778180037847e-05, "loss": 0.2312, "step": 9157000 }, { "epoch": 5.49, "learning_rate": 1.7596682414408404e-05, "loss": 0.2217, "step": 9157500 }, { "epoch": 5.49, "learning_rate": 1.7594582448847838e-05, "loss": 0.2302, "step": 9158000 }, { "epoch": 5.49, "learning_rate": 1.7592482483287274e-05, "loss": 0.2296, "step": 9158500 }, { "epoch": 5.49, "learning_rate": 1.7590382517726708e-05, "loss": 0.2284, "step": 9159000 }, { "epoch": 5.49, "learning_rate": 1.7588282552166145e-05, "loss": 0.2261, "step": 9159500 }, { "epoch": 5.49, "learning_rate": 1.758618258660558e-05, "loss": 0.2292, "step": 9160000 }, { "epoch": 5.49, "learning_rate": 1.7584082621045015e-05, "loss": 0.2239, "step": 9160500 }, { "epoch": 5.49, "learning_rate": 1.758198685541557e-05, "loss": 0.2237, "step": 9161000 }, { "epoch": 5.49, "learning_rate": 1.7579886889855005e-05, "loss": 0.2266, "step": 9161500 }, { "epoch": 5.49, "learning_rate": 1.7577786924294442e-05, "loss": 0.2205, "step": 9162000 }, { "epoch": 5.49, "learning_rate": 1.757568695873388e-05, "loss": 0.2265, "step": 9162500 }, { "epoch": 5.49, "learning_rate": 1.757358699317331e-05, "loss": 0.225, "step": 9163000 }, { "epoch": 5.49, "learning_rate": 1.7571487027612746e-05, "loss": 0.2289, "step": 9163500 }, { "epoch": 5.49, "learning_rate": 1.7569387062052183e-05, "loss": 0.2285, "step": 9164000 }, { "epoch": 5.49, "learning_rate": 1.756729129642274e-05, "loss": 0.2318, "step": 9164500 }, { "epoch": 5.49, "learning_rate": 1.7565191330862173e-05, "loss": 0.2272, "step": 9165000 }, { "epoch": 5.5, "learning_rate": 1.7563091365301607e-05, "loss": 0.2255, "step": 9165500 }, { "epoch": 5.5, "learning_rate": 1.7560991399741044e-05, "loss": 0.2333, "step": 9166000 }, { "epoch": 5.5, "learning_rate": 1.755889143418048e-05, "loss": 0.2264, "step": 9166500 }, { "epoch": 5.5, "learning_rate": 1.7556795668551037e-05, "loss": 0.2283, "step": 9167000 }, { "epoch": 5.5, "learning_rate": 1.755469570299047e-05, "loss": 0.2235, "step": 9167500 }, { "epoch": 5.5, "learning_rate": 1.7552595737429904e-05, "loss": 0.2275, "step": 9168000 }, { "epoch": 5.5, "learning_rate": 1.755049577186934e-05, "loss": 0.2282, "step": 9168500 }, { "epoch": 5.5, "learning_rate": 1.7548395806308775e-05, "loss": 0.2254, "step": 9169000 }, { "epoch": 5.5, "learning_rate": 1.754629584074821e-05, "loss": 0.2308, "step": 9169500 }, { "epoch": 5.5, "learning_rate": 1.754420007511877e-05, "loss": 0.2291, "step": 9170000 }, { "epoch": 5.5, "learning_rate": 1.7542100109558202e-05, "loss": 0.2271, "step": 9170500 }, { "epoch": 5.5, "learning_rate": 1.754000014399764e-05, "loss": 0.227, "step": 9171000 }, { "epoch": 5.5, "learning_rate": 1.7537900178437072e-05, "loss": 0.2234, "step": 9171500 }, { "epoch": 5.5, "learning_rate": 1.753580021287651e-05, "loss": 0.2289, "step": 9172000 }, { "epoch": 5.5, "learning_rate": 1.7533700247315946e-05, "loss": 0.2264, "step": 9172500 }, { "epoch": 5.5, "learning_rate": 1.7531600281755376e-05, "loss": 0.2218, "step": 9173000 }, { "epoch": 5.5, "learning_rate": 1.7529500316194813e-05, "loss": 0.2275, "step": 9173500 }, { "epoch": 5.5, "learning_rate": 1.7527408750496493e-05, "loss": 0.2269, "step": 9174000 }, { "epoch": 5.5, "learning_rate": 1.7525308784935927e-05, "loss": 0.2294, "step": 9174500 }, { "epoch": 5.5, "learning_rate": 1.752320881937536e-05, "loss": 0.2254, "step": 9175000 }, { "epoch": 5.5, "learning_rate": 1.7521108853814797e-05, "loss": 0.2259, "step": 9175500 }, { "epoch": 5.5, "learning_rate": 1.751900888825423e-05, "loss": 0.224, "step": 9176000 }, { "epoch": 5.5, "learning_rate": 1.7516908922693667e-05, "loss": 0.2303, "step": 9176500 }, { "epoch": 5.5, "learning_rate": 1.7514808957133104e-05, "loss": 0.2264, "step": 9177000 }, { "epoch": 5.5, "learning_rate": 1.7512708991572538e-05, "loss": 0.2331, "step": 9177500 }, { "epoch": 5.5, "learning_rate": 1.7510613225943095e-05, "loss": 0.229, "step": 9178000 }, { "epoch": 5.5, "learning_rate": 1.750851746031365e-05, "loss": 0.2261, "step": 9178500 }, { "epoch": 5.5, "learning_rate": 1.7506417494753085e-05, "loss": 0.2315, "step": 9179000 }, { "epoch": 5.5, "learning_rate": 1.7504317529192522e-05, "loss": 0.2275, "step": 9179500 }, { "epoch": 5.5, "learning_rate": 1.7502217563631955e-05, "loss": 0.2263, "step": 9180000 }, { "epoch": 5.5, "learning_rate": 1.7500117598071392e-05, "loss": 0.2315, "step": 9180500 }, { "epoch": 5.5, "learning_rate": 1.7498017632510826e-05, "loss": 0.2253, "step": 9181000 }, { "epoch": 5.5, "learning_rate": 1.7495917666950263e-05, "loss": 0.2259, "step": 9181500 }, { "epoch": 5.5, "learning_rate": 1.74938177013897e-05, "loss": 0.2296, "step": 9182000 }, { "epoch": 5.51, "learning_rate": 1.7491721935760253e-05, "loss": 0.2331, "step": 9182500 }, { "epoch": 5.51, "learning_rate": 1.748962617013081e-05, "loss": 0.2286, "step": 9183000 }, { "epoch": 5.51, "learning_rate": 1.7487526204570247e-05, "loss": 0.2234, "step": 9183500 }, { "epoch": 5.51, "learning_rate": 1.748542623900968e-05, "loss": 0.2287, "step": 9184000 }, { "epoch": 5.51, "learning_rate": 1.7483326273449114e-05, "loss": 0.2211, "step": 9184500 }, { "epoch": 5.51, "learning_rate": 1.748122630788855e-05, "loss": 0.2259, "step": 9185000 }, { "epoch": 5.51, "learning_rate": 1.7479126342327984e-05, "loss": 0.2278, "step": 9185500 }, { "epoch": 5.51, "learning_rate": 1.747702637676742e-05, "loss": 0.226, "step": 9186000 }, { "epoch": 5.51, "learning_rate": 1.7474926411206858e-05, "loss": 0.225, "step": 9186500 }, { "epoch": 5.51, "learning_rate": 1.747283064557741e-05, "loss": 0.2268, "step": 9187000 }, { "epoch": 5.51, "learning_rate": 1.7470730680016848e-05, "loss": 0.2221, "step": 9187500 }, { "epoch": 5.51, "learning_rate": 1.746863071445628e-05, "loss": 0.2299, "step": 9188000 }, { "epoch": 5.51, "learning_rate": 1.746653494882684e-05, "loss": 0.2276, "step": 9188500 }, { "epoch": 5.51, "learning_rate": 1.7464434983266275e-05, "loss": 0.2288, "step": 9189000 }, { "epoch": 5.51, "learning_rate": 1.746233501770571e-05, "loss": 0.2248, "step": 9189500 }, { "epoch": 5.51, "learning_rate": 1.7460235052145142e-05, "loss": 0.2266, "step": 9190000 }, { "epoch": 5.51, "learning_rate": 1.7458139286515703e-05, "loss": 0.2194, "step": 9190500 }, { "epoch": 5.51, "learning_rate": 1.7456039320955136e-05, "loss": 0.2258, "step": 9191000 }, { "epoch": 5.51, "learning_rate": 1.7453939355394573e-05, "loss": 0.2297, "step": 9191500 }, { "epoch": 5.51, "learning_rate": 1.7451839389834006e-05, "loss": 0.2281, "step": 9192000 }, { "epoch": 5.51, "learning_rate": 1.744973942427344e-05, "loss": 0.2201, "step": 9192500 }, { "epoch": 5.51, "learning_rate": 1.7447639458712877e-05, "loss": 0.2276, "step": 9193000 }, { "epoch": 5.51, "learning_rate": 1.7445543693083434e-05, "loss": 0.2317, "step": 9193500 }, { "epoch": 5.51, "learning_rate": 1.7443443727522867e-05, "loss": 0.2304, "step": 9194000 }, { "epoch": 5.51, "learning_rate": 1.7441343761962304e-05, "loss": 0.2218, "step": 9194500 }, { "epoch": 5.51, "learning_rate": 1.7439243796401737e-05, "loss": 0.2258, "step": 9195000 }, { "epoch": 5.51, "learning_rate": 1.7437143830841174e-05, "loss": 0.2267, "step": 9195500 }, { "epoch": 5.51, "learning_rate": 1.743504806521173e-05, "loss": 0.221, "step": 9196000 }, { "epoch": 5.51, "learning_rate": 1.7432948099651165e-05, "loss": 0.227, "step": 9196500 }, { "epoch": 5.51, "learning_rate": 1.7430848134090598e-05, "loss": 0.2355, "step": 9197000 }, { "epoch": 5.51, "learning_rate": 1.7428748168530035e-05, "loss": 0.2285, "step": 9197500 }, { "epoch": 5.51, "learning_rate": 1.7426648202969472e-05, "loss": 0.2323, "step": 9198000 }, { "epoch": 5.51, "learning_rate": 1.742454823740891e-05, "loss": 0.2279, "step": 9198500 }, { "epoch": 5.52, "learning_rate": 1.7422448271848342e-05, "loss": 0.226, "step": 9199000 }, { "epoch": 5.52, "learning_rate": 1.7420348306287776e-05, "loss": 0.2282, "step": 9199500 }, { "epoch": 5.52, "learning_rate": 1.7418252540658333e-05, "loss": 0.2255, "step": 9200000 }, { "epoch": 5.52, "eval_loss": 0.2149570733308792, "eval_runtime": 1448.7344, "eval_samples_per_second": 363.573, "eval_steps_per_second": 60.596, "step": 9200000 }, { "epoch": 5.52, "learning_rate": 1.741615257509777e-05, "loss": 0.2224, "step": 9200500 }, { "epoch": 5.52, "learning_rate": 1.7414052609537203e-05, "loss": 0.2263, "step": 9201000 }, { "epoch": 5.52, "learning_rate": 1.741195264397664e-05, "loss": 0.2322, "step": 9201500 }, { "epoch": 5.52, "learning_rate": 1.7409861078278317e-05, "loss": 0.233, "step": 9202000 }, { "epoch": 5.52, "learning_rate": 1.740776111271775e-05, "loss": 0.2304, "step": 9202500 }, { "epoch": 5.52, "learning_rate": 1.7405661147157187e-05, "loss": 0.2342, "step": 9203000 }, { "epoch": 5.52, "learning_rate": 1.7403561181596624e-05, "loss": 0.2314, "step": 9203500 }, { "epoch": 5.52, "learning_rate": 1.7401461216036054e-05, "loss": 0.2282, "step": 9204000 }, { "epoch": 5.52, "learning_rate": 1.739936125047549e-05, "loss": 0.2271, "step": 9204500 }, { "epoch": 5.52, "learning_rate": 1.7397261284914928e-05, "loss": 0.2249, "step": 9205000 }, { "epoch": 5.52, "learning_rate": 1.7395165519285485e-05, "loss": 0.2303, "step": 9205500 }, { "epoch": 5.52, "learning_rate": 1.7393065553724918e-05, "loss": 0.2239, "step": 9206000 }, { "epoch": 5.52, "learning_rate": 1.739096558816435e-05, "loss": 0.2239, "step": 9206500 }, { "epoch": 5.52, "learning_rate": 1.738886562260379e-05, "loss": 0.2252, "step": 9207000 }, { "epoch": 5.52, "learning_rate": 1.7386765657043225e-05, "loss": 0.23, "step": 9207500 }, { "epoch": 5.52, "learning_rate": 1.738466569148266e-05, "loss": 0.2299, "step": 9208000 }, { "epoch": 5.52, "learning_rate": 1.7382565725922096e-05, "loss": 0.228, "step": 9208500 }, { "epoch": 5.52, "learning_rate": 1.738046576036153e-05, "loss": 0.2288, "step": 9209000 }, { "epoch": 5.52, "learning_rate": 1.7378369994732086e-05, "loss": 0.2284, "step": 9209500 }, { "epoch": 5.52, "learning_rate": 1.7376270029171523e-05, "loss": 0.2247, "step": 9210000 }, { "epoch": 5.52, "learning_rate": 1.7374170063610956e-05, "loss": 0.2274, "step": 9210500 }, { "epoch": 5.52, "learning_rate": 1.7372070098050393e-05, "loss": 0.2242, "step": 9211000 }, { "epoch": 5.52, "learning_rate": 1.736997853235207e-05, "loss": 0.2299, "step": 9211500 }, { "epoch": 5.52, "learning_rate": 1.7367878566791504e-05, "loss": 0.2274, "step": 9212000 }, { "epoch": 5.52, "learning_rate": 1.736577860123094e-05, "loss": 0.2295, "step": 9212500 }, { "epoch": 5.52, "learning_rate": 1.7363678635670377e-05, "loss": 0.2227, "step": 9213000 }, { "epoch": 5.52, "learning_rate": 1.7361578670109808e-05, "loss": 0.2259, "step": 9213500 }, { "epoch": 5.52, "learning_rate": 1.7359478704549244e-05, "loss": 0.2245, "step": 9214000 }, { "epoch": 5.52, "learning_rate": 1.735737873898868e-05, "loss": 0.2237, "step": 9214500 }, { "epoch": 5.52, "learning_rate": 1.7355278773428115e-05, "loss": 0.2264, "step": 9215000 }, { "epoch": 5.53, "learning_rate": 1.735318300779867e-05, "loss": 0.2286, "step": 9215500 }, { "epoch": 5.53, "learning_rate": 1.7351083042238105e-05, "loss": 0.2302, "step": 9216000 }, { "epoch": 5.53, "learning_rate": 1.7348983076677542e-05, "loss": 0.2241, "step": 9216500 }, { "epoch": 5.53, "learning_rate": 1.734688311111698e-05, "loss": 0.2306, "step": 9217000 }, { "epoch": 5.53, "learning_rate": 1.7344787345487536e-05, "loss": 0.2287, "step": 9217500 }, { "epoch": 5.53, "learning_rate": 1.734268737992697e-05, "loss": 0.2267, "step": 9218000 }, { "epoch": 5.53, "learning_rate": 1.7340587414366403e-05, "loss": 0.225, "step": 9218500 }, { "epoch": 5.53, "learning_rate": 1.733848744880584e-05, "loss": 0.2279, "step": 9219000 }, { "epoch": 5.53, "learning_rate": 1.7336387483245276e-05, "loss": 0.2308, "step": 9219500 }, { "epoch": 5.53, "learning_rate": 1.733428751768471e-05, "loss": 0.2275, "step": 9220000 }, { "epoch": 5.53, "learning_rate": 1.7332187552124147e-05, "loss": 0.2227, "step": 9220500 }, { "epoch": 5.53, "learning_rate": 1.733008758656358e-05, "loss": 0.226, "step": 9221000 }, { "epoch": 5.53, "learning_rate": 1.7327991820934137e-05, "loss": 0.23, "step": 9221500 }, { "epoch": 5.53, "learning_rate": 1.732589185537357e-05, "loss": 0.2255, "step": 9222000 }, { "epoch": 5.53, "learning_rate": 1.7323791889813007e-05, "loss": 0.2321, "step": 9222500 }, { "epoch": 5.53, "learning_rate": 1.732169192425244e-05, "loss": 0.2298, "step": 9223000 }, { "epoch": 5.53, "learning_rate": 1.7319600358554118e-05, "loss": 0.2262, "step": 9223500 }, { "epoch": 5.53, "learning_rate": 1.7317500392993555e-05, "loss": 0.2302, "step": 9224000 }, { "epoch": 5.53, "learning_rate": 1.731540042743299e-05, "loss": 0.2281, "step": 9224500 }, { "epoch": 5.53, "learning_rate": 1.7313300461872425e-05, "loss": 0.2242, "step": 9225000 }, { "epoch": 5.53, "learning_rate": 1.731120049631186e-05, "loss": 0.2265, "step": 9225500 }, { "epoch": 5.53, "learning_rate": 1.7309100530751295e-05, "loss": 0.2297, "step": 9226000 }, { "epoch": 5.53, "learning_rate": 1.7307000565190732e-05, "loss": 0.2227, "step": 9226500 }, { "epoch": 5.53, "learning_rate": 1.7304900599630166e-05, "loss": 0.2266, "step": 9227000 }, { "epoch": 5.53, "learning_rate": 1.730280483400072e-05, "loss": 0.2251, "step": 9227500 }, { "epoch": 5.53, "learning_rate": 1.730070906837128e-05, "loss": 0.2236, "step": 9228000 }, { "epoch": 5.53, "learning_rate": 1.7298609102810713e-05, "loss": 0.2273, "step": 9228500 }, { "epoch": 5.53, "learning_rate": 1.729650913725015e-05, "loss": 0.228, "step": 9229000 }, { "epoch": 5.53, "learning_rate": 1.7294413371620703e-05, "loss": 0.2276, "step": 9229500 }, { "epoch": 5.53, "learning_rate": 1.729231340606014e-05, "loss": 0.2223, "step": 9230000 }, { "epoch": 5.53, "learning_rate": 1.7290213440499574e-05, "loss": 0.2326, "step": 9230500 }, { "epoch": 5.53, "learning_rate": 1.728811347493901e-05, "loss": 0.2261, "step": 9231000 }, { "epoch": 5.53, "learning_rate": 1.7286013509378447e-05, "loss": 0.2292, "step": 9231500 }, { "epoch": 5.53, "learning_rate": 1.728391354381788e-05, "loss": 0.2268, "step": 9232000 }, { "epoch": 5.54, "learning_rate": 1.7281813578257314e-05, "loss": 0.2313, "step": 9232500 }, { "epoch": 5.54, "learning_rate": 1.727971361269675e-05, "loss": 0.2302, "step": 9233000 }, { "epoch": 5.54, "learning_rate": 1.7277613647136188e-05, "loss": 0.2315, "step": 9233500 }, { "epoch": 5.54, "learning_rate": 1.727551368157562e-05, "loss": 0.224, "step": 9234000 }, { "epoch": 5.54, "learning_rate": 1.727341371601506e-05, "loss": 0.2248, "step": 9234500 }, { "epoch": 5.54, "learning_rate": 1.7271313750454492e-05, "loss": 0.2288, "step": 9235000 }, { "epoch": 5.54, "learning_rate": 1.726921798482505e-05, "loss": 0.2279, "step": 9235500 }, { "epoch": 5.54, "learning_rate": 1.7267118019264482e-05, "loss": 0.2269, "step": 9236000 }, { "epoch": 5.54, "learning_rate": 1.7265022253635043e-05, "loss": 0.2241, "step": 9236500 }, { "epoch": 5.54, "learning_rate": 1.7262922288074473e-05, "loss": 0.2233, "step": 9237000 }, { "epoch": 5.54, "learning_rate": 1.726082232251391e-05, "loss": 0.2298, "step": 9237500 }, { "epoch": 5.54, "learning_rate": 1.7258722356953346e-05, "loss": 0.2255, "step": 9238000 }, { "epoch": 5.54, "learning_rate": 1.725662239139278e-05, "loss": 0.2297, "step": 9238500 }, { "epoch": 5.54, "learning_rate": 1.7254522425832217e-05, "loss": 0.2304, "step": 9239000 }, { "epoch": 5.54, "learning_rate": 1.7252422460271654e-05, "loss": 0.2256, "step": 9239500 }, { "epoch": 5.54, "learning_rate": 1.7250322494711087e-05, "loss": 0.2229, "step": 9240000 }, { "epoch": 5.54, "learning_rate": 1.7248226729081644e-05, "loss": 0.2262, "step": 9240500 }, { "epoch": 5.54, "learning_rate": 1.72461309634522e-05, "loss": 0.2305, "step": 9241000 }, { "epoch": 5.54, "learning_rate": 1.7244030997891634e-05, "loss": 0.225, "step": 9241500 }, { "epoch": 5.54, "learning_rate": 1.7241931032331068e-05, "loss": 0.2299, "step": 9242000 }, { "epoch": 5.54, "learning_rate": 1.7239831066770505e-05, "loss": 0.2356, "step": 9242500 }, { "epoch": 5.54, "learning_rate": 1.7237731101209938e-05, "loss": 0.2286, "step": 9243000 }, { "epoch": 5.54, "learning_rate": 1.7235631135649375e-05, "loss": 0.2274, "step": 9243500 }, { "epoch": 5.54, "learning_rate": 1.7233531170088812e-05, "loss": 0.2302, "step": 9244000 }, { "epoch": 5.54, "learning_rate": 1.7231435404459365e-05, "loss": 0.2256, "step": 9244500 }, { "epoch": 5.54, "learning_rate": 1.7229335438898802e-05, "loss": 0.2302, "step": 9245000 }, { "epoch": 5.54, "learning_rate": 1.7227235473338236e-05, "loss": 0.2247, "step": 9245500 }, { "epoch": 5.54, "learning_rate": 1.7225135507777673e-05, "loss": 0.2273, "step": 9246000 }, { "epoch": 5.54, "learning_rate": 1.722303554221711e-05, "loss": 0.2274, "step": 9246500 }, { "epoch": 5.54, "learning_rate": 1.7220935576656543e-05, "loss": 0.2264, "step": 9247000 }, { "epoch": 5.54, "learning_rate": 1.7218835611095976e-05, "loss": 0.2259, "step": 9247500 }, { "epoch": 5.54, "learning_rate": 1.7216735645535413e-05, "loss": 0.2298, "step": 9248000 }, { "epoch": 5.54, "learning_rate": 1.721464407983709e-05, "loss": 0.2315, "step": 9248500 }, { "epoch": 5.55, "learning_rate": 1.7212548314207647e-05, "loss": 0.2248, "step": 9249000 }, { "epoch": 5.55, "learning_rate": 1.721044834864708e-05, "loss": 0.2269, "step": 9249500 }, { "epoch": 5.55, "learning_rate": 1.7208348383086518e-05, "loss": 0.2308, "step": 9250000 }, { "epoch": 5.55, "learning_rate": 1.7206248417525954e-05, "loss": 0.2292, "step": 9250500 }, { "epoch": 5.55, "learning_rate": 1.7204148451965388e-05, "loss": 0.232, "step": 9251000 }, { "epoch": 5.55, "learning_rate": 1.720204848640482e-05, "loss": 0.2237, "step": 9251500 }, { "epoch": 5.55, "learning_rate": 1.7199948520844258e-05, "loss": 0.2264, "step": 9252000 }, { "epoch": 5.55, "learning_rate": 1.719784855528369e-05, "loss": 0.2258, "step": 9252500 }, { "epoch": 5.55, "learning_rate": 1.719574858972313e-05, "loss": 0.2229, "step": 9253000 }, { "epoch": 5.55, "learning_rate": 1.7193648624162565e-05, "loss": 0.2287, "step": 9253500 }, { "epoch": 5.55, "learning_rate": 1.7191548658602e-05, "loss": 0.2296, "step": 9254000 }, { "epoch": 5.55, "learning_rate": 1.7189448693041432e-05, "loss": 0.2285, "step": 9254500 }, { "epoch": 5.55, "learning_rate": 1.718735292741199e-05, "loss": 0.2275, "step": 9255000 }, { "epoch": 5.55, "learning_rate": 1.7185252961851426e-05, "loss": 0.234, "step": 9255500 }, { "epoch": 5.55, "learning_rate": 1.7183152996290863e-05, "loss": 0.2293, "step": 9256000 }, { "epoch": 5.55, "learning_rate": 1.7181053030730293e-05, "loss": 0.2257, "step": 9256500 }, { "epoch": 5.55, "learning_rate": 1.7178957265100853e-05, "loss": 0.227, "step": 9257000 }, { "epoch": 5.55, "learning_rate": 1.717686149947141e-05, "loss": 0.2333, "step": 9257500 }, { "epoch": 5.55, "learning_rate": 1.7174761533910844e-05, "loss": 0.2261, "step": 9258000 }, { "epoch": 5.55, "learning_rate": 1.7172661568350277e-05, "loss": 0.2244, "step": 9258500 }, { "epoch": 5.55, "learning_rate": 1.7170561602789714e-05, "loss": 0.2273, "step": 9259000 }, { "epoch": 5.55, "learning_rate": 1.7168461637229148e-05, "loss": 0.225, "step": 9259500 }, { "epoch": 5.55, "learning_rate": 1.7166365871599708e-05, "loss": 0.227, "step": 9260000 }, { "epoch": 5.55, "learning_rate": 1.716426590603914e-05, "loss": 0.2329, "step": 9260500 }, { "epoch": 5.55, "learning_rate": 1.7162165940478575e-05, "loss": 0.229, "step": 9261000 }, { "epoch": 5.55, "learning_rate": 1.716006597491801e-05, "loss": 0.2261, "step": 9261500 }, { "epoch": 5.55, "learning_rate": 1.7157966009357445e-05, "loss": 0.2319, "step": 9262000 }, { "epoch": 5.55, "learning_rate": 1.7155866043796882e-05, "loss": 0.2279, "step": 9262500 }, { "epoch": 5.55, "learning_rate": 1.715376607823632e-05, "loss": 0.2258, "step": 9263000 }, { "epoch": 5.55, "learning_rate": 1.7151670312606872e-05, "loss": 0.2245, "step": 9263500 }, { "epoch": 5.55, "learning_rate": 1.714957034704631e-05, "loss": 0.2276, "step": 9264000 }, { "epoch": 5.55, "learning_rate": 1.7147470381485743e-05, "loss": 0.224, "step": 9264500 }, { "epoch": 5.55, "learning_rate": 1.714537041592518e-05, "loss": 0.2211, "step": 9265000 }, { "epoch": 5.56, "learning_rate": 1.7143270450364616e-05, "loss": 0.2276, "step": 9265500 }, { "epoch": 5.56, "learning_rate": 1.7141170484804046e-05, "loss": 0.2252, "step": 9266000 }, { "epoch": 5.56, "learning_rate": 1.7139070519243483e-05, "loss": 0.231, "step": 9266500 }, { "epoch": 5.56, "learning_rate": 1.713697055368292e-05, "loss": 0.2262, "step": 9267000 }, { "epoch": 5.56, "learning_rate": 1.7134874788053477e-05, "loss": 0.2272, "step": 9267500 }, { "epoch": 5.56, "learning_rate": 1.713277482249291e-05, "loss": 0.2245, "step": 9268000 }, { "epoch": 5.56, "learning_rate": 1.7130674856932344e-05, "loss": 0.2236, "step": 9268500 }, { "epoch": 5.56, "learning_rate": 1.712857489137178e-05, "loss": 0.2233, "step": 9269000 }, { "epoch": 5.56, "learning_rate": 1.7126479125742338e-05, "loss": 0.2258, "step": 9269500 }, { "epoch": 5.56, "learning_rate": 1.7124379160181775e-05, "loss": 0.2282, "step": 9270000 }, { "epoch": 5.56, "learning_rate": 1.7122279194621208e-05, "loss": 0.2336, "step": 9270500 }, { "epoch": 5.56, "learning_rate": 1.7120183428991765e-05, "loss": 0.2304, "step": 9271000 }, { "epoch": 5.56, "learning_rate": 1.71180834634312e-05, "loss": 0.2272, "step": 9271500 }, { "epoch": 5.56, "learning_rate": 1.7115983497870635e-05, "loss": 0.2249, "step": 9272000 }, { "epoch": 5.56, "learning_rate": 1.7113883532310072e-05, "loss": 0.2272, "step": 9272500 }, { "epoch": 5.56, "learning_rate": 1.7111783566749506e-05, "loss": 0.2249, "step": 9273000 }, { "epoch": 5.56, "learning_rate": 1.710968360118894e-05, "loss": 0.2273, "step": 9273500 }, { "epoch": 5.56, "learning_rate": 1.7107583635628376e-05, "loss": 0.2266, "step": 9274000 }, { "epoch": 5.56, "learning_rate": 1.710548367006781e-05, "loss": 0.2243, "step": 9274500 }, { "epoch": 5.56, "learning_rate": 1.7103387904438366e-05, "loss": 0.2246, "step": 9275000 }, { "epoch": 5.56, "learning_rate": 1.7101292138808923e-05, "loss": 0.2306, "step": 9275500 }, { "epoch": 5.56, "learning_rate": 1.7099192173248357e-05, "loss": 0.2266, "step": 9276000 }, { "epoch": 5.56, "learning_rate": 1.7097092207687794e-05, "loss": 0.2262, "step": 9276500 }, { "epoch": 5.56, "learning_rate": 1.709499224212723e-05, "loss": 0.2273, "step": 9277000 }, { "epoch": 5.56, "learning_rate": 1.7092896476497784e-05, "loss": 0.2287, "step": 9277500 }, { "epoch": 5.56, "learning_rate": 1.709079651093722e-05, "loss": 0.228, "step": 9278000 }, { "epoch": 5.56, "learning_rate": 1.7088696545376654e-05, "loss": 0.2283, "step": 9278500 }, { "epoch": 5.56, "learning_rate": 1.708659657981609e-05, "loss": 0.2281, "step": 9279000 }, { "epoch": 5.56, "learning_rate": 1.7084496614255528e-05, "loss": 0.2254, "step": 9279500 }, { "epoch": 5.56, "learning_rate": 1.708239664869496e-05, "loss": 0.2336, "step": 9280000 }, { "epoch": 5.56, "learning_rate": 1.7080296683134395e-05, "loss": 0.2269, "step": 9280500 }, { "epoch": 5.56, "learning_rate": 1.7078196717573832e-05, "loss": 0.2223, "step": 9281000 }, { "epoch": 5.56, "learning_rate": 1.707610095194439e-05, "loss": 0.2234, "step": 9281500 }, { "epoch": 5.56, "learning_rate": 1.7074000986383826e-05, "loss": 0.2229, "step": 9282000 }, { "epoch": 5.57, "learning_rate": 1.707190102082326e-05, "loss": 0.2273, "step": 9282500 }, { "epoch": 5.57, "learning_rate": 1.7069801055262693e-05, "loss": 0.2231, "step": 9283000 }, { "epoch": 5.57, "learning_rate": 1.706770528963325e-05, "loss": 0.2254, "step": 9283500 }, { "epoch": 5.57, "learning_rate": 1.7065605324072686e-05, "loss": 0.2253, "step": 9284000 }, { "epoch": 5.57, "learning_rate": 1.706350535851212e-05, "loss": 0.2291, "step": 9284500 }, { "epoch": 5.57, "learning_rate": 1.7061405392951553e-05, "loss": 0.2209, "step": 9285000 }, { "epoch": 5.57, "learning_rate": 1.705930542739099e-05, "loss": 0.227, "step": 9285500 }, { "epoch": 5.57, "learning_rate": 1.7057205461830427e-05, "loss": 0.2303, "step": 9286000 }, { "epoch": 5.57, "learning_rate": 1.705510549626986e-05, "loss": 0.2203, "step": 9286500 }, { "epoch": 5.57, "learning_rate": 1.7053005530709297e-05, "loss": 0.2285, "step": 9287000 }, { "epoch": 5.57, "learning_rate": 1.705090976507985e-05, "loss": 0.2257, "step": 9287500 }, { "epoch": 5.57, "learning_rate": 1.7048813999450408e-05, "loss": 0.232, "step": 9288000 }, { "epoch": 5.57, "learning_rate": 1.7046714033889845e-05, "loss": 0.2323, "step": 9288500 }, { "epoch": 5.57, "learning_rate": 1.704461406832928e-05, "loss": 0.2282, "step": 9289000 }, { "epoch": 5.57, "learning_rate": 1.7042514102768715e-05, "loss": 0.2243, "step": 9289500 }, { "epoch": 5.57, "learning_rate": 1.704041413720815e-05, "loss": 0.2303, "step": 9290000 }, { "epoch": 5.57, "learning_rate": 1.7038314171647585e-05, "loss": 0.2275, "step": 9290500 }, { "epoch": 5.57, "learning_rate": 1.703621420608702e-05, "loss": 0.2265, "step": 9291000 }, { "epoch": 5.57, "learning_rate": 1.7034114240526456e-05, "loss": 0.2269, "step": 9291500 }, { "epoch": 5.57, "learning_rate": 1.7032018474897013e-05, "loss": 0.2332, "step": 9292000 }, { "epoch": 5.57, "learning_rate": 1.7029918509336446e-05, "loss": 0.2273, "step": 9292500 }, { "epoch": 5.57, "learning_rate": 1.7027818543775883e-05, "loss": 0.2303, "step": 9293000 }, { "epoch": 5.57, "learning_rate": 1.7025718578215316e-05, "loss": 0.2266, "step": 9293500 }, { "epoch": 5.57, "learning_rate": 1.7023622812585873e-05, "loss": 0.227, "step": 9294000 }, { "epoch": 5.57, "learning_rate": 1.7021522847025307e-05, "loss": 0.2271, "step": 9294500 }, { "epoch": 5.57, "learning_rate": 1.7019422881464744e-05, "loss": 0.2311, "step": 9295000 }, { "epoch": 5.57, "learning_rate": 1.7017322915904177e-05, "loss": 0.2268, "step": 9295500 }, { "epoch": 5.57, "learning_rate": 1.7015227150274737e-05, "loss": 0.2266, "step": 9296000 }, { "epoch": 5.57, "learning_rate": 1.701312718471417e-05, "loss": 0.2315, "step": 9296500 }, { "epoch": 5.57, "learning_rate": 1.7011027219153604e-05, "loss": 0.2248, "step": 9297000 }, { "epoch": 5.57, "learning_rate": 1.700892725359304e-05, "loss": 0.2251, "step": 9297500 }, { "epoch": 5.57, "learning_rate": 1.7006831487963598e-05, "loss": 0.2285, "step": 9298000 }, { "epoch": 5.57, "learning_rate": 1.700473152240303e-05, "loss": 0.2282, "step": 9298500 }, { "epoch": 5.58, "learning_rate": 1.700263575677359e-05, "loss": 0.2371, "step": 9299000 }, { "epoch": 5.58, "learning_rate": 1.7000535791213022e-05, "loss": 0.2315, "step": 9299500 }, { "epoch": 5.58, "learning_rate": 1.699843582565246e-05, "loss": 0.2268, "step": 9300000 }, { "epoch": 5.58, "eval_loss": 0.21397361159324646, "eval_runtime": 1463.8676, "eval_samples_per_second": 359.814, "eval_steps_per_second": 59.969, "step": 9300000 }, { "epoch": 5.58, "learning_rate": 1.6996335860091896e-05, "loss": 0.2277, "step": 9300500 }, { "epoch": 5.58, "learning_rate": 1.699423589453133e-05, "loss": 0.2316, "step": 9301000 }, { "epoch": 5.58, "learning_rate": 1.6992135928970766e-05, "loss": 0.2257, "step": 9301500 }, { "epoch": 5.58, "learning_rate": 1.699004016334132e-05, "loss": 0.2301, "step": 9302000 }, { "epoch": 5.58, "learning_rate": 1.6987940197780756e-05, "loss": 0.2301, "step": 9302500 }, { "epoch": 5.58, "learning_rate": 1.6985840232220193e-05, "loss": 0.2264, "step": 9303000 }, { "epoch": 5.58, "learning_rate": 1.6983740266659627e-05, "loss": 0.2289, "step": 9303500 }, { "epoch": 5.58, "learning_rate": 1.6981640301099064e-05, "loss": 0.2294, "step": 9304000 }, { "epoch": 5.58, "learning_rate": 1.6979540335538497e-05, "loss": 0.2301, "step": 9304500 }, { "epoch": 5.58, "learning_rate": 1.6977444569909054e-05, "loss": 0.2264, "step": 9305000 }, { "epoch": 5.58, "learning_rate": 1.6975344604348487e-05, "loss": 0.2277, "step": 9305500 }, { "epoch": 5.58, "learning_rate": 1.6973244638787924e-05, "loss": 0.2299, "step": 9306000 }, { "epoch": 5.58, "learning_rate": 1.6971144673227358e-05, "loss": 0.2277, "step": 9306500 }, { "epoch": 5.58, "learning_rate": 1.6969044707666795e-05, "loss": 0.2262, "step": 9307000 }, { "epoch": 5.58, "learning_rate": 1.6966944742106228e-05, "loss": 0.2275, "step": 9307500 }, { "epoch": 5.58, "learning_rate": 1.6964844776545665e-05, "loss": 0.2283, "step": 9308000 }, { "epoch": 5.58, "learning_rate": 1.6962744810985102e-05, "loss": 0.2245, "step": 9308500 }, { "epoch": 5.58, "learning_rate": 1.6960649045355655e-05, "loss": 0.2256, "step": 9309000 }, { "epoch": 5.58, "learning_rate": 1.695854907979509e-05, "loss": 0.2297, "step": 9309500 }, { "epoch": 5.58, "learning_rate": 1.695645331416565e-05, "loss": 0.2267, "step": 9310000 }, { "epoch": 5.58, "learning_rate": 1.6954353348605083e-05, "loss": 0.2251, "step": 9310500 }, { "epoch": 5.58, "learning_rate": 1.695225338304452e-05, "loss": 0.2179, "step": 9311000 }, { "epoch": 5.58, "learning_rate": 1.6950153417483953e-05, "loss": 0.2273, "step": 9311500 }, { "epoch": 5.58, "learning_rate": 1.6948053451923386e-05, "loss": 0.2232, "step": 9312000 }, { "epoch": 5.58, "learning_rate": 1.6945957686293943e-05, "loss": 0.2303, "step": 9312500 }, { "epoch": 5.58, "learning_rate": 1.694385772073338e-05, "loss": 0.2254, "step": 9313000 }, { "epoch": 5.58, "learning_rate": 1.6941757755172817e-05, "loss": 0.2279, "step": 9313500 }, { "epoch": 5.58, "learning_rate": 1.693965778961225e-05, "loss": 0.2216, "step": 9314000 }, { "epoch": 5.58, "learning_rate": 1.6937557824051684e-05, "loss": 0.2261, "step": 9314500 }, { "epoch": 5.58, "learning_rate": 1.693545785849112e-05, "loss": 0.2301, "step": 9315000 }, { "epoch": 5.59, "learning_rate": 1.6933362092861678e-05, "loss": 0.2249, "step": 9315500 }, { "epoch": 5.59, "learning_rate": 1.693126212730111e-05, "loss": 0.2291, "step": 9316000 }, { "epoch": 5.59, "learning_rate": 1.6929162161740545e-05, "loss": 0.2221, "step": 9316500 }, { "epoch": 5.59, "learning_rate": 1.692706219617998e-05, "loss": 0.2276, "step": 9317000 }, { "epoch": 5.59, "learning_rate": 1.692496223061942e-05, "loss": 0.227, "step": 9317500 }, { "epoch": 5.59, "learning_rate": 1.6922862265058855e-05, "loss": 0.2276, "step": 9318000 }, { "epoch": 5.59, "learning_rate": 1.692076229949829e-05, "loss": 0.2229, "step": 9318500 }, { "epoch": 5.59, "learning_rate": 1.6918662333937722e-05, "loss": 0.2286, "step": 9319000 }, { "epoch": 5.59, "learning_rate": 1.691656656830828e-05, "loss": 0.2277, "step": 9319500 }, { "epoch": 5.59, "learning_rate": 1.6914466602747716e-05, "loss": 0.2224, "step": 9320000 }, { "epoch": 5.59, "learning_rate": 1.691236663718715e-05, "loss": 0.2222, "step": 9320500 }, { "epoch": 5.59, "learning_rate": 1.6910266671626586e-05, "loss": 0.2216, "step": 9321000 }, { "epoch": 5.59, "learning_rate": 1.690817090599714e-05, "loss": 0.227, "step": 9321500 }, { "epoch": 5.59, "learning_rate": 1.6906070940436577e-05, "loss": 0.2273, "step": 9322000 }, { "epoch": 5.59, "learning_rate": 1.6903970974876014e-05, "loss": 0.2281, "step": 9322500 }, { "epoch": 5.59, "learning_rate": 1.6901871009315447e-05, "loss": 0.2221, "step": 9323000 }, { "epoch": 5.59, "learning_rate": 1.6899775243686e-05, "loss": 0.2308, "step": 9323500 }, { "epoch": 5.59, "learning_rate": 1.6897675278125437e-05, "loss": 0.227, "step": 9324000 }, { "epoch": 5.59, "learning_rate": 1.6895575312564874e-05, "loss": 0.2283, "step": 9324500 }, { "epoch": 5.59, "learning_rate": 1.689347534700431e-05, "loss": 0.22, "step": 9325000 }, { "epoch": 5.59, "learning_rate": 1.6891379581374865e-05, "loss": 0.228, "step": 9325500 }, { "epoch": 5.59, "learning_rate": 1.6889279615814298e-05, "loss": 0.2256, "step": 9326000 }, { "epoch": 5.59, "learning_rate": 1.6887179650253735e-05, "loss": 0.2243, "step": 9326500 }, { "epoch": 5.59, "learning_rate": 1.6885079684693172e-05, "loss": 0.228, "step": 9327000 }, { "epoch": 5.59, "learning_rate": 1.6882988118994846e-05, "loss": 0.2327, "step": 9327500 }, { "epoch": 5.59, "learning_rate": 1.6880888153434282e-05, "loss": 0.2231, "step": 9328000 }, { "epoch": 5.59, "learning_rate": 1.687878818787372e-05, "loss": 0.2281, "step": 9328500 }, { "epoch": 5.59, "learning_rate": 1.6876688222313153e-05, "loss": 0.2248, "step": 9329000 }, { "epoch": 5.59, "learning_rate": 1.687458825675259e-05, "loss": 0.2271, "step": 9329500 }, { "epoch": 5.59, "learning_rate": 1.6872488291192026e-05, "loss": 0.2234, "step": 9330000 }, { "epoch": 5.59, "learning_rate": 1.687039252556258e-05, "loss": 0.2271, "step": 9330500 }, { "epoch": 5.59, "learning_rate": 1.6868292560002017e-05, "loss": 0.2292, "step": 9331000 }, { "epoch": 5.59, "learning_rate": 1.6866196794372574e-05, "loss": 0.2305, "step": 9331500 }, { "epoch": 5.59, "learning_rate": 1.6864096828812007e-05, "loss": 0.2272, "step": 9332000 }, { "epoch": 5.6, "learning_rate": 1.686199686325144e-05, "loss": 0.2278, "step": 9332500 }, { "epoch": 5.6, "learning_rate": 1.6859896897690878e-05, "loss": 0.2293, "step": 9333000 }, { "epoch": 5.6, "learning_rate": 1.685779693213031e-05, "loss": 0.2314, "step": 9333500 }, { "epoch": 5.6, "learning_rate": 1.6855696966569748e-05, "loss": 0.2259, "step": 9334000 }, { "epoch": 5.6, "learning_rate": 1.6853597001009185e-05, "loss": 0.2251, "step": 9334500 }, { "epoch": 5.6, "learning_rate": 1.6851497035448618e-05, "loss": 0.231, "step": 9335000 }, { "epoch": 5.6, "learning_rate": 1.684939706988805e-05, "loss": 0.227, "step": 9335500 }, { "epoch": 5.6, "learning_rate": 1.684730130425861e-05, "loss": 0.2283, "step": 9336000 }, { "epoch": 5.6, "learning_rate": 1.6845201338698045e-05, "loss": 0.226, "step": 9336500 }, { "epoch": 5.6, "learning_rate": 1.6843101373137482e-05, "loss": 0.2222, "step": 9337000 }, { "epoch": 5.6, "learning_rate": 1.6841001407576916e-05, "loss": 0.2202, "step": 9337500 }, { "epoch": 5.6, "learning_rate": 1.683890144201635e-05, "loss": 0.2313, "step": 9338000 }, { "epoch": 5.6, "learning_rate": 1.6836801476455786e-05, "loss": 0.2276, "step": 9338500 }, { "epoch": 5.6, "learning_rate": 1.6834701510895223e-05, "loss": 0.2263, "step": 9339000 }, { "epoch": 5.6, "learning_rate": 1.6832601545334656e-05, "loss": 0.2244, "step": 9339500 }, { "epoch": 5.6, "learning_rate": 1.683050577970521e-05, "loss": 0.225, "step": 9340000 }, { "epoch": 5.6, "learning_rate": 1.6828405814144647e-05, "loss": 0.2259, "step": 9340500 }, { "epoch": 5.6, "learning_rate": 1.6826305848584084e-05, "loss": 0.2274, "step": 9341000 }, { "epoch": 5.6, "learning_rate": 1.6824205883023517e-05, "loss": 0.2281, "step": 9341500 }, { "epoch": 5.6, "learning_rate": 1.6822114317325194e-05, "loss": 0.2297, "step": 9342000 }, { "epoch": 5.6, "learning_rate": 1.682001435176463e-05, "loss": 0.2283, "step": 9342500 }, { "epoch": 5.6, "learning_rate": 1.6817914386204064e-05, "loss": 0.2267, "step": 9343000 }, { "epoch": 5.6, "learning_rate": 1.68158144206435e-05, "loss": 0.2227, "step": 9343500 }, { "epoch": 5.6, "learning_rate": 1.6813718655014058e-05, "loss": 0.2334, "step": 9344000 }, { "epoch": 5.6, "learning_rate": 1.6811618689453492e-05, "loss": 0.2269, "step": 9344500 }, { "epoch": 5.6, "learning_rate": 1.680951872389293e-05, "loss": 0.2257, "step": 9345000 }, { "epoch": 5.6, "learning_rate": 1.6807418758332362e-05, "loss": 0.2328, "step": 9345500 }, { "epoch": 5.6, "learning_rate": 1.68053187927718e-05, "loss": 0.2256, "step": 9346000 }, { "epoch": 5.6, "learning_rate": 1.6803218827211236e-05, "loss": 0.2258, "step": 9346500 }, { "epoch": 5.6, "learning_rate": 1.6801118861650666e-05, "loss": 0.2201, "step": 9347000 }, { "epoch": 5.6, "learning_rate": 1.6799023096021226e-05, "loss": 0.2311, "step": 9347500 }, { "epoch": 5.6, "learning_rate": 1.679692313046066e-05, "loss": 0.227, "step": 9348000 }, { "epoch": 5.6, "learning_rate": 1.6794823164900096e-05, "loss": 0.2214, "step": 9348500 }, { "epoch": 5.61, "learning_rate": 1.6792723199339533e-05, "loss": 0.2295, "step": 9349000 }, { "epoch": 5.61, "learning_rate": 1.6790623233778963e-05, "loss": 0.227, "step": 9349500 }, { "epoch": 5.61, "learning_rate": 1.678852746814952e-05, "loss": 0.222, "step": 9350000 }, { "epoch": 5.61, "learning_rate": 1.6786427502588957e-05, "loss": 0.2305, "step": 9350500 }, { "epoch": 5.61, "learning_rate": 1.6784327537028394e-05, "loss": 0.2269, "step": 9351000 }, { "epoch": 5.61, "learning_rate": 1.6782227571467827e-05, "loss": 0.2247, "step": 9351500 }, { "epoch": 5.61, "learning_rate": 1.678012760590726e-05, "loss": 0.2283, "step": 9352000 }, { "epoch": 5.61, "learning_rate": 1.6778027640346698e-05, "loss": 0.225, "step": 9352500 }, { "epoch": 5.61, "learning_rate": 1.6775927674786135e-05, "loss": 0.2223, "step": 9353000 }, { "epoch": 5.61, "learning_rate": 1.6773827709225568e-05, "loss": 0.2279, "step": 9353500 }, { "epoch": 5.61, "learning_rate": 1.6771731943596125e-05, "loss": 0.2273, "step": 9354000 }, { "epoch": 5.61, "learning_rate": 1.676963197803556e-05, "loss": 0.2238, "step": 9354500 }, { "epoch": 5.61, "learning_rate": 1.6767536212406115e-05, "loss": 0.2271, "step": 9355000 }, { "epoch": 5.61, "learning_rate": 1.6765436246845552e-05, "loss": 0.2227, "step": 9355500 }, { "epoch": 5.61, "learning_rate": 1.676333628128499e-05, "loss": 0.2229, "step": 9356000 }, { "epoch": 5.61, "learning_rate": 1.676123631572442e-05, "loss": 0.2251, "step": 9356500 }, { "epoch": 5.61, "learning_rate": 1.6759136350163856e-05, "loss": 0.2309, "step": 9357000 }, { "epoch": 5.61, "learning_rate": 1.6757036384603293e-05, "loss": 0.229, "step": 9357500 }, { "epoch": 5.61, "learning_rate": 1.6754936419042726e-05, "loss": 0.2294, "step": 9358000 }, { "epoch": 5.61, "learning_rate": 1.6752836453482163e-05, "loss": 0.2271, "step": 9358500 }, { "epoch": 5.61, "learning_rate": 1.6750740687852717e-05, "loss": 0.229, "step": 9359000 }, { "epoch": 5.61, "learning_rate": 1.6748640722292154e-05, "loss": 0.2264, "step": 9359500 }, { "epoch": 5.61, "learning_rate": 1.674654075673159e-05, "loss": 0.2293, "step": 9360000 }, { "epoch": 5.61, "learning_rate": 1.6744440791171024e-05, "loss": 0.2229, "step": 9360500 }, { "epoch": 5.61, "learning_rate": 1.674234502554158e-05, "loss": 0.2362, "step": 9361000 }, { "epoch": 5.61, "learning_rate": 1.6740245059981014e-05, "loss": 0.2219, "step": 9361500 }, { "epoch": 5.61, "learning_rate": 1.673814509442045e-05, "loss": 0.2277, "step": 9362000 }, { "epoch": 5.61, "learning_rate": 1.6736045128859885e-05, "loss": 0.2246, "step": 9362500 }, { "epoch": 5.61, "learning_rate": 1.6733949363230445e-05, "loss": 0.2237, "step": 9363000 }, { "epoch": 5.61, "learning_rate": 1.673184939766988e-05, "loss": 0.2239, "step": 9363500 }, { "epoch": 5.61, "learning_rate": 1.6729749432109312e-05, "loss": 0.2221, "step": 9364000 }, { "epoch": 5.61, "learning_rate": 1.672764946654875e-05, "loss": 0.225, "step": 9364500 }, { "epoch": 5.61, "learning_rate": 1.6725549500988182e-05, "loss": 0.2305, "step": 9365000 }, { "epoch": 5.61, "learning_rate": 1.672344953542762e-05, "loss": 0.2258, "step": 9365500 }, { "epoch": 5.62, "learning_rate": 1.6721349569867056e-05, "loss": 0.2254, "step": 9366000 }, { "epoch": 5.62, "learning_rate": 1.6719249604306486e-05, "loss": 0.2246, "step": 9366500 }, { "epoch": 5.62, "learning_rate": 1.6717153838677046e-05, "loss": 0.2254, "step": 9367000 }, { "epoch": 5.62, "learning_rate": 1.671505387311648e-05, "loss": 0.2231, "step": 9367500 }, { "epoch": 5.62, "learning_rate": 1.6712962307418157e-05, "loss": 0.2329, "step": 9368000 }, { "epoch": 5.62, "learning_rate": 1.6710862341857594e-05, "loss": 0.2274, "step": 9368500 }, { "epoch": 5.62, "learning_rate": 1.6708762376297027e-05, "loss": 0.2258, "step": 9369000 }, { "epoch": 5.62, "learning_rate": 1.6706662410736464e-05, "loss": 0.2303, "step": 9369500 }, { "epoch": 5.62, "learning_rate": 1.67045624451759e-05, "loss": 0.2271, "step": 9370000 }, { "epoch": 5.62, "learning_rate": 1.6702462479615334e-05, "loss": 0.2278, "step": 9370500 }, { "epoch": 5.62, "learning_rate": 1.6700362514054768e-05, "loss": 0.2283, "step": 9371000 }, { "epoch": 5.62, "learning_rate": 1.6698262548494205e-05, "loss": 0.224, "step": 9371500 }, { "epoch": 5.62, "learning_rate": 1.6696162582933638e-05, "loss": 0.2226, "step": 9372000 }, { "epoch": 5.62, "learning_rate": 1.6694062617373075e-05, "loss": 0.2284, "step": 9372500 }, { "epoch": 5.62, "learning_rate": 1.6691962651812512e-05, "loss": 0.2275, "step": 9373000 }, { "epoch": 5.62, "learning_rate": 1.6689862686251945e-05, "loss": 0.2285, "step": 9373500 }, { "epoch": 5.62, "learning_rate": 1.6687766920622502e-05, "loss": 0.2326, "step": 9374000 }, { "epoch": 5.62, "learning_rate": 1.6685666955061936e-05, "loss": 0.2252, "step": 9374500 }, { "epoch": 5.62, "learning_rate": 1.6683566989501373e-05, "loss": 0.2244, "step": 9375000 }, { "epoch": 5.62, "learning_rate": 1.668146702394081e-05, "loss": 0.2306, "step": 9375500 }, { "epoch": 5.62, "learning_rate": 1.667936705838024e-05, "loss": 0.2272, "step": 9376000 }, { "epoch": 5.62, "learning_rate": 1.6677267092819676e-05, "loss": 0.2226, "step": 9376500 }, { "epoch": 5.62, "learning_rate": 1.6675167127259113e-05, "loss": 0.2261, "step": 9377000 }, { "epoch": 5.62, "learning_rate": 1.667307136162967e-05, "loss": 0.226, "step": 9377500 }, { "epoch": 5.62, "learning_rate": 1.6670971396069107e-05, "loss": 0.2281, "step": 9378000 }, { "epoch": 5.62, "learning_rate": 1.6668871430508537e-05, "loss": 0.2286, "step": 9378500 }, { "epoch": 5.62, "learning_rate": 1.6666771464947974e-05, "loss": 0.2213, "step": 9379000 }, { "epoch": 5.62, "learning_rate": 1.666467569931853e-05, "loss": 0.2277, "step": 9379500 }, { "epoch": 5.62, "learning_rate": 1.6662575733757968e-05, "loss": 0.2236, "step": 9380000 }, { "epoch": 5.62, "learning_rate": 1.66604757681974e-05, "loss": 0.2252, "step": 9380500 }, { "epoch": 5.62, "learning_rate": 1.6658375802636835e-05, "loss": 0.2304, "step": 9381000 }, { "epoch": 5.62, "learning_rate": 1.665627583707627e-05, "loss": 0.2288, "step": 9381500 }, { "epoch": 5.62, "learning_rate": 1.665417587151571e-05, "loss": 0.2247, "step": 9382000 }, { "epoch": 5.63, "learning_rate": 1.6652080105886265e-05, "loss": 0.2232, "step": 9382500 }, { "epoch": 5.63, "learning_rate": 1.66499801403257e-05, "loss": 0.2272, "step": 9383000 }, { "epoch": 5.63, "learning_rate": 1.6647880174765132e-05, "loss": 0.2293, "step": 9383500 }, { "epoch": 5.63, "learning_rate": 1.664578020920457e-05, "loss": 0.2243, "step": 9384000 }, { "epoch": 5.63, "learning_rate": 1.6643680243644003e-05, "loss": 0.2242, "step": 9384500 }, { "epoch": 5.63, "learning_rate": 1.664158027808344e-05, "loss": 0.2291, "step": 9385000 }, { "epoch": 5.63, "learning_rate": 1.6639480312522876e-05, "loss": 0.227, "step": 9385500 }, { "epoch": 5.63, "learning_rate": 1.663738034696231e-05, "loss": 0.2248, "step": 9386000 }, { "epoch": 5.63, "learning_rate": 1.6635284581332867e-05, "loss": 0.2241, "step": 9386500 }, { "epoch": 5.63, "learning_rate": 1.6633188815703424e-05, "loss": 0.2224, "step": 9387000 }, { "epoch": 5.63, "learning_rate": 1.6631088850142857e-05, "loss": 0.22, "step": 9387500 }, { "epoch": 5.63, "learning_rate": 1.662898888458229e-05, "loss": 0.2299, "step": 9388000 }, { "epoch": 5.63, "learning_rate": 1.6626888919021727e-05, "loss": 0.2242, "step": 9388500 }, { "epoch": 5.63, "learning_rate": 1.6624788953461164e-05, "loss": 0.2279, "step": 9389000 }, { "epoch": 5.63, "learning_rate": 1.6622688987900598e-05, "loss": 0.2284, "step": 9389500 }, { "epoch": 5.63, "learning_rate": 1.6620589022340035e-05, "loss": 0.2279, "step": 9390000 }, { "epoch": 5.63, "learning_rate": 1.661848905677947e-05, "loss": 0.2298, "step": 9390500 }, { "epoch": 5.63, "learning_rate": 1.6616397491081145e-05, "loss": 0.2266, "step": 9391000 }, { "epoch": 5.63, "learning_rate": 1.6614297525520582e-05, "loss": 0.2244, "step": 9391500 }, { "epoch": 5.63, "learning_rate": 1.661219755996002e-05, "loss": 0.2238, "step": 9392000 }, { "epoch": 5.63, "learning_rate": 1.6610097594399452e-05, "loss": 0.2281, "step": 9392500 }, { "epoch": 5.63, "learning_rate": 1.6607997628838886e-05, "loss": 0.2251, "step": 9393000 }, { "epoch": 5.63, "learning_rate": 1.6605897663278323e-05, "loss": 0.2228, "step": 9393500 }, { "epoch": 5.63, "learning_rate": 1.6603797697717756e-05, "loss": 0.2216, "step": 9394000 }, { "epoch": 5.63, "learning_rate": 1.6601697732157193e-05, "loss": 0.2254, "step": 9394500 }, { "epoch": 5.63, "learning_rate": 1.6599601966527746e-05, "loss": 0.2222, "step": 9395000 }, { "epoch": 5.63, "learning_rate": 1.6597506200898303e-05, "loss": 0.2324, "step": 9395500 }, { "epoch": 5.63, "learning_rate": 1.659540623533774e-05, "loss": 0.2261, "step": 9396000 }, { "epoch": 5.63, "learning_rate": 1.6593306269777177e-05, "loss": 0.224, "step": 9396500 }, { "epoch": 5.63, "learning_rate": 1.659120630421661e-05, "loss": 0.2241, "step": 9397000 }, { "epoch": 5.63, "learning_rate": 1.6589110538587167e-05, "loss": 0.223, "step": 9397500 }, { "epoch": 5.63, "learning_rate": 1.65870105730266e-05, "loss": 0.2265, "step": 9398000 }, { "epoch": 5.63, "learning_rate": 1.6584910607466038e-05, "loss": 0.2265, "step": 9398500 }, { "epoch": 5.64, "learning_rate": 1.6582810641905475e-05, "loss": 0.2273, "step": 9399000 }, { "epoch": 5.64, "learning_rate": 1.6580710676344908e-05, "loss": 0.2221, "step": 9399500 }, { "epoch": 5.64, "learning_rate": 1.657861491071546e-05, "loss": 0.226, "step": 9400000 }, { "epoch": 5.64, "eval_loss": 0.2130396068096161, "eval_runtime": 1466.0636, "eval_samples_per_second": 359.275, "eval_steps_per_second": 59.879, "step": 9400000 }, { "epoch": 5.64, "learning_rate": 1.65765149451549e-05, "loss": 0.2246, "step": 9400500 }, { "epoch": 5.64, "learning_rate": 1.6574414979594335e-05, "loss": 0.2263, "step": 9401000 }, { "epoch": 5.64, "learning_rate": 1.657231501403377e-05, "loss": 0.2252, "step": 9401500 }, { "epoch": 5.64, "learning_rate": 1.6570219248404326e-05, "loss": 0.226, "step": 9402000 }, { "epoch": 5.64, "learning_rate": 1.656811928284376e-05, "loss": 0.2262, "step": 9402500 }, { "epoch": 5.64, "learning_rate": 1.6566019317283196e-05, "loss": 0.225, "step": 9403000 }, { "epoch": 5.64, "learning_rate": 1.6563919351722633e-05, "loss": 0.2285, "step": 9403500 }, { "epoch": 5.64, "learning_rate": 1.6561819386162066e-05, "loss": 0.2317, "step": 9404000 }, { "epoch": 5.64, "learning_rate": 1.6559723620532623e-05, "loss": 0.2282, "step": 9404500 }, { "epoch": 5.64, "learning_rate": 1.6557623654972057e-05, "loss": 0.229, "step": 9405000 }, { "epoch": 5.64, "learning_rate": 1.6555523689411494e-05, "loss": 0.2275, "step": 9405500 }, { "epoch": 5.64, "learning_rate": 1.655342372385093e-05, "loss": 0.225, "step": 9406000 }, { "epoch": 5.64, "learning_rate": 1.6551323758290364e-05, "loss": 0.2189, "step": 9406500 }, { "epoch": 5.64, "learning_rate": 1.6549223792729797e-05, "loss": 0.2306, "step": 9407000 }, { "epoch": 5.64, "learning_rate": 1.6547123827169234e-05, "loss": 0.2212, "step": 9407500 }, { "epoch": 5.64, "learning_rate": 1.6545023861608668e-05, "loss": 0.2293, "step": 9408000 }, { "epoch": 5.64, "learning_rate": 1.6542932295910348e-05, "loss": 0.2233, "step": 9408500 }, { "epoch": 5.64, "learning_rate": 1.654083233034978e-05, "loss": 0.2226, "step": 9409000 }, { "epoch": 5.64, "learning_rate": 1.6538732364789215e-05, "loss": 0.2338, "step": 9409500 }, { "epoch": 5.64, "learning_rate": 1.6536632399228652e-05, "loss": 0.225, "step": 9410000 }, { "epoch": 5.64, "learning_rate": 1.653453243366809e-05, "loss": 0.2287, "step": 9410500 }, { "epoch": 5.64, "learning_rate": 1.6532432468107522e-05, "loss": 0.229, "step": 9411000 }, { "epoch": 5.64, "learning_rate": 1.653033250254696e-05, "loss": 0.2263, "step": 9411500 }, { "epoch": 5.64, "learning_rate": 1.6528232536986393e-05, "loss": 0.2292, "step": 9412000 }, { "epoch": 5.64, "learning_rate": 1.652614097128807e-05, "loss": 0.2227, "step": 9412500 }, { "epoch": 5.64, "learning_rate": 1.6524041005727507e-05, "loss": 0.2295, "step": 9413000 }, { "epoch": 5.64, "learning_rate": 1.6521941040166943e-05, "loss": 0.2304, "step": 9413500 }, { "epoch": 5.64, "learning_rate": 1.6519841074606373e-05, "loss": 0.2283, "step": 9414000 }, { "epoch": 5.64, "learning_rate": 1.651774110904581e-05, "loss": 0.2274, "step": 9414500 }, { "epoch": 5.64, "learning_rate": 1.6515641143485247e-05, "loss": 0.2298, "step": 9415000 }, { "epoch": 5.64, "learning_rate": 1.6513541177924684e-05, "loss": 0.2289, "step": 9415500 }, { "epoch": 5.65, "learning_rate": 1.6511441212364117e-05, "loss": 0.2303, "step": 9416000 }, { "epoch": 5.65, "learning_rate": 1.650934544673467e-05, "loss": 0.2241, "step": 9416500 }, { "epoch": 5.65, "learning_rate": 1.6507245481174108e-05, "loss": 0.2297, "step": 9417000 }, { "epoch": 5.65, "learning_rate": 1.6505145515613545e-05, "loss": 0.2241, "step": 9417500 }, { "epoch": 5.65, "learning_rate": 1.6503045550052978e-05, "loss": 0.2248, "step": 9418000 }, { "epoch": 5.65, "learning_rate": 1.6500949784423535e-05, "loss": 0.223, "step": 9418500 }, { "epoch": 5.65, "learning_rate": 1.649884981886297e-05, "loss": 0.2274, "step": 9419000 }, { "epoch": 5.65, "learning_rate": 1.6496754053233526e-05, "loss": 0.2269, "step": 9419500 }, { "epoch": 5.65, "learning_rate": 1.6494654087672962e-05, "loss": 0.2287, "step": 9420000 }, { "epoch": 5.65, "learning_rate": 1.64925541221124e-05, "loss": 0.2228, "step": 9420500 }, { "epoch": 5.65, "learning_rate": 1.649045415655183e-05, "loss": 0.227, "step": 9421000 }, { "epoch": 5.65, "learning_rate": 1.6488354190991266e-05, "loss": 0.2283, "step": 9421500 }, { "epoch": 5.65, "learning_rate": 1.6486254225430703e-05, "loss": 0.2241, "step": 9422000 }, { "epoch": 5.65, "learning_rate": 1.648415425987014e-05, "loss": 0.2221, "step": 9422500 }, { "epoch": 5.65, "learning_rate": 1.6482054294309573e-05, "loss": 0.2284, "step": 9423000 }, { "epoch": 5.65, "learning_rate": 1.647995432874901e-05, "loss": 0.2262, "step": 9423500 }, { "epoch": 5.65, "learning_rate": 1.6477854363188444e-05, "loss": 0.2266, "step": 9424000 }, { "epoch": 5.65, "learning_rate": 1.6475754397627877e-05, "loss": 0.225, "step": 9424500 }, { "epoch": 5.65, "learning_rate": 1.6473654432067314e-05, "loss": 0.2215, "step": 9425000 }, { "epoch": 5.65, "learning_rate": 1.647155866643787e-05, "loss": 0.2255, "step": 9425500 }, { "epoch": 5.65, "learning_rate": 1.6469458700877304e-05, "loss": 0.2312, "step": 9426000 }, { "epoch": 5.65, "learning_rate": 1.646736293524786e-05, "loss": 0.2305, "step": 9426500 }, { "epoch": 5.65, "learning_rate": 1.6465262969687298e-05, "loss": 0.2274, "step": 9427000 }, { "epoch": 5.65, "learning_rate": 1.646316300412673e-05, "loss": 0.2279, "step": 9427500 }, { "epoch": 5.65, "learning_rate": 1.646106303856617e-05, "loss": 0.2273, "step": 9428000 }, { "epoch": 5.65, "learning_rate": 1.6458963073005602e-05, "loss": 0.2274, "step": 9428500 }, { "epoch": 5.65, "learning_rate": 1.6456863107445035e-05, "loss": 0.2253, "step": 9429000 }, { "epoch": 5.65, "learning_rate": 1.6454763141884472e-05, "loss": 0.2257, "step": 9429500 }, { "epoch": 5.65, "learning_rate": 1.645266317632391e-05, "loss": 0.2315, "step": 9430000 }, { "epoch": 5.65, "learning_rate": 1.6450567410694466e-05, "loss": 0.2228, "step": 9430500 }, { "epoch": 5.65, "learning_rate": 1.644847164506502e-05, "loss": 0.2258, "step": 9431000 }, { "epoch": 5.65, "learning_rate": 1.6446371679504456e-05, "loss": 0.2301, "step": 9431500 }, { "epoch": 5.65, "learning_rate": 1.644427171394389e-05, "loss": 0.2266, "step": 9432000 }, { "epoch": 5.66, "learning_rate": 1.6442171748383327e-05, "loss": 0.2263, "step": 9432500 }, { "epoch": 5.66, "learning_rate": 1.6440071782822764e-05, "loss": 0.2255, "step": 9433000 }, { "epoch": 5.66, "learning_rate": 1.6437971817262197e-05, "loss": 0.2216, "step": 9433500 }, { "epoch": 5.66, "learning_rate": 1.643587185170163e-05, "loss": 0.2286, "step": 9434000 }, { "epoch": 5.66, "learning_rate": 1.6433776086072188e-05, "loss": 0.2288, "step": 9434500 }, { "epoch": 5.66, "learning_rate": 1.6431676120511624e-05, "loss": 0.2315, "step": 9435000 }, { "epoch": 5.66, "learning_rate": 1.6429576154951058e-05, "loss": 0.227, "step": 9435500 }, { "epoch": 5.66, "learning_rate": 1.642747618939049e-05, "loss": 0.2273, "step": 9436000 }, { "epoch": 5.66, "learning_rate": 1.6425376223829928e-05, "loss": 0.2252, "step": 9436500 }, { "epoch": 5.66, "learning_rate": 1.6423276258269365e-05, "loss": 0.2257, "step": 9437000 }, { "epoch": 5.66, "learning_rate": 1.64211762927088e-05, "loss": 0.2306, "step": 9437500 }, { "epoch": 5.66, "learning_rate": 1.6419076327148235e-05, "loss": 0.224, "step": 9438000 }, { "epoch": 5.66, "learning_rate": 1.641698056151879e-05, "loss": 0.2211, "step": 9438500 }, { "epoch": 5.66, "learning_rate": 1.6414880595958226e-05, "loss": 0.2251, "step": 9439000 }, { "epoch": 5.66, "learning_rate": 1.6412780630397663e-05, "loss": 0.2279, "step": 9439500 }, { "epoch": 5.66, "learning_rate": 1.6410680664837096e-05, "loss": 0.2228, "step": 9440000 }, { "epoch": 5.66, "learning_rate": 1.6408584899207653e-05, "loss": 0.2226, "step": 9440500 }, { "epoch": 5.66, "learning_rate": 1.6406484933647086e-05, "loss": 0.2316, "step": 9441000 }, { "epoch": 5.66, "learning_rate": 1.6404384968086523e-05, "loss": 0.228, "step": 9441500 }, { "epoch": 5.66, "learning_rate": 1.640228500252596e-05, "loss": 0.2269, "step": 9442000 }, { "epoch": 5.66, "learning_rate": 1.6400189236896517e-05, "loss": 0.225, "step": 9442500 }, { "epoch": 5.66, "learning_rate": 1.639809347126707e-05, "loss": 0.2315, "step": 9443000 }, { "epoch": 5.66, "learning_rate": 1.6395993505706507e-05, "loss": 0.2257, "step": 9443500 }, { "epoch": 5.66, "learning_rate": 1.639389354014594e-05, "loss": 0.2273, "step": 9444000 }, { "epoch": 5.66, "learning_rate": 1.6391793574585378e-05, "loss": 0.2249, "step": 9444500 }, { "epoch": 5.66, "learning_rate": 1.6389693609024815e-05, "loss": 0.223, "step": 9445000 }, { "epoch": 5.66, "learning_rate": 1.6387593643464245e-05, "loss": 0.2256, "step": 9445500 }, { "epoch": 5.66, "learning_rate": 1.638549367790368e-05, "loss": 0.2248, "step": 9446000 }, { "epoch": 5.66, "learning_rate": 1.638339371234312e-05, "loss": 0.2343, "step": 9446500 }, { "epoch": 5.66, "learning_rate": 1.6381297946713675e-05, "loss": 0.2279, "step": 9447000 }, { "epoch": 5.66, "learning_rate": 1.637919798115311e-05, "loss": 0.2235, "step": 9447500 }, { "epoch": 5.66, "learning_rate": 1.6377098015592542e-05, "loss": 0.2347, "step": 9448000 }, { "epoch": 5.66, "learning_rate": 1.637499805003198e-05, "loss": 0.2329, "step": 9448500 }, { "epoch": 5.67, "learning_rate": 1.6372902284402536e-05, "loss": 0.2309, "step": 9449000 }, { "epoch": 5.67, "learning_rate": 1.6370802318841973e-05, "loss": 0.225, "step": 9449500 }, { "epoch": 5.67, "learning_rate": 1.6368702353281403e-05, "loss": 0.2218, "step": 9450000 }, { "epoch": 5.67, "learning_rate": 1.636660238772084e-05, "loss": 0.2216, "step": 9450500 }, { "epoch": 5.67, "learning_rate": 1.6364502422160277e-05, "loss": 0.226, "step": 9451000 }, { "epoch": 5.67, "learning_rate": 1.6362402456599714e-05, "loss": 0.2237, "step": 9451500 }, { "epoch": 5.67, "learning_rate": 1.6360302491039147e-05, "loss": 0.2273, "step": 9452000 }, { "epoch": 5.67, "learning_rate": 1.6358202525478584e-05, "loss": 0.2241, "step": 9452500 }, { "epoch": 5.67, "learning_rate": 1.6356106759849137e-05, "loss": 0.2228, "step": 9453000 }, { "epoch": 5.67, "learning_rate": 1.6354010994219694e-05, "loss": 0.2264, "step": 9453500 }, { "epoch": 5.67, "learning_rate": 1.635191522859025e-05, "loss": 0.2252, "step": 9454000 }, { "epoch": 5.67, "learning_rate": 1.6349815263029685e-05, "loss": 0.2251, "step": 9454500 }, { "epoch": 5.67, "learning_rate": 1.634771529746912e-05, "loss": 0.2268, "step": 9455000 }, { "epoch": 5.67, "learning_rate": 1.6345615331908555e-05, "loss": 0.2242, "step": 9455500 }, { "epoch": 5.67, "learning_rate": 1.6343515366347992e-05, "loss": 0.2274, "step": 9456000 }, { "epoch": 5.67, "learning_rate": 1.634141540078743e-05, "loss": 0.2285, "step": 9456500 }, { "epoch": 5.67, "learning_rate": 1.633931543522686e-05, "loss": 0.2286, "step": 9457000 }, { "epoch": 5.67, "learning_rate": 1.6337215469666296e-05, "loss": 0.2217, "step": 9457500 }, { "epoch": 5.67, "learning_rate": 1.6335115504105733e-05, "loss": 0.2265, "step": 9458000 }, { "epoch": 5.67, "learning_rate": 1.633301553854517e-05, "loss": 0.2218, "step": 9458500 }, { "epoch": 5.67, "learning_rate": 1.6330915572984603e-05, "loss": 0.2179, "step": 9459000 }, { "epoch": 5.67, "learning_rate": 1.632881560742404e-05, "loss": 0.2257, "step": 9459500 }, { "epoch": 5.67, "learning_rate": 1.6326719841794593e-05, "loss": 0.2216, "step": 9460000 }, { "epoch": 5.67, "learning_rate": 1.632462407616515e-05, "loss": 0.2211, "step": 9460500 }, { "epoch": 5.67, "learning_rate": 1.6322524110604587e-05, "loss": 0.2294, "step": 9461000 }, { "epoch": 5.67, "learning_rate": 1.6320424145044024e-05, "loss": 0.2224, "step": 9461500 }, { "epoch": 5.67, "learning_rate": 1.6318324179483454e-05, "loss": 0.2275, "step": 9462000 }, { "epoch": 5.67, "learning_rate": 1.631622421392289e-05, "loss": 0.2206, "step": 9462500 }, { "epoch": 5.67, "learning_rate": 1.6314128448293448e-05, "loss": 0.2289, "step": 9463000 }, { "epoch": 5.67, "learning_rate": 1.6312028482732885e-05, "loss": 0.2226, "step": 9463500 }, { "epoch": 5.67, "learning_rate": 1.6309928517172318e-05, "loss": 0.2219, "step": 9464000 }, { "epoch": 5.67, "learning_rate": 1.630782855161175e-05, "loss": 0.2264, "step": 9464500 }, { "epoch": 5.67, "learning_rate": 1.630572858605119e-05, "loss": 0.2286, "step": 9465000 }, { "epoch": 5.67, "learning_rate": 1.6303632820421745e-05, "loss": 0.2226, "step": 9465500 }, { "epoch": 5.68, "learning_rate": 1.6301532854861182e-05, "loss": 0.2273, "step": 9466000 }, { "epoch": 5.68, "learning_rate": 1.6299432889300612e-05, "loss": 0.2297, "step": 9466500 }, { "epoch": 5.68, "learning_rate": 1.629733292374005e-05, "loss": 0.2268, "step": 9467000 }, { "epoch": 5.68, "learning_rate": 1.6295232958179486e-05, "loss": 0.2278, "step": 9467500 }, { "epoch": 5.68, "learning_rate": 1.629313299261892e-05, "loss": 0.2217, "step": 9468000 }, { "epoch": 5.68, "learning_rate": 1.6291033027058356e-05, "loss": 0.2277, "step": 9468500 }, { "epoch": 5.68, "learning_rate": 1.6288933061497793e-05, "loss": 0.2296, "step": 9469000 }, { "epoch": 5.68, "learning_rate": 1.6286841495799467e-05, "loss": 0.2273, "step": 9469500 }, { "epoch": 5.68, "learning_rate": 1.6284741530238904e-05, "loss": 0.2256, "step": 9470000 }, { "epoch": 5.68, "learning_rate": 1.628264156467834e-05, "loss": 0.2299, "step": 9470500 }, { "epoch": 5.68, "learning_rate": 1.6280541599117774e-05, "loss": 0.2268, "step": 9471000 }, { "epoch": 5.68, "learning_rate": 1.6278441633557208e-05, "loss": 0.2257, "step": 9471500 }, { "epoch": 5.68, "learning_rate": 1.6276341667996644e-05, "loss": 0.2269, "step": 9472000 }, { "epoch": 5.68, "learning_rate": 1.627424170243608e-05, "loss": 0.2248, "step": 9472500 }, { "epoch": 5.68, "learning_rate": 1.6272141736875515e-05, "loss": 0.2232, "step": 9473000 }, { "epoch": 5.68, "learning_rate": 1.627004597124607e-05, "loss": 0.2289, "step": 9473500 }, { "epoch": 5.68, "learning_rate": 1.6267946005685505e-05, "loss": 0.2345, "step": 9474000 }, { "epoch": 5.68, "learning_rate": 1.6265846040124942e-05, "loss": 0.2243, "step": 9474500 }, { "epoch": 5.68, "learning_rate": 1.62637502744955e-05, "loss": 0.2297, "step": 9475000 }, { "epoch": 5.68, "learning_rate": 1.6261650308934936e-05, "loss": 0.2234, "step": 9475500 }, { "epoch": 5.68, "learning_rate": 1.6259550343374366e-05, "loss": 0.2258, "step": 9476000 }, { "epoch": 5.68, "learning_rate": 1.6257450377813803e-05, "loss": 0.2248, "step": 9476500 }, { "epoch": 5.68, "learning_rate": 1.625535041225324e-05, "loss": 0.2266, "step": 9477000 }, { "epoch": 5.68, "learning_rate": 1.6253254646623796e-05, "loss": 0.2272, "step": 9477500 }, { "epoch": 5.68, "learning_rate": 1.625115468106323e-05, "loss": 0.2292, "step": 9478000 }, { "epoch": 5.68, "learning_rate": 1.6249054715502663e-05, "loss": 0.2261, "step": 9478500 }, { "epoch": 5.68, "learning_rate": 1.62469547499421e-05, "loss": 0.2236, "step": 9479000 }, { "epoch": 5.68, "learning_rate": 1.6244854784381537e-05, "loss": 0.2239, "step": 9479500 }, { "epoch": 5.68, "learning_rate": 1.624275481882097e-05, "loss": 0.2261, "step": 9480000 }, { "epoch": 5.68, "learning_rate": 1.6240654853260407e-05, "loss": 0.2262, "step": 9480500 }, { "epoch": 5.68, "learning_rate": 1.6238554887699844e-05, "loss": 0.2252, "step": 9481000 }, { "epoch": 5.68, "learning_rate": 1.6236459122070398e-05, "loss": 0.2255, "step": 9481500 }, { "epoch": 5.68, "learning_rate": 1.6234363356440955e-05, "loss": 0.2232, "step": 9482000 }, { "epoch": 5.69, "learning_rate": 1.623226339088039e-05, "loss": 0.2265, "step": 9482500 }, { "epoch": 5.69, "learning_rate": 1.6230163425319825e-05, "loss": 0.2286, "step": 9483000 }, { "epoch": 5.69, "learning_rate": 1.622806345975926e-05, "loss": 0.227, "step": 9483500 }, { "epoch": 5.69, "learning_rate": 1.6225963494198695e-05, "loss": 0.2226, "step": 9484000 }, { "epoch": 5.69, "learning_rate": 1.622386352863813e-05, "loss": 0.2217, "step": 9484500 }, { "epoch": 5.69, "learning_rate": 1.6221763563077566e-05, "loss": 0.2189, "step": 9485000 }, { "epoch": 5.69, "learning_rate": 1.6219667797448123e-05, "loss": 0.2255, "step": 9485500 }, { "epoch": 5.69, "learning_rate": 1.6217567831887556e-05, "loss": 0.2264, "step": 9486000 }, { "epoch": 5.69, "learning_rate": 1.6215467866326993e-05, "loss": 0.2283, "step": 9486500 }, { "epoch": 5.69, "learning_rate": 1.6213367900766426e-05, "loss": 0.2261, "step": 9487000 }, { "epoch": 5.69, "learning_rate": 1.6211267935205863e-05, "loss": 0.2273, "step": 9487500 }, { "epoch": 5.69, "learning_rate": 1.62091679696453e-05, "loss": 0.2282, "step": 9488000 }, { "epoch": 5.69, "learning_rate": 1.620706800408473e-05, "loss": 0.226, "step": 9488500 }, { "epoch": 5.69, "learning_rate": 1.6204968038524167e-05, "loss": 0.2209, "step": 9489000 }, { "epoch": 5.69, "learning_rate": 1.6202872272894724e-05, "loss": 0.229, "step": 9489500 }, { "epoch": 5.69, "learning_rate": 1.620077650726528e-05, "loss": 0.2253, "step": 9490000 }, { "epoch": 5.69, "learning_rate": 1.6198676541704714e-05, "loss": 0.2251, "step": 9490500 }, { "epoch": 5.69, "learning_rate": 1.619657657614415e-05, "loss": 0.2216, "step": 9491000 }, { "epoch": 5.69, "learning_rate": 1.6194476610583585e-05, "loss": 0.232, "step": 9491500 }, { "epoch": 5.69, "learning_rate": 1.619237664502302e-05, "loss": 0.221, "step": 9492000 }, { "epoch": 5.69, "learning_rate": 1.619027667946246e-05, "loss": 0.2298, "step": 9492500 }, { "epoch": 5.69, "learning_rate": 1.6188176713901892e-05, "loss": 0.2244, "step": 9493000 }, { "epoch": 5.69, "learning_rate": 1.6186076748341325e-05, "loss": 0.2254, "step": 9493500 }, { "epoch": 5.69, "learning_rate": 1.6183980982711882e-05, "loss": 0.2219, "step": 9494000 }, { "epoch": 5.69, "learning_rate": 1.618188101715132e-05, "loss": 0.2233, "step": 9494500 }, { "epoch": 5.69, "learning_rate": 1.6179781051590756e-05, "loss": 0.2305, "step": 9495000 }, { "epoch": 5.69, "learning_rate": 1.6177681086030186e-05, "loss": 0.2237, "step": 9495500 }, { "epoch": 5.69, "learning_rate": 1.6175585320400743e-05, "loss": 0.2251, "step": 9496000 }, { "epoch": 5.69, "learning_rate": 1.6173489554771303e-05, "loss": 0.2232, "step": 9496500 }, { "epoch": 5.69, "learning_rate": 1.6171389589210737e-05, "loss": 0.2276, "step": 9497000 }, { "epoch": 5.69, "learning_rate": 1.616929382358129e-05, "loss": 0.2237, "step": 9497500 }, { "epoch": 5.69, "learning_rate": 1.6167193858020727e-05, "loss": 0.2304, "step": 9498000 }, { "epoch": 5.69, "learning_rate": 1.6165093892460164e-05, "loss": 0.2309, "step": 9498500 }, { "epoch": 5.7, "learning_rate": 1.6162993926899598e-05, "loss": 0.2229, "step": 9499000 }, { "epoch": 5.7, "learning_rate": 1.6160893961339034e-05, "loss": 0.2238, "step": 9499500 }, { "epoch": 5.7, "learning_rate": 1.6158793995778468e-05, "loss": 0.2297, "step": 9500000 }, { "epoch": 5.7, "eval_loss": 0.21234430372714996, "eval_runtime": 1454.9249, "eval_samples_per_second": 362.026, "eval_steps_per_second": 60.338, "step": 9500000 }, { "epoch": 5.7, "learning_rate": 1.6156694030217905e-05, "loss": 0.2254, "step": 9500500 }, { "epoch": 5.7, "learning_rate": 1.6154594064657338e-05, "loss": 0.225, "step": 9501000 }, { "epoch": 5.7, "learning_rate": 1.6152494099096775e-05, "loss": 0.2253, "step": 9501500 }, { "epoch": 5.7, "learning_rate": 1.6150394133536212e-05, "loss": 0.2221, "step": 9502000 }, { "epoch": 5.7, "learning_rate": 1.6148294167975645e-05, "loss": 0.2278, "step": 9502500 }, { "epoch": 5.7, "learning_rate": 1.614619420241508e-05, "loss": 0.2209, "step": 9503000 }, { "epoch": 5.7, "learning_rate": 1.6144098436785636e-05, "loss": 0.2243, "step": 9503500 }, { "epoch": 5.7, "learning_rate": 1.6141998471225073e-05, "loss": 0.2299, "step": 9504000 }, { "epoch": 5.7, "learning_rate": 1.613989850566451e-05, "loss": 0.2229, "step": 9504500 }, { "epoch": 5.7, "learning_rate": 1.613779854010394e-05, "loss": 0.2304, "step": 9505000 }, { "epoch": 5.7, "learning_rate": 1.6135702774474496e-05, "loss": 0.2289, "step": 9505500 }, { "epoch": 5.7, "learning_rate": 1.6133602808913933e-05, "loss": 0.221, "step": 9506000 }, { "epoch": 5.7, "learning_rate": 1.613150704328449e-05, "loss": 0.2254, "step": 9506500 }, { "epoch": 5.7, "learning_rate": 1.6129407077723924e-05, "loss": 0.2248, "step": 9507000 }, { "epoch": 5.7, "learning_rate": 1.612730711216336e-05, "loss": 0.2254, "step": 9507500 }, { "epoch": 5.7, "learning_rate": 1.6125207146602794e-05, "loss": 0.2277, "step": 9508000 }, { "epoch": 5.7, "learning_rate": 1.612310718104223e-05, "loss": 0.2267, "step": 9508500 }, { "epoch": 5.7, "learning_rate": 1.6121007215481668e-05, "loss": 0.2238, "step": 9509000 }, { "epoch": 5.7, "learning_rate": 1.61189072499211e-05, "loss": 0.2275, "step": 9509500 }, { "epoch": 5.7, "learning_rate": 1.6116807284360535e-05, "loss": 0.2253, "step": 9510000 }, { "epoch": 5.7, "learning_rate": 1.6114715718662215e-05, "loss": 0.2307, "step": 9510500 }, { "epoch": 5.7, "learning_rate": 1.611261575310165e-05, "loss": 0.2286, "step": 9511000 }, { "epoch": 5.7, "learning_rate": 1.6110515787541085e-05, "loss": 0.2311, "step": 9511500 }, { "epoch": 5.7, "learning_rate": 1.610841582198052e-05, "loss": 0.2229, "step": 9512000 }, { "epoch": 5.7, "learning_rate": 1.6106315856419952e-05, "loss": 0.2273, "step": 9512500 }, { "epoch": 5.7, "learning_rate": 1.610421589085939e-05, "loss": 0.2255, "step": 9513000 }, { "epoch": 5.7, "learning_rate": 1.6102120125229946e-05, "loss": 0.2232, "step": 9513500 }, { "epoch": 5.7, "learning_rate": 1.6100020159669383e-05, "loss": 0.2208, "step": 9514000 }, { "epoch": 5.7, "learning_rate": 1.6097920194108816e-05, "loss": 0.2231, "step": 9514500 }, { "epoch": 5.7, "learning_rate": 1.609582022854825e-05, "loss": 0.2244, "step": 9515000 }, { "epoch": 5.7, "learning_rate": 1.6093720262987687e-05, "loss": 0.2274, "step": 9515500 }, { "epoch": 5.71, "learning_rate": 1.6091620297427124e-05, "loss": 0.2231, "step": 9516000 }, { "epoch": 5.71, "learning_rate": 1.608952453179768e-05, "loss": 0.2271, "step": 9516500 }, { "epoch": 5.71, "learning_rate": 1.6087424566237114e-05, "loss": 0.2265, "step": 9517000 }, { "epoch": 5.71, "learning_rate": 1.6085324600676548e-05, "loss": 0.2249, "step": 9517500 }, { "epoch": 5.71, "learning_rate": 1.6083224635115984e-05, "loss": 0.2255, "step": 9518000 }, { "epoch": 5.71, "learning_rate": 1.608112466955542e-05, "loss": 0.2248, "step": 9518500 }, { "epoch": 5.71, "learning_rate": 1.6079024703994855e-05, "loss": 0.2203, "step": 9519000 }, { "epoch": 5.71, "learning_rate": 1.6076928938365408e-05, "loss": 0.2306, "step": 9519500 }, { "epoch": 5.71, "learning_rate": 1.6074828972804845e-05, "loss": 0.2294, "step": 9520000 }, { "epoch": 5.71, "learning_rate": 1.6072729007244282e-05, "loss": 0.2262, "step": 9520500 }, { "epoch": 5.71, "learning_rate": 1.6070629041683715e-05, "loss": 0.2223, "step": 9521000 }, { "epoch": 5.71, "learning_rate": 1.6068529076123152e-05, "loss": 0.2222, "step": 9521500 }, { "epoch": 5.71, "learning_rate": 1.6066429110562586e-05, "loss": 0.2295, "step": 9522000 }, { "epoch": 5.71, "learning_rate": 1.6064329145002023e-05, "loss": 0.2205, "step": 9522500 }, { "epoch": 5.71, "learning_rate": 1.6062229179441456e-05, "loss": 0.2203, "step": 9523000 }, { "epoch": 5.71, "learning_rate": 1.6060133413812013e-05, "loss": 0.2239, "step": 9523500 }, { "epoch": 5.71, "learning_rate": 1.605803344825145e-05, "loss": 0.2269, "step": 9524000 }, { "epoch": 5.71, "learning_rate": 1.6055933482690883e-05, "loss": 0.2254, "step": 9524500 }, { "epoch": 5.71, "learning_rate": 1.6053833517130317e-05, "loss": 0.2291, "step": 9525000 }, { "epoch": 5.71, "learning_rate": 1.6051737751500877e-05, "loss": 0.2248, "step": 9525500 }, { "epoch": 5.71, "learning_rate": 1.604963778594031e-05, "loss": 0.2216, "step": 9526000 }, { "epoch": 5.71, "learning_rate": 1.6047537820379744e-05, "loss": 0.227, "step": 9526500 }, { "epoch": 5.71, "learning_rate": 1.604543785481918e-05, "loss": 0.2264, "step": 9527000 }, { "epoch": 5.71, "learning_rate": 1.6043346289120858e-05, "loss": 0.2309, "step": 9527500 }, { "epoch": 5.71, "learning_rate": 1.6041246323560295e-05, "loss": 0.2265, "step": 9528000 }, { "epoch": 5.71, "learning_rate": 1.6039146357999728e-05, "loss": 0.2237, "step": 9528500 }, { "epoch": 5.71, "learning_rate": 1.603704639243916e-05, "loss": 0.2235, "step": 9529000 }, { "epoch": 5.71, "learning_rate": 1.60349464268786e-05, "loss": 0.2269, "step": 9529500 }, { "epoch": 5.71, "learning_rate": 1.6032850661249155e-05, "loss": 0.2221, "step": 9530000 }, { "epoch": 5.71, "learning_rate": 1.6030750695688592e-05, "loss": 0.2249, "step": 9530500 }, { "epoch": 5.71, "learning_rate": 1.6028650730128026e-05, "loss": 0.2199, "step": 9531000 }, { "epoch": 5.71, "learning_rate": 1.602655076456746e-05, "loss": 0.2261, "step": 9531500 }, { "epoch": 5.71, "learning_rate": 1.6024450799006896e-05, "loss": 0.2273, "step": 9532000 }, { "epoch": 5.72, "learning_rate": 1.6022350833446333e-05, "loss": 0.2318, "step": 9532500 }, { "epoch": 5.72, "learning_rate": 1.6020250867885766e-05, "loss": 0.2253, "step": 9533000 }, { "epoch": 5.72, "learning_rate": 1.6018150902325203e-05, "loss": 0.2237, "step": 9533500 }, { "epoch": 5.72, "learning_rate": 1.6016055136695757e-05, "loss": 0.2257, "step": 9534000 }, { "epoch": 5.72, "learning_rate": 1.6013955171135194e-05, "loss": 0.2261, "step": 9534500 }, { "epoch": 5.72, "learning_rate": 1.601185940550575e-05, "loss": 0.2274, "step": 9535000 }, { "epoch": 5.72, "learning_rate": 1.6009759439945187e-05, "loss": 0.2274, "step": 9535500 }, { "epoch": 5.72, "learning_rate": 1.6007659474384618e-05, "loss": 0.2273, "step": 9536000 }, { "epoch": 5.72, "learning_rate": 1.6005563708755175e-05, "loss": 0.2286, "step": 9536500 }, { "epoch": 5.72, "learning_rate": 1.600346374319461e-05, "loss": 0.2208, "step": 9537000 }, { "epoch": 5.72, "learning_rate": 1.6001363777634048e-05, "loss": 0.2266, "step": 9537500 }, { "epoch": 5.72, "learning_rate": 1.599926381207348e-05, "loss": 0.2255, "step": 9538000 }, { "epoch": 5.72, "learning_rate": 1.5997163846512915e-05, "loss": 0.2263, "step": 9538500 }, { "epoch": 5.72, "learning_rate": 1.5995063880952352e-05, "loss": 0.2209, "step": 9539000 }, { "epoch": 5.72, "learning_rate": 1.599296391539179e-05, "loss": 0.2195, "step": 9539500 }, { "epoch": 5.72, "learning_rate": 1.5990863949831222e-05, "loss": 0.2301, "step": 9540000 }, { "epoch": 5.72, "learning_rate": 1.598876398427066e-05, "loss": 0.2206, "step": 9540500 }, { "epoch": 5.72, "learning_rate": 1.5986664018710093e-05, "loss": 0.2228, "step": 9541000 }, { "epoch": 5.72, "learning_rate": 1.598456825308065e-05, "loss": 0.2207, "step": 9541500 }, { "epoch": 5.72, "learning_rate": 1.5982468287520086e-05, "loss": 0.2256, "step": 9542000 }, { "epoch": 5.72, "learning_rate": 1.598036832195952e-05, "loss": 0.2259, "step": 9542500 }, { "epoch": 5.72, "learning_rate": 1.5978268356398957e-05, "loss": 0.23, "step": 9543000 }, { "epoch": 5.72, "learning_rate": 1.597616839083839e-05, "loss": 0.2272, "step": 9543500 }, { "epoch": 5.72, "learning_rate": 1.5974068425277824e-05, "loss": 0.2234, "step": 9544000 }, { "epoch": 5.72, "learning_rate": 1.597196845971726e-05, "loss": 0.2201, "step": 9544500 }, { "epoch": 5.72, "learning_rate": 1.5969868494156697e-05, "loss": 0.2269, "step": 9545000 }, { "epoch": 5.72, "learning_rate": 1.596777272852725e-05, "loss": 0.2258, "step": 9545500 }, { "epoch": 5.72, "learning_rate": 1.5965672762966688e-05, "loss": 0.2248, "step": 9546000 }, { "epoch": 5.72, "learning_rate": 1.596357279740612e-05, "loss": 0.2268, "step": 9546500 }, { "epoch": 5.72, "learning_rate": 1.5961472831845558e-05, "loss": 0.2317, "step": 9547000 }, { "epoch": 5.72, "learning_rate": 1.5959377066216115e-05, "loss": 0.2232, "step": 9547500 }, { "epoch": 5.72, "learning_rate": 1.595728130058667e-05, "loss": 0.2321, "step": 9548000 }, { "epoch": 5.72, "learning_rate": 1.5955181335026105e-05, "loss": 0.229, "step": 9548500 }, { "epoch": 5.73, "learning_rate": 1.5953081369465542e-05, "loss": 0.2266, "step": 9549000 }, { "epoch": 5.73, "learning_rate": 1.5950981403904976e-05, "loss": 0.2258, "step": 9549500 }, { "epoch": 5.73, "learning_rate": 1.5948881438344413e-05, "loss": 0.2324, "step": 9550000 }, { "epoch": 5.73, "learning_rate": 1.5946781472783846e-05, "loss": 0.2213, "step": 9550500 }, { "epoch": 5.73, "learning_rate": 1.594468150722328e-05, "loss": 0.224, "step": 9551000 }, { "epoch": 5.73, "learning_rate": 1.5942585741593836e-05, "loss": 0.223, "step": 9551500 }, { "epoch": 5.73, "learning_rate": 1.5940485776033273e-05, "loss": 0.2251, "step": 9552000 }, { "epoch": 5.73, "learning_rate": 1.593838581047271e-05, "loss": 0.2176, "step": 9552500 }, { "epoch": 5.73, "learning_rate": 1.5936285844912144e-05, "loss": 0.2232, "step": 9553000 }, { "epoch": 5.73, "learning_rate": 1.59341900792827e-05, "loss": 0.2239, "step": 9553500 }, { "epoch": 5.73, "learning_rate": 1.5932090113722134e-05, "loss": 0.228, "step": 9554000 }, { "epoch": 5.73, "learning_rate": 1.592999014816157e-05, "loss": 0.2263, "step": 9554500 }, { "epoch": 5.73, "learning_rate": 1.5927890182601008e-05, "loss": 0.2215, "step": 9555000 }, { "epoch": 5.73, "learning_rate": 1.5925790217040438e-05, "loss": 0.222, "step": 9555500 }, { "epoch": 5.73, "learning_rate": 1.5923694451410998e-05, "loss": 0.2286, "step": 9556000 }, { "epoch": 5.73, "learning_rate": 1.592159448585043e-05, "loss": 0.2228, "step": 9556500 }, { "epoch": 5.73, "learning_rate": 1.591949452028987e-05, "loss": 0.2293, "step": 9557000 }, { "epoch": 5.73, "learning_rate": 1.5917394554729302e-05, "loss": 0.2283, "step": 9557500 }, { "epoch": 5.73, "learning_rate": 1.5915294589168735e-05, "loss": 0.2254, "step": 9558000 }, { "epoch": 5.73, "learning_rate": 1.5913194623608172e-05, "loss": 0.2286, "step": 9558500 }, { "epoch": 5.73, "learning_rate": 1.591109465804761e-05, "loss": 0.2193, "step": 9559000 }, { "epoch": 5.73, "learning_rate": 1.5908994692487043e-05, "loss": 0.2309, "step": 9559500 }, { "epoch": 5.73, "learning_rate": 1.590690312678872e-05, "loss": 0.2233, "step": 9560000 }, { "epoch": 5.73, "learning_rate": 1.5904803161228156e-05, "loss": 0.2216, "step": 9560500 }, { "epoch": 5.73, "learning_rate": 1.590270319566759e-05, "loss": 0.2234, "step": 9561000 }, { "epoch": 5.73, "learning_rate": 1.5900603230107027e-05, "loss": 0.2233, "step": 9561500 }, { "epoch": 5.73, "learning_rate": 1.5898503264546464e-05, "loss": 0.2253, "step": 9562000 }, { "epoch": 5.73, "learning_rate": 1.5896403298985894e-05, "loss": 0.2241, "step": 9562500 }, { "epoch": 5.73, "learning_rate": 1.5894307533356454e-05, "loss": 0.2348, "step": 9563000 }, { "epoch": 5.73, "learning_rate": 1.5892207567795888e-05, "loss": 0.2356, "step": 9563500 }, { "epoch": 5.73, "learning_rate": 1.5890107602235324e-05, "loss": 0.2233, "step": 9564000 }, { "epoch": 5.73, "learning_rate": 1.588800763667476e-05, "loss": 0.2225, "step": 9564500 }, { "epoch": 5.73, "learning_rate": 1.588590767111419e-05, "loss": 0.2266, "step": 9565000 }, { "epoch": 5.73, "learning_rate": 1.5883807705553628e-05, "loss": 0.2273, "step": 9565500 }, { "epoch": 5.74, "learning_rate": 1.5881707739993065e-05, "loss": 0.2297, "step": 9566000 }, { "epoch": 5.74, "learning_rate": 1.5879611974363622e-05, "loss": 0.2296, "step": 9566500 }, { "epoch": 5.74, "learning_rate": 1.5877512008803055e-05, "loss": 0.2324, "step": 9567000 }, { "epoch": 5.74, "learning_rate": 1.587541204324249e-05, "loss": 0.2206, "step": 9567500 }, { "epoch": 5.74, "learning_rate": 1.5873312077681926e-05, "loss": 0.2251, "step": 9568000 }, { "epoch": 5.74, "learning_rate": 1.5871212112121363e-05, "loss": 0.2288, "step": 9568500 }, { "epoch": 5.74, "learning_rate": 1.586911634649192e-05, "loss": 0.2245, "step": 9569000 }, { "epoch": 5.74, "learning_rate": 1.586701638093135e-05, "loss": 0.2212, "step": 9569500 }, { "epoch": 5.74, "learning_rate": 1.5864916415370786e-05, "loss": 0.2256, "step": 9570000 }, { "epoch": 5.74, "learning_rate": 1.5862816449810223e-05, "loss": 0.2295, "step": 9570500 }, { "epoch": 5.74, "learning_rate": 1.5860716484249657e-05, "loss": 0.2272, "step": 9571000 }, { "epoch": 5.74, "learning_rate": 1.5858620718620217e-05, "loss": 0.2314, "step": 9571500 }, { "epoch": 5.74, "learning_rate": 1.5856520753059647e-05, "loss": 0.2254, "step": 9572000 }, { "epoch": 5.74, "learning_rate": 1.5854420787499084e-05, "loss": 0.2245, "step": 9572500 }, { "epoch": 5.74, "learning_rate": 1.585232082193852e-05, "loss": 0.2255, "step": 9573000 }, { "epoch": 5.74, "learning_rate": 1.5850220856377954e-05, "loss": 0.2243, "step": 9573500 }, { "epoch": 5.74, "learning_rate": 1.584812509074851e-05, "loss": 0.2289, "step": 9574000 }, { "epoch": 5.74, "learning_rate": 1.5846025125187945e-05, "loss": 0.2244, "step": 9574500 }, { "epoch": 5.74, "learning_rate": 1.584392515962738e-05, "loss": 0.2272, "step": 9575000 }, { "epoch": 5.74, "learning_rate": 1.584182519406682e-05, "loss": 0.2259, "step": 9575500 }, { "epoch": 5.74, "learning_rate": 1.5839725228506252e-05, "loss": 0.2222, "step": 9576000 }, { "epoch": 5.74, "learning_rate": 1.583762526294569e-05, "loss": 0.2272, "step": 9576500 }, { "epoch": 5.74, "learning_rate": 1.5835525297385122e-05, "loss": 0.2328, "step": 9577000 }, { "epoch": 5.74, "learning_rate": 1.5833425331824556e-05, "loss": 0.2245, "step": 9577500 }, { "epoch": 5.74, "learning_rate": 1.5831333766126236e-05, "loss": 0.2194, "step": 9578000 }, { "epoch": 5.74, "learning_rate": 1.5829233800565673e-05, "loss": 0.2326, "step": 9578500 }, { "epoch": 5.74, "learning_rate": 1.5827133835005103e-05, "loss": 0.2269, "step": 9579000 }, { "epoch": 5.74, "learning_rate": 1.582503386944454e-05, "loss": 0.2302, "step": 9579500 }, { "epoch": 5.74, "learning_rate": 1.5822938103815097e-05, "loss": 0.2254, "step": 9580000 }, { "epoch": 5.74, "learning_rate": 1.5820838138254534e-05, "loss": 0.2265, "step": 9580500 }, { "epoch": 5.74, "learning_rate": 1.581873817269397e-05, "loss": 0.2287, "step": 9581000 }, { "epoch": 5.74, "learning_rate": 1.58166382071334e-05, "loss": 0.2236, "step": 9581500 }, { "epoch": 5.74, "learning_rate": 1.5814538241572837e-05, "loss": 0.2297, "step": 9582000 }, { "epoch": 5.75, "learning_rate": 1.5812438276012274e-05, "loss": 0.2225, "step": 9582500 }, { "epoch": 5.75, "learning_rate": 1.581034251038283e-05, "loss": 0.2229, "step": 9583000 }, { "epoch": 5.75, "learning_rate": 1.5808242544822265e-05, "loss": 0.2271, "step": 9583500 }, { "epoch": 5.75, "learning_rate": 1.5806142579261698e-05, "loss": 0.2259, "step": 9584000 }, { "epoch": 5.75, "learning_rate": 1.5804042613701135e-05, "loss": 0.2241, "step": 9584500 }, { "epoch": 5.75, "learning_rate": 1.5801942648140572e-05, "loss": 0.2218, "step": 9585000 }, { "epoch": 5.75, "learning_rate": 1.579984688251113e-05, "loss": 0.2252, "step": 9585500 }, { "epoch": 5.75, "learning_rate": 1.5797746916950562e-05, "loss": 0.2272, "step": 9586000 }, { "epoch": 5.75, "learning_rate": 1.5795646951389996e-05, "loss": 0.2259, "step": 9586500 }, { "epoch": 5.75, "learning_rate": 1.5793546985829433e-05, "loss": 0.2217, "step": 9587000 }, { "epoch": 5.75, "learning_rate": 1.5791447020268866e-05, "loss": 0.2227, "step": 9587500 }, { "epoch": 5.75, "learning_rate": 1.5789347054708303e-05, "loss": 0.2302, "step": 9588000 }, { "epoch": 5.75, "learning_rate": 1.578724708914774e-05, "loss": 0.2238, "step": 9588500 }, { "epoch": 5.75, "learning_rate": 1.5785147123587173e-05, "loss": 0.2247, "step": 9589000 }, { "epoch": 5.75, "learning_rate": 1.578305135795773e-05, "loss": 0.2313, "step": 9589500 }, { "epoch": 5.75, "learning_rate": 1.5780951392397164e-05, "loss": 0.2285, "step": 9590000 }, { "epoch": 5.75, "learning_rate": 1.57788514268366e-05, "loss": 0.2224, "step": 9590500 }, { "epoch": 5.75, "learning_rate": 1.5776751461276037e-05, "loss": 0.2223, "step": 9591000 }, { "epoch": 5.75, "learning_rate": 1.5774651495715467e-05, "loss": 0.2264, "step": 9591500 }, { "epoch": 5.75, "learning_rate": 1.5772551530154904e-05, "loss": 0.2281, "step": 9592000 }, { "epoch": 5.75, "learning_rate": 1.577045156459434e-05, "loss": 0.2233, "step": 9592500 }, { "epoch": 5.75, "learning_rate": 1.5768355798964898e-05, "loss": 0.224, "step": 9593000 }, { "epoch": 5.75, "learning_rate": 1.5766255833404335e-05, "loss": 0.2274, "step": 9593500 }, { "epoch": 5.75, "learning_rate": 1.576416006777489e-05, "loss": 0.2294, "step": 9594000 }, { "epoch": 5.75, "learning_rate": 1.5762060102214322e-05, "loss": 0.2241, "step": 9594500 }, { "epoch": 5.75, "learning_rate": 1.575996013665376e-05, "loss": 0.2262, "step": 9595000 }, { "epoch": 5.75, "learning_rate": 1.5757860171093196e-05, "loss": 0.2272, "step": 9595500 }, { "epoch": 5.75, "learning_rate": 1.575576020553263e-05, "loss": 0.2221, "step": 9596000 }, { "epoch": 5.75, "learning_rate": 1.5753660239972063e-05, "loss": 0.2279, "step": 9596500 }, { "epoch": 5.75, "learning_rate": 1.57515602744115e-05, "loss": 0.2249, "step": 9597000 }, { "epoch": 5.75, "learning_rate": 1.5749460308850936e-05, "loss": 0.2272, "step": 9597500 }, { "epoch": 5.75, "learning_rate": 1.5747364543221493e-05, "loss": 0.2222, "step": 9598000 }, { "epoch": 5.75, "learning_rate": 1.5745264577660923e-05, "loss": 0.2287, "step": 9598500 }, { "epoch": 5.75, "learning_rate": 1.574316461210036e-05, "loss": 0.2212, "step": 9599000 }, { "epoch": 5.76, "learning_rate": 1.5741064646539797e-05, "loss": 0.2268, "step": 9599500 }, { "epoch": 5.76, "learning_rate": 1.573896468097923e-05, "loss": 0.2238, "step": 9600000 }, { "epoch": 5.76, "eval_loss": 0.21220462024211884, "eval_runtime": 1455.2091, "eval_samples_per_second": 361.955, "eval_steps_per_second": 60.326, "step": 9600000 }, { "epoch": 5.76, "learning_rate": 1.5736864715418667e-05, "loss": 0.2228, "step": 9600500 }, { "epoch": 5.76, "learning_rate": 1.5734764749858104e-05, "loss": 0.2243, "step": 9601000 }, { "epoch": 5.76, "learning_rate": 1.5732668984228658e-05, "loss": 0.2279, "step": 9601500 }, { "epoch": 5.76, "learning_rate": 1.5730569018668095e-05, "loss": 0.2272, "step": 9602000 }, { "epoch": 5.76, "learning_rate": 1.5728469053107528e-05, "loss": 0.2262, "step": 9602500 }, { "epoch": 5.76, "learning_rate": 1.5726369087546965e-05, "loss": 0.2238, "step": 9603000 }, { "epoch": 5.76, "learning_rate": 1.5724269121986398e-05, "loss": 0.2257, "step": 9603500 }, { "epoch": 5.76, "learning_rate": 1.5722173356356955e-05, "loss": 0.2294, "step": 9604000 }, { "epoch": 5.76, "learning_rate": 1.5720073390796392e-05, "loss": 0.2301, "step": 9604500 }, { "epoch": 5.76, "learning_rate": 1.5717973425235826e-05, "loss": 0.2235, "step": 9605000 }, { "epoch": 5.76, "learning_rate": 1.5715873459675262e-05, "loss": 0.2311, "step": 9605500 }, { "epoch": 5.76, "learning_rate": 1.5713777694045816e-05, "loss": 0.2195, "step": 9606000 }, { "epoch": 5.76, "learning_rate": 1.5711677728485253e-05, "loss": 0.2325, "step": 9606500 }, { "epoch": 5.76, "learning_rate": 1.5709577762924686e-05, "loss": 0.2214, "step": 9607000 }, { "epoch": 5.76, "learning_rate": 1.5707477797364123e-05, "loss": 0.2272, "step": 9607500 }, { "epoch": 5.76, "learning_rate": 1.5705382031734677e-05, "loss": 0.2232, "step": 9608000 }, { "epoch": 5.76, "learning_rate": 1.5703282066174114e-05, "loss": 0.2279, "step": 9608500 }, { "epoch": 5.76, "learning_rate": 1.570118210061355e-05, "loss": 0.2241, "step": 9609000 }, { "epoch": 5.76, "learning_rate": 1.5699082135052984e-05, "loss": 0.2248, "step": 9609500 }, { "epoch": 5.76, "learning_rate": 1.569698216949242e-05, "loss": 0.2251, "step": 9610000 }, { "epoch": 5.76, "learning_rate": 1.5694886403862974e-05, "loss": 0.2257, "step": 9610500 }, { "epoch": 5.76, "learning_rate": 1.569278643830241e-05, "loss": 0.2266, "step": 9611000 }, { "epoch": 5.76, "learning_rate": 1.5690686472741848e-05, "loss": 0.2235, "step": 9611500 }, { "epoch": 5.76, "learning_rate": 1.568858650718128e-05, "loss": 0.2239, "step": 9612000 }, { "epoch": 5.76, "learning_rate": 1.5686486541620718e-05, "loss": 0.2204, "step": 9612500 }, { "epoch": 5.76, "learning_rate": 1.5684390775991272e-05, "loss": 0.2244, "step": 9613000 }, { "epoch": 5.76, "learning_rate": 1.568229081043071e-05, "loss": 0.2235, "step": 9613500 }, { "epoch": 5.76, "learning_rate": 1.5680190844870146e-05, "loss": 0.2275, "step": 9614000 }, { "epoch": 5.76, "learning_rate": 1.567809087930958e-05, "loss": 0.2264, "step": 9614500 }, { "epoch": 5.76, "learning_rate": 1.5675990913749016e-05, "loss": 0.2214, "step": 9615000 }, { "epoch": 5.76, "learning_rate": 1.567389094818845e-05, "loss": 0.2223, "step": 9615500 }, { "epoch": 5.77, "learning_rate": 1.5671795182559006e-05, "loss": 0.2276, "step": 9616000 }, { "epoch": 5.77, "learning_rate": 1.566969521699844e-05, "loss": 0.2246, "step": 9616500 }, { "epoch": 5.77, "learning_rate": 1.5667595251437877e-05, "loss": 0.2234, "step": 9617000 }, { "epoch": 5.77, "learning_rate": 1.5665495285877313e-05, "loss": 0.2184, "step": 9617500 }, { "epoch": 5.77, "learning_rate": 1.5663395320316747e-05, "loss": 0.2221, "step": 9618000 }, { "epoch": 5.77, "learning_rate": 1.5661299554687304e-05, "loss": 0.2257, "step": 9618500 }, { "epoch": 5.77, "learning_rate": 1.5659199589126737e-05, "loss": 0.2231, "step": 9619000 }, { "epoch": 5.77, "learning_rate": 1.5657099623566174e-05, "loss": 0.2242, "step": 9619500 }, { "epoch": 5.77, "learning_rate": 1.565499965800561e-05, "loss": 0.2322, "step": 9620000 }, { "epoch": 5.77, "learning_rate": 1.5652903892376165e-05, "loss": 0.2251, "step": 9620500 }, { "epoch": 5.77, "learning_rate": 1.56508039268156e-05, "loss": 0.2301, "step": 9621000 }, { "epoch": 5.77, "learning_rate": 1.5648703961255035e-05, "loss": 0.2261, "step": 9621500 }, { "epoch": 5.77, "learning_rate": 1.5646603995694472e-05, "loss": 0.2213, "step": 9622000 }, { "epoch": 5.77, "learning_rate": 1.564450403013391e-05, "loss": 0.2237, "step": 9622500 }, { "epoch": 5.77, "learning_rate": 1.5642408264504462e-05, "loss": 0.2251, "step": 9623000 }, { "epoch": 5.77, "learning_rate": 1.5640308298943896e-05, "loss": 0.2262, "step": 9623500 }, { "epoch": 5.77, "learning_rate": 1.5638208333383332e-05, "loss": 0.2313, "step": 9624000 }, { "epoch": 5.77, "learning_rate": 1.563610836782277e-05, "loss": 0.2304, "step": 9624500 }, { "epoch": 5.77, "learning_rate": 1.5634008402262203e-05, "loss": 0.2239, "step": 9625000 }, { "epoch": 5.77, "learning_rate": 1.5631908436701636e-05, "loss": 0.2212, "step": 9625500 }, { "epoch": 5.77, "learning_rate": 1.5629812671072193e-05, "loss": 0.2301, "step": 9626000 }, { "epoch": 5.77, "learning_rate": 1.562771270551163e-05, "loss": 0.2222, "step": 9626500 }, { "epoch": 5.77, "learning_rate": 1.5625612739951067e-05, "loss": 0.228, "step": 9627000 }, { "epoch": 5.77, "learning_rate": 1.5623512774390497e-05, "loss": 0.2247, "step": 9627500 }, { "epoch": 5.77, "learning_rate": 1.5621412808829934e-05, "loss": 0.2252, "step": 9628000 }, { "epoch": 5.77, "learning_rate": 1.561931284326937e-05, "loss": 0.2251, "step": 9628500 }, { "epoch": 5.77, "learning_rate": 1.5617212877708804e-05, "loss": 0.2234, "step": 9629000 }, { "epoch": 5.77, "learning_rate": 1.561511291214824e-05, "loss": 0.2251, "step": 9629500 }, { "epoch": 5.77, "learning_rate": 1.5613017146518795e-05, "loss": 0.228, "step": 9630000 }, { "epoch": 5.77, "learning_rate": 1.561091718095823e-05, "loss": 0.225, "step": 9630500 }, { "epoch": 5.77, "learning_rate": 1.5608817215397668e-05, "loss": 0.225, "step": 9631000 }, { "epoch": 5.77, "learning_rate": 1.5606717249837102e-05, "loss": 0.2241, "step": 9631500 }, { "epoch": 5.77, "learning_rate": 1.560461728427654e-05, "loss": 0.222, "step": 9632000 }, { "epoch": 5.78, "learning_rate": 1.5602521518647092e-05, "loss": 0.226, "step": 9632500 }, { "epoch": 5.78, "learning_rate": 1.560042155308653e-05, "loss": 0.2267, "step": 9633000 }, { "epoch": 5.78, "learning_rate": 1.5598321587525966e-05, "loss": 0.2309, "step": 9633500 }, { "epoch": 5.78, "learning_rate": 1.55962216219654e-05, "loss": 0.2202, "step": 9634000 }, { "epoch": 5.78, "learning_rate": 1.5594121656404836e-05, "loss": 0.2246, "step": 9634500 }, { "epoch": 5.78, "learning_rate": 1.559202169084427e-05, "loss": 0.2197, "step": 9635000 }, { "epoch": 5.78, "learning_rate": 1.5589925925214827e-05, "loss": 0.2277, "step": 9635500 }, { "epoch": 5.78, "learning_rate": 1.558782595965426e-05, "loss": 0.2259, "step": 9636000 }, { "epoch": 5.78, "learning_rate": 1.5585725994093697e-05, "loss": 0.2356, "step": 9636500 }, { "epoch": 5.78, "learning_rate": 1.5583626028533134e-05, "loss": 0.2258, "step": 9637000 }, { "epoch": 5.78, "learning_rate": 1.5581526062972567e-05, "loss": 0.2245, "step": 9637500 }, { "epoch": 5.78, "learning_rate": 1.5579426097412e-05, "loss": 0.2273, "step": 9638000 }, { "epoch": 5.78, "learning_rate": 1.5577326131851437e-05, "loss": 0.2227, "step": 9638500 }, { "epoch": 5.78, "learning_rate": 1.5575226166290874e-05, "loss": 0.2232, "step": 9639000 }, { "epoch": 5.78, "learning_rate": 1.557313040066143e-05, "loss": 0.2292, "step": 9639500 }, { "epoch": 5.78, "learning_rate": 1.557103043510086e-05, "loss": 0.2234, "step": 9640000 }, { "epoch": 5.78, "learning_rate": 1.5568930469540298e-05, "loss": 0.2258, "step": 9640500 }, { "epoch": 5.78, "learning_rate": 1.5566830503979735e-05, "loss": 0.2213, "step": 9641000 }, { "epoch": 5.78, "learning_rate": 1.5564734738350292e-05, "loss": 0.2245, "step": 9641500 }, { "epoch": 5.78, "learning_rate": 1.5562634772789725e-05, "loss": 0.2226, "step": 9642000 }, { "epoch": 5.78, "learning_rate": 1.556053480722916e-05, "loss": 0.2273, "step": 9642500 }, { "epoch": 5.78, "learning_rate": 1.5558434841668596e-05, "loss": 0.2237, "step": 9643000 }, { "epoch": 5.78, "learning_rate": 1.5556339076039153e-05, "loss": 0.2279, "step": 9643500 }, { "epoch": 5.78, "learning_rate": 1.555423911047859e-05, "loss": 0.2284, "step": 9644000 }, { "epoch": 5.78, "learning_rate": 1.5552143344849143e-05, "loss": 0.23, "step": 9644500 }, { "epoch": 5.78, "learning_rate": 1.555004337928858e-05, "loss": 0.2237, "step": 9645000 }, { "epoch": 5.78, "learning_rate": 1.5547943413728013e-05, "loss": 0.2292, "step": 9645500 }, { "epoch": 5.78, "learning_rate": 1.554584344816745e-05, "loss": 0.2263, "step": 9646000 }, { "epoch": 5.78, "learning_rate": 1.5543743482606887e-05, "loss": 0.2265, "step": 9646500 }, { "epoch": 5.78, "learning_rate": 1.554164771697744e-05, "loss": 0.2253, "step": 9647000 }, { "epoch": 5.78, "learning_rate": 1.5539547751416878e-05, "loss": 0.2242, "step": 9647500 }, { "epoch": 5.78, "learning_rate": 1.553744778585631e-05, "loss": 0.2281, "step": 9648000 }, { "epoch": 5.78, "learning_rate": 1.5535347820295748e-05, "loss": 0.225, "step": 9648500 }, { "epoch": 5.78, "learning_rate": 1.5533247854735185e-05, "loss": 0.2252, "step": 9649000 }, { "epoch": 5.79, "learning_rate": 1.5531152089105738e-05, "loss": 0.2255, "step": 9649500 }, { "epoch": 5.79, "learning_rate": 1.5529052123545175e-05, "loss": 0.2272, "step": 9650000 }, { "epoch": 5.79, "learning_rate": 1.552695215798461e-05, "loss": 0.2283, "step": 9650500 }, { "epoch": 5.79, "learning_rate": 1.5524852192424045e-05, "loss": 0.2255, "step": 9651000 }, { "epoch": 5.79, "learning_rate": 1.552275222686348e-05, "loss": 0.2267, "step": 9651500 }, { "epoch": 5.79, "learning_rate": 1.5520652261302912e-05, "loss": 0.2264, "step": 9652000 }, { "epoch": 5.79, "learning_rate": 1.551855649567347e-05, "loss": 0.2325, "step": 9652500 }, { "epoch": 5.79, "learning_rate": 1.5516456530112906e-05, "loss": 0.2327, "step": 9653000 }, { "epoch": 5.79, "learning_rate": 1.5514356564552343e-05, "loss": 0.2246, "step": 9653500 }, { "epoch": 5.79, "learning_rate": 1.5512256598991776e-05, "loss": 0.2192, "step": 9654000 }, { "epoch": 5.79, "learning_rate": 1.551015663343121e-05, "loss": 0.2216, "step": 9654500 }, { "epoch": 5.79, "learning_rate": 1.5508060867801767e-05, "loss": 0.2247, "step": 9655000 }, { "epoch": 5.79, "learning_rate": 1.5505960902241204e-05, "loss": 0.2355, "step": 9655500 }, { "epoch": 5.79, "learning_rate": 1.550386093668064e-05, "loss": 0.2237, "step": 9656000 }, { "epoch": 5.79, "learning_rate": 1.550176097112007e-05, "loss": 0.2234, "step": 9656500 }, { "epoch": 5.79, "learning_rate": 1.5499661005559508e-05, "loss": 0.2257, "step": 9657000 }, { "epoch": 5.79, "learning_rate": 1.5497561039998944e-05, "loss": 0.2272, "step": 9657500 }, { "epoch": 5.79, "learning_rate": 1.54954652743695e-05, "loss": 0.2243, "step": 9658000 }, { "epoch": 5.79, "learning_rate": 1.5493365308808938e-05, "loss": 0.232, "step": 9658500 }, { "epoch": 5.79, "learning_rate": 1.5491265343248368e-05, "loss": 0.23, "step": 9659000 }, { "epoch": 5.79, "learning_rate": 1.5489165377687805e-05, "loss": 0.2241, "step": 9659500 }, { "epoch": 5.79, "learning_rate": 1.5487069612058362e-05, "loss": 0.2217, "step": 9660000 }, { "epoch": 5.79, "learning_rate": 1.54849696464978e-05, "loss": 0.2291, "step": 9660500 }, { "epoch": 5.79, "learning_rate": 1.5482869680937232e-05, "loss": 0.2269, "step": 9661000 }, { "epoch": 5.79, "learning_rate": 1.5480769715376666e-05, "loss": 0.2275, "step": 9661500 }, { "epoch": 5.79, "learning_rate": 1.5478669749816103e-05, "loss": 0.2256, "step": 9662000 }, { "epoch": 5.79, "learning_rate": 1.547656978425554e-05, "loss": 0.23, "step": 9662500 }, { "epoch": 5.79, "learning_rate": 1.5474474018626096e-05, "loss": 0.2193, "step": 9663000 }, { "epoch": 5.79, "learning_rate": 1.5472374053065527e-05, "loss": 0.2279, "step": 9663500 }, { "epoch": 5.79, "learning_rate": 1.5470274087504963e-05, "loss": 0.2224, "step": 9664000 }, { "epoch": 5.79, "learning_rate": 1.54681741219444e-05, "loss": 0.2282, "step": 9664500 }, { "epoch": 5.79, "learning_rate": 1.5466074156383834e-05, "loss": 0.2228, "step": 9665000 }, { "epoch": 5.79, "learning_rate": 1.546397419082327e-05, "loss": 0.2276, "step": 9665500 }, { "epoch": 5.8, "learning_rate": 1.5461874225262707e-05, "loss": 0.2227, "step": 9666000 }, { "epoch": 5.8, "learning_rate": 1.545977845963326e-05, "loss": 0.2231, "step": 9666500 }, { "epoch": 5.8, "learning_rate": 1.5457678494072698e-05, "loss": 0.2237, "step": 9667000 }, { "epoch": 5.8, "learning_rate": 1.545557852851213e-05, "loss": 0.2192, "step": 9667500 }, { "epoch": 5.8, "learning_rate": 1.5453478562951568e-05, "loss": 0.227, "step": 9668000 }, { "epoch": 5.8, "learning_rate": 1.5451378597391005e-05, "loss": 0.2257, "step": 9668500 }, { "epoch": 5.8, "learning_rate": 1.544928283176156e-05, "loss": 0.225, "step": 9669000 }, { "epoch": 5.8, "learning_rate": 1.5447182866200995e-05, "loss": 0.2278, "step": 9669500 }, { "epoch": 5.8, "learning_rate": 1.544508290064043e-05, "loss": 0.2253, "step": 9670000 }, { "epoch": 5.8, "learning_rate": 1.5442982935079866e-05, "loss": 0.2247, "step": 9670500 }, { "epoch": 5.8, "learning_rate": 1.54408829695193e-05, "loss": 0.2287, "step": 9671000 }, { "epoch": 5.8, "learning_rate": 1.5438783003958733e-05, "loss": 0.2313, "step": 9671500 }, { "epoch": 5.8, "learning_rate": 1.543668303839817e-05, "loss": 0.2278, "step": 9672000 }, { "epoch": 5.8, "learning_rate": 1.5434583072837606e-05, "loss": 0.2255, "step": 9672500 }, { "epoch": 5.8, "learning_rate": 1.5432487307208163e-05, "loss": 0.2254, "step": 9673000 }, { "epoch": 5.8, "learning_rate": 1.5430387341647597e-05, "loss": 0.2284, "step": 9673500 }, { "epoch": 5.8, "learning_rate": 1.5428291576018154e-05, "loss": 0.2224, "step": 9674000 }, { "epoch": 5.8, "learning_rate": 1.5426191610457587e-05, "loss": 0.2242, "step": 9674500 }, { "epoch": 5.8, "learning_rate": 1.5424091644897024e-05, "loss": 0.2259, "step": 9675000 }, { "epoch": 5.8, "learning_rate": 1.542199167933646e-05, "loss": 0.2208, "step": 9675500 }, { "epoch": 5.8, "learning_rate": 1.541989171377589e-05, "loss": 0.2243, "step": 9676000 }, { "epoch": 5.8, "learning_rate": 1.5417791748215328e-05, "loss": 0.2291, "step": 9676500 }, { "epoch": 5.8, "learning_rate": 1.5415691782654765e-05, "loss": 0.2239, "step": 9677000 }, { "epoch": 5.8, "learning_rate": 1.54135918170942e-05, "loss": 0.229, "step": 9677500 }, { "epoch": 5.8, "learning_rate": 1.541149605146476e-05, "loss": 0.2201, "step": 9678000 }, { "epoch": 5.8, "learning_rate": 1.540939608590419e-05, "loss": 0.2254, "step": 9678500 }, { "epoch": 5.8, "learning_rate": 1.5407296120343625e-05, "loss": 0.2233, "step": 9679000 }, { "epoch": 5.8, "learning_rate": 1.5405196154783062e-05, "loss": 0.2247, "step": 9679500 }, { "epoch": 5.8, "learning_rate": 1.5403096189222496e-05, "loss": 0.2202, "step": 9680000 }, { "epoch": 5.8, "learning_rate": 1.5400996223661933e-05, "loss": 0.2234, "step": 9680500 }, { "epoch": 5.8, "learning_rate": 1.5398900458032486e-05, "loss": 0.2277, "step": 9681000 }, { "epoch": 5.8, "learning_rate": 1.5396800492471923e-05, "loss": 0.2199, "step": 9681500 }, { "epoch": 5.8, "learning_rate": 1.539470052691136e-05, "loss": 0.2277, "step": 9682000 }, { "epoch": 5.81, "learning_rate": 1.5392600561350793e-05, "loss": 0.2316, "step": 9682500 }, { "epoch": 5.81, "learning_rate": 1.539050479572135e-05, "loss": 0.228, "step": 9683000 }, { "epoch": 5.81, "learning_rate": 1.5388404830160784e-05, "loss": 0.225, "step": 9683500 }, { "epoch": 5.81, "learning_rate": 1.538630486460022e-05, "loss": 0.2242, "step": 9684000 }, { "epoch": 5.81, "learning_rate": 1.5384204899039657e-05, "loss": 0.2254, "step": 9684500 }, { "epoch": 5.81, "learning_rate": 1.538210493347909e-05, "loss": 0.222, "step": 9685000 }, { "epoch": 5.81, "learning_rate": 1.5380004967918528e-05, "loss": 0.2207, "step": 9685500 }, { "epoch": 5.81, "learning_rate": 1.537790500235796e-05, "loss": 0.2261, "step": 9686000 }, { "epoch": 5.81, "learning_rate": 1.5375805036797395e-05, "loss": 0.2246, "step": 9686500 }, { "epoch": 5.81, "learning_rate": 1.537370927116795e-05, "loss": 0.2279, "step": 9687000 }, { "epoch": 5.81, "learning_rate": 1.537160930560739e-05, "loss": 0.2235, "step": 9687500 }, { "epoch": 5.81, "learning_rate": 1.5369509340046825e-05, "loss": 0.2347, "step": 9688000 }, { "epoch": 5.81, "learning_rate": 1.536740937448626e-05, "loss": 0.2227, "step": 9688500 }, { "epoch": 5.81, "learning_rate": 1.5365313608856816e-05, "loss": 0.2262, "step": 9689000 }, { "epoch": 5.81, "learning_rate": 1.536321364329625e-05, "loss": 0.224, "step": 9689500 }, { "epoch": 5.81, "learning_rate": 1.5361113677735686e-05, "loss": 0.2233, "step": 9690000 }, { "epoch": 5.81, "learning_rate": 1.535901371217512e-05, "loss": 0.2308, "step": 9690500 }, { "epoch": 5.81, "learning_rate": 1.5356917946545676e-05, "loss": 0.2286, "step": 9691000 }, { "epoch": 5.81, "learning_rate": 1.5354822180916233e-05, "loss": 0.2245, "step": 9691500 }, { "epoch": 5.81, "learning_rate": 1.535272221535567e-05, "loss": 0.2264, "step": 9692000 }, { "epoch": 5.81, "learning_rate": 1.53506222497951e-05, "loss": 0.2249, "step": 9692500 }, { "epoch": 5.81, "learning_rate": 1.5348522284234537e-05, "loss": 0.2223, "step": 9693000 }, { "epoch": 5.81, "learning_rate": 1.5346422318673974e-05, "loss": 0.2236, "step": 9693500 }, { "epoch": 5.81, "learning_rate": 1.534432655304453e-05, "loss": 0.2255, "step": 9694000 }, { "epoch": 5.81, "learning_rate": 1.5342226587483968e-05, "loss": 0.2242, "step": 9694500 }, { "epoch": 5.81, "learning_rate": 1.5340126621923398e-05, "loss": 0.2333, "step": 9695000 }, { "epoch": 5.81, "learning_rate": 1.5338026656362835e-05, "loss": 0.2265, "step": 9695500 }, { "epoch": 5.81, "learning_rate": 1.533593089073339e-05, "loss": 0.2183, "step": 9696000 }, { "epoch": 5.81, "learning_rate": 1.533383092517283e-05, "loss": 0.2247, "step": 9696500 }, { "epoch": 5.81, "learning_rate": 1.5331730959612262e-05, "loss": 0.2269, "step": 9697000 }, { "epoch": 5.81, "learning_rate": 1.5329630994051695e-05, "loss": 0.2215, "step": 9697500 }, { "epoch": 5.81, "learning_rate": 1.5327531028491132e-05, "loss": 0.2259, "step": 9698000 }, { "epoch": 5.81, "learning_rate": 1.532543106293057e-05, "loss": 0.223, "step": 9698500 }, { "epoch": 5.81, "learning_rate": 1.5323331097370003e-05, "loss": 0.2202, "step": 9699000 }, { "epoch": 5.82, "learning_rate": 1.532123533174056e-05, "loss": 0.2247, "step": 9699500 }, { "epoch": 5.82, "learning_rate": 1.5319135366179993e-05, "loss": 0.2228, "step": 9700000 }, { "epoch": 5.82, "eval_loss": 0.21187874674797058, "eval_runtime": 1454.8594, "eval_samples_per_second": 362.042, "eval_steps_per_second": 60.341, "step": 9700000 }, { "epoch": 5.82, "learning_rate": 1.531703540061943e-05, "loss": 0.2257, "step": 9700500 }, { "epoch": 5.82, "learning_rate": 1.5314935435058863e-05, "loss": 0.2245, "step": 9701000 }, { "epoch": 5.82, "learning_rate": 1.5312839669429424e-05, "loss": 0.2227, "step": 9701500 }, { "epoch": 5.82, "learning_rate": 1.5310739703868854e-05, "loss": 0.2268, "step": 9702000 }, { "epoch": 5.82, "learning_rate": 1.530863973830829e-05, "loss": 0.2274, "step": 9702500 }, { "epoch": 5.82, "learning_rate": 1.5306539772747727e-05, "loss": 0.2351, "step": 9703000 }, { "epoch": 5.82, "learning_rate": 1.5304444007118284e-05, "loss": 0.2221, "step": 9703500 }, { "epoch": 5.82, "learning_rate": 1.5302344041557718e-05, "loss": 0.2276, "step": 9704000 }, { "epoch": 5.82, "learning_rate": 1.530024407599715e-05, "loss": 0.2213, "step": 9704500 }, { "epoch": 5.82, "learning_rate": 1.5298144110436588e-05, "loss": 0.2259, "step": 9705000 }, { "epoch": 5.82, "learning_rate": 1.5296044144876025e-05, "loss": 0.2233, "step": 9705500 }, { "epoch": 5.82, "learning_rate": 1.529394417931546e-05, "loss": 0.2231, "step": 9706000 }, { "epoch": 5.82, "learning_rate": 1.5291844213754895e-05, "loss": 0.2285, "step": 9706500 }, { "epoch": 5.82, "learning_rate": 1.528974844812545e-05, "loss": 0.2246, "step": 9707000 }, { "epoch": 5.82, "learning_rate": 1.5287648482564886e-05, "loss": 0.2205, "step": 9707500 }, { "epoch": 5.82, "learning_rate": 1.528554851700432e-05, "loss": 0.2266, "step": 9708000 }, { "epoch": 5.82, "learning_rate": 1.5283448551443756e-05, "loss": 0.2315, "step": 9708500 }, { "epoch": 5.82, "learning_rate": 1.5281348585883193e-05, "loss": 0.2214, "step": 9709000 }, { "epoch": 5.82, "learning_rate": 1.5279252820253746e-05, "loss": 0.2248, "step": 9709500 }, { "epoch": 5.82, "learning_rate": 1.5277152854693183e-05, "loss": 0.2285, "step": 9710000 }, { "epoch": 5.82, "learning_rate": 1.5275052889132617e-05, "loss": 0.2196, "step": 9710500 }, { "epoch": 5.82, "learning_rate": 1.5272952923572054e-05, "loss": 0.2207, "step": 9711000 }, { "epoch": 5.82, "learning_rate": 1.527085295801149e-05, "loss": 0.2228, "step": 9711500 }, { "epoch": 5.82, "learning_rate": 1.5268752992450924e-05, "loss": 0.223, "step": 9712000 }, { "epoch": 5.82, "learning_rate": 1.5266653026890357e-05, "loss": 0.2234, "step": 9712500 }, { "epoch": 5.82, "learning_rate": 1.5264553061329794e-05, "loss": 0.2215, "step": 9713000 }, { "epoch": 5.82, "learning_rate": 1.526245729570035e-05, "loss": 0.2266, "step": 9713500 }, { "epoch": 5.82, "learning_rate": 1.5260357330139788e-05, "loss": 0.2289, "step": 9714000 }, { "epoch": 5.82, "learning_rate": 1.525825736457922e-05, "loss": 0.2219, "step": 9714500 }, { "epoch": 5.82, "learning_rate": 1.5256157399018655e-05, "loss": 0.2299, "step": 9715000 }, { "epoch": 5.82, "learning_rate": 1.5254061633389212e-05, "loss": 0.2226, "step": 9715500 }, { "epoch": 5.83, "learning_rate": 1.5251961667828649e-05, "loss": 0.2225, "step": 9716000 }, { "epoch": 5.83, "learning_rate": 1.5249861702268084e-05, "loss": 0.2232, "step": 9716500 }, { "epoch": 5.83, "learning_rate": 1.5247761736707517e-05, "loss": 0.2244, "step": 9717000 }, { "epoch": 5.83, "learning_rate": 1.5245665971078074e-05, "loss": 0.2272, "step": 9717500 }, { "epoch": 5.83, "learning_rate": 1.524356600551751e-05, "loss": 0.2213, "step": 9718000 }, { "epoch": 5.83, "learning_rate": 1.5241466039956946e-05, "loss": 0.2293, "step": 9718500 }, { "epoch": 5.83, "learning_rate": 1.5239366074396378e-05, "loss": 0.2209, "step": 9719000 }, { "epoch": 5.83, "learning_rate": 1.5237270308766935e-05, "loss": 0.2263, "step": 9719500 }, { "epoch": 5.83, "learning_rate": 1.5235170343206372e-05, "loss": 0.2278, "step": 9720000 }, { "epoch": 5.83, "learning_rate": 1.5233070377645807e-05, "loss": 0.2231, "step": 9720500 }, { "epoch": 5.83, "learning_rate": 1.523097461201636e-05, "loss": 0.2286, "step": 9721000 }, { "epoch": 5.83, "learning_rate": 1.5228874646455797e-05, "loss": 0.2219, "step": 9721500 }, { "epoch": 5.83, "learning_rate": 1.5226774680895233e-05, "loss": 0.2247, "step": 9722000 }, { "epoch": 5.83, "learning_rate": 1.5224674715334668e-05, "loss": 0.2221, "step": 9722500 }, { "epoch": 5.83, "learning_rate": 1.5222578949705226e-05, "loss": 0.226, "step": 9723000 }, { "epoch": 5.83, "learning_rate": 1.5220478984144658e-05, "loss": 0.2267, "step": 9723500 }, { "epoch": 5.83, "learning_rate": 1.5218379018584093e-05, "loss": 0.2188, "step": 9724000 }, { "epoch": 5.83, "learning_rate": 1.521627905302353e-05, "loss": 0.2242, "step": 9724500 }, { "epoch": 5.83, "learning_rate": 1.5214179087462965e-05, "loss": 0.2203, "step": 9725000 }, { "epoch": 5.83, "learning_rate": 1.5212083321833522e-05, "loss": 0.2232, "step": 9725500 }, { "epoch": 5.83, "learning_rate": 1.5209983356272956e-05, "loss": 0.2303, "step": 9726000 }, { "epoch": 5.83, "learning_rate": 1.5207883390712391e-05, "loss": 0.2258, "step": 9726500 }, { "epoch": 5.83, "learning_rate": 1.5205783425151828e-05, "loss": 0.2299, "step": 9727000 }, { "epoch": 5.83, "learning_rate": 1.5203687659522385e-05, "loss": 0.2276, "step": 9727500 }, { "epoch": 5.83, "learning_rate": 1.520158769396182e-05, "loss": 0.2172, "step": 9728000 }, { "epoch": 5.83, "learning_rate": 1.5199487728401253e-05, "loss": 0.2231, "step": 9728500 }, { "epoch": 5.83, "learning_rate": 1.5197387762840688e-05, "loss": 0.2215, "step": 9729000 }, { "epoch": 5.83, "learning_rate": 1.5195287797280124e-05, "loss": 0.2225, "step": 9729500 }, { "epoch": 5.83, "learning_rate": 1.519318783171956e-05, "loss": 0.2285, "step": 9730000 }, { "epoch": 5.83, "learning_rate": 1.5191087866158996e-05, "loss": 0.2213, "step": 9730500 }, { "epoch": 5.83, "learning_rate": 1.5188987900598429e-05, "loss": 0.2268, "step": 9731000 }, { "epoch": 5.83, "learning_rate": 1.5186892134968986e-05, "loss": 0.2261, "step": 9731500 }, { "epoch": 5.83, "learning_rate": 1.5184792169408421e-05, "loss": 0.2237, "step": 9732000 }, { "epoch": 5.84, "learning_rate": 1.5182692203847858e-05, "loss": 0.2257, "step": 9732500 }, { "epoch": 5.84, "learning_rate": 1.5180596438218412e-05, "loss": 0.2214, "step": 9733000 }, { "epoch": 5.84, "learning_rate": 1.5178496472657847e-05, "loss": 0.2247, "step": 9733500 }, { "epoch": 5.84, "learning_rate": 1.5176396507097284e-05, "loss": 0.2222, "step": 9734000 }, { "epoch": 5.84, "learning_rate": 1.5174296541536719e-05, "loss": 0.2249, "step": 9734500 }, { "epoch": 5.84, "learning_rate": 1.5172196575976154e-05, "loss": 0.2237, "step": 9735000 }, { "epoch": 5.84, "learning_rate": 1.517009661041559e-05, "loss": 0.228, "step": 9735500 }, { "epoch": 5.84, "learning_rate": 1.5167996644855023e-05, "loss": 0.2264, "step": 9736000 }, { "epoch": 5.84, "learning_rate": 1.516589667929446e-05, "loss": 0.2229, "step": 9736500 }, { "epoch": 5.84, "learning_rate": 1.5163805113596138e-05, "loss": 0.2279, "step": 9737000 }, { "epoch": 5.84, "learning_rate": 1.5161705148035573e-05, "loss": 0.228, "step": 9737500 }, { "epoch": 5.84, "learning_rate": 1.5159605182475007e-05, "loss": 0.2238, "step": 9738000 }, { "epoch": 5.84, "learning_rate": 1.5157505216914442e-05, "loss": 0.2227, "step": 9738500 }, { "epoch": 5.84, "learning_rate": 1.5155405251353877e-05, "loss": 0.2256, "step": 9739000 }, { "epoch": 5.84, "learning_rate": 1.5153305285793314e-05, "loss": 0.2184, "step": 9739500 }, { "epoch": 5.84, "learning_rate": 1.5151205320232749e-05, "loss": 0.2212, "step": 9740000 }, { "epoch": 5.84, "learning_rate": 1.5149105354672181e-05, "loss": 0.222, "step": 9740500 }, { "epoch": 5.84, "learning_rate": 1.514700958904274e-05, "loss": 0.2207, "step": 9741000 }, { "epoch": 5.84, "learning_rate": 1.5144913823413296e-05, "loss": 0.223, "step": 9741500 }, { "epoch": 5.84, "learning_rate": 1.5142813857852732e-05, "loss": 0.2238, "step": 9742000 }, { "epoch": 5.84, "learning_rate": 1.5140713892292165e-05, "loss": 0.2284, "step": 9742500 }, { "epoch": 5.84, "learning_rate": 1.51386139267316e-05, "loss": 0.2335, "step": 9743000 }, { "epoch": 5.84, "learning_rate": 1.5136513961171035e-05, "loss": 0.2265, "step": 9743500 }, { "epoch": 5.84, "learning_rate": 1.5134418195541594e-05, "loss": 0.2232, "step": 9744000 }, { "epoch": 5.84, "learning_rate": 1.513231822998103e-05, "loss": 0.2281, "step": 9744500 }, { "epoch": 5.84, "learning_rate": 1.5130218264420463e-05, "loss": 0.2209, "step": 9745000 }, { "epoch": 5.84, "learning_rate": 1.5128118298859898e-05, "loss": 0.2261, "step": 9745500 }, { "epoch": 5.84, "learning_rate": 1.5126018333299333e-05, "loss": 0.2228, "step": 9746000 }, { "epoch": 5.84, "learning_rate": 1.512391836773877e-05, "loss": 0.2276, "step": 9746500 }, { "epoch": 5.84, "learning_rate": 1.5121822602109327e-05, "loss": 0.2265, "step": 9747000 }, { "epoch": 5.84, "learning_rate": 1.5119722636548759e-05, "loss": 0.2284, "step": 9747500 }, { "epoch": 5.84, "learning_rate": 1.5117622670988195e-05, "loss": 0.2226, "step": 9748000 }, { "epoch": 5.84, "learning_rate": 1.511552270542763e-05, "loss": 0.2205, "step": 9748500 }, { "epoch": 5.84, "learning_rate": 1.5113422739867066e-05, "loss": 0.2261, "step": 9749000 }, { "epoch": 5.85, "learning_rate": 1.5111322774306503e-05, "loss": 0.2287, "step": 9749500 }, { "epoch": 5.85, "learning_rate": 1.5109222808745934e-05, "loss": 0.2264, "step": 9750000 }, { "epoch": 5.85, "learning_rate": 1.5107122843185371e-05, "loss": 0.2267, "step": 9750500 }, { "epoch": 5.85, "learning_rate": 1.5105027077555928e-05, "loss": 0.2267, "step": 9751000 }, { "epoch": 5.85, "learning_rate": 1.5102927111995363e-05, "loss": 0.2268, "step": 9751500 }, { "epoch": 5.85, "learning_rate": 1.51008271464348e-05, "loss": 0.2271, "step": 9752000 }, { "epoch": 5.85, "learning_rate": 1.5098727180874232e-05, "loss": 0.2216, "step": 9752500 }, { "epoch": 5.85, "learning_rate": 1.5096631415244789e-05, "loss": 0.2269, "step": 9753000 }, { "epoch": 5.85, "learning_rate": 1.5094531449684226e-05, "loss": 0.2228, "step": 9753500 }, { "epoch": 5.85, "learning_rate": 1.5092431484123661e-05, "loss": 0.222, "step": 9754000 }, { "epoch": 5.85, "learning_rate": 1.5090331518563096e-05, "loss": 0.2215, "step": 9754500 }, { "epoch": 5.85, "learning_rate": 1.508823155300253e-05, "loss": 0.2257, "step": 9755000 }, { "epoch": 5.85, "learning_rate": 1.5086131587441965e-05, "loss": 0.223, "step": 9755500 }, { "epoch": 5.85, "learning_rate": 1.5084035821812522e-05, "loss": 0.226, "step": 9756000 }, { "epoch": 5.85, "learning_rate": 1.5081935856251958e-05, "loss": 0.2258, "step": 9756500 }, { "epoch": 5.85, "learning_rate": 1.5079835890691394e-05, "loss": 0.2204, "step": 9757000 }, { "epoch": 5.85, "learning_rate": 1.5077735925130827e-05, "loss": 0.2276, "step": 9757500 }, { "epoch": 5.85, "learning_rate": 1.5075635959570262e-05, "loss": 0.2251, "step": 9758000 }, { "epoch": 5.85, "learning_rate": 1.5073535994009697e-05, "loss": 0.2256, "step": 9758500 }, { "epoch": 5.85, "learning_rate": 1.5071436028449134e-05, "loss": 0.2245, "step": 9759000 }, { "epoch": 5.85, "learning_rate": 1.506933606288857e-05, "loss": 0.2245, "step": 9759500 }, { "epoch": 5.85, "learning_rate": 1.5067240297259123e-05, "loss": 0.2206, "step": 9760000 }, { "epoch": 5.85, "learning_rate": 1.506514033169856e-05, "loss": 0.2249, "step": 9760500 }, { "epoch": 5.85, "learning_rate": 1.5063044566069117e-05, "loss": 0.2178, "step": 9761000 }, { "epoch": 5.85, "learning_rate": 1.5060944600508552e-05, "loss": 0.2272, "step": 9761500 }, { "epoch": 5.85, "learning_rate": 1.5058844634947985e-05, "loss": 0.2215, "step": 9762000 }, { "epoch": 5.85, "learning_rate": 1.505674466938742e-05, "loss": 0.2262, "step": 9762500 }, { "epoch": 5.85, "learning_rate": 1.5054644703826857e-05, "loss": 0.2282, "step": 9763000 }, { "epoch": 5.85, "learning_rate": 1.5052544738266293e-05, "loss": 0.2232, "step": 9763500 }, { "epoch": 5.85, "learning_rate": 1.5050444772705728e-05, "loss": 0.2239, "step": 9764000 }, { "epoch": 5.85, "learning_rate": 1.5048349007076283e-05, "loss": 0.2293, "step": 9764500 }, { "epoch": 5.85, "learning_rate": 1.5046249041515718e-05, "loss": 0.2287, "step": 9765000 }, { "epoch": 5.85, "learning_rate": 1.5044149075955153e-05, "loss": 0.224, "step": 9765500 }, { "epoch": 5.86, "learning_rate": 1.504204911039459e-05, "loss": 0.2281, "step": 9766000 }, { "epoch": 5.86, "learning_rate": 1.5039949144834025e-05, "loss": 0.2188, "step": 9766500 }, { "epoch": 5.86, "learning_rate": 1.503784917927346e-05, "loss": 0.2222, "step": 9767000 }, { "epoch": 5.86, "learning_rate": 1.5035749213712894e-05, "loss": 0.2243, "step": 9767500 }, { "epoch": 5.86, "learning_rate": 1.5033649248152329e-05, "loss": 0.2282, "step": 9768000 }, { "epoch": 5.86, "learning_rate": 1.5031553482522888e-05, "loss": 0.2236, "step": 9768500 }, { "epoch": 5.86, "learning_rate": 1.5029453516962323e-05, "loss": 0.2272, "step": 9769000 }, { "epoch": 5.86, "learning_rate": 1.5027357751332876e-05, "loss": 0.2213, "step": 9769500 }, { "epoch": 5.86, "learning_rate": 1.5025257785772313e-05, "loss": 0.2209, "step": 9770000 }, { "epoch": 5.86, "learning_rate": 1.5023157820211748e-05, "loss": 0.2203, "step": 9770500 }, { "epoch": 5.86, "learning_rate": 1.5021057854651184e-05, "loss": 0.2209, "step": 9771000 }, { "epoch": 5.86, "learning_rate": 1.501895788909062e-05, "loss": 0.2253, "step": 9771500 }, { "epoch": 5.86, "learning_rate": 1.5016857923530052e-05, "loss": 0.2296, "step": 9772000 }, { "epoch": 5.86, "learning_rate": 1.5014757957969489e-05, "loss": 0.2267, "step": 9772500 }, { "epoch": 5.86, "learning_rate": 1.5012662192340046e-05, "loss": 0.2199, "step": 9773000 }, { "epoch": 5.86, "learning_rate": 1.5010562226779481e-05, "loss": 0.2266, "step": 9773500 }, { "epoch": 5.86, "learning_rate": 1.5008462261218918e-05, "loss": 0.2305, "step": 9774000 }, { "epoch": 5.86, "learning_rate": 1.500636229565835e-05, "loss": 0.231, "step": 9774500 }, { "epoch": 5.86, "learning_rate": 1.5004262330097785e-05, "loss": 0.2275, "step": 9775000 }, { "epoch": 5.86, "learning_rate": 1.5002162364537222e-05, "loss": 0.2233, "step": 9775500 }, { "epoch": 5.86, "learning_rate": 1.5000062398976657e-05, "loss": 0.2231, "step": 9776000 }, { "epoch": 5.86, "learning_rate": 1.4997962433416092e-05, "loss": 0.2244, "step": 9776500 }, { "epoch": 5.86, "learning_rate": 1.4995866667786647e-05, "loss": 0.2226, "step": 9777000 }, { "epoch": 5.86, "learning_rate": 1.4993766702226082e-05, "loss": 0.2201, "step": 9777500 }, { "epoch": 5.86, "learning_rate": 1.499166673666552e-05, "loss": 0.222, "step": 9778000 }, { "epoch": 5.86, "learning_rate": 1.4989566771104954e-05, "loss": 0.2229, "step": 9778500 }, { "epoch": 5.86, "learning_rate": 1.498746680554439e-05, "loss": 0.2248, "step": 9779000 }, { "epoch": 5.86, "learning_rate": 1.4985366839983823e-05, "loss": 0.2258, "step": 9779500 }, { "epoch": 5.86, "learning_rate": 1.4983266874423258e-05, "loss": 0.2203, "step": 9780000 }, { "epoch": 5.86, "learning_rate": 1.4981166908862693e-05, "loss": 0.2223, "step": 9780500 }, { "epoch": 5.86, "learning_rate": 1.4979071143233252e-05, "loss": 0.2241, "step": 9781000 }, { "epoch": 5.86, "learning_rate": 1.4976975377603806e-05, "loss": 0.2173, "step": 9781500 }, { "epoch": 5.86, "learning_rate": 1.497487541204324e-05, "loss": 0.2226, "step": 9782000 }, { "epoch": 5.87, "learning_rate": 1.4972775446482678e-05, "loss": 0.2218, "step": 9782500 }, { "epoch": 5.87, "learning_rate": 1.4970675480922113e-05, "loss": 0.2243, "step": 9783000 }, { "epoch": 5.87, "learning_rate": 1.4968575515361548e-05, "loss": 0.2173, "step": 9783500 }, { "epoch": 5.87, "learning_rate": 1.4966475549800985e-05, "loss": 0.2275, "step": 9784000 }, { "epoch": 5.87, "learning_rate": 1.4964375584240417e-05, "loss": 0.2286, "step": 9784500 }, { "epoch": 5.87, "learning_rate": 1.4962279818610975e-05, "loss": 0.2246, "step": 9785000 }, { "epoch": 5.87, "learning_rate": 1.496017985305041e-05, "loss": 0.2286, "step": 9785500 }, { "epoch": 5.87, "learning_rate": 1.4958079887489845e-05, "loss": 0.2188, "step": 9786000 }, { "epoch": 5.87, "learning_rate": 1.4955979921929279e-05, "loss": 0.226, "step": 9786500 }, { "epoch": 5.87, "learning_rate": 1.4953879956368714e-05, "loss": 0.2274, "step": 9787000 }, { "epoch": 5.87, "learning_rate": 1.4951784190739271e-05, "loss": 0.2231, "step": 9787500 }, { "epoch": 5.87, "learning_rate": 1.4949684225178708e-05, "loss": 0.2261, "step": 9788000 }, { "epoch": 5.87, "learning_rate": 1.4947584259618143e-05, "loss": 0.2209, "step": 9788500 }, { "epoch": 5.87, "learning_rate": 1.4945484294057577e-05, "loss": 0.2272, "step": 9789000 }, { "epoch": 5.87, "learning_rate": 1.4943384328497012e-05, "loss": 0.2195, "step": 9789500 }, { "epoch": 5.87, "learning_rate": 1.4941284362936447e-05, "loss": 0.2202, "step": 9790000 }, { "epoch": 5.87, "learning_rate": 1.4939184397375884e-05, "loss": 0.2273, "step": 9790500 }, { "epoch": 5.87, "learning_rate": 1.4937084431815319e-05, "loss": 0.2243, "step": 9791000 }, { "epoch": 5.87, "learning_rate": 1.4934988666185872e-05, "loss": 0.2284, "step": 9791500 }, { "epoch": 5.87, "learning_rate": 1.493288870062531e-05, "loss": 0.2215, "step": 9792000 }, { "epoch": 5.87, "learning_rate": 1.4930792934995866e-05, "loss": 0.2254, "step": 9792500 }, { "epoch": 5.87, "learning_rate": 1.4928692969435301e-05, "loss": 0.2254, "step": 9793000 }, { "epoch": 5.87, "learning_rate": 1.4926593003874738e-05, "loss": 0.2257, "step": 9793500 }, { "epoch": 5.87, "learning_rate": 1.492449303831417e-05, "loss": 0.2246, "step": 9794000 }, { "epoch": 5.87, "learning_rate": 1.4922393072753607e-05, "loss": 0.2278, "step": 9794500 }, { "epoch": 5.87, "learning_rate": 1.4920293107193042e-05, "loss": 0.2241, "step": 9795000 }, { "epoch": 5.87, "learning_rate": 1.4918193141632477e-05, "loss": 0.2195, "step": 9795500 }, { "epoch": 5.87, "learning_rate": 1.4916097376003034e-05, "loss": 0.2225, "step": 9796000 }, { "epoch": 5.87, "learning_rate": 1.4913997410442468e-05, "loss": 0.2241, "step": 9796500 }, { "epoch": 5.87, "learning_rate": 1.4911897444881903e-05, "loss": 0.228, "step": 9797000 }, { "epoch": 5.87, "learning_rate": 1.490979747932134e-05, "loss": 0.218, "step": 9797500 }, { "epoch": 5.87, "learning_rate": 1.4907697513760775e-05, "loss": 0.2243, "step": 9798000 }, { "epoch": 5.87, "learning_rate": 1.490559754820021e-05, "loss": 0.2273, "step": 9798500 }, { "epoch": 5.87, "learning_rate": 1.4903497582639643e-05, "loss": 0.2277, "step": 9799000 }, { "epoch": 5.88, "learning_rate": 1.49014018170102e-05, "loss": 0.2268, "step": 9799500 }, { "epoch": 5.88, "learning_rate": 1.4899301851449635e-05, "loss": 0.2283, "step": 9800000 }, { "epoch": 5.88, "eval_loss": 0.21096092462539673, "eval_runtime": 1456.2084, "eval_samples_per_second": 361.706, "eval_steps_per_second": 60.285, "step": 9800000 }, { "epoch": 5.88, "learning_rate": 1.4897201885889072e-05, "loss": 0.2248, "step": 9800500 }, { "epoch": 5.88, "learning_rate": 1.4895101920328507e-05, "loss": 0.226, "step": 9801000 }, { "epoch": 5.88, "learning_rate": 1.4893001954767941e-05, "loss": 0.2178, "step": 9801500 }, { "epoch": 5.88, "learning_rate": 1.4890906189138498e-05, "loss": 0.2246, "step": 9802000 }, { "epoch": 5.88, "learning_rate": 1.4888806223577933e-05, "loss": 0.226, "step": 9802500 }, { "epoch": 5.88, "learning_rate": 1.488670625801737e-05, "loss": 0.2216, "step": 9803000 }, { "epoch": 5.88, "learning_rate": 1.4884606292456805e-05, "loss": 0.2246, "step": 9803500 }, { "epoch": 5.88, "learning_rate": 1.4882506326896238e-05, "loss": 0.2205, "step": 9804000 }, { "epoch": 5.88, "learning_rate": 1.4880406361335674e-05, "loss": 0.2237, "step": 9804500 }, { "epoch": 5.88, "learning_rate": 1.4878306395775109e-05, "loss": 0.2248, "step": 9805000 }, { "epoch": 5.88, "learning_rate": 1.4876210630145666e-05, "loss": 0.2278, "step": 9805500 }, { "epoch": 5.88, "learning_rate": 1.48741106645851e-05, "loss": 0.2186, "step": 9806000 }, { "epoch": 5.88, "learning_rate": 1.4872010699024534e-05, "loss": 0.2282, "step": 9806500 }, { "epoch": 5.88, "learning_rate": 1.4869910733463971e-05, "loss": 0.2242, "step": 9807000 }, { "epoch": 5.88, "learning_rate": 1.4867814967834528e-05, "loss": 0.224, "step": 9807500 }, { "epoch": 5.88, "learning_rate": 1.4865715002273963e-05, "loss": 0.2245, "step": 9808000 }, { "epoch": 5.88, "learning_rate": 1.4863615036713397e-05, "loss": 0.2278, "step": 9808500 }, { "epoch": 5.88, "learning_rate": 1.4861515071152832e-05, "loss": 0.2192, "step": 9809000 }, { "epoch": 5.88, "learning_rate": 1.4859415105592267e-05, "loss": 0.2204, "step": 9809500 }, { "epoch": 5.88, "learning_rate": 1.4857315140031704e-05, "loss": 0.2257, "step": 9810000 }, { "epoch": 5.88, "learning_rate": 1.4855215174471139e-05, "loss": 0.2259, "step": 9810500 }, { "epoch": 5.88, "learning_rate": 1.4853119408841694e-05, "loss": 0.225, "step": 9811000 }, { "epoch": 5.88, "learning_rate": 1.485101944328113e-05, "loss": 0.222, "step": 9811500 }, { "epoch": 5.88, "learning_rate": 1.4848919477720565e-05, "loss": 0.2262, "step": 9812000 }, { "epoch": 5.88, "learning_rate": 1.4846819512160001e-05, "loss": 0.2237, "step": 9812500 }, { "epoch": 5.88, "learning_rate": 1.4844723746530558e-05, "loss": 0.2281, "step": 9813000 }, { "epoch": 5.88, "learning_rate": 1.484262378096999e-05, "loss": 0.226, "step": 9813500 }, { "epoch": 5.88, "learning_rate": 1.4840523815409427e-05, "loss": 0.2283, "step": 9814000 }, { "epoch": 5.88, "learning_rate": 1.4838423849848862e-05, "loss": 0.2286, "step": 9814500 }, { "epoch": 5.88, "learning_rate": 1.4836323884288297e-05, "loss": 0.2219, "step": 9815000 }, { "epoch": 5.88, "learning_rate": 1.4834223918727734e-05, "loss": 0.2302, "step": 9815500 }, { "epoch": 5.89, "learning_rate": 1.4832123953167166e-05, "loss": 0.228, "step": 9816000 }, { "epoch": 5.89, "learning_rate": 1.4830028187537723e-05, "loss": 0.2213, "step": 9816500 }, { "epoch": 5.89, "learning_rate": 1.482792822197716e-05, "loss": 0.2274, "step": 9817000 }, { "epoch": 5.89, "learning_rate": 1.4825828256416595e-05, "loss": 0.228, "step": 9817500 }, { "epoch": 5.89, "learning_rate": 1.4823728290856032e-05, "loss": 0.2273, "step": 9818000 }, { "epoch": 5.89, "learning_rate": 1.4821628325295464e-05, "loss": 0.2268, "step": 9818500 }, { "epoch": 5.89, "learning_rate": 1.4819528359734899e-05, "loss": 0.219, "step": 9819000 }, { "epoch": 5.89, "learning_rate": 1.4817428394174336e-05, "loss": 0.2272, "step": 9819500 }, { "epoch": 5.89, "learning_rate": 1.4815332628544893e-05, "loss": 0.2223, "step": 9820000 }, { "epoch": 5.89, "learning_rate": 1.4813232662984328e-05, "loss": 0.2249, "step": 9820500 }, { "epoch": 5.89, "learning_rate": 1.4811132697423761e-05, "loss": 0.2227, "step": 9821000 }, { "epoch": 5.89, "learning_rate": 1.4809032731863196e-05, "loss": 0.2223, "step": 9821500 }, { "epoch": 5.89, "learning_rate": 1.4806932766302633e-05, "loss": 0.2239, "step": 9822000 }, { "epoch": 5.89, "learning_rate": 1.4804832800742068e-05, "loss": 0.2216, "step": 9822500 }, { "epoch": 5.89, "learning_rate": 1.4802737035112622e-05, "loss": 0.2296, "step": 9823000 }, { "epoch": 5.89, "learning_rate": 1.4800637069552059e-05, "loss": 0.2242, "step": 9823500 }, { "epoch": 5.89, "learning_rate": 1.4798537103991494e-05, "loss": 0.2196, "step": 9824000 }, { "epoch": 5.89, "learning_rate": 1.4796437138430929e-05, "loss": 0.2261, "step": 9824500 }, { "epoch": 5.89, "learning_rate": 1.4794337172870366e-05, "loss": 0.2212, "step": 9825000 }, { "epoch": 5.89, "learning_rate": 1.4792237207309801e-05, "loss": 0.218, "step": 9825500 }, { "epoch": 5.89, "learning_rate": 1.4790141441680355e-05, "loss": 0.2301, "step": 9826000 }, { "epoch": 5.89, "learning_rate": 1.4788041476119791e-05, "loss": 0.2226, "step": 9826500 }, { "epoch": 5.89, "learning_rate": 1.4785941510559227e-05, "loss": 0.2277, "step": 9827000 }, { "epoch": 5.89, "learning_rate": 1.4783841544998663e-05, "loss": 0.2262, "step": 9827500 }, { "epoch": 5.89, "learning_rate": 1.4781741579438099e-05, "loss": 0.2237, "step": 9828000 }, { "epoch": 5.89, "learning_rate": 1.4779645813808652e-05, "loss": 0.2237, "step": 9828500 }, { "epoch": 5.89, "learning_rate": 1.4777545848248089e-05, "loss": 0.2243, "step": 9829000 }, { "epoch": 5.89, "learning_rate": 1.4775445882687524e-05, "loss": 0.2227, "step": 9829500 }, { "epoch": 5.89, "learning_rate": 1.477334591712696e-05, "loss": 0.2222, "step": 9830000 }, { "epoch": 5.89, "learning_rate": 1.4771250151497515e-05, "loss": 0.2233, "step": 9830500 }, { "epoch": 5.89, "learning_rate": 1.476915018593695e-05, "loss": 0.2273, "step": 9831000 }, { "epoch": 5.89, "learning_rate": 1.4767050220376385e-05, "loss": 0.2212, "step": 9831500 }, { "epoch": 5.89, "learning_rate": 1.4764950254815822e-05, "loss": 0.2273, "step": 9832000 }, { "epoch": 5.89, "learning_rate": 1.4762850289255257e-05, "loss": 0.2222, "step": 9832500 }, { "epoch": 5.9, "learning_rate": 1.476075032369469e-05, "loss": 0.2239, "step": 9833000 }, { "epoch": 5.9, "learning_rate": 1.4758650358134126e-05, "loss": 0.2299, "step": 9833500 }, { "epoch": 5.9, "learning_rate": 1.4756554592504682e-05, "loss": 0.2283, "step": 9834000 }, { "epoch": 5.9, "learning_rate": 1.475445462694412e-05, "loss": 0.2242, "step": 9834500 }, { "epoch": 5.9, "learning_rate": 1.4752354661383554e-05, "loss": 0.2215, "step": 9835000 }, { "epoch": 5.9, "learning_rate": 1.4750254695822986e-05, "loss": 0.2274, "step": 9835500 }, { "epoch": 5.9, "learning_rate": 1.4748158930193545e-05, "loss": 0.2166, "step": 9836000 }, { "epoch": 5.9, "learning_rate": 1.474605896463298e-05, "loss": 0.2233, "step": 9836500 }, { "epoch": 5.9, "learning_rate": 1.4743958999072415e-05, "loss": 0.225, "step": 9837000 }, { "epoch": 5.9, "learning_rate": 1.4741859033511852e-05, "loss": 0.2222, "step": 9837500 }, { "epoch": 5.9, "learning_rate": 1.4739759067951284e-05, "loss": 0.2251, "step": 9838000 }, { "epoch": 5.9, "learning_rate": 1.473765910239072e-05, "loss": 0.2225, "step": 9838500 }, { "epoch": 5.9, "learning_rate": 1.4735563336761278e-05, "loss": 0.2174, "step": 9839000 }, { "epoch": 5.9, "learning_rate": 1.4733463371200713e-05, "loss": 0.2287, "step": 9839500 }, { "epoch": 5.9, "learning_rate": 1.473136340564015e-05, "loss": 0.2235, "step": 9840000 }, { "epoch": 5.9, "learning_rate": 1.4729263440079581e-05, "loss": 0.2241, "step": 9840500 }, { "epoch": 5.9, "learning_rate": 1.4727163474519017e-05, "loss": 0.2203, "step": 9841000 }, { "epoch": 5.9, "learning_rate": 1.4725063508958453e-05, "loss": 0.2232, "step": 9841500 }, { "epoch": 5.9, "learning_rate": 1.4722963543397889e-05, "loss": 0.2259, "step": 9842000 }, { "epoch": 5.9, "learning_rate": 1.4720863577837324e-05, "loss": 0.2264, "step": 9842500 }, { "epoch": 5.9, "learning_rate": 1.4718767812207879e-05, "loss": 0.2266, "step": 9843000 }, { "epoch": 5.9, "learning_rate": 1.4716667846647314e-05, "loss": 0.2252, "step": 9843500 }, { "epoch": 5.9, "learning_rate": 1.4714567881086751e-05, "loss": 0.2191, "step": 9844000 }, { "epoch": 5.9, "learning_rate": 1.4712472115457308e-05, "loss": 0.2247, "step": 9844500 }, { "epoch": 5.9, "learning_rate": 1.471037214989674e-05, "loss": 0.219, "step": 9845000 }, { "epoch": 5.9, "learning_rate": 1.4708272184336177e-05, "loss": 0.2215, "step": 9845500 }, { "epoch": 5.9, "learning_rate": 1.4706176418706733e-05, "loss": 0.2238, "step": 9846000 }, { "epoch": 5.9, "learning_rate": 1.4704076453146169e-05, "loss": 0.2256, "step": 9846500 }, { "epoch": 5.9, "learning_rate": 1.4701976487585605e-05, "loss": 0.2242, "step": 9847000 }, { "epoch": 5.9, "learning_rate": 1.4699876522025037e-05, "loss": 0.2278, "step": 9847500 }, { "epoch": 5.9, "learning_rate": 1.4697776556464472e-05, "loss": 0.2212, "step": 9848000 }, { "epoch": 5.9, "learning_rate": 1.469567659090391e-05, "loss": 0.226, "step": 9848500 }, { "epoch": 5.9, "learning_rate": 1.4693576625343344e-05, "loss": 0.2222, "step": 9849000 }, { "epoch": 5.91, "learning_rate": 1.469147665978278e-05, "loss": 0.2216, "step": 9849500 }, { "epoch": 5.91, "learning_rate": 1.4689380894153335e-05, "loss": 0.2189, "step": 9850000 }, { "epoch": 5.91, "learning_rate": 1.468728092859277e-05, "loss": 0.2289, "step": 9850500 }, { "epoch": 5.91, "learning_rate": 1.4685180963032207e-05, "loss": 0.2228, "step": 9851000 }, { "epoch": 5.91, "learning_rate": 1.4683080997471642e-05, "loss": 0.2239, "step": 9851500 }, { "epoch": 5.91, "learning_rate": 1.4680981031911077e-05, "loss": 0.2231, "step": 9852000 }, { "epoch": 5.91, "learning_rate": 1.467888106635051e-05, "loss": 0.228, "step": 9852500 }, { "epoch": 5.91, "learning_rate": 1.4676785300721068e-05, "loss": 0.2271, "step": 9853000 }, { "epoch": 5.91, "learning_rate": 1.4674685335160503e-05, "loss": 0.2249, "step": 9853500 }, { "epoch": 5.91, "learning_rate": 1.467258536959994e-05, "loss": 0.2237, "step": 9854000 }, { "epoch": 5.91, "learning_rate": 1.4670485404039375e-05, "loss": 0.223, "step": 9854500 }, { "epoch": 5.91, "learning_rate": 1.4668385438478808e-05, "loss": 0.2227, "step": 9855000 }, { "epoch": 5.91, "learning_rate": 1.4666289672849365e-05, "loss": 0.228, "step": 9855500 }, { "epoch": 5.91, "learning_rate": 1.46641897072888e-05, "loss": 0.2211, "step": 9856000 }, { "epoch": 5.91, "learning_rate": 1.4662089741728237e-05, "loss": 0.2252, "step": 9856500 }, { "epoch": 5.91, "learning_rate": 1.4659989776167672e-05, "loss": 0.2243, "step": 9857000 }, { "epoch": 5.91, "learning_rate": 1.4657889810607104e-05, "loss": 0.2236, "step": 9857500 }, { "epoch": 5.91, "learning_rate": 1.4655794044977663e-05, "loss": 0.2229, "step": 9858000 }, { "epoch": 5.91, "learning_rate": 1.4653694079417098e-05, "loss": 0.2228, "step": 9858500 }, { "epoch": 5.91, "learning_rate": 1.4651594113856533e-05, "loss": 0.2206, "step": 9859000 }, { "epoch": 5.91, "learning_rate": 1.4649494148295966e-05, "loss": 0.2293, "step": 9859500 }, { "epoch": 5.91, "learning_rate": 1.4647394182735402e-05, "loss": 0.2219, "step": 9860000 }, { "epoch": 5.91, "learning_rate": 1.4645298417105959e-05, "loss": 0.2202, "step": 9860500 }, { "epoch": 5.91, "learning_rate": 1.4643198451545395e-05, "loss": 0.2239, "step": 9861000 }, { "epoch": 5.91, "learning_rate": 1.464109848598483e-05, "loss": 0.2255, "step": 9861500 }, { "epoch": 5.91, "learning_rate": 1.4638998520424264e-05, "loss": 0.2267, "step": 9862000 }, { "epoch": 5.91, "learning_rate": 1.46368985548637e-05, "loss": 0.2184, "step": 9862500 }, { "epoch": 5.91, "learning_rate": 1.4634802789234256e-05, "loss": 0.2235, "step": 9863000 }, { "epoch": 5.91, "learning_rate": 1.4632702823673693e-05, "loss": 0.2246, "step": 9863500 }, { "epoch": 5.91, "learning_rate": 1.4630602858113128e-05, "loss": 0.223, "step": 9864000 }, { "epoch": 5.91, "learning_rate": 1.462850289255256e-05, "loss": 0.2207, "step": 9864500 }, { "epoch": 5.91, "learning_rate": 1.4626407126923119e-05, "loss": 0.2262, "step": 9865000 }, { "epoch": 5.91, "learning_rate": 1.4624307161362554e-05, "loss": 0.2177, "step": 9865500 }, { "epoch": 5.92, "learning_rate": 1.4622207195801989e-05, "loss": 0.2235, "step": 9866000 }, { "epoch": 5.92, "learning_rate": 1.4620107230241426e-05, "loss": 0.2242, "step": 9866500 }, { "epoch": 5.92, "learning_rate": 1.4618007264680858e-05, "loss": 0.2214, "step": 9867000 }, { "epoch": 5.92, "learning_rate": 1.4615911499051414e-05, "loss": 0.2252, "step": 9867500 }, { "epoch": 5.92, "learning_rate": 1.4613811533490851e-05, "loss": 0.2277, "step": 9868000 }, { "epoch": 5.92, "learning_rate": 1.4611711567930286e-05, "loss": 0.2242, "step": 9868500 }, { "epoch": 5.92, "learning_rate": 1.4609611602369722e-05, "loss": 0.2225, "step": 9869000 }, { "epoch": 5.92, "learning_rate": 1.4607511636809155e-05, "loss": 0.2219, "step": 9869500 }, { "epoch": 5.92, "learning_rate": 1.460541167124859e-05, "loss": 0.2201, "step": 9870000 }, { "epoch": 5.92, "learning_rate": 1.4603311705688027e-05, "loss": 0.2224, "step": 9870500 }, { "epoch": 5.92, "learning_rate": 1.4601211740127462e-05, "loss": 0.2276, "step": 9871000 }, { "epoch": 5.92, "learning_rate": 1.4599115974498016e-05, "loss": 0.2245, "step": 9871500 }, { "epoch": 5.92, "learning_rate": 1.4597020208868574e-05, "loss": 0.2237, "step": 9872000 }, { "epoch": 5.92, "learning_rate": 1.459492024330801e-05, "loss": 0.2246, "step": 9872500 }, { "epoch": 5.92, "learning_rate": 1.4592820277747445e-05, "loss": 0.221, "step": 9873000 }, { "epoch": 5.92, "learning_rate": 1.4590720312186882e-05, "loss": 0.2302, "step": 9873500 }, { "epoch": 5.92, "learning_rate": 1.4588620346626313e-05, "loss": 0.2244, "step": 9874000 }, { "epoch": 5.92, "learning_rate": 1.458652038106575e-05, "loss": 0.2257, "step": 9874500 }, { "epoch": 5.92, "learning_rate": 1.4584420415505185e-05, "loss": 0.2212, "step": 9875000 }, { "epoch": 5.92, "learning_rate": 1.458232044994462e-05, "loss": 0.2221, "step": 9875500 }, { "epoch": 5.92, "learning_rate": 1.4580228884246298e-05, "loss": 0.2218, "step": 9876000 }, { "epoch": 5.92, "learning_rate": 1.4578128918685733e-05, "loss": 0.2207, "step": 9876500 }, { "epoch": 5.92, "learning_rate": 1.4576028953125168e-05, "loss": 0.2213, "step": 9877000 }, { "epoch": 5.92, "learning_rate": 1.4573928987564605e-05, "loss": 0.2226, "step": 9877500 }, { "epoch": 5.92, "learning_rate": 1.457182902200404e-05, "loss": 0.2236, "step": 9878000 }, { "epoch": 5.92, "learning_rate": 1.4569729056443475e-05, "loss": 0.2234, "step": 9878500 }, { "epoch": 5.92, "learning_rate": 1.4567629090882909e-05, "loss": 0.2262, "step": 9879000 }, { "epoch": 5.92, "learning_rate": 1.4565529125322344e-05, "loss": 0.2208, "step": 9879500 }, { "epoch": 5.92, "learning_rate": 1.45634333596929e-05, "loss": 0.2264, "step": 9880000 }, { "epoch": 5.92, "learning_rate": 1.456133759406346e-05, "loss": 0.2252, "step": 9880500 }, { "epoch": 5.92, "learning_rate": 1.4559237628502891e-05, "loss": 0.2228, "step": 9881000 }, { "epoch": 5.92, "learning_rate": 1.4557137662942326e-05, "loss": 0.2237, "step": 9881500 }, { "epoch": 5.92, "learning_rate": 1.4555037697381763e-05, "loss": 0.2223, "step": 9882000 }, { "epoch": 5.92, "learning_rate": 1.4552937731821198e-05, "loss": 0.2226, "step": 9882500 }, { "epoch": 5.93, "learning_rate": 1.4550841966191754e-05, "loss": 0.2211, "step": 9883000 }, { "epoch": 5.93, "learning_rate": 1.4548742000631189e-05, "loss": 0.2201, "step": 9883500 }, { "epoch": 5.93, "learning_rate": 1.4546642035070624e-05, "loss": 0.2221, "step": 9884000 }, { "epoch": 5.93, "learning_rate": 1.454454206951006e-05, "loss": 0.2217, "step": 9884500 }, { "epoch": 5.93, "learning_rate": 1.4542442103949496e-05, "loss": 0.2212, "step": 9885000 }, { "epoch": 5.93, "learning_rate": 1.454034633832005e-05, "loss": 0.2224, "step": 9885500 }, { "epoch": 5.93, "learning_rate": 1.4538246372759486e-05, "loss": 0.2189, "step": 9886000 }, { "epoch": 5.93, "learning_rate": 1.4536146407198921e-05, "loss": 0.2243, "step": 9886500 }, { "epoch": 5.93, "learning_rate": 1.4534046441638357e-05, "loss": 0.2252, "step": 9887000 }, { "epoch": 5.93, "learning_rate": 1.4531946476077793e-05, "loss": 0.2231, "step": 9887500 }, { "epoch": 5.93, "learning_rate": 1.4529846510517229e-05, "loss": 0.222, "step": 9888000 }, { "epoch": 5.93, "learning_rate": 1.4527746544956662e-05, "loss": 0.2288, "step": 9888500 }, { "epoch": 5.93, "learning_rate": 1.4525646579396097e-05, "loss": 0.2298, "step": 9889000 }, { "epoch": 5.93, "learning_rate": 1.4523550813766654e-05, "loss": 0.23, "step": 9889500 }, { "epoch": 5.93, "learning_rate": 1.4521455048137211e-05, "loss": 0.2226, "step": 9890000 }, { "epoch": 5.93, "learning_rate": 1.4519355082576645e-05, "loss": 0.2262, "step": 9890500 }, { "epoch": 5.93, "learning_rate": 1.451725511701608e-05, "loss": 0.228, "step": 9891000 }, { "epoch": 5.93, "learning_rate": 1.4515155151455517e-05, "loss": 0.2212, "step": 9891500 }, { "epoch": 5.93, "learning_rate": 1.4513055185894952e-05, "loss": 0.2184, "step": 9892000 }, { "epoch": 5.93, "learning_rate": 1.4510959420265505e-05, "loss": 0.2306, "step": 9892500 }, { "epoch": 5.93, "learning_rate": 1.4508859454704942e-05, "loss": 0.2209, "step": 9893000 }, { "epoch": 5.93, "learning_rate": 1.4506759489144377e-05, "loss": 0.2254, "step": 9893500 }, { "epoch": 5.93, "learning_rate": 1.4504659523583812e-05, "loss": 0.2196, "step": 9894000 }, { "epoch": 5.93, "learning_rate": 1.450255955802325e-05, "loss": 0.2287, "step": 9894500 }, { "epoch": 5.93, "learning_rate": 1.4500459592462684e-05, "loss": 0.2244, "step": 9895000 }, { "epoch": 5.93, "learning_rate": 1.4498359626902118e-05, "loss": 0.2255, "step": 9895500 }, { "epoch": 5.93, "learning_rate": 1.4496259661341553e-05, "loss": 0.2207, "step": 9896000 }, { "epoch": 5.93, "learning_rate": 1.449416389571211e-05, "loss": 0.2218, "step": 9896500 }, { "epoch": 5.93, "learning_rate": 1.4492063930151547e-05, "loss": 0.2273, "step": 9897000 }, { "epoch": 5.93, "learning_rate": 1.44899681645221e-05, "loss": 0.2267, "step": 9897500 }, { "epoch": 5.93, "learning_rate": 1.4487868198961536e-05, "loss": 0.2271, "step": 9898000 }, { "epoch": 5.93, "learning_rate": 1.4485768233400972e-05, "loss": 0.2209, "step": 9898500 }, { "epoch": 5.93, "learning_rate": 1.4483668267840408e-05, "loss": 0.2244, "step": 9899000 }, { "epoch": 5.94, "learning_rate": 1.4481572502210965e-05, "loss": 0.2188, "step": 9899500 }, { "epoch": 5.94, "learning_rate": 1.4479472536650398e-05, "loss": 0.2257, "step": 9900000 }, { "epoch": 5.94, "eval_loss": 0.21005401015281677, "eval_runtime": 1457.8353, "eval_samples_per_second": 361.303, "eval_steps_per_second": 60.217, "step": 9900000 }, { "epoch": 5.94, "learning_rate": 1.4477372571089833e-05, "loss": 0.2231, "step": 9900500 }, { "epoch": 5.94, "learning_rate": 1.4475272605529268e-05, "loss": 0.2252, "step": 9901000 }, { "epoch": 5.94, "learning_rate": 1.4473172639968705e-05, "loss": 0.2239, "step": 9901500 }, { "epoch": 5.94, "learning_rate": 1.447107267440814e-05, "loss": 0.2196, "step": 9902000 }, { "epoch": 5.94, "learning_rate": 1.4468976908778696e-05, "loss": 0.2274, "step": 9902500 }, { "epoch": 5.94, "learning_rate": 1.446687694321813e-05, "loss": 0.2249, "step": 9903000 }, { "epoch": 5.94, "learning_rate": 1.4464776977657566e-05, "loss": 0.2236, "step": 9903500 }, { "epoch": 5.94, "learning_rate": 1.4462677012097003e-05, "loss": 0.2218, "step": 9904000 }, { "epoch": 5.94, "learning_rate": 1.4460577046536438e-05, "loss": 0.2289, "step": 9904500 }, { "epoch": 5.94, "learning_rate": 1.4458477080975871e-05, "loss": 0.2253, "step": 9905000 }, { "epoch": 5.94, "learning_rate": 1.4456377115415306e-05, "loss": 0.227, "step": 9905500 }, { "epoch": 5.94, "learning_rate": 1.4454281349785863e-05, "loss": 0.2223, "step": 9906000 }, { "epoch": 5.94, "learning_rate": 1.4452181384225299e-05, "loss": 0.2226, "step": 9906500 }, { "epoch": 5.94, "learning_rate": 1.4450081418664735e-05, "loss": 0.2166, "step": 9907000 }, { "epoch": 5.94, "learning_rate": 1.4447981453104167e-05, "loss": 0.2239, "step": 9907500 }, { "epoch": 5.94, "learning_rate": 1.4445881487543604e-05, "loss": 0.2218, "step": 9908000 }, { "epoch": 5.94, "learning_rate": 1.4443785721914161e-05, "loss": 0.2247, "step": 9908500 }, { "epoch": 5.94, "learning_rate": 1.4441685756353596e-05, "loss": 0.2302, "step": 9909000 }, { "epoch": 5.94, "learning_rate": 1.4439585790793033e-05, "loss": 0.2233, "step": 9909500 }, { "epoch": 5.94, "learning_rate": 1.4437485825232465e-05, "loss": 0.2207, "step": 9910000 }, { "epoch": 5.94, "learning_rate": 1.44353858596719e-05, "loss": 0.2318, "step": 9910500 }, { "epoch": 5.94, "learning_rate": 1.4433285894111337e-05, "loss": 0.2236, "step": 9911000 }, { "epoch": 5.94, "learning_rate": 1.4431190128481894e-05, "loss": 0.2247, "step": 9911500 }, { "epoch": 5.94, "learning_rate": 1.4429090162921327e-05, "loss": 0.2208, "step": 9912000 }, { "epoch": 5.94, "learning_rate": 1.4426990197360762e-05, "loss": 0.2233, "step": 9912500 }, { "epoch": 5.94, "learning_rate": 1.4424890231800198e-05, "loss": 0.2224, "step": 9913000 }, { "epoch": 5.94, "learning_rate": 1.4422790266239634e-05, "loss": 0.2221, "step": 9913500 }, { "epoch": 5.94, "learning_rate": 1.442069030067907e-05, "loss": 0.2236, "step": 9914000 }, { "epoch": 5.94, "learning_rate": 1.4418590335118505e-05, "loss": 0.2278, "step": 9914500 }, { "epoch": 5.94, "learning_rate": 1.4416490369557938e-05, "loss": 0.2168, "step": 9915000 }, { "epoch": 5.94, "learning_rate": 1.4414394603928495e-05, "loss": 0.2239, "step": 9915500 }, { "epoch": 5.95, "learning_rate": 1.441229463836793e-05, "loss": 0.2168, "step": 9916000 }, { "epoch": 5.95, "learning_rate": 1.4410194672807367e-05, "loss": 0.2242, "step": 9916500 }, { "epoch": 5.95, "learning_rate": 1.4408094707246802e-05, "loss": 0.2233, "step": 9917000 }, { "epoch": 5.95, "learning_rate": 1.4405998941617356e-05, "loss": 0.2293, "step": 9917500 }, { "epoch": 5.95, "learning_rate": 1.4403898976056793e-05, "loss": 0.2214, "step": 9918000 }, { "epoch": 5.95, "learning_rate": 1.4401799010496228e-05, "loss": 0.2237, "step": 9918500 }, { "epoch": 5.95, "learning_rate": 1.4399699044935665e-05, "loss": 0.2263, "step": 9919000 }, { "epoch": 5.95, "learning_rate": 1.4397599079375096e-05, "loss": 0.2266, "step": 9919500 }, { "epoch": 5.95, "learning_rate": 1.4395499113814532e-05, "loss": 0.2152, "step": 9920000 }, { "epoch": 5.95, "learning_rate": 1.4393399148253968e-05, "loss": 0.2243, "step": 9920500 }, { "epoch": 5.95, "learning_rate": 1.4391299182693404e-05, "loss": 0.2292, "step": 9921000 }, { "epoch": 5.95, "learning_rate": 1.438920341706396e-05, "loss": 0.2265, "step": 9921500 }, { "epoch": 5.95, "learning_rate": 1.4387103451503394e-05, "loss": 0.2239, "step": 9922000 }, { "epoch": 5.95, "learning_rate": 1.438500348594283e-05, "loss": 0.2223, "step": 9922500 }, { "epoch": 5.95, "learning_rate": 1.4382907720313386e-05, "loss": 0.2186, "step": 9923000 }, { "epoch": 5.95, "learning_rate": 1.4380807754752823e-05, "loss": 0.2228, "step": 9923500 }, { "epoch": 5.95, "learning_rate": 1.4378711989123377e-05, "loss": 0.2245, "step": 9924000 }, { "epoch": 5.95, "learning_rate": 1.4376612023562813e-05, "loss": 0.2265, "step": 9924500 }, { "epoch": 5.95, "learning_rate": 1.4374512058002249e-05, "loss": 0.2272, "step": 9925000 }, { "epoch": 5.95, "learning_rate": 1.4372412092441684e-05, "loss": 0.2296, "step": 9925500 }, { "epoch": 5.95, "learning_rate": 1.437031212688112e-05, "loss": 0.2218, "step": 9926000 }, { "epoch": 5.95, "learning_rate": 1.4368212161320556e-05, "loss": 0.2218, "step": 9926500 }, { "epoch": 5.95, "learning_rate": 1.4366112195759987e-05, "loss": 0.2252, "step": 9927000 }, { "epoch": 5.95, "learning_rate": 1.4364012230199424e-05, "loss": 0.2256, "step": 9927500 }, { "epoch": 5.95, "learning_rate": 1.436191226463886e-05, "loss": 0.2246, "step": 9928000 }, { "epoch": 5.95, "learning_rate": 1.4359816499009416e-05, "loss": 0.2276, "step": 9928500 }, { "epoch": 5.95, "learning_rate": 1.435771653344885e-05, "loss": 0.2222, "step": 9929000 }, { "epoch": 5.95, "learning_rate": 1.4355616567888285e-05, "loss": 0.2204, "step": 9929500 }, { "epoch": 5.95, "learning_rate": 1.4353516602327722e-05, "loss": 0.2228, "step": 9930000 }, { "epoch": 5.95, "learning_rate": 1.4351416636767157e-05, "loss": 0.2272, "step": 9930500 }, { "epoch": 5.95, "learning_rate": 1.4349316671206592e-05, "loss": 0.2189, "step": 9931000 }, { "epoch": 5.95, "learning_rate": 1.4347220905577147e-05, "loss": 0.2263, "step": 9931500 }, { "epoch": 5.95, "learning_rate": 1.4345120940016583e-05, "loss": 0.2273, "step": 9932000 }, { "epoch": 5.95, "learning_rate": 1.4343020974456018e-05, "loss": 0.2249, "step": 9932500 }, { "epoch": 5.96, "learning_rate": 1.4340921008895455e-05, "loss": 0.2248, "step": 9933000 }, { "epoch": 5.96, "learning_rate": 1.433882104333489e-05, "loss": 0.2244, "step": 9933500 }, { "epoch": 5.96, "learning_rate": 1.4336725277705443e-05, "loss": 0.2204, "step": 9934000 }, { "epoch": 5.96, "learning_rate": 1.433462531214488e-05, "loss": 0.2234, "step": 9934500 }, { "epoch": 5.96, "learning_rate": 1.4332525346584315e-05, "loss": 0.2241, "step": 9935000 }, { "epoch": 5.96, "learning_rate": 1.4330425381023752e-05, "loss": 0.2249, "step": 9935500 }, { "epoch": 5.96, "learning_rate": 1.4328325415463187e-05, "loss": 0.2228, "step": 9936000 }, { "epoch": 5.96, "learning_rate": 1.4326225449902622e-05, "loss": 0.2259, "step": 9936500 }, { "epoch": 5.96, "learning_rate": 1.4324125484342056e-05, "loss": 0.2256, "step": 9937000 }, { "epoch": 5.96, "learning_rate": 1.4322025518781491e-05, "loss": 0.22, "step": 9937500 }, { "epoch": 5.96, "learning_rate": 1.4319929753152048e-05, "loss": 0.2254, "step": 9938000 }, { "epoch": 5.96, "learning_rate": 1.4317829787591485e-05, "loss": 0.2251, "step": 9938500 }, { "epoch": 5.96, "learning_rate": 1.4315729822030917e-05, "loss": 0.2248, "step": 9939000 }, { "epoch": 5.96, "learning_rate": 1.4313629856470354e-05, "loss": 0.222, "step": 9939500 }, { "epoch": 5.96, "learning_rate": 1.4311529890909789e-05, "loss": 0.2268, "step": 9940000 }, { "epoch": 5.96, "learning_rate": 1.4309429925349224e-05, "loss": 0.2252, "step": 9940500 }, { "epoch": 5.96, "learning_rate": 1.430733415971978e-05, "loss": 0.2218, "step": 9941000 }, { "epoch": 5.96, "learning_rate": 1.4305234194159214e-05, "loss": 0.2239, "step": 9941500 }, { "epoch": 5.96, "learning_rate": 1.430313422859865e-05, "loss": 0.2255, "step": 9942000 }, { "epoch": 5.96, "learning_rate": 1.4301034263038086e-05, "loss": 0.2224, "step": 9942500 }, { "epoch": 5.96, "learning_rate": 1.4298934297477521e-05, "loss": 0.2285, "step": 9943000 }, { "epoch": 5.96, "learning_rate": 1.4296838531848078e-05, "loss": 0.2226, "step": 9943500 }, { "epoch": 5.96, "learning_rate": 1.4294738566287512e-05, "loss": 0.2298, "step": 9944000 }, { "epoch": 5.96, "learning_rate": 1.4292638600726947e-05, "loss": 0.2266, "step": 9944500 }, { "epoch": 5.96, "learning_rate": 1.4290538635166384e-05, "loss": 0.2219, "step": 9945000 }, { "epoch": 5.96, "learning_rate": 1.4288438669605819e-05, "loss": 0.2214, "step": 9945500 }, { "epoch": 5.96, "learning_rate": 1.4286338704045254e-05, "loss": 0.2191, "step": 9946000 }, { "epoch": 5.96, "learning_rate": 1.428424293841581e-05, "loss": 0.2198, "step": 9946500 }, { "epoch": 5.96, "learning_rate": 1.4282142972855245e-05, "loss": 0.2274, "step": 9947000 }, { "epoch": 5.96, "learning_rate": 1.428004300729468e-05, "loss": 0.2224, "step": 9947500 }, { "epoch": 5.96, "learning_rate": 1.4277943041734117e-05, "loss": 0.2229, "step": 9948000 }, { "epoch": 5.96, "learning_rate": 1.4275843076173552e-05, "loss": 0.2233, "step": 9948500 }, { "epoch": 5.96, "learning_rate": 1.4273747310544105e-05, "loss": 0.2219, "step": 9949000 }, { "epoch": 5.97, "learning_rate": 1.4271647344983542e-05, "loss": 0.2293, "step": 9949500 }, { "epoch": 5.97, "learning_rate": 1.4269547379422977e-05, "loss": 0.2219, "step": 9950000 }, { "epoch": 5.97, "learning_rate": 1.4267447413862412e-05, "loss": 0.2243, "step": 9950500 }, { "epoch": 5.97, "learning_rate": 1.426534744830185e-05, "loss": 0.2156, "step": 9951000 }, { "epoch": 5.97, "learning_rate": 1.4263251682672403e-05, "loss": 0.2272, "step": 9951500 }, { "epoch": 5.97, "learning_rate": 1.426115171711184e-05, "loss": 0.226, "step": 9952000 }, { "epoch": 5.97, "learning_rate": 1.4259051751551275e-05, "loss": 0.2251, "step": 9952500 }, { "epoch": 5.97, "learning_rate": 1.425695178599071e-05, "loss": 0.2231, "step": 9953000 }, { "epoch": 5.97, "learning_rate": 1.4254856020361265e-05, "loss": 0.2228, "step": 9953500 }, { "epoch": 5.97, "learning_rate": 1.42527560548007e-05, "loss": 0.2237, "step": 9954000 }, { "epoch": 5.97, "learning_rate": 1.4250656089240136e-05, "loss": 0.226, "step": 9954500 }, { "epoch": 5.97, "learning_rate": 1.4248556123679572e-05, "loss": 0.2293, "step": 9955000 }, { "epoch": 5.97, "learning_rate": 1.4246456158119008e-05, "loss": 0.225, "step": 9955500 }, { "epoch": 5.97, "learning_rate": 1.4244356192558441e-05, "loss": 0.2257, "step": 9956000 }, { "epoch": 5.97, "learning_rate": 1.4242256226997876e-05, "loss": 0.2232, "step": 9956500 }, { "epoch": 5.97, "learning_rate": 1.4240156261437311e-05, "loss": 0.2201, "step": 9957000 }, { "epoch": 5.97, "learning_rate": 1.4238060495807868e-05, "loss": 0.222, "step": 9957500 }, { "epoch": 5.97, "learning_rate": 1.4235964730178424e-05, "loss": 0.2239, "step": 9958000 }, { "epoch": 5.97, "learning_rate": 1.4233864764617859e-05, "loss": 0.2215, "step": 9958500 }, { "epoch": 5.97, "learning_rate": 1.4231764799057296e-05, "loss": 0.2249, "step": 9959000 }, { "epoch": 5.97, "learning_rate": 1.422966483349673e-05, "loss": 0.2223, "step": 9959500 }, { "epoch": 5.97, "learning_rate": 1.4227564867936166e-05, "loss": 0.228, "step": 9960000 }, { "epoch": 5.97, "learning_rate": 1.4225464902375603e-05, "loss": 0.2249, "step": 9960500 }, { "epoch": 5.97, "learning_rate": 1.4223364936815035e-05, "loss": 0.2203, "step": 9961000 }, { "epoch": 5.97, "learning_rate": 1.4221264971254471e-05, "loss": 0.2259, "step": 9961500 }, { "epoch": 5.97, "learning_rate": 1.4219169205625028e-05, "loss": 0.2231, "step": 9962000 }, { "epoch": 5.97, "learning_rate": 1.4217069240064463e-05, "loss": 0.2265, "step": 9962500 }, { "epoch": 5.97, "learning_rate": 1.4214969274503899e-05, "loss": 0.2225, "step": 9963000 }, { "epoch": 5.97, "learning_rate": 1.4212869308943332e-05, "loss": 0.22, "step": 9963500 }, { "epoch": 5.97, "learning_rate": 1.4210769343382767e-05, "loss": 0.222, "step": 9964000 }, { "epoch": 5.97, "learning_rate": 1.4208669377822204e-05, "loss": 0.2227, "step": 9964500 }, { "epoch": 5.97, "learning_rate": 1.420656941226164e-05, "loss": 0.223, "step": 9965000 }, { "epoch": 5.97, "learning_rate": 1.4204469446701074e-05, "loss": 0.2203, "step": 9965500 }, { "epoch": 5.98, "learning_rate": 1.420237368107163e-05, "loss": 0.2269, "step": 9966000 }, { "epoch": 5.98, "learning_rate": 1.4200273715511065e-05, "loss": 0.2213, "step": 9966500 }, { "epoch": 5.98, "learning_rate": 1.41981737499505e-05, "loss": 0.2237, "step": 9967000 }, { "epoch": 5.98, "learning_rate": 1.4196073784389937e-05, "loss": 0.2268, "step": 9967500 }, { "epoch": 5.98, "learning_rate": 1.4193973818829372e-05, "loss": 0.2303, "step": 9968000 }, { "epoch": 5.98, "learning_rate": 1.4191873853268805e-05, "loss": 0.2226, "step": 9968500 }, { "epoch": 5.98, "learning_rate": 1.418977388770824e-05, "loss": 0.2248, "step": 9969000 }, { "epoch": 5.98, "learning_rate": 1.4187673922147676e-05, "loss": 0.2221, "step": 9969500 }, { "epoch": 5.98, "learning_rate": 1.4185582356449354e-05, "loss": 0.2241, "step": 9970000 }, { "epoch": 5.98, "learning_rate": 1.4183482390888788e-05, "loss": 0.2248, "step": 9970500 }, { "epoch": 5.98, "learning_rate": 1.4181382425328223e-05, "loss": 0.2235, "step": 9971000 }, { "epoch": 5.98, "learning_rate": 1.417928245976766e-05, "loss": 0.2192, "step": 9971500 }, { "epoch": 5.98, "learning_rate": 1.4177182494207095e-05, "loss": 0.2202, "step": 9972000 }, { "epoch": 5.98, "learning_rate": 1.417508252864653e-05, "loss": 0.2202, "step": 9972500 }, { "epoch": 5.98, "learning_rate": 1.4172982563085967e-05, "loss": 0.2197, "step": 9973000 }, { "epoch": 5.98, "learning_rate": 1.4170882597525399e-05, "loss": 0.2201, "step": 9973500 }, { "epoch": 5.98, "learning_rate": 1.4168786831895956e-05, "loss": 0.2201, "step": 9974000 }, { "epoch": 5.98, "learning_rate": 1.4166686866335393e-05, "loss": 0.2253, "step": 9974500 }, { "epoch": 5.98, "learning_rate": 1.4164586900774828e-05, "loss": 0.2304, "step": 9975000 }, { "epoch": 5.98, "learning_rate": 1.4162486935214261e-05, "loss": 0.2177, "step": 9975500 }, { "epoch": 5.98, "learning_rate": 1.4160391169584818e-05, "loss": 0.2251, "step": 9976000 }, { "epoch": 5.98, "learning_rate": 1.4158291204024253e-05, "loss": 0.2254, "step": 9976500 }, { "epoch": 5.98, "learning_rate": 1.415619123846369e-05, "loss": 0.2211, "step": 9977000 }, { "epoch": 5.98, "learning_rate": 1.4154095472834244e-05, "loss": 0.2265, "step": 9977500 }, { "epoch": 5.98, "learning_rate": 1.4151995507273679e-05, "loss": 0.2207, "step": 9978000 }, { "epoch": 5.98, "learning_rate": 1.4149895541713116e-05, "loss": 0.2227, "step": 9978500 }, { "epoch": 5.98, "learning_rate": 1.4147795576152551e-05, "loss": 0.2245, "step": 9979000 }, { "epoch": 5.98, "learning_rate": 1.4145695610591986e-05, "loss": 0.2186, "step": 9979500 }, { "epoch": 5.98, "learning_rate": 1.4143599844962541e-05, "loss": 0.2223, "step": 9980000 }, { "epoch": 5.98, "learning_rate": 1.4141504079333098e-05, "loss": 0.2221, "step": 9980500 }, { "epoch": 5.98, "learning_rate": 1.4139404113772534e-05, "loss": 0.2188, "step": 9981000 }, { "epoch": 5.98, "learning_rate": 1.413730414821197e-05, "loss": 0.2274, "step": 9981500 }, { "epoch": 5.98, "learning_rate": 1.4135204182651406e-05, "loss": 0.226, "step": 9982000 }, { "epoch": 5.98, "learning_rate": 1.4133104217090839e-05, "loss": 0.2284, "step": 9982500 }, { "epoch": 5.99, "learning_rate": 1.4131004251530274e-05, "loss": 0.227, "step": 9983000 }, { "epoch": 5.99, "learning_rate": 1.412890428596971e-05, "loss": 0.2247, "step": 9983500 }, { "epoch": 5.99, "learning_rate": 1.4126804320409146e-05, "loss": 0.2275, "step": 9984000 }, { "epoch": 5.99, "learning_rate": 1.4124704354848581e-05, "loss": 0.2184, "step": 9984500 }, { "epoch": 5.99, "learning_rate": 1.4122604389288015e-05, "loss": 0.2227, "step": 9985000 }, { "epoch": 5.99, "learning_rate": 1.412050442372745e-05, "loss": 0.2189, "step": 9985500 }, { "epoch": 5.99, "learning_rate": 1.4118404458166885e-05, "loss": 0.2223, "step": 9986000 }, { "epoch": 5.99, "learning_rate": 1.4116308692537442e-05, "loss": 0.2255, "step": 9986500 }, { "epoch": 5.99, "learning_rate": 1.4114208726976879e-05, "loss": 0.2191, "step": 9987000 }, { "epoch": 5.99, "learning_rate": 1.411210876141631e-05, "loss": 0.2252, "step": 9987500 }, { "epoch": 5.99, "learning_rate": 1.4110008795855747e-05, "loss": 0.2224, "step": 9988000 }, { "epoch": 5.99, "learning_rate": 1.4107913030226304e-05, "loss": 0.2228, "step": 9988500 }, { "epoch": 5.99, "learning_rate": 1.410581306466574e-05, "loss": 0.2195, "step": 9989000 }, { "epoch": 5.99, "learning_rate": 1.4103713099105176e-05, "loss": 0.2299, "step": 9989500 }, { "epoch": 5.99, "learning_rate": 1.410161733347573e-05, "loss": 0.2236, "step": 9990000 }, { "epoch": 5.99, "learning_rate": 1.4099517367915165e-05, "loss": 0.2233, "step": 9990500 }, { "epoch": 5.99, "learning_rate": 1.4097417402354602e-05, "loss": 0.2257, "step": 9991000 }, { "epoch": 5.99, "learning_rate": 1.4095317436794037e-05, "loss": 0.2227, "step": 9991500 }, { "epoch": 5.99, "learning_rate": 1.4093217471233472e-05, "loss": 0.2213, "step": 9992000 }, { "epoch": 5.99, "learning_rate": 1.4091117505672906e-05, "loss": 0.2298, "step": 9992500 }, { "epoch": 5.99, "learning_rate": 1.4089017540112341e-05, "loss": 0.226, "step": 9993000 }, { "epoch": 5.99, "learning_rate": 1.4086917574551778e-05, "loss": 0.2208, "step": 9993500 }, { "epoch": 5.99, "learning_rate": 1.4084821808922335e-05, "loss": 0.2207, "step": 9994000 }, { "epoch": 5.99, "learning_rate": 1.4082721843361767e-05, "loss": 0.2247, "step": 9994500 }, { "epoch": 5.99, "learning_rate": 1.4080621877801203e-05, "loss": 0.2168, "step": 9995000 }, { "epoch": 5.99, "learning_rate": 1.4078521912240639e-05, "loss": 0.2209, "step": 9995500 }, { "epoch": 5.99, "learning_rate": 1.4076426146611195e-05, "loss": 0.2225, "step": 9996000 }, { "epoch": 5.99, "learning_rate": 1.407433038098175e-05, "loss": 0.2238, "step": 9996500 }, { "epoch": 5.99, "learning_rate": 1.4072230415421186e-05, "loss": 0.2259, "step": 9997000 }, { "epoch": 5.99, "learning_rate": 1.4070130449860621e-05, "loss": 0.2225, "step": 9997500 }, { "epoch": 5.99, "learning_rate": 1.4068030484300058e-05, "loss": 0.2234, "step": 9998000 }, { "epoch": 5.99, "learning_rate": 1.4065930518739493e-05, "loss": 0.223, "step": 9998500 }, { "epoch": 5.99, "learning_rate": 1.4063830553178928e-05, "loss": 0.2219, "step": 9999000 }, { "epoch": 6.0, "learning_rate": 1.4061730587618362e-05, "loss": 0.2295, "step": 9999500 }, { "epoch": 6.0, "learning_rate": 1.4059634821988919e-05, "loss": 0.2342, "step": 10000000 }, { "epoch": 6.0, "eval_loss": 0.2093830108642578, "eval_runtime": 1451.31, "eval_samples_per_second": 362.927, "eval_steps_per_second": 60.488, "step": 10000000 }, { "epoch": 6.0, "learning_rate": 1.4057534856428355e-05, "loss": 0.2211, "step": 10000500 }, { "epoch": 6.0, "learning_rate": 1.405543489086779e-05, "loss": 0.224, "step": 10001000 }, { "epoch": 6.0, "learning_rate": 1.4053334925307226e-05, "loss": 0.225, "step": 10001500 }, { "epoch": 6.0, "learning_rate": 1.405123495974666e-05, "loss": 0.221, "step": 10002000 }, { "epoch": 6.0, "learning_rate": 1.4049134994186094e-05, "loss": 0.2194, "step": 10002500 }, { "epoch": 6.0, "learning_rate": 1.404703502862553e-05, "loss": 0.2243, "step": 10003000 }, { "epoch": 6.0, "learning_rate": 1.4044935063064966e-05, "loss": 0.2284, "step": 10003500 }, { "epoch": 6.0, "learning_rate": 1.404283929743552e-05, "loss": 0.2262, "step": 10004000 }, { "epoch": 6.0, "learning_rate": 1.4040739331874957e-05, "loss": 0.2202, "step": 10004500 }, { "epoch": 6.0, "learning_rate": 1.4038639366314392e-05, "loss": 0.2246, "step": 10005000 }, { "epoch": 6.0, "learning_rate": 1.4036539400753827e-05, "loss": 0.2221, "step": 10005500 }, { "epoch": 6.0, "learning_rate": 1.4034443635124384e-05, "loss": 0.2244, "step": 10006000 }, { "epoch": 6.0, "learning_rate": 1.4032343669563818e-05, "loss": 0.2188, "step": 10006500 }, { "epoch": 6.0, "learning_rate": 1.4030243704003253e-05, "loss": 0.22, "step": 10007000 }, { "epoch": 6.0, "learning_rate": 1.402814373844269e-05, "loss": 0.2198, "step": 10007500 }, { "epoch": 6.0, "learning_rate": 1.4026047972813246e-05, "loss": 0.2181, "step": 10008000 }, { "epoch": 6.0, "learning_rate": 1.4023948007252682e-05, "loss": 0.2185, "step": 10008500 }, { "epoch": 6.0, "learning_rate": 1.4021848041692115e-05, "loss": 0.2168, "step": 10009000 }, { "epoch": 6.0, "learning_rate": 1.401974807613155e-05, "loss": 0.2129, "step": 10009500 }, { "epoch": 6.0, "learning_rate": 1.4017652310502107e-05, "loss": 0.216, "step": 10010000 }, { "epoch": 6.0, "learning_rate": 1.4015552344941544e-05, "loss": 0.215, "step": 10010500 }, { "epoch": 6.0, "learning_rate": 1.401345237938098e-05, "loss": 0.2186, "step": 10011000 }, { "epoch": 6.0, "learning_rate": 1.4011352413820413e-05, "loss": 0.2102, "step": 10011500 }, { "epoch": 6.0, "learning_rate": 1.400925664819097e-05, "loss": 0.2132, "step": 10012000 }, { "epoch": 6.0, "learning_rate": 1.4007160882561527e-05, "loss": 0.224, "step": 10012500 }, { "epoch": 6.0, "learning_rate": 1.4005060917000962e-05, "loss": 0.2173, "step": 10013000 }, { "epoch": 6.0, "learning_rate": 1.4002960951440395e-05, "loss": 0.2172, "step": 10013500 }, { "epoch": 6.0, "learning_rate": 1.400086098587983e-05, "loss": 0.2185, "step": 10014000 }, { "epoch": 6.0, "learning_rate": 1.3998765220250387e-05, "loss": 0.2138, "step": 10014500 }, { "epoch": 6.0, "learning_rate": 1.3996665254689822e-05, "loss": 0.2161, "step": 10015000 }, { "epoch": 6.0, "learning_rate": 1.3994565289129258e-05, "loss": 0.2208, "step": 10015500 }, { "epoch": 6.0, "learning_rate": 1.3992465323568694e-05, "loss": 0.221, "step": 10016000 }, { "epoch": 6.01, "learning_rate": 1.3990365358008128e-05, "loss": 0.215, "step": 10016500 }, { "epoch": 6.01, "learning_rate": 1.3988265392447563e-05, "loss": 0.2167, "step": 10017000 }, { "epoch": 6.01, "learning_rate": 1.3986165426887e-05, "loss": 0.2199, "step": 10017500 }, { "epoch": 6.01, "learning_rate": 1.3984069661257555e-05, "loss": 0.2213, "step": 10018000 }, { "epoch": 6.01, "learning_rate": 1.398196969569699e-05, "loss": 0.2157, "step": 10018500 }, { "epoch": 6.01, "learning_rate": 1.3979869730136426e-05, "loss": 0.2168, "step": 10019000 }, { "epoch": 6.01, "learning_rate": 1.397776976457586e-05, "loss": 0.2215, "step": 10019500 }, { "epoch": 6.01, "learning_rate": 1.3975669799015296e-05, "loss": 0.2205, "step": 10020000 }, { "epoch": 6.01, "learning_rate": 1.3973574033385853e-05, "loss": 0.2147, "step": 10020500 }, { "epoch": 6.01, "learning_rate": 1.3971474067825286e-05, "loss": 0.2167, "step": 10021000 }, { "epoch": 6.01, "learning_rate": 1.3969374102264723e-05, "loss": 0.2166, "step": 10021500 }, { "epoch": 6.01, "learning_rate": 1.3967274136704158e-05, "loss": 0.2169, "step": 10022000 }, { "epoch": 6.01, "learning_rate": 1.3965174171143592e-05, "loss": 0.2144, "step": 10022500 }, { "epoch": 6.01, "learning_rate": 1.396307840551415e-05, "loss": 0.2153, "step": 10023000 }, { "epoch": 6.01, "learning_rate": 1.3960978439953584e-05, "loss": 0.227, "step": 10023500 }, { "epoch": 6.01, "learning_rate": 1.3958878474393019e-05, "loss": 0.2115, "step": 10024000 }, { "epoch": 6.01, "learning_rate": 1.3956778508832456e-05, "loss": 0.212, "step": 10024500 }, { "epoch": 6.01, "learning_rate": 1.395467854327189e-05, "loss": 0.2162, "step": 10025000 }, { "epoch": 6.01, "learning_rate": 1.3952578577711326e-05, "loss": 0.2153, "step": 10025500 }, { "epoch": 6.01, "learning_rate": 1.3950482812081881e-05, "loss": 0.2171, "step": 10026000 }, { "epoch": 6.01, "learning_rate": 1.3948382846521317e-05, "loss": 0.2174, "step": 10026500 }, { "epoch": 6.01, "learning_rate": 1.3946282880960753e-05, "loss": 0.2185, "step": 10027000 }, { "epoch": 6.01, "learning_rate": 1.3944182915400187e-05, "loss": 0.2219, "step": 10027500 }, { "epoch": 6.01, "learning_rate": 1.3942082949839622e-05, "loss": 0.2145, "step": 10028000 }, { "epoch": 6.01, "learning_rate": 1.3939982984279059e-05, "loss": 0.2103, "step": 10028500 }, { "epoch": 6.01, "learning_rate": 1.3937887218649614e-05, "loss": 0.2164, "step": 10029000 }, { "epoch": 6.01, "learning_rate": 1.393578725308905e-05, "loss": 0.2137, "step": 10029500 }, { "epoch": 6.01, "learning_rate": 1.3933687287528484e-05, "loss": 0.216, "step": 10030000 }, { "epoch": 6.01, "learning_rate": 1.393158732196792e-05, "loss": 0.2141, "step": 10030500 }, { "epoch": 6.01, "learning_rate": 1.3929487356407355e-05, "loss": 0.2178, "step": 10031000 }, { "epoch": 6.01, "learning_rate": 1.392738739084679e-05, "loss": 0.214, "step": 10031500 }, { "epoch": 6.01, "learning_rate": 1.3925287425286225e-05, "loss": 0.2147, "step": 10032000 }, { "epoch": 6.01, "learning_rate": 1.3923191659656782e-05, "loss": 0.2163, "step": 10032500 }, { "epoch": 6.02, "learning_rate": 1.3921091694096217e-05, "loss": 0.2136, "step": 10033000 }, { "epoch": 6.02, "learning_rate": 1.391899172853565e-05, "loss": 0.2127, "step": 10033500 }, { "epoch": 6.02, "learning_rate": 1.3916891762975087e-05, "loss": 0.2178, "step": 10034000 }, { "epoch": 6.02, "learning_rate": 1.3914791797414523e-05, "loss": 0.2162, "step": 10034500 }, { "epoch": 6.02, "learning_rate": 1.3912696031785078e-05, "loss": 0.2124, "step": 10035000 }, { "epoch": 6.02, "learning_rate": 1.3910596066224515e-05, "loss": 0.22, "step": 10035500 }, { "epoch": 6.02, "learning_rate": 1.3908496100663948e-05, "loss": 0.2141, "step": 10036000 }, { "epoch": 6.02, "learning_rate": 1.3906396135103385e-05, "loss": 0.2168, "step": 10036500 }, { "epoch": 6.02, "learning_rate": 1.390429616954282e-05, "loss": 0.2257, "step": 10037000 }, { "epoch": 6.02, "learning_rate": 1.3902196203982254e-05, "loss": 0.2221, "step": 10037500 }, { "epoch": 6.02, "learning_rate": 1.390009623842169e-05, "loss": 0.2171, "step": 10038000 }, { "epoch": 6.02, "learning_rate": 1.3897996272861124e-05, "loss": 0.2178, "step": 10038500 }, { "epoch": 6.02, "learning_rate": 1.3895900507231681e-05, "loss": 0.2212, "step": 10039000 }, { "epoch": 6.02, "learning_rate": 1.3893800541671116e-05, "loss": 0.2151, "step": 10039500 }, { "epoch": 6.02, "learning_rate": 1.3891700576110551e-05, "loss": 0.2156, "step": 10040000 }, { "epoch": 6.02, "learning_rate": 1.3889600610549986e-05, "loss": 0.2203, "step": 10040500 }, { "epoch": 6.02, "learning_rate": 1.3887504844920543e-05, "loss": 0.2206, "step": 10041000 }, { "epoch": 6.02, "learning_rate": 1.3885409079291099e-05, "loss": 0.2193, "step": 10041500 }, { "epoch": 6.02, "learning_rate": 1.3883309113730534e-05, "loss": 0.2156, "step": 10042000 }, { "epoch": 6.02, "learning_rate": 1.388120914816997e-05, "loss": 0.216, "step": 10042500 }, { "epoch": 6.02, "learning_rate": 1.3879109182609404e-05, "loss": 0.2199, "step": 10043000 }, { "epoch": 6.02, "learning_rate": 1.3877009217048841e-05, "loss": 0.2145, "step": 10043500 }, { "epoch": 6.02, "learning_rate": 1.3874909251488276e-05, "loss": 0.2134, "step": 10044000 }, { "epoch": 6.02, "learning_rate": 1.3872813485858831e-05, "loss": 0.2113, "step": 10044500 }, { "epoch": 6.02, "learning_rate": 1.3870713520298268e-05, "loss": 0.2154, "step": 10045000 }, { "epoch": 6.02, "learning_rate": 1.3868617754668823e-05, "loss": 0.2177, "step": 10045500 }, { "epoch": 6.02, "learning_rate": 1.3866517789108259e-05, "loss": 0.2198, "step": 10046000 }, { "epoch": 6.02, "learning_rate": 1.3864417823547694e-05, "loss": 0.2219, "step": 10046500 }, { "epoch": 6.02, "learning_rate": 1.3862317857987129e-05, "loss": 0.2134, "step": 10047000 }, { "epoch": 6.02, "learning_rate": 1.3860217892426564e-05, "loss": 0.2198, "step": 10047500 }, { "epoch": 6.02, "learning_rate": 1.3858117926866e-05, "loss": 0.2146, "step": 10048000 }, { "epoch": 6.02, "learning_rate": 1.3856017961305434e-05, "loss": 0.2179, "step": 10048500 }, { "epoch": 6.02, "learning_rate": 1.385391799574487e-05, "loss": 0.2181, "step": 10049000 }, { "epoch": 6.03, "learning_rate": 1.3851822230115427e-05, "loss": 0.2177, "step": 10049500 }, { "epoch": 6.03, "learning_rate": 1.384972226455486e-05, "loss": 0.2187, "step": 10050000 }, { "epoch": 6.03, "learning_rate": 1.3847622298994297e-05, "loss": 0.2224, "step": 10050500 }, { "epoch": 6.03, "learning_rate": 1.3845522333433732e-05, "loss": 0.2131, "step": 10051000 }, { "epoch": 6.03, "learning_rate": 1.3843430767735409e-05, "loss": 0.213, "step": 10051500 }, { "epoch": 6.03, "learning_rate": 1.3841330802174844e-05, "loss": 0.2198, "step": 10052000 }, { "epoch": 6.03, "learning_rate": 1.383923083661428e-05, "loss": 0.2159, "step": 10052500 }, { "epoch": 6.03, "learning_rate": 1.3837130871053714e-05, "loss": 0.2142, "step": 10053000 }, { "epoch": 6.03, "learning_rate": 1.383503090549315e-05, "loss": 0.2154, "step": 10053500 }, { "epoch": 6.03, "learning_rate": 1.3832930939932585e-05, "loss": 0.2125, "step": 10054000 }, { "epoch": 6.03, "learning_rate": 1.383083097437202e-05, "loss": 0.2149, "step": 10054500 }, { "epoch": 6.03, "learning_rate": 1.3828731008811455e-05, "loss": 0.2221, "step": 10055000 }, { "epoch": 6.03, "learning_rate": 1.382663104325089e-05, "loss": 0.2178, "step": 10055500 }, { "epoch": 6.03, "learning_rate": 1.3824531077690327e-05, "loss": 0.217, "step": 10056000 }, { "epoch": 6.03, "learning_rate": 1.382243111212976e-05, "loss": 0.2181, "step": 10056500 }, { "epoch": 6.03, "learning_rate": 1.3820331146569196e-05, "loss": 0.216, "step": 10057000 }, { "epoch": 6.03, "learning_rate": 1.3818235380939753e-05, "loss": 0.2178, "step": 10057500 }, { "epoch": 6.03, "learning_rate": 1.3816135415379188e-05, "loss": 0.2182, "step": 10058000 }, { "epoch": 6.03, "learning_rate": 1.3814035449818623e-05, "loss": 0.225, "step": 10058500 }, { "epoch": 6.03, "learning_rate": 1.3811935484258058e-05, "loss": 0.2175, "step": 10059000 }, { "epoch": 6.03, "learning_rate": 1.3809835518697493e-05, "loss": 0.2176, "step": 10059500 }, { "epoch": 6.03, "learning_rate": 1.3807735553136928e-05, "loss": 0.2127, "step": 10060000 }, { "epoch": 6.03, "learning_rate": 1.3805635587576364e-05, "loss": 0.2199, "step": 10060500 }, { "epoch": 6.03, "learning_rate": 1.3803539821946919e-05, "loss": 0.2106, "step": 10061000 }, { "epoch": 6.03, "learning_rate": 1.3801439856386356e-05, "loss": 0.2161, "step": 10061500 }, { "epoch": 6.03, "learning_rate": 1.3799339890825791e-05, "loss": 0.2171, "step": 10062000 }, { "epoch": 6.03, "learning_rate": 1.3797239925265224e-05, "loss": 0.218, "step": 10062500 }, { "epoch": 6.03, "learning_rate": 1.3795139959704661e-05, "loss": 0.2212, "step": 10063000 }, { "epoch": 6.03, "learning_rate": 1.3793039994144096e-05, "loss": 0.2179, "step": 10063500 }, { "epoch": 6.03, "learning_rate": 1.3790940028583531e-05, "loss": 0.2178, "step": 10064000 }, { "epoch": 6.03, "learning_rate": 1.3788840063022967e-05, "loss": 0.2184, "step": 10064500 }, { "epoch": 6.03, "learning_rate": 1.3786748497324644e-05, "loss": 0.2124, "step": 10065000 }, { "epoch": 6.03, "learning_rate": 1.3784648531764079e-05, "loss": 0.2139, "step": 10065500 }, { "epoch": 6.03, "learning_rate": 1.3782548566203514e-05, "loss": 0.2163, "step": 10066000 }, { "epoch": 6.04, "learning_rate": 1.378044860064295e-05, "loss": 0.2165, "step": 10066500 }, { "epoch": 6.04, "learning_rate": 1.3778348635082386e-05, "loss": 0.2182, "step": 10067000 }, { "epoch": 6.04, "learning_rate": 1.377624866952182e-05, "loss": 0.2177, "step": 10067500 }, { "epoch": 6.04, "learning_rate": 1.3774152903892376e-05, "loss": 0.2151, "step": 10068000 }, { "epoch": 6.04, "learning_rate": 1.3772052938331812e-05, "loss": 0.2236, "step": 10068500 }, { "epoch": 6.04, "learning_rate": 1.3769952972771247e-05, "loss": 0.2182, "step": 10069000 }, { "epoch": 6.04, "learning_rate": 1.376785300721068e-05, "loss": 0.2189, "step": 10069500 }, { "epoch": 6.04, "learning_rate": 1.3765753041650117e-05, "loss": 0.2168, "step": 10070000 }, { "epoch": 6.04, "learning_rate": 1.3763653076089552e-05, "loss": 0.2144, "step": 10070500 }, { "epoch": 6.04, "learning_rate": 1.3761553110528987e-05, "loss": 0.2199, "step": 10071000 }, { "epoch": 6.04, "learning_rate": 1.3759453144968423e-05, "loss": 0.2149, "step": 10071500 }, { "epoch": 6.04, "learning_rate": 1.3757357379338978e-05, "loss": 0.2104, "step": 10072000 }, { "epoch": 6.04, "learning_rate": 1.3755261613709535e-05, "loss": 0.213, "step": 10072500 }, { "epoch": 6.04, "learning_rate": 1.375316164814897e-05, "loss": 0.2153, "step": 10073000 }, { "epoch": 6.04, "learning_rate": 1.3751061682588405e-05, "loss": 0.215, "step": 10073500 }, { "epoch": 6.04, "learning_rate": 1.3748961717027842e-05, "loss": 0.214, "step": 10074000 }, { "epoch": 6.04, "learning_rate": 1.3746861751467275e-05, "loss": 0.2132, "step": 10074500 }, { "epoch": 6.04, "learning_rate": 1.3744765985837832e-05, "loss": 0.2119, "step": 10075000 }, { "epoch": 6.04, "learning_rate": 1.3742666020277267e-05, "loss": 0.2128, "step": 10075500 }, { "epoch": 6.04, "learning_rate": 1.3740566054716703e-05, "loss": 0.2179, "step": 10076000 }, { "epoch": 6.04, "learning_rate": 1.3738466089156138e-05, "loss": 0.2193, "step": 10076500 }, { "epoch": 6.04, "learning_rate": 1.3736366123595573e-05, "loss": 0.2146, "step": 10077000 }, { "epoch": 6.04, "learning_rate": 1.373427035796613e-05, "loss": 0.2206, "step": 10077500 }, { "epoch": 6.04, "learning_rate": 1.3732170392405563e-05, "loss": 0.2149, "step": 10078000 }, { "epoch": 6.04, "learning_rate": 1.3730070426845e-05, "loss": 0.2213, "step": 10078500 }, { "epoch": 6.04, "learning_rate": 1.3727970461284434e-05, "loss": 0.2209, "step": 10079000 }, { "epoch": 6.04, "learning_rate": 1.372587049572387e-05, "loss": 0.2144, "step": 10079500 }, { "epoch": 6.04, "learning_rate": 1.3723774730094426e-05, "loss": 0.2164, "step": 10080000 }, { "epoch": 6.04, "learning_rate": 1.3721674764533861e-05, "loss": 0.2204, "step": 10080500 }, { "epoch": 6.04, "learning_rate": 1.3719574798973298e-05, "loss": 0.2168, "step": 10081000 }, { "epoch": 6.04, "learning_rate": 1.3717474833412731e-05, "loss": 0.2176, "step": 10081500 }, { "epoch": 6.04, "learning_rate": 1.3715374867852166e-05, "loss": 0.215, "step": 10082000 }, { "epoch": 6.04, "learning_rate": 1.3713274902291603e-05, "loss": 0.2119, "step": 10082500 }, { "epoch": 6.05, "learning_rate": 1.3711174936731037e-05, "loss": 0.2132, "step": 10083000 }, { "epoch": 6.05, "learning_rate": 1.3709074971170474e-05, "loss": 0.2128, "step": 10083500 }, { "epoch": 6.05, "learning_rate": 1.3706979205541029e-05, "loss": 0.2156, "step": 10084000 }, { "epoch": 6.05, "learning_rate": 1.3704883439911586e-05, "loss": 0.2174, "step": 10084500 }, { "epoch": 6.05, "learning_rate": 1.370278347435102e-05, "loss": 0.2145, "step": 10085000 }, { "epoch": 6.05, "learning_rate": 1.3700683508790456e-05, "loss": 0.2138, "step": 10085500 }, { "epoch": 6.05, "learning_rate": 1.3698583543229891e-05, "loss": 0.2187, "step": 10086000 }, { "epoch": 6.05, "learning_rate": 1.3696483577669326e-05, "loss": 0.2143, "step": 10086500 }, { "epoch": 6.05, "learning_rate": 1.3694383612108762e-05, "loss": 0.2166, "step": 10087000 }, { "epoch": 6.05, "learning_rate": 1.3692283646548195e-05, "loss": 0.2175, "step": 10087500 }, { "epoch": 6.05, "learning_rate": 1.3690187880918754e-05, "loss": 0.2198, "step": 10088000 }, { "epoch": 6.05, "learning_rate": 1.3688087915358187e-05, "loss": 0.2196, "step": 10088500 }, { "epoch": 6.05, "learning_rate": 1.3685987949797622e-05, "loss": 0.2156, "step": 10089000 }, { "epoch": 6.05, "learning_rate": 1.3683887984237059e-05, "loss": 0.2177, "step": 10089500 }, { "epoch": 6.05, "learning_rate": 1.3681788018676493e-05, "loss": 0.2135, "step": 10090000 }, { "epoch": 6.05, "learning_rate": 1.367969225304705e-05, "loss": 0.2142, "step": 10090500 }, { "epoch": 6.05, "learning_rate": 1.3677592287486485e-05, "loss": 0.22, "step": 10091000 }, { "epoch": 6.05, "learning_rate": 1.367549232192592e-05, "loss": 0.2214, "step": 10091500 }, { "epoch": 6.05, "learning_rate": 1.3673392356365357e-05, "loss": 0.2157, "step": 10092000 }, { "epoch": 6.05, "learning_rate": 1.367129239080479e-05, "loss": 0.215, "step": 10092500 }, { "epoch": 6.05, "learning_rate": 1.3669196625175347e-05, "loss": 0.2169, "step": 10093000 }, { "epoch": 6.05, "learning_rate": 1.3667096659614782e-05, "loss": 0.2169, "step": 10093500 }, { "epoch": 6.05, "learning_rate": 1.3664996694054217e-05, "loss": 0.2174, "step": 10094000 }, { "epoch": 6.05, "learning_rate": 1.3662896728493653e-05, "loss": 0.2161, "step": 10094500 }, { "epoch": 6.05, "learning_rate": 1.3660796762933088e-05, "loss": 0.2164, "step": 10095000 }, { "epoch": 6.05, "learning_rate": 1.3658696797372523e-05, "loss": 0.2128, "step": 10095500 }, { "epoch": 6.05, "learning_rate": 1.3656601031743078e-05, "loss": 0.2166, "step": 10096000 }, { "epoch": 6.05, "learning_rate": 1.3654501066182515e-05, "loss": 0.2161, "step": 10096500 }, { "epoch": 6.05, "learning_rate": 1.365240110062195e-05, "loss": 0.2207, "step": 10097000 }, { "epoch": 6.05, "learning_rate": 1.3650301135061385e-05, "loss": 0.2151, "step": 10097500 }, { "epoch": 6.05, "learning_rate": 1.364820116950082e-05, "loss": 0.2166, "step": 10098000 }, { "epoch": 6.05, "learning_rate": 1.3646101203940254e-05, "loss": 0.2137, "step": 10098500 }, { "epoch": 6.05, "learning_rate": 1.364400123837969e-05, "loss": 0.2188, "step": 10099000 }, { "epoch": 6.06, "learning_rate": 1.3641901272819126e-05, "loss": 0.2181, "step": 10099500 }, { "epoch": 6.06, "learning_rate": 1.3639805507189681e-05, "loss": 0.2196, "step": 10100000 }, { "epoch": 6.06, "eval_loss": 0.21022085845470428, "eval_runtime": 1453.479, "eval_samples_per_second": 362.386, "eval_steps_per_second": 60.398, "step": 10100000 }, { "epoch": 6.06, "learning_rate": 1.3637705541629118e-05, "loss": 0.2162, "step": 10100500 }, { "epoch": 6.06, "learning_rate": 1.3635605576068551e-05, "loss": 0.2134, "step": 10101000 }, { "epoch": 6.06, "learning_rate": 1.3633505610507988e-05, "loss": 0.2227, "step": 10101500 }, { "epoch": 6.06, "learning_rate": 1.3631409844878544e-05, "loss": 0.2163, "step": 10102000 }, { "epoch": 6.06, "learning_rate": 1.3629309879317979e-05, "loss": 0.2163, "step": 10102500 }, { "epoch": 6.06, "learning_rate": 1.3627209913757416e-05, "loss": 0.2183, "step": 10103000 }, { "epoch": 6.06, "learning_rate": 1.3625109948196849e-05, "loss": 0.217, "step": 10103500 }, { "epoch": 6.06, "learning_rate": 1.3623014182567406e-05, "loss": 0.2155, "step": 10104000 }, { "epoch": 6.06, "learning_rate": 1.3620918416937961e-05, "loss": 0.2174, "step": 10104500 }, { "epoch": 6.06, "learning_rate": 1.3618818451377398e-05, "loss": 0.2138, "step": 10105000 }, { "epoch": 6.06, "learning_rate": 1.3616718485816832e-05, "loss": 0.2166, "step": 10105500 }, { "epoch": 6.06, "learning_rate": 1.3614618520256268e-05, "loss": 0.2168, "step": 10106000 }, { "epoch": 6.06, "learning_rate": 1.3612518554695704e-05, "loss": 0.2192, "step": 10106500 }, { "epoch": 6.06, "learning_rate": 1.3610418589135137e-05, "loss": 0.2157, "step": 10107000 }, { "epoch": 6.06, "learning_rate": 1.3608318623574574e-05, "loss": 0.2141, "step": 10107500 }, { "epoch": 6.06, "learning_rate": 1.360622285794513e-05, "loss": 0.2195, "step": 10108000 }, { "epoch": 6.06, "learning_rate": 1.3604122892384564e-05, "loss": 0.2178, "step": 10108500 }, { "epoch": 6.06, "learning_rate": 1.3602022926824e-05, "loss": 0.2225, "step": 10109000 }, { "epoch": 6.06, "learning_rate": 1.3599922961263435e-05, "loss": 0.2162, "step": 10109500 }, { "epoch": 6.06, "learning_rate": 1.3597822995702871e-05, "loss": 0.2147, "step": 10110000 }, { "epoch": 6.06, "learning_rate": 1.3595723030142305e-05, "loss": 0.22, "step": 10110500 }, { "epoch": 6.06, "learning_rate": 1.359362306458174e-05, "loss": 0.2154, "step": 10111000 }, { "epoch": 6.06, "learning_rate": 1.3591527298952297e-05, "loss": 0.2191, "step": 10111500 }, { "epoch": 6.06, "learning_rate": 1.3589427333391732e-05, "loss": 0.2202, "step": 10112000 }, { "epoch": 6.06, "learning_rate": 1.3587327367831167e-05, "loss": 0.2214, "step": 10112500 }, { "epoch": 6.06, "learning_rate": 1.3585227402270603e-05, "loss": 0.2141, "step": 10113000 }, { "epoch": 6.06, "learning_rate": 1.3583127436710038e-05, "loss": 0.2146, "step": 10113500 }, { "epoch": 6.06, "learning_rate": 1.3581031671080593e-05, "loss": 0.2189, "step": 10114000 }, { "epoch": 6.06, "learning_rate": 1.357893170552003e-05, "loss": 0.2182, "step": 10114500 }, { "epoch": 6.06, "learning_rate": 1.3576831739959465e-05, "loss": 0.2166, "step": 10115000 }, { "epoch": 6.06, "learning_rate": 1.35747317743989e-05, "loss": 0.217, "step": 10115500 }, { "epoch": 6.06, "learning_rate": 1.3572631808838335e-05, "loss": 0.216, "step": 10116000 }, { "epoch": 6.07, "learning_rate": 1.3570531843277769e-05, "loss": 0.2168, "step": 10116500 }, { "epoch": 6.07, "learning_rate": 1.3568431877717206e-05, "loss": 0.2133, "step": 10117000 }, { "epoch": 6.07, "learning_rate": 1.356633191215664e-05, "loss": 0.2187, "step": 10117500 }, { "epoch": 6.07, "learning_rate": 1.3564236146527196e-05, "loss": 0.214, "step": 10118000 }, { "epoch": 6.07, "learning_rate": 1.3562136180966633e-05, "loss": 0.2242, "step": 10118500 }, { "epoch": 6.07, "learning_rate": 1.3560036215406066e-05, "loss": 0.2197, "step": 10119000 }, { "epoch": 6.07, "learning_rate": 1.3557936249845503e-05, "loss": 0.2151, "step": 10119500 }, { "epoch": 6.07, "learning_rate": 1.3555840484216058e-05, "loss": 0.2151, "step": 10120000 }, { "epoch": 6.07, "learning_rate": 1.3553740518655494e-05, "loss": 0.215, "step": 10120500 }, { "epoch": 6.07, "learning_rate": 1.355164055309493e-05, "loss": 0.2146, "step": 10121000 }, { "epoch": 6.07, "learning_rate": 1.3549540587534364e-05, "loss": 0.2175, "step": 10121500 }, { "epoch": 6.07, "learning_rate": 1.3547440621973799e-05, "loss": 0.2186, "step": 10122000 }, { "epoch": 6.07, "learning_rate": 1.3545340656413236e-05, "loss": 0.2209, "step": 10122500 }, { "epoch": 6.07, "learning_rate": 1.354324069085267e-05, "loss": 0.2174, "step": 10123000 }, { "epoch": 6.07, "learning_rate": 1.3541140725292104e-05, "loss": 0.2156, "step": 10123500 }, { "epoch": 6.07, "learning_rate": 1.3539049159593783e-05, "loss": 0.2213, "step": 10124000 }, { "epoch": 6.07, "learning_rate": 1.3536949194033218e-05, "loss": 0.218, "step": 10124500 }, { "epoch": 6.07, "learning_rate": 1.3534849228472652e-05, "loss": 0.2136, "step": 10125000 }, { "epoch": 6.07, "learning_rate": 1.3532749262912089e-05, "loss": 0.211, "step": 10125500 }, { "epoch": 6.07, "learning_rate": 1.3530649297351522e-05, "loss": 0.2152, "step": 10126000 }, { "epoch": 6.07, "learning_rate": 1.3528553531722079e-05, "loss": 0.218, "step": 10126500 }, { "epoch": 6.07, "learning_rate": 1.3526453566161514e-05, "loss": 0.2196, "step": 10127000 }, { "epoch": 6.07, "learning_rate": 1.352435360060095e-05, "loss": 0.2138, "step": 10127500 }, { "epoch": 6.07, "learning_rate": 1.3522253635040386e-05, "loss": 0.223, "step": 10128000 }, { "epoch": 6.07, "learning_rate": 1.352015366947982e-05, "loss": 0.2172, "step": 10128500 }, { "epoch": 6.07, "learning_rate": 1.3518053703919255e-05, "loss": 0.2151, "step": 10129000 }, { "epoch": 6.07, "learning_rate": 1.3515953738358692e-05, "loss": 0.2181, "step": 10129500 }, { "epoch": 6.07, "learning_rate": 1.3513857972729247e-05, "loss": 0.217, "step": 10130000 }, { "epoch": 6.07, "learning_rate": 1.3511758007168682e-05, "loss": 0.2129, "step": 10130500 }, { "epoch": 6.07, "learning_rate": 1.3509658041608117e-05, "loss": 0.2177, "step": 10131000 }, { "epoch": 6.07, "learning_rate": 1.3507558076047552e-05, "loss": 0.2181, "step": 10131500 }, { "epoch": 6.07, "learning_rate": 1.3505458110486988e-05, "loss": 0.2152, "step": 10132000 }, { "epoch": 6.07, "learning_rate": 1.3503358144926423e-05, "loss": 0.2112, "step": 10132500 }, { "epoch": 6.08, "learning_rate": 1.350126237929698e-05, "loss": 0.2196, "step": 10133000 }, { "epoch": 6.08, "learning_rate": 1.3499162413736415e-05, "loss": 0.2163, "step": 10133500 }, { "epoch": 6.08, "learning_rate": 1.349706244817585e-05, "loss": 0.2191, "step": 10134000 }, { "epoch": 6.08, "learning_rate": 1.3494962482615285e-05, "loss": 0.2182, "step": 10134500 }, { "epoch": 6.08, "learning_rate": 1.349286251705472e-05, "loss": 0.2186, "step": 10135000 }, { "epoch": 6.08, "learning_rate": 1.3490762551494155e-05, "loss": 0.223, "step": 10135500 }, { "epoch": 6.08, "learning_rate": 1.348866258593359e-05, "loss": 0.2149, "step": 10136000 }, { "epoch": 6.08, "learning_rate": 1.3486562620373026e-05, "loss": 0.2145, "step": 10136500 }, { "epoch": 6.08, "learning_rate": 1.3484466854743581e-05, "loss": 0.2194, "step": 10137000 }, { "epoch": 6.08, "learning_rate": 1.3482366889183018e-05, "loss": 0.2172, "step": 10137500 }, { "epoch": 6.08, "learning_rate": 1.3480266923622453e-05, "loss": 0.2147, "step": 10138000 }, { "epoch": 6.08, "learning_rate": 1.3478166958061887e-05, "loss": 0.2207, "step": 10138500 }, { "epoch": 6.08, "learning_rate": 1.3476071192432445e-05, "loss": 0.2175, "step": 10139000 }, { "epoch": 6.08, "learning_rate": 1.3473971226871879e-05, "loss": 0.2215, "step": 10139500 }, { "epoch": 6.08, "learning_rate": 1.3471871261311314e-05, "loss": 0.2142, "step": 10140000 }, { "epoch": 6.08, "learning_rate": 1.346977129575075e-05, "loss": 0.2193, "step": 10140500 }, { "epoch": 6.08, "learning_rate": 1.3467671330190184e-05, "loss": 0.2214, "step": 10141000 }, { "epoch": 6.08, "learning_rate": 1.3465575564560741e-05, "loss": 0.2217, "step": 10141500 }, { "epoch": 6.08, "learning_rate": 1.3463475599000176e-05, "loss": 0.2224, "step": 10142000 }, { "epoch": 6.08, "learning_rate": 1.3461375633439611e-05, "loss": 0.2164, "step": 10142500 }, { "epoch": 6.08, "learning_rate": 1.3459275667879047e-05, "loss": 0.214, "step": 10143000 }, { "epoch": 6.08, "learning_rate": 1.3457175702318482e-05, "loss": 0.223, "step": 10143500 }, { "epoch": 6.08, "learning_rate": 1.3455075736757917e-05, "loss": 0.2186, "step": 10144000 }, { "epoch": 6.08, "learning_rate": 1.3452975771197352e-05, "loss": 0.2184, "step": 10144500 }, { "epoch": 6.08, "learning_rate": 1.3450875805636787e-05, "loss": 0.215, "step": 10145000 }, { "epoch": 6.08, "learning_rate": 1.3448780040007342e-05, "loss": 0.2185, "step": 10145500 }, { "epoch": 6.08, "learning_rate": 1.344668007444678e-05, "loss": 0.2188, "step": 10146000 }, { "epoch": 6.08, "learning_rate": 1.3444580108886214e-05, "loss": 0.212, "step": 10146500 }, { "epoch": 6.08, "learning_rate": 1.3442480143325648e-05, "loss": 0.2175, "step": 10147000 }, { "epoch": 6.08, "learning_rate": 1.3440384377696207e-05, "loss": 0.2158, "step": 10147500 }, { "epoch": 6.08, "learning_rate": 1.343828441213564e-05, "loss": 0.2201, "step": 10148000 }, { "epoch": 6.08, "learning_rate": 1.3436184446575075e-05, "loss": 0.2168, "step": 10148500 }, { "epoch": 6.08, "learning_rate": 1.3434084481014512e-05, "loss": 0.2176, "step": 10149000 }, { "epoch": 6.09, "learning_rate": 1.3431984515453945e-05, "loss": 0.2165, "step": 10149500 }, { "epoch": 6.09, "learning_rate": 1.3429884549893382e-05, "loss": 0.2211, "step": 10150000 }, { "epoch": 6.09, "learning_rate": 1.3427784584332817e-05, "loss": 0.2178, "step": 10150500 }, { "epoch": 6.09, "learning_rate": 1.3425684618772251e-05, "loss": 0.2135, "step": 10151000 }, { "epoch": 6.09, "learning_rate": 1.342358885314281e-05, "loss": 0.215, "step": 10151500 }, { "epoch": 6.09, "learning_rate": 1.3421488887582243e-05, "loss": 0.2151, "step": 10152000 }, { "epoch": 6.09, "learning_rate": 1.3419388922021678e-05, "loss": 0.2168, "step": 10152500 }, { "epoch": 6.09, "learning_rate": 1.3417288956461113e-05, "loss": 0.2157, "step": 10153000 }, { "epoch": 6.09, "learning_rate": 1.341519319083167e-05, "loss": 0.2183, "step": 10153500 }, { "epoch": 6.09, "learning_rate": 1.3413093225271105e-05, "loss": 0.2148, "step": 10154000 }, { "epoch": 6.09, "learning_rate": 1.341099325971054e-05, "loss": 0.2175, "step": 10154500 }, { "epoch": 6.09, "learning_rate": 1.3408893294149976e-05, "loss": 0.2156, "step": 10155000 }, { "epoch": 6.09, "learning_rate": 1.3406797528520533e-05, "loss": 0.2172, "step": 10155500 }, { "epoch": 6.09, "learning_rate": 1.3404697562959968e-05, "loss": 0.2179, "step": 10156000 }, { "epoch": 6.09, "learning_rate": 1.3402601797330523e-05, "loss": 0.2142, "step": 10156500 }, { "epoch": 6.09, "learning_rate": 1.340050183176996e-05, "loss": 0.2183, "step": 10157000 }, { "epoch": 6.09, "learning_rate": 1.3398401866209393e-05, "loss": 0.2113, "step": 10157500 }, { "epoch": 6.09, "learning_rate": 1.3396301900648829e-05, "loss": 0.2195, "step": 10158000 }, { "epoch": 6.09, "learning_rate": 1.3394201935088265e-05, "loss": 0.2171, "step": 10158500 }, { "epoch": 6.09, "learning_rate": 1.339210616945882e-05, "loss": 0.2147, "step": 10159000 }, { "epoch": 6.09, "learning_rate": 1.3390006203898256e-05, "loss": 0.2117, "step": 10159500 }, { "epoch": 6.09, "learning_rate": 1.3387906238337691e-05, "loss": 0.2172, "step": 10160000 }, { "epoch": 6.09, "learning_rate": 1.3385806272777126e-05, "loss": 0.2153, "step": 10160500 }, { "epoch": 6.09, "learning_rate": 1.3383706307216561e-05, "loss": 0.217, "step": 10161000 }, { "epoch": 6.09, "learning_rate": 1.3381610541587118e-05, "loss": 0.216, "step": 10161500 }, { "epoch": 6.09, "learning_rate": 1.3379510576026553e-05, "loss": 0.2166, "step": 10162000 }, { "epoch": 6.09, "learning_rate": 1.3377410610465989e-05, "loss": 0.2175, "step": 10162500 }, { "epoch": 6.09, "learning_rate": 1.3375310644905424e-05, "loss": 0.2151, "step": 10163000 }, { "epoch": 6.09, "learning_rate": 1.3373210679344859e-05, "loss": 0.2211, "step": 10163500 }, { "epoch": 6.09, "learning_rate": 1.3371110713784294e-05, "loss": 0.2109, "step": 10164000 }, { "epoch": 6.09, "learning_rate": 1.336901074822373e-05, "loss": 0.2174, "step": 10164500 }, { "epoch": 6.09, "learning_rate": 1.3366914982594284e-05, "loss": 0.2176, "step": 10165000 }, { "epoch": 6.09, "learning_rate": 1.3364815017033721e-05, "loss": 0.2127, "step": 10165500 }, { "epoch": 6.09, "learning_rate": 1.3362715051473155e-05, "loss": 0.2158, "step": 10166000 }, { "epoch": 6.1, "learning_rate": 1.336061508591259e-05, "loss": 0.2183, "step": 10166500 }, { "epoch": 6.1, "learning_rate": 1.3358515120352027e-05, "loss": 0.2146, "step": 10167000 }, { "epoch": 6.1, "learning_rate": 1.335641515479146e-05, "loss": 0.2188, "step": 10167500 }, { "epoch": 6.1, "learning_rate": 1.3354315189230897e-05, "loss": 0.2158, "step": 10168000 }, { "epoch": 6.1, "learning_rate": 1.3352215223670332e-05, "loss": 0.2194, "step": 10168500 }, { "epoch": 6.1, "learning_rate": 1.3350119458040887e-05, "loss": 0.2148, "step": 10169000 }, { "epoch": 6.1, "learning_rate": 1.3348023692411444e-05, "loss": 0.218, "step": 10169500 }, { "epoch": 6.1, "learning_rate": 1.334592372685088e-05, "loss": 0.2246, "step": 10170000 }, { "epoch": 6.1, "learning_rate": 1.3343823761290315e-05, "loss": 0.2166, "step": 10170500 }, { "epoch": 6.1, "learning_rate": 1.334172379572975e-05, "loss": 0.2143, "step": 10171000 }, { "epoch": 6.1, "learning_rate": 1.3339623830169185e-05, "loss": 0.2137, "step": 10171500 }, { "epoch": 6.1, "learning_rate": 1.333752386460862e-05, "loss": 0.2189, "step": 10172000 }, { "epoch": 6.1, "learning_rate": 1.3335428098979177e-05, "loss": 0.2165, "step": 10172500 }, { "epoch": 6.1, "learning_rate": 1.3333328133418612e-05, "loss": 0.2156, "step": 10173000 }, { "epoch": 6.1, "learning_rate": 1.3331228167858047e-05, "loss": 0.2178, "step": 10173500 }, { "epoch": 6.1, "learning_rate": 1.3329128202297483e-05, "loss": 0.2155, "step": 10174000 }, { "epoch": 6.1, "learning_rate": 1.3327032436668038e-05, "loss": 0.2221, "step": 10174500 }, { "epoch": 6.1, "learning_rate": 1.3324932471107475e-05, "loss": 0.2192, "step": 10175000 }, { "epoch": 6.1, "learning_rate": 1.3322832505546908e-05, "loss": 0.2151, "step": 10175500 }, { "epoch": 6.1, "learning_rate": 1.3320732539986343e-05, "loss": 0.2171, "step": 10176000 }, { "epoch": 6.1, "learning_rate": 1.331863257442578e-05, "loss": 0.2215, "step": 10176500 }, { "epoch": 6.1, "learning_rate": 1.3316532608865214e-05, "loss": 0.218, "step": 10177000 }, { "epoch": 6.1, "learning_rate": 1.3314432643304649e-05, "loss": 0.2164, "step": 10177500 }, { "epoch": 6.1, "learning_rate": 1.3312336877675206e-05, "loss": 0.2191, "step": 10178000 }, { "epoch": 6.1, "learning_rate": 1.3310236912114641e-05, "loss": 0.2143, "step": 10178500 }, { "epoch": 6.1, "learning_rate": 1.3308136946554076e-05, "loss": 0.2164, "step": 10179000 }, { "epoch": 6.1, "learning_rate": 1.3306036980993511e-05, "loss": 0.2252, "step": 10179500 }, { "epoch": 6.1, "learning_rate": 1.3303941215364068e-05, "loss": 0.2156, "step": 10180000 }, { "epoch": 6.1, "learning_rate": 1.3301841249803503e-05, "loss": 0.2126, "step": 10180500 }, { "epoch": 6.1, "learning_rate": 1.3299741284242939e-05, "loss": 0.2156, "step": 10181000 }, { "epoch": 6.1, "learning_rate": 1.3297641318682374e-05, "loss": 0.2179, "step": 10181500 }, { "epoch": 6.1, "learning_rate": 1.329554555305293e-05, "loss": 0.2256, "step": 10182000 }, { "epoch": 6.1, "learning_rate": 1.3293445587492366e-05, "loss": 0.2173, "step": 10182500 }, { "epoch": 6.11, "learning_rate": 1.32913456219318e-05, "loss": 0.214, "step": 10183000 }, { "epoch": 6.11, "learning_rate": 1.3289245656371236e-05, "loss": 0.2129, "step": 10183500 }, { "epoch": 6.11, "learning_rate": 1.328714569081067e-05, "loss": 0.2191, "step": 10184000 }, { "epoch": 6.11, "learning_rate": 1.3285045725250105e-05, "loss": 0.2155, "step": 10184500 }, { "epoch": 6.11, "learning_rate": 1.3282945759689542e-05, "loss": 0.2142, "step": 10185000 }, { "epoch": 6.11, "learning_rate": 1.3280845794128975e-05, "loss": 0.2205, "step": 10185500 }, { "epoch": 6.11, "learning_rate": 1.3278750028499532e-05, "loss": 0.2171, "step": 10186000 }, { "epoch": 6.11, "learning_rate": 1.3276650062938967e-05, "loss": 0.2184, "step": 10186500 }, { "epoch": 6.11, "learning_rate": 1.3274554297309524e-05, "loss": 0.2178, "step": 10187000 }, { "epoch": 6.11, "learning_rate": 1.327245433174896e-05, "loss": 0.2165, "step": 10187500 }, { "epoch": 6.11, "learning_rate": 1.3270354366188394e-05, "loss": 0.2173, "step": 10188000 }, { "epoch": 6.11, "learning_rate": 1.326825440062783e-05, "loss": 0.2196, "step": 10188500 }, { "epoch": 6.11, "learning_rate": 1.3266154435067265e-05, "loss": 0.2132, "step": 10189000 }, { "epoch": 6.11, "learning_rate": 1.32640544695067e-05, "loss": 0.2197, "step": 10189500 }, { "epoch": 6.11, "learning_rate": 1.3261954503946135e-05, "loss": 0.2181, "step": 10190000 }, { "epoch": 6.11, "learning_rate": 1.3259858738316692e-05, "loss": 0.2191, "step": 10190500 }, { "epoch": 6.11, "learning_rate": 1.3257758772756127e-05, "loss": 0.2242, "step": 10191000 }, { "epoch": 6.11, "learning_rate": 1.3255658807195562e-05, "loss": 0.2118, "step": 10191500 }, { "epoch": 6.11, "learning_rate": 1.3253558841634997e-05, "loss": 0.2155, "step": 10192000 }, { "epoch": 6.11, "learning_rate": 1.3251458876074431e-05, "loss": 0.2197, "step": 10192500 }, { "epoch": 6.11, "learning_rate": 1.324936311044499e-05, "loss": 0.2175, "step": 10193000 }, { "epoch": 6.11, "learning_rate": 1.3247263144884423e-05, "loss": 0.2198, "step": 10193500 }, { "epoch": 6.11, "learning_rate": 1.3245163179323858e-05, "loss": 0.2177, "step": 10194000 }, { "epoch": 6.11, "learning_rate": 1.3243063213763295e-05, "loss": 0.2149, "step": 10194500 }, { "epoch": 6.11, "learning_rate": 1.3240963248202728e-05, "loss": 0.2185, "step": 10195000 }, { "epoch": 6.11, "learning_rate": 1.3238863282642164e-05, "loss": 0.2177, "step": 10195500 }, { "epoch": 6.11, "learning_rate": 1.323676751701272e-05, "loss": 0.219, "step": 10196000 }, { "epoch": 6.11, "learning_rate": 1.3234667551452156e-05, "loss": 0.2144, "step": 10196500 }, { "epoch": 6.11, "learning_rate": 1.3232567585891591e-05, "loss": 0.2136, "step": 10197000 }, { "epoch": 6.11, "learning_rate": 1.3230467620331026e-05, "loss": 0.2197, "step": 10197500 }, { "epoch": 6.11, "learning_rate": 1.3228367654770461e-05, "loss": 0.219, "step": 10198000 }, { "epoch": 6.11, "learning_rate": 1.3226267689209898e-05, "loss": 0.2169, "step": 10198500 }, { "epoch": 6.11, "learning_rate": 1.3224167723649332e-05, "loss": 0.2195, "step": 10199000 }, { "epoch": 6.12, "learning_rate": 1.3222067758088767e-05, "loss": 0.22, "step": 10199500 }, { "epoch": 6.12, "learning_rate": 1.3219971992459324e-05, "loss": 0.2162, "step": 10200000 }, { "epoch": 6.12, "eval_loss": 0.20991522073745728, "eval_runtime": 1455.0352, "eval_samples_per_second": 361.998, "eval_steps_per_second": 60.333, "step": 10200000 }, { "epoch": 6.12, "learning_rate": 1.3217872026898759e-05, "loss": 0.2208, "step": 10200500 }, { "epoch": 6.12, "learning_rate": 1.3215772061338194e-05, "loss": 0.2194, "step": 10201000 }, { "epoch": 6.12, "learning_rate": 1.3213676295708751e-05, "loss": 0.2203, "step": 10201500 }, { "epoch": 6.12, "learning_rate": 1.3211576330148186e-05, "loss": 0.2194, "step": 10202000 }, { "epoch": 6.12, "learning_rate": 1.320947636458762e-05, "loss": 0.22, "step": 10202500 }, { "epoch": 6.12, "learning_rate": 1.3207376399027056e-05, "loss": 0.2155, "step": 10203000 }, { "epoch": 6.12, "learning_rate": 1.3205280633397612e-05, "loss": 0.2133, "step": 10203500 }, { "epoch": 6.12, "learning_rate": 1.3203180667837047e-05, "loss": 0.2143, "step": 10204000 }, { "epoch": 6.12, "learning_rate": 1.3201080702276482e-05, "loss": 0.2167, "step": 10204500 }, { "epoch": 6.12, "learning_rate": 1.3198980736715917e-05, "loss": 0.2243, "step": 10205000 }, { "epoch": 6.12, "learning_rate": 1.3196880771155354e-05, "loss": 0.2179, "step": 10205500 }, { "epoch": 6.12, "learning_rate": 1.319478500552591e-05, "loss": 0.2204, "step": 10206000 }, { "epoch": 6.12, "learning_rate": 1.3192685039965344e-05, "loss": 0.2234, "step": 10206500 }, { "epoch": 6.12, "learning_rate": 1.319058507440478e-05, "loss": 0.2159, "step": 10207000 }, { "epoch": 6.12, "learning_rate": 1.3188485108844215e-05, "loss": 0.2176, "step": 10207500 }, { "epoch": 6.12, "learning_rate": 1.318638514328365e-05, "loss": 0.215, "step": 10208000 }, { "epoch": 6.12, "learning_rate": 1.3184289377654207e-05, "loss": 0.2182, "step": 10208500 }, { "epoch": 6.12, "learning_rate": 1.3182189412093642e-05, "loss": 0.2171, "step": 10209000 }, { "epoch": 6.12, "learning_rate": 1.3180089446533077e-05, "loss": 0.2129, "step": 10209500 }, { "epoch": 6.12, "learning_rate": 1.3177993680903634e-05, "loss": 0.2188, "step": 10210000 }, { "epoch": 6.12, "learning_rate": 1.3175893715343068e-05, "loss": 0.2164, "step": 10210500 }, { "epoch": 6.12, "learning_rate": 1.3173793749782504e-05, "loss": 0.2148, "step": 10211000 }, { "epoch": 6.12, "learning_rate": 1.317169378422194e-05, "loss": 0.2168, "step": 10211500 }, { "epoch": 6.12, "learning_rate": 1.3169593818661373e-05, "loss": 0.218, "step": 10212000 }, { "epoch": 6.12, "learning_rate": 1.316749385310081e-05, "loss": 0.2199, "step": 10212500 }, { "epoch": 6.12, "learning_rate": 1.3165393887540243e-05, "loss": 0.2181, "step": 10213000 }, { "epoch": 6.12, "learning_rate": 1.3163293921979678e-05, "loss": 0.2152, "step": 10213500 }, { "epoch": 6.12, "learning_rate": 1.3161193956419115e-05, "loss": 0.2214, "step": 10214000 }, { "epoch": 6.12, "learning_rate": 1.315909819078967e-05, "loss": 0.2147, "step": 10214500 }, { "epoch": 6.12, "learning_rate": 1.3156998225229106e-05, "loss": 0.214, "step": 10215000 }, { "epoch": 6.12, "learning_rate": 1.315489825966854e-05, "loss": 0.2156, "step": 10215500 }, { "epoch": 6.12, "learning_rate": 1.3152798294107976e-05, "loss": 0.2116, "step": 10216000 }, { "epoch": 6.13, "learning_rate": 1.3150702528478533e-05, "loss": 0.2165, "step": 10216500 }, { "epoch": 6.13, "learning_rate": 1.3148602562917968e-05, "loss": 0.212, "step": 10217000 }, { "epoch": 6.13, "learning_rate": 1.3146502597357403e-05, "loss": 0.2196, "step": 10217500 }, { "epoch": 6.13, "learning_rate": 1.3144402631796838e-05, "loss": 0.2195, "step": 10218000 }, { "epoch": 6.13, "learning_rate": 1.3142302666236274e-05, "loss": 0.2162, "step": 10218500 }, { "epoch": 6.13, "learning_rate": 1.3140206900606829e-05, "loss": 0.2171, "step": 10219000 }, { "epoch": 6.13, "learning_rate": 1.3138106935046266e-05, "loss": 0.2215, "step": 10219500 }, { "epoch": 6.13, "learning_rate": 1.31360069694857e-05, "loss": 0.2166, "step": 10220000 }, { "epoch": 6.13, "learning_rate": 1.3133907003925134e-05, "loss": 0.2134, "step": 10220500 }, { "epoch": 6.13, "learning_rate": 1.3131811238295693e-05, "loss": 0.2185, "step": 10221000 }, { "epoch": 6.13, "learning_rate": 1.3129711272735126e-05, "loss": 0.2162, "step": 10221500 }, { "epoch": 6.13, "learning_rate": 1.3127611307174562e-05, "loss": 0.2144, "step": 10222000 }, { "epoch": 6.13, "learning_rate": 1.3125511341613997e-05, "loss": 0.2162, "step": 10222500 }, { "epoch": 6.13, "learning_rate": 1.3123411376053432e-05, "loss": 0.2151, "step": 10223000 }, { "epoch": 6.13, "learning_rate": 1.3121315610423989e-05, "loss": 0.2193, "step": 10223500 }, { "epoch": 6.13, "learning_rate": 1.3119215644863424e-05, "loss": 0.2183, "step": 10224000 }, { "epoch": 6.13, "learning_rate": 1.3117115679302859e-05, "loss": 0.223, "step": 10224500 }, { "epoch": 6.13, "learning_rate": 1.3115015713742294e-05, "loss": 0.2138, "step": 10225000 }, { "epoch": 6.13, "learning_rate": 1.311291574818173e-05, "loss": 0.2177, "step": 10225500 }, { "epoch": 6.13, "learning_rate": 1.3110815782621165e-05, "loss": 0.2142, "step": 10226000 }, { "epoch": 6.13, "learning_rate": 1.31087158170606e-05, "loss": 0.2106, "step": 10226500 }, { "epoch": 6.13, "learning_rate": 1.3106620051431157e-05, "loss": 0.2171, "step": 10227000 }, { "epoch": 6.13, "learning_rate": 1.3104520085870592e-05, "loss": 0.215, "step": 10227500 }, { "epoch": 6.13, "learning_rate": 1.3102420120310027e-05, "loss": 0.2149, "step": 10228000 }, { "epoch": 6.13, "learning_rate": 1.3100320154749462e-05, "loss": 0.2211, "step": 10228500 }, { "epoch": 6.13, "learning_rate": 1.3098220189188897e-05, "loss": 0.2181, "step": 10229000 }, { "epoch": 6.13, "learning_rate": 1.3096120223628332e-05, "loss": 0.224, "step": 10229500 }, { "epoch": 6.13, "learning_rate": 1.3094020258067766e-05, "loss": 0.2138, "step": 10230000 }, { "epoch": 6.13, "learning_rate": 1.3091924492438325e-05, "loss": 0.2174, "step": 10230500 }, { "epoch": 6.13, "learning_rate": 1.3089824526877758e-05, "loss": 0.2119, "step": 10231000 }, { "epoch": 6.13, "learning_rate": 1.3087724561317193e-05, "loss": 0.2188, "step": 10231500 }, { "epoch": 6.13, "learning_rate": 1.308562459575663e-05, "loss": 0.216, "step": 10232000 }, { "epoch": 6.13, "learning_rate": 1.3083524630196064e-05, "loss": 0.2195, "step": 10232500 }, { "epoch": 6.14, "learning_rate": 1.30814246646355e-05, "loss": 0.2118, "step": 10233000 }, { "epoch": 6.14, "learning_rate": 1.3079324699074936e-05, "loss": 0.2177, "step": 10233500 }, { "epoch": 6.14, "learning_rate": 1.3077224733514369e-05, "loss": 0.2133, "step": 10234000 }, { "epoch": 6.14, "learning_rate": 1.3075128967884928e-05, "loss": 0.2118, "step": 10234500 }, { "epoch": 6.14, "learning_rate": 1.3073033202255483e-05, "loss": 0.2119, "step": 10235000 }, { "epoch": 6.14, "learning_rate": 1.3070933236694918e-05, "loss": 0.2189, "step": 10235500 }, { "epoch": 6.14, "learning_rate": 1.3068833271134353e-05, "loss": 0.2143, "step": 10236000 }, { "epoch": 6.14, "learning_rate": 1.3066733305573788e-05, "loss": 0.2121, "step": 10236500 }, { "epoch": 6.14, "learning_rate": 1.3064637539944344e-05, "loss": 0.2263, "step": 10237000 }, { "epoch": 6.14, "learning_rate": 1.306253757438378e-05, "loss": 0.2167, "step": 10237500 }, { "epoch": 6.14, "learning_rate": 1.3060437608823216e-05, "loss": 0.2174, "step": 10238000 }, { "epoch": 6.14, "learning_rate": 1.3058337643262649e-05, "loss": 0.2148, "step": 10238500 }, { "epoch": 6.14, "learning_rate": 1.3056237677702086e-05, "loss": 0.2164, "step": 10239000 }, { "epoch": 6.14, "learning_rate": 1.3054137712141521e-05, "loss": 0.219, "step": 10239500 }, { "epoch": 6.14, "learning_rate": 1.3052037746580956e-05, "loss": 0.2194, "step": 10240000 }, { "epoch": 6.14, "learning_rate": 1.3049941980951513e-05, "loss": 0.217, "step": 10240500 }, { "epoch": 6.14, "learning_rate": 1.3047842015390947e-05, "loss": 0.2138, "step": 10241000 }, { "epoch": 6.14, "learning_rate": 1.3045742049830384e-05, "loss": 0.218, "step": 10241500 }, { "epoch": 6.14, "learning_rate": 1.3043642084269817e-05, "loss": 0.2248, "step": 10242000 }, { "epoch": 6.14, "learning_rate": 1.3041546318640374e-05, "loss": 0.2179, "step": 10242500 }, { "epoch": 6.14, "learning_rate": 1.3039446353079809e-05, "loss": 0.2167, "step": 10243000 }, { "epoch": 6.14, "learning_rate": 1.3037346387519244e-05, "loss": 0.2165, "step": 10243500 }, { "epoch": 6.14, "learning_rate": 1.303524642195868e-05, "loss": 0.2124, "step": 10244000 }, { "epoch": 6.14, "learning_rate": 1.3033146456398115e-05, "loss": 0.2126, "step": 10244500 }, { "epoch": 6.14, "learning_rate": 1.303104649083755e-05, "loss": 0.2186, "step": 10245000 }, { "epoch": 6.14, "learning_rate": 1.3028946525276987e-05, "loss": 0.2117, "step": 10245500 }, { "epoch": 6.14, "learning_rate": 1.302684655971642e-05, "loss": 0.2179, "step": 10246000 }, { "epoch": 6.14, "learning_rate": 1.3024750794086977e-05, "loss": 0.2172, "step": 10246500 }, { "epoch": 6.14, "learning_rate": 1.3022655028457534e-05, "loss": 0.2162, "step": 10247000 }, { "epoch": 6.14, "learning_rate": 1.3020555062896969e-05, "loss": 0.2151, "step": 10247500 }, { "epoch": 6.14, "learning_rate": 1.3018455097336403e-05, "loss": 0.214, "step": 10248000 }, { "epoch": 6.14, "learning_rate": 1.301635513177584e-05, "loss": 0.213, "step": 10248500 }, { "epoch": 6.14, "learning_rate": 1.3014255166215275e-05, "loss": 0.2167, "step": 10249000 }, { "epoch": 6.14, "learning_rate": 1.3012155200654708e-05, "loss": 0.2206, "step": 10249500 }, { "epoch": 6.15, "learning_rate": 1.3010055235094145e-05, "loss": 0.2188, "step": 10250000 }, { "epoch": 6.15, "learning_rate": 1.3007955269533578e-05, "loss": 0.2233, "step": 10250500 }, { "epoch": 6.15, "learning_rate": 1.3005859503904135e-05, "loss": 0.2177, "step": 10251000 }, { "epoch": 6.15, "learning_rate": 1.300375953834357e-05, "loss": 0.2178, "step": 10251500 }, { "epoch": 6.15, "learning_rate": 1.3001659572783006e-05, "loss": 0.2169, "step": 10252000 }, { "epoch": 6.15, "learning_rate": 1.2999559607222442e-05, "loss": 0.2177, "step": 10252500 }, { "epoch": 6.15, "learning_rate": 1.2997463841592998e-05, "loss": 0.2225, "step": 10253000 }, { "epoch": 6.15, "learning_rate": 1.2995363876032433e-05, "loss": 0.2135, "step": 10253500 }, { "epoch": 6.15, "learning_rate": 1.2993263910471868e-05, "loss": 0.2158, "step": 10254000 }, { "epoch": 6.15, "learning_rate": 1.2991163944911303e-05, "loss": 0.2151, "step": 10254500 }, { "epoch": 6.15, "learning_rate": 1.2989063979350738e-05, "loss": 0.2173, "step": 10255000 }, { "epoch": 6.15, "learning_rate": 1.2986968213721295e-05, "loss": 0.2217, "step": 10255500 }, { "epoch": 6.15, "learning_rate": 1.298486824816073e-05, "loss": 0.2245, "step": 10256000 }, { "epoch": 6.15, "learning_rate": 1.2982768282600164e-05, "loss": 0.216, "step": 10256500 }, { "epoch": 6.15, "learning_rate": 1.29806683170396e-05, "loss": 0.2199, "step": 10257000 }, { "epoch": 6.15, "learning_rate": 1.2978568351479036e-05, "loss": 0.2187, "step": 10257500 }, { "epoch": 6.15, "learning_rate": 1.2976472585849591e-05, "loss": 0.2149, "step": 10258000 }, { "epoch": 6.15, "learning_rate": 1.2974372620289028e-05, "loss": 0.2162, "step": 10258500 }, { "epoch": 6.15, "learning_rate": 1.2972272654728461e-05, "loss": 0.2176, "step": 10259000 }, { "epoch": 6.15, "learning_rate": 1.2970172689167898e-05, "loss": 0.2181, "step": 10259500 }, { "epoch": 6.15, "learning_rate": 1.2968072723607332e-05, "loss": 0.2171, "step": 10260000 }, { "epoch": 6.15, "learning_rate": 1.2965972758046767e-05, "loss": 0.212, "step": 10260500 }, { "epoch": 6.15, "learning_rate": 1.2963872792486204e-05, "loss": 0.2217, "step": 10261000 }, { "epoch": 6.15, "learning_rate": 1.2961772826925637e-05, "loss": 0.2178, "step": 10261500 }, { "epoch": 6.15, "learning_rate": 1.2959677061296194e-05, "loss": 0.2168, "step": 10262000 }, { "epoch": 6.15, "learning_rate": 1.295757709573563e-05, "loss": 0.2135, "step": 10262500 }, { "epoch": 6.15, "learning_rate": 1.2955477130175064e-05, "loss": 0.2174, "step": 10263000 }, { "epoch": 6.15, "learning_rate": 1.2953381364545621e-05, "loss": 0.2139, "step": 10263500 }, { "epoch": 6.15, "learning_rate": 1.2951285598916178e-05, "loss": 0.2196, "step": 10264000 }, { "epoch": 6.15, "learning_rate": 1.2949185633355612e-05, "loss": 0.2184, "step": 10264500 }, { "epoch": 6.15, "learning_rate": 1.2947085667795049e-05, "loss": 0.2195, "step": 10265000 }, { "epoch": 6.15, "learning_rate": 1.2944985702234484e-05, "loss": 0.2157, "step": 10265500 }, { "epoch": 6.15, "learning_rate": 1.2942885736673917e-05, "loss": 0.2149, "step": 10266000 }, { "epoch": 6.16, "learning_rate": 1.2940785771113354e-05, "loss": 0.2121, "step": 10266500 }, { "epoch": 6.16, "learning_rate": 1.293868580555279e-05, "loss": 0.2149, "step": 10267000 }, { "epoch": 6.16, "learning_rate": 1.2936585839992223e-05, "loss": 0.2132, "step": 10267500 }, { "epoch": 6.16, "learning_rate": 1.293448587443166e-05, "loss": 0.2231, "step": 10268000 }, { "epoch": 6.16, "learning_rate": 1.2932385908871093e-05, "loss": 0.2153, "step": 10268500 }, { "epoch": 6.16, "learning_rate": 1.293028594331053e-05, "loss": 0.2128, "step": 10269000 }, { "epoch": 6.16, "learning_rate": 1.2928185977749965e-05, "loss": 0.2144, "step": 10269500 }, { "epoch": 6.16, "learning_rate": 1.292609021212052e-05, "loss": 0.2197, "step": 10270000 }, { "epoch": 6.16, "learning_rate": 1.2923990246559957e-05, "loss": 0.2143, "step": 10270500 }, { "epoch": 6.16, "learning_rate": 1.292189028099939e-05, "loss": 0.2172, "step": 10271000 }, { "epoch": 6.16, "learning_rate": 1.2919790315438826e-05, "loss": 0.218, "step": 10271500 }, { "epoch": 6.16, "learning_rate": 1.2917694549809383e-05, "loss": 0.2142, "step": 10272000 }, { "epoch": 6.16, "learning_rate": 1.291559878417994e-05, "loss": 0.2174, "step": 10272500 }, { "epoch": 6.16, "learning_rate": 1.2913498818619373e-05, "loss": 0.2129, "step": 10273000 }, { "epoch": 6.16, "learning_rate": 1.291139885305881e-05, "loss": 0.2195, "step": 10273500 }, { "epoch": 6.16, "learning_rate": 1.2909298887498245e-05, "loss": 0.2221, "step": 10274000 }, { "epoch": 6.16, "learning_rate": 1.2907198921937679e-05, "loss": 0.2218, "step": 10274500 }, { "epoch": 6.16, "learning_rate": 1.2905098956377116e-05, "loss": 0.2171, "step": 10275000 }, { "epoch": 6.16, "learning_rate": 1.290299899081655e-05, "loss": 0.2171, "step": 10275500 }, { "epoch": 6.16, "learning_rate": 1.2900903225187106e-05, "loss": 0.2128, "step": 10276000 }, { "epoch": 6.16, "learning_rate": 1.2898803259626543e-05, "loss": 0.2214, "step": 10276500 }, { "epoch": 6.16, "learning_rate": 1.2896703294065976e-05, "loss": 0.2133, "step": 10277000 }, { "epoch": 6.16, "learning_rate": 1.2894603328505413e-05, "loss": 0.2162, "step": 10277500 }, { "epoch": 6.16, "learning_rate": 1.2892503362944848e-05, "loss": 0.2218, "step": 10278000 }, { "epoch": 6.16, "learning_rate": 1.2890407597315404e-05, "loss": 0.22, "step": 10278500 }, { "epoch": 6.16, "learning_rate": 1.288830763175484e-05, "loss": 0.2143, "step": 10279000 }, { "epoch": 6.16, "learning_rate": 1.2886207666194274e-05, "loss": 0.2133, "step": 10279500 }, { "epoch": 6.16, "learning_rate": 1.2884107700633709e-05, "loss": 0.2195, "step": 10280000 }, { "epoch": 6.16, "learning_rate": 1.2882007735073144e-05, "loss": 0.217, "step": 10280500 }, { "epoch": 6.16, "learning_rate": 1.287990776951258e-05, "loss": 0.2157, "step": 10281000 }, { "epoch": 6.16, "learning_rate": 1.2877812003883136e-05, "loss": 0.2146, "step": 10281500 }, { "epoch": 6.16, "learning_rate": 1.2875712038322571e-05, "loss": 0.2179, "step": 10282000 }, { "epoch": 6.16, "learning_rate": 1.2873612072762007e-05, "loss": 0.2168, "step": 10282500 }, { "epoch": 6.17, "learning_rate": 1.2871512107201442e-05, "loss": 0.216, "step": 10283000 }, { "epoch": 6.17, "learning_rate": 1.2869412141640877e-05, "loss": 0.2169, "step": 10283500 }, { "epoch": 6.17, "learning_rate": 1.2867312176080312e-05, "loss": 0.2204, "step": 10284000 }, { "epoch": 6.17, "learning_rate": 1.2865212210519747e-05, "loss": 0.2126, "step": 10284500 }, { "epoch": 6.17, "learning_rate": 1.2863112244959182e-05, "loss": 0.2177, "step": 10285000 }, { "epoch": 6.17, "learning_rate": 1.286102067926086e-05, "loss": 0.2164, "step": 10285500 }, { "epoch": 6.17, "learning_rate": 1.2858920713700296e-05, "loss": 0.2191, "step": 10286000 }, { "epoch": 6.17, "learning_rate": 1.285682074813973e-05, "loss": 0.2165, "step": 10286500 }, { "epoch": 6.17, "learning_rate": 1.2854720782579165e-05, "loss": 0.2131, "step": 10287000 }, { "epoch": 6.17, "learning_rate": 1.2852620817018602e-05, "loss": 0.2195, "step": 10287500 }, { "epoch": 6.17, "learning_rate": 1.2850520851458035e-05, "loss": 0.2149, "step": 10288000 }, { "epoch": 6.17, "learning_rate": 1.2848425085828592e-05, "loss": 0.2207, "step": 10288500 }, { "epoch": 6.17, "learning_rate": 1.2846325120268027e-05, "loss": 0.2189, "step": 10289000 }, { "epoch": 6.17, "learning_rate": 1.2844225154707462e-05, "loss": 0.2219, "step": 10289500 }, { "epoch": 6.17, "learning_rate": 1.2842125189146898e-05, "loss": 0.2207, "step": 10290000 }, { "epoch": 6.17, "learning_rate": 1.2840025223586333e-05, "loss": 0.2129, "step": 10290500 }, { "epoch": 6.17, "learning_rate": 1.2837925258025768e-05, "loss": 0.2125, "step": 10291000 }, { "epoch": 6.17, "learning_rate": 1.2835825292465203e-05, "loss": 0.218, "step": 10291500 }, { "epoch": 6.17, "learning_rate": 1.2833725326904638e-05, "loss": 0.2121, "step": 10292000 }, { "epoch": 6.17, "learning_rate": 1.2831629561275193e-05, "loss": 0.2192, "step": 10292500 }, { "epoch": 6.17, "learning_rate": 1.282952959571463e-05, "loss": 0.2174, "step": 10293000 }, { "epoch": 6.17, "learning_rate": 1.2827429630154065e-05, "loss": 0.2215, "step": 10293500 }, { "epoch": 6.17, "learning_rate": 1.28253296645935e-05, "loss": 0.2185, "step": 10294000 }, { "epoch": 6.17, "learning_rate": 1.2823238098895178e-05, "loss": 0.2175, "step": 10294500 }, { "epoch": 6.17, "learning_rate": 1.2821138133334613e-05, "loss": 0.2171, "step": 10295000 }, { "epoch": 6.17, "learning_rate": 1.2819038167774048e-05, "loss": 0.218, "step": 10295500 }, { "epoch": 6.17, "learning_rate": 1.2816938202213483e-05, "loss": 0.2161, "step": 10296000 }, { "epoch": 6.17, "learning_rate": 1.2814838236652918e-05, "loss": 0.2159, "step": 10296500 }, { "epoch": 6.17, "learning_rate": 1.2812738271092355e-05, "loss": 0.216, "step": 10297000 }, { "epoch": 6.17, "learning_rate": 1.2810638305531789e-05, "loss": 0.2143, "step": 10297500 }, { "epoch": 6.17, "learning_rate": 1.2808538339971224e-05, "loss": 0.2198, "step": 10298000 }, { "epoch": 6.17, "learning_rate": 1.280644257434178e-05, "loss": 0.2153, "step": 10298500 }, { "epoch": 6.17, "learning_rate": 1.2804346808712338e-05, "loss": 0.2114, "step": 10299000 }, { "epoch": 6.17, "learning_rate": 1.2802246843151771e-05, "loss": 0.2159, "step": 10299500 }, { "epoch": 6.18, "learning_rate": 1.2800146877591208e-05, "loss": 0.2134, "step": 10300000 }, { "epoch": 6.18, "eval_loss": 0.20936846733093262, "eval_runtime": 1452.5769, "eval_samples_per_second": 362.611, "eval_steps_per_second": 60.435, "step": 10300000 }, { "epoch": 6.18, "learning_rate": 1.2798046912030641e-05, "loss": 0.2176, "step": 10300500 }, { "epoch": 6.18, "learning_rate": 1.2795946946470078e-05, "loss": 0.2132, "step": 10301000 }, { "epoch": 6.18, "learning_rate": 1.2793846980909513e-05, "loss": 0.2192, "step": 10301500 }, { "epoch": 6.18, "learning_rate": 1.2791747015348947e-05, "loss": 0.2155, "step": 10302000 }, { "epoch": 6.18, "learning_rate": 1.2789647049788384e-05, "loss": 0.2221, "step": 10302500 }, { "epoch": 6.18, "learning_rate": 1.2787551284158939e-05, "loss": 0.2129, "step": 10303000 }, { "epoch": 6.18, "learning_rate": 1.2785455518529496e-05, "loss": 0.2148, "step": 10303500 }, { "epoch": 6.18, "learning_rate": 1.2783355552968931e-05, "loss": 0.2154, "step": 10304000 }, { "epoch": 6.18, "learning_rate": 1.2781255587408366e-05, "loss": 0.2178, "step": 10304500 }, { "epoch": 6.18, "learning_rate": 1.2779155621847801e-05, "loss": 0.2167, "step": 10305000 }, { "epoch": 6.18, "learning_rate": 1.2777055656287237e-05, "loss": 0.2142, "step": 10305500 }, { "epoch": 6.18, "learning_rate": 1.2774955690726672e-05, "loss": 0.2171, "step": 10306000 }, { "epoch": 6.18, "learning_rate": 1.2772859925097227e-05, "loss": 0.2147, "step": 10306500 }, { "epoch": 6.18, "learning_rate": 1.2770759959536664e-05, "loss": 0.2131, "step": 10307000 }, { "epoch": 6.18, "learning_rate": 1.2768659993976099e-05, "loss": 0.2194, "step": 10307500 }, { "epoch": 6.18, "learning_rate": 1.2766560028415534e-05, "loss": 0.2176, "step": 10308000 }, { "epoch": 6.18, "learning_rate": 1.276446006285497e-05, "loss": 0.2128, "step": 10308500 }, { "epoch": 6.18, "learning_rate": 1.2762364297225525e-05, "loss": 0.2236, "step": 10309000 }, { "epoch": 6.18, "learning_rate": 1.2760264331664961e-05, "loss": 0.2138, "step": 10309500 }, { "epoch": 6.18, "learning_rate": 1.2758164366104397e-05, "loss": 0.2191, "step": 10310000 }, { "epoch": 6.18, "learning_rate": 1.275606440054383e-05, "loss": 0.2142, "step": 10310500 }, { "epoch": 6.18, "learning_rate": 1.2753964434983267e-05, "loss": 0.2194, "step": 10311000 }, { "epoch": 6.18, "learning_rate": 1.27518644694227e-05, "loss": 0.2171, "step": 10311500 }, { "epoch": 6.18, "learning_rate": 1.2749764503862136e-05, "loss": 0.2176, "step": 10312000 }, { "epoch": 6.18, "learning_rate": 1.2747664538301572e-05, "loss": 0.2155, "step": 10312500 }, { "epoch": 6.18, "learning_rate": 1.2745568772672128e-05, "loss": 0.2155, "step": 10313000 }, { "epoch": 6.18, "learning_rate": 1.2743473007042683e-05, "loss": 0.2166, "step": 10313500 }, { "epoch": 6.18, "learning_rate": 1.274137304148212e-05, "loss": 0.2171, "step": 10314000 }, { "epoch": 6.18, "learning_rate": 1.2739273075921555e-05, "loss": 0.2173, "step": 10314500 }, { "epoch": 6.18, "learning_rate": 1.273717311036099e-05, "loss": 0.2214, "step": 10315000 }, { "epoch": 6.18, "learning_rate": 1.2735073144800425e-05, "loss": 0.2154, "step": 10315500 }, { "epoch": 6.18, "learning_rate": 1.273297737917098e-05, "loss": 0.2175, "step": 10316000 }, { "epoch": 6.19, "learning_rate": 1.2730877413610417e-05, "loss": 0.2188, "step": 10316500 }, { "epoch": 6.19, "learning_rate": 1.2728781647980973e-05, "loss": 0.2239, "step": 10317000 }, { "epoch": 6.19, "learning_rate": 1.2726681682420408e-05, "loss": 0.2115, "step": 10317500 }, { "epoch": 6.19, "learning_rate": 1.2724581716859845e-05, "loss": 0.2192, "step": 10318000 }, { "epoch": 6.19, "learning_rate": 1.2722481751299278e-05, "loss": 0.2162, "step": 10318500 }, { "epoch": 6.19, "learning_rate": 1.2720381785738713e-05, "loss": 0.221, "step": 10319000 }, { "epoch": 6.19, "learning_rate": 1.271828182017815e-05, "loss": 0.2174, "step": 10319500 }, { "epoch": 6.19, "learning_rate": 1.2716181854617584e-05, "loss": 0.2201, "step": 10320000 }, { "epoch": 6.19, "learning_rate": 1.271408188905702e-05, "loss": 0.2152, "step": 10320500 }, { "epoch": 6.19, "learning_rate": 1.2711981923496454e-05, "loss": 0.2155, "step": 10321000 }, { "epoch": 6.19, "learning_rate": 1.270988615786701e-05, "loss": 0.2147, "step": 10321500 }, { "epoch": 6.19, "learning_rate": 1.2707786192306446e-05, "loss": 0.2177, "step": 10322000 }, { "epoch": 6.19, "learning_rate": 1.2705686226745881e-05, "loss": 0.2166, "step": 10322500 }, { "epoch": 6.19, "learning_rate": 1.2703586261185316e-05, "loss": 0.2172, "step": 10323000 }, { "epoch": 6.19, "learning_rate": 1.2701486295624751e-05, "loss": 0.2196, "step": 10323500 }, { "epoch": 6.19, "learning_rate": 1.2699386330064187e-05, "loss": 0.2211, "step": 10324000 }, { "epoch": 6.19, "learning_rate": 1.2697286364503622e-05, "loss": 0.2262, "step": 10324500 }, { "epoch": 6.19, "learning_rate": 1.2695186398943057e-05, "loss": 0.2171, "step": 10325000 }, { "epoch": 6.19, "learning_rate": 1.2693090633313614e-05, "loss": 0.2218, "step": 10325500 }, { "epoch": 6.19, "learning_rate": 1.2690990667753049e-05, "loss": 0.2181, "step": 10326000 }, { "epoch": 6.19, "learning_rate": 1.2688894902123606e-05, "loss": 0.2185, "step": 10326500 }, { "epoch": 6.19, "learning_rate": 1.268679493656304e-05, "loss": 0.2144, "step": 10327000 }, { "epoch": 6.19, "learning_rate": 1.2684694971002476e-05, "loss": 0.2144, "step": 10327500 }, { "epoch": 6.19, "learning_rate": 1.2682595005441911e-05, "loss": 0.215, "step": 10328000 }, { "epoch": 6.19, "learning_rate": 1.2680495039881345e-05, "loss": 0.2175, "step": 10328500 }, { "epoch": 6.19, "learning_rate": 1.2678395074320782e-05, "loss": 0.2193, "step": 10329000 }, { "epoch": 6.19, "learning_rate": 1.2676299308691337e-05, "loss": 0.221, "step": 10329500 }, { "epoch": 6.19, "learning_rate": 1.2674199343130772e-05, "loss": 0.2178, "step": 10330000 }, { "epoch": 6.19, "learning_rate": 1.2672099377570207e-05, "loss": 0.2153, "step": 10330500 }, { "epoch": 6.19, "learning_rate": 1.2669999412009642e-05, "loss": 0.2211, "step": 10331000 }, { "epoch": 6.19, "learning_rate": 1.2667899446449078e-05, "loss": 0.2178, "step": 10331500 }, { "epoch": 6.19, "learning_rate": 1.2665799480888513e-05, "loss": 0.2124, "step": 10332000 }, { "epoch": 6.19, "learning_rate": 1.2663699515327948e-05, "loss": 0.2162, "step": 10332500 }, { "epoch": 6.2, "learning_rate": 1.2661599549767385e-05, "loss": 0.2235, "step": 10333000 }, { "epoch": 6.2, "learning_rate": 1.265950378413794e-05, "loss": 0.2218, "step": 10333500 }, { "epoch": 6.2, "learning_rate": 1.2657403818577375e-05, "loss": 0.2143, "step": 10334000 }, { "epoch": 6.2, "learning_rate": 1.2655308052947932e-05, "loss": 0.2135, "step": 10334500 }, { "epoch": 6.2, "learning_rate": 1.2653208087387367e-05, "loss": 0.2162, "step": 10335000 }, { "epoch": 6.2, "learning_rate": 1.26511081218268e-05, "loss": 0.2175, "step": 10335500 }, { "epoch": 6.2, "learning_rate": 1.2649008156266238e-05, "loss": 0.2134, "step": 10336000 }, { "epoch": 6.2, "learning_rate": 1.2646908190705673e-05, "loss": 0.215, "step": 10336500 }, { "epoch": 6.2, "learning_rate": 1.2644808225145108e-05, "loss": 0.2189, "step": 10337000 }, { "epoch": 6.2, "learning_rate": 1.2642708259584543e-05, "loss": 0.2231, "step": 10337500 }, { "epoch": 6.2, "learning_rate": 1.2640612493955098e-05, "loss": 0.2222, "step": 10338000 }, { "epoch": 6.2, "learning_rate": 1.2638512528394535e-05, "loss": 0.2237, "step": 10338500 }, { "epoch": 6.2, "learning_rate": 1.263641256283397e-05, "loss": 0.2186, "step": 10339000 }, { "epoch": 6.2, "learning_rate": 1.2634312597273404e-05, "loss": 0.2147, "step": 10339500 }, { "epoch": 6.2, "learning_rate": 1.263221263171284e-05, "loss": 0.217, "step": 10340000 }, { "epoch": 6.2, "learning_rate": 1.2630116866083396e-05, "loss": 0.219, "step": 10340500 }, { "epoch": 6.2, "learning_rate": 1.2628016900522831e-05, "loss": 0.2146, "step": 10341000 }, { "epoch": 6.2, "learning_rate": 1.2625916934962266e-05, "loss": 0.2106, "step": 10341500 }, { "epoch": 6.2, "learning_rate": 1.2623816969401701e-05, "loss": 0.2228, "step": 10342000 }, { "epoch": 6.2, "learning_rate": 1.2621717003841136e-05, "loss": 0.2163, "step": 10342500 }, { "epoch": 6.2, "learning_rate": 1.2619621238211693e-05, "loss": 0.2178, "step": 10343000 }, { "epoch": 6.2, "learning_rate": 1.2617521272651129e-05, "loss": 0.2176, "step": 10343500 }, { "epoch": 6.2, "learning_rate": 1.2615421307090564e-05, "loss": 0.2188, "step": 10344000 }, { "epoch": 6.2, "learning_rate": 1.2613321341529999e-05, "loss": 0.2153, "step": 10344500 }, { "epoch": 6.2, "learning_rate": 1.2611221375969434e-05, "loss": 0.2174, "step": 10345000 }, { "epoch": 6.2, "learning_rate": 1.2609125610339991e-05, "loss": 0.214, "step": 10345500 }, { "epoch": 6.2, "learning_rate": 1.2607025644779426e-05, "loss": 0.217, "step": 10346000 }, { "epoch": 6.2, "learning_rate": 1.260492567921886e-05, "loss": 0.2112, "step": 10346500 }, { "epoch": 6.2, "learning_rate": 1.2602825713658296e-05, "loss": 0.2195, "step": 10347000 }, { "epoch": 6.2, "learning_rate": 1.2600725748097732e-05, "loss": 0.2111, "step": 10347500 }, { "epoch": 6.2, "learning_rate": 1.2598625782537167e-05, "loss": 0.213, "step": 10348000 }, { "epoch": 6.2, "learning_rate": 1.2596530016907724e-05, "loss": 0.2188, "step": 10348500 }, { "epoch": 6.2, "learning_rate": 1.2594430051347157e-05, "loss": 0.2148, "step": 10349000 }, { "epoch": 6.2, "learning_rate": 1.2592330085786594e-05, "loss": 0.2142, "step": 10349500 }, { "epoch": 6.21, "learning_rate": 1.2590230120226028e-05, "loss": 0.2191, "step": 10350000 }, { "epoch": 6.21, "learning_rate": 1.2588130154665463e-05, "loss": 0.2163, "step": 10350500 }, { "epoch": 6.21, "learning_rate": 1.25860301891049e-05, "loss": 0.2206, "step": 10351000 }, { "epoch": 6.21, "learning_rate": 1.2583934423475455e-05, "loss": 0.2186, "step": 10351500 }, { "epoch": 6.21, "learning_rate": 1.258183445791489e-05, "loss": 0.2129, "step": 10352000 }, { "epoch": 6.21, "learning_rate": 1.2579734492354325e-05, "loss": 0.2133, "step": 10352500 }, { "epoch": 6.21, "learning_rate": 1.257763452679376e-05, "loss": 0.2135, "step": 10353000 }, { "epoch": 6.21, "learning_rate": 1.2575534561233195e-05, "loss": 0.223, "step": 10353500 }, { "epoch": 6.21, "learning_rate": 1.257343459567263e-05, "loss": 0.2194, "step": 10354000 }, { "epoch": 6.21, "learning_rate": 1.2571334630112066e-05, "loss": 0.2137, "step": 10354500 }, { "epoch": 6.21, "learning_rate": 1.2569234664551503e-05, "loss": 0.219, "step": 10355000 }, { "epoch": 6.21, "learning_rate": 1.2567138898922058e-05, "loss": 0.2156, "step": 10355500 }, { "epoch": 6.21, "learning_rate": 1.2565038933361493e-05, "loss": 0.2137, "step": 10356000 }, { "epoch": 6.21, "learning_rate": 1.2562938967800928e-05, "loss": 0.2137, "step": 10356500 }, { "epoch": 6.21, "learning_rate": 1.2560839002240363e-05, "loss": 0.2249, "step": 10357000 }, { "epoch": 6.21, "learning_rate": 1.2558743236610919e-05, "loss": 0.2164, "step": 10357500 }, { "epoch": 6.21, "learning_rate": 1.2556643271050355e-05, "loss": 0.2208, "step": 10358000 }, { "epoch": 6.21, "learning_rate": 1.2554543305489789e-05, "loss": 0.2126, "step": 10358500 }, { "epoch": 6.21, "learning_rate": 1.2552443339929224e-05, "loss": 0.2201, "step": 10359000 }, { "epoch": 6.21, "learning_rate": 1.2550343374368661e-05, "loss": 0.2164, "step": 10359500 }, { "epoch": 6.21, "learning_rate": 1.2548243408808094e-05, "loss": 0.211, "step": 10360000 }, { "epoch": 6.21, "learning_rate": 1.2546143443247531e-05, "loss": 0.2206, "step": 10360500 }, { "epoch": 6.21, "learning_rate": 1.2544047677618086e-05, "loss": 0.2171, "step": 10361000 }, { "epoch": 6.21, "learning_rate": 1.2541947712057522e-05, "loss": 0.2161, "step": 10361500 }, { "epoch": 6.21, "learning_rate": 1.2539847746496958e-05, "loss": 0.2129, "step": 10362000 }, { "epoch": 6.21, "learning_rate": 1.2537747780936392e-05, "loss": 0.2116, "step": 10362500 }, { "epoch": 6.21, "learning_rate": 1.2535647815375827e-05, "loss": 0.2167, "step": 10363000 }, { "epoch": 6.21, "learning_rate": 1.2533547849815264e-05, "loss": 0.214, "step": 10363500 }, { "epoch": 6.21, "learning_rate": 1.2531447884254697e-05, "loss": 0.2122, "step": 10364000 }, { "epoch": 6.21, "learning_rate": 1.2529347918694134e-05, "loss": 0.2135, "step": 10364500 }, { "epoch": 6.21, "learning_rate": 1.2527256352995811e-05, "loss": 0.2138, "step": 10365000 }, { "epoch": 6.21, "learning_rate": 1.2525156387435246e-05, "loss": 0.2106, "step": 10365500 }, { "epoch": 6.21, "learning_rate": 1.2523056421874682e-05, "loss": 0.2123, "step": 10366000 }, { "epoch": 6.22, "learning_rate": 1.2520956456314117e-05, "loss": 0.216, "step": 10366500 }, { "epoch": 6.22, "learning_rate": 1.251885649075355e-05, "loss": 0.22, "step": 10367000 }, { "epoch": 6.22, "learning_rate": 1.2516756525192987e-05, "loss": 0.2192, "step": 10367500 }, { "epoch": 6.22, "learning_rate": 1.2514660759563542e-05, "loss": 0.2149, "step": 10368000 }, { "epoch": 6.22, "learning_rate": 1.2512560794002977e-05, "loss": 0.2203, "step": 10368500 }, { "epoch": 6.22, "learning_rate": 1.2510460828442414e-05, "loss": 0.2174, "step": 10369000 }, { "epoch": 6.22, "learning_rate": 1.2508360862881848e-05, "loss": 0.211, "step": 10369500 }, { "epoch": 6.22, "learning_rate": 1.2506260897321283e-05, "loss": 0.2175, "step": 10370000 }, { "epoch": 6.22, "learning_rate": 1.250416513169184e-05, "loss": 0.2145, "step": 10370500 }, { "epoch": 6.22, "learning_rate": 1.2502065166131275e-05, "loss": 0.2182, "step": 10371000 }, { "epoch": 6.22, "learning_rate": 1.249996520057071e-05, "loss": 0.2177, "step": 10371500 }, { "epoch": 6.22, "learning_rate": 1.2497865235010145e-05, "loss": 0.223, "step": 10372000 }, { "epoch": 6.22, "learning_rate": 1.249576526944958e-05, "loss": 0.2132, "step": 10372500 }, { "epoch": 6.22, "learning_rate": 1.2493665303889017e-05, "loss": 0.2141, "step": 10373000 }, { "epoch": 6.22, "learning_rate": 1.2491569538259573e-05, "loss": 0.2194, "step": 10373500 }, { "epoch": 6.22, "learning_rate": 1.2489469572699008e-05, "loss": 0.2132, "step": 10374000 }, { "epoch": 6.22, "learning_rate": 1.2487369607138443e-05, "loss": 0.215, "step": 10374500 }, { "epoch": 6.22, "learning_rate": 1.2485269641577878e-05, "loss": 0.2215, "step": 10375000 }, { "epoch": 6.22, "learning_rate": 1.2483173875948433e-05, "loss": 0.2162, "step": 10375500 }, { "epoch": 6.22, "learning_rate": 1.248107391038787e-05, "loss": 0.2113, "step": 10376000 }, { "epoch": 6.22, "learning_rate": 1.2478973944827305e-05, "loss": 0.2166, "step": 10376500 }, { "epoch": 6.22, "learning_rate": 1.2476873979266739e-05, "loss": 0.2163, "step": 10377000 }, { "epoch": 6.22, "learning_rate": 1.2474774013706176e-05, "loss": 0.2156, "step": 10377500 }, { "epoch": 6.22, "learning_rate": 1.2472674048145609e-05, "loss": 0.22, "step": 10378000 }, { "epoch": 6.22, "learning_rate": 1.2470574082585046e-05, "loss": 0.2151, "step": 10378500 }, { "epoch": 6.22, "learning_rate": 1.2468474117024481e-05, "loss": 0.2171, "step": 10379000 }, { "epoch": 6.22, "learning_rate": 1.2466378351395036e-05, "loss": 0.2179, "step": 10379500 }, { "epoch": 6.22, "learning_rate": 1.2464278385834473e-05, "loss": 0.2191, "step": 10380000 }, { "epoch": 6.22, "learning_rate": 1.2462182620205028e-05, "loss": 0.218, "step": 10380500 }, { "epoch": 6.22, "learning_rate": 1.2460082654644464e-05, "loss": 0.214, "step": 10381000 }, { "epoch": 6.22, "learning_rate": 1.2457982689083899e-05, "loss": 0.2143, "step": 10381500 }, { "epoch": 6.22, "learning_rate": 1.2455882723523334e-05, "loss": 0.2207, "step": 10382000 }, { "epoch": 6.22, "learning_rate": 1.2453782757962769e-05, "loss": 0.2207, "step": 10382500 }, { "epoch": 6.23, "learning_rate": 1.2451682792402204e-05, "loss": 0.2179, "step": 10383000 }, { "epoch": 6.23, "learning_rate": 1.244958282684164e-05, "loss": 0.2191, "step": 10383500 }, { "epoch": 6.23, "learning_rate": 1.2447487061212196e-05, "loss": 0.2109, "step": 10384000 }, { "epoch": 6.23, "learning_rate": 1.2445387095651632e-05, "loss": 0.2165, "step": 10384500 }, { "epoch": 6.23, "learning_rate": 1.2443287130091067e-05, "loss": 0.2162, "step": 10385000 }, { "epoch": 6.23, "learning_rate": 1.2441187164530502e-05, "loss": 0.2154, "step": 10385500 }, { "epoch": 6.23, "learning_rate": 1.2439091398901059e-05, "loss": 0.2189, "step": 10386000 }, { "epoch": 6.23, "learning_rate": 1.2436991433340492e-05, "loss": 0.2139, "step": 10386500 }, { "epoch": 6.23, "learning_rate": 1.2434891467779929e-05, "loss": 0.2185, "step": 10387000 }, { "epoch": 6.23, "learning_rate": 1.2432791502219363e-05, "loss": 0.2157, "step": 10387500 }, { "epoch": 6.23, "learning_rate": 1.2430691536658798e-05, "loss": 0.2164, "step": 10388000 }, { "epoch": 6.23, "learning_rate": 1.2428591571098235e-05, "loss": 0.2211, "step": 10388500 }, { "epoch": 6.23, "learning_rate": 1.242649580546879e-05, "loss": 0.2175, "step": 10389000 }, { "epoch": 6.23, "learning_rate": 1.2424395839908225e-05, "loss": 0.2188, "step": 10389500 }, { "epoch": 6.23, "learning_rate": 1.242229587434766e-05, "loss": 0.2194, "step": 10390000 }, { "epoch": 6.23, "learning_rate": 1.2420195908787095e-05, "loss": 0.2168, "step": 10390500 }, { "epoch": 6.23, "learning_rate": 1.2418095943226532e-05, "loss": 0.2126, "step": 10391000 }, { "epoch": 6.23, "learning_rate": 1.2415995977665966e-05, "loss": 0.2191, "step": 10391500 }, { "epoch": 6.23, "learning_rate": 1.24138960121054e-05, "loss": 0.2188, "step": 10392000 }, { "epoch": 6.23, "learning_rate": 1.2411800246475958e-05, "loss": 0.2168, "step": 10392500 }, { "epoch": 6.23, "learning_rate": 1.2409700280915393e-05, "loss": 0.2133, "step": 10393000 }, { "epoch": 6.23, "learning_rate": 1.2407600315354828e-05, "loss": 0.2182, "step": 10393500 }, { "epoch": 6.23, "learning_rate": 1.2405500349794263e-05, "loss": 0.2214, "step": 10394000 }, { "epoch": 6.23, "learning_rate": 1.240340458416482e-05, "loss": 0.2145, "step": 10394500 }, { "epoch": 6.23, "learning_rate": 1.2401304618604254e-05, "loss": 0.2163, "step": 10395000 }, { "epoch": 6.23, "learning_rate": 1.239920465304369e-05, "loss": 0.2125, "step": 10395500 }, { "epoch": 6.23, "learning_rate": 1.2397104687483124e-05, "loss": 0.2181, "step": 10396000 }, { "epoch": 6.23, "learning_rate": 1.239500472192256e-05, "loss": 0.2177, "step": 10396500 }, { "epoch": 6.23, "learning_rate": 1.2392904756361996e-05, "loss": 0.2131, "step": 10397000 }, { "epoch": 6.23, "learning_rate": 1.239080479080143e-05, "loss": 0.2167, "step": 10397500 }, { "epoch": 6.23, "learning_rate": 1.2388704825240866e-05, "loss": 0.2208, "step": 10398000 }, { "epoch": 6.23, "learning_rate": 1.2386609059611421e-05, "loss": 0.2157, "step": 10398500 }, { "epoch": 6.23, "learning_rate": 1.2384509094050857e-05, "loss": 0.2201, "step": 10399000 }, { "epoch": 6.23, "learning_rate": 1.2382413328421414e-05, "loss": 0.2186, "step": 10399500 }, { "epoch": 6.24, "learning_rate": 1.2380313362860849e-05, "loss": 0.2169, "step": 10400000 }, { "epoch": 6.24, "eval_loss": 0.20847462117671967, "eval_runtime": 1450.6848, "eval_samples_per_second": 363.084, "eval_steps_per_second": 60.514, "step": 10400000 }, { "epoch": 6.24, "learning_rate": 1.2378213397300284e-05, "loss": 0.215, "step": 10400500 }, { "epoch": 6.24, "learning_rate": 1.2376113431739719e-05, "loss": 0.2154, "step": 10401000 }, { "epoch": 6.24, "learning_rate": 1.2374013466179154e-05, "loss": 0.2187, "step": 10401500 }, { "epoch": 6.24, "learning_rate": 1.2371913500618591e-05, "loss": 0.2172, "step": 10402000 }, { "epoch": 6.24, "learning_rate": 1.2369813535058025e-05, "loss": 0.2228, "step": 10402500 }, { "epoch": 6.24, "learning_rate": 1.236771356949746e-05, "loss": 0.2156, "step": 10403000 }, { "epoch": 6.24, "learning_rate": 1.2365617803868017e-05, "loss": 0.2138, "step": 10403500 }, { "epoch": 6.24, "learning_rate": 1.2363517838307452e-05, "loss": 0.2076, "step": 10404000 }, { "epoch": 6.24, "learning_rate": 1.2361417872746885e-05, "loss": 0.2204, "step": 10404500 }, { "epoch": 6.24, "learning_rate": 1.2359317907186322e-05, "loss": 0.2205, "step": 10405000 }, { "epoch": 6.24, "learning_rate": 1.2357222141556877e-05, "loss": 0.224, "step": 10405500 }, { "epoch": 6.24, "learning_rate": 1.2355122175996313e-05, "loss": 0.2155, "step": 10406000 }, { "epoch": 6.24, "learning_rate": 1.235302221043575e-05, "loss": 0.2206, "step": 10406500 }, { "epoch": 6.24, "learning_rate": 1.2350922244875183e-05, "loss": 0.2097, "step": 10407000 }, { "epoch": 6.24, "learning_rate": 1.234882227931462e-05, "loss": 0.2135, "step": 10407500 }, { "epoch": 6.24, "learning_rate": 1.2346722313754055e-05, "loss": 0.2189, "step": 10408000 }, { "epoch": 6.24, "learning_rate": 1.234462654812461e-05, "loss": 0.2227, "step": 10408500 }, { "epoch": 6.24, "learning_rate": 1.2342526582564047e-05, "loss": 0.209, "step": 10409000 }, { "epoch": 6.24, "learning_rate": 1.234042661700348e-05, "loss": 0.216, "step": 10409500 }, { "epoch": 6.24, "learning_rate": 1.2338326651442916e-05, "loss": 0.2186, "step": 10410000 }, { "epoch": 6.24, "learning_rate": 1.2336226685882352e-05, "loss": 0.2155, "step": 10410500 }, { "epoch": 6.24, "learning_rate": 1.2334130920252908e-05, "loss": 0.2142, "step": 10411000 }, { "epoch": 6.24, "learning_rate": 1.2332030954692343e-05, "loss": 0.2146, "step": 10411500 }, { "epoch": 6.24, "learning_rate": 1.2329930989131778e-05, "loss": 0.2179, "step": 10412000 }, { "epoch": 6.24, "learning_rate": 1.2327831023571213e-05, "loss": 0.2216, "step": 10412500 }, { "epoch": 6.24, "learning_rate": 1.232573105801065e-05, "loss": 0.2179, "step": 10413000 }, { "epoch": 6.24, "learning_rate": 1.2323631092450083e-05, "loss": 0.2137, "step": 10413500 }, { "epoch": 6.24, "learning_rate": 1.2321531126889519e-05, "loss": 0.2157, "step": 10414000 }, { "epoch": 6.24, "learning_rate": 1.2319431161328954e-05, "loss": 0.2197, "step": 10414500 }, { "epoch": 6.24, "learning_rate": 1.231733539569951e-05, "loss": 0.2179, "step": 10415000 }, { "epoch": 6.24, "learning_rate": 1.2315235430138944e-05, "loss": 0.2207, "step": 10415500 }, { "epoch": 6.24, "learning_rate": 1.2313135464578381e-05, "loss": 0.2138, "step": 10416000 }, { "epoch": 6.25, "learning_rate": 1.2311035499017816e-05, "loss": 0.2187, "step": 10416500 }, { "epoch": 6.25, "learning_rate": 1.2308943933319493e-05, "loss": 0.2175, "step": 10417000 }, { "epoch": 6.25, "learning_rate": 1.2306843967758928e-05, "loss": 0.2208, "step": 10417500 }, { "epoch": 6.25, "learning_rate": 1.2304744002198364e-05, "loss": 0.2211, "step": 10418000 }, { "epoch": 6.25, "learning_rate": 1.2302644036637799e-05, "loss": 0.2195, "step": 10418500 }, { "epoch": 6.25, "learning_rate": 1.2300544071077234e-05, "loss": 0.2161, "step": 10419000 }, { "epoch": 6.25, "learning_rate": 1.2298444105516669e-05, "loss": 0.2181, "step": 10419500 }, { "epoch": 6.25, "learning_rate": 1.2296344139956106e-05, "loss": 0.2193, "step": 10420000 }, { "epoch": 6.25, "learning_rate": 1.229424417439554e-05, "loss": 0.2173, "step": 10420500 }, { "epoch": 6.25, "learning_rate": 1.2292148408766096e-05, "loss": 0.2147, "step": 10421000 }, { "epoch": 6.25, "learning_rate": 1.2290048443205531e-05, "loss": 0.2173, "step": 10421500 }, { "epoch": 6.25, "learning_rate": 1.2287948477644967e-05, "loss": 0.2149, "step": 10422000 }, { "epoch": 6.25, "learning_rate": 1.2285848512084402e-05, "loss": 0.215, "step": 10422500 }, { "epoch": 6.25, "learning_rate": 1.2283752746454959e-05, "loss": 0.2175, "step": 10423000 }, { "epoch": 6.25, "learning_rate": 1.2281652780894394e-05, "loss": 0.2175, "step": 10423500 }, { "epoch": 6.25, "learning_rate": 1.2279552815333827e-05, "loss": 0.2184, "step": 10424000 }, { "epoch": 6.25, "learning_rate": 1.2277452849773264e-05, "loss": 0.2181, "step": 10424500 }, { "epoch": 6.25, "learning_rate": 1.2275352884212698e-05, "loss": 0.2176, "step": 10425000 }, { "epoch": 6.25, "learning_rate": 1.2273252918652134e-05, "loss": 0.2187, "step": 10425500 }, { "epoch": 6.25, "learning_rate": 1.227115295309157e-05, "loss": 0.2159, "step": 10426000 }, { "epoch": 6.25, "learning_rate": 1.2269052987531003e-05, "loss": 0.212, "step": 10426500 }, { "epoch": 6.25, "learning_rate": 1.2266957221901562e-05, "loss": 0.2104, "step": 10427000 }, { "epoch": 6.25, "learning_rate": 1.2264857256340995e-05, "loss": 0.2212, "step": 10427500 }, { "epoch": 6.25, "learning_rate": 1.226275729078043e-05, "loss": 0.2208, "step": 10428000 }, { "epoch": 6.25, "learning_rate": 1.2260657325219867e-05, "loss": 0.2134, "step": 10428500 }, { "epoch": 6.25, "learning_rate": 1.2258561559590422e-05, "loss": 0.2208, "step": 10429000 }, { "epoch": 6.25, "learning_rate": 1.2256461594029858e-05, "loss": 0.216, "step": 10429500 }, { "epoch": 6.25, "learning_rate": 1.2254365828400415e-05, "loss": 0.211, "step": 10430000 }, { "epoch": 6.25, "learning_rate": 1.225226586283985e-05, "loss": 0.2176, "step": 10430500 }, { "epoch": 6.25, "learning_rate": 1.2250165897279283e-05, "loss": 0.2168, "step": 10431000 }, { "epoch": 6.25, "learning_rate": 1.224806593171872e-05, "loss": 0.2222, "step": 10431500 }, { "epoch": 6.25, "learning_rate": 1.2245965966158155e-05, "loss": 0.2193, "step": 10432000 }, { "epoch": 6.25, "learning_rate": 1.224387020052871e-05, "loss": 0.2226, "step": 10432500 }, { "epoch": 6.26, "learning_rate": 1.2241770234968147e-05, "loss": 0.2198, "step": 10433000 }, { "epoch": 6.26, "learning_rate": 1.223967026940758e-05, "loss": 0.2148, "step": 10433500 }, { "epoch": 6.26, "learning_rate": 1.2237570303847018e-05, "loss": 0.2169, "step": 10434000 }, { "epoch": 6.26, "learning_rate": 1.2235470338286451e-05, "loss": 0.2155, "step": 10434500 }, { "epoch": 6.26, "learning_rate": 1.2233370372725886e-05, "loss": 0.216, "step": 10435000 }, { "epoch": 6.26, "learning_rate": 1.2231270407165323e-05, "loss": 0.2148, "step": 10435500 }, { "epoch": 6.26, "learning_rate": 1.2229170441604757e-05, "loss": 0.211, "step": 10436000 }, { "epoch": 6.26, "learning_rate": 1.2227074675975313e-05, "loss": 0.2174, "step": 10436500 }, { "epoch": 6.26, "learning_rate": 1.222497891034587e-05, "loss": 0.219, "step": 10437000 }, { "epoch": 6.26, "learning_rate": 1.2222878944785306e-05, "loss": 0.218, "step": 10437500 }, { "epoch": 6.26, "learning_rate": 1.222077897922474e-05, "loss": 0.2153, "step": 10438000 }, { "epoch": 6.26, "learning_rate": 1.2218679013664176e-05, "loss": 0.2149, "step": 10438500 }, { "epoch": 6.26, "learning_rate": 1.2216579048103611e-05, "loss": 0.2196, "step": 10439000 }, { "epoch": 6.26, "learning_rate": 1.2214479082543046e-05, "loss": 0.218, "step": 10439500 }, { "epoch": 6.26, "learning_rate": 1.2212383316913603e-05, "loss": 0.2212, "step": 10440000 }, { "epoch": 6.26, "learning_rate": 1.2210283351353037e-05, "loss": 0.216, "step": 10440500 }, { "epoch": 6.26, "learning_rate": 1.2208183385792473e-05, "loss": 0.2142, "step": 10441000 }, { "epoch": 6.26, "learning_rate": 1.2206083420231909e-05, "loss": 0.2148, "step": 10441500 }, { "epoch": 6.26, "learning_rate": 1.2203983454671342e-05, "loss": 0.2134, "step": 10442000 }, { "epoch": 6.26, "learning_rate": 1.2201883489110779e-05, "loss": 0.2184, "step": 10442500 }, { "epoch": 6.26, "learning_rate": 1.2199783523550212e-05, "loss": 0.2171, "step": 10443000 }, { "epoch": 6.26, "learning_rate": 1.219768355798965e-05, "loss": 0.2152, "step": 10443500 }, { "epoch": 6.26, "learning_rate": 1.2195591992291326e-05, "loss": 0.2144, "step": 10444000 }, { "epoch": 6.26, "learning_rate": 1.2193492026730761e-05, "loss": 0.222, "step": 10444500 }, { "epoch": 6.26, "learning_rate": 1.2191392061170197e-05, "loss": 0.2129, "step": 10445000 }, { "epoch": 6.26, "learning_rate": 1.2189292095609632e-05, "loss": 0.2171, "step": 10445500 }, { "epoch": 6.26, "learning_rate": 1.2187192130049067e-05, "loss": 0.2166, "step": 10446000 }, { "epoch": 6.26, "learning_rate": 1.2185092164488502e-05, "loss": 0.2118, "step": 10446500 }, { "epoch": 6.26, "learning_rate": 1.2182996398859059e-05, "loss": 0.2139, "step": 10447000 }, { "epoch": 6.26, "learning_rate": 1.2180896433298493e-05, "loss": 0.2169, "step": 10447500 }, { "epoch": 6.26, "learning_rate": 1.217879646773793e-05, "loss": 0.2197, "step": 10448000 }, { "epoch": 6.26, "learning_rate": 1.2176696502177365e-05, "loss": 0.2104, "step": 10448500 }, { "epoch": 6.26, "learning_rate": 1.2174596536616798e-05, "loss": 0.2157, "step": 10449000 }, { "epoch": 6.26, "learning_rate": 1.2172496571056235e-05, "loss": 0.2171, "step": 10449500 }, { "epoch": 6.27, "learning_rate": 1.217039660549567e-05, "loss": 0.2144, "step": 10450000 }, { "epoch": 6.27, "learning_rate": 1.2168300839866225e-05, "loss": 0.2086, "step": 10450500 }, { "epoch": 6.27, "learning_rate": 1.2166200874305662e-05, "loss": 0.2127, "step": 10451000 }, { "epoch": 6.27, "learning_rate": 1.2164100908745096e-05, "loss": 0.2159, "step": 10451500 }, { "epoch": 6.27, "learning_rate": 1.2162000943184532e-05, "loss": 0.2139, "step": 10452000 }, { "epoch": 6.27, "learning_rate": 1.2159900977623968e-05, "loss": 0.2158, "step": 10452500 }, { "epoch": 6.27, "learning_rate": 1.2157801012063401e-05, "loss": 0.2164, "step": 10453000 }, { "epoch": 6.27, "learning_rate": 1.2155701046502838e-05, "loss": 0.2195, "step": 10453500 }, { "epoch": 6.27, "learning_rate": 1.2153605280873393e-05, "loss": 0.2153, "step": 10454000 }, { "epoch": 6.27, "learning_rate": 1.2151505315312828e-05, "loss": 0.2168, "step": 10454500 }, { "epoch": 6.27, "learning_rate": 1.2149405349752263e-05, "loss": 0.2113, "step": 10455000 }, { "epoch": 6.27, "learning_rate": 1.2147305384191699e-05, "loss": 0.2192, "step": 10455500 }, { "epoch": 6.27, "learning_rate": 1.2145205418631135e-05, "loss": 0.2184, "step": 10456000 }, { "epoch": 6.27, "learning_rate": 1.2143105453070569e-05, "loss": 0.2198, "step": 10456500 }, { "epoch": 6.27, "learning_rate": 1.2141005487510004e-05, "loss": 0.2169, "step": 10457000 }, { "epoch": 6.27, "learning_rate": 1.2138905521949441e-05, "loss": 0.2118, "step": 10457500 }, { "epoch": 6.27, "learning_rate": 1.2136809756319996e-05, "loss": 0.2149, "step": 10458000 }, { "epoch": 6.27, "learning_rate": 1.2134709790759431e-05, "loss": 0.2194, "step": 10458500 }, { "epoch": 6.27, "learning_rate": 1.2132609825198866e-05, "loss": 0.2185, "step": 10459000 }, { "epoch": 6.27, "learning_rate": 1.2130514059569423e-05, "loss": 0.2178, "step": 10459500 }, { "epoch": 6.27, "learning_rate": 1.2128414094008857e-05, "loss": 0.212, "step": 10460000 }, { "epoch": 6.27, "learning_rate": 1.2126314128448294e-05, "loss": 0.214, "step": 10460500 }, { "epoch": 6.27, "learning_rate": 1.2124214162887729e-05, "loss": 0.2135, "step": 10461000 }, { "epoch": 6.27, "learning_rate": 1.2122114197327164e-05, "loss": 0.2173, "step": 10461500 }, { "epoch": 6.27, "learning_rate": 1.21200142317666e-05, "loss": 0.2161, "step": 10462000 }, { "epoch": 6.27, "learning_rate": 1.2117918466137154e-05, "loss": 0.215, "step": 10462500 }, { "epoch": 6.27, "learning_rate": 1.2115818500576591e-05, "loss": 0.2123, "step": 10463000 }, { "epoch": 6.27, "learning_rate": 1.2113718535016025e-05, "loss": 0.2191, "step": 10463500 }, { "epoch": 6.27, "learning_rate": 1.211161856945546e-05, "loss": 0.2238, "step": 10464000 }, { "epoch": 6.27, "learning_rate": 1.2109518603894897e-05, "loss": 0.214, "step": 10464500 }, { "epoch": 6.27, "learning_rate": 1.210741863833433e-05, "loss": 0.2204, "step": 10465000 }, { "epoch": 6.27, "learning_rate": 1.2105318672773767e-05, "loss": 0.2181, "step": 10465500 }, { "epoch": 6.27, "learning_rate": 1.2103222907144322e-05, "loss": 0.2147, "step": 10466000 }, { "epoch": 6.28, "learning_rate": 1.2101122941583757e-05, "loss": 0.2193, "step": 10466500 }, { "epoch": 6.28, "learning_rate": 1.2099022976023194e-05, "loss": 0.219, "step": 10467000 }, { "epoch": 6.28, "learning_rate": 1.2096923010462628e-05, "loss": 0.2174, "step": 10467500 }, { "epoch": 6.28, "learning_rate": 1.2094823044902063e-05, "loss": 0.2221, "step": 10468000 }, { "epoch": 6.28, "learning_rate": 1.20927230793415e-05, "loss": 0.2176, "step": 10468500 }, { "epoch": 6.28, "learning_rate": 1.2090623113780933e-05, "loss": 0.2149, "step": 10469000 }, { "epoch": 6.28, "learning_rate": 1.208852314822037e-05, "loss": 0.2222, "step": 10469500 }, { "epoch": 6.28, "learning_rate": 1.2086427382590925e-05, "loss": 0.2142, "step": 10470000 }, { "epoch": 6.28, "learning_rate": 1.208432741703036e-05, "loss": 0.217, "step": 10470500 }, { "epoch": 6.28, "learning_rate": 1.2082231651400916e-05, "loss": 0.2136, "step": 10471000 }, { "epoch": 6.28, "learning_rate": 1.2080131685840353e-05, "loss": 0.2102, "step": 10471500 }, { "epoch": 6.28, "learning_rate": 1.2078031720279786e-05, "loss": 0.2205, "step": 10472000 }, { "epoch": 6.28, "learning_rate": 1.2075931754719223e-05, "loss": 0.2201, "step": 10472500 }, { "epoch": 6.28, "learning_rate": 1.2073835989089778e-05, "loss": 0.2232, "step": 10473000 }, { "epoch": 6.28, "learning_rate": 1.2071736023529213e-05, "loss": 0.2156, "step": 10473500 }, { "epoch": 6.28, "learning_rate": 1.206963605796865e-05, "loss": 0.2176, "step": 10474000 }, { "epoch": 6.28, "learning_rate": 1.2067536092408084e-05, "loss": 0.2161, "step": 10474500 }, { "epoch": 6.28, "learning_rate": 1.2065436126847519e-05, "loss": 0.2135, "step": 10475000 }, { "epoch": 6.28, "learning_rate": 1.2063340361218076e-05, "loss": 0.2152, "step": 10475500 }, { "epoch": 6.28, "learning_rate": 1.2061240395657511e-05, "loss": 0.2115, "step": 10476000 }, { "epoch": 6.28, "learning_rate": 1.2059140430096946e-05, "loss": 0.2147, "step": 10476500 }, { "epoch": 6.28, "learning_rate": 1.2057040464536381e-05, "loss": 0.2164, "step": 10477000 }, { "epoch": 6.28, "learning_rate": 1.2054940498975816e-05, "loss": 0.2169, "step": 10477500 }, { "epoch": 6.28, "learning_rate": 1.2052840533415253e-05, "loss": 0.2133, "step": 10478000 }, { "epoch": 6.28, "learning_rate": 1.2050740567854687e-05, "loss": 0.216, "step": 10478500 }, { "epoch": 6.28, "learning_rate": 1.2048640602294122e-05, "loss": 0.2131, "step": 10479000 }, { "epoch": 6.28, "learning_rate": 1.2046544836664679e-05, "loss": 0.2145, "step": 10479500 }, { "epoch": 6.28, "learning_rate": 1.2044444871104114e-05, "loss": 0.2115, "step": 10480000 }, { "epoch": 6.28, "learning_rate": 1.204234910547467e-05, "loss": 0.2152, "step": 10480500 }, { "epoch": 6.28, "learning_rate": 1.2040249139914106e-05, "loss": 0.218, "step": 10481000 }, { "epoch": 6.28, "learning_rate": 1.2038149174353541e-05, "loss": 0.2203, "step": 10481500 }, { "epoch": 6.28, "learning_rate": 1.2036049208792975e-05, "loss": 0.2137, "step": 10482000 }, { "epoch": 6.28, "learning_rate": 1.2033949243232412e-05, "loss": 0.2164, "step": 10482500 }, { "epoch": 6.28, "learning_rate": 1.2031849277671845e-05, "loss": 0.2208, "step": 10483000 }, { "epoch": 6.29, "learning_rate": 1.2029749312111282e-05, "loss": 0.2217, "step": 10483500 }, { "epoch": 6.29, "learning_rate": 1.2027653546481837e-05, "loss": 0.2141, "step": 10484000 }, { "epoch": 6.29, "learning_rate": 1.2025553580921272e-05, "loss": 0.2179, "step": 10484500 }, { "epoch": 6.29, "learning_rate": 1.2023453615360709e-05, "loss": 0.2165, "step": 10485000 }, { "epoch": 6.29, "learning_rate": 1.2021353649800143e-05, "loss": 0.2168, "step": 10485500 }, { "epoch": 6.29, "learning_rate": 1.2019253684239578e-05, "loss": 0.2094, "step": 10486000 }, { "epoch": 6.29, "learning_rate": 1.2017153718679015e-05, "loss": 0.2151, "step": 10486500 }, { "epoch": 6.29, "learning_rate": 1.201505795304957e-05, "loss": 0.2174, "step": 10487000 }, { "epoch": 6.29, "learning_rate": 1.2012957987489005e-05, "loss": 0.2197, "step": 10487500 }, { "epoch": 6.29, "learning_rate": 1.201085802192844e-05, "loss": 0.2227, "step": 10488000 }, { "epoch": 6.29, "learning_rate": 1.2008758056367875e-05, "loss": 0.2176, "step": 10488500 }, { "epoch": 6.29, "learning_rate": 1.2006658090807312e-05, "loss": 0.2131, "step": 10489000 }, { "epoch": 6.29, "learning_rate": 1.2004558125246746e-05, "loss": 0.2122, "step": 10489500 }, { "epoch": 6.29, "learning_rate": 1.200245815968618e-05, "loss": 0.2226, "step": 10490000 }, { "epoch": 6.29, "learning_rate": 1.2000358194125616e-05, "loss": 0.2189, "step": 10490500 }, { "epoch": 6.29, "learning_rate": 1.1998262428496173e-05, "loss": 0.2155, "step": 10491000 }, { "epoch": 6.29, "learning_rate": 1.1996162462935606e-05, "loss": 0.2187, "step": 10491500 }, { "epoch": 6.29, "learning_rate": 1.1994062497375043e-05, "loss": 0.2165, "step": 10492000 }, { "epoch": 6.29, "learning_rate": 1.1991962531814478e-05, "loss": 0.2231, "step": 10492500 }, { "epoch": 6.29, "learning_rate": 1.1989866766185034e-05, "loss": 0.2101, "step": 10493000 }, { "epoch": 6.29, "learning_rate": 1.198776680062447e-05, "loss": 0.2122, "step": 10493500 }, { "epoch": 6.29, "learning_rate": 1.1985671034995026e-05, "loss": 0.2139, "step": 10494000 }, { "epoch": 6.29, "learning_rate": 1.1983571069434461e-05, "loss": 0.2163, "step": 10494500 }, { "epoch": 6.29, "learning_rate": 1.1981471103873896e-05, "loss": 0.218, "step": 10495000 }, { "epoch": 6.29, "learning_rate": 1.1979371138313331e-05, "loss": 0.218, "step": 10495500 }, { "epoch": 6.29, "learning_rate": 1.1977271172752768e-05, "loss": 0.2178, "step": 10496000 }, { "epoch": 6.29, "learning_rate": 1.1975171207192201e-05, "loss": 0.2171, "step": 10496500 }, { "epoch": 6.29, "learning_rate": 1.1973071241631637e-05, "loss": 0.2213, "step": 10497000 }, { "epoch": 6.29, "learning_rate": 1.1970971276071073e-05, "loss": 0.216, "step": 10497500 }, { "epoch": 6.29, "learning_rate": 1.1968871310510507e-05, "loss": 0.2122, "step": 10498000 }, { "epoch": 6.29, "learning_rate": 1.1966775544881064e-05, "loss": 0.2176, "step": 10498500 }, { "epoch": 6.29, "learning_rate": 1.1964675579320499e-05, "loss": 0.2106, "step": 10499000 }, { "epoch": 6.29, "learning_rate": 1.1962575613759934e-05, "loss": 0.2199, "step": 10499500 }, { "epoch": 6.3, "learning_rate": 1.196047564819937e-05, "loss": 0.2196, "step": 10500000 }, { "epoch": 6.3, "eval_loss": 0.20738865435123444, "eval_runtime": 1452.8622, "eval_samples_per_second": 362.54, "eval_steps_per_second": 60.423, "step": 10500000 }, { "epoch": 6.3, "learning_rate": 1.1958375682638805e-05, "loss": 0.2127, "step": 10500500 }, { "epoch": 6.3, "learning_rate": 1.195627571707824e-05, "loss": 0.2187, "step": 10501000 }, { "epoch": 6.3, "learning_rate": 1.1954175751517675e-05, "loss": 0.219, "step": 10501500 }, { "epoch": 6.3, "learning_rate": 1.1952079985888232e-05, "loss": 0.2188, "step": 10502000 }, { "epoch": 6.3, "learning_rate": 1.1949980020327665e-05, "loss": 0.2136, "step": 10502500 }, { "epoch": 6.3, "learning_rate": 1.1947880054767102e-05, "loss": 0.2138, "step": 10503000 }, { "epoch": 6.3, "learning_rate": 1.1945780089206537e-05, "loss": 0.2124, "step": 10503500 }, { "epoch": 6.3, "learning_rate": 1.1943680123645972e-05, "loss": 0.217, "step": 10504000 }, { "epoch": 6.3, "learning_rate": 1.1941580158085408e-05, "loss": 0.2146, "step": 10504500 }, { "epoch": 6.3, "learning_rate": 1.1939480192524843e-05, "loss": 0.2162, "step": 10505000 }, { "epoch": 6.3, "learning_rate": 1.1937380226964278e-05, "loss": 0.2144, "step": 10505500 }, { "epoch": 6.3, "learning_rate": 1.1935284461334835e-05, "loss": 0.2144, "step": 10506000 }, { "epoch": 6.3, "learning_rate": 1.193318869570539e-05, "loss": 0.216, "step": 10506500 }, { "epoch": 6.3, "learning_rate": 1.1931088730144827e-05, "loss": 0.2154, "step": 10507000 }, { "epoch": 6.3, "learning_rate": 1.192898876458426e-05, "loss": 0.2139, "step": 10507500 }, { "epoch": 6.3, "learning_rate": 1.1926888799023696e-05, "loss": 0.2122, "step": 10508000 }, { "epoch": 6.3, "learning_rate": 1.192478883346313e-05, "loss": 0.2137, "step": 10508500 }, { "epoch": 6.3, "learning_rate": 1.1922693067833688e-05, "loss": 0.2127, "step": 10509000 }, { "epoch": 6.3, "learning_rate": 1.1920593102273121e-05, "loss": 0.2153, "step": 10509500 }, { "epoch": 6.3, "learning_rate": 1.1918493136712558e-05, "loss": 0.2141, "step": 10510000 }, { "epoch": 6.3, "learning_rate": 1.1916393171151993e-05, "loss": 0.216, "step": 10510500 }, { "epoch": 6.3, "learning_rate": 1.1914293205591428e-05, "loss": 0.2154, "step": 10511000 }, { "epoch": 6.3, "learning_rate": 1.1912193240030863e-05, "loss": 0.2137, "step": 10511500 }, { "epoch": 6.3, "learning_rate": 1.1910097474401419e-05, "loss": 0.2223, "step": 10512000 }, { "epoch": 6.3, "learning_rate": 1.1907997508840856e-05, "loss": 0.2165, "step": 10512500 }, { "epoch": 6.3, "learning_rate": 1.190589754328029e-05, "loss": 0.2195, "step": 10513000 }, { "epoch": 6.3, "learning_rate": 1.1903797577719724e-05, "loss": 0.2162, "step": 10513500 }, { "epoch": 6.3, "learning_rate": 1.1901697612159161e-05, "loss": 0.212, "step": 10514000 }, { "epoch": 6.3, "learning_rate": 1.1899597646598596e-05, "loss": 0.216, "step": 10514500 }, { "epoch": 6.3, "learning_rate": 1.189749768103803e-05, "loss": 0.218, "step": 10515000 }, { "epoch": 6.3, "learning_rate": 1.1895401915408588e-05, "loss": 0.2214, "step": 10515500 }, { "epoch": 6.3, "learning_rate": 1.1893301949848022e-05, "loss": 0.2149, "step": 10516000 }, { "epoch": 6.31, "learning_rate": 1.1891201984287457e-05, "loss": 0.2192, "step": 10516500 }, { "epoch": 6.31, "learning_rate": 1.1889102018726894e-05, "loss": 0.2182, "step": 10517000 }, { "epoch": 6.31, "learning_rate": 1.1887006253097449e-05, "loss": 0.2179, "step": 10517500 }, { "epoch": 6.31, "learning_rate": 1.1884906287536884e-05, "loss": 0.2204, "step": 10518000 }, { "epoch": 6.31, "learning_rate": 1.188280632197632e-05, "loss": 0.2192, "step": 10518500 }, { "epoch": 6.31, "learning_rate": 1.1880706356415754e-05, "loss": 0.2188, "step": 10519000 }, { "epoch": 6.31, "learning_rate": 1.187860639085519e-05, "loss": 0.2228, "step": 10519500 }, { "epoch": 6.31, "learning_rate": 1.1876510625225747e-05, "loss": 0.2176, "step": 10520000 }, { "epoch": 6.31, "learning_rate": 1.187441065966518e-05, "loss": 0.2128, "step": 10520500 }, { "epoch": 6.31, "learning_rate": 1.1872310694104617e-05, "loss": 0.2191, "step": 10521000 }, { "epoch": 6.31, "learning_rate": 1.1870210728544052e-05, "loss": 0.2195, "step": 10521500 }, { "epoch": 6.31, "learning_rate": 1.1868110762983487e-05, "loss": 0.2188, "step": 10522000 }, { "epoch": 6.31, "learning_rate": 1.1866010797422922e-05, "loss": 0.2174, "step": 10522500 }, { "epoch": 6.31, "learning_rate": 1.1863910831862358e-05, "loss": 0.2168, "step": 10523000 }, { "epoch": 6.31, "learning_rate": 1.1861815066232914e-05, "loss": 0.2188, "step": 10523500 }, { "epoch": 6.31, "learning_rate": 1.185971510067235e-05, "loss": 0.2174, "step": 10524000 }, { "epoch": 6.31, "learning_rate": 1.1857615135111783e-05, "loss": 0.2099, "step": 10524500 }, { "epoch": 6.31, "learning_rate": 1.185551516955122e-05, "loss": 0.2152, "step": 10525000 }, { "epoch": 6.31, "learning_rate": 1.1853415203990655e-05, "loss": 0.2178, "step": 10525500 }, { "epoch": 6.31, "learning_rate": 1.185131943836121e-05, "loss": 0.2177, "step": 10526000 }, { "epoch": 6.31, "learning_rate": 1.1849219472800647e-05, "loss": 0.2176, "step": 10526500 }, { "epoch": 6.31, "learning_rate": 1.184711950724008e-05, "loss": 0.2168, "step": 10527000 }, { "epoch": 6.31, "learning_rate": 1.1845019541679516e-05, "loss": 0.2193, "step": 10527500 }, { "epoch": 6.31, "learning_rate": 1.1842919576118951e-05, "loss": 0.2187, "step": 10528000 }, { "epoch": 6.31, "learning_rate": 1.1840823810489508e-05, "loss": 0.2142, "step": 10528500 }, { "epoch": 6.31, "learning_rate": 1.1838723844928943e-05, "loss": 0.2161, "step": 10529000 }, { "epoch": 6.31, "learning_rate": 1.1836623879368378e-05, "loss": 0.2184, "step": 10529500 }, { "epoch": 6.31, "learning_rate": 1.1834523913807813e-05, "loss": 0.2139, "step": 10530000 }, { "epoch": 6.31, "learning_rate": 1.1832423948247249e-05, "loss": 0.2164, "step": 10530500 }, { "epoch": 6.31, "learning_rate": 1.1830328182617806e-05, "loss": 0.2143, "step": 10531000 }, { "epoch": 6.31, "learning_rate": 1.1828228217057239e-05, "loss": 0.2168, "step": 10531500 }, { "epoch": 6.31, "learning_rate": 1.1826128251496676e-05, "loss": 0.2258, "step": 10532000 }, { "epoch": 6.31, "learning_rate": 1.1824028285936111e-05, "loss": 0.213, "step": 10532500 }, { "epoch": 6.31, "learning_rate": 1.1821928320375544e-05, "loss": 0.2098, "step": 10533000 }, { "epoch": 6.32, "learning_rate": 1.1819832554746103e-05, "loss": 0.2135, "step": 10533500 }, { "epoch": 6.32, "learning_rate": 1.1817732589185537e-05, "loss": 0.2142, "step": 10534000 }, { "epoch": 6.32, "learning_rate": 1.1815632623624972e-05, "loss": 0.2162, "step": 10534500 }, { "epoch": 6.32, "learning_rate": 1.1813532658064409e-05, "loss": 0.2191, "step": 10535000 }, { "epoch": 6.32, "learning_rate": 1.1811432692503842e-05, "loss": 0.2164, "step": 10535500 }, { "epoch": 6.32, "learning_rate": 1.1809332726943279e-05, "loss": 0.2168, "step": 10536000 }, { "epoch": 6.32, "learning_rate": 1.1807232761382712e-05, "loss": 0.2131, "step": 10536500 }, { "epoch": 6.32, "learning_rate": 1.1805132795822147e-05, "loss": 0.218, "step": 10537000 }, { "epoch": 6.32, "learning_rate": 1.1803041230123826e-05, "loss": 0.2188, "step": 10537500 }, { "epoch": 6.32, "learning_rate": 1.1800941264563261e-05, "loss": 0.2203, "step": 10538000 }, { "epoch": 6.32, "learning_rate": 1.1798841299002695e-05, "loss": 0.221, "step": 10538500 }, { "epoch": 6.32, "learning_rate": 1.1796741333442132e-05, "loss": 0.216, "step": 10539000 }, { "epoch": 6.32, "learning_rate": 1.1794641367881567e-05, "loss": 0.2138, "step": 10539500 }, { "epoch": 6.32, "learning_rate": 1.1792545602252122e-05, "loss": 0.2135, "step": 10540000 }, { "epoch": 6.32, "learning_rate": 1.1790445636691559e-05, "loss": 0.2128, "step": 10540500 }, { "epoch": 6.32, "learning_rate": 1.1788345671130992e-05, "loss": 0.2134, "step": 10541000 }, { "epoch": 6.32, "learning_rate": 1.178624570557043e-05, "loss": 0.2122, "step": 10541500 }, { "epoch": 6.32, "learning_rate": 1.1784145740009864e-05, "loss": 0.2163, "step": 10542000 }, { "epoch": 6.32, "learning_rate": 1.1782045774449298e-05, "loss": 0.2098, "step": 10542500 }, { "epoch": 6.32, "learning_rate": 1.1779945808888735e-05, "loss": 0.2178, "step": 10543000 }, { "epoch": 6.32, "learning_rate": 1.177785004325929e-05, "loss": 0.2113, "step": 10543500 }, { "epoch": 6.32, "learning_rate": 1.1775750077698725e-05, "loss": 0.2163, "step": 10544000 }, { "epoch": 6.32, "learning_rate": 1.1773650112138162e-05, "loss": 0.2195, "step": 10544500 }, { "epoch": 6.32, "learning_rate": 1.1771550146577595e-05, "loss": 0.2166, "step": 10545000 }, { "epoch": 6.32, "learning_rate": 1.1769454380948152e-05, "loss": 0.2213, "step": 10545500 }, { "epoch": 6.32, "learning_rate": 1.1767354415387588e-05, "loss": 0.216, "step": 10546000 }, { "epoch": 6.32, "learning_rate": 1.1765254449827023e-05, "loss": 0.2149, "step": 10546500 }, { "epoch": 6.32, "learning_rate": 1.1763154484266458e-05, "loss": 0.2144, "step": 10547000 }, { "epoch": 6.32, "learning_rate": 1.1761054518705893e-05, "loss": 0.2138, "step": 10547500 }, { "epoch": 6.32, "learning_rate": 1.1758954553145328e-05, "loss": 0.216, "step": 10548000 }, { "epoch": 6.32, "learning_rate": 1.1756854587584763e-05, "loss": 0.2126, "step": 10548500 }, { "epoch": 6.32, "learning_rate": 1.1754754622024198e-05, "loss": 0.2135, "step": 10549000 }, { "epoch": 6.32, "learning_rate": 1.1752658856394754e-05, "loss": 0.2174, "step": 10549500 }, { "epoch": 6.33, "learning_rate": 1.175055889083419e-05, "loss": 0.2144, "step": 10550000 }, { "epoch": 6.33, "learning_rate": 1.1748458925273626e-05, "loss": 0.2171, "step": 10550500 }, { "epoch": 6.33, "learning_rate": 1.174635895971306e-05, "loss": 0.2191, "step": 10551000 }, { "epoch": 6.33, "learning_rate": 1.1744263194083618e-05, "loss": 0.2202, "step": 10551500 }, { "epoch": 6.33, "learning_rate": 1.1742167428454173e-05, "loss": 0.2152, "step": 10552000 }, { "epoch": 6.33, "learning_rate": 1.1740067462893608e-05, "loss": 0.2157, "step": 10552500 }, { "epoch": 6.33, "learning_rate": 1.1737967497333043e-05, "loss": 0.2178, "step": 10553000 }, { "epoch": 6.33, "learning_rate": 1.1735867531772479e-05, "loss": 0.2173, "step": 10553500 }, { "epoch": 6.33, "learning_rate": 1.1733771766143034e-05, "loss": 0.2196, "step": 10554000 }, { "epoch": 6.33, "learning_rate": 1.173167180058247e-05, "loss": 0.2192, "step": 10554500 }, { "epoch": 6.33, "learning_rate": 1.1729571835021906e-05, "loss": 0.2097, "step": 10555000 }, { "epoch": 6.33, "learning_rate": 1.1727471869461341e-05, "loss": 0.2187, "step": 10555500 }, { "epoch": 6.33, "learning_rate": 1.1725376103831898e-05, "loss": 0.2183, "step": 10556000 }, { "epoch": 6.33, "learning_rate": 1.1723276138271331e-05, "loss": 0.2106, "step": 10556500 }, { "epoch": 6.33, "learning_rate": 1.1721176172710768e-05, "loss": 0.2176, "step": 10557000 }, { "epoch": 6.33, "learning_rate": 1.1719076207150203e-05, "loss": 0.2158, "step": 10557500 }, { "epoch": 6.33, "learning_rate": 1.1716976241589637e-05, "loss": 0.2136, "step": 10558000 }, { "epoch": 6.33, "learning_rate": 1.1714876276029074e-05, "loss": 0.2225, "step": 10558500 }, { "epoch": 6.33, "learning_rate": 1.1712780510399629e-05, "loss": 0.214, "step": 10559000 }, { "epoch": 6.33, "learning_rate": 1.1710680544839064e-05, "loss": 0.2146, "step": 10559500 }, { "epoch": 6.33, "learning_rate": 1.17085805792785e-05, "loss": 0.2184, "step": 10560000 }, { "epoch": 6.33, "learning_rate": 1.1706480613717934e-05, "loss": 0.2163, "step": 10560500 }, { "epoch": 6.33, "learning_rate": 1.1704380648157371e-05, "loss": 0.2119, "step": 10561000 }, { "epoch": 6.33, "learning_rate": 1.1702280682596805e-05, "loss": 0.2181, "step": 10561500 }, { "epoch": 6.33, "learning_rate": 1.170018071703624e-05, "loss": 0.2095, "step": 10562000 }, { "epoch": 6.33, "learning_rate": 1.1698080751475677e-05, "loss": 0.2184, "step": 10562500 }, { "epoch": 6.33, "learning_rate": 1.1695984985846232e-05, "loss": 0.2249, "step": 10563000 }, { "epoch": 6.33, "learning_rate": 1.1693885020285667e-05, "loss": 0.214, "step": 10563500 }, { "epoch": 6.33, "learning_rate": 1.1691789254656224e-05, "loss": 0.2156, "step": 10564000 }, { "epoch": 6.33, "learning_rate": 1.168968928909566e-05, "loss": 0.2164, "step": 10564500 }, { "epoch": 6.33, "learning_rate": 1.1687589323535093e-05, "loss": 0.2237, "step": 10565000 }, { "epoch": 6.33, "learning_rate": 1.168548935797453e-05, "loss": 0.2198, "step": 10565500 }, { "epoch": 6.33, "learning_rate": 1.1683389392413965e-05, "loss": 0.2193, "step": 10566000 }, { "epoch": 6.34, "learning_rate": 1.16812894268534e-05, "loss": 0.2123, "step": 10566500 }, { "epoch": 6.34, "learning_rate": 1.1679189461292835e-05, "loss": 0.2196, "step": 10567000 }, { "epoch": 6.34, "learning_rate": 1.1677089495732269e-05, "loss": 0.2114, "step": 10567500 }, { "epoch": 6.34, "learning_rate": 1.1674993730102827e-05, "loss": 0.2094, "step": 10568000 }, { "epoch": 6.34, "learning_rate": 1.167289376454226e-05, "loss": 0.2112, "step": 10568500 }, { "epoch": 6.34, "learning_rate": 1.1670797998912818e-05, "loss": 0.2108, "step": 10569000 }, { "epoch": 6.34, "learning_rate": 1.1668698033352253e-05, "loss": 0.2154, "step": 10569500 }, { "epoch": 6.34, "learning_rate": 1.1666598067791688e-05, "loss": 0.218, "step": 10570000 }, { "epoch": 6.34, "learning_rate": 1.1664498102231123e-05, "loss": 0.215, "step": 10570500 }, { "epoch": 6.34, "learning_rate": 1.1662398136670558e-05, "loss": 0.2115, "step": 10571000 }, { "epoch": 6.34, "learning_rate": 1.1660298171109993e-05, "loss": 0.2134, "step": 10571500 }, { "epoch": 6.34, "learning_rate": 1.1658198205549429e-05, "loss": 0.2196, "step": 10572000 }, { "epoch": 6.34, "learning_rate": 1.1656098239988864e-05, "loss": 0.2176, "step": 10572500 }, { "epoch": 6.34, "learning_rate": 1.165400247435942e-05, "loss": 0.2239, "step": 10573000 }, { "epoch": 6.34, "learning_rate": 1.1651902508798856e-05, "loss": 0.2176, "step": 10573500 }, { "epoch": 6.34, "learning_rate": 1.1649802543238291e-05, "loss": 0.2162, "step": 10574000 }, { "epoch": 6.34, "learning_rate": 1.1647702577677726e-05, "loss": 0.2173, "step": 10574500 }, { "epoch": 6.34, "learning_rate": 1.1645602612117161e-05, "loss": 0.2184, "step": 10575000 }, { "epoch": 6.34, "learning_rate": 1.1643502646556596e-05, "loss": 0.2153, "step": 10575500 }, { "epoch": 6.34, "learning_rate": 1.1641402680996032e-05, "loss": 0.2108, "step": 10576000 }, { "epoch": 6.34, "learning_rate": 1.1639306915366589e-05, "loss": 0.2212, "step": 10576500 }, { "epoch": 6.34, "learning_rate": 1.1637206949806022e-05, "loss": 0.2121, "step": 10577000 }, { "epoch": 6.34, "learning_rate": 1.1635106984245459e-05, "loss": 0.2139, "step": 10577500 }, { "epoch": 6.34, "learning_rate": 1.1633007018684894e-05, "loss": 0.2195, "step": 10578000 }, { "epoch": 6.34, "learning_rate": 1.163091125305545e-05, "loss": 0.2102, "step": 10578500 }, { "epoch": 6.34, "learning_rate": 1.1628811287494886e-05, "loss": 0.2141, "step": 10579000 }, { "epoch": 6.34, "learning_rate": 1.162671132193432e-05, "loss": 0.2169, "step": 10579500 }, { "epoch": 6.34, "learning_rate": 1.1624611356373755e-05, "loss": 0.2171, "step": 10580000 }, { "epoch": 6.34, "learning_rate": 1.1622511390813192e-05, "loss": 0.2173, "step": 10580500 }, { "epoch": 6.34, "learning_rate": 1.1620415625183747e-05, "loss": 0.2162, "step": 10581000 }, { "epoch": 6.34, "learning_rate": 1.1618315659623182e-05, "loss": 0.2097, "step": 10581500 }, { "epoch": 6.34, "learning_rate": 1.1616215694062617e-05, "loss": 0.2095, "step": 10582000 }, { "epoch": 6.34, "learning_rate": 1.1614115728502052e-05, "loss": 0.2194, "step": 10582500 }, { "epoch": 6.34, "learning_rate": 1.1612015762941487e-05, "loss": 0.2139, "step": 10583000 }, { "epoch": 6.35, "learning_rate": 1.1609915797380923e-05, "loss": 0.2164, "step": 10583500 }, { "epoch": 6.35, "learning_rate": 1.1607815831820358e-05, "loss": 0.2182, "step": 10584000 }, { "epoch": 6.35, "learning_rate": 1.1605715866259793e-05, "loss": 0.2168, "step": 10584500 }, { "epoch": 6.35, "learning_rate": 1.160362010063035e-05, "loss": 0.2168, "step": 10585000 }, { "epoch": 6.35, "learning_rate": 1.1601520135069783e-05, "loss": 0.2122, "step": 10585500 }, { "epoch": 6.35, "learning_rate": 1.159942016950922e-05, "loss": 0.215, "step": 10586000 }, { "epoch": 6.35, "learning_rate": 1.1597324403879777e-05, "loss": 0.2194, "step": 10586500 }, { "epoch": 6.35, "learning_rate": 1.159522443831921e-05, "loss": 0.2209, "step": 10587000 }, { "epoch": 6.35, "learning_rate": 1.1593124472758647e-05, "loss": 0.2162, "step": 10587500 }, { "epoch": 6.35, "learning_rate": 1.1591024507198081e-05, "loss": 0.2159, "step": 10588000 }, { "epoch": 6.35, "learning_rate": 1.1588924541637516e-05, "loss": 0.215, "step": 10588500 }, { "epoch": 6.35, "learning_rate": 1.1586824576076953e-05, "loss": 0.2134, "step": 10589000 }, { "epoch": 6.35, "learning_rate": 1.1584724610516386e-05, "loss": 0.2161, "step": 10589500 }, { "epoch": 6.35, "learning_rate": 1.1582624644955823e-05, "loss": 0.2175, "step": 10590000 }, { "epoch": 6.35, "learning_rate": 1.1580528879326378e-05, "loss": 0.223, "step": 10590500 }, { "epoch": 6.35, "learning_rate": 1.1578428913765814e-05, "loss": 0.2174, "step": 10591000 }, { "epoch": 6.35, "learning_rate": 1.157632894820525e-05, "loss": 0.2129, "step": 10591500 }, { "epoch": 6.35, "learning_rate": 1.1574228982644684e-05, "loss": 0.213, "step": 10592000 }, { "epoch": 6.35, "learning_rate": 1.1572133217015241e-05, "loss": 0.2183, "step": 10592500 }, { "epoch": 6.35, "learning_rate": 1.1570033251454676e-05, "loss": 0.2188, "step": 10593000 }, { "epoch": 6.35, "learning_rate": 1.1567933285894111e-05, "loss": 0.2182, "step": 10593500 }, { "epoch": 6.35, "learning_rate": 1.1565833320333546e-05, "loss": 0.2239, "step": 10594000 }, { "epoch": 6.35, "learning_rate": 1.1563737554704103e-05, "loss": 0.2206, "step": 10594500 }, { "epoch": 6.35, "learning_rate": 1.1561637589143538e-05, "loss": 0.2183, "step": 10595000 }, { "epoch": 6.35, "learning_rate": 1.1559537623582974e-05, "loss": 0.2203, "step": 10595500 }, { "epoch": 6.35, "learning_rate": 1.1557437658022409e-05, "loss": 0.219, "step": 10596000 }, { "epoch": 6.35, "learning_rate": 1.1555341892392964e-05, "loss": 0.217, "step": 10596500 }, { "epoch": 6.35, "learning_rate": 1.1553246126763521e-05, "loss": 0.215, "step": 10597000 }, { "epoch": 6.35, "learning_rate": 1.1551146161202956e-05, "loss": 0.2143, "step": 10597500 }, { "epoch": 6.35, "learning_rate": 1.1549046195642391e-05, "loss": 0.2094, "step": 10598000 }, { "epoch": 6.35, "learning_rate": 1.1546946230081826e-05, "loss": 0.2117, "step": 10598500 }, { "epoch": 6.35, "learning_rate": 1.1544846264521262e-05, "loss": 0.2151, "step": 10599000 }, { "epoch": 6.35, "learning_rate": 1.1542750498891817e-05, "loss": 0.2131, "step": 10599500 }, { "epoch": 6.36, "learning_rate": 1.1540650533331254e-05, "loss": 0.2167, "step": 10600000 }, { "epoch": 6.36, "eval_loss": 0.2069634646177292, "eval_runtime": 1450.9738, "eval_samples_per_second": 363.011, "eval_steps_per_second": 60.502, "step": 10600000 }, { "epoch": 6.36, "learning_rate": 1.1538550567770689e-05, "loss": 0.2176, "step": 10600500 }, { "epoch": 6.36, "learning_rate": 1.1536450602210122e-05, "loss": 0.2132, "step": 10601000 }, { "epoch": 6.36, "learning_rate": 1.1534354836580681e-05, "loss": 0.2187, "step": 10601500 }, { "epoch": 6.36, "learning_rate": 1.1532254871020114e-05, "loss": 0.2198, "step": 10602000 }, { "epoch": 6.36, "learning_rate": 1.153015490545955e-05, "loss": 0.217, "step": 10602500 }, { "epoch": 6.36, "learning_rate": 1.1528054939898986e-05, "loss": 0.2174, "step": 10603000 }, { "epoch": 6.36, "learning_rate": 1.152595497433842e-05, "loss": 0.2143, "step": 10603500 }, { "epoch": 6.36, "learning_rate": 1.1523855008777857e-05, "loss": 0.2113, "step": 10604000 }, { "epoch": 6.36, "learning_rate": 1.1521755043217292e-05, "loss": 0.2222, "step": 10604500 }, { "epoch": 6.36, "learning_rate": 1.1519659277587847e-05, "loss": 0.2203, "step": 10605000 }, { "epoch": 6.36, "learning_rate": 1.1517559312027284e-05, "loss": 0.2186, "step": 10605500 }, { "epoch": 6.36, "learning_rate": 1.1515459346466718e-05, "loss": 0.217, "step": 10606000 }, { "epoch": 6.36, "learning_rate": 1.1513359380906153e-05, "loss": 0.22, "step": 10606500 }, { "epoch": 6.36, "learning_rate": 1.1511259415345588e-05, "loss": 0.215, "step": 10607000 }, { "epoch": 6.36, "learning_rate": 1.1509159449785023e-05, "loss": 0.2202, "step": 10607500 }, { "epoch": 6.36, "learning_rate": 1.1507059484224458e-05, "loss": 0.2196, "step": 10608000 }, { "epoch": 6.36, "learning_rate": 1.1504959518663893e-05, "loss": 0.2157, "step": 10608500 }, { "epoch": 6.36, "learning_rate": 1.1502859553103328e-05, "loss": 0.2224, "step": 10609000 }, { "epoch": 6.36, "learning_rate": 1.1500763787473885e-05, "loss": 0.218, "step": 10609500 }, { "epoch": 6.36, "learning_rate": 1.149866382191332e-05, "loss": 0.2167, "step": 10610000 }, { "epoch": 6.36, "learning_rate": 1.1496563856352756e-05, "loss": 0.2193, "step": 10610500 }, { "epoch": 6.36, "learning_rate": 1.1494468090723313e-05, "loss": 0.2173, "step": 10611000 }, { "epoch": 6.36, "learning_rate": 1.1492368125162748e-05, "loss": 0.2176, "step": 10611500 }, { "epoch": 6.36, "learning_rate": 1.1490268159602181e-05, "loss": 0.2154, "step": 10612000 }, { "epoch": 6.36, "learning_rate": 1.1488168194041618e-05, "loss": 0.2162, "step": 10612500 }, { "epoch": 6.36, "learning_rate": 1.1486068228481053e-05, "loss": 0.2199, "step": 10613000 }, { "epoch": 6.36, "learning_rate": 1.1483968262920488e-05, "loss": 0.2164, "step": 10613500 }, { "epoch": 6.36, "learning_rate": 1.1481872497291045e-05, "loss": 0.2101, "step": 10614000 }, { "epoch": 6.36, "learning_rate": 1.1479772531730479e-05, "loss": 0.2171, "step": 10614500 }, { "epoch": 6.36, "learning_rate": 1.1477672566169916e-05, "loss": 0.2173, "step": 10615000 }, { "epoch": 6.36, "learning_rate": 1.1475572600609349e-05, "loss": 0.2199, "step": 10615500 }, { "epoch": 6.36, "learning_rate": 1.1473472635048784e-05, "loss": 0.2093, "step": 10616000 }, { "epoch": 6.37, "learning_rate": 1.1471372669488221e-05, "loss": 0.2193, "step": 10616500 }, { "epoch": 6.37, "learning_rate": 1.1469272703927655e-05, "loss": 0.2164, "step": 10617000 }, { "epoch": 6.37, "learning_rate": 1.1467176938298212e-05, "loss": 0.2138, "step": 10617500 }, { "epoch": 6.37, "learning_rate": 1.1465076972737647e-05, "loss": 0.2129, "step": 10618000 }, { "epoch": 6.37, "learning_rate": 1.1462977007177082e-05, "loss": 0.2207, "step": 10618500 }, { "epoch": 6.37, "learning_rate": 1.1460877041616517e-05, "loss": 0.2128, "step": 10619000 }, { "epoch": 6.37, "learning_rate": 1.1458781275987074e-05, "loss": 0.2221, "step": 10619500 }, { "epoch": 6.37, "learning_rate": 1.1456681310426509e-05, "loss": 0.2179, "step": 10620000 }, { "epoch": 6.37, "learning_rate": 1.1454581344865944e-05, "loss": 0.2226, "step": 10620500 }, { "epoch": 6.37, "learning_rate": 1.145248137930538e-05, "loss": 0.2165, "step": 10621000 }, { "epoch": 6.37, "learning_rate": 1.1450385613675935e-05, "loss": 0.2153, "step": 10621500 }, { "epoch": 6.37, "learning_rate": 1.1448285648115372e-05, "loss": 0.2147, "step": 10622000 }, { "epoch": 6.37, "learning_rate": 1.1446185682554807e-05, "loss": 0.2119, "step": 10622500 }, { "epoch": 6.37, "learning_rate": 1.144408571699424e-05, "loss": 0.2184, "step": 10623000 }, { "epoch": 6.37, "learning_rate": 1.1441985751433677e-05, "loss": 0.2141, "step": 10623500 }, { "epoch": 6.37, "learning_rate": 1.1439885785873112e-05, "loss": 0.2198, "step": 10624000 }, { "epoch": 6.37, "learning_rate": 1.1437785820312546e-05, "loss": 0.2178, "step": 10624500 }, { "epoch": 6.37, "learning_rate": 1.1435685854751982e-05, "loss": 0.2118, "step": 10625000 }, { "epoch": 6.37, "learning_rate": 1.1433585889191416e-05, "loss": 0.2149, "step": 10625500 }, { "epoch": 6.37, "learning_rate": 1.1431485923630853e-05, "loss": 0.2145, "step": 10626000 }, { "epoch": 6.37, "learning_rate": 1.1429385958070288e-05, "loss": 0.2183, "step": 10626500 }, { "epoch": 6.37, "learning_rate": 1.1427285992509721e-05, "loss": 0.2173, "step": 10627000 }, { "epoch": 6.37, "learning_rate": 1.142519022688028e-05, "loss": 0.2198, "step": 10627500 }, { "epoch": 6.37, "learning_rate": 1.1423090261319714e-05, "loss": 0.2138, "step": 10628000 }, { "epoch": 6.37, "learning_rate": 1.1420990295759149e-05, "loss": 0.2111, "step": 10628500 }, { "epoch": 6.37, "learning_rate": 1.1418894530129706e-05, "loss": 0.2188, "step": 10629000 }, { "epoch": 6.37, "learning_rate": 1.141679456456914e-05, "loss": 0.2218, "step": 10629500 }, { "epoch": 6.37, "learning_rate": 1.1414694599008576e-05, "loss": 0.2173, "step": 10630000 }, { "epoch": 6.37, "learning_rate": 1.1412594633448011e-05, "loss": 0.2182, "step": 10630500 }, { "epoch": 6.37, "learning_rate": 1.1410498867818568e-05, "loss": 0.2158, "step": 10631000 }, { "epoch": 6.37, "learning_rate": 1.1408398902258003e-05, "loss": 0.2144, "step": 10631500 }, { "epoch": 6.37, "learning_rate": 1.1406298936697438e-05, "loss": 0.2128, "step": 10632000 }, { "epoch": 6.37, "learning_rate": 1.1404198971136874e-05, "loss": 0.2129, "step": 10632500 }, { "epoch": 6.37, "learning_rate": 1.1402099005576309e-05, "loss": 0.2165, "step": 10633000 }, { "epoch": 6.38, "learning_rate": 1.1400003239946866e-05, "loss": 0.2168, "step": 10633500 }, { "epoch": 6.38, "learning_rate": 1.1397903274386299e-05, "loss": 0.217, "step": 10634000 }, { "epoch": 6.38, "learning_rate": 1.1395803308825736e-05, "loss": 0.2184, "step": 10634500 }, { "epoch": 6.38, "learning_rate": 1.139370334326517e-05, "loss": 0.2141, "step": 10635000 }, { "epoch": 6.38, "learning_rate": 1.1391603377704605e-05, "loss": 0.2167, "step": 10635500 }, { "epoch": 6.38, "learning_rate": 1.1389503412144041e-05, "loss": 0.2179, "step": 10636000 }, { "epoch": 6.38, "learning_rate": 1.1387403446583475e-05, "loss": 0.2172, "step": 10636500 }, { "epoch": 6.38, "learning_rate": 1.1385303481022912e-05, "loss": 0.219, "step": 10637000 }, { "epoch": 6.38, "learning_rate": 1.1383207715393467e-05, "loss": 0.2124, "step": 10637500 }, { "epoch": 6.38, "learning_rate": 1.1381107749832902e-05, "loss": 0.2156, "step": 10638000 }, { "epoch": 6.38, "learning_rate": 1.1379011984203459e-05, "loss": 0.2141, "step": 10638500 }, { "epoch": 6.38, "learning_rate": 1.1376912018642894e-05, "loss": 0.2143, "step": 10639000 }, { "epoch": 6.38, "learning_rate": 1.137481205308233e-05, "loss": 0.2166, "step": 10639500 }, { "epoch": 6.38, "learning_rate": 1.1372712087521765e-05, "loss": 0.2171, "step": 10640000 }, { "epoch": 6.38, "learning_rate": 1.13706121219612e-05, "loss": 0.2139, "step": 10640500 }, { "epoch": 6.38, "learning_rate": 1.1368516356331755e-05, "loss": 0.2162, "step": 10641000 }, { "epoch": 6.38, "learning_rate": 1.1366416390771192e-05, "loss": 0.2121, "step": 10641500 }, { "epoch": 6.38, "learning_rate": 1.1364316425210627e-05, "loss": 0.2166, "step": 10642000 }, { "epoch": 6.38, "learning_rate": 1.136221645965006e-05, "loss": 0.2212, "step": 10642500 }, { "epoch": 6.38, "learning_rate": 1.1360116494089497e-05, "loss": 0.2126, "step": 10643000 }, { "epoch": 6.38, "learning_rate": 1.135801652852893e-05, "loss": 0.2164, "step": 10643500 }, { "epoch": 6.38, "learning_rate": 1.1355916562968368e-05, "loss": 0.2121, "step": 10644000 }, { "epoch": 6.38, "learning_rate": 1.1353816597407803e-05, "loss": 0.2123, "step": 10644500 }, { "epoch": 6.38, "learning_rate": 1.1351720831778358e-05, "loss": 0.2166, "step": 10645000 }, { "epoch": 6.38, "learning_rate": 1.1349620866217795e-05, "loss": 0.2178, "step": 10645500 }, { "epoch": 6.38, "learning_rate": 1.1347520900657228e-05, "loss": 0.2166, "step": 10646000 }, { "epoch": 6.38, "learning_rate": 1.1345420935096663e-05, "loss": 0.2135, "step": 10646500 }, { "epoch": 6.38, "learning_rate": 1.134332516946722e-05, "loss": 0.2174, "step": 10647000 }, { "epoch": 6.38, "learning_rate": 1.1341225203906656e-05, "loss": 0.2184, "step": 10647500 }, { "epoch": 6.38, "learning_rate": 1.133912523834609e-05, "loss": 0.2188, "step": 10648000 }, { "epoch": 6.38, "learning_rate": 1.1337025272785526e-05, "loss": 0.2174, "step": 10648500 }, { "epoch": 6.38, "learning_rate": 1.1334925307224961e-05, "loss": 0.2096, "step": 10649000 }, { "epoch": 6.38, "learning_rate": 1.1332825341664398e-05, "loss": 0.2196, "step": 10649500 }, { "epoch": 6.39, "learning_rate": 1.1330729576034953e-05, "loss": 0.2185, "step": 10650000 }, { "epoch": 6.39, "learning_rate": 1.1328629610474388e-05, "loss": 0.2204, "step": 10650500 }, { "epoch": 6.39, "learning_rate": 1.1326529644913823e-05, "loss": 0.214, "step": 10651000 }, { "epoch": 6.39, "learning_rate": 1.1324429679353259e-05, "loss": 0.2131, "step": 10651500 }, { "epoch": 6.39, "learning_rate": 1.1322329713792692e-05, "loss": 0.2179, "step": 10652000 }, { "epoch": 6.39, "learning_rate": 1.1320229748232129e-05, "loss": 0.2148, "step": 10652500 }, { "epoch": 6.39, "learning_rate": 1.1318129782671564e-05, "loss": 0.216, "step": 10653000 }, { "epoch": 6.39, "learning_rate": 1.1316029817111e-05, "loss": 0.2184, "step": 10653500 }, { "epoch": 6.39, "learning_rate": 1.1313934051481556e-05, "loss": 0.2151, "step": 10654000 }, { "epoch": 6.39, "learning_rate": 1.1311838285852111e-05, "loss": 0.2191, "step": 10654500 }, { "epoch": 6.39, "learning_rate": 1.1309738320291547e-05, "loss": 0.2201, "step": 10655000 }, { "epoch": 6.39, "learning_rate": 1.1307638354730982e-05, "loss": 0.2165, "step": 10655500 }, { "epoch": 6.39, "learning_rate": 1.1305538389170417e-05, "loss": 0.2212, "step": 10656000 }, { "epoch": 6.39, "learning_rate": 1.1303442623540974e-05, "loss": 0.2213, "step": 10656500 }, { "epoch": 6.39, "learning_rate": 1.1301342657980409e-05, "loss": 0.2178, "step": 10657000 }, { "epoch": 6.39, "learning_rate": 1.1299242692419844e-05, "loss": 0.2123, "step": 10657500 }, { "epoch": 6.39, "learning_rate": 1.129714272685928e-05, "loss": 0.2183, "step": 10658000 }, { "epoch": 6.39, "learning_rate": 1.1295042761298714e-05, "loss": 0.217, "step": 10658500 }, { "epoch": 6.39, "learning_rate": 1.129294279573815e-05, "loss": 0.2159, "step": 10659000 }, { "epoch": 6.39, "learning_rate": 1.1290847030108707e-05, "loss": 0.2132, "step": 10659500 }, { "epoch": 6.39, "learning_rate": 1.1288747064548142e-05, "loss": 0.2157, "step": 10660000 }, { "epoch": 6.39, "learning_rate": 1.1286647098987575e-05, "loss": 0.2186, "step": 10660500 }, { "epoch": 6.39, "learning_rate": 1.1284547133427012e-05, "loss": 0.2159, "step": 10661000 }, { "epoch": 6.39, "learning_rate": 1.1282447167866447e-05, "loss": 0.2139, "step": 10661500 }, { "epoch": 6.39, "learning_rate": 1.1280347202305882e-05, "loss": 0.216, "step": 10662000 }, { "epoch": 6.39, "learning_rate": 1.1278247236745318e-05, "loss": 0.2107, "step": 10662500 }, { "epoch": 6.39, "learning_rate": 1.1276151471115873e-05, "loss": 0.2157, "step": 10663000 }, { "epoch": 6.39, "learning_rate": 1.127405150555531e-05, "loss": 0.2149, "step": 10663500 }, { "epoch": 6.39, "learning_rate": 1.1271951539994743e-05, "loss": 0.2167, "step": 10664000 }, { "epoch": 6.39, "learning_rate": 1.12698557743653e-05, "loss": 0.2215, "step": 10664500 }, { "epoch": 6.39, "learning_rate": 1.1267755808804735e-05, "loss": 0.2198, "step": 10665000 }, { "epoch": 6.39, "learning_rate": 1.126565584324417e-05, "loss": 0.2175, "step": 10665500 }, { "epoch": 6.39, "learning_rate": 1.1263555877683606e-05, "loss": 0.215, "step": 10666000 }, { "epoch": 6.39, "learning_rate": 1.126145591212304e-05, "loss": 0.2185, "step": 10666500 }, { "epoch": 6.4, "learning_rate": 1.1259355946562476e-05, "loss": 0.2179, "step": 10667000 }, { "epoch": 6.4, "learning_rate": 1.1257255981001913e-05, "loss": 0.2211, "step": 10667500 }, { "epoch": 6.4, "learning_rate": 1.1255156015441346e-05, "loss": 0.2187, "step": 10668000 }, { "epoch": 6.4, "learning_rate": 1.1253060249811903e-05, "loss": 0.2184, "step": 10668500 }, { "epoch": 6.4, "learning_rate": 1.1250960284251338e-05, "loss": 0.2142, "step": 10669000 }, { "epoch": 6.4, "learning_rate": 1.1248860318690773e-05, "loss": 0.2148, "step": 10669500 }, { "epoch": 6.4, "learning_rate": 1.1246760353130209e-05, "loss": 0.2169, "step": 10670000 }, { "epoch": 6.4, "learning_rate": 1.1244660387569644e-05, "loss": 0.2163, "step": 10670500 }, { "epoch": 6.4, "learning_rate": 1.1242560422009079e-05, "loss": 0.2186, "step": 10671000 }, { "epoch": 6.4, "learning_rate": 1.1240464656379634e-05, "loss": 0.2198, "step": 10671500 }, { "epoch": 6.4, "learning_rate": 1.1238364690819071e-05, "loss": 0.2163, "step": 10672000 }, { "epoch": 6.4, "learning_rate": 1.1236264725258504e-05, "loss": 0.2173, "step": 10672500 }, { "epoch": 6.4, "learning_rate": 1.1234164759697941e-05, "loss": 0.222, "step": 10673000 }, { "epoch": 6.4, "learning_rate": 1.1232064794137376e-05, "loss": 0.2129, "step": 10673500 }, { "epoch": 6.4, "learning_rate": 1.122996482857681e-05, "loss": 0.2197, "step": 10674000 }, { "epoch": 6.4, "learning_rate": 1.1227864863016247e-05, "loss": 0.2131, "step": 10674500 }, { "epoch": 6.4, "learning_rate": 1.1225769097386802e-05, "loss": 0.2192, "step": 10675000 }, { "epoch": 6.4, "learning_rate": 1.1223669131826237e-05, "loss": 0.2172, "step": 10675500 }, { "epoch": 6.4, "learning_rate": 1.1221569166265674e-05, "loss": 0.2109, "step": 10676000 }, { "epoch": 6.4, "learning_rate": 1.1219469200705107e-05, "loss": 0.2146, "step": 10676500 }, { "epoch": 6.4, "learning_rate": 1.1217369235144544e-05, "loss": 0.215, "step": 10677000 }, { "epoch": 6.4, "learning_rate": 1.121526926958398e-05, "loss": 0.2188, "step": 10677500 }, { "epoch": 6.4, "learning_rate": 1.1213173503954535e-05, "loss": 0.2154, "step": 10678000 }, { "epoch": 6.4, "learning_rate": 1.1211073538393972e-05, "loss": 0.2185, "step": 10678500 }, { "epoch": 6.4, "learning_rate": 1.1208973572833405e-05, "loss": 0.2168, "step": 10679000 }, { "epoch": 6.4, "learning_rate": 1.120687360727284e-05, "loss": 0.2183, "step": 10679500 }, { "epoch": 6.4, "learning_rate": 1.1204773641712275e-05, "loss": 0.2201, "step": 10680000 }, { "epoch": 6.4, "learning_rate": 1.120267367615171e-05, "loss": 0.217, "step": 10680500 }, { "epoch": 6.4, "learning_rate": 1.1200573710591147e-05, "loss": 0.2165, "step": 10681000 }, { "epoch": 6.4, "learning_rate": 1.1198477944961703e-05, "loss": 0.221, "step": 10681500 }, { "epoch": 6.4, "learning_rate": 1.1196377979401138e-05, "loss": 0.2197, "step": 10682000 }, { "epoch": 6.4, "learning_rate": 1.1194278013840573e-05, "loss": 0.2147, "step": 10682500 }, { "epoch": 6.4, "learning_rate": 1.1192178048280008e-05, "loss": 0.2163, "step": 10683000 }, { "epoch": 6.41, "learning_rate": 1.1190078082719443e-05, "loss": 0.213, "step": 10683500 }, { "epoch": 6.41, "learning_rate": 1.118798231709e-05, "loss": 0.2136, "step": 10684000 }, { "epoch": 6.41, "learning_rate": 1.1185882351529435e-05, "loss": 0.216, "step": 10684500 }, { "epoch": 6.41, "learning_rate": 1.1183782385968869e-05, "loss": 0.2187, "step": 10685000 }, { "epoch": 6.41, "learning_rate": 1.1181682420408306e-05, "loss": 0.2145, "step": 10685500 }, { "epoch": 6.41, "learning_rate": 1.1179586654778861e-05, "loss": 0.2113, "step": 10686000 }, { "epoch": 6.41, "learning_rate": 1.1177490889149418e-05, "loss": 0.2155, "step": 10686500 }, { "epoch": 6.41, "learning_rate": 1.1175390923588853e-05, "loss": 0.2146, "step": 10687000 }, { "epoch": 6.41, "learning_rate": 1.1173290958028288e-05, "loss": 0.2174, "step": 10687500 }, { "epoch": 6.41, "learning_rate": 1.1171190992467723e-05, "loss": 0.2134, "step": 10688000 }, { "epoch": 6.41, "learning_rate": 1.1169091026907159e-05, "loss": 0.2146, "step": 10688500 }, { "epoch": 6.41, "learning_rate": 1.1166991061346594e-05, "loss": 0.2161, "step": 10689000 }, { "epoch": 6.41, "learning_rate": 1.1164891095786029e-05, "loss": 0.2136, "step": 10689500 }, { "epoch": 6.41, "learning_rate": 1.1162791130225464e-05, "loss": 0.2196, "step": 10690000 }, { "epoch": 6.41, "learning_rate": 1.1160691164664899e-05, "loss": 0.215, "step": 10690500 }, { "epoch": 6.41, "learning_rate": 1.1158595399035456e-05, "loss": 0.214, "step": 10691000 }, { "epoch": 6.41, "learning_rate": 1.1156495433474891e-05, "loss": 0.2285, "step": 10691500 }, { "epoch": 6.41, "learning_rate": 1.1154395467914325e-05, "loss": 0.2151, "step": 10692000 }, { "epoch": 6.41, "learning_rate": 1.1152295502353762e-05, "loss": 0.2161, "step": 10692500 }, { "epoch": 6.41, "learning_rate": 1.1150195536793197e-05, "loss": 0.2177, "step": 10693000 }, { "epoch": 6.41, "learning_rate": 1.1148095571232632e-05, "loss": 0.2111, "step": 10693500 }, { "epoch": 6.41, "learning_rate": 1.1145999805603189e-05, "loss": 0.2189, "step": 10694000 }, { "epoch": 6.41, "learning_rate": 1.1143899840042622e-05, "loss": 0.2139, "step": 10694500 }, { "epoch": 6.41, "learning_rate": 1.1141799874482059e-05, "loss": 0.2143, "step": 10695000 }, { "epoch": 6.41, "learning_rate": 1.1139699908921494e-05, "loss": 0.2136, "step": 10695500 }, { "epoch": 6.41, "learning_rate": 1.1137599943360928e-05, "loss": 0.2205, "step": 10696000 }, { "epoch": 6.41, "learning_rate": 1.1135499977800365e-05, "loss": 0.2133, "step": 10696500 }, { "epoch": 6.41, "learning_rate": 1.113340421217092e-05, "loss": 0.2173, "step": 10697000 }, { "epoch": 6.41, "learning_rate": 1.1131304246610355e-05, "loss": 0.2135, "step": 10697500 }, { "epoch": 6.41, "learning_rate": 1.1129204281049792e-05, "loss": 0.2108, "step": 10698000 }, { "epoch": 6.41, "learning_rate": 1.1127104315489225e-05, "loss": 0.2111, "step": 10698500 }, { "epoch": 6.41, "learning_rate": 1.1125008549859782e-05, "loss": 0.2173, "step": 10699000 }, { "epoch": 6.41, "learning_rate": 1.1122908584299217e-05, "loss": 0.2139, "step": 10699500 }, { "epoch": 6.42, "learning_rate": 1.1120808618738653e-05, "loss": 0.216, "step": 10700000 }, { "epoch": 6.42, "eval_loss": 0.20629100501537323, "eval_runtime": 1453.6024, "eval_samples_per_second": 362.355, "eval_steps_per_second": 60.393, "step": 10700000 }, { "epoch": 6.42, "learning_rate": 1.1118708653178088e-05, "loss": 0.2108, "step": 10700500 }, { "epoch": 6.42, "learning_rate": 1.1116608687617523e-05, "loss": 0.2169, "step": 10701000 }, { "epoch": 6.42, "learning_rate": 1.1114508722056958e-05, "loss": 0.2185, "step": 10701500 }, { "epoch": 6.42, "learning_rate": 1.1112412956427515e-05, "loss": 0.2142, "step": 10702000 }, { "epoch": 6.42, "learning_rate": 1.111031299086695e-05, "loss": 0.2173, "step": 10702500 }, { "epoch": 6.42, "learning_rate": 1.1108213025306384e-05, "loss": 0.2129, "step": 10703000 }, { "epoch": 6.42, "learning_rate": 1.110611305974582e-05, "loss": 0.2171, "step": 10703500 }, { "epoch": 6.42, "learning_rate": 1.1104013094185256e-05, "loss": 0.2143, "step": 10704000 }, { "epoch": 6.42, "learning_rate": 1.1101917328555811e-05, "loss": 0.2142, "step": 10704500 }, { "epoch": 6.42, "learning_rate": 1.1099817362995248e-05, "loss": 0.2187, "step": 10705000 }, { "epoch": 6.42, "learning_rate": 1.1097717397434681e-05, "loss": 0.2186, "step": 10705500 }, { "epoch": 6.42, "learning_rate": 1.1095617431874118e-05, "loss": 0.2143, "step": 10706000 }, { "epoch": 6.42, "learning_rate": 1.1093521666244673e-05, "loss": 0.2146, "step": 10706500 }, { "epoch": 6.42, "learning_rate": 1.1091421700684108e-05, "loss": 0.2121, "step": 10707000 }, { "epoch": 6.42, "learning_rate": 1.1089321735123545e-05, "loss": 0.2167, "step": 10707500 }, { "epoch": 6.42, "learning_rate": 1.1087221769562979e-05, "loss": 0.2173, "step": 10708000 }, { "epoch": 6.42, "learning_rate": 1.1085121804002414e-05, "loss": 0.2195, "step": 10708500 }, { "epoch": 6.42, "learning_rate": 1.1083026038372971e-05, "loss": 0.2157, "step": 10709000 }, { "epoch": 6.42, "learning_rate": 1.1080926072812406e-05, "loss": 0.216, "step": 10709500 }, { "epoch": 6.42, "learning_rate": 1.107882610725184e-05, "loss": 0.2141, "step": 10710000 }, { "epoch": 6.42, "learning_rate": 1.1076726141691276e-05, "loss": 0.2136, "step": 10710500 }, { "epoch": 6.42, "learning_rate": 1.1074626176130711e-05, "loss": 0.2187, "step": 10711000 }, { "epoch": 6.42, "learning_rate": 1.1072526210570147e-05, "loss": 0.2131, "step": 10711500 }, { "epoch": 6.42, "learning_rate": 1.1070426245009582e-05, "loss": 0.2129, "step": 10712000 }, { "epoch": 6.42, "learning_rate": 1.1068326279449017e-05, "loss": 0.2171, "step": 10712500 }, { "epoch": 6.42, "learning_rate": 1.1066230513819574e-05, "loss": 0.2117, "step": 10713000 }, { "epoch": 6.42, "learning_rate": 1.106413474819013e-05, "loss": 0.2165, "step": 10713500 }, { "epoch": 6.42, "learning_rate": 1.1062034782629564e-05, "loss": 0.2129, "step": 10714000 }, { "epoch": 6.42, "learning_rate": 1.1059934817069001e-05, "loss": 0.2112, "step": 10714500 }, { "epoch": 6.42, "learning_rate": 1.1057834851508435e-05, "loss": 0.2126, "step": 10715000 }, { "epoch": 6.42, "learning_rate": 1.105573488594787e-05, "loss": 0.213, "step": 10715500 }, { "epoch": 6.42, "learning_rate": 1.1053634920387307e-05, "loss": 0.2124, "step": 10716000 }, { "epoch": 6.42, "learning_rate": 1.105153495482674e-05, "loss": 0.2228, "step": 10716500 }, { "epoch": 6.43, "learning_rate": 1.1049439189197297e-05, "loss": 0.2125, "step": 10717000 }, { "epoch": 6.43, "learning_rate": 1.1047339223636732e-05, "loss": 0.2156, "step": 10717500 }, { "epoch": 6.43, "learning_rate": 1.1045239258076167e-05, "loss": 0.2173, "step": 10718000 }, { "epoch": 6.43, "learning_rate": 1.1043139292515603e-05, "loss": 0.2191, "step": 10718500 }, { "epoch": 6.43, "learning_rate": 1.1041039326955038e-05, "loss": 0.2182, "step": 10719000 }, { "epoch": 6.43, "learning_rate": 1.1038939361394473e-05, "loss": 0.2191, "step": 10719500 }, { "epoch": 6.43, "learning_rate": 1.1036839395833908e-05, "loss": 0.2174, "step": 10720000 }, { "epoch": 6.43, "learning_rate": 1.1034739430273343e-05, "loss": 0.2162, "step": 10720500 }, { "epoch": 6.43, "learning_rate": 1.1032643664643898e-05, "loss": 0.2154, "step": 10721000 }, { "epoch": 6.43, "learning_rate": 1.1030547899014457e-05, "loss": 0.2077, "step": 10721500 }, { "epoch": 6.43, "learning_rate": 1.102844793345389e-05, "loss": 0.219, "step": 10722000 }, { "epoch": 6.43, "learning_rate": 1.1026347967893326e-05, "loss": 0.2159, "step": 10722500 }, { "epoch": 6.43, "learning_rate": 1.1024248002332763e-05, "loss": 0.2173, "step": 10723000 }, { "epoch": 6.43, "learning_rate": 1.1022152236703318e-05, "loss": 0.2152, "step": 10723500 }, { "epoch": 6.43, "learning_rate": 1.1020052271142753e-05, "loss": 0.2191, "step": 10724000 }, { "epoch": 6.43, "learning_rate": 1.1017952305582188e-05, "loss": 0.2147, "step": 10724500 }, { "epoch": 6.43, "learning_rate": 1.1015852340021623e-05, "loss": 0.2131, "step": 10725000 }, { "epoch": 6.43, "learning_rate": 1.101375237446106e-05, "loss": 0.2148, "step": 10725500 }, { "epoch": 6.43, "learning_rate": 1.1011652408900494e-05, "loss": 0.2172, "step": 10726000 }, { "epoch": 6.43, "learning_rate": 1.100955664327105e-05, "loss": 0.2122, "step": 10726500 }, { "epoch": 6.43, "learning_rate": 1.1007456677710486e-05, "loss": 0.2162, "step": 10727000 }, { "epoch": 6.43, "learning_rate": 1.100535671214992e-05, "loss": 0.2124, "step": 10727500 }, { "epoch": 6.43, "learning_rate": 1.1003256746589356e-05, "loss": 0.222, "step": 10728000 }, { "epoch": 6.43, "learning_rate": 1.1001156781028791e-05, "loss": 0.2121, "step": 10728500 }, { "epoch": 6.43, "learning_rate": 1.0999056815468226e-05, "loss": 0.2164, "step": 10729000 }, { "epoch": 6.43, "learning_rate": 1.0996956849907661e-05, "loss": 0.2066, "step": 10729500 }, { "epoch": 6.43, "learning_rate": 1.0994856884347097e-05, "loss": 0.2178, "step": 10730000 }, { "epoch": 6.43, "learning_rate": 1.0992765318648774e-05, "loss": 0.2123, "step": 10730500 }, { "epoch": 6.43, "learning_rate": 1.0990665353088209e-05, "loss": 0.2161, "step": 10731000 }, { "epoch": 6.43, "learning_rate": 1.0988565387527644e-05, "loss": 0.216, "step": 10731500 }, { "epoch": 6.43, "learning_rate": 1.0986465421967079e-05, "loss": 0.2126, "step": 10732000 }, { "epoch": 6.43, "learning_rate": 1.0984365456406516e-05, "loss": 0.2154, "step": 10732500 }, { "epoch": 6.43, "learning_rate": 1.098226549084595e-05, "loss": 0.2154, "step": 10733000 }, { "epoch": 6.44, "learning_rate": 1.0980169725216506e-05, "loss": 0.2146, "step": 10733500 }, { "epoch": 6.44, "learning_rate": 1.0978069759655942e-05, "loss": 0.215, "step": 10734000 }, { "epoch": 6.44, "learning_rate": 1.0975969794095377e-05, "loss": 0.2152, "step": 10734500 }, { "epoch": 6.44, "learning_rate": 1.0973869828534812e-05, "loss": 0.214, "step": 10735000 }, { "epoch": 6.44, "learning_rate": 1.0971769862974247e-05, "loss": 0.2248, "step": 10735500 }, { "epoch": 6.44, "learning_rate": 1.0969669897413682e-05, "loss": 0.2169, "step": 10736000 }, { "epoch": 6.44, "learning_rate": 1.0967574131784237e-05, "loss": 0.2215, "step": 10736500 }, { "epoch": 6.44, "learning_rate": 1.0965474166223674e-05, "loss": 0.2144, "step": 10737000 }, { "epoch": 6.44, "learning_rate": 1.096337420066311e-05, "loss": 0.2186, "step": 10737500 }, { "epoch": 6.44, "learning_rate": 1.0961274235102545e-05, "loss": 0.2154, "step": 10738000 }, { "epoch": 6.44, "learning_rate": 1.095917426954198e-05, "loss": 0.2177, "step": 10738500 }, { "epoch": 6.44, "learning_rate": 1.0957074303981413e-05, "loss": 0.2235, "step": 10739000 }, { "epoch": 6.44, "learning_rate": 1.095497433842085e-05, "loss": 0.2192, "step": 10739500 }, { "epoch": 6.44, "learning_rate": 1.0952874372860285e-05, "loss": 0.2134, "step": 10740000 }, { "epoch": 6.44, "learning_rate": 1.095077860723084e-05, "loss": 0.2175, "step": 10740500 }, { "epoch": 6.44, "learning_rate": 1.0948678641670277e-05, "loss": 0.2134, "step": 10741000 }, { "epoch": 6.44, "learning_rate": 1.094657867610971e-05, "loss": 0.2133, "step": 10741500 }, { "epoch": 6.44, "learning_rate": 1.0944482910480268e-05, "loss": 0.2123, "step": 10742000 }, { "epoch": 6.44, "learning_rate": 1.0942382944919703e-05, "loss": 0.2135, "step": 10742500 }, { "epoch": 6.44, "learning_rate": 1.0940282979359138e-05, "loss": 0.2102, "step": 10743000 }, { "epoch": 6.44, "learning_rate": 1.0938183013798575e-05, "loss": 0.2165, "step": 10743500 }, { "epoch": 6.44, "learning_rate": 1.093608724816913e-05, "loss": 0.2182, "step": 10744000 }, { "epoch": 6.44, "learning_rate": 1.0933987282608565e-05, "loss": 0.2141, "step": 10744500 }, { "epoch": 6.44, "learning_rate": 1.0931887317048e-05, "loss": 0.2123, "step": 10745000 }, { "epoch": 6.44, "learning_rate": 1.0929787351487436e-05, "loss": 0.2127, "step": 10745500 }, { "epoch": 6.44, "learning_rate": 1.092768738592687e-05, "loss": 0.2143, "step": 10746000 }, { "epoch": 6.44, "learning_rate": 1.0925591620297428e-05, "loss": 0.2154, "step": 10746500 }, { "epoch": 6.44, "learning_rate": 1.0923495854667983e-05, "loss": 0.2214, "step": 10747000 }, { "epoch": 6.44, "learning_rate": 1.0921395889107418e-05, "loss": 0.2172, "step": 10747500 }, { "epoch": 6.44, "learning_rate": 1.0919295923546855e-05, "loss": 0.2209, "step": 10748000 }, { "epoch": 6.44, "learning_rate": 1.0917195957986288e-05, "loss": 0.2125, "step": 10748500 }, { "epoch": 6.44, "learning_rate": 1.0915095992425724e-05, "loss": 0.2116, "step": 10749000 }, { "epoch": 6.44, "learning_rate": 1.0912996026865159e-05, "loss": 0.217, "step": 10749500 }, { "epoch": 6.45, "learning_rate": 1.0910896061304594e-05, "loss": 0.2158, "step": 10750000 }, { "epoch": 6.45, "learning_rate": 1.090879609574403e-05, "loss": 0.2166, "step": 10750500 }, { "epoch": 6.45, "learning_rate": 1.0906700330114586e-05, "loss": 0.2135, "step": 10751000 }, { "epoch": 6.45, "learning_rate": 1.0904600364554021e-05, "loss": 0.2139, "step": 10751500 }, { "epoch": 6.45, "learning_rate": 1.0902500398993456e-05, "loss": 0.2124, "step": 10752000 }, { "epoch": 6.45, "learning_rate": 1.0900400433432891e-05, "loss": 0.2142, "step": 10752500 }, { "epoch": 6.45, "learning_rate": 1.0898300467872327e-05, "loss": 0.2142, "step": 10753000 }, { "epoch": 6.45, "learning_rate": 1.0896204702242884e-05, "loss": 0.2213, "step": 10753500 }, { "epoch": 6.45, "learning_rate": 1.0894104736682319e-05, "loss": 0.216, "step": 10754000 }, { "epoch": 6.45, "learning_rate": 1.0892004771121752e-05, "loss": 0.2231, "step": 10754500 }, { "epoch": 6.45, "learning_rate": 1.0889904805561189e-05, "loss": 0.2084, "step": 10755000 }, { "epoch": 6.45, "learning_rate": 1.0887804840000624e-05, "loss": 0.2209, "step": 10755500 }, { "epoch": 6.45, "learning_rate": 1.088570487444006e-05, "loss": 0.2144, "step": 10756000 }, { "epoch": 6.45, "learning_rate": 1.0883609108810616e-05, "loss": 0.2166, "step": 10756500 }, { "epoch": 6.45, "learning_rate": 1.088150914325005e-05, "loss": 0.2164, "step": 10757000 }, { "epoch": 6.45, "learning_rate": 1.0879409177689487e-05, "loss": 0.2201, "step": 10757500 }, { "epoch": 6.45, "learning_rate": 1.087730921212892e-05, "loss": 0.2126, "step": 10758000 }, { "epoch": 6.45, "learning_rate": 1.0875209246568355e-05, "loss": 0.2138, "step": 10758500 }, { "epoch": 6.45, "learning_rate": 1.0873109281007792e-05, "loss": 0.2183, "step": 10759000 }, { "epoch": 6.45, "learning_rate": 1.0871009315447226e-05, "loss": 0.2118, "step": 10759500 }, { "epoch": 6.45, "learning_rate": 1.0868909349886662e-05, "loss": 0.2189, "step": 10760000 }, { "epoch": 6.45, "learning_rate": 1.0866813584257218e-05, "loss": 0.2141, "step": 10760500 }, { "epoch": 6.45, "learning_rate": 1.0864713618696653e-05, "loss": 0.2102, "step": 10761000 }, { "epoch": 6.45, "learning_rate": 1.0862617853067208e-05, "loss": 0.214, "step": 10761500 }, { "epoch": 6.45, "learning_rate": 1.0860517887506645e-05, "loss": 0.2115, "step": 10762000 }, { "epoch": 6.45, "learning_rate": 1.085841792194608e-05, "loss": 0.2137, "step": 10762500 }, { "epoch": 6.45, "learning_rate": 1.0856317956385515e-05, "loss": 0.2117, "step": 10763000 }, { "epoch": 6.45, "learning_rate": 1.0854222190756072e-05, "loss": 0.22, "step": 10763500 }, { "epoch": 6.45, "learning_rate": 1.0852122225195506e-05, "loss": 0.2157, "step": 10764000 }, { "epoch": 6.45, "learning_rate": 1.0850022259634943e-05, "loss": 0.2136, "step": 10764500 }, { "epoch": 6.45, "learning_rate": 1.0847922294074378e-05, "loss": 0.2127, "step": 10765000 }, { "epoch": 6.45, "learning_rate": 1.0845822328513811e-05, "loss": 0.2204, "step": 10765500 }, { "epoch": 6.45, "learning_rate": 1.0843722362953248e-05, "loss": 0.215, "step": 10766000 }, { "epoch": 6.45, "learning_rate": 1.0841622397392683e-05, "loss": 0.22, "step": 10766500 }, { "epoch": 6.46, "learning_rate": 1.0839522431832118e-05, "loss": 0.2187, "step": 10767000 }, { "epoch": 6.46, "learning_rate": 1.0837426666202675e-05, "loss": 0.2121, "step": 10767500 }, { "epoch": 6.46, "learning_rate": 1.083533090057323e-05, "loss": 0.2175, "step": 10768000 }, { "epoch": 6.46, "learning_rate": 1.0833230935012666e-05, "loss": 0.2112, "step": 10768500 }, { "epoch": 6.46, "learning_rate": 1.08311309694521e-05, "loss": 0.2173, "step": 10769000 }, { "epoch": 6.46, "learning_rate": 1.0829031003891536e-05, "loss": 0.2201, "step": 10769500 }, { "epoch": 6.46, "learning_rate": 1.0826935238262093e-05, "loss": 0.2191, "step": 10770000 }, { "epoch": 6.46, "learning_rate": 1.0824835272701528e-05, "loss": 0.2218, "step": 10770500 }, { "epoch": 6.46, "learning_rate": 1.0822735307140962e-05, "loss": 0.2135, "step": 10771000 }, { "epoch": 6.46, "learning_rate": 1.0820635341580398e-05, "loss": 0.2146, "step": 10771500 }, { "epoch": 6.46, "learning_rate": 1.0818539575950954e-05, "loss": 0.2176, "step": 10772000 }, { "epoch": 6.46, "learning_rate": 1.0816439610390389e-05, "loss": 0.2152, "step": 10772500 }, { "epoch": 6.46, "learning_rate": 1.0814339644829826e-05, "loss": 0.222, "step": 10773000 }, { "epoch": 6.46, "learning_rate": 1.0812239679269259e-05, "loss": 0.2201, "step": 10773500 }, { "epoch": 6.46, "learning_rate": 1.0810139713708694e-05, "loss": 0.2173, "step": 10774000 }, { "epoch": 6.46, "learning_rate": 1.0808043948079251e-05, "loss": 0.2167, "step": 10774500 }, { "epoch": 6.46, "learning_rate": 1.0805943982518686e-05, "loss": 0.2162, "step": 10775000 }, { "epoch": 6.46, "learning_rate": 1.0803844016958122e-05, "loss": 0.2166, "step": 10775500 }, { "epoch": 6.46, "learning_rate": 1.0801744051397557e-05, "loss": 0.2208, "step": 10776000 }, { "epoch": 6.46, "learning_rate": 1.0799648285768114e-05, "loss": 0.2138, "step": 10776500 }, { "epoch": 6.46, "learning_rate": 1.0797548320207549e-05, "loss": 0.2091, "step": 10777000 }, { "epoch": 6.46, "learning_rate": 1.0795448354646984e-05, "loss": 0.2205, "step": 10777500 }, { "epoch": 6.46, "learning_rate": 1.0793348389086419e-05, "loss": 0.2134, "step": 10778000 }, { "epoch": 6.46, "learning_rate": 1.0791252623456976e-05, "loss": 0.2174, "step": 10778500 }, { "epoch": 6.46, "learning_rate": 1.0789152657896411e-05, "loss": 0.2193, "step": 10779000 }, { "epoch": 6.46, "learning_rate": 1.0787052692335845e-05, "loss": 0.2131, "step": 10779500 }, { "epoch": 6.46, "learning_rate": 1.0784952726775282e-05, "loss": 0.2227, "step": 10780000 }, { "epoch": 6.46, "learning_rate": 1.0782856961145837e-05, "loss": 0.2172, "step": 10780500 }, { "epoch": 6.46, "learning_rate": 1.0780756995585272e-05, "loss": 0.2183, "step": 10781000 }, { "epoch": 6.46, "learning_rate": 1.0778657030024707e-05, "loss": 0.219, "step": 10781500 }, { "epoch": 6.46, "learning_rate": 1.0776557064464142e-05, "loss": 0.2177, "step": 10782000 }, { "epoch": 6.46, "learning_rate": 1.0774457098903577e-05, "loss": 0.2125, "step": 10782500 }, { "epoch": 6.46, "learning_rate": 1.0772361333274134e-05, "loss": 0.2189, "step": 10783000 }, { "epoch": 6.47, "learning_rate": 1.077026136771357e-05, "loss": 0.2151, "step": 10783500 }, { "epoch": 6.47, "learning_rate": 1.0768161402153005e-05, "loss": 0.2146, "step": 10784000 }, { "epoch": 6.47, "learning_rate": 1.076606143659244e-05, "loss": 0.2136, "step": 10784500 }, { "epoch": 6.47, "learning_rate": 1.0763961471031875e-05, "loss": 0.2175, "step": 10785000 }, { "epoch": 6.47, "learning_rate": 1.076186150547131e-05, "loss": 0.2174, "step": 10785500 }, { "epoch": 6.47, "learning_rate": 1.0759765739841867e-05, "loss": 0.2157, "step": 10786000 }, { "epoch": 6.47, "learning_rate": 1.07576657742813e-05, "loss": 0.2156, "step": 10786500 }, { "epoch": 6.47, "learning_rate": 1.0755565808720737e-05, "loss": 0.2193, "step": 10787000 }, { "epoch": 6.47, "learning_rate": 1.0753465843160173e-05, "loss": 0.2199, "step": 10787500 }, { "epoch": 6.47, "learning_rate": 1.0751365877599608e-05, "loss": 0.2202, "step": 10788000 }, { "epoch": 6.47, "learning_rate": 1.0749270111970165e-05, "loss": 0.2194, "step": 10788500 }, { "epoch": 6.47, "learning_rate": 1.0747170146409598e-05, "loss": 0.2162, "step": 10789000 }, { "epoch": 6.47, "learning_rate": 1.0745070180849035e-05, "loss": 0.2163, "step": 10789500 }, { "epoch": 6.47, "learning_rate": 1.0742970215288468e-05, "loss": 0.2212, "step": 10790000 }, { "epoch": 6.47, "learning_rate": 1.0740870249727904e-05, "loss": 0.2151, "step": 10790500 }, { "epoch": 6.47, "learning_rate": 1.073877028416734e-05, "loss": 0.216, "step": 10791000 }, { "epoch": 6.47, "learning_rate": 1.0736670318606774e-05, "loss": 0.2167, "step": 10791500 }, { "epoch": 6.47, "learning_rate": 1.0734570353046209e-05, "loss": 0.2132, "step": 10792000 }, { "epoch": 6.47, "learning_rate": 1.0732478787347888e-05, "loss": 0.2146, "step": 10792500 }, { "epoch": 6.47, "learning_rate": 1.0730378821787323e-05, "loss": 0.217, "step": 10793000 }, { "epoch": 6.47, "learning_rate": 1.0728278856226756e-05, "loss": 0.2151, "step": 10793500 }, { "epoch": 6.47, "learning_rate": 1.0726178890666193e-05, "loss": 0.2157, "step": 10794000 }, { "epoch": 6.47, "learning_rate": 1.0724078925105628e-05, "loss": 0.2107, "step": 10794500 }, { "epoch": 6.47, "learning_rate": 1.0721978959545064e-05, "loss": 0.214, "step": 10795000 }, { "epoch": 6.47, "learning_rate": 1.0719878993984499e-05, "loss": 0.2131, "step": 10795500 }, { "epoch": 6.47, "learning_rate": 1.0717783228355054e-05, "loss": 0.2143, "step": 10796000 }, { "epoch": 6.47, "learning_rate": 1.0715683262794491e-05, "loss": 0.2122, "step": 10796500 }, { "epoch": 6.47, "learning_rate": 1.0713583297233926e-05, "loss": 0.2119, "step": 10797000 }, { "epoch": 6.47, "learning_rate": 1.071148333167336e-05, "loss": 0.2154, "step": 10797500 }, { "epoch": 6.47, "learning_rate": 1.0709383366112796e-05, "loss": 0.2153, "step": 10798000 }, { "epoch": 6.47, "learning_rate": 1.0707287600483352e-05, "loss": 0.2179, "step": 10798500 }, { "epoch": 6.47, "learning_rate": 1.0705187634922787e-05, "loss": 0.218, "step": 10799000 }, { "epoch": 6.47, "learning_rate": 1.0703087669362224e-05, "loss": 0.217, "step": 10799500 }, { "epoch": 6.48, "learning_rate": 1.0700987703801657e-05, "loss": 0.2174, "step": 10800000 }, { "epoch": 6.48, "eval_loss": 0.2060076892375946, "eval_runtime": 1455.6282, "eval_samples_per_second": 361.851, "eval_steps_per_second": 60.309, "step": 10800000 }, { "epoch": 6.48, "learning_rate": 1.0698887738241092e-05, "loss": 0.2135, "step": 10800500 }, { "epoch": 6.48, "learning_rate": 1.0696787772680527e-05, "loss": 0.221, "step": 10801000 }, { "epoch": 6.48, "learning_rate": 1.0694687807119963e-05, "loss": 0.2141, "step": 10801500 }, { "epoch": 6.48, "learning_rate": 1.06925878415594e-05, "loss": 0.2114, "step": 10802000 }, { "epoch": 6.48, "learning_rate": 1.0690492075929955e-05, "loss": 0.2154, "step": 10802500 }, { "epoch": 6.48, "learning_rate": 1.068839211036939e-05, "loss": 0.214, "step": 10803000 }, { "epoch": 6.48, "learning_rate": 1.0686296344739947e-05, "loss": 0.2166, "step": 10803500 }, { "epoch": 6.48, "learning_rate": 1.0684196379179382e-05, "loss": 0.2142, "step": 10804000 }, { "epoch": 6.48, "learning_rate": 1.0682096413618815e-05, "loss": 0.2136, "step": 10804500 }, { "epoch": 6.48, "learning_rate": 1.0679996448058252e-05, "loss": 0.2125, "step": 10805000 }, { "epoch": 6.48, "learning_rate": 1.0677896482497687e-05, "loss": 0.2093, "step": 10805500 }, { "epoch": 6.48, "learning_rate": 1.0675796516937123e-05, "loss": 0.2132, "step": 10806000 }, { "epoch": 6.48, "learning_rate": 1.0673696551376558e-05, "loss": 0.2171, "step": 10806500 }, { "epoch": 6.48, "learning_rate": 1.0671600785747113e-05, "loss": 0.2177, "step": 10807000 }, { "epoch": 6.48, "learning_rate": 1.066950082018655e-05, "loss": 0.2146, "step": 10807500 }, { "epoch": 6.48, "learning_rate": 1.0667400854625985e-05, "loss": 0.2154, "step": 10808000 }, { "epoch": 6.48, "learning_rate": 1.0665300889065418e-05, "loss": 0.2187, "step": 10808500 }, { "epoch": 6.48, "learning_rate": 1.0663205123435977e-05, "loss": 0.2173, "step": 10809000 }, { "epoch": 6.48, "learning_rate": 1.066110515787541e-05, "loss": 0.2179, "step": 10809500 }, { "epoch": 6.48, "learning_rate": 1.0659005192314846e-05, "loss": 0.2118, "step": 10810000 }, { "epoch": 6.48, "learning_rate": 1.065690522675428e-05, "loss": 0.2198, "step": 10810500 }, { "epoch": 6.48, "learning_rate": 1.0654805261193716e-05, "loss": 0.2161, "step": 10811000 }, { "epoch": 6.48, "learning_rate": 1.0652705295633151e-05, "loss": 0.218, "step": 10811500 }, { "epoch": 6.48, "learning_rate": 1.0650609530003708e-05, "loss": 0.2199, "step": 10812000 }, { "epoch": 6.48, "learning_rate": 1.0648509564443143e-05, "loss": 0.213, "step": 10812500 }, { "epoch": 6.48, "learning_rate": 1.0646409598882578e-05, "loss": 0.2124, "step": 10813000 }, { "epoch": 6.48, "learning_rate": 1.0644309633322014e-05, "loss": 0.2132, "step": 10813500 }, { "epoch": 6.48, "learning_rate": 1.0642209667761449e-05, "loss": 0.2161, "step": 10814000 }, { "epoch": 6.48, "learning_rate": 1.0640109702200884e-05, "loss": 0.2139, "step": 10814500 }, { "epoch": 6.48, "learning_rate": 1.0638009736640319e-05, "loss": 0.2099, "step": 10815000 }, { "epoch": 6.48, "learning_rate": 1.0635909771079754e-05, "loss": 0.2152, "step": 10815500 }, { "epoch": 6.48, "learning_rate": 1.0633814005450311e-05, "loss": 0.2177, "step": 10816000 }, { "epoch": 6.48, "learning_rate": 1.0631718239820866e-05, "loss": 0.2159, "step": 10816500 }, { "epoch": 6.49, "learning_rate": 1.0629618274260302e-05, "loss": 0.214, "step": 10817000 }, { "epoch": 6.49, "learning_rate": 1.0627518308699738e-05, "loss": 0.2143, "step": 10817500 }, { "epoch": 6.49, "learning_rate": 1.0625418343139172e-05, "loss": 0.2149, "step": 10818000 }, { "epoch": 6.49, "learning_rate": 1.0623318377578607e-05, "loss": 0.212, "step": 10818500 }, { "epoch": 6.49, "learning_rate": 1.0621222611949164e-05, "loss": 0.2175, "step": 10819000 }, { "epoch": 6.49, "learning_rate": 1.0619122646388599e-05, "loss": 0.2156, "step": 10819500 }, { "epoch": 6.49, "learning_rate": 1.0617022680828034e-05, "loss": 0.2121, "step": 10820000 }, { "epoch": 6.49, "learning_rate": 1.061492271526747e-05, "loss": 0.2123, "step": 10820500 }, { "epoch": 6.49, "learning_rate": 1.0612822749706905e-05, "loss": 0.2139, "step": 10821000 }, { "epoch": 6.49, "learning_rate": 1.061072278414634e-05, "loss": 0.2157, "step": 10821500 }, { "epoch": 6.49, "learning_rate": 1.0608622818585775e-05, "loss": 0.2168, "step": 10822000 }, { "epoch": 6.49, "learning_rate": 1.060652285302521e-05, "loss": 0.2187, "step": 10822500 }, { "epoch": 6.49, "learning_rate": 1.0604427087395767e-05, "loss": 0.2174, "step": 10823000 }, { "epoch": 6.49, "learning_rate": 1.0602327121835202e-05, "loss": 0.211, "step": 10823500 }, { "epoch": 6.49, "learning_rate": 1.0600227156274637e-05, "loss": 0.2167, "step": 10824000 }, { "epoch": 6.49, "learning_rate": 1.0598127190714072e-05, "loss": 0.2161, "step": 10824500 }, { "epoch": 6.49, "learning_rate": 1.0596031425084628e-05, "loss": 0.2144, "step": 10825000 }, { "epoch": 6.49, "learning_rate": 1.0593935659455185e-05, "loss": 0.2152, "step": 10825500 }, { "epoch": 6.49, "learning_rate": 1.059183569389462e-05, "loss": 0.2158, "step": 10826000 }, { "epoch": 6.49, "learning_rate": 1.0589735728334055e-05, "loss": 0.2202, "step": 10826500 }, { "epoch": 6.49, "learning_rate": 1.0587635762773492e-05, "loss": 0.2148, "step": 10827000 }, { "epoch": 6.49, "learning_rate": 1.0585535797212925e-05, "loss": 0.2158, "step": 10827500 }, { "epoch": 6.49, "learning_rate": 1.058343583165236e-05, "loss": 0.2166, "step": 10828000 }, { "epoch": 6.49, "learning_rate": 1.0581335866091796e-05, "loss": 0.2148, "step": 10828500 }, { "epoch": 6.49, "learning_rate": 1.057923590053123e-05, "loss": 0.2156, "step": 10829000 }, { "epoch": 6.49, "learning_rate": 1.0577140134901786e-05, "loss": 0.2172, "step": 10829500 }, { "epoch": 6.49, "learning_rate": 1.0575040169341223e-05, "loss": 0.2152, "step": 10830000 }, { "epoch": 6.49, "learning_rate": 1.0572940203780658e-05, "loss": 0.2118, "step": 10830500 }, { "epoch": 6.49, "learning_rate": 1.0570840238220093e-05, "loss": 0.2139, "step": 10831000 }, { "epoch": 6.49, "learning_rate": 1.056874447259065e-05, "loss": 0.2169, "step": 10831500 }, { "epoch": 6.49, "learning_rate": 1.0566644507030084e-05, "loss": 0.2118, "step": 10832000 }, { "epoch": 6.49, "learning_rate": 1.056454454146952e-05, "loss": 0.215, "step": 10832500 }, { "epoch": 6.49, "learning_rate": 1.0562444575908956e-05, "loss": 0.2183, "step": 10833000 }, { "epoch": 6.5, "learning_rate": 1.0560344610348389e-05, "loss": 0.2115, "step": 10833500 }, { "epoch": 6.5, "learning_rate": 1.0558244644787826e-05, "loss": 0.2146, "step": 10834000 }, { "epoch": 6.5, "learning_rate": 1.0556144679227261e-05, "loss": 0.2125, "step": 10834500 }, { "epoch": 6.5, "learning_rate": 1.0554048913597816e-05, "loss": 0.2221, "step": 10835000 }, { "epoch": 6.5, "learning_rate": 1.0551953147968373e-05, "loss": 0.2162, "step": 10835500 }, { "epoch": 6.5, "learning_rate": 1.0549853182407808e-05, "loss": 0.2135, "step": 10836000 }, { "epoch": 6.5, "learning_rate": 1.0547753216847244e-05, "loss": 0.2168, "step": 10836500 }, { "epoch": 6.5, "learning_rate": 1.0545653251286679e-05, "loss": 0.2168, "step": 10837000 }, { "epoch": 6.5, "learning_rate": 1.0543553285726114e-05, "loss": 0.2143, "step": 10837500 }, { "epoch": 6.5, "learning_rate": 1.0541453320165549e-05, "loss": 0.2155, "step": 10838000 }, { "epoch": 6.5, "learning_rate": 1.0539353354604984e-05, "loss": 0.2157, "step": 10838500 }, { "epoch": 6.5, "learning_rate": 1.053725338904442e-05, "loss": 0.2189, "step": 10839000 }, { "epoch": 6.5, "learning_rate": 1.0535153423483855e-05, "loss": 0.2131, "step": 10839500 }, { "epoch": 6.5, "learning_rate": 1.053305345792329e-05, "loss": 0.2113, "step": 10840000 }, { "epoch": 6.5, "learning_rate": 1.0530953492362725e-05, "loss": 0.2212, "step": 10840500 }, { "epoch": 6.5, "learning_rate": 1.052885352680216e-05, "loss": 0.2199, "step": 10841000 }, { "epoch": 6.5, "learning_rate": 1.0526757761172717e-05, "loss": 0.2141, "step": 10841500 }, { "epoch": 6.5, "learning_rate": 1.0524657795612152e-05, "loss": 0.2161, "step": 10842000 }, { "epoch": 6.5, "learning_rate": 1.0522557830051587e-05, "loss": 0.2182, "step": 10842500 }, { "epoch": 6.5, "learning_rate": 1.0520457864491022e-05, "loss": 0.2113, "step": 10843000 }, { "epoch": 6.5, "learning_rate": 1.0518357898930458e-05, "loss": 0.216, "step": 10843500 }, { "epoch": 6.5, "learning_rate": 1.0516257933369893e-05, "loss": 0.2178, "step": 10844000 }, { "epoch": 6.5, "learning_rate": 1.0514157967809328e-05, "loss": 0.2133, "step": 10844500 }, { "epoch": 6.5, "learning_rate": 1.0512058002248763e-05, "loss": 0.214, "step": 10845000 }, { "epoch": 6.5, "learning_rate": 1.050996643655044e-05, "loss": 0.2132, "step": 10845500 }, { "epoch": 6.5, "learning_rate": 1.0507866470989875e-05, "loss": 0.219, "step": 10846000 }, { "epoch": 6.5, "learning_rate": 1.0505766505429312e-05, "loss": 0.2185, "step": 10846500 }, { "epoch": 6.5, "learning_rate": 1.0503666539868746e-05, "loss": 0.2105, "step": 10847000 }, { "epoch": 6.5, "learning_rate": 1.050156657430818e-05, "loss": 0.2172, "step": 10847500 }, { "epoch": 6.5, "learning_rate": 1.0499466608747616e-05, "loss": 0.2125, "step": 10848000 }, { "epoch": 6.5, "learning_rate": 1.0497366643187051e-05, "loss": 0.2141, "step": 10848500 }, { "epoch": 6.5, "learning_rate": 1.0495266677626488e-05, "loss": 0.2159, "step": 10849000 }, { "epoch": 6.5, "learning_rate": 1.0493170911997043e-05, "loss": 0.2157, "step": 10849500 }, { "epoch": 6.51, "learning_rate": 1.0491070946436478e-05, "loss": 0.2195, "step": 10850000 }, { "epoch": 6.51, "learning_rate": 1.0488970980875913e-05, "loss": 0.2215, "step": 10850500 }, { "epoch": 6.51, "learning_rate": 1.0486871015315349e-05, "loss": 0.2137, "step": 10851000 }, { "epoch": 6.51, "learning_rate": 1.0484775249685904e-05, "loss": 0.2162, "step": 10851500 }, { "epoch": 6.51, "learning_rate": 1.048267528412534e-05, "loss": 0.2168, "step": 10852000 }, { "epoch": 6.51, "learning_rate": 1.0480575318564776e-05, "loss": 0.2112, "step": 10852500 }, { "epoch": 6.51, "learning_rate": 1.047847535300421e-05, "loss": 0.2143, "step": 10853000 }, { "epoch": 6.51, "learning_rate": 1.0476375387443646e-05, "loss": 0.222, "step": 10853500 }, { "epoch": 6.51, "learning_rate": 1.0474275421883081e-05, "loss": 0.2125, "step": 10854000 }, { "epoch": 6.51, "learning_rate": 1.0472175456322516e-05, "loss": 0.2177, "step": 10854500 }, { "epoch": 6.51, "learning_rate": 1.0470079690693073e-05, "loss": 0.2212, "step": 10855000 }, { "epoch": 6.51, "learning_rate": 1.0467979725132507e-05, "loss": 0.2147, "step": 10855500 }, { "epoch": 6.51, "learning_rate": 1.0465879759571944e-05, "loss": 0.213, "step": 10856000 }, { "epoch": 6.51, "learning_rate": 1.0463779794011377e-05, "loss": 0.2141, "step": 10856500 }, { "epoch": 6.51, "learning_rate": 1.0461679828450812e-05, "loss": 0.2153, "step": 10857000 }, { "epoch": 6.51, "learning_rate": 1.045958406282137e-05, "loss": 0.2156, "step": 10857500 }, { "epoch": 6.51, "learning_rate": 1.0457484097260804e-05, "loss": 0.2139, "step": 10858000 }, { "epoch": 6.51, "learning_rate": 1.045538413170024e-05, "loss": 0.2208, "step": 10858500 }, { "epoch": 6.51, "learning_rate": 1.0453284166139675e-05, "loss": 0.2214, "step": 10859000 }, { "epoch": 6.51, "learning_rate": 1.045118420057911e-05, "loss": 0.2135, "step": 10859500 }, { "epoch": 6.51, "learning_rate": 1.0449084235018547e-05, "loss": 0.2165, "step": 10860000 }, { "epoch": 6.51, "learning_rate": 1.0446988469389102e-05, "loss": 0.215, "step": 10860500 }, { "epoch": 6.51, "learning_rate": 1.0444888503828537e-05, "loss": 0.2154, "step": 10861000 }, { "epoch": 6.51, "learning_rate": 1.0442788538267972e-05, "loss": 0.2175, "step": 10861500 }, { "epoch": 6.51, "learning_rate": 1.0440688572707407e-05, "loss": 0.2122, "step": 10862000 }, { "epoch": 6.51, "learning_rate": 1.0438588607146843e-05, "loss": 0.2169, "step": 10862500 }, { "epoch": 6.51, "learning_rate": 1.0436488641586278e-05, "loss": 0.2146, "step": 10863000 }, { "epoch": 6.51, "learning_rate": 1.0434392875956835e-05, "loss": 0.2136, "step": 10863500 }, { "epoch": 6.51, "learning_rate": 1.0432292910396268e-05, "loss": 0.2143, "step": 10864000 }, { "epoch": 6.51, "learning_rate": 1.0430192944835705e-05, "loss": 0.2185, "step": 10864500 }, { "epoch": 6.51, "learning_rate": 1.0428092979275139e-05, "loss": 0.2187, "step": 10865000 }, { "epoch": 6.51, "learning_rate": 1.0425993013714575e-05, "loss": 0.2131, "step": 10865500 }, { "epoch": 6.51, "learning_rate": 1.042389304815401e-05, "loss": 0.2138, "step": 10866000 }, { "epoch": 6.51, "learning_rate": 1.0421793082593444e-05, "loss": 0.2178, "step": 10866500 }, { "epoch": 6.52, "learning_rate": 1.0419697316964003e-05, "loss": 0.2166, "step": 10867000 }, { "epoch": 6.52, "learning_rate": 1.0417597351403436e-05, "loss": 0.2169, "step": 10867500 }, { "epoch": 6.52, "learning_rate": 1.0415497385842871e-05, "loss": 0.2113, "step": 10868000 }, { "epoch": 6.52, "learning_rate": 1.0413397420282308e-05, "loss": 0.2153, "step": 10868500 }, { "epoch": 6.52, "learning_rate": 1.0411297454721742e-05, "loss": 0.2159, "step": 10869000 }, { "epoch": 6.52, "learning_rate": 1.0409197489161178e-05, "loss": 0.2127, "step": 10869500 }, { "epoch": 6.52, "learning_rate": 1.0407097523600614e-05, "loss": 0.2177, "step": 10870000 }, { "epoch": 6.52, "learning_rate": 1.0404997558040047e-05, "loss": 0.2187, "step": 10870500 }, { "epoch": 6.52, "learning_rate": 1.0402905992341724e-05, "loss": 0.2119, "step": 10871000 }, { "epoch": 6.52, "learning_rate": 1.0400806026781161e-05, "loss": 0.214, "step": 10871500 }, { "epoch": 6.52, "learning_rate": 1.0398706061220596e-05, "loss": 0.2146, "step": 10872000 }, { "epoch": 6.52, "learning_rate": 1.0396606095660031e-05, "loss": 0.2175, "step": 10872500 }, { "epoch": 6.52, "learning_rate": 1.0394506130099466e-05, "loss": 0.2148, "step": 10873000 }, { "epoch": 6.52, "learning_rate": 1.0392406164538902e-05, "loss": 0.2168, "step": 10873500 }, { "epoch": 6.52, "learning_rate": 1.0390310398909459e-05, "loss": 0.2179, "step": 10874000 }, { "epoch": 6.52, "learning_rate": 1.0388210433348894e-05, "loss": 0.2145, "step": 10874500 }, { "epoch": 6.52, "learning_rate": 1.0386110467788327e-05, "loss": 0.2194, "step": 10875000 }, { "epoch": 6.52, "learning_rate": 1.0384010502227764e-05, "loss": 0.2182, "step": 10875500 }, { "epoch": 6.52, "learning_rate": 1.0381910536667197e-05, "loss": 0.2133, "step": 10876000 }, { "epoch": 6.52, "learning_rate": 1.0379814771037754e-05, "loss": 0.2187, "step": 10876500 }, { "epoch": 6.52, "learning_rate": 1.037771480547719e-05, "loss": 0.2076, "step": 10877000 }, { "epoch": 6.52, "learning_rate": 1.0375614839916625e-05, "loss": 0.2186, "step": 10877500 }, { "epoch": 6.52, "learning_rate": 1.0373514874356062e-05, "loss": 0.2141, "step": 10878000 }, { "epoch": 6.52, "learning_rate": 1.0371414908795495e-05, "loss": 0.2184, "step": 10878500 }, { "epoch": 6.52, "learning_rate": 1.036931494323493e-05, "loss": 0.2168, "step": 10879000 }, { "epoch": 6.52, "learning_rate": 1.0367219177605487e-05, "loss": 0.211, "step": 10879500 }, { "epoch": 6.52, "learning_rate": 1.0365119212044922e-05, "loss": 0.2083, "step": 10880000 }, { "epoch": 6.52, "learning_rate": 1.0363019246484357e-05, "loss": 0.2156, "step": 10880500 }, { "epoch": 6.52, "learning_rate": 1.0360919280923793e-05, "loss": 0.2159, "step": 10881000 }, { "epoch": 6.52, "learning_rate": 1.0358819315363228e-05, "loss": 0.2123, "step": 10881500 }, { "epoch": 6.52, "learning_rate": 1.0356723549733783e-05, "loss": 0.2125, "step": 10882000 }, { "epoch": 6.52, "learning_rate": 1.035462358417322e-05, "loss": 0.2189, "step": 10882500 }, { "epoch": 6.52, "learning_rate": 1.0352523618612655e-05, "loss": 0.2186, "step": 10883000 }, { "epoch": 6.53, "learning_rate": 1.035042365305209e-05, "loss": 0.2118, "step": 10883500 }, { "epoch": 6.53, "learning_rate": 1.0348323687491525e-05, "loss": 0.2151, "step": 10884000 }, { "epoch": 6.53, "learning_rate": 1.0346223721930959e-05, "loss": 0.2139, "step": 10884500 }, { "epoch": 6.53, "learning_rate": 1.0344123756370396e-05, "loss": 0.215, "step": 10885000 }, { "epoch": 6.53, "learning_rate": 1.034202379080983e-05, "loss": 0.217, "step": 10885500 }, { "epoch": 6.53, "learning_rate": 1.0339928025180386e-05, "loss": 0.2156, "step": 10886000 }, { "epoch": 6.53, "learning_rate": 1.0337828059619823e-05, "loss": 0.2159, "step": 10886500 }, { "epoch": 6.53, "learning_rate": 1.0335728094059256e-05, "loss": 0.2161, "step": 10887000 }, { "epoch": 6.53, "learning_rate": 1.0333628128498693e-05, "loss": 0.2169, "step": 10887500 }, { "epoch": 6.53, "learning_rate": 1.033153656280037e-05, "loss": 0.2173, "step": 10888000 }, { "epoch": 6.53, "learning_rate": 1.0329436597239805e-05, "loss": 0.2125, "step": 10888500 }, { "epoch": 6.53, "learning_rate": 1.032734083161036e-05, "loss": 0.2161, "step": 10889000 }, { "epoch": 6.53, "learning_rate": 1.0325240866049798e-05, "loss": 0.2141, "step": 10889500 }, { "epoch": 6.53, "learning_rate": 1.0323140900489231e-05, "loss": 0.2132, "step": 10890000 }, { "epoch": 6.53, "learning_rate": 1.0321040934928666e-05, "loss": 0.2148, "step": 10890500 }, { "epoch": 6.53, "learning_rate": 1.0318940969368103e-05, "loss": 0.2139, "step": 10891000 }, { "epoch": 6.53, "learning_rate": 1.0316841003807536e-05, "loss": 0.217, "step": 10891500 }, { "epoch": 6.53, "learning_rate": 1.0314741038246973e-05, "loss": 0.2167, "step": 10892000 }, { "epoch": 6.53, "learning_rate": 1.0312641072686408e-05, "loss": 0.2141, "step": 10892500 }, { "epoch": 6.53, "learning_rate": 1.0310541107125842e-05, "loss": 0.2147, "step": 10893000 }, { "epoch": 6.53, "learning_rate": 1.0308441141565279e-05, "loss": 0.2114, "step": 10893500 }, { "epoch": 6.53, "learning_rate": 1.0306341176004712e-05, "loss": 0.2107, "step": 10894000 }, { "epoch": 6.53, "learning_rate": 1.0304241210444149e-05, "loss": 0.2192, "step": 10894500 }, { "epoch": 6.53, "learning_rate": 1.0302145444814704e-05, "loss": 0.2129, "step": 10895000 }, { "epoch": 6.53, "learning_rate": 1.030004547925414e-05, "loss": 0.2165, "step": 10895500 }, { "epoch": 6.53, "learning_rate": 1.0297949713624696e-05, "loss": 0.2173, "step": 10896000 }, { "epoch": 6.53, "learning_rate": 1.0295849748064132e-05, "loss": 0.2131, "step": 10896500 }, { "epoch": 6.53, "learning_rate": 1.0293749782503567e-05, "loss": 0.2133, "step": 10897000 }, { "epoch": 6.53, "learning_rate": 1.0291649816943002e-05, "loss": 0.2157, "step": 10897500 }, { "epoch": 6.53, "learning_rate": 1.0289549851382437e-05, "loss": 0.2093, "step": 10898000 }, { "epoch": 6.53, "learning_rate": 1.0287449885821872e-05, "loss": 0.2143, "step": 10898500 }, { "epoch": 6.53, "learning_rate": 1.0285349920261307e-05, "loss": 0.2126, "step": 10899000 }, { "epoch": 6.53, "learning_rate": 1.0283254154631864e-05, "loss": 0.2208, "step": 10899500 }, { "epoch": 6.53, "learning_rate": 1.0281154189071298e-05, "loss": 0.2179, "step": 10900000 }, { "epoch": 6.53, "eval_loss": 0.2048807442188263, "eval_runtime": 1454.4861, "eval_samples_per_second": 362.135, "eval_steps_per_second": 60.356, "step": 10900000 }, { "epoch": 6.54, "learning_rate": 1.0279054223510735e-05, "loss": 0.2149, "step": 10900500 }, { "epoch": 6.54, "learning_rate": 1.027695425795017e-05, "loss": 0.2125, "step": 10901000 }, { "epoch": 6.54, "learning_rate": 1.0274854292389605e-05, "loss": 0.2108, "step": 10901500 }, { "epoch": 6.54, "learning_rate": 1.027275432682904e-05, "loss": 0.216, "step": 10902000 }, { "epoch": 6.54, "learning_rate": 1.0270658561199595e-05, "loss": 0.2164, "step": 10902500 }, { "epoch": 6.54, "learning_rate": 1.0268558595639032e-05, "loss": 0.2155, "step": 10903000 }, { "epoch": 6.54, "learning_rate": 1.0266458630078467e-05, "loss": 0.2136, "step": 10903500 }, { "epoch": 6.54, "learning_rate": 1.02643586645179e-05, "loss": 0.2181, "step": 10904000 }, { "epoch": 6.54, "learning_rate": 1.0262258698957338e-05, "loss": 0.2172, "step": 10904500 }, { "epoch": 6.54, "learning_rate": 1.0260158733396771e-05, "loss": 0.2114, "step": 10905000 }, { "epoch": 6.54, "learning_rate": 1.0258062967767328e-05, "loss": 0.2184, "step": 10905500 }, { "epoch": 6.54, "learning_rate": 1.0255963002206763e-05, "loss": 0.2053, "step": 10906000 }, { "epoch": 6.54, "learning_rate": 1.0253863036646198e-05, "loss": 0.2145, "step": 10906500 }, { "epoch": 6.54, "learning_rate": 1.0251763071085635e-05, "loss": 0.216, "step": 10907000 }, { "epoch": 6.54, "learning_rate": 1.0249663105525069e-05, "loss": 0.2132, "step": 10907500 }, { "epoch": 6.54, "learning_rate": 1.0247563139964504e-05, "loss": 0.2158, "step": 10908000 }, { "epoch": 6.54, "learning_rate": 1.024546317440394e-05, "loss": 0.2189, "step": 10908500 }, { "epoch": 6.54, "learning_rate": 1.0243367408774496e-05, "loss": 0.2171, "step": 10909000 }, { "epoch": 6.54, "learning_rate": 1.0241267443213931e-05, "loss": 0.2136, "step": 10909500 }, { "epoch": 6.54, "learning_rate": 1.0239167477653366e-05, "loss": 0.2152, "step": 10910000 }, { "epoch": 6.54, "learning_rate": 1.0237067512092801e-05, "loss": 0.2117, "step": 10910500 }, { "epoch": 6.54, "learning_rate": 1.0234971746463357e-05, "loss": 0.2164, "step": 10911000 }, { "epoch": 6.54, "learning_rate": 1.0232871780902794e-05, "loss": 0.215, "step": 10911500 }, { "epoch": 6.54, "learning_rate": 1.0230771815342229e-05, "loss": 0.2207, "step": 10912000 }, { "epoch": 6.54, "learning_rate": 1.0228671849781664e-05, "loss": 0.2109, "step": 10912500 }, { "epoch": 6.54, "learning_rate": 1.0226571884221099e-05, "loss": 0.2104, "step": 10913000 }, { "epoch": 6.54, "learning_rate": 1.0224471918660532e-05, "loss": 0.211, "step": 10913500 }, { "epoch": 6.54, "learning_rate": 1.022237195309997e-05, "loss": 0.2149, "step": 10914000 }, { "epoch": 6.54, "learning_rate": 1.0220271987539404e-05, "loss": 0.2163, "step": 10914500 }, { "epoch": 6.54, "learning_rate": 1.021817622190996e-05, "loss": 0.2128, "step": 10915000 }, { "epoch": 6.54, "learning_rate": 1.0216076256349397e-05, "loss": 0.2167, "step": 10915500 }, { "epoch": 6.54, "learning_rate": 1.021397629078883e-05, "loss": 0.2175, "step": 10916000 }, { "epoch": 6.54, "learning_rate": 1.0211876325228267e-05, "loss": 0.2127, "step": 10916500 }, { "epoch": 6.55, "learning_rate": 1.0209780559598822e-05, "loss": 0.2131, "step": 10917000 }, { "epoch": 6.55, "learning_rate": 1.0207680594038257e-05, "loss": 0.2157, "step": 10917500 }, { "epoch": 6.55, "learning_rate": 1.0205580628477694e-05, "loss": 0.2178, "step": 10918000 }, { "epoch": 6.55, "learning_rate": 1.020348486284825e-05, "loss": 0.2172, "step": 10918500 }, { "epoch": 6.55, "learning_rate": 1.0201384897287685e-05, "loss": 0.2142, "step": 10919000 }, { "epoch": 6.55, "learning_rate": 1.019928493172712e-05, "loss": 0.2154, "step": 10919500 }, { "epoch": 6.55, "learning_rate": 1.0197184966166555e-05, "loss": 0.2194, "step": 10920000 }, { "epoch": 6.55, "learning_rate": 1.019508500060599e-05, "loss": 0.2177, "step": 10920500 }, { "epoch": 6.55, "learning_rate": 1.0192989234976547e-05, "loss": 0.2132, "step": 10921000 }, { "epoch": 6.55, "learning_rate": 1.0190889269415982e-05, "loss": 0.2196, "step": 10921500 }, { "epoch": 6.55, "learning_rate": 1.0188789303855416e-05, "loss": 0.2127, "step": 10922000 }, { "epoch": 6.55, "learning_rate": 1.0186689338294852e-05, "loss": 0.2125, "step": 10922500 }, { "epoch": 6.55, "learning_rate": 1.0184589372734286e-05, "loss": 0.2127, "step": 10923000 }, { "epoch": 6.55, "learning_rate": 1.0182489407173723e-05, "loss": 0.2099, "step": 10923500 }, { "epoch": 6.55, "learning_rate": 1.0180393641544278e-05, "loss": 0.2171, "step": 10924000 }, { "epoch": 6.55, "learning_rate": 1.0178293675983713e-05, "loss": 0.2163, "step": 10924500 }, { "epoch": 6.55, "learning_rate": 1.017619371042315e-05, "loss": 0.2149, "step": 10925000 }, { "epoch": 6.55, "learning_rate": 1.0174093744862584e-05, "loss": 0.216, "step": 10925500 }, { "epoch": 6.55, "learning_rate": 1.017199797923314e-05, "loss": 0.2158, "step": 10926000 }, { "epoch": 6.55, "learning_rate": 1.0169898013672576e-05, "loss": 0.2156, "step": 10926500 }, { "epoch": 6.55, "learning_rate": 1.016779804811201e-05, "loss": 0.2167, "step": 10927000 }, { "epoch": 6.55, "learning_rate": 1.0165698082551446e-05, "loss": 0.2157, "step": 10927500 }, { "epoch": 6.55, "learning_rate": 1.0163598116990881e-05, "loss": 0.2105, "step": 10928000 }, { "epoch": 6.55, "learning_rate": 1.0161498151430316e-05, "loss": 0.2142, "step": 10928500 }, { "epoch": 6.55, "learning_rate": 1.0159402385800872e-05, "loss": 0.2167, "step": 10929000 }, { "epoch": 6.55, "learning_rate": 1.0157302420240308e-05, "loss": 0.2094, "step": 10929500 }, { "epoch": 6.55, "learning_rate": 1.0155202454679744e-05, "loss": 0.2212, "step": 10930000 }, { "epoch": 6.55, "learning_rate": 1.0153102489119179e-05, "loss": 0.2162, "step": 10930500 }, { "epoch": 6.55, "learning_rate": 1.0151002523558614e-05, "loss": 0.2145, "step": 10931000 }, { "epoch": 6.55, "learning_rate": 1.0148902557998047e-05, "loss": 0.2172, "step": 10931500 }, { "epoch": 6.55, "learning_rate": 1.0146802592437484e-05, "loss": 0.2123, "step": 10932000 }, { "epoch": 6.55, "learning_rate": 1.014470682680804e-05, "loss": 0.2161, "step": 10932500 }, { "epoch": 6.55, "learning_rate": 1.0142606861247475e-05, "loss": 0.2189, "step": 10933000 }, { "epoch": 6.56, "learning_rate": 1.0140506895686911e-05, "loss": 0.2185, "step": 10933500 }, { "epoch": 6.56, "learning_rate": 1.0138406930126345e-05, "loss": 0.2089, "step": 10934000 }, { "epoch": 6.56, "learning_rate": 1.0136306964565782e-05, "loss": 0.2163, "step": 10934500 }, { "epoch": 6.56, "learning_rate": 1.0134206999005217e-05, "loss": 0.2132, "step": 10935000 }, { "epoch": 6.56, "learning_rate": 1.013210703344465e-05, "loss": 0.2149, "step": 10935500 }, { "epoch": 6.56, "learning_rate": 1.0130007067884087e-05, "loss": 0.2148, "step": 10936000 }, { "epoch": 6.56, "learning_rate": 1.0127915502185764e-05, "loss": 0.2157, "step": 10936500 }, { "epoch": 6.56, "learning_rate": 1.01258155366252e-05, "loss": 0.2149, "step": 10937000 }, { "epoch": 6.56, "learning_rate": 1.0123715571064635e-05, "loss": 0.2149, "step": 10937500 }, { "epoch": 6.56, "learning_rate": 1.012161560550407e-05, "loss": 0.2116, "step": 10938000 }, { "epoch": 6.56, "learning_rate": 1.0119515639943505e-05, "loss": 0.2183, "step": 10938500 }, { "epoch": 6.56, "learning_rate": 1.0117419874314062e-05, "loss": 0.2114, "step": 10939000 }, { "epoch": 6.56, "learning_rate": 1.0115319908753497e-05, "loss": 0.2189, "step": 10939500 }, { "epoch": 6.56, "learning_rate": 1.011321994319293e-05, "loss": 0.2146, "step": 10940000 }, { "epoch": 6.56, "learning_rate": 1.0111119977632367e-05, "loss": 0.2216, "step": 10940500 }, { "epoch": 6.56, "learning_rate": 1.0109020012071802e-05, "loss": 0.2138, "step": 10941000 }, { "epoch": 6.56, "learning_rate": 1.0106924246442358e-05, "loss": 0.2163, "step": 10941500 }, { "epoch": 6.56, "learning_rate": 1.0104824280881795e-05, "loss": 0.2208, "step": 10942000 }, { "epoch": 6.56, "learning_rate": 1.0102724315321228e-05, "loss": 0.2165, "step": 10942500 }, { "epoch": 6.56, "learning_rate": 1.0100624349760665e-05, "loss": 0.2124, "step": 10943000 }, { "epoch": 6.56, "learning_rate": 1.0098524384200098e-05, "loss": 0.2131, "step": 10943500 }, { "epoch": 6.56, "learning_rate": 1.0096424418639533e-05, "loss": 0.212, "step": 10944000 }, { "epoch": 6.56, "learning_rate": 1.009432445307897e-05, "loss": 0.2144, "step": 10944500 }, { "epoch": 6.56, "learning_rate": 1.0092224487518404e-05, "loss": 0.2108, "step": 10945000 }, { "epoch": 6.56, "learning_rate": 1.009012872188896e-05, "loss": 0.2138, "step": 10945500 }, { "epoch": 6.56, "learning_rate": 1.0088028756328396e-05, "loss": 0.2092, "step": 10946000 }, { "epoch": 6.56, "learning_rate": 1.0085928790767831e-05, "loss": 0.2158, "step": 10946500 }, { "epoch": 6.56, "learning_rate": 1.0083828825207268e-05, "loss": 0.2131, "step": 10947000 }, { "epoch": 6.56, "learning_rate": 1.0081737259508945e-05, "loss": 0.2146, "step": 10947500 }, { "epoch": 6.56, "learning_rate": 1.0079637293948378e-05, "loss": 0.212, "step": 10948000 }, { "epoch": 6.56, "learning_rate": 1.0077537328387814e-05, "loss": 0.2165, "step": 10948500 }, { "epoch": 6.56, "learning_rate": 1.007543736282725e-05, "loss": 0.2137, "step": 10949000 }, { "epoch": 6.56, "learning_rate": 1.0073337397266684e-05, "loss": 0.2132, "step": 10949500 }, { "epoch": 6.56, "learning_rate": 1.007124163163724e-05, "loss": 0.2093, "step": 10950000 }, { "epoch": 6.57, "learning_rate": 1.0069141666076676e-05, "loss": 0.219, "step": 10950500 }, { "epoch": 6.57, "learning_rate": 1.0067041700516111e-05, "loss": 0.2128, "step": 10951000 }, { "epoch": 6.57, "learning_rate": 1.0064941734955548e-05, "loss": 0.2131, "step": 10951500 }, { "epoch": 6.57, "learning_rate": 1.0062845969326103e-05, "loss": 0.2191, "step": 10952000 }, { "epoch": 6.57, "learning_rate": 1.0060746003765538e-05, "loss": 0.217, "step": 10952500 }, { "epoch": 6.57, "learning_rate": 1.0058650238136095e-05, "loss": 0.2197, "step": 10953000 }, { "epoch": 6.57, "learning_rate": 1.005655027257553e-05, "loss": 0.2116, "step": 10953500 }, { "epoch": 6.57, "learning_rate": 1.0054450307014964e-05, "loss": 0.2186, "step": 10954000 }, { "epoch": 6.57, "learning_rate": 1.00523503414544e-05, "loss": 0.2185, "step": 10954500 }, { "epoch": 6.57, "learning_rate": 1.0050250375893834e-05, "loss": 0.2175, "step": 10955000 }, { "epoch": 6.57, "learning_rate": 1.004815041033327e-05, "loss": 0.2136, "step": 10955500 }, { "epoch": 6.57, "learning_rate": 1.0046050444772706e-05, "loss": 0.2191, "step": 10956000 }, { "epoch": 6.57, "learning_rate": 1.004395047921214e-05, "loss": 0.2122, "step": 10956500 }, { "epoch": 6.57, "learning_rate": 1.0041850513651577e-05, "loss": 0.2104, "step": 10957000 }, { "epoch": 6.57, "learning_rate": 1.0039754748022132e-05, "loss": 0.2122, "step": 10957500 }, { "epoch": 6.57, "learning_rate": 1.0037654782461567e-05, "loss": 0.2189, "step": 10958000 }, { "epoch": 6.57, "learning_rate": 1.0035554816901004e-05, "loss": 0.2171, "step": 10958500 }, { "epoch": 6.57, "learning_rate": 1.0033454851340437e-05, "loss": 0.2144, "step": 10959000 }, { "epoch": 6.57, "learning_rate": 1.0031354885779872e-05, "loss": 0.2147, "step": 10959500 }, { "epoch": 6.57, "learning_rate": 1.002925492021931e-05, "loss": 0.2177, "step": 10960000 }, { "epoch": 6.57, "learning_rate": 1.0027159154589865e-05, "loss": 0.2164, "step": 10960500 }, { "epoch": 6.57, "learning_rate": 1.00250591890293e-05, "loss": 0.2124, "step": 10961000 }, { "epoch": 6.57, "learning_rate": 1.0022959223468735e-05, "loss": 0.2148, "step": 10961500 }, { "epoch": 6.57, "learning_rate": 1.002085925790817e-05, "loss": 0.215, "step": 10962000 }, { "epoch": 6.57, "learning_rate": 1.0018763492278725e-05, "loss": 0.2136, "step": 10962500 }, { "epoch": 6.57, "learning_rate": 1.0016663526718162e-05, "loss": 0.2189, "step": 10963000 }, { "epoch": 6.57, "learning_rate": 1.0014563561157596e-05, "loss": 0.2224, "step": 10963500 }, { "epoch": 6.57, "learning_rate": 1.0012463595597032e-05, "loss": 0.214, "step": 10964000 }, { "epoch": 6.57, "learning_rate": 1.0010363630036468e-05, "loss": 0.2197, "step": 10964500 }, { "epoch": 6.57, "learning_rate": 1.0008263664475901e-05, "loss": 0.2171, "step": 10965000 }, { "epoch": 6.57, "learning_rate": 1.000616789884646e-05, "loss": 0.2167, "step": 10965500 }, { "epoch": 6.57, "learning_rate": 1.0004067933285893e-05, "loss": 0.2103, "step": 10966000 }, { "epoch": 6.57, "learning_rate": 1.000197216765645e-05, "loss": 0.2149, "step": 10966500 }, { "epoch": 6.58, "learning_rate": 9.999872202095885e-06, "loss": 0.2143, "step": 10967000 }, { "epoch": 6.58, "learning_rate": 9.99777223653532e-06, "loss": 0.2108, "step": 10967500 }, { "epoch": 6.58, "learning_rate": 9.995672270974756e-06, "loss": 0.2127, "step": 10968000 }, { "epoch": 6.58, "learning_rate": 9.99357230541419e-06, "loss": 0.212, "step": 10968500 }, { "epoch": 6.58, "learning_rate": 9.991472339853626e-06, "loss": 0.2134, "step": 10969000 }, { "epoch": 6.58, "learning_rate": 9.989372374293063e-06, "loss": 0.2117, "step": 10969500 }, { "epoch": 6.58, "learning_rate": 9.987272408732496e-06, "loss": 0.2149, "step": 10970000 }, { "epoch": 6.58, "learning_rate": 9.985172443171931e-06, "loss": 0.2149, "step": 10970500 }, { "epoch": 6.58, "learning_rate": 9.983072477611367e-06, "loss": 0.2197, "step": 10971000 }, { "epoch": 6.58, "learning_rate": 9.980972512050802e-06, "loss": 0.2185, "step": 10971500 }, { "epoch": 6.58, "learning_rate": 9.978876746421357e-06, "loss": 0.2182, "step": 10972000 }, { "epoch": 6.58, "learning_rate": 9.976776780860794e-06, "loss": 0.2112, "step": 10972500 }, { "epoch": 6.58, "learning_rate": 9.974676815300229e-06, "loss": 0.2161, "step": 10973000 }, { "epoch": 6.58, "learning_rate": 9.972576849739664e-06, "loss": 0.209, "step": 10973500 }, { "epoch": 6.58, "learning_rate": 9.9704768841791e-06, "loss": 0.2149, "step": 10974000 }, { "epoch": 6.58, "learning_rate": 9.968376918618534e-06, "loss": 0.2182, "step": 10974500 }, { "epoch": 6.58, "learning_rate": 9.96627695305797e-06, "loss": 0.2147, "step": 10975000 }, { "epoch": 6.58, "learning_rate": 9.964181187428527e-06, "loss": 0.2175, "step": 10975500 }, { "epoch": 6.58, "learning_rate": 9.96208122186796e-06, "loss": 0.2115, "step": 10976000 }, { "epoch": 6.58, "learning_rate": 9.959981256307397e-06, "loss": 0.2128, "step": 10976500 }, { "epoch": 6.58, "learning_rate": 9.957881290746832e-06, "loss": 0.2112, "step": 10977000 }, { "epoch": 6.58, "learning_rate": 9.955781325186267e-06, "loss": 0.2116, "step": 10977500 }, { "epoch": 6.58, "learning_rate": 9.953685559556824e-06, "loss": 0.2105, "step": 10978000 }, { "epoch": 6.58, "learning_rate": 9.951585593996258e-06, "loss": 0.2186, "step": 10978500 }, { "epoch": 6.58, "learning_rate": 9.949485628435694e-06, "loss": 0.2155, "step": 10979000 }, { "epoch": 6.58, "learning_rate": 9.94738566287513e-06, "loss": 0.2157, "step": 10979500 }, { "epoch": 6.58, "learning_rate": 9.945285697314563e-06, "loss": 0.2052, "step": 10980000 }, { "epoch": 6.58, "learning_rate": 9.943189931685122e-06, "loss": 0.2102, "step": 10980500 }, { "epoch": 6.58, "learning_rate": 9.941089966124555e-06, "loss": 0.2125, "step": 10981000 }, { "epoch": 6.58, "learning_rate": 9.93899000056399e-06, "loss": 0.2148, "step": 10981500 }, { "epoch": 6.58, "learning_rate": 9.936890035003425e-06, "loss": 0.2118, "step": 10982000 }, { "epoch": 6.58, "learning_rate": 9.93479006944286e-06, "loss": 0.2156, "step": 10982500 }, { "epoch": 6.58, "learning_rate": 9.932690103882297e-06, "loss": 0.22, "step": 10983000 }, { "epoch": 6.59, "learning_rate": 9.930590138321731e-06, "loss": 0.2172, "step": 10983500 }, { "epoch": 6.59, "learning_rate": 9.928490172761166e-06, "loss": 0.2165, "step": 10984000 }, { "epoch": 6.59, "learning_rate": 9.926398607062843e-06, "loss": 0.2114, "step": 10984500 }, { "epoch": 6.59, "learning_rate": 9.92429864150228e-06, "loss": 0.218, "step": 10985000 }, { "epoch": 6.59, "learning_rate": 9.922198675941713e-06, "loss": 0.2163, "step": 10985500 }, { "epoch": 6.59, "learning_rate": 9.92009871038115e-06, "loss": 0.2151, "step": 10986000 }, { "epoch": 6.59, "learning_rate": 9.917998744820585e-06, "loss": 0.2125, "step": 10986500 }, { "epoch": 6.59, "learning_rate": 9.915898779260019e-06, "loss": 0.2119, "step": 10987000 }, { "epoch": 6.59, "learning_rate": 9.913798813699456e-06, "loss": 0.2095, "step": 10987500 }, { "epoch": 6.59, "learning_rate": 9.911698848138891e-06, "loss": 0.2126, "step": 10988000 }, { "epoch": 6.59, "learning_rate": 9.909603082509446e-06, "loss": 0.2129, "step": 10988500 }, { "epoch": 6.59, "learning_rate": 9.907503116948883e-06, "loss": 0.2124, "step": 10989000 }, { "epoch": 6.59, "learning_rate": 9.905403151388316e-06, "loss": 0.2227, "step": 10989500 }, { "epoch": 6.59, "learning_rate": 9.903307385758873e-06, "loss": 0.2185, "step": 10990000 }, { "epoch": 6.59, "learning_rate": 9.901207420198309e-06, "loss": 0.2136, "step": 10990500 }, { "epoch": 6.59, "learning_rate": 9.899107454637744e-06, "loss": 0.2126, "step": 10991000 }, { "epoch": 6.59, "learning_rate": 9.897007489077179e-06, "loss": 0.2123, "step": 10991500 }, { "epoch": 6.59, "learning_rate": 9.894907523516614e-06, "loss": 0.2118, "step": 10992000 }, { "epoch": 6.59, "learning_rate": 9.89280755795605e-06, "loss": 0.2143, "step": 10992500 }, { "epoch": 6.59, "learning_rate": 9.890711792326606e-06, "loss": 0.2165, "step": 10993000 }, { "epoch": 6.59, "learning_rate": 9.888616026697161e-06, "loss": 0.2177, "step": 10993500 }, { "epoch": 6.59, "learning_rate": 9.886516061136597e-06, "loss": 0.2178, "step": 10994000 }, { "epoch": 6.59, "learning_rate": 9.884416095576033e-06, "loss": 0.2138, "step": 10994500 }, { "epoch": 6.59, "learning_rate": 9.882316130015467e-06, "loss": 0.2128, "step": 10995000 }, { "epoch": 6.59, "learning_rate": 9.880216164454902e-06, "loss": 0.2166, "step": 10995500 }, { "epoch": 6.59, "learning_rate": 9.878116198894339e-06, "loss": 0.2156, "step": 10996000 }, { "epoch": 6.59, "learning_rate": 9.876016233333772e-06, "loss": 0.2144, "step": 10996500 }, { "epoch": 6.59, "learning_rate": 9.87391626777321e-06, "loss": 0.2147, "step": 10997000 }, { "epoch": 6.59, "learning_rate": 9.871816302212644e-06, "loss": 0.2229, "step": 10997500 }, { "epoch": 6.59, "learning_rate": 9.8697205365832e-06, "loss": 0.2127, "step": 10998000 }, { "epoch": 6.59, "learning_rate": 9.867620571022636e-06, "loss": 0.2139, "step": 10998500 }, { "epoch": 6.59, "learning_rate": 9.86552060546207e-06, "loss": 0.2122, "step": 10999000 }, { "epoch": 6.59, "learning_rate": 9.863420639901505e-06, "loss": 0.2164, "step": 10999500 }, { "epoch": 6.59, "learning_rate": 9.86132067434094e-06, "loss": 0.2102, "step": 11000000 }, { "epoch": 6.59, "eval_loss": 0.2050001174211502, "eval_runtime": 1455.8706, "eval_samples_per_second": 361.79, "eval_steps_per_second": 60.299, "step": 11000000 }, { "epoch": 6.6, "learning_rate": 9.859220708780375e-06, "loss": 0.213, "step": 11000500 }, { "epoch": 6.6, "learning_rate": 9.857120743219812e-06, "loss": 0.2105, "step": 11001000 }, { "epoch": 6.6, "learning_rate": 9.855020777659246e-06, "loss": 0.2151, "step": 11001500 }, { "epoch": 6.6, "learning_rate": 9.852925012029803e-06, "loss": 0.2137, "step": 11002000 }, { "epoch": 6.6, "learning_rate": 9.850829246400358e-06, "loss": 0.2135, "step": 11002500 }, { "epoch": 6.6, "learning_rate": 9.848729280839795e-06, "loss": 0.2153, "step": 11003000 }, { "epoch": 6.6, "learning_rate": 9.846629315279228e-06, "loss": 0.2163, "step": 11003500 }, { "epoch": 6.6, "learning_rate": 9.844529349718665e-06, "loss": 0.2153, "step": 11004000 }, { "epoch": 6.6, "learning_rate": 9.8424293841581e-06, "loss": 0.2119, "step": 11004500 }, { "epoch": 6.6, "learning_rate": 9.840333618528656e-06, "loss": 0.2143, "step": 11005000 }, { "epoch": 6.6, "learning_rate": 9.838233652968092e-06, "loss": 0.2124, "step": 11005500 }, { "epoch": 6.6, "learning_rate": 9.836133687407526e-06, "loss": 0.2147, "step": 11006000 }, { "epoch": 6.6, "learning_rate": 9.834033721846961e-06, "loss": 0.2125, "step": 11006500 }, { "epoch": 6.6, "learning_rate": 9.831933756286398e-06, "loss": 0.2132, "step": 11007000 }, { "epoch": 6.6, "learning_rate": 9.829833790725831e-06, "loss": 0.215, "step": 11007500 }, { "epoch": 6.6, "learning_rate": 9.827738025096388e-06, "loss": 0.2155, "step": 11008000 }, { "epoch": 6.6, "learning_rate": 9.825638059535823e-06, "loss": 0.2184, "step": 11008500 }, { "epoch": 6.6, "learning_rate": 9.823538093975259e-06, "loss": 0.2118, "step": 11009000 }, { "epoch": 6.6, "learning_rate": 9.821438128414695e-06, "loss": 0.2196, "step": 11009500 }, { "epoch": 6.6, "learning_rate": 9.819338162854129e-06, "loss": 0.2134, "step": 11010000 }, { "epoch": 6.6, "learning_rate": 9.817238197293564e-06, "loss": 0.2141, "step": 11010500 }, { "epoch": 6.6, "learning_rate": 9.815138231733e-06, "loss": 0.2142, "step": 11011000 }, { "epoch": 6.6, "learning_rate": 9.813038266172434e-06, "loss": 0.2159, "step": 11011500 }, { "epoch": 6.6, "learning_rate": 9.810946700474111e-06, "loss": 0.2122, "step": 11012000 }, { "epoch": 6.6, "learning_rate": 9.808846734913548e-06, "loss": 0.2144, "step": 11012500 }, { "epoch": 6.6, "learning_rate": 9.806746769352982e-06, "loss": 0.2169, "step": 11013000 }, { "epoch": 6.6, "learning_rate": 9.804646803792417e-06, "loss": 0.2139, "step": 11013500 }, { "epoch": 6.6, "learning_rate": 9.802546838231854e-06, "loss": 0.2131, "step": 11014000 }, { "epoch": 6.6, "learning_rate": 9.800451072602409e-06, "loss": 0.2165, "step": 11014500 }, { "epoch": 6.6, "learning_rate": 9.798351107041844e-06, "loss": 0.2137, "step": 11015000 }, { "epoch": 6.6, "learning_rate": 9.79625114148128e-06, "loss": 0.2131, "step": 11015500 }, { "epoch": 6.6, "learning_rate": 9.794151175920714e-06, "loss": 0.2165, "step": 11016000 }, { "epoch": 6.6, "learning_rate": 9.792051210360151e-06, "loss": 0.2142, "step": 11016500 }, { "epoch": 6.61, "learning_rate": 9.789951244799585e-06, "loss": 0.2136, "step": 11017000 }, { "epoch": 6.61, "learning_rate": 9.787855479170142e-06, "loss": 0.2182, "step": 11017500 }, { "epoch": 6.61, "learning_rate": 9.785755513609577e-06, "loss": 0.2143, "step": 11018000 }, { "epoch": 6.61, "learning_rate": 9.783655548049012e-06, "loss": 0.2172, "step": 11018500 }, { "epoch": 6.61, "learning_rate": 9.781555582488447e-06, "loss": 0.2177, "step": 11019000 }, { "epoch": 6.61, "learning_rate": 9.779459816859004e-06, "loss": 0.2171, "step": 11019500 }, { "epoch": 6.61, "learning_rate": 9.77735985129844e-06, "loss": 0.2147, "step": 11020000 }, { "epoch": 6.61, "learning_rate": 9.775259885737873e-06, "loss": 0.2144, "step": 11020500 }, { "epoch": 6.61, "learning_rate": 9.77315992017731e-06, "loss": 0.214, "step": 11021000 }, { "epoch": 6.61, "learning_rate": 9.771059954616743e-06, "loss": 0.2163, "step": 11021500 }, { "epoch": 6.61, "learning_rate": 9.7689641889873e-06, "loss": 0.2203, "step": 11022000 }, { "epoch": 6.61, "learning_rate": 9.766864223426735e-06, "loss": 0.2074, "step": 11022500 }, { "epoch": 6.61, "learning_rate": 9.76476425786617e-06, "loss": 0.213, "step": 11023000 }, { "epoch": 6.61, "learning_rate": 9.762664292305607e-06, "loss": 0.2147, "step": 11023500 }, { "epoch": 6.61, "learning_rate": 9.760568526676162e-06, "loss": 0.2123, "step": 11024000 }, { "epoch": 6.61, "learning_rate": 9.758468561115598e-06, "loss": 0.2125, "step": 11024500 }, { "epoch": 6.61, "learning_rate": 9.756368595555033e-06, "loss": 0.2162, "step": 11025000 }, { "epoch": 6.61, "learning_rate": 9.754268629994468e-06, "loss": 0.2169, "step": 11025500 }, { "epoch": 6.61, "learning_rate": 9.752168664433903e-06, "loss": 0.2156, "step": 11026000 }, { "epoch": 6.61, "learning_rate": 9.75007289880446e-06, "loss": 0.2161, "step": 11026500 }, { "epoch": 6.61, "learning_rate": 9.747972933243895e-06, "loss": 0.2159, "step": 11027000 }, { "epoch": 6.61, "learning_rate": 9.745872967683329e-06, "loss": 0.2173, "step": 11027500 }, { "epoch": 6.61, "learning_rate": 9.743773002122765e-06, "loss": 0.2136, "step": 11028000 }, { "epoch": 6.61, "learning_rate": 9.74167723649332e-06, "loss": 0.213, "step": 11028500 }, { "epoch": 6.61, "learning_rate": 9.739577270932756e-06, "loss": 0.2123, "step": 11029000 }, { "epoch": 6.61, "learning_rate": 9.737477305372193e-06, "loss": 0.2109, "step": 11029500 }, { "epoch": 6.61, "learning_rate": 9.735377339811626e-06, "loss": 0.2156, "step": 11030000 }, { "epoch": 6.61, "learning_rate": 9.733277374251063e-06, "loss": 0.2161, "step": 11030500 }, { "epoch": 6.61, "learning_rate": 9.731177408690496e-06, "loss": 0.2117, "step": 11031000 }, { "epoch": 6.61, "learning_rate": 9.729081643061053e-06, "loss": 0.2171, "step": 11031500 }, { "epoch": 6.61, "learning_rate": 9.726981677500489e-06, "loss": 0.2166, "step": 11032000 }, { "epoch": 6.61, "learning_rate": 9.724881711939924e-06, "loss": 0.2132, "step": 11032500 }, { "epoch": 6.61, "learning_rate": 9.722781746379359e-06, "loss": 0.215, "step": 11033000 }, { "epoch": 6.62, "learning_rate": 9.720681780818794e-06, "loss": 0.2121, "step": 11033500 }, { "epoch": 6.62, "learning_rate": 9.718586015189351e-06, "loss": 0.2114, "step": 11034000 }, { "epoch": 6.62, "learning_rate": 9.716486049628786e-06, "loss": 0.2134, "step": 11034500 }, { "epoch": 6.62, "learning_rate": 9.714386084068221e-06, "loss": 0.2164, "step": 11035000 }, { "epoch": 6.62, "learning_rate": 9.712286118507656e-06, "loss": 0.2124, "step": 11035500 }, { "epoch": 6.62, "learning_rate": 9.710186152947092e-06, "loss": 0.2125, "step": 11036000 }, { "epoch": 6.62, "learning_rate": 9.708090387317649e-06, "loss": 0.2185, "step": 11036500 }, { "epoch": 6.62, "learning_rate": 9.705990421757082e-06, "loss": 0.2122, "step": 11037000 }, { "epoch": 6.62, "learning_rate": 9.703890456196519e-06, "loss": 0.2122, "step": 11037500 }, { "epoch": 6.62, "learning_rate": 9.701790490635954e-06, "loss": 0.2136, "step": 11038000 }, { "epoch": 6.62, "learning_rate": 9.699690525075388e-06, "loss": 0.2094, "step": 11038500 }, { "epoch": 6.62, "learning_rate": 9.697590559514824e-06, "loss": 0.2099, "step": 11039000 }, { "epoch": 6.62, "learning_rate": 9.695490593954258e-06, "loss": 0.2169, "step": 11039500 }, { "epoch": 6.62, "learning_rate": 9.693390628393695e-06, "loss": 0.2167, "step": 11040000 }, { "epoch": 6.62, "learning_rate": 9.69129486276425e-06, "loss": 0.2141, "step": 11040500 }, { "epoch": 6.62, "learning_rate": 9.689199097134807e-06, "loss": 0.2199, "step": 11041000 }, { "epoch": 6.62, "learning_rate": 9.687099131574242e-06, "loss": 0.2173, "step": 11041500 }, { "epoch": 6.62, "learning_rate": 9.684999166013677e-06, "loss": 0.2143, "step": 11042000 }, { "epoch": 6.62, "learning_rate": 9.682899200453112e-06, "loss": 0.2148, "step": 11042500 }, { "epoch": 6.62, "learning_rate": 9.680799234892548e-06, "loss": 0.2112, "step": 11043000 }, { "epoch": 6.62, "learning_rate": 9.678703469263104e-06, "loss": 0.2153, "step": 11043500 }, { "epoch": 6.62, "learning_rate": 9.676603503702538e-06, "loss": 0.2168, "step": 11044000 }, { "epoch": 6.62, "learning_rate": 9.674503538141975e-06, "loss": 0.2185, "step": 11044500 }, { "epoch": 6.62, "learning_rate": 9.67240357258141e-06, "loss": 0.2183, "step": 11045000 }, { "epoch": 6.62, "learning_rate": 9.670303607020843e-06, "loss": 0.2104, "step": 11045500 }, { "epoch": 6.62, "learning_rate": 9.66820364146028e-06, "loss": 0.2159, "step": 11046000 }, { "epoch": 6.62, "learning_rate": 9.666103675899715e-06, "loss": 0.2154, "step": 11046500 }, { "epoch": 6.62, "learning_rate": 9.66400371033915e-06, "loss": 0.2127, "step": 11047000 }, { "epoch": 6.62, "learning_rate": 9.661907944709708e-06, "loss": 0.2154, "step": 11047500 }, { "epoch": 6.62, "learning_rate": 9.659807979149141e-06, "loss": 0.2184, "step": 11048000 }, { "epoch": 6.62, "learning_rate": 9.657712213519698e-06, "loss": 0.2129, "step": 11048500 }, { "epoch": 6.62, "learning_rate": 9.655612247959133e-06, "loss": 0.2117, "step": 11049000 }, { "epoch": 6.62, "learning_rate": 9.653512282398568e-06, "loss": 0.2168, "step": 11049500 }, { "epoch": 6.62, "learning_rate": 9.651412316838005e-06, "loss": 0.2105, "step": 11050000 }, { "epoch": 6.63, "learning_rate": 9.649312351277439e-06, "loss": 0.2187, "step": 11050500 }, { "epoch": 6.63, "learning_rate": 9.647212385716874e-06, "loss": 0.2207, "step": 11051000 }, { "epoch": 6.63, "learning_rate": 9.645112420156309e-06, "loss": 0.2134, "step": 11051500 }, { "epoch": 6.63, "learning_rate": 9.643016654526866e-06, "loss": 0.2185, "step": 11052000 }, { "epoch": 6.63, "learning_rate": 9.640916688966301e-06, "loss": 0.2134, "step": 11052500 }, { "epoch": 6.63, "learning_rate": 9.638816723405736e-06, "loss": 0.2139, "step": 11053000 }, { "epoch": 6.63, "learning_rate": 9.636716757845171e-06, "loss": 0.2155, "step": 11053500 }, { "epoch": 6.63, "learning_rate": 9.634616792284606e-06, "loss": 0.2104, "step": 11054000 }, { "epoch": 6.63, "learning_rate": 9.632521026655163e-06, "loss": 0.2176, "step": 11054500 }, { "epoch": 6.63, "learning_rate": 9.630421061094597e-06, "loss": 0.2205, "step": 11055000 }, { "epoch": 6.63, "learning_rate": 9.628321095534034e-06, "loss": 0.2124, "step": 11055500 }, { "epoch": 6.63, "learning_rate": 9.626221129973469e-06, "loss": 0.2227, "step": 11056000 }, { "epoch": 6.63, "learning_rate": 9.624121164412902e-06, "loss": 0.2204, "step": 11056500 }, { "epoch": 6.63, "learning_rate": 9.62202119885234e-06, "loss": 0.2165, "step": 11057000 }, { "epoch": 6.63, "learning_rate": 9.619925433222894e-06, "loss": 0.2135, "step": 11057500 }, { "epoch": 6.63, "learning_rate": 9.61782546766233e-06, "loss": 0.2159, "step": 11058000 }, { "epoch": 6.63, "learning_rate": 9.615725502101766e-06, "loss": 0.2138, "step": 11058500 }, { "epoch": 6.63, "learning_rate": 9.6136255365412e-06, "loss": 0.2163, "step": 11059000 }, { "epoch": 6.63, "learning_rate": 9.611525570980637e-06, "loss": 0.2148, "step": 11059500 }, { "epoch": 6.63, "learning_rate": 9.60942560542007e-06, "loss": 0.2114, "step": 11060000 }, { "epoch": 6.63, "learning_rate": 9.607325639859505e-06, "loss": 0.2094, "step": 11060500 }, { "epoch": 6.63, "learning_rate": 9.605229874230062e-06, "loss": 0.21, "step": 11061000 }, { "epoch": 6.63, "learning_rate": 9.603129908669497e-06, "loss": 0.217, "step": 11061500 }, { "epoch": 6.63, "learning_rate": 9.601029943108933e-06, "loss": 0.2137, "step": 11062000 }, { "epoch": 6.63, "learning_rate": 9.598929977548368e-06, "loss": 0.2135, "step": 11062500 }, { "epoch": 6.63, "learning_rate": 9.596830011987803e-06, "loss": 0.2143, "step": 11063000 }, { "epoch": 6.63, "learning_rate": 9.59473004642724e-06, "loss": 0.2156, "step": 11063500 }, { "epoch": 6.63, "learning_rate": 9.592634280797795e-06, "loss": 0.2154, "step": 11064000 }, { "epoch": 6.63, "learning_rate": 9.59053431523723e-06, "loss": 0.222, "step": 11064500 }, { "epoch": 6.63, "learning_rate": 9.588434349676665e-06, "loss": 0.213, "step": 11065000 }, { "epoch": 6.63, "learning_rate": 9.5863343841161e-06, "loss": 0.2128, "step": 11065500 }, { "epoch": 6.63, "learning_rate": 9.584234418555536e-06, "loss": 0.2182, "step": 11066000 }, { "epoch": 6.63, "learning_rate": 9.582138652926093e-06, "loss": 0.2134, "step": 11066500 }, { "epoch": 6.64, "learning_rate": 9.580038687365528e-06, "loss": 0.2114, "step": 11067000 }, { "epoch": 6.64, "learning_rate": 9.577938721804961e-06, "loss": 0.2149, "step": 11067500 }, { "epoch": 6.64, "learning_rate": 9.575838756244398e-06, "loss": 0.2164, "step": 11068000 }, { "epoch": 6.64, "learning_rate": 9.573738790683832e-06, "loss": 0.2098, "step": 11068500 }, { "epoch": 6.64, "learning_rate": 9.571638825123268e-06, "loss": 0.2207, "step": 11069000 }, { "epoch": 6.64, "learning_rate": 9.569538859562704e-06, "loss": 0.2192, "step": 11069500 }, { "epoch": 6.64, "learning_rate": 9.567443093933259e-06, "loss": 0.2102, "step": 11070000 }, { "epoch": 6.64, "learning_rate": 9.565343128372696e-06, "loss": 0.2161, "step": 11070500 }, { "epoch": 6.64, "learning_rate": 9.563243162812129e-06, "loss": 0.2156, "step": 11071000 }, { "epoch": 6.64, "learning_rate": 9.561143197251564e-06, "loss": 0.2157, "step": 11071500 }, { "epoch": 6.64, "learning_rate": 9.559043231691001e-06, "loss": 0.2101, "step": 11072000 }, { "epoch": 6.64, "learning_rate": 9.556943266130435e-06, "loss": 0.2119, "step": 11072500 }, { "epoch": 6.64, "learning_rate": 9.554847500500992e-06, "loss": 0.2114, "step": 11073000 }, { "epoch": 6.64, "learning_rate": 9.552747534940427e-06, "loss": 0.2086, "step": 11073500 }, { "epoch": 6.64, "learning_rate": 9.550647569379862e-06, "loss": 0.2148, "step": 11074000 }, { "epoch": 6.64, "learning_rate": 9.548547603819299e-06, "loss": 0.2112, "step": 11074500 }, { "epoch": 6.64, "learning_rate": 9.546447638258732e-06, "loss": 0.2145, "step": 11075000 }, { "epoch": 6.64, "learning_rate": 9.544347672698167e-06, "loss": 0.2188, "step": 11075500 }, { "epoch": 6.64, "learning_rate": 9.542247707137602e-06, "loss": 0.2172, "step": 11076000 }, { "epoch": 6.64, "learning_rate": 9.540147741577038e-06, "loss": 0.2151, "step": 11076500 }, { "epoch": 6.64, "learning_rate": 9.538051975947593e-06, "loss": 0.214, "step": 11077000 }, { "epoch": 6.64, "learning_rate": 9.535956210318152e-06, "loss": 0.2176, "step": 11077500 }, { "epoch": 6.64, "learning_rate": 9.533860444688707e-06, "loss": 0.2216, "step": 11078000 }, { "epoch": 6.64, "learning_rate": 9.531760479128142e-06, "loss": 0.2088, "step": 11078500 }, { "epoch": 6.64, "learning_rate": 9.529660513567579e-06, "loss": 0.2137, "step": 11079000 }, { "epoch": 6.64, "learning_rate": 9.527560548007012e-06, "loss": 0.2133, "step": 11079500 }, { "epoch": 6.64, "learning_rate": 9.525460582446447e-06, "loss": 0.2141, "step": 11080000 }, { "epoch": 6.64, "learning_rate": 9.523360616885883e-06, "loss": 0.2135, "step": 11080500 }, { "epoch": 6.64, "learning_rate": 9.521260651325318e-06, "loss": 0.2146, "step": 11081000 }, { "epoch": 6.64, "learning_rate": 9.519160685764755e-06, "loss": 0.2166, "step": 11081500 }, { "epoch": 6.64, "learning_rate": 9.51706492013531e-06, "loss": 0.2148, "step": 11082000 }, { "epoch": 6.64, "learning_rate": 9.514964954574745e-06, "loss": 0.2151, "step": 11082500 }, { "epoch": 6.64, "learning_rate": 9.51286498901418e-06, "loss": 0.2088, "step": 11083000 }, { "epoch": 6.65, "learning_rate": 9.510765023453615e-06, "loss": 0.2181, "step": 11083500 }, { "epoch": 6.65, "learning_rate": 9.50866505789305e-06, "loss": 0.2126, "step": 11084000 }, { "epoch": 6.65, "learning_rate": 9.506565092332486e-06, "loss": 0.2111, "step": 11084500 }, { "epoch": 6.65, "learning_rate": 9.50446512677192e-06, "loss": 0.2188, "step": 11085000 }, { "epoch": 6.65, "learning_rate": 9.502369361142476e-06, "loss": 0.2175, "step": 11085500 }, { "epoch": 6.65, "learning_rate": 9.500269395581913e-06, "loss": 0.2175, "step": 11086000 }, { "epoch": 6.65, "learning_rate": 9.498169430021348e-06, "loss": 0.2173, "step": 11086500 }, { "epoch": 6.65, "learning_rate": 9.496069464460783e-06, "loss": 0.2105, "step": 11087000 }, { "epoch": 6.65, "learning_rate": 9.493969498900218e-06, "loss": 0.2157, "step": 11087500 }, { "epoch": 6.65, "learning_rate": 9.491869533339652e-06, "loss": 0.2122, "step": 11088000 }, { "epoch": 6.65, "learning_rate": 9.489769567779089e-06, "loss": 0.2162, "step": 11088500 }, { "epoch": 6.65, "learning_rate": 9.487669602218524e-06, "loss": 0.2137, "step": 11089000 }, { "epoch": 6.65, "learning_rate": 9.485578036520201e-06, "loss": 0.2141, "step": 11089500 }, { "epoch": 6.65, "learning_rate": 9.483478070959636e-06, "loss": 0.2122, "step": 11090000 }, { "epoch": 6.65, "learning_rate": 9.481378105399071e-06, "loss": 0.2149, "step": 11090500 }, { "epoch": 6.65, "learning_rate": 9.479278139838506e-06, "loss": 0.2145, "step": 11091000 }, { "epoch": 6.65, "learning_rate": 9.477178174277941e-06, "loss": 0.2111, "step": 11091500 }, { "epoch": 6.65, "learning_rate": 9.475078208717377e-06, "loss": 0.2151, "step": 11092000 }, { "epoch": 6.65, "learning_rate": 9.472978243156813e-06, "loss": 0.219, "step": 11092500 }, { "epoch": 6.65, "learning_rate": 9.470878277596247e-06, "loss": 0.2137, "step": 11093000 }, { "epoch": 6.65, "learning_rate": 9.468782511966804e-06, "loss": 0.217, "step": 11093500 }, { "epoch": 6.65, "learning_rate": 9.466682546406239e-06, "loss": 0.2177, "step": 11094000 }, { "epoch": 6.65, "learning_rate": 9.464582580845674e-06, "loss": 0.2157, "step": 11094500 }, { "epoch": 6.65, "learning_rate": 9.46248261528511e-06, "loss": 0.2176, "step": 11095000 }, { "epoch": 6.65, "learning_rate": 9.460386849655666e-06, "loss": 0.216, "step": 11095500 }, { "epoch": 6.65, "learning_rate": 9.458286884095101e-06, "loss": 0.2164, "step": 11096000 }, { "epoch": 6.65, "learning_rate": 9.456186918534535e-06, "loss": 0.2238, "step": 11096500 }, { "epoch": 6.65, "learning_rate": 9.454086952973972e-06, "loss": 0.2083, "step": 11097000 }, { "epoch": 6.65, "learning_rate": 9.451986987413405e-06, "loss": 0.2155, "step": 11097500 }, { "epoch": 6.65, "learning_rate": 9.449891221783962e-06, "loss": 0.2201, "step": 11098000 }, { "epoch": 6.65, "learning_rate": 9.447791256223397e-06, "loss": 0.2173, "step": 11098500 }, { "epoch": 6.65, "learning_rate": 9.445691290662833e-06, "loss": 0.2124, "step": 11099000 }, { "epoch": 6.65, "learning_rate": 9.44359132510227e-06, "loss": 0.2118, "step": 11099500 }, { "epoch": 6.65, "learning_rate": 9.441495559472825e-06, "loss": 0.2189, "step": 11100000 }, { "epoch": 6.65, "eval_loss": 0.20419564843177795, "eval_runtime": 1456.2936, "eval_samples_per_second": 361.685, "eval_steps_per_second": 60.281, "step": 11100000 }, { "epoch": 6.66, "learning_rate": 9.43939559391226e-06, "loss": 0.2153, "step": 11100500 }, { "epoch": 6.66, "learning_rate": 9.437295628351695e-06, "loss": 0.2103, "step": 11101000 }, { "epoch": 6.66, "learning_rate": 9.43519566279113e-06, "loss": 0.2125, "step": 11101500 }, { "epoch": 6.66, "learning_rate": 9.433095697230565e-06, "loss": 0.2097, "step": 11102000 }, { "epoch": 6.66, "learning_rate": 9.430999931601122e-06, "loss": 0.213, "step": 11102500 }, { "epoch": 6.66, "learning_rate": 9.428899966040557e-06, "loss": 0.2124, "step": 11103000 }, { "epoch": 6.66, "learning_rate": 9.42680000047999e-06, "loss": 0.2107, "step": 11103500 }, { "epoch": 6.66, "learning_rate": 9.424700034919428e-06, "loss": 0.2143, "step": 11104000 }, { "epoch": 6.66, "learning_rate": 9.422600069358863e-06, "loss": 0.2148, "step": 11104500 }, { "epoch": 6.66, "learning_rate": 9.420504303729418e-06, "loss": 0.2102, "step": 11105000 }, { "epoch": 6.66, "learning_rate": 9.418404338168855e-06, "loss": 0.2138, "step": 11105500 }, { "epoch": 6.66, "learning_rate": 9.416304372608288e-06, "loss": 0.2101, "step": 11106000 }, { "epoch": 6.66, "learning_rate": 9.414204407047725e-06, "loss": 0.2163, "step": 11106500 }, { "epoch": 6.66, "learning_rate": 9.412104441487159e-06, "loss": 0.2153, "step": 11107000 }, { "epoch": 6.66, "learning_rate": 9.410004475926594e-06, "loss": 0.2151, "step": 11107500 }, { "epoch": 6.66, "learning_rate": 9.40790451036603e-06, "loss": 0.2096, "step": 11108000 }, { "epoch": 6.66, "learning_rate": 9.405804544805464e-06, "loss": 0.2165, "step": 11108500 }, { "epoch": 6.66, "learning_rate": 9.403704579244901e-06, "loss": 0.2097, "step": 11109000 }, { "epoch": 6.66, "learning_rate": 9.401613013546578e-06, "loss": 0.2167, "step": 11109500 }, { "epoch": 6.66, "learning_rate": 9.399513047986013e-06, "loss": 0.2178, "step": 11110000 }, { "epoch": 6.66, "learning_rate": 9.397413082425447e-06, "loss": 0.2128, "step": 11110500 }, { "epoch": 6.66, "learning_rate": 9.395313116864884e-06, "loss": 0.2132, "step": 11111000 }, { "epoch": 6.66, "learning_rate": 9.393213151304319e-06, "loss": 0.2103, "step": 11111500 }, { "epoch": 6.66, "learning_rate": 9.391117385674874e-06, "loss": 0.2131, "step": 11112000 }, { "epoch": 6.66, "learning_rate": 9.38901742011431e-06, "loss": 0.2116, "step": 11112500 }, { "epoch": 6.66, "learning_rate": 9.386917454553744e-06, "loss": 0.2149, "step": 11113000 }, { "epoch": 6.66, "learning_rate": 9.384817488993181e-06, "loss": 0.216, "step": 11113500 }, { "epoch": 6.66, "learning_rate": 9.382717523432616e-06, "loss": 0.2105, "step": 11114000 }, { "epoch": 6.66, "learning_rate": 9.38061755787205e-06, "loss": 0.2102, "step": 11114500 }, { "epoch": 6.66, "learning_rate": 9.378517592311487e-06, "loss": 0.2147, "step": 11115000 }, { "epoch": 6.66, "learning_rate": 9.376421826682042e-06, "loss": 0.2081, "step": 11115500 }, { "epoch": 6.66, "learning_rate": 9.374321861121477e-06, "loss": 0.2121, "step": 11116000 }, { "epoch": 6.66, "learning_rate": 9.372221895560914e-06, "loss": 0.2135, "step": 11116500 }, { "epoch": 6.67, "learning_rate": 9.370121930000347e-06, "loss": 0.2149, "step": 11117000 }, { "epoch": 6.67, "learning_rate": 9.368021964439784e-06, "loss": 0.212, "step": 11117500 }, { "epoch": 6.67, "learning_rate": 9.365921998879218e-06, "loss": 0.2142, "step": 11118000 }, { "epoch": 6.67, "learning_rate": 9.363822033318653e-06, "loss": 0.2162, "step": 11118500 }, { "epoch": 6.67, "learning_rate": 9.36172206775809e-06, "loss": 0.2177, "step": 11119000 }, { "epoch": 6.67, "learning_rate": 9.359626302128645e-06, "loss": 0.214, "step": 11119500 }, { "epoch": 6.67, "learning_rate": 9.35752633656808e-06, "loss": 0.2124, "step": 11120000 }, { "epoch": 6.67, "learning_rate": 9.355430570938637e-06, "loss": 0.2117, "step": 11120500 }, { "epoch": 6.67, "learning_rate": 9.353330605378072e-06, "loss": 0.2088, "step": 11121000 }, { "epoch": 6.67, "learning_rate": 9.351230639817506e-06, "loss": 0.212, "step": 11121500 }, { "epoch": 6.67, "learning_rate": 9.349130674256942e-06, "loss": 0.2153, "step": 11122000 }, { "epoch": 6.67, "learning_rate": 9.347030708696378e-06, "loss": 0.2186, "step": 11122500 }, { "epoch": 6.67, "learning_rate": 9.344930743135813e-06, "loss": 0.2136, "step": 11123000 }, { "epoch": 6.67, "learning_rate": 9.342830777575248e-06, "loss": 0.2152, "step": 11123500 }, { "epoch": 6.67, "learning_rate": 9.340735011945803e-06, "loss": 0.2139, "step": 11124000 }, { "epoch": 6.67, "learning_rate": 9.33863504638524e-06, "loss": 0.208, "step": 11124500 }, { "epoch": 6.67, "learning_rate": 9.336539280755795e-06, "loss": 0.2197, "step": 11125000 }, { "epoch": 6.67, "learning_rate": 9.33443931519523e-06, "loss": 0.2148, "step": 11125500 }, { "epoch": 6.67, "learning_rate": 9.332339349634667e-06, "loss": 0.2111, "step": 11126000 }, { "epoch": 6.67, "learning_rate": 9.3302393840741e-06, "loss": 0.2157, "step": 11126500 }, { "epoch": 6.67, "learning_rate": 9.328139418513536e-06, "loss": 0.2115, "step": 11127000 }, { "epoch": 6.67, "learning_rate": 9.326039452952971e-06, "loss": 0.2096, "step": 11127500 }, { "epoch": 6.67, "learning_rate": 9.323939487392406e-06, "loss": 0.2104, "step": 11128000 }, { "epoch": 6.67, "learning_rate": 9.321839521831843e-06, "loss": 0.2131, "step": 11128500 }, { "epoch": 6.67, "learning_rate": 9.319739556271277e-06, "loss": 0.2183, "step": 11129000 }, { "epoch": 6.67, "learning_rate": 9.317639590710712e-06, "loss": 0.2134, "step": 11129500 }, { "epoch": 6.67, "learning_rate": 9.315539625150149e-06, "loss": 0.2145, "step": 11130000 }, { "epoch": 6.67, "learning_rate": 9.313439659589582e-06, "loss": 0.2121, "step": 11130500 }, { "epoch": 6.67, "learning_rate": 9.311343893960139e-06, "loss": 0.2129, "step": 11131000 }, { "epoch": 6.67, "learning_rate": 9.309243928399574e-06, "loss": 0.2133, "step": 11131500 }, { "epoch": 6.67, "learning_rate": 9.30714396283901e-06, "loss": 0.2127, "step": 11132000 }, { "epoch": 6.67, "learning_rate": 9.305043997278444e-06, "loss": 0.2097, "step": 11132500 }, { "epoch": 6.67, "learning_rate": 9.30294403171788e-06, "loss": 0.2119, "step": 11133000 }, { "epoch": 6.67, "learning_rate": 9.300848266088437e-06, "loss": 0.2148, "step": 11133500 }, { "epoch": 6.68, "learning_rate": 9.298748300527872e-06, "loss": 0.2132, "step": 11134000 }, { "epoch": 6.68, "learning_rate": 9.296648334967307e-06, "loss": 0.2174, "step": 11134500 }, { "epoch": 6.68, "learning_rate": 9.29454836940674e-06, "loss": 0.2142, "step": 11135000 }, { "epoch": 6.68, "learning_rate": 9.292448403846177e-06, "loss": 0.2121, "step": 11135500 }, { "epoch": 6.68, "learning_rate": 9.290348438285612e-06, "loss": 0.2195, "step": 11136000 }, { "epoch": 6.68, "learning_rate": 9.288252672656168e-06, "loss": 0.2147, "step": 11136500 }, { "epoch": 6.68, "learning_rate": 9.286152707095604e-06, "loss": 0.2222, "step": 11137000 }, { "epoch": 6.68, "learning_rate": 9.284052741535038e-06, "loss": 0.215, "step": 11137500 }, { "epoch": 6.68, "learning_rate": 9.281952775974475e-06, "loss": 0.2194, "step": 11138000 }, { "epoch": 6.68, "learning_rate": 9.27985281041391e-06, "loss": 0.2124, "step": 11138500 }, { "epoch": 6.68, "learning_rate": 9.277752844853343e-06, "loss": 0.2144, "step": 11139000 }, { "epoch": 6.68, "learning_rate": 9.27565287929278e-06, "loss": 0.215, "step": 11139500 }, { "epoch": 6.68, "learning_rate": 9.273557113663335e-06, "loss": 0.2044, "step": 11140000 }, { "epoch": 6.68, "learning_rate": 9.27145714810277e-06, "loss": 0.2171, "step": 11140500 }, { "epoch": 6.68, "learning_rate": 9.269357182542207e-06, "loss": 0.2139, "step": 11141000 }, { "epoch": 6.68, "learning_rate": 9.267257216981641e-06, "loss": 0.2118, "step": 11141500 }, { "epoch": 6.68, "learning_rate": 9.265157251421076e-06, "loss": 0.2102, "step": 11142000 }, { "epoch": 6.68, "learning_rate": 9.263061485791633e-06, "loss": 0.2173, "step": 11142500 }, { "epoch": 6.68, "learning_rate": 9.260961520231068e-06, "loss": 0.2104, "step": 11143000 }, { "epoch": 6.68, "learning_rate": 9.258861554670503e-06, "loss": 0.2149, "step": 11143500 }, { "epoch": 6.68, "learning_rate": 9.256761589109938e-06, "loss": 0.2122, "step": 11144000 }, { "epoch": 6.68, "learning_rate": 9.254661623549374e-06, "loss": 0.2168, "step": 11144500 }, { "epoch": 6.68, "learning_rate": 9.252561657988809e-06, "loss": 0.2165, "step": 11145000 }, { "epoch": 6.68, "learning_rate": 9.250465892359366e-06, "loss": 0.2136, "step": 11145500 }, { "epoch": 6.68, "learning_rate": 9.2483659267988e-06, "loss": 0.2158, "step": 11146000 }, { "epoch": 6.68, "learning_rate": 9.246265961238236e-06, "loss": 0.2118, "step": 11146500 }, { "epoch": 6.68, "learning_rate": 9.244165995677671e-06, "loss": 0.2131, "step": 11147000 }, { "epoch": 6.68, "learning_rate": 9.242066030117105e-06, "loss": 0.2131, "step": 11147500 }, { "epoch": 6.68, "learning_rate": 9.239970264487663e-06, "loss": 0.2185, "step": 11148000 }, { "epoch": 6.68, "learning_rate": 9.237870298927097e-06, "loss": 0.2194, "step": 11148500 }, { "epoch": 6.68, "learning_rate": 9.235770333366532e-06, "loss": 0.2135, "step": 11149000 }, { "epoch": 6.68, "learning_rate": 9.233670367805969e-06, "loss": 0.2146, "step": 11149500 }, { "epoch": 6.68, "learning_rate": 9.231570402245402e-06, "loss": 0.2166, "step": 11150000 }, { "epoch": 6.69, "learning_rate": 9.229470436684839e-06, "loss": 0.2189, "step": 11150500 }, { "epoch": 6.69, "learning_rate": 9.227370471124274e-06, "loss": 0.2108, "step": 11151000 }, { "epoch": 6.69, "learning_rate": 9.225270505563708e-06, "loss": 0.2112, "step": 11151500 }, { "epoch": 6.69, "learning_rate": 9.223174739934266e-06, "loss": 0.216, "step": 11152000 }, { "epoch": 6.69, "learning_rate": 9.221078974304822e-06, "loss": 0.2126, "step": 11152500 }, { "epoch": 6.69, "learning_rate": 9.218979008744257e-06, "loss": 0.2185, "step": 11153000 }, { "epoch": 6.69, "learning_rate": 9.216879043183692e-06, "loss": 0.2134, "step": 11153500 }, { "epoch": 6.69, "learning_rate": 9.214779077623127e-06, "loss": 0.2099, "step": 11154000 }, { "epoch": 6.69, "learning_rate": 9.212683311993682e-06, "loss": 0.2116, "step": 11154500 }, { "epoch": 6.69, "learning_rate": 9.21058334643312e-06, "loss": 0.2128, "step": 11155000 }, { "epoch": 6.69, "learning_rate": 9.208483380872553e-06, "loss": 0.2145, "step": 11155500 }, { "epoch": 6.69, "learning_rate": 9.20638341531199e-06, "loss": 0.2155, "step": 11156000 }, { "epoch": 6.69, "learning_rate": 9.204283449751425e-06, "loss": 0.2147, "step": 11156500 }, { "epoch": 6.69, "learning_rate": 9.202183484190858e-06, "loss": 0.2165, "step": 11157000 }, { "epoch": 6.69, "learning_rate": 9.200083518630295e-06, "loss": 0.2155, "step": 11157500 }, { "epoch": 6.69, "learning_rate": 9.19798355306973e-06, "loss": 0.2119, "step": 11158000 }, { "epoch": 6.69, "learning_rate": 9.195887787440285e-06, "loss": 0.2192, "step": 11158500 }, { "epoch": 6.69, "learning_rate": 9.193787821879722e-06, "loss": 0.2137, "step": 11159000 }, { "epoch": 6.69, "learning_rate": 9.191692056250277e-06, "loss": 0.2122, "step": 11159500 }, { "epoch": 6.69, "learning_rate": 9.189592090689713e-06, "loss": 0.211, "step": 11160000 }, { "epoch": 6.69, "learning_rate": 9.187492125129148e-06, "loss": 0.2123, "step": 11160500 }, { "epoch": 6.69, "learning_rate": 9.185392159568583e-06, "loss": 0.2145, "step": 11161000 }, { "epoch": 6.69, "learning_rate": 9.183292194008018e-06, "loss": 0.2182, "step": 11161500 }, { "epoch": 6.69, "learning_rate": 9.181192228447453e-06, "loss": 0.2171, "step": 11162000 }, { "epoch": 6.69, "learning_rate": 9.17909646281801e-06, "loss": 0.2123, "step": 11162500 }, { "epoch": 6.69, "learning_rate": 9.176996497257445e-06, "loss": 0.22, "step": 11163000 }, { "epoch": 6.69, "learning_rate": 9.17489653169688e-06, "loss": 0.2135, "step": 11163500 }, { "epoch": 6.69, "learning_rate": 9.172796566136314e-06, "loss": 0.214, "step": 11164000 }, { "epoch": 6.69, "learning_rate": 9.17069660057575e-06, "loss": 0.2175, "step": 11164500 }, { "epoch": 6.69, "learning_rate": 9.168596635015186e-06, "loss": 0.2136, "step": 11165000 }, { "epoch": 6.69, "learning_rate": 9.16649666945462e-06, "loss": 0.2119, "step": 11165500 }, { "epoch": 6.69, "learning_rate": 9.164396703894056e-06, "loss": 0.2149, "step": 11166000 }, { "epoch": 6.69, "learning_rate": 9.162300938264612e-06, "loss": 0.2154, "step": 11166500 }, { "epoch": 6.7, "learning_rate": 9.160205172635169e-06, "loss": 0.2148, "step": 11167000 }, { "epoch": 6.7, "learning_rate": 9.158105207074604e-06, "loss": 0.2191, "step": 11167500 }, { "epoch": 6.7, "learning_rate": 9.156005241514039e-06, "loss": 0.2134, "step": 11168000 }, { "epoch": 6.7, "learning_rate": 9.153905275953474e-06, "loss": 0.211, "step": 11168500 }, { "epoch": 6.7, "learning_rate": 9.151809510324031e-06, "loss": 0.2142, "step": 11169000 }, { "epoch": 6.7, "learning_rate": 9.149709544763466e-06, "loss": 0.212, "step": 11169500 }, { "epoch": 6.7, "learning_rate": 9.147609579202901e-06, "loss": 0.2158, "step": 11170000 }, { "epoch": 6.7, "learning_rate": 9.145509613642336e-06, "loss": 0.2141, "step": 11170500 }, { "epoch": 6.7, "learning_rate": 9.143409648081772e-06, "loss": 0.2093, "step": 11171000 }, { "epoch": 6.7, "learning_rate": 9.141309682521207e-06, "loss": 0.217, "step": 11171500 }, { "epoch": 6.7, "learning_rate": 9.139209716960642e-06, "loss": 0.2162, "step": 11172000 }, { "epoch": 6.7, "learning_rate": 9.137109751400077e-06, "loss": 0.2144, "step": 11172500 }, { "epoch": 6.7, "learning_rate": 9.135013985770634e-06, "loss": 0.211, "step": 11173000 }, { "epoch": 6.7, "learning_rate": 9.132914020210067e-06, "loss": 0.2136, "step": 11173500 }, { "epoch": 6.7, "learning_rate": 9.130814054649504e-06, "loss": 0.2171, "step": 11174000 }, { "epoch": 6.7, "learning_rate": 9.12871408908894e-06, "loss": 0.2098, "step": 11174500 }, { "epoch": 6.7, "learning_rate": 9.126618323459495e-06, "loss": 0.2126, "step": 11175000 }, { "epoch": 6.7, "learning_rate": 9.124518357898932e-06, "loss": 0.2144, "step": 11175500 }, { "epoch": 6.7, "learning_rate": 9.122418392338365e-06, "loss": 0.2203, "step": 11176000 }, { "epoch": 6.7, "learning_rate": 9.1203184267778e-06, "loss": 0.2217, "step": 11176500 }, { "epoch": 6.7, "learning_rate": 9.118222661148357e-06, "loss": 0.2109, "step": 11177000 }, { "epoch": 6.7, "learning_rate": 9.116122695587792e-06, "loss": 0.209, "step": 11177500 }, { "epoch": 6.7, "learning_rate": 9.114026929958348e-06, "loss": 0.2132, "step": 11178000 }, { "epoch": 6.7, "learning_rate": 9.111926964397784e-06, "loss": 0.2097, "step": 11178500 }, { "epoch": 6.7, "learning_rate": 9.10982699883722e-06, "loss": 0.2151, "step": 11179000 }, { "epoch": 6.7, "learning_rate": 9.107727033276653e-06, "loss": 0.216, "step": 11179500 }, { "epoch": 6.7, "learning_rate": 9.105631267647212e-06, "loss": 0.2139, "step": 11180000 }, { "epoch": 6.7, "learning_rate": 9.103531302086645e-06, "loss": 0.2161, "step": 11180500 }, { "epoch": 6.7, "learning_rate": 9.101435536457202e-06, "loss": 0.213, "step": 11181000 }, { "epoch": 6.7, "learning_rate": 9.099335570896637e-06, "loss": 0.2115, "step": 11181500 }, { "epoch": 6.7, "learning_rate": 9.097235605336072e-06, "loss": 0.2071, "step": 11182000 }, { "epoch": 6.7, "learning_rate": 9.095135639775508e-06, "loss": 0.217, "step": 11182500 }, { "epoch": 6.7, "learning_rate": 9.093039874146065e-06, "loss": 0.2216, "step": 11183000 }, { "epoch": 6.7, "learning_rate": 9.0909399085855e-06, "loss": 0.2141, "step": 11183500 }, { "epoch": 6.71, "learning_rate": 9.088839943024933e-06, "loss": 0.2103, "step": 11184000 }, { "epoch": 6.71, "learning_rate": 9.08673997746437e-06, "loss": 0.212, "step": 11184500 }, { "epoch": 6.71, "learning_rate": 9.084640011903805e-06, "loss": 0.2144, "step": 11185000 }, { "epoch": 6.71, "learning_rate": 9.08254004634324e-06, "loss": 0.2147, "step": 11185500 }, { "epoch": 6.71, "learning_rate": 9.080440080782675e-06, "loss": 0.2155, "step": 11186000 }, { "epoch": 6.71, "learning_rate": 9.078340115222109e-06, "loss": 0.2165, "step": 11186500 }, { "epoch": 6.71, "learning_rate": 9.076240149661546e-06, "loss": 0.2185, "step": 11187000 }, { "epoch": 6.71, "learning_rate": 9.074140184100981e-06, "loss": 0.2142, "step": 11187500 }, { "epoch": 6.71, "learning_rate": 9.072040218540416e-06, "loss": 0.2141, "step": 11188000 }, { "epoch": 6.71, "learning_rate": 9.069940252979851e-06, "loss": 0.2147, "step": 11188500 }, { "epoch": 6.71, "learning_rate": 9.067844487350406e-06, "loss": 0.2089, "step": 11189000 }, { "epoch": 6.71, "learning_rate": 9.065744521789843e-06, "loss": 0.2139, "step": 11189500 }, { "epoch": 6.71, "learning_rate": 9.063644556229278e-06, "loss": 0.2166, "step": 11190000 }, { "epoch": 6.71, "learning_rate": 9.061544590668712e-06, "loss": 0.2117, "step": 11190500 }, { "epoch": 6.71, "learning_rate": 9.05944882503927e-06, "loss": 0.2143, "step": 11191000 }, { "epoch": 6.71, "learning_rate": 9.057348859478704e-06, "loss": 0.2119, "step": 11191500 }, { "epoch": 6.71, "learning_rate": 9.05524889391814e-06, "loss": 0.2173, "step": 11192000 }, { "epoch": 6.71, "learning_rate": 9.053148928357576e-06, "loss": 0.2191, "step": 11192500 }, { "epoch": 6.71, "learning_rate": 9.05104896279701e-06, "loss": 0.2148, "step": 11193000 }, { "epoch": 6.71, "learning_rate": 9.048953197167566e-06, "loss": 0.2113, "step": 11193500 }, { "epoch": 6.71, "learning_rate": 9.046853231607002e-06, "loss": 0.2112, "step": 11194000 }, { "epoch": 6.71, "learning_rate": 9.044753266046437e-06, "loss": 0.2135, "step": 11194500 }, { "epoch": 6.71, "learning_rate": 9.042653300485872e-06, "loss": 0.2161, "step": 11195000 }, { "epoch": 6.71, "learning_rate": 9.040553334925307e-06, "loss": 0.2159, "step": 11195500 }, { "epoch": 6.71, "learning_rate": 9.038453369364742e-06, "loss": 0.2157, "step": 11196000 }, { "epoch": 6.71, "learning_rate": 9.036353403804177e-06, "loss": 0.2132, "step": 11196500 }, { "epoch": 6.71, "learning_rate": 9.034257638174734e-06, "loss": 0.2123, "step": 11197000 }, { "epoch": 6.71, "learning_rate": 9.032157672614168e-06, "loss": 0.2102, "step": 11197500 }, { "epoch": 6.71, "learning_rate": 9.030057707053605e-06, "loss": 0.2146, "step": 11198000 }, { "epoch": 6.71, "learning_rate": 9.02795774149304e-06, "loss": 0.2128, "step": 11198500 }, { "epoch": 6.71, "learning_rate": 9.025861975863595e-06, "loss": 0.2116, "step": 11199000 }, { "epoch": 6.71, "learning_rate": 9.023762010303032e-06, "loss": 0.2103, "step": 11199500 }, { "epoch": 6.71, "learning_rate": 9.021662044742465e-06, "loss": 0.214, "step": 11200000 }, { "epoch": 6.71, "eval_loss": 0.20409980416297913, "eval_runtime": 1455.7761, "eval_samples_per_second": 361.814, "eval_steps_per_second": 60.303, "step": 11200000 }, { "epoch": 6.72, "learning_rate": 9.019562079181902e-06, "loss": 0.2165, "step": 11200500 }, { "epoch": 6.72, "learning_rate": 9.017462113621337e-06, "loss": 0.2161, "step": 11201000 }, { "epoch": 6.72, "learning_rate": 9.015366347991893e-06, "loss": 0.2179, "step": 11201500 }, { "epoch": 6.72, "learning_rate": 9.01326638243133e-06, "loss": 0.2148, "step": 11202000 }, { "epoch": 6.72, "learning_rate": 9.011166416870763e-06, "loss": 0.2117, "step": 11202500 }, { "epoch": 6.72, "learning_rate": 9.009066451310198e-06, "loss": 0.2099, "step": 11203000 }, { "epoch": 6.72, "learning_rate": 9.006970685680755e-06, "loss": 0.2172, "step": 11203500 }, { "epoch": 6.72, "learning_rate": 9.00487072012019e-06, "loss": 0.2123, "step": 11204000 }, { "epoch": 6.72, "learning_rate": 9.002770754559624e-06, "loss": 0.213, "step": 11204500 }, { "epoch": 6.72, "learning_rate": 9.00067078899906e-06, "loss": 0.2117, "step": 11205000 }, { "epoch": 6.72, "learning_rate": 8.998570823438496e-06, "loss": 0.2125, "step": 11205500 }, { "epoch": 6.72, "learning_rate": 8.996475057809051e-06, "loss": 0.2167, "step": 11206000 }, { "epoch": 6.72, "learning_rate": 8.994375092248488e-06, "loss": 0.2118, "step": 11206500 }, { "epoch": 6.72, "learning_rate": 8.992275126687921e-06, "loss": 0.2125, "step": 11207000 }, { "epoch": 6.72, "learning_rate": 8.990175161127358e-06, "loss": 0.2107, "step": 11207500 }, { "epoch": 6.72, "learning_rate": 8.988075195566793e-06, "loss": 0.2133, "step": 11208000 }, { "epoch": 6.72, "learning_rate": 8.985975230006227e-06, "loss": 0.21, "step": 11208500 }, { "epoch": 6.72, "learning_rate": 8.983875264445664e-06, "loss": 0.2131, "step": 11209000 }, { "epoch": 6.72, "learning_rate": 8.981779498816219e-06, "loss": 0.2217, "step": 11209500 }, { "epoch": 6.72, "learning_rate": 8.979679533255654e-06, "loss": 0.2165, "step": 11210000 }, { "epoch": 6.72, "learning_rate": 8.97757956769509e-06, "loss": 0.2141, "step": 11210500 }, { "epoch": 6.72, "learning_rate": 8.975479602134524e-06, "loss": 0.2143, "step": 11211000 }, { "epoch": 6.72, "learning_rate": 8.973383836505081e-06, "loss": 0.217, "step": 11211500 }, { "epoch": 6.72, "learning_rate": 8.971283870944516e-06, "loss": 0.2127, "step": 11212000 }, { "epoch": 6.72, "learning_rate": 8.969183905383952e-06, "loss": 0.2121, "step": 11212500 }, { "epoch": 6.72, "learning_rate": 8.967083939823387e-06, "loss": 0.2194, "step": 11213000 }, { "epoch": 6.72, "learning_rate": 8.964983974262822e-06, "loss": 0.2169, "step": 11213500 }, { "epoch": 6.72, "learning_rate": 8.962884008702257e-06, "loss": 0.2138, "step": 11214000 }, { "epoch": 6.72, "learning_rate": 8.960788243072814e-06, "loss": 0.2167, "step": 11214500 }, { "epoch": 6.72, "learning_rate": 8.958688277512249e-06, "loss": 0.2071, "step": 11215000 }, { "epoch": 6.72, "learning_rate": 8.956588311951683e-06, "loss": 0.2119, "step": 11215500 }, { "epoch": 6.72, "learning_rate": 8.95448834639112e-06, "loss": 0.2127, "step": 11216000 }, { "epoch": 6.72, "learning_rate": 8.952388380830555e-06, "loss": 0.2092, "step": 11216500 }, { "epoch": 6.73, "learning_rate": 8.95028841526999e-06, "loss": 0.2141, "step": 11217000 }, { "epoch": 6.73, "learning_rate": 8.948188449709425e-06, "loss": 0.2137, "step": 11217500 }, { "epoch": 6.73, "learning_rate": 8.94608848414886e-06, "loss": 0.2115, "step": 11218000 }, { "epoch": 6.73, "learning_rate": 8.943988518588295e-06, "loss": 0.2111, "step": 11218500 }, { "epoch": 6.73, "learning_rate": 8.941896952889972e-06, "loss": 0.2118, "step": 11219000 }, { "epoch": 6.73, "learning_rate": 8.939796987329407e-06, "loss": 0.2144, "step": 11219500 }, { "epoch": 6.73, "learning_rate": 8.937697021768844e-06, "loss": 0.217, "step": 11220000 }, { "epoch": 6.73, "learning_rate": 8.935597056208278e-06, "loss": 0.2124, "step": 11220500 }, { "epoch": 6.73, "learning_rate": 8.933497090647713e-06, "loss": 0.2164, "step": 11221000 }, { "epoch": 6.73, "learning_rate": 8.93140132501827e-06, "loss": 0.2078, "step": 11221500 }, { "epoch": 6.73, "learning_rate": 8.929301359457705e-06, "loss": 0.2166, "step": 11222000 }, { "epoch": 6.73, "learning_rate": 8.92720139389714e-06, "loss": 0.2141, "step": 11222500 }, { "epoch": 6.73, "learning_rate": 8.925101428336575e-06, "loss": 0.2111, "step": 11223000 }, { "epoch": 6.73, "learning_rate": 8.92300146277601e-06, "loss": 0.2102, "step": 11223500 }, { "epoch": 6.73, "learning_rate": 8.920905697146566e-06, "loss": 0.2144, "step": 11224000 }, { "epoch": 6.73, "learning_rate": 8.918805731586003e-06, "loss": 0.2085, "step": 11224500 }, { "epoch": 6.73, "learning_rate": 8.916705766025436e-06, "loss": 0.21, "step": 11225000 }, { "epoch": 6.73, "learning_rate": 8.914605800464873e-06, "loss": 0.2135, "step": 11225500 }, { "epoch": 6.73, "learning_rate": 8.912505834904308e-06, "loss": 0.2096, "step": 11226000 }, { "epoch": 6.73, "learning_rate": 8.910410069274863e-06, "loss": 0.2125, "step": 11226500 }, { "epoch": 6.73, "learning_rate": 8.9083101037143e-06, "loss": 0.216, "step": 11227000 }, { "epoch": 6.73, "learning_rate": 8.906210138153734e-06, "loss": 0.2151, "step": 11227500 }, { "epoch": 6.73, "learning_rate": 8.904110172593169e-06, "loss": 0.211, "step": 11228000 }, { "epoch": 6.73, "learning_rate": 8.902014406963726e-06, "loss": 0.2137, "step": 11228500 }, { "epoch": 6.73, "learning_rate": 8.899914441403161e-06, "loss": 0.2131, "step": 11229000 }, { "epoch": 6.73, "learning_rate": 8.897814475842596e-06, "loss": 0.2117, "step": 11229500 }, { "epoch": 6.73, "learning_rate": 8.895714510282031e-06, "loss": 0.214, "step": 11230000 }, { "epoch": 6.73, "learning_rate": 8.893614544721466e-06, "loss": 0.2146, "step": 11230500 }, { "epoch": 6.73, "learning_rate": 8.891518779092022e-06, "loss": 0.2162, "step": 11231000 }, { "epoch": 6.73, "learning_rate": 8.889418813531458e-06, "loss": 0.2107, "step": 11231500 }, { "epoch": 6.73, "learning_rate": 8.887318847970894e-06, "loss": 0.2126, "step": 11232000 }, { "epoch": 6.73, "learning_rate": 8.885218882410329e-06, "loss": 0.2068, "step": 11232500 }, { "epoch": 6.73, "learning_rate": 8.883118916849764e-06, "loss": 0.2142, "step": 11233000 }, { "epoch": 6.73, "learning_rate": 8.881018951289197e-06, "loss": 0.214, "step": 11233500 }, { "epoch": 6.74, "learning_rate": 8.878918985728634e-06, "loss": 0.2127, "step": 11234000 }, { "epoch": 6.74, "learning_rate": 8.87681902016807e-06, "loss": 0.2146, "step": 11234500 }, { "epoch": 6.74, "learning_rate": 8.874723254538625e-06, "loss": 0.2114, "step": 11235000 }, { "epoch": 6.74, "learning_rate": 8.872627488909182e-06, "loss": 0.2175, "step": 11235500 }, { "epoch": 6.74, "learning_rate": 8.870527523348617e-06, "loss": 0.2153, "step": 11236000 }, { "epoch": 6.74, "learning_rate": 8.868427557788052e-06, "loss": 0.2133, "step": 11236500 }, { "epoch": 6.74, "learning_rate": 8.866327592227487e-06, "loss": 0.2117, "step": 11237000 }, { "epoch": 6.74, "learning_rate": 8.864231826598044e-06, "loss": 0.2104, "step": 11237500 }, { "epoch": 6.74, "learning_rate": 8.862131861037477e-06, "loss": 0.2142, "step": 11238000 }, { "epoch": 6.74, "learning_rate": 8.860031895476914e-06, "loss": 0.2158, "step": 11238500 }, { "epoch": 6.74, "learning_rate": 8.85793192991635e-06, "loss": 0.214, "step": 11239000 }, { "epoch": 6.74, "learning_rate": 8.855831964355785e-06, "loss": 0.2091, "step": 11239500 }, { "epoch": 6.74, "learning_rate": 8.853736198726342e-06, "loss": 0.2129, "step": 11240000 }, { "epoch": 6.74, "learning_rate": 8.851636233165775e-06, "loss": 0.2146, "step": 11240500 }, { "epoch": 6.74, "learning_rate": 8.849536267605212e-06, "loss": 0.2135, "step": 11241000 }, { "epoch": 6.74, "learning_rate": 8.847436302044647e-06, "loss": 0.2121, "step": 11241500 }, { "epoch": 6.74, "learning_rate": 8.845340536415202e-06, "loss": 0.217, "step": 11242000 }, { "epoch": 6.74, "learning_rate": 8.84324057085464e-06, "loss": 0.2166, "step": 11242500 }, { "epoch": 6.74, "learning_rate": 8.841140605294073e-06, "loss": 0.2109, "step": 11243000 }, { "epoch": 6.74, "learning_rate": 8.839040639733508e-06, "loss": 0.2146, "step": 11243500 }, { "epoch": 6.74, "learning_rate": 8.836940674172943e-06, "loss": 0.2163, "step": 11244000 }, { "epoch": 6.74, "learning_rate": 8.834840708612378e-06, "loss": 0.2121, "step": 11244500 }, { "epoch": 6.74, "learning_rate": 8.832740743051815e-06, "loss": 0.2126, "step": 11245000 }, { "epoch": 6.74, "learning_rate": 8.830640777491248e-06, "loss": 0.2048, "step": 11245500 }, { "epoch": 6.74, "learning_rate": 8.828545011861805e-06, "loss": 0.2142, "step": 11246000 }, { "epoch": 6.74, "learning_rate": 8.82644504630124e-06, "loss": 0.2067, "step": 11246500 }, { "epoch": 6.74, "learning_rate": 8.824345080740676e-06, "loss": 0.2183, "step": 11247000 }, { "epoch": 6.74, "learning_rate": 8.82224511518011e-06, "loss": 0.2081, "step": 11247500 }, { "epoch": 6.74, "learning_rate": 8.820149349550668e-06, "loss": 0.2114, "step": 11248000 }, { "epoch": 6.74, "learning_rate": 8.818049383990103e-06, "loss": 0.2169, "step": 11248500 }, { "epoch": 6.74, "learning_rate": 8.815949418429536e-06, "loss": 0.2171, "step": 11249000 }, { "epoch": 6.74, "learning_rate": 8.813849452868973e-06, "loss": 0.2098, "step": 11249500 }, { "epoch": 6.74, "learning_rate": 8.811749487308408e-06, "loss": 0.2147, "step": 11250000 }, { "epoch": 6.75, "learning_rate": 8.809653721678964e-06, "loss": 0.208, "step": 11250500 }, { "epoch": 6.75, "learning_rate": 8.8075537561184e-06, "loss": 0.2149, "step": 11251000 }, { "epoch": 6.75, "learning_rate": 8.805453790557834e-06, "loss": 0.2087, "step": 11251500 }, { "epoch": 6.75, "learning_rate": 8.80335382499727e-06, "loss": 0.2067, "step": 11252000 }, { "epoch": 6.75, "learning_rate": 8.801253859436704e-06, "loss": 0.2166, "step": 11252500 }, { "epoch": 6.75, "learning_rate": 8.79915389387614e-06, "loss": 0.2131, "step": 11253000 }, { "epoch": 6.75, "learning_rate": 8.797058128246698e-06, "loss": 0.2105, "step": 11253500 }, { "epoch": 6.75, "learning_rate": 8.794958162686132e-06, "loss": 0.2125, "step": 11254000 }, { "epoch": 6.75, "learning_rate": 8.792858197125567e-06, "loss": 0.2127, "step": 11254500 }, { "epoch": 6.75, "learning_rate": 8.790758231565002e-06, "loss": 0.2149, "step": 11255000 }, { "epoch": 6.75, "learning_rate": 8.788658266004437e-06, "loss": 0.212, "step": 11255500 }, { "epoch": 6.75, "learning_rate": 8.786562500374992e-06, "loss": 0.2117, "step": 11256000 }, { "epoch": 6.75, "learning_rate": 8.784462534814429e-06, "loss": 0.2173, "step": 11256500 }, { "epoch": 6.75, "learning_rate": 8.782362569253864e-06, "loss": 0.2101, "step": 11257000 }, { "epoch": 6.75, "learning_rate": 8.7802626036933e-06, "loss": 0.2121, "step": 11257500 }, { "epoch": 6.75, "learning_rate": 8.778162638132735e-06, "loss": 0.2086, "step": 11258000 }, { "epoch": 6.75, "learning_rate": 8.77606687250329e-06, "loss": 0.2125, "step": 11258500 }, { "epoch": 6.75, "learning_rate": 8.773966906942727e-06, "loss": 0.2124, "step": 11259000 }, { "epoch": 6.75, "learning_rate": 8.771866941382162e-06, "loss": 0.2162, "step": 11259500 }, { "epoch": 6.75, "learning_rate": 8.769766975821595e-06, "loss": 0.2153, "step": 11260000 }, { "epoch": 6.75, "learning_rate": 8.767667010261032e-06, "loss": 0.2157, "step": 11260500 }, { "epoch": 6.75, "learning_rate": 8.765567044700467e-06, "loss": 0.2124, "step": 11261000 }, { "epoch": 6.75, "learning_rate": 8.763471279071023e-06, "loss": 0.2241, "step": 11261500 }, { "epoch": 6.75, "learning_rate": 8.76137131351046e-06, "loss": 0.2138, "step": 11262000 }, { "epoch": 6.75, "learning_rate": 8.759271347949893e-06, "loss": 0.2131, "step": 11262500 }, { "epoch": 6.75, "learning_rate": 8.75717138238933e-06, "loss": 0.2156, "step": 11263000 }, { "epoch": 6.75, "learning_rate": 8.755071416828763e-06, "loss": 0.2124, "step": 11263500 }, { "epoch": 6.75, "learning_rate": 8.75297565119932e-06, "loss": 0.212, "step": 11264000 }, { "epoch": 6.75, "learning_rate": 8.750875685638755e-06, "loss": 0.2143, "step": 11264500 }, { "epoch": 6.75, "learning_rate": 8.74877572007819e-06, "loss": 0.2096, "step": 11265000 }, { "epoch": 6.75, "learning_rate": 8.746675754517626e-06, "loss": 0.2144, "step": 11265500 }, { "epoch": 6.75, "learning_rate": 8.74457578895706e-06, "loss": 0.2157, "step": 11266000 }, { "epoch": 6.75, "learning_rate": 8.742475823396496e-06, "loss": 0.2123, "step": 11266500 }, { "epoch": 6.76, "learning_rate": 8.740375857835933e-06, "loss": 0.2114, "step": 11267000 }, { "epoch": 6.76, "learning_rate": 8.738275892275366e-06, "loss": 0.2157, "step": 11267500 }, { "epoch": 6.76, "learning_rate": 8.736184326577043e-06, "loss": 0.2171, "step": 11268000 }, { "epoch": 6.76, "learning_rate": 8.734084361016478e-06, "loss": 0.2123, "step": 11268500 }, { "epoch": 6.76, "learning_rate": 8.731984395455915e-06, "loss": 0.2139, "step": 11269000 }, { "epoch": 6.76, "learning_rate": 8.729884429895349e-06, "loss": 0.2112, "step": 11269500 }, { "epoch": 6.76, "learning_rate": 8.727784464334786e-06, "loss": 0.2065, "step": 11270000 }, { "epoch": 6.76, "learning_rate": 8.72568449877422e-06, "loss": 0.2147, "step": 11270500 }, { "epoch": 6.76, "learning_rate": 8.723588733144776e-06, "loss": 0.2109, "step": 11271000 }, { "epoch": 6.76, "learning_rate": 8.721488767584213e-06, "loss": 0.2108, "step": 11271500 }, { "epoch": 6.76, "learning_rate": 8.719388802023646e-06, "loss": 0.2127, "step": 11272000 }, { "epoch": 6.76, "learning_rate": 8.717288836463081e-06, "loss": 0.2132, "step": 11272500 }, { "epoch": 6.76, "learning_rate": 8.715188870902517e-06, "loss": 0.2154, "step": 11273000 }, { "epoch": 6.76, "learning_rate": 8.713088905341952e-06, "loss": 0.2118, "step": 11273500 }, { "epoch": 6.76, "learning_rate": 8.710993139712507e-06, "loss": 0.2091, "step": 11274000 }, { "epoch": 6.76, "learning_rate": 8.708893174151944e-06, "loss": 0.2102, "step": 11274500 }, { "epoch": 6.76, "learning_rate": 8.706793208591379e-06, "loss": 0.2156, "step": 11275000 }, { "epoch": 6.76, "learning_rate": 8.704693243030814e-06, "loss": 0.2119, "step": 11275500 }, { "epoch": 6.76, "learning_rate": 8.70259327747025e-06, "loss": 0.2152, "step": 11276000 }, { "epoch": 6.76, "learning_rate": 8.700493311909685e-06, "loss": 0.2105, "step": 11276500 }, { "epoch": 6.76, "learning_rate": 8.69839334634912e-06, "loss": 0.2175, "step": 11277000 }, { "epoch": 6.76, "learning_rate": 8.696297580719677e-06, "loss": 0.2154, "step": 11277500 }, { "epoch": 6.76, "learning_rate": 8.69419761515911e-06, "loss": 0.2137, "step": 11278000 }, { "epoch": 6.76, "learning_rate": 8.692097649598547e-06, "loss": 0.2154, "step": 11278500 }, { "epoch": 6.76, "learning_rate": 8.689997684037982e-06, "loss": 0.2096, "step": 11279000 }, { "epoch": 6.76, "learning_rate": 8.687901918408537e-06, "loss": 0.2086, "step": 11279500 }, { "epoch": 6.76, "learning_rate": 8.685801952847974e-06, "loss": 0.2126, "step": 11280000 }, { "epoch": 6.76, "learning_rate": 8.683701987287408e-06, "loss": 0.2163, "step": 11280500 }, { "epoch": 6.76, "learning_rate": 8.681602021726845e-06, "loss": 0.2144, "step": 11281000 }, { "epoch": 6.76, "learning_rate": 8.679502056166278e-06, "loss": 0.2158, "step": 11281500 }, { "epoch": 6.76, "learning_rate": 8.677402090605713e-06, "loss": 0.2144, "step": 11282000 }, { "epoch": 6.76, "learning_rate": 8.67530212504515e-06, "loss": 0.2112, "step": 11282500 }, { "epoch": 6.76, "learning_rate": 8.673202159484583e-06, "loss": 0.2152, "step": 11283000 }, { "epoch": 6.76, "learning_rate": 8.67110639385514e-06, "loss": 0.21, "step": 11283500 }, { "epoch": 6.77, "learning_rate": 8.669006428294576e-06, "loss": 0.2113, "step": 11284000 }, { "epoch": 6.77, "learning_rate": 8.66690646273401e-06, "loss": 0.207, "step": 11284500 }, { "epoch": 6.77, "learning_rate": 8.664806497173448e-06, "loss": 0.2141, "step": 11285000 }, { "epoch": 6.77, "learning_rate": 8.662710731544003e-06, "loss": 0.2131, "step": 11285500 }, { "epoch": 6.77, "learning_rate": 8.660610765983438e-06, "loss": 0.2082, "step": 11286000 }, { "epoch": 6.77, "learning_rate": 8.658510800422873e-06, "loss": 0.2108, "step": 11286500 }, { "epoch": 6.77, "learning_rate": 8.656410834862308e-06, "loss": 0.2117, "step": 11287000 }, { "epoch": 6.77, "learning_rate": 8.654315069232864e-06, "loss": 0.209, "step": 11287500 }, { "epoch": 6.77, "learning_rate": 8.6522151036723e-06, "loss": 0.2118, "step": 11288000 }, { "epoch": 6.77, "learning_rate": 8.650115138111736e-06, "loss": 0.2134, "step": 11288500 }, { "epoch": 6.77, "learning_rate": 8.648015172551169e-06, "loss": 0.2073, "step": 11289000 }, { "epoch": 6.77, "learning_rate": 8.645919406921728e-06, "loss": 0.2157, "step": 11289500 }, { "epoch": 6.77, "learning_rate": 8.643819441361161e-06, "loss": 0.2166, "step": 11290000 }, { "epoch": 6.77, "learning_rate": 8.641719475800596e-06, "loss": 0.2111, "step": 11290500 }, { "epoch": 6.77, "learning_rate": 8.639619510240033e-06, "loss": 0.2122, "step": 11291000 }, { "epoch": 6.77, "learning_rate": 8.637519544679467e-06, "loss": 0.2141, "step": 11291500 }, { "epoch": 6.77, "learning_rate": 8.635423779050024e-06, "loss": 0.2159, "step": 11292000 }, { "epoch": 6.77, "learning_rate": 8.633323813489459e-06, "loss": 0.2136, "step": 11292500 }, { "epoch": 6.77, "learning_rate": 8.631223847928894e-06, "loss": 0.2114, "step": 11293000 }, { "epoch": 6.77, "learning_rate": 8.629123882368329e-06, "loss": 0.2127, "step": 11293500 }, { "epoch": 6.77, "learning_rate": 8.627028116738886e-06, "loss": 0.215, "step": 11294000 }, { "epoch": 6.77, "learning_rate": 8.62492815117832e-06, "loss": 0.2105, "step": 11294500 }, { "epoch": 6.77, "learning_rate": 8.622828185617756e-06, "loss": 0.2114, "step": 11295000 }, { "epoch": 6.77, "learning_rate": 8.620728220057191e-06, "loss": 0.2095, "step": 11295500 }, { "epoch": 6.77, "learning_rate": 8.618628254496625e-06, "loss": 0.2045, "step": 11296000 }, { "epoch": 6.77, "learning_rate": 8.616532488867184e-06, "loss": 0.2182, "step": 11296500 }, { "epoch": 6.77, "learning_rate": 8.614432523306617e-06, "loss": 0.2143, "step": 11297000 }, { "epoch": 6.77, "learning_rate": 8.612332557746052e-06, "loss": 0.2103, "step": 11297500 }, { "epoch": 6.77, "learning_rate": 8.610232592185489e-06, "loss": 0.2131, "step": 11298000 }, { "epoch": 6.77, "learning_rate": 8.608136826556044e-06, "loss": 0.2122, "step": 11298500 }, { "epoch": 6.77, "learning_rate": 8.60603686099548e-06, "loss": 0.2144, "step": 11299000 }, { "epoch": 6.77, "learning_rate": 8.603936895434915e-06, "loss": 0.2102, "step": 11299500 }, { "epoch": 6.77, "learning_rate": 8.60183692987435e-06, "loss": 0.2151, "step": 11300000 }, { "epoch": 6.77, "eval_loss": 0.20324400067329407, "eval_runtime": 1460.3245, "eval_samples_per_second": 360.687, "eval_steps_per_second": 60.115, "step": 11300000 }, { "epoch": 6.78, "learning_rate": 8.599736964313787e-06, "loss": 0.2142, "step": 11300500 }, { "epoch": 6.78, "learning_rate": 8.59763699875322e-06, "loss": 0.2197, "step": 11301000 }, { "epoch": 6.78, "learning_rate": 8.595537033192655e-06, "loss": 0.2137, "step": 11301500 }, { "epoch": 6.78, "learning_rate": 8.59343706763209e-06, "loss": 0.2133, "step": 11302000 }, { "epoch": 6.78, "learning_rate": 8.591341302002647e-06, "loss": 0.2134, "step": 11302500 }, { "epoch": 6.78, "learning_rate": 8.58924133644208e-06, "loss": 0.2099, "step": 11303000 }, { "epoch": 6.78, "learning_rate": 8.587141370881518e-06, "loss": 0.2104, "step": 11303500 }, { "epoch": 6.78, "learning_rate": 8.585041405320953e-06, "loss": 0.2134, "step": 11304000 }, { "epoch": 6.78, "learning_rate": 8.582945639691508e-06, "loss": 0.2159, "step": 11304500 }, { "epoch": 6.78, "learning_rate": 8.580845674130945e-06, "loss": 0.2103, "step": 11305000 }, { "epoch": 6.78, "learning_rate": 8.5787499085015e-06, "loss": 0.2114, "step": 11305500 }, { "epoch": 6.78, "learning_rate": 8.576649942940935e-06, "loss": 0.2144, "step": 11306000 }, { "epoch": 6.78, "learning_rate": 8.57454997738037e-06, "loss": 0.209, "step": 11306500 }, { "epoch": 6.78, "learning_rate": 8.572450011819806e-06, "loss": 0.2137, "step": 11307000 }, { "epoch": 6.78, "learning_rate": 8.570350046259242e-06, "loss": 0.2107, "step": 11307500 }, { "epoch": 6.78, "learning_rate": 8.568250080698676e-06, "loss": 0.2143, "step": 11308000 }, { "epoch": 6.78, "learning_rate": 8.566154315069233e-06, "loss": 0.2091, "step": 11308500 }, { "epoch": 6.78, "learning_rate": 8.564054349508668e-06, "loss": 0.2128, "step": 11309000 }, { "epoch": 6.78, "learning_rate": 8.561954383948103e-06, "loss": 0.2085, "step": 11309500 }, { "epoch": 6.78, "learning_rate": 8.559854418387538e-06, "loss": 0.2135, "step": 11310000 }, { "epoch": 6.78, "learning_rate": 8.557754452826973e-06, "loss": 0.2142, "step": 11310500 }, { "epoch": 6.78, "learning_rate": 8.55565868719753e-06, "loss": 0.2109, "step": 11311000 }, { "epoch": 6.78, "learning_rate": 8.553558721636964e-06, "loss": 0.2151, "step": 11311500 }, { "epoch": 6.78, "learning_rate": 8.5514587560764e-06, "loss": 0.2087, "step": 11312000 }, { "epoch": 6.78, "learning_rate": 8.549358790515834e-06, "loss": 0.2048, "step": 11312500 }, { "epoch": 6.78, "learning_rate": 8.547263024886391e-06, "loss": 0.2163, "step": 11313000 }, { "epoch": 6.78, "learning_rate": 8.545163059325826e-06, "loss": 0.2178, "step": 11313500 }, { "epoch": 6.78, "learning_rate": 8.543063093765261e-06, "loss": 0.2168, "step": 11314000 }, { "epoch": 6.78, "learning_rate": 8.540963128204698e-06, "loss": 0.2143, "step": 11314500 }, { "epoch": 6.78, "learning_rate": 8.538863162644132e-06, "loss": 0.2171, "step": 11315000 }, { "epoch": 6.78, "learning_rate": 8.536767397014689e-06, "loss": 0.218, "step": 11315500 }, { "epoch": 6.78, "learning_rate": 8.534667431454124e-06, "loss": 0.2124, "step": 11316000 }, { "epoch": 6.78, "learning_rate": 8.532567465893559e-06, "loss": 0.2134, "step": 11316500 }, { "epoch": 6.78, "learning_rate": 8.530467500332994e-06, "loss": 0.2065, "step": 11317000 }, { "epoch": 6.79, "learning_rate": 8.52836753477243e-06, "loss": 0.2129, "step": 11317500 }, { "epoch": 6.79, "learning_rate": 8.526271769142986e-06, "loss": 0.2145, "step": 11318000 }, { "epoch": 6.79, "learning_rate": 8.52417180358242e-06, "loss": 0.21, "step": 11318500 }, { "epoch": 6.79, "learning_rate": 8.522071838021857e-06, "loss": 0.2143, "step": 11319000 }, { "epoch": 6.79, "learning_rate": 8.519971872461292e-06, "loss": 0.2157, "step": 11319500 }, { "epoch": 6.79, "learning_rate": 8.517871906900727e-06, "loss": 0.2146, "step": 11320000 }, { "epoch": 6.79, "learning_rate": 8.515776141271284e-06, "loss": 0.211, "step": 11320500 }, { "epoch": 6.79, "learning_rate": 8.513676175710717e-06, "loss": 0.21, "step": 11321000 }, { "epoch": 6.79, "learning_rate": 8.511576210150154e-06, "loss": 0.2084, "step": 11321500 }, { "epoch": 6.79, "learning_rate": 8.509476244589588e-06, "loss": 0.2135, "step": 11322000 }, { "epoch": 6.79, "learning_rate": 8.507380478960145e-06, "loss": 0.215, "step": 11322500 }, { "epoch": 6.79, "learning_rate": 8.505280513399581e-06, "loss": 0.2066, "step": 11323000 }, { "epoch": 6.79, "learning_rate": 8.503180547839015e-06, "loss": 0.2106, "step": 11323500 }, { "epoch": 6.79, "learning_rate": 8.50108058227845e-06, "loss": 0.2097, "step": 11324000 }, { "epoch": 6.79, "learning_rate": 8.498984816649007e-06, "loss": 0.2128, "step": 11324500 }, { "epoch": 6.79, "learning_rate": 8.496884851088442e-06, "loss": 0.2153, "step": 11325000 }, { "epoch": 6.79, "learning_rate": 8.494784885527877e-06, "loss": 0.2157, "step": 11325500 }, { "epoch": 6.79, "learning_rate": 8.492684919967313e-06, "loss": 0.2134, "step": 11326000 }, { "epoch": 6.79, "learning_rate": 8.490589154337868e-06, "loss": 0.2092, "step": 11326500 }, { "epoch": 6.79, "learning_rate": 8.488489188777305e-06, "loss": 0.2145, "step": 11327000 }, { "epoch": 6.79, "learning_rate": 8.48638922321674e-06, "loss": 0.2136, "step": 11327500 }, { "epoch": 6.79, "learning_rate": 8.484289257656173e-06, "loss": 0.2117, "step": 11328000 }, { "epoch": 6.79, "learning_rate": 8.48218929209561e-06, "loss": 0.2105, "step": 11328500 }, { "epoch": 6.79, "learning_rate": 8.480093526466165e-06, "loss": 0.2121, "step": 11329000 }, { "epoch": 6.79, "learning_rate": 8.4779935609056e-06, "loss": 0.212, "step": 11329500 }, { "epoch": 6.79, "learning_rate": 8.475893595345037e-06, "loss": 0.2192, "step": 11330000 }, { "epoch": 6.79, "learning_rate": 8.47379362978447e-06, "loss": 0.2168, "step": 11330500 }, { "epoch": 6.79, "learning_rate": 8.471693664223906e-06, "loss": 0.2127, "step": 11331000 }, { "epoch": 6.79, "learning_rate": 8.469593698663343e-06, "loss": 0.2121, "step": 11331500 }, { "epoch": 6.79, "learning_rate": 8.467493733102776e-06, "loss": 0.2131, "step": 11332000 }, { "epoch": 6.79, "learning_rate": 8.465393767542213e-06, "loss": 0.2112, "step": 11332500 }, { "epoch": 6.79, "learning_rate": 8.46330220184389e-06, "loss": 0.2133, "step": 11333000 }, { "epoch": 6.79, "learning_rate": 8.461202236283325e-06, "loss": 0.2141, "step": 11333500 }, { "epoch": 6.8, "learning_rate": 8.45910227072276e-06, "loss": 0.2174, "step": 11334000 }, { "epoch": 6.8, "learning_rate": 8.457002305162196e-06, "loss": 0.2204, "step": 11334500 }, { "epoch": 6.8, "learning_rate": 8.454902339601629e-06, "loss": 0.2172, "step": 11335000 }, { "epoch": 6.8, "learning_rate": 8.452806573972188e-06, "loss": 0.209, "step": 11335500 }, { "epoch": 6.8, "learning_rate": 8.450706608411621e-06, "loss": 0.2134, "step": 11336000 }, { "epoch": 6.8, "learning_rate": 8.448606642851056e-06, "loss": 0.2135, "step": 11336500 }, { "epoch": 6.8, "learning_rate": 8.446506677290493e-06, "loss": 0.2057, "step": 11337000 }, { "epoch": 6.8, "learning_rate": 8.444410911661049e-06, "loss": 0.2168, "step": 11337500 }, { "epoch": 6.8, "learning_rate": 8.442310946100484e-06, "loss": 0.2126, "step": 11338000 }, { "epoch": 6.8, "learning_rate": 8.440210980539919e-06, "loss": 0.2134, "step": 11338500 }, { "epoch": 6.8, "learning_rate": 8.438111014979354e-06, "loss": 0.2107, "step": 11339000 }, { "epoch": 6.8, "learning_rate": 8.436011049418789e-06, "loss": 0.2133, "step": 11339500 }, { "epoch": 6.8, "learning_rate": 8.433915283789346e-06, "loss": 0.214, "step": 11340000 }, { "epoch": 6.8, "learning_rate": 8.431815318228781e-06, "loss": 0.2164, "step": 11340500 }, { "epoch": 6.8, "learning_rate": 8.429715352668216e-06, "loss": 0.2138, "step": 11341000 }, { "epoch": 6.8, "learning_rate": 8.427615387107652e-06, "loss": 0.2131, "step": 11341500 }, { "epoch": 6.8, "learning_rate": 8.425519621478207e-06, "loss": 0.216, "step": 11342000 }, { "epoch": 6.8, "learning_rate": 8.423419655917644e-06, "loss": 0.2115, "step": 11342500 }, { "epoch": 6.8, "learning_rate": 8.421319690357079e-06, "loss": 0.2101, "step": 11343000 }, { "epoch": 6.8, "learning_rate": 8.419219724796512e-06, "loss": 0.2151, "step": 11343500 }, { "epoch": 6.8, "learning_rate": 8.417119759235949e-06, "loss": 0.215, "step": 11344000 }, { "epoch": 6.8, "learning_rate": 8.415023993606504e-06, "loss": 0.2141, "step": 11344500 }, { "epoch": 6.8, "learning_rate": 8.41292402804594e-06, "loss": 0.2176, "step": 11345000 }, { "epoch": 6.8, "learning_rate": 8.410824062485375e-06, "loss": 0.2163, "step": 11345500 }, { "epoch": 6.8, "learning_rate": 8.40872409692481e-06, "loss": 0.2151, "step": 11346000 }, { "epoch": 6.8, "learning_rate": 8.406624131364247e-06, "loss": 0.2169, "step": 11346500 }, { "epoch": 6.8, "learning_rate": 8.404528365734802e-06, "loss": 0.2152, "step": 11347000 }, { "epoch": 6.8, "learning_rate": 8.402428400174237e-06, "loss": 0.2148, "step": 11347500 }, { "epoch": 6.8, "learning_rate": 8.400328434613672e-06, "loss": 0.2113, "step": 11348000 }, { "epoch": 6.8, "learning_rate": 8.398228469053107e-06, "loss": 0.212, "step": 11348500 }, { "epoch": 6.8, "learning_rate": 8.396128503492543e-06, "loss": 0.2094, "step": 11349000 }, { "epoch": 6.8, "learning_rate": 8.394028537931978e-06, "loss": 0.2151, "step": 11349500 }, { "epoch": 6.8, "learning_rate": 8.391932772302535e-06, "loss": 0.2134, "step": 11350000 }, { "epoch": 6.81, "learning_rate": 8.389832806741968e-06, "loss": 0.2091, "step": 11350500 }, { "epoch": 6.81, "learning_rate": 8.387732841181405e-06, "loss": 0.2111, "step": 11351000 }, { "epoch": 6.81, "learning_rate": 8.38563287562084e-06, "loss": 0.215, "step": 11351500 }, { "epoch": 6.81, "learning_rate": 8.383537109991395e-06, "loss": 0.2129, "step": 11352000 }, { "epoch": 6.81, "learning_rate": 8.381437144430832e-06, "loss": 0.2077, "step": 11352500 }, { "epoch": 6.81, "learning_rate": 8.379337178870266e-06, "loss": 0.2136, "step": 11353000 }, { "epoch": 6.81, "learning_rate": 8.377237213309703e-06, "loss": 0.212, "step": 11353500 }, { "epoch": 6.81, "learning_rate": 8.375137247749136e-06, "loss": 0.2133, "step": 11354000 }, { "epoch": 6.81, "learning_rate": 8.373037282188571e-06, "loss": 0.2095, "step": 11354500 }, { "epoch": 6.81, "learning_rate": 8.370937316628008e-06, "loss": 0.2156, "step": 11355000 }, { "epoch": 6.81, "learning_rate": 8.368837351067441e-06, "loss": 0.2073, "step": 11355500 }, { "epoch": 6.81, "learning_rate": 8.366741585437998e-06, "loss": 0.2121, "step": 11356000 }, { "epoch": 6.81, "learning_rate": 8.364641619877434e-06, "loss": 0.2117, "step": 11356500 }, { "epoch": 6.81, "learning_rate": 8.362541654316869e-06, "loss": 0.2117, "step": 11357000 }, { "epoch": 6.81, "learning_rate": 8.360441688756304e-06, "loss": 0.2125, "step": 11357500 }, { "epoch": 6.81, "learning_rate": 8.358345923126861e-06, "loss": 0.2107, "step": 11358000 }, { "epoch": 6.81, "learning_rate": 8.356250157497416e-06, "loss": 0.2175, "step": 11358500 }, { "epoch": 6.81, "learning_rate": 8.354150191936851e-06, "loss": 0.2099, "step": 11359000 }, { "epoch": 6.81, "learning_rate": 8.352050226376288e-06, "loss": 0.2134, "step": 11359500 }, { "epoch": 6.81, "learning_rate": 8.349950260815722e-06, "loss": 0.209, "step": 11360000 }, { "epoch": 6.81, "learning_rate": 8.347850295255158e-06, "loss": 0.2079, "step": 11360500 }, { "epoch": 6.81, "learning_rate": 8.345750329694594e-06, "loss": 0.2144, "step": 11361000 }, { "epoch": 6.81, "learning_rate": 8.343654564065149e-06, "loss": 0.2118, "step": 11361500 }, { "epoch": 6.81, "learning_rate": 8.341554598504586e-06, "loss": 0.214, "step": 11362000 }, { "epoch": 6.81, "learning_rate": 8.33945463294402e-06, "loss": 0.2173, "step": 11362500 }, { "epoch": 6.81, "learning_rate": 8.337354667383454e-06, "loss": 0.2107, "step": 11363000 }, { "epoch": 6.81, "learning_rate": 8.335254701822891e-06, "loss": 0.212, "step": 11363500 }, { "epoch": 6.81, "learning_rate": 8.333154736262325e-06, "loss": 0.2159, "step": 11364000 }, { "epoch": 6.81, "learning_rate": 8.331054770701761e-06, "loss": 0.2108, "step": 11364500 }, { "epoch": 6.81, "learning_rate": 8.328954805141195e-06, "loss": 0.2126, "step": 11365000 }, { "epoch": 6.81, "learning_rate": 8.326863239442874e-06, "loss": 0.2113, "step": 11365500 }, { "epoch": 6.81, "learning_rate": 8.324763273882307e-06, "loss": 0.2108, "step": 11366000 }, { "epoch": 6.81, "learning_rate": 8.322663308321744e-06, "loss": 0.2143, "step": 11366500 }, { "epoch": 6.81, "learning_rate": 8.320563342761177e-06, "loss": 0.2122, "step": 11367000 }, { "epoch": 6.82, "learning_rate": 8.318463377200614e-06, "loss": 0.2162, "step": 11367500 }, { "epoch": 6.82, "learning_rate": 8.31636341164005e-06, "loss": 0.2096, "step": 11368000 }, { "epoch": 6.82, "learning_rate": 8.314263446079483e-06, "loss": 0.2221, "step": 11368500 }, { "epoch": 6.82, "learning_rate": 8.31216348051892e-06, "loss": 0.2079, "step": 11369000 }, { "epoch": 6.82, "learning_rate": 8.310067714889475e-06, "loss": 0.2109, "step": 11369500 }, { "epoch": 6.82, "learning_rate": 8.30796774932891e-06, "loss": 0.2109, "step": 11370000 }, { "epoch": 6.82, "learning_rate": 8.305871983699467e-06, "loss": 0.2059, "step": 11370500 }, { "epoch": 6.82, "learning_rate": 8.303772018138902e-06, "loss": 0.2145, "step": 11371000 }, { "epoch": 6.82, "learning_rate": 8.301672052578337e-06, "loss": 0.2138, "step": 11371500 }, { "epoch": 6.82, "learning_rate": 8.299572087017773e-06, "loss": 0.2151, "step": 11372000 }, { "epoch": 6.82, "learning_rate": 8.297472121457208e-06, "loss": 0.2093, "step": 11372500 }, { "epoch": 6.82, "learning_rate": 8.295376355827763e-06, "loss": 0.2075, "step": 11373000 }, { "epoch": 6.82, "learning_rate": 8.2932763902672e-06, "loss": 0.2105, "step": 11373500 }, { "epoch": 6.82, "learning_rate": 8.291176424706635e-06, "loss": 0.2153, "step": 11374000 }, { "epoch": 6.82, "learning_rate": 8.28907645914607e-06, "loss": 0.2101, "step": 11374500 }, { "epoch": 6.82, "learning_rate": 8.286976493585505e-06, "loss": 0.2112, "step": 11375000 }, { "epoch": 6.82, "learning_rate": 8.28488072795606e-06, "loss": 0.2139, "step": 11375500 }, { "epoch": 6.82, "learning_rate": 8.282780762395497e-06, "loss": 0.2091, "step": 11376000 }, { "epoch": 6.82, "learning_rate": 8.280680796834931e-06, "loss": 0.2111, "step": 11376500 }, { "epoch": 6.82, "learning_rate": 8.278580831274366e-06, "loss": 0.2143, "step": 11377000 }, { "epoch": 6.82, "learning_rate": 8.276480865713803e-06, "loss": 0.2151, "step": 11377500 }, { "epoch": 6.82, "learning_rate": 8.274385100084358e-06, "loss": 0.2153, "step": 11378000 }, { "epoch": 6.82, "learning_rate": 8.272285134523793e-06, "loss": 0.2081, "step": 11378500 }, { "epoch": 6.82, "learning_rate": 8.270185168963229e-06, "loss": 0.2127, "step": 11379000 }, { "epoch": 6.82, "learning_rate": 8.268085203402664e-06, "loss": 0.2122, "step": 11379500 }, { "epoch": 6.82, "learning_rate": 8.2659852378421e-06, "loss": 0.2103, "step": 11380000 }, { "epoch": 6.82, "learning_rate": 8.263889472212656e-06, "loss": 0.2173, "step": 11380500 }, { "epoch": 6.82, "learning_rate": 8.261789506652091e-06, "loss": 0.215, "step": 11381000 }, { "epoch": 6.82, "learning_rate": 8.259689541091526e-06, "loss": 0.2068, "step": 11381500 }, { "epoch": 6.82, "learning_rate": 8.257589575530961e-06, "loss": 0.2118, "step": 11382000 }, { "epoch": 6.82, "learning_rate": 8.255489609970396e-06, "loss": 0.2046, "step": 11382500 }, { "epoch": 6.82, "learning_rate": 8.253389644409832e-06, "loss": 0.211, "step": 11383000 }, { "epoch": 6.82, "learning_rate": 8.251293878780389e-06, "loss": 0.2106, "step": 11383500 }, { "epoch": 6.83, "learning_rate": 8.249193913219822e-06, "loss": 0.2127, "step": 11384000 }, { "epoch": 6.83, "learning_rate": 8.247093947659259e-06, "loss": 0.2154, "step": 11384500 }, { "epoch": 6.83, "learning_rate": 8.244993982098692e-06, "loss": 0.2109, "step": 11385000 }, { "epoch": 6.83, "learning_rate": 8.242894016538129e-06, "loss": 0.2084, "step": 11385500 }, { "epoch": 6.83, "learning_rate": 8.240794050977564e-06, "loss": 0.2177, "step": 11386000 }, { "epoch": 6.83, "learning_rate": 8.238694085416998e-06, "loss": 0.2152, "step": 11386500 }, { "epoch": 6.83, "learning_rate": 8.236598319787556e-06, "loss": 0.2161, "step": 11387000 }, { "epoch": 6.83, "learning_rate": 8.23449835422699e-06, "loss": 0.2144, "step": 11387500 }, { "epoch": 6.83, "learning_rate": 8.232398388666425e-06, "loss": 0.2188, "step": 11388000 }, { "epoch": 6.83, "learning_rate": 8.230298423105862e-06, "loss": 0.2151, "step": 11388500 }, { "epoch": 6.83, "learning_rate": 8.228198457545295e-06, "loss": 0.2144, "step": 11389000 }, { "epoch": 6.83, "learning_rate": 8.226102691915852e-06, "loss": 0.2095, "step": 11389500 }, { "epoch": 6.83, "learning_rate": 8.224002726355287e-06, "loss": 0.2164, "step": 11390000 }, { "epoch": 6.83, "learning_rate": 8.221902760794723e-06, "loss": 0.2078, "step": 11390500 }, { "epoch": 6.83, "learning_rate": 8.21980279523416e-06, "loss": 0.2115, "step": 11391000 }, { "epoch": 6.83, "learning_rate": 8.217707029604715e-06, "loss": 0.2101, "step": 11391500 }, { "epoch": 6.83, "learning_rate": 8.21560706404415e-06, "loss": 0.2135, "step": 11392000 }, { "epoch": 6.83, "learning_rate": 8.213507098483585e-06, "loss": 0.2092, "step": 11392500 }, { "epoch": 6.83, "learning_rate": 8.21140713292302e-06, "loss": 0.2076, "step": 11393000 }, { "epoch": 6.83, "learning_rate": 8.209307167362455e-06, "loss": 0.2125, "step": 11393500 }, { "epoch": 6.83, "learning_rate": 8.20720720180189e-06, "loss": 0.2111, "step": 11394000 }, { "epoch": 6.83, "learning_rate": 8.205107236241326e-06, "loss": 0.2118, "step": 11394500 }, { "epoch": 6.83, "learning_rate": 8.20300727068076e-06, "loss": 0.2145, "step": 11395000 }, { "epoch": 6.83, "learning_rate": 8.200911505051318e-06, "loss": 0.2111, "step": 11395500 }, { "epoch": 6.83, "learning_rate": 8.198811539490751e-06, "loss": 0.2157, "step": 11396000 }, { "epoch": 6.83, "learning_rate": 8.196711573930188e-06, "loss": 0.213, "step": 11396500 }, { "epoch": 6.83, "learning_rate": 8.194611608369623e-06, "loss": 0.2103, "step": 11397000 }, { "epoch": 6.83, "learning_rate": 8.192515842740178e-06, "loss": 0.2118, "step": 11397500 }, { "epoch": 6.83, "learning_rate": 8.190420077110735e-06, "loss": 0.2114, "step": 11398000 }, { "epoch": 6.83, "learning_rate": 8.18832011155017e-06, "loss": 0.2182, "step": 11398500 }, { "epoch": 6.83, "learning_rate": 8.186220145989606e-06, "loss": 0.2102, "step": 11399000 }, { "epoch": 6.83, "learning_rate": 8.184120180429041e-06, "loss": 0.2149, "step": 11399500 }, { "epoch": 6.83, "learning_rate": 8.182020214868476e-06, "loss": 0.2104, "step": 11400000 }, { "epoch": 6.83, "eval_loss": 0.20217104256153107, "eval_runtime": 1456.7703, "eval_samples_per_second": 361.567, "eval_steps_per_second": 60.261, "step": 11400000 }, { "epoch": 6.84, "learning_rate": 8.179924449239031e-06, "loss": 0.2131, "step": 11400500 }, { "epoch": 6.84, "learning_rate": 8.177824483678468e-06, "loss": 0.2135, "step": 11401000 }, { "epoch": 6.84, "learning_rate": 8.175724518117903e-06, "loss": 0.2107, "step": 11401500 }, { "epoch": 6.84, "learning_rate": 8.173624552557337e-06, "loss": 0.2204, "step": 11402000 }, { "epoch": 6.84, "learning_rate": 8.171528786927895e-06, "loss": 0.2138, "step": 11402500 }, { "epoch": 6.84, "learning_rate": 8.169428821367329e-06, "loss": 0.2118, "step": 11403000 }, { "epoch": 6.84, "learning_rate": 8.167328855806764e-06, "loss": 0.2123, "step": 11403500 }, { "epoch": 6.84, "learning_rate": 8.165228890246201e-06, "loss": 0.2149, "step": 11404000 }, { "epoch": 6.84, "learning_rate": 8.163128924685634e-06, "loss": 0.2126, "step": 11404500 }, { "epoch": 6.84, "learning_rate": 8.161028959125071e-06, "loss": 0.2129, "step": 11405000 }, { "epoch": 6.84, "learning_rate": 8.158933193495626e-06, "loss": 0.2114, "step": 11405500 }, { "epoch": 6.84, "learning_rate": 8.156833227935062e-06, "loss": 0.2131, "step": 11406000 }, { "epoch": 6.84, "learning_rate": 8.154733262374497e-06, "loss": 0.2135, "step": 11406500 }, { "epoch": 6.84, "learning_rate": 8.152633296813932e-06, "loss": 0.2106, "step": 11407000 }, { "epoch": 6.84, "learning_rate": 8.150533331253367e-06, "loss": 0.2029, "step": 11407500 }, { "epoch": 6.84, "learning_rate": 8.148433365692802e-06, "loss": 0.2135, "step": 11408000 }, { "epoch": 6.84, "learning_rate": 8.14633760006336e-06, "loss": 0.2137, "step": 11408500 }, { "epoch": 6.84, "learning_rate": 8.144237634502793e-06, "loss": 0.2108, "step": 11409000 }, { "epoch": 6.84, "learning_rate": 8.14213766894223e-06, "loss": 0.2119, "step": 11409500 }, { "epoch": 6.84, "learning_rate": 8.140037703381665e-06, "loss": 0.2106, "step": 11410000 }, { "epoch": 6.84, "learning_rate": 8.1379377378211e-06, "loss": 0.2142, "step": 11410500 }, { "epoch": 6.84, "learning_rate": 8.135837772260535e-06, "loss": 0.2122, "step": 11411000 }, { "epoch": 6.84, "learning_rate": 8.13373780669997e-06, "loss": 0.2123, "step": 11411500 }, { "epoch": 6.84, "learning_rate": 8.131637841139405e-06, "loss": 0.2151, "step": 11412000 }, { "epoch": 6.84, "learning_rate": 8.129542075509962e-06, "loss": 0.2113, "step": 11412500 }, { "epoch": 6.84, "learning_rate": 8.127442109949396e-06, "loss": 0.2106, "step": 11413000 }, { "epoch": 6.84, "learning_rate": 8.125346344319954e-06, "loss": 0.2096, "step": 11413500 }, { "epoch": 6.84, "learning_rate": 8.123246378759388e-06, "loss": 0.2103, "step": 11414000 }, { "epoch": 6.84, "learning_rate": 8.121146413198823e-06, "loss": 0.2178, "step": 11414500 }, { "epoch": 6.84, "learning_rate": 8.119046447638258e-06, "loss": 0.2085, "step": 11415000 }, { "epoch": 6.84, "learning_rate": 8.116950682008815e-06, "loss": 0.2166, "step": 11415500 }, { "epoch": 6.84, "learning_rate": 8.11485071644825e-06, "loss": 0.2146, "step": 11416000 }, { "epoch": 6.84, "learning_rate": 8.112750750887685e-06, "loss": 0.2128, "step": 11416500 }, { "epoch": 6.84, "learning_rate": 8.11065078532712e-06, "loss": 0.2124, "step": 11417000 }, { "epoch": 6.85, "learning_rate": 8.108550819766556e-06, "loss": 0.2078, "step": 11417500 }, { "epoch": 6.85, "learning_rate": 8.106455054137113e-06, "loss": 0.2117, "step": 11418000 }, { "epoch": 6.85, "learning_rate": 8.104355088576546e-06, "loss": 0.2094, "step": 11418500 }, { "epoch": 6.85, "learning_rate": 8.102255123015983e-06, "loss": 0.2122, "step": 11419000 }, { "epoch": 6.85, "learning_rate": 8.100155157455418e-06, "loss": 0.2161, "step": 11419500 }, { "epoch": 6.85, "learning_rate": 8.098055191894852e-06, "loss": 0.209, "step": 11420000 }, { "epoch": 6.85, "learning_rate": 8.095955226334288e-06, "loss": 0.2139, "step": 11420500 }, { "epoch": 6.85, "learning_rate": 8.093859460704844e-06, "loss": 0.208, "step": 11421000 }, { "epoch": 6.85, "learning_rate": 8.091759495144279e-06, "loss": 0.216, "step": 11421500 }, { "epoch": 6.85, "learning_rate": 8.089659529583716e-06, "loss": 0.211, "step": 11422000 }, { "epoch": 6.85, "learning_rate": 8.087559564023149e-06, "loss": 0.2135, "step": 11422500 }, { "epoch": 6.85, "learning_rate": 8.085459598462586e-06, "loss": 0.2152, "step": 11423000 }, { "epoch": 6.85, "learning_rate": 8.08335963290202e-06, "loss": 0.2115, "step": 11423500 }, { "epoch": 6.85, "learning_rate": 8.081259667341455e-06, "loss": 0.2165, "step": 11424000 }, { "epoch": 6.85, "learning_rate": 8.079159701780891e-06, "loss": 0.2122, "step": 11424500 }, { "epoch": 6.85, "learning_rate": 8.077068136082569e-06, "loss": 0.216, "step": 11425000 }, { "epoch": 6.85, "learning_rate": 8.074968170522002e-06, "loss": 0.2118, "step": 11425500 }, { "epoch": 6.85, "learning_rate": 8.072868204961439e-06, "loss": 0.2111, "step": 11426000 }, { "epoch": 6.85, "learning_rate": 8.070768239400874e-06, "loss": 0.2145, "step": 11426500 }, { "epoch": 6.85, "learning_rate": 8.068668273840307e-06, "loss": 0.216, "step": 11427000 }, { "epoch": 6.85, "learning_rate": 8.066576708141988e-06, "loss": 0.2156, "step": 11427500 }, { "epoch": 6.85, "learning_rate": 8.064476742581421e-06, "loss": 0.2123, "step": 11428000 }, { "epoch": 6.85, "learning_rate": 8.062376777020857e-06, "loss": 0.2111, "step": 11428500 }, { "epoch": 6.85, "learning_rate": 8.060276811460292e-06, "loss": 0.2139, "step": 11429000 }, { "epoch": 6.85, "learning_rate": 8.058176845899727e-06, "loss": 0.2131, "step": 11429500 }, { "epoch": 6.85, "learning_rate": 8.056076880339162e-06, "loss": 0.2071, "step": 11430000 }, { "epoch": 6.85, "learning_rate": 8.053976914778597e-06, "loss": 0.2081, "step": 11430500 }, { "epoch": 6.85, "learning_rate": 8.051876949218032e-06, "loss": 0.2185, "step": 11431000 }, { "epoch": 6.85, "learning_rate": 8.049776983657469e-06, "loss": 0.2064, "step": 11431500 }, { "epoch": 6.85, "learning_rate": 8.047681218028024e-06, "loss": 0.2159, "step": 11432000 }, { "epoch": 6.85, "learning_rate": 8.04558125246746e-06, "loss": 0.2103, "step": 11432500 }, { "epoch": 6.85, "learning_rate": 8.043481286906895e-06, "loss": 0.213, "step": 11433000 }, { "epoch": 6.85, "learning_rate": 8.04138132134633e-06, "loss": 0.2127, "step": 11433500 }, { "epoch": 6.86, "learning_rate": 8.039281355785765e-06, "loss": 0.2142, "step": 11434000 }, { "epoch": 6.86, "learning_rate": 8.037185590156322e-06, "loss": 0.2136, "step": 11434500 }, { "epoch": 6.86, "learning_rate": 8.035085624595757e-06, "loss": 0.2115, "step": 11435000 }, { "epoch": 6.86, "learning_rate": 8.032985659035192e-06, "loss": 0.2144, "step": 11435500 }, { "epoch": 6.86, "learning_rate": 8.030885693474627e-06, "loss": 0.2113, "step": 11436000 }, { "epoch": 6.86, "learning_rate": 8.028785727914061e-06, "loss": 0.2125, "step": 11436500 }, { "epoch": 6.86, "learning_rate": 8.02668996228462e-06, "loss": 0.2172, "step": 11437000 }, { "epoch": 6.86, "learning_rate": 8.024589996724053e-06, "loss": 0.21, "step": 11437500 }, { "epoch": 6.86, "learning_rate": 8.022490031163488e-06, "loss": 0.2161, "step": 11438000 }, { "epoch": 6.86, "learning_rate": 8.020390065602925e-06, "loss": 0.2126, "step": 11438500 }, { "epoch": 6.86, "learning_rate": 8.018290100042358e-06, "loss": 0.2131, "step": 11439000 }, { "epoch": 6.86, "learning_rate": 8.016190134481794e-06, "loss": 0.214, "step": 11439500 }, { "epoch": 6.86, "learning_rate": 8.01409016892123e-06, "loss": 0.2111, "step": 11440000 }, { "epoch": 6.86, "learning_rate": 8.011994403291786e-06, "loss": 0.2092, "step": 11440500 }, { "epoch": 6.86, "learning_rate": 8.009894437731221e-06, "loss": 0.2104, "step": 11441000 }, { "epoch": 6.86, "learning_rate": 8.007794472170656e-06, "loss": 0.2109, "step": 11441500 }, { "epoch": 6.86, "learning_rate": 8.005694506610091e-06, "loss": 0.2091, "step": 11442000 }, { "epoch": 6.86, "learning_rate": 8.003594541049528e-06, "loss": 0.211, "step": 11442500 }, { "epoch": 6.86, "learning_rate": 8.001494575488961e-06, "loss": 0.2107, "step": 11443000 }, { "epoch": 6.86, "learning_rate": 7.999394609928397e-06, "loss": 0.2146, "step": 11443500 }, { "epoch": 6.86, "learning_rate": 7.997298844298954e-06, "loss": 0.2099, "step": 11444000 }, { "epoch": 6.86, "learning_rate": 7.995198878738389e-06, "loss": 0.2148, "step": 11444500 }, { "epoch": 6.86, "learning_rate": 7.993098913177822e-06, "loss": 0.2078, "step": 11445000 }, { "epoch": 6.86, "learning_rate": 7.990998947617259e-06, "loss": 0.2107, "step": 11445500 }, { "epoch": 6.86, "learning_rate": 7.988898982056694e-06, "loss": 0.2109, "step": 11446000 }, { "epoch": 6.86, "learning_rate": 7.98679901649613e-06, "loss": 0.2131, "step": 11446500 }, { "epoch": 6.86, "learning_rate": 7.984699050935565e-06, "loss": 0.2138, "step": 11447000 }, { "epoch": 6.86, "learning_rate": 7.982599085375e-06, "loss": 0.2084, "step": 11447500 }, { "epoch": 6.86, "learning_rate": 7.980503319745557e-06, "loss": 0.2174, "step": 11448000 }, { "epoch": 6.86, "learning_rate": 7.978403354184992e-06, "loss": 0.2172, "step": 11448500 }, { "epoch": 6.86, "learning_rate": 7.976303388624425e-06, "loss": 0.2111, "step": 11449000 }, { "epoch": 6.86, "learning_rate": 7.974203423063862e-06, "loss": 0.2115, "step": 11449500 }, { "epoch": 6.86, "learning_rate": 7.972107657434417e-06, "loss": 0.2126, "step": 11450000 }, { "epoch": 6.87, "learning_rate": 7.970007691873853e-06, "loss": 0.2122, "step": 11450500 }, { "epoch": 6.87, "learning_rate": 7.96790772631329e-06, "loss": 0.2102, "step": 11451000 }, { "epoch": 6.87, "learning_rate": 7.965807760752723e-06, "loss": 0.2075, "step": 11451500 }, { "epoch": 6.87, "learning_rate": 7.96371199512328e-06, "loss": 0.2123, "step": 11452000 }, { "epoch": 6.87, "learning_rate": 7.961612029562715e-06, "loss": 0.214, "step": 11452500 }, { "epoch": 6.87, "learning_rate": 7.95951206400215e-06, "loss": 0.215, "step": 11453000 }, { "epoch": 6.87, "learning_rate": 7.957412098441585e-06, "loss": 0.212, "step": 11453500 }, { "epoch": 6.87, "learning_rate": 7.955316332812142e-06, "loss": 0.2094, "step": 11454000 }, { "epoch": 6.87, "learning_rate": 7.953216367251576e-06, "loss": 0.2128, "step": 11454500 }, { "epoch": 6.87, "learning_rate": 7.951116401691013e-06, "loss": 0.2182, "step": 11455000 }, { "epoch": 6.87, "learning_rate": 7.949016436130448e-06, "loss": 0.2126, "step": 11455500 }, { "epoch": 6.87, "learning_rate": 7.946916470569881e-06, "loss": 0.2119, "step": 11456000 }, { "epoch": 6.87, "learning_rate": 7.94482070494044e-06, "loss": 0.2114, "step": 11456500 }, { "epoch": 6.87, "learning_rate": 7.942720739379873e-06, "loss": 0.2096, "step": 11457000 }, { "epoch": 6.87, "learning_rate": 7.940620773819308e-06, "loss": 0.2119, "step": 11457500 }, { "epoch": 6.87, "learning_rate": 7.938520808258745e-06, "loss": 0.2103, "step": 11458000 }, { "epoch": 6.87, "learning_rate": 7.936420842698179e-06, "loss": 0.2136, "step": 11458500 }, { "epoch": 6.87, "learning_rate": 7.934320877137616e-06, "loss": 0.2083, "step": 11459000 }, { "epoch": 6.87, "learning_rate": 7.93222511150817e-06, "loss": 0.2139, "step": 11459500 }, { "epoch": 6.87, "learning_rate": 7.930125145947606e-06, "loss": 0.2102, "step": 11460000 }, { "epoch": 6.87, "learning_rate": 7.928025180387043e-06, "loss": 0.2142, "step": 11460500 }, { "epoch": 6.87, "learning_rate": 7.925925214826476e-06, "loss": 0.2167, "step": 11461000 }, { "epoch": 6.87, "learning_rate": 7.923825249265911e-06, "loss": 0.216, "step": 11461500 }, { "epoch": 6.87, "learning_rate": 7.921725283705348e-06, "loss": 0.2171, "step": 11462000 }, { "epoch": 6.87, "learning_rate": 7.919629518075904e-06, "loss": 0.2081, "step": 11462500 }, { "epoch": 6.87, "learning_rate": 7.917529552515339e-06, "loss": 0.2136, "step": 11463000 }, { "epoch": 6.87, "learning_rate": 7.915429586954774e-06, "loss": 0.209, "step": 11463500 }, { "epoch": 6.87, "learning_rate": 7.913329621394209e-06, "loss": 0.2123, "step": 11464000 }, { "epoch": 6.87, "learning_rate": 7.911229655833644e-06, "loss": 0.2096, "step": 11464500 }, { "epoch": 6.87, "learning_rate": 7.909133890204201e-06, "loss": 0.2079, "step": 11465000 }, { "epoch": 6.87, "learning_rate": 7.907033924643635e-06, "loss": 0.2094, "step": 11465500 }, { "epoch": 6.87, "learning_rate": 7.904933959083071e-06, "loss": 0.2141, "step": 11466000 }, { "epoch": 6.87, "learning_rate": 7.902833993522507e-06, "loss": 0.2093, "step": 11466500 }, { "epoch": 6.87, "learning_rate": 7.90073402796194e-06, "loss": 0.2129, "step": 11467000 }, { "epoch": 6.88, "learning_rate": 7.898638262332499e-06, "loss": 0.2166, "step": 11467500 }, { "epoch": 6.88, "learning_rate": 7.896538296771932e-06, "loss": 0.2182, "step": 11468000 }, { "epoch": 6.88, "learning_rate": 7.894438331211367e-06, "loss": 0.2136, "step": 11468500 }, { "epoch": 6.88, "learning_rate": 7.892338365650804e-06, "loss": 0.2147, "step": 11469000 }, { "epoch": 6.88, "learning_rate": 7.890238400090238e-06, "loss": 0.2074, "step": 11469500 }, { "epoch": 6.88, "learning_rate": 7.888142634460795e-06, "loss": 0.2059, "step": 11470000 }, { "epoch": 6.88, "learning_rate": 7.88604266890023e-06, "loss": 0.2149, "step": 11470500 }, { "epoch": 6.88, "learning_rate": 7.883942703339665e-06, "loss": 0.2133, "step": 11471000 }, { "epoch": 6.88, "learning_rate": 7.881842737779102e-06, "loss": 0.2152, "step": 11471500 }, { "epoch": 6.88, "learning_rate": 7.879742772218535e-06, "loss": 0.2097, "step": 11472000 }, { "epoch": 6.88, "learning_rate": 7.877647006589092e-06, "loss": 0.2156, "step": 11472500 }, { "epoch": 6.88, "learning_rate": 7.875547041028527e-06, "loss": 0.2127, "step": 11473000 }, { "epoch": 6.88, "learning_rate": 7.873447075467962e-06, "loss": 0.2175, "step": 11473500 }, { "epoch": 6.88, "learning_rate": 7.871347109907396e-06, "loss": 0.2144, "step": 11474000 }, { "epoch": 6.88, "learning_rate": 7.869247144346833e-06, "loss": 0.2075, "step": 11474500 }, { "epoch": 6.88, "learning_rate": 7.867151378717388e-06, "loss": 0.2157, "step": 11475000 }, { "epoch": 6.88, "learning_rate": 7.865055613087945e-06, "loss": 0.2124, "step": 11475500 }, { "epoch": 6.88, "learning_rate": 7.86295564752738e-06, "loss": 0.2106, "step": 11476000 }, { "epoch": 6.88, "learning_rate": 7.860855681966815e-06, "loss": 0.2145, "step": 11476500 }, { "epoch": 6.88, "learning_rate": 7.85875571640625e-06, "loss": 0.2162, "step": 11477000 }, { "epoch": 6.88, "learning_rate": 7.856655750845686e-06, "loss": 0.2158, "step": 11477500 }, { "epoch": 6.88, "learning_rate": 7.85455578528512e-06, "loss": 0.2103, "step": 11478000 }, { "epoch": 6.88, "learning_rate": 7.852455819724558e-06, "loss": 0.2124, "step": 11478500 }, { "epoch": 6.88, "learning_rate": 7.850355854163991e-06, "loss": 0.2121, "step": 11479000 }, { "epoch": 6.88, "learning_rate": 7.848255888603426e-06, "loss": 0.2096, "step": 11479500 }, { "epoch": 6.88, "learning_rate": 7.846160122973983e-06, "loss": 0.2156, "step": 11480000 }, { "epoch": 6.88, "learning_rate": 7.844060157413418e-06, "loss": 0.214, "step": 11480500 }, { "epoch": 6.88, "learning_rate": 7.841960191852854e-06, "loss": 0.2069, "step": 11481000 }, { "epoch": 6.88, "learning_rate": 7.839860226292289e-06, "loss": 0.2162, "step": 11481500 }, { "epoch": 6.88, "learning_rate": 7.837760260731724e-06, "loss": 0.2151, "step": 11482000 }, { "epoch": 6.88, "learning_rate": 7.835660295171159e-06, "loss": 0.2206, "step": 11482500 }, { "epoch": 6.88, "learning_rate": 7.833564529541716e-06, "loss": 0.2201, "step": 11483000 }, { "epoch": 6.88, "learning_rate": 7.83146456398115e-06, "loss": 0.2105, "step": 11483500 }, { "epoch": 6.89, "learning_rate": 7.829364598420586e-06, "loss": 0.2096, "step": 11484000 }, { "epoch": 6.89, "learning_rate": 7.827264632860021e-06, "loss": 0.2139, "step": 11484500 }, { "epoch": 6.89, "learning_rate": 7.825164667299455e-06, "loss": 0.215, "step": 11485000 }, { "epoch": 6.89, "learning_rate": 7.823068901670013e-06, "loss": 0.2092, "step": 11485500 }, { "epoch": 6.89, "learning_rate": 7.820968936109447e-06, "loss": 0.2129, "step": 11486000 }, { "epoch": 6.89, "learning_rate": 7.818868970548882e-06, "loss": 0.2113, "step": 11486500 }, { "epoch": 6.89, "learning_rate": 7.816769004988319e-06, "loss": 0.212, "step": 11487000 }, { "epoch": 6.89, "learning_rate": 7.814669039427752e-06, "loss": 0.2145, "step": 11487500 }, { "epoch": 6.89, "learning_rate": 7.81257327379831e-06, "loss": 0.2125, "step": 11488000 }, { "epoch": 6.89, "learning_rate": 7.810473308237745e-06, "loss": 0.2122, "step": 11488500 }, { "epoch": 6.89, "learning_rate": 7.80837334267718e-06, "loss": 0.2134, "step": 11489000 }, { "epoch": 6.89, "learning_rate": 7.806273377116617e-06, "loss": 0.2086, "step": 11489500 }, { "epoch": 6.89, "learning_rate": 7.80417341155605e-06, "loss": 0.2117, "step": 11490000 }, { "epoch": 6.89, "learning_rate": 7.802073445995485e-06, "loss": 0.2055, "step": 11490500 }, { "epoch": 6.89, "learning_rate": 7.799977680366042e-06, "loss": 0.2086, "step": 11491000 }, { "epoch": 6.89, "learning_rate": 7.797877714805477e-06, "loss": 0.211, "step": 11491500 }, { "epoch": 6.89, "learning_rate": 7.79577774924491e-06, "loss": 0.2149, "step": 11492000 }, { "epoch": 6.89, "learning_rate": 7.793677783684348e-06, "loss": 0.2139, "step": 11492500 }, { "epoch": 6.89, "learning_rate": 7.791577818123783e-06, "loss": 0.2122, "step": 11493000 }, { "epoch": 6.89, "learning_rate": 7.789477852563218e-06, "loss": 0.2154, "step": 11493500 }, { "epoch": 6.89, "learning_rate": 7.787377887002653e-06, "loss": 0.2131, "step": 11494000 }, { "epoch": 6.89, "learning_rate": 7.785282121373208e-06, "loss": 0.2093, "step": 11494500 }, { "epoch": 6.89, "learning_rate": 7.783182155812645e-06, "loss": 0.2115, "step": 11495000 }, { "epoch": 6.89, "learning_rate": 7.78108219025208e-06, "loss": 0.2118, "step": 11495500 }, { "epoch": 6.89, "learning_rate": 7.778982224691514e-06, "loss": 0.2122, "step": 11496000 }, { "epoch": 6.89, "learning_rate": 7.776886459062072e-06, "loss": 0.209, "step": 11496500 }, { "epoch": 6.89, "learning_rate": 7.774786493501506e-06, "loss": 0.2115, "step": 11497000 }, { "epoch": 6.89, "learning_rate": 7.772686527940941e-06, "loss": 0.2137, "step": 11497500 }, { "epoch": 6.89, "learning_rate": 7.770586562380378e-06, "loss": 0.2144, "step": 11498000 }, { "epoch": 6.89, "learning_rate": 7.768486596819811e-06, "loss": 0.2102, "step": 11498500 }, { "epoch": 6.89, "learning_rate": 7.766386631259248e-06, "loss": 0.2155, "step": 11499000 }, { "epoch": 6.89, "learning_rate": 7.764290865629803e-06, "loss": 0.2127, "step": 11499500 }, { "epoch": 6.89, "learning_rate": 7.762190900069239e-06, "loss": 0.2184, "step": 11500000 }, { "epoch": 6.89, "eval_loss": 0.20188398659229279, "eval_runtime": 1465.7429, "eval_samples_per_second": 359.354, "eval_steps_per_second": 59.892, "step": 11500000 }, { "epoch": 6.9, "learning_rate": 7.760090934508675e-06, "loss": 0.2129, "step": 11500500 }, { "epoch": 6.9, "learning_rate": 7.757990968948109e-06, "loss": 0.2124, "step": 11501000 }, { "epoch": 6.9, "learning_rate": 7.755891003387544e-06, "loss": 0.2129, "step": 11501500 }, { "epoch": 6.9, "learning_rate": 7.75379103782698e-06, "loss": 0.2078, "step": 11502000 }, { "epoch": 6.9, "learning_rate": 7.751695272197536e-06, "loss": 0.2135, "step": 11502500 }, { "epoch": 6.9, "learning_rate": 7.74959530663697e-06, "loss": 0.2083, "step": 11503000 }, { "epoch": 6.9, "learning_rate": 7.747495341076406e-06, "loss": 0.2111, "step": 11503500 }, { "epoch": 6.9, "learning_rate": 7.745395375515842e-06, "loss": 0.2139, "step": 11504000 }, { "epoch": 6.9, "learning_rate": 7.743299609886397e-06, "loss": 0.2112, "step": 11504500 }, { "epoch": 6.9, "learning_rate": 7.741199644325834e-06, "loss": 0.21, "step": 11505000 }, { "epoch": 6.9, "learning_rate": 7.739099678765267e-06, "loss": 0.2116, "step": 11505500 }, { "epoch": 6.9, "learning_rate": 7.736999713204704e-06, "loss": 0.2085, "step": 11506000 }, { "epoch": 6.9, "learning_rate": 7.73489974764414e-06, "loss": 0.213, "step": 11506500 }, { "epoch": 6.9, "learning_rate": 7.732799782083573e-06, "loss": 0.2084, "step": 11507000 }, { "epoch": 6.9, "learning_rate": 7.73069981652301e-06, "loss": 0.2249, "step": 11507500 }, { "epoch": 6.9, "learning_rate": 7.728604050893565e-06, "loss": 0.2144, "step": 11508000 }, { "epoch": 6.9, "learning_rate": 7.726504085333e-06, "loss": 0.2173, "step": 11508500 }, { "epoch": 6.9, "learning_rate": 7.724404119772437e-06, "loss": 0.2098, "step": 11509000 }, { "epoch": 6.9, "learning_rate": 7.72230415421187e-06, "loss": 0.2086, "step": 11509500 }, { "epoch": 6.9, "learning_rate": 7.720208388582427e-06, "loss": 0.2156, "step": 11510000 }, { "epoch": 6.9, "learning_rate": 7.718108423021862e-06, "loss": 0.2125, "step": 11510500 }, { "epoch": 6.9, "learning_rate": 7.716008457461298e-06, "loss": 0.2107, "step": 11511000 }, { "epoch": 6.9, "learning_rate": 7.713908491900733e-06, "loss": 0.2115, "step": 11511500 }, { "epoch": 6.9, "learning_rate": 7.711808526340168e-06, "loss": 0.2098, "step": 11512000 }, { "epoch": 6.9, "learning_rate": 7.709712760710723e-06, "loss": 0.2146, "step": 11512500 }, { "epoch": 6.9, "learning_rate": 7.70761279515016e-06, "loss": 0.2118, "step": 11513000 }, { "epoch": 6.9, "learning_rate": 7.705512829589595e-06, "loss": 0.2148, "step": 11513500 }, { "epoch": 6.9, "learning_rate": 7.703412864029029e-06, "loss": 0.2124, "step": 11514000 }, { "epoch": 6.9, "learning_rate": 7.701312898468465e-06, "loss": 0.2112, "step": 11514500 }, { "epoch": 6.9, "learning_rate": 7.69921713283902e-06, "loss": 0.213, "step": 11515000 }, { "epoch": 6.9, "learning_rate": 7.697117167278456e-06, "loss": 0.2129, "step": 11515500 }, { "epoch": 6.9, "learning_rate": 7.695017201717893e-06, "loss": 0.2126, "step": 11516000 }, { "epoch": 6.9, "learning_rate": 7.692917236157326e-06, "loss": 0.2154, "step": 11516500 }, { "epoch": 6.9, "learning_rate": 7.690817270596763e-06, "loss": 0.214, "step": 11517000 }, { "epoch": 6.91, "learning_rate": 7.688721504967318e-06, "loss": 0.2082, "step": 11517500 }, { "epoch": 6.91, "learning_rate": 7.686621539406753e-06, "loss": 0.2083, "step": 11518000 }, { "epoch": 6.91, "learning_rate": 7.68452157384619e-06, "loss": 0.2107, "step": 11518500 }, { "epoch": 6.91, "learning_rate": 7.682421608285624e-06, "loss": 0.2101, "step": 11519000 }, { "epoch": 6.91, "learning_rate": 7.680321642725059e-06, "loss": 0.2104, "step": 11519500 }, { "epoch": 6.91, "learning_rate": 7.678225877095616e-06, "loss": 0.2151, "step": 11520000 }, { "epoch": 6.91, "learning_rate": 7.676125911535051e-06, "loss": 0.2124, "step": 11520500 }, { "epoch": 6.91, "learning_rate": 7.674025945974484e-06, "loss": 0.2071, "step": 11521000 }, { "epoch": 6.91, "learning_rate": 7.671925980413921e-06, "loss": 0.2148, "step": 11521500 }, { "epoch": 6.91, "learning_rate": 7.669830214784477e-06, "loss": 0.2218, "step": 11522000 }, { "epoch": 6.91, "learning_rate": 7.667730249223912e-06, "loss": 0.2114, "step": 11522500 }, { "epoch": 6.91, "learning_rate": 7.665630283663349e-06, "loss": 0.2057, "step": 11523000 }, { "epoch": 6.91, "learning_rate": 7.663530318102782e-06, "loss": 0.2082, "step": 11523500 }, { "epoch": 6.91, "learning_rate": 7.661430352542219e-06, "loss": 0.2119, "step": 11524000 }, { "epoch": 6.91, "learning_rate": 7.659330386981654e-06, "loss": 0.2105, "step": 11524500 }, { "epoch": 6.91, "learning_rate": 7.657230421421087e-06, "loss": 0.2131, "step": 11525000 }, { "epoch": 6.91, "learning_rate": 7.655130455860524e-06, "loss": 0.2081, "step": 11525500 }, { "epoch": 6.91, "learning_rate": 7.65303049029996e-06, "loss": 0.2127, "step": 11526000 }, { "epoch": 6.91, "learning_rate": 7.650930524739395e-06, "loss": 0.2181, "step": 11526500 }, { "epoch": 6.91, "learning_rate": 7.64883055917883e-06, "loss": 0.2137, "step": 11527000 }, { "epoch": 6.91, "learning_rate": 7.646730593618263e-06, "loss": 0.2099, "step": 11527500 }, { "epoch": 6.91, "learning_rate": 7.644639027919942e-06, "loss": 0.2086, "step": 11528000 }, { "epoch": 6.91, "learning_rate": 7.642539062359377e-06, "loss": 0.2136, "step": 11528500 }, { "epoch": 6.91, "learning_rate": 7.640439096798812e-06, "loss": 0.2081, "step": 11529000 }, { "epoch": 6.91, "learning_rate": 7.638339131238247e-06, "loss": 0.2096, "step": 11529500 }, { "epoch": 6.91, "learning_rate": 7.636243365608804e-06, "loss": 0.2093, "step": 11530000 }, { "epoch": 6.91, "learning_rate": 7.634143400048238e-06, "loss": 0.2108, "step": 11530500 }, { "epoch": 6.91, "learning_rate": 7.632043434487675e-06, "loss": 0.2076, "step": 11531000 }, { "epoch": 6.91, "learning_rate": 7.62994346892711e-06, "loss": 0.2023, "step": 11531500 }, { "epoch": 6.91, "learning_rate": 7.627843503366544e-06, "loss": 0.2164, "step": 11532000 }, { "epoch": 6.91, "learning_rate": 7.62574353780598e-06, "loss": 0.2095, "step": 11532500 }, { "epoch": 6.91, "learning_rate": 7.623643572245415e-06, "loss": 0.2098, "step": 11533000 }, { "epoch": 6.91, "learning_rate": 7.6215478066159715e-06, "loss": 0.214, "step": 11533500 }, { "epoch": 6.92, "learning_rate": 7.6194478410554075e-06, "loss": 0.2134, "step": 11534000 }, { "epoch": 6.92, "learning_rate": 7.617347875494841e-06, "loss": 0.216, "step": 11534500 }, { "epoch": 6.92, "learning_rate": 7.615247909934277e-06, "loss": 0.2079, "step": 11535000 }, { "epoch": 6.92, "learning_rate": 7.613147944373713e-06, "loss": 0.2104, "step": 11535500 }, { "epoch": 6.92, "learning_rate": 7.611047978813147e-06, "loss": 0.2109, "step": 11536000 }, { "epoch": 6.92, "learning_rate": 7.608952213183704e-06, "loss": 0.2119, "step": 11536500 }, { "epoch": 6.92, "learning_rate": 7.6068522476231385e-06, "loss": 0.2114, "step": 11537000 }, { "epoch": 6.92, "learning_rate": 7.6047522820625745e-06, "loss": 0.2115, "step": 11537500 }, { "epoch": 6.92, "learning_rate": 7.60265231650201e-06, "loss": 0.2132, "step": 11538000 }, { "epoch": 6.92, "learning_rate": 7.600556550872566e-06, "loss": 0.2108, "step": 11538500 }, { "epoch": 6.92, "learning_rate": 7.598456585312002e-06, "loss": 0.2107, "step": 11539000 }, { "epoch": 6.92, "learning_rate": 7.596356619751436e-06, "loss": 0.2154, "step": 11539500 }, { "epoch": 6.92, "learning_rate": 7.594256654190871e-06, "loss": 0.2121, "step": 11540000 }, { "epoch": 6.92, "learning_rate": 7.5921566886303055e-06, "loss": 0.2099, "step": 11540500 }, { "epoch": 6.92, "learning_rate": 7.5900567230697415e-06, "loss": 0.2161, "step": 11541000 }, { "epoch": 6.92, "learning_rate": 7.5879567575091775e-06, "loss": 0.2124, "step": 11541500 }, { "epoch": 6.92, "learning_rate": 7.585856791948611e-06, "loss": 0.2147, "step": 11542000 }, { "epoch": 6.92, "learning_rate": 7.583765226250289e-06, "loss": 0.207, "step": 11542500 }, { "epoch": 6.92, "learning_rate": 7.581665260689724e-06, "loss": 0.2101, "step": 11543000 }, { "epoch": 6.92, "learning_rate": 7.57956529512916e-06, "loss": 0.2065, "step": 11543500 }, { "epoch": 6.92, "learning_rate": 7.577465329568594e-06, "loss": 0.2173, "step": 11544000 }, { "epoch": 6.92, "learning_rate": 7.575369563939151e-06, "loss": 0.2098, "step": 11544500 }, { "epoch": 6.92, "learning_rate": 7.573269598378586e-06, "loss": 0.2134, "step": 11545000 }, { "epoch": 6.92, "learning_rate": 7.571169632818022e-06, "loss": 0.2118, "step": 11545500 }, { "epoch": 6.92, "learning_rate": 7.569069667257458e-06, "loss": 0.2139, "step": 11546000 }, { "epoch": 6.92, "learning_rate": 7.566969701696892e-06, "loss": 0.213, "step": 11546500 }, { "epoch": 6.92, "learning_rate": 7.564869736136327e-06, "loss": 0.2124, "step": 11547000 }, { "epoch": 6.92, "learning_rate": 7.562773970506883e-06, "loss": 0.2148, "step": 11547500 }, { "epoch": 6.92, "learning_rate": 7.560674004946319e-06, "loss": 0.2142, "step": 11548000 }, { "epoch": 6.92, "learning_rate": 7.558574039385754e-06, "loss": 0.2067, "step": 11548500 }, { "epoch": 6.92, "learning_rate": 7.556474073825189e-06, "loss": 0.2124, "step": 11549000 }, { "epoch": 6.92, "learning_rate": 7.554374108264625e-06, "loss": 0.2157, "step": 11549500 }, { "epoch": 6.92, "learning_rate": 7.552278342635181e-06, "loss": 0.2115, "step": 11550000 }, { "epoch": 6.92, "learning_rate": 7.550178377074616e-06, "loss": 0.2097, "step": 11550500 }, { "epoch": 6.93, "learning_rate": 7.54807841151405e-06, "loss": 0.212, "step": 11551000 }, { "epoch": 6.93, "learning_rate": 7.545978445953486e-06, "loss": 0.2094, "step": 11551500 }, { "epoch": 6.93, "learning_rate": 7.543878480392922e-06, "loss": 0.2088, "step": 11552000 }, { "epoch": 6.93, "learning_rate": 7.5417827147634775e-06, "loss": 0.21, "step": 11552500 }, { "epoch": 6.93, "learning_rate": 7.5396827492029135e-06, "loss": 0.212, "step": 11553000 }, { "epoch": 6.93, "learning_rate": 7.537582783642348e-06, "loss": 0.2045, "step": 11553500 }, { "epoch": 6.93, "learning_rate": 7.535482818081783e-06, "loss": 0.209, "step": 11554000 }, { "epoch": 6.93, "learning_rate": 7.533382852521219e-06, "loss": 0.2078, "step": 11554500 }, { "epoch": 6.93, "learning_rate": 7.531282886960653e-06, "loss": 0.2122, "step": 11555000 }, { "epoch": 6.93, "learning_rate": 7.529182921400089e-06, "loss": 0.2166, "step": 11555500 }, { "epoch": 6.93, "learning_rate": 7.5270829558395244e-06, "loss": 0.21, "step": 11556000 }, { "epoch": 6.93, "learning_rate": 7.5249871902100805e-06, "loss": 0.2138, "step": 11556500 }, { "epoch": 6.93, "learning_rate": 7.5228872246495165e-06, "loss": 0.2072, "step": 11557000 }, { "epoch": 6.93, "learning_rate": 7.520787259088951e-06, "loss": 0.2161, "step": 11557500 }, { "epoch": 6.93, "learning_rate": 7.518687293528386e-06, "loss": 0.2074, "step": 11558000 }, { "epoch": 6.93, "learning_rate": 7.51658732796782e-06, "loss": 0.2124, "step": 11558500 }, { "epoch": 6.93, "learning_rate": 7.514487362407256e-06, "loss": 0.2109, "step": 11559000 }, { "epoch": 6.93, "learning_rate": 7.512391596777812e-06, "loss": 0.2102, "step": 11559500 }, { "epoch": 6.93, "learning_rate": 7.510291631217248e-06, "loss": 0.2135, "step": 11560000 }, { "epoch": 6.93, "learning_rate": 7.508191665656684e-06, "loss": 0.205, "step": 11560500 }, { "epoch": 6.93, "learning_rate": 7.506091700096118e-06, "loss": 0.2101, "step": 11561000 }, { "epoch": 6.93, "learning_rate": 7.503991734535553e-06, "loss": 0.2108, "step": 11561500 }, { "epoch": 6.93, "learning_rate": 7.501895968906109e-06, "loss": 0.2161, "step": 11562000 }, { "epoch": 6.93, "learning_rate": 7.499796003345545e-06, "loss": 0.2118, "step": 11562500 }, { "epoch": 6.93, "learning_rate": 7.49769603778498e-06, "loss": 0.212, "step": 11563000 }, { "epoch": 6.93, "learning_rate": 7.495596072224415e-06, "loss": 0.2103, "step": 11563500 }, { "epoch": 6.93, "learning_rate": 7.493496106663851e-06, "loss": 0.2143, "step": 11564000 }, { "epoch": 6.93, "learning_rate": 7.491400341034407e-06, "loss": 0.2118, "step": 11564500 }, { "epoch": 6.93, "learning_rate": 7.489300375473842e-06, "loss": 0.2165, "step": 11565000 }, { "epoch": 6.93, "learning_rate": 7.487204609844398e-06, "loss": 0.2123, "step": 11565500 }, { "epoch": 6.93, "learning_rate": 7.485104644283834e-06, "loss": 0.2076, "step": 11566000 }, { "epoch": 6.93, "learning_rate": 7.483004678723269e-06, "loss": 0.2138, "step": 11566500 }, { "epoch": 6.93, "learning_rate": 7.4809047131627035e-06, "loss": 0.2137, "step": 11567000 }, { "epoch": 6.94, "learning_rate": 7.4788047476021395e-06, "loss": 0.2108, "step": 11567500 }, { "epoch": 6.94, "learning_rate": 7.4767047820415755e-06, "loss": 0.2074, "step": 11568000 }, { "epoch": 6.94, "learning_rate": 7.47460481648101e-06, "loss": 0.214, "step": 11568500 }, { "epoch": 6.94, "learning_rate": 7.472504850920445e-06, "loss": 0.2093, "step": 11569000 }, { "epoch": 6.94, "learning_rate": 7.470409085291001e-06, "loss": 0.2076, "step": 11569500 }, { "epoch": 6.94, "learning_rate": 7.468309119730437e-06, "loss": 0.2111, "step": 11570000 }, { "epoch": 6.94, "learning_rate": 7.4662091541698705e-06, "loss": 0.2086, "step": 11570500 }, { "epoch": 6.94, "learning_rate": 7.4641091886093065e-06, "loss": 0.2158, "step": 11571000 }, { "epoch": 6.94, "learning_rate": 7.4620092230487425e-06, "loss": 0.2151, "step": 11571500 }, { "epoch": 6.94, "learning_rate": 7.459909257488177e-06, "loss": 0.2115, "step": 11572000 }, { "epoch": 6.94, "learning_rate": 7.457813491858734e-06, "loss": 0.2085, "step": 11572500 }, { "epoch": 6.94, "learning_rate": 7.455713526298168e-06, "loss": 0.2147, "step": 11573000 }, { "epoch": 6.94, "learning_rate": 7.453613560737604e-06, "loss": 0.2098, "step": 11573500 }, { "epoch": 6.94, "learning_rate": 7.451513595177039e-06, "loss": 0.2072, "step": 11574000 }, { "epoch": 6.94, "learning_rate": 7.4494136296164735e-06, "loss": 0.2151, "step": 11574500 }, { "epoch": 6.94, "learning_rate": 7.4473136640559095e-06, "loss": 0.2113, "step": 11575000 }, { "epoch": 6.94, "learning_rate": 7.445217898426466e-06, "loss": 0.2092, "step": 11575500 }, { "epoch": 6.94, "learning_rate": 7.443117932865901e-06, "loss": 0.2089, "step": 11576000 }, { "epoch": 6.94, "learning_rate": 7.441017967305337e-06, "loss": 0.2126, "step": 11576500 }, { "epoch": 6.94, "learning_rate": 7.438918001744771e-06, "loss": 0.2117, "step": 11577000 }, { "epoch": 6.94, "learning_rate": 7.436818036184207e-06, "loss": 0.2092, "step": 11577500 }, { "epoch": 6.94, "learning_rate": 7.434718070623641e-06, "loss": 0.2118, "step": 11578000 }, { "epoch": 6.94, "learning_rate": 7.432622304994198e-06, "loss": 0.2103, "step": 11578500 }, { "epoch": 6.94, "learning_rate": 7.430522339433633e-06, "loss": 0.2036, "step": 11579000 }, { "epoch": 6.94, "learning_rate": 7.428422373873068e-06, "loss": 0.2107, "step": 11579500 }, { "epoch": 6.94, "learning_rate": 7.426322408312504e-06, "loss": 0.2116, "step": 11580000 }, { "epoch": 6.94, "learning_rate": 7.42422664268306e-06, "loss": 0.208, "step": 11580500 }, { "epoch": 6.94, "learning_rate": 7.422126677122495e-06, "loss": 0.2125, "step": 11581000 }, { "epoch": 6.94, "learning_rate": 7.420026711561929e-06, "loss": 0.2154, "step": 11581500 }, { "epoch": 6.94, "learning_rate": 7.417926746001365e-06, "loss": 0.2094, "step": 11582000 }, { "epoch": 6.94, "learning_rate": 7.415826780440801e-06, "loss": 0.2108, "step": 11582500 }, { "epoch": 6.94, "learning_rate": 7.413726814880236e-06, "loss": 0.2137, "step": 11583000 }, { "epoch": 6.94, "learning_rate": 7.411626849319671e-06, "loss": 0.211, "step": 11583500 }, { "epoch": 6.95, "learning_rate": 7.409526883759107e-06, "loss": 0.2112, "step": 11584000 }, { "epoch": 6.95, "learning_rate": 7.407431118129663e-06, "loss": 0.2171, "step": 11584500 }, { "epoch": 6.95, "learning_rate": 7.405331152569098e-06, "loss": 0.2089, "step": 11585000 }, { "epoch": 6.95, "learning_rate": 7.4032311870085324e-06, "loss": 0.2143, "step": 11585500 }, { "epoch": 6.95, "learning_rate": 7.4011312214479684e-06, "loss": 0.2132, "step": 11586000 }, { "epoch": 6.95, "learning_rate": 7.3990354558185246e-06, "loss": 0.2112, "step": 11586500 }, { "epoch": 6.95, "learning_rate": 7.3969396901890815e-06, "loss": 0.2147, "step": 11587000 }, { "epoch": 6.95, "learning_rate": 7.394839724628516e-06, "loss": 0.2128, "step": 11587500 }, { "epoch": 6.95, "learning_rate": 7.392739759067952e-06, "loss": 0.2074, "step": 11588000 }, { "epoch": 6.95, "learning_rate": 7.390643993438507e-06, "loss": 0.2129, "step": 11588500 }, { "epoch": 6.95, "learning_rate": 7.388544027877943e-06, "loss": 0.2091, "step": 11589000 }, { "epoch": 6.95, "learning_rate": 7.386444062317377e-06, "loss": 0.2077, "step": 11589500 }, { "epoch": 6.95, "learning_rate": 7.3843440967568126e-06, "loss": 0.2166, "step": 11590000 }, { "epoch": 6.95, "learning_rate": 7.3822441311962486e-06, "loss": 0.2131, "step": 11590500 }, { "epoch": 6.95, "learning_rate": 7.380144165635683e-06, "loss": 0.2113, "step": 11591000 }, { "epoch": 6.95, "learning_rate": 7.378044200075119e-06, "loss": 0.2094, "step": 11591500 }, { "epoch": 6.95, "learning_rate": 7.375944234514554e-06, "loss": 0.2089, "step": 11592000 }, { "epoch": 6.95, "learning_rate": 7.373844268953988e-06, "loss": 0.2146, "step": 11592500 }, { "epoch": 6.95, "learning_rate": 7.371748503324546e-06, "loss": 0.211, "step": 11593000 }, { "epoch": 6.95, "learning_rate": 7.3696485377639804e-06, "loss": 0.2145, "step": 11593500 }, { "epoch": 6.95, "learning_rate": 7.367548572203416e-06, "loss": 0.2147, "step": 11594000 }, { "epoch": 6.95, "learning_rate": 7.365448606642852e-06, "loss": 0.2126, "step": 11594500 }, { "epoch": 6.95, "learning_rate": 7.363348641082286e-06, "loss": 0.2173, "step": 11595000 }, { "epoch": 6.95, "learning_rate": 7.361248675521722e-06, "loss": 0.2118, "step": 11595500 }, { "epoch": 6.95, "learning_rate": 7.359152909892277e-06, "loss": 0.213, "step": 11596000 }, { "epoch": 6.95, "learning_rate": 7.357052944331713e-06, "loss": 0.2102, "step": 11596500 }, { "epoch": 6.95, "learning_rate": 7.3549529787711475e-06, "loss": 0.2085, "step": 11597000 }, { "epoch": 6.95, "learning_rate": 7.3528530132105835e-06, "loss": 0.2094, "step": 11597500 }, { "epoch": 6.95, "learning_rate": 7.350757247581139e-06, "loss": 0.2074, "step": 11598000 }, { "epoch": 6.95, "learning_rate": 7.348657282020575e-06, "loss": 0.2088, "step": 11598500 }, { "epoch": 6.95, "learning_rate": 7.346557316460011e-06, "loss": 0.2138, "step": 11599000 }, { "epoch": 6.95, "learning_rate": 7.344457350899444e-06, "loss": 0.2157, "step": 11599500 }, { "epoch": 6.95, "learning_rate": 7.34235738533888e-06, "loss": 0.2108, "step": 11600000 }, { "epoch": 6.95, "eval_loss": 0.20170070230960846, "eval_runtime": 1465.1662, "eval_samples_per_second": 359.495, "eval_steps_per_second": 59.916, "step": 11600000 }, { "epoch": 6.95, "learning_rate": 7.340257419778316e-06, "loss": 0.2098, "step": 11600500 }, { "epoch": 6.96, "learning_rate": 7.3381574542177505e-06, "loss": 0.2064, "step": 11601000 }, { "epoch": 6.96, "learning_rate": 7.336057488657186e-06, "loss": 0.2133, "step": 11601500 }, { "epoch": 6.96, "learning_rate": 7.333961723027742e-06, "loss": 0.2011, "step": 11602000 }, { "epoch": 6.96, "learning_rate": 7.331865957398299e-06, "loss": 0.2082, "step": 11602500 }, { "epoch": 6.96, "learning_rate": 7.329765991837733e-06, "loss": 0.2167, "step": 11603000 }, { "epoch": 6.96, "learning_rate": 7.327666026277169e-06, "loss": 0.2127, "step": 11603500 }, { "epoch": 6.96, "learning_rate": 7.325566060716605e-06, "loss": 0.2132, "step": 11604000 }, { "epoch": 6.96, "learning_rate": 7.32347029508716e-06, "loss": 0.2161, "step": 11604500 }, { "epoch": 6.96, "learning_rate": 7.321370329526596e-06, "loss": 0.2051, "step": 11605000 }, { "epoch": 6.96, "learning_rate": 7.319270363966031e-06, "loss": 0.2165, "step": 11605500 }, { "epoch": 6.96, "learning_rate": 7.317170398405467e-06, "loss": 0.2131, "step": 11606000 }, { "epoch": 6.96, "learning_rate": 7.315070432844902e-06, "loss": 0.213, "step": 11606500 }, { "epoch": 6.96, "learning_rate": 7.312970467284336e-06, "loss": 0.2123, "step": 11607000 }, { "epoch": 6.96, "learning_rate": 7.310874701654894e-06, "loss": 0.2084, "step": 11607500 }, { "epoch": 6.96, "learning_rate": 7.308774736094327e-06, "loss": 0.2063, "step": 11608000 }, { "epoch": 6.96, "learning_rate": 7.306674770533763e-06, "loss": 0.2087, "step": 11608500 }, { "epoch": 6.96, "learning_rate": 7.304574804973198e-06, "loss": 0.2071, "step": 11609000 }, { "epoch": 6.96, "learning_rate": 7.302474839412634e-06, "loss": 0.2145, "step": 11609500 }, { "epoch": 6.96, "learning_rate": 7.300374873852069e-06, "loss": 0.212, "step": 11610000 }, { "epoch": 6.96, "learning_rate": 7.298274908291503e-06, "loss": 0.2122, "step": 11610500 }, { "epoch": 6.96, "learning_rate": 7.296174942730939e-06, "loss": 0.2111, "step": 11611000 }, { "epoch": 6.96, "learning_rate": 7.294074977170375e-06, "loss": 0.2062, "step": 11611500 }, { "epoch": 6.96, "learning_rate": 7.291975011609809e-06, "loss": 0.2106, "step": 11612000 }, { "epoch": 6.96, "learning_rate": 7.289879245980366e-06, "loss": 0.214, "step": 11612500 }, { "epoch": 6.96, "learning_rate": 7.287779280419801e-06, "loss": 0.2104, "step": 11613000 }, { "epoch": 6.96, "learning_rate": 7.285679314859237e-06, "loss": 0.2052, "step": 11613500 }, { "epoch": 6.96, "learning_rate": 7.283579349298672e-06, "loss": 0.215, "step": 11614000 }, { "epoch": 6.96, "learning_rate": 7.281479383738106e-06, "loss": 0.2071, "step": 11614500 }, { "epoch": 6.96, "learning_rate": 7.279379418177542e-06, "loss": 0.2132, "step": 11615000 }, { "epoch": 6.96, "learning_rate": 7.277283652548098e-06, "loss": 0.2127, "step": 11615500 }, { "epoch": 6.96, "learning_rate": 7.275183686987533e-06, "loss": 0.211, "step": 11616000 }, { "epoch": 6.96, "learning_rate": 7.273083721426968e-06, "loss": 0.2098, "step": 11616500 }, { "epoch": 6.96, "learning_rate": 7.270983755866404e-06, "loss": 0.2127, "step": 11617000 }, { "epoch": 6.97, "learning_rate": 7.26888379030584e-06, "loss": 0.2114, "step": 11617500 }, { "epoch": 6.97, "learning_rate": 7.266783824745273e-06, "loss": 0.2172, "step": 11618000 }, { "epoch": 6.97, "learning_rate": 7.264688059115831e-06, "loss": 0.2213, "step": 11618500 }, { "epoch": 6.97, "learning_rate": 7.262588093555265e-06, "loss": 0.2119, "step": 11619000 }, { "epoch": 6.97, "learning_rate": 7.2604881279947004e-06, "loss": 0.213, "step": 11619500 }, { "epoch": 6.97, "learning_rate": 7.2583881624341364e-06, "loss": 0.2103, "step": 11620000 }, { "epoch": 6.97, "learning_rate": 7.256288196873571e-06, "loss": 0.2135, "step": 11620500 }, { "epoch": 6.97, "learning_rate": 7.254192431244128e-06, "loss": 0.2155, "step": 11621000 }, { "epoch": 6.97, "learning_rate": 7.252092465683562e-06, "loss": 0.2113, "step": 11621500 }, { "epoch": 6.97, "learning_rate": 7.249992500122998e-06, "loss": 0.2136, "step": 11622000 }, { "epoch": 6.97, "learning_rate": 7.247892534562434e-06, "loss": 0.2081, "step": 11622500 }, { "epoch": 6.97, "learning_rate": 7.245796768932989e-06, "loss": 0.2096, "step": 11623000 }, { "epoch": 6.97, "learning_rate": 7.243696803372425e-06, "loss": 0.2053, "step": 11623500 }, { "epoch": 6.97, "learning_rate": 7.24159683781186e-06, "loss": 0.2105, "step": 11624000 }, { "epoch": 6.97, "learning_rate": 7.239496872251296e-06, "loss": 0.2072, "step": 11624500 }, { "epoch": 6.97, "learning_rate": 7.237396906690729e-06, "loss": 0.2178, "step": 11625000 }, { "epoch": 6.97, "learning_rate": 7.235301141061287e-06, "loss": 0.2061, "step": 11625500 }, { "epoch": 6.97, "learning_rate": 7.233201175500721e-06, "loss": 0.2107, "step": 11626000 }, { "epoch": 6.97, "learning_rate": 7.231101209940156e-06, "loss": 0.2097, "step": 11626500 }, { "epoch": 6.97, "learning_rate": 7.229001244379592e-06, "loss": 0.2125, "step": 11627000 }, { "epoch": 6.97, "learning_rate": 7.226901278819027e-06, "loss": 0.2135, "step": 11627500 }, { "epoch": 6.97, "learning_rate": 7.224805513189584e-06, "loss": 0.2104, "step": 11628000 }, { "epoch": 6.97, "learning_rate": 7.222705547629018e-06, "loss": 0.2108, "step": 11628500 }, { "epoch": 6.97, "learning_rate": 7.220605582068454e-06, "loss": 0.2131, "step": 11629000 }, { "epoch": 6.97, "learning_rate": 7.21850561650789e-06, "loss": 0.2114, "step": 11629500 }, { "epoch": 6.97, "learning_rate": 7.216409850878445e-06, "loss": 0.2092, "step": 11630000 }, { "epoch": 6.97, "learning_rate": 7.214309885317881e-06, "loss": 0.2058, "step": 11630500 }, { "epoch": 6.97, "learning_rate": 7.2122099197573155e-06, "loss": 0.2159, "step": 11631000 }, { "epoch": 6.97, "learning_rate": 7.2101099541967515e-06, "loss": 0.2147, "step": 11631500 }, { "epoch": 6.97, "learning_rate": 7.208009988636187e-06, "loss": 0.2078, "step": 11632000 }, { "epoch": 6.97, "learning_rate": 7.205914223006743e-06, "loss": 0.2095, "step": 11632500 }, { "epoch": 6.97, "learning_rate": 7.203814257446179e-06, "loss": 0.2145, "step": 11633000 }, { "epoch": 6.97, "learning_rate": 7.201714291885613e-06, "loss": 0.2118, "step": 11633500 }, { "epoch": 6.98, "learning_rate": 7.199614326325048e-06, "loss": 0.2123, "step": 11634000 }, { "epoch": 6.98, "learning_rate": 7.197518560695604e-06, "loss": 0.2126, "step": 11634500 }, { "epoch": 6.98, "learning_rate": 7.19541859513504e-06, "loss": 0.2117, "step": 11635000 }, { "epoch": 6.98, "learning_rate": 7.193318629574474e-06, "loss": 0.2074, "step": 11635500 }, { "epoch": 6.98, "learning_rate": 7.19121866401391e-06, "loss": 0.2092, "step": 11636000 }, { "epoch": 6.98, "learning_rate": 7.189118698453346e-06, "loss": 0.2085, "step": 11636500 }, { "epoch": 6.98, "learning_rate": 7.187022932823901e-06, "loss": 0.211, "step": 11637000 }, { "epoch": 6.98, "learning_rate": 7.184922967263337e-06, "loss": 0.2078, "step": 11637500 }, { "epoch": 6.98, "learning_rate": 7.182827201633893e-06, "loss": 0.2069, "step": 11638000 }, { "epoch": 6.98, "learning_rate": 7.180727236073328e-06, "loss": 0.2099, "step": 11638500 }, { "epoch": 6.98, "learning_rate": 7.178627270512763e-06, "loss": 0.2078, "step": 11639000 }, { "epoch": 6.98, "learning_rate": 7.176527304952199e-06, "loss": 0.2088, "step": 11639500 }, { "epoch": 6.98, "learning_rate": 7.174427339391635e-06, "loss": 0.2149, "step": 11640000 }, { "epoch": 6.98, "learning_rate": 7.172327373831069e-06, "loss": 0.2128, "step": 11640500 }, { "epoch": 6.98, "learning_rate": 7.170227408270504e-06, "loss": 0.2045, "step": 11641000 }, { "epoch": 6.98, "learning_rate": 7.16812744270994e-06, "loss": 0.2051, "step": 11641500 }, { "epoch": 6.98, "learning_rate": 7.166027477149374e-06, "loss": 0.2078, "step": 11642000 }, { "epoch": 6.98, "learning_rate": 7.163931711519931e-06, "loss": 0.2138, "step": 11642500 }, { "epoch": 6.98, "learning_rate": 7.161831745959366e-06, "loss": 0.2093, "step": 11643000 }, { "epoch": 6.98, "learning_rate": 7.159731780398802e-06, "loss": 0.206, "step": 11643500 }, { "epoch": 6.98, "learning_rate": 7.157631814838238e-06, "loss": 0.2148, "step": 11644000 }, { "epoch": 6.98, "learning_rate": 7.155531849277671e-06, "loss": 0.2104, "step": 11644500 }, { "epoch": 6.98, "learning_rate": 7.153431883717107e-06, "loss": 0.2076, "step": 11645000 }, { "epoch": 6.98, "learning_rate": 7.151336118087663e-06, "loss": 0.2069, "step": 11645500 }, { "epoch": 6.98, "learning_rate": 7.149236152527098e-06, "loss": 0.2165, "step": 11646000 }, { "epoch": 6.98, "learning_rate": 7.147136186966533e-06, "loss": 0.2177, "step": 11646500 }, { "epoch": 6.98, "learning_rate": 7.145036221405969e-06, "loss": 0.2104, "step": 11647000 }, { "epoch": 6.98, "learning_rate": 7.142936255845405e-06, "loss": 0.2109, "step": 11647500 }, { "epoch": 6.98, "learning_rate": 7.140836290284839e-06, "loss": 0.2083, "step": 11648000 }, { "epoch": 6.98, "learning_rate": 7.138740524655396e-06, "loss": 0.211, "step": 11648500 }, { "epoch": 6.98, "learning_rate": 7.13664055909483e-06, "loss": 0.2126, "step": 11649000 }, { "epoch": 6.98, "learning_rate": 7.134540593534266e-06, "loss": 0.2101, "step": 11649500 }, { "epoch": 6.98, "learning_rate": 7.132440627973701e-06, "loss": 0.211, "step": 11650000 }, { "epoch": 6.98, "learning_rate": 7.1303448623442575e-06, "loss": 0.2146, "step": 11650500 }, { "epoch": 6.99, "learning_rate": 7.1282448967836935e-06, "loss": 0.2119, "step": 11651000 }, { "epoch": 6.99, "learning_rate": 7.126144931223128e-06, "loss": 0.2129, "step": 11651500 }, { "epoch": 6.99, "learning_rate": 7.124044965662563e-06, "loss": 0.2102, "step": 11652000 }, { "epoch": 6.99, "learning_rate": 7.121949200033119e-06, "loss": 0.2114, "step": 11652500 }, { "epoch": 6.99, "learning_rate": 7.119849234472555e-06, "loss": 0.2124, "step": 11653000 }, { "epoch": 6.99, "learning_rate": 7.11774926891199e-06, "loss": 0.2099, "step": 11653500 }, { "epoch": 6.99, "learning_rate": 7.1156493033514246e-06, "loss": 0.2127, "step": 11654000 }, { "epoch": 6.99, "learning_rate": 7.1135493377908606e-06, "loss": 0.2113, "step": 11654500 }, { "epoch": 6.99, "learning_rate": 7.111449372230295e-06, "loss": 0.2124, "step": 11655000 }, { "epoch": 6.99, "learning_rate": 7.10934940666973e-06, "loss": 0.2049, "step": 11655500 }, { "epoch": 6.99, "learning_rate": 7.107249441109166e-06, "loss": 0.2105, "step": 11656000 }, { "epoch": 6.99, "learning_rate": 7.105153675479722e-06, "loss": 0.2113, "step": 11656500 }, { "epoch": 6.99, "learning_rate": 7.103053709919157e-06, "loss": 0.2143, "step": 11657000 }, { "epoch": 6.99, "learning_rate": 7.100953744358592e-06, "loss": 0.2126, "step": 11657500 }, { "epoch": 6.99, "learning_rate": 7.098853778798028e-06, "loss": 0.2048, "step": 11658000 }, { "epoch": 6.99, "learning_rate": 7.096753813237464e-06, "loss": 0.2142, "step": 11658500 }, { "epoch": 6.99, "learning_rate": 7.094653847676898e-06, "loss": 0.2083, "step": 11659000 }, { "epoch": 6.99, "learning_rate": 7.092558082047455e-06, "loss": 0.2079, "step": 11659500 }, { "epoch": 6.99, "learning_rate": 7.090458116486889e-06, "loss": 0.2111, "step": 11660000 }, { "epoch": 6.99, "learning_rate": 7.088358150926325e-06, "loss": 0.2057, "step": 11660500 }, { "epoch": 6.99, "learning_rate": 7.08625818536576e-06, "loss": 0.2109, "step": 11661000 }, { "epoch": 6.99, "learning_rate": 7.0841624197363164e-06, "loss": 0.2098, "step": 11661500 }, { "epoch": 6.99, "learning_rate": 7.0820624541757524e-06, "loss": 0.2106, "step": 11662000 }, { "epoch": 6.99, "learning_rate": 7.079962488615186e-06, "loss": 0.2086, "step": 11662500 }, { "epoch": 6.99, "learning_rate": 7.077862523054622e-06, "loss": 0.2047, "step": 11663000 }, { "epoch": 6.99, "learning_rate": 7.075762557494056e-06, "loss": 0.2079, "step": 11663500 }, { "epoch": 6.99, "learning_rate": 7.073666791864613e-06, "loss": 0.2049, "step": 11664000 }, { "epoch": 6.99, "learning_rate": 7.0715668263040475e-06, "loss": 0.214, "step": 11664500 }, { "epoch": 6.99, "learning_rate": 7.0694668607434835e-06, "loss": 0.2177, "step": 11665000 }, { "epoch": 6.99, "learning_rate": 7.0673668951829195e-06, "loss": 0.2151, "step": 11665500 }, { "epoch": 6.99, "learning_rate": 7.065266929622354e-06, "loss": 0.2172, "step": 11666000 }, { "epoch": 6.99, "learning_rate": 7.063166964061789e-06, "loss": 0.2048, "step": 11666500 }, { "epoch": 6.99, "learning_rate": 7.061071198432345e-06, "loss": 0.2073, "step": 11667000 }, { "epoch": 7.0, "learning_rate": 7.058971232871781e-06, "loss": 0.2128, "step": 11667500 }, { "epoch": 7.0, "learning_rate": 7.056871267311216e-06, "loss": 0.211, "step": 11668000 }, { "epoch": 7.0, "learning_rate": 7.0547713017506505e-06, "loss": 0.2077, "step": 11668500 }, { "epoch": 7.0, "learning_rate": 7.052675536121208e-06, "loss": 0.2169, "step": 11669000 }, { "epoch": 7.0, "learning_rate": 7.050579770491764e-06, "loss": 0.2128, "step": 11669500 }, { "epoch": 7.0, "learning_rate": 7.0484798049312e-06, "loss": 0.2127, "step": 11670000 }, { "epoch": 7.0, "learning_rate": 7.046379839370634e-06, "loss": 0.215, "step": 11670500 }, { "epoch": 7.0, "learning_rate": 7.04427987381007e-06, "loss": 0.2095, "step": 11671000 }, { "epoch": 7.0, "learning_rate": 7.042179908249505e-06, "loss": 0.2139, "step": 11671500 }, { "epoch": 7.0, "learning_rate": 7.040079942688939e-06, "loss": 0.2056, "step": 11672000 }, { "epoch": 7.0, "learning_rate": 7.037979977128375e-06, "loss": 0.2119, "step": 11672500 }, { "epoch": 7.0, "learning_rate": 7.03588001156781e-06, "loss": 0.209, "step": 11673000 }, { "epoch": 7.0, "learning_rate": 7.033784245938367e-06, "loss": 0.2096, "step": 11673500 }, { "epoch": 7.0, "learning_rate": 7.031684280377803e-06, "loss": 0.2162, "step": 11674000 }, { "epoch": 7.0, "learning_rate": 7.029584314817237e-06, "loss": 0.2118, "step": 11674500 }, { "epoch": 7.0, "learning_rate": 7.027484349256672e-06, "loss": 0.2093, "step": 11675000 }, { "epoch": 7.0, "learning_rate": 7.025384383696106e-06, "loss": 0.2082, "step": 11675500 }, { "epoch": 7.0, "learning_rate": 7.023284418135542e-06, "loss": 0.2019, "step": 11676000 }, { "epoch": 7.0, "learning_rate": 7.0211886525060985e-06, "loss": 0.2017, "step": 11676500 }, { "epoch": 7.0, "learning_rate": 7.019088686945534e-06, "loss": 0.2027, "step": 11677000 }, { "epoch": 7.0, "learning_rate": 7.01698872138497e-06, "loss": 0.2065, "step": 11677500 }, { "epoch": 7.0, "learning_rate": 7.014888755824404e-06, "loss": 0.2021, "step": 11678000 }, { "epoch": 7.0, "learning_rate": 7.012792990194961e-06, "loss": 0.2079, "step": 11678500 }, { "epoch": 7.0, "learning_rate": 7.010693024634395e-06, "loss": 0.2037, "step": 11679000 }, { "epoch": 7.0, "learning_rate": 7.008593059073831e-06, "loss": 0.2039, "step": 11679500 }, { "epoch": 7.0, "learning_rate": 7.006493093513267e-06, "loss": 0.2103, "step": 11680000 }, { "epoch": 7.0, "learning_rate": 7.004393127952701e-06, "loss": 0.2026, "step": 11680500 }, { "epoch": 7.0, "learning_rate": 7.002293162392137e-06, "loss": 0.2027, "step": 11681000 }, { "epoch": 7.0, "learning_rate": 7.000197396762693e-06, "loss": 0.2023, "step": 11681500 }, { "epoch": 7.0, "learning_rate": 6.998097431202128e-06, "loss": 0.199, "step": 11682000 }, { "epoch": 7.0, "learning_rate": 6.995997465641563e-06, "loss": 0.2042, "step": 11682500 }, { "epoch": 7.0, "learning_rate": 6.993897500080998e-06, "loss": 0.2023, "step": 11683000 }, { "epoch": 7.0, "learning_rate": 6.991797534520434e-06, "loss": 0.205, "step": 11683500 }, { "epoch": 7.01, "learning_rate": 6.9897017688909895e-06, "loss": 0.2091, "step": 11684000 }, { "epoch": 7.01, "learning_rate": 6.9876018033304255e-06, "loss": 0.2035, "step": 11684500 }, { "epoch": 7.01, "learning_rate": 6.985501837769861e-06, "loss": 0.2067, "step": 11685000 }, { "epoch": 7.01, "learning_rate": 6.983401872209296e-06, "loss": 0.2061, "step": 11685500 }, { "epoch": 7.01, "learning_rate": 6.98130190664873e-06, "loss": 0.2068, "step": 11686000 }, { "epoch": 7.01, "learning_rate": 6.979201941088166e-06, "loss": 0.2041, "step": 11686500 }, { "epoch": 7.01, "learning_rate": 6.977106175458722e-06, "loss": 0.2017, "step": 11687000 }, { "epoch": 7.01, "learning_rate": 6.975006209898157e-06, "loss": 0.2088, "step": 11687500 }, { "epoch": 7.01, "learning_rate": 6.9729062443375926e-06, "loss": 0.2019, "step": 11688000 }, { "epoch": 7.01, "learning_rate": 6.970806278777028e-06, "loss": 0.2105, "step": 11688500 }, { "epoch": 7.01, "learning_rate": 6.968706313216464e-06, "loss": 0.2051, "step": 11689000 }, { "epoch": 7.01, "learning_rate": 6.966606347655899e-06, "loss": 0.2004, "step": 11689500 }, { "epoch": 7.01, "learning_rate": 6.964506382095333e-06, "loss": 0.2039, "step": 11690000 }, { "epoch": 7.01, "learning_rate": 6.962406416534768e-06, "loss": 0.2071, "step": 11690500 }, { "epoch": 7.01, "learning_rate": 6.960310650905325e-06, "loss": 0.2034, "step": 11691000 }, { "epoch": 7.01, "learning_rate": 6.95821068534476e-06, "loss": 0.2022, "step": 11691500 }, { "epoch": 7.01, "learning_rate": 6.956110719784196e-06, "loss": 0.2084, "step": 11692000 }, { "epoch": 7.01, "learning_rate": 6.954010754223631e-06, "loss": 0.2033, "step": 11692500 }, { "epoch": 7.01, "learning_rate": 6.951910788663066e-06, "loss": 0.2047, "step": 11693000 }, { "epoch": 7.01, "learning_rate": 6.949815023033622e-06, "loss": 0.1998, "step": 11693500 }, { "epoch": 7.01, "learning_rate": 6.947715057473057e-06, "loss": 0.2042, "step": 11694000 }, { "epoch": 7.01, "learning_rate": 6.945615091912492e-06, "loss": 0.2008, "step": 11694500 }, { "epoch": 7.01, "learning_rate": 6.943515126351928e-06, "loss": 0.2034, "step": 11695000 }, { "epoch": 7.01, "learning_rate": 6.941419360722484e-06, "loss": 0.2048, "step": 11695500 }, { "epoch": 7.01, "learning_rate": 6.93931939516192e-06, "loss": 0.2015, "step": 11696000 }, { "epoch": 7.01, "learning_rate": 6.937219429601355e-06, "loss": 0.2039, "step": 11696500 }, { "epoch": 7.01, "learning_rate": 6.935119464040789e-06, "loss": 0.1992, "step": 11697000 }, { "epoch": 7.01, "learning_rate": 6.933019498480225e-06, "loss": 0.2032, "step": 11697500 }, { "epoch": 7.01, "learning_rate": 6.93091953291966e-06, "loss": 0.2063, "step": 11698000 }, { "epoch": 7.01, "learning_rate": 6.928819567359095e-06, "loss": 0.2115, "step": 11698500 }, { "epoch": 7.01, "learning_rate": 6.92671960179853e-06, "loss": 0.2095, "step": 11699000 }, { "epoch": 7.01, "learning_rate": 6.9246280361002084e-06, "loss": 0.2073, "step": 11699500 }, { "epoch": 7.01, "learning_rate": 6.922528070539643e-06, "loss": 0.1997, "step": 11700000 }, { "epoch": 7.01, "eval_loss": 0.2016766220331192, "eval_runtime": 1460.69, "eval_samples_per_second": 360.597, "eval_steps_per_second": 60.1, "step": 11700000 }, { "epoch": 7.01, "learning_rate": 6.920428104979078e-06, "loss": 0.2021, "step": 11700500 }, { "epoch": 7.02, "learning_rate": 6.918328139418513e-06, "loss": 0.2043, "step": 11701000 }, { "epoch": 7.02, "learning_rate": 6.91623237378907e-06, "loss": 0.203, "step": 11701500 }, { "epoch": 7.02, "learning_rate": 6.914132408228504e-06, "loss": 0.201, "step": 11702000 }, { "epoch": 7.02, "learning_rate": 6.91203244266794e-06, "loss": 0.2037, "step": 11702500 }, { "epoch": 7.02, "learning_rate": 6.9099324771073755e-06, "loss": 0.2054, "step": 11703000 }, { "epoch": 7.02, "learning_rate": 6.907832511546811e-06, "loss": 0.198, "step": 11703500 }, { "epoch": 7.02, "learning_rate": 6.905732545986246e-06, "loss": 0.2091, "step": 11704000 }, { "epoch": 7.02, "learning_rate": 6.903632580425681e-06, "loss": 0.2056, "step": 11704500 }, { "epoch": 7.02, "learning_rate": 6.901536814796238e-06, "loss": 0.2067, "step": 11705000 }, { "epoch": 7.02, "learning_rate": 6.899436849235672e-06, "loss": 0.2024, "step": 11705500 }, { "epoch": 7.02, "learning_rate": 6.897336883675107e-06, "loss": 0.2052, "step": 11706000 }, { "epoch": 7.02, "learning_rate": 6.8952369181145425e-06, "loss": 0.209, "step": 11706500 }, { "epoch": 7.02, "learning_rate": 6.8931369525539785e-06, "loss": 0.2015, "step": 11707000 }, { "epoch": 7.02, "learning_rate": 6.891041186924534e-06, "loss": 0.2023, "step": 11707500 }, { "epoch": 7.02, "learning_rate": 6.88894122136397e-06, "loss": 0.2098, "step": 11708000 }, { "epoch": 7.02, "learning_rate": 6.886841255803405e-06, "loss": 0.2095, "step": 11708500 }, { "epoch": 7.02, "learning_rate": 6.88474129024284e-06, "loss": 0.2076, "step": 11709000 }, { "epoch": 7.02, "learning_rate": 6.882641324682274e-06, "loss": 0.2056, "step": 11709500 }, { "epoch": 7.02, "learning_rate": 6.880545559052831e-06, "loss": 0.2001, "step": 11710000 }, { "epoch": 7.02, "learning_rate": 6.8784455934922665e-06, "loss": 0.2033, "step": 11710500 }, { "epoch": 7.02, "learning_rate": 6.876345627931702e-06, "loss": 0.2042, "step": 11711000 }, { "epoch": 7.02, "learning_rate": 6.874245662371137e-06, "loss": 0.2079, "step": 11711500 }, { "epoch": 7.02, "learning_rate": 6.872145696810572e-06, "loss": 0.2045, "step": 11712000 }, { "epoch": 7.02, "learning_rate": 6.870049931181128e-06, "loss": 0.2073, "step": 11712500 }, { "epoch": 7.02, "learning_rate": 6.867949965620563e-06, "loss": 0.2046, "step": 11713000 }, { "epoch": 7.02, "learning_rate": 6.865850000059999e-06, "loss": 0.2078, "step": 11713500 }, { "epoch": 7.02, "learning_rate": 6.863750034499434e-06, "loss": 0.2048, "step": 11714000 }, { "epoch": 7.02, "learning_rate": 6.8616500689388695e-06, "loss": 0.2042, "step": 11714500 }, { "epoch": 7.02, "learning_rate": 6.859554303309426e-06, "loss": 0.206, "step": 11715000 }, { "epoch": 7.02, "learning_rate": 6.857454337748861e-06, "loss": 0.2101, "step": 11715500 }, { "epoch": 7.02, "learning_rate": 6.855354372188296e-06, "loss": 0.2052, "step": 11716000 }, { "epoch": 7.02, "learning_rate": 6.853254406627731e-06, "loss": 0.201, "step": 11716500 }, { "epoch": 7.02, "learning_rate": 6.851154441067166e-06, "loss": 0.2043, "step": 11717000 }, { "epoch": 7.03, "learning_rate": 6.849058675437723e-06, "loss": 0.2038, "step": 11717500 }, { "epoch": 7.03, "learning_rate": 6.8469587098771575e-06, "loss": 0.2026, "step": 11718000 }, { "epoch": 7.03, "learning_rate": 6.844858744316593e-06, "loss": 0.2014, "step": 11718500 }, { "epoch": 7.03, "learning_rate": 6.842758778756029e-06, "loss": 0.2065, "step": 11719000 }, { "epoch": 7.03, "learning_rate": 6.840658813195464e-06, "loss": 0.2053, "step": 11719500 }, { "epoch": 7.03, "learning_rate": 6.838558847634899e-06, "loss": 0.2023, "step": 11720000 }, { "epoch": 7.03, "learning_rate": 6.836463082005455e-06, "loss": 0.2103, "step": 11720500 }, { "epoch": 7.03, "learning_rate": 6.83436311644489e-06, "loss": 0.2107, "step": 11721000 }, { "epoch": 7.03, "learning_rate": 6.8322631508843254e-06, "loss": 0.2013, "step": 11721500 }, { "epoch": 7.03, "learning_rate": 6.830163185323761e-06, "loss": 0.2029, "step": 11722000 }, { "epoch": 7.03, "learning_rate": 6.828063219763196e-06, "loss": 0.2059, "step": 11722500 }, { "epoch": 7.03, "learning_rate": 6.825963254202631e-06, "loss": 0.2051, "step": 11723000 }, { "epoch": 7.03, "learning_rate": 6.823867488573187e-06, "loss": 0.2086, "step": 11723500 }, { "epoch": 7.03, "learning_rate": 6.821767523012622e-06, "loss": 0.2041, "step": 11724000 }, { "epoch": 7.03, "learning_rate": 6.819667557452057e-06, "loss": 0.2084, "step": 11724500 }, { "epoch": 7.03, "learning_rate": 6.817567591891493e-06, "loss": 0.2022, "step": 11725000 }, { "epoch": 7.03, "learning_rate": 6.8154676263309285e-06, "loss": 0.2009, "step": 11725500 }, { "epoch": 7.03, "learning_rate": 6.813371860701485e-06, "loss": 0.2049, "step": 11726000 }, { "epoch": 7.03, "learning_rate": 6.81127189514092e-06, "loss": 0.2077, "step": 11726500 }, { "epoch": 7.03, "learning_rate": 6.809171929580355e-06, "loss": 0.2066, "step": 11727000 }, { "epoch": 7.03, "learning_rate": 6.80707196401979e-06, "loss": 0.2052, "step": 11727500 }, { "epoch": 7.03, "learning_rate": 6.804971998459225e-06, "loss": 0.2024, "step": 11728000 }, { "epoch": 7.03, "learning_rate": 6.80287203289866e-06, "loss": 0.2023, "step": 11728500 }, { "epoch": 7.03, "learning_rate": 6.8007762672692165e-06, "loss": 0.2061, "step": 11729000 }, { "epoch": 7.03, "learning_rate": 6.798676301708652e-06, "loss": 0.2027, "step": 11729500 }, { "epoch": 7.03, "learning_rate": 6.796576336148087e-06, "loss": 0.2024, "step": 11730000 }, { "epoch": 7.03, "learning_rate": 6.794476370587523e-06, "loss": 0.1987, "step": 11730500 }, { "epoch": 7.03, "learning_rate": 6.792376405026958e-06, "loss": 0.2008, "step": 11731000 }, { "epoch": 7.03, "learning_rate": 6.790280639397514e-06, "loss": 0.2084, "step": 11731500 }, { "epoch": 7.03, "learning_rate": 6.788180673836949e-06, "loss": 0.2015, "step": 11732000 }, { "epoch": 7.03, "learning_rate": 6.786080708276384e-06, "loss": 0.1996, "step": 11732500 }, { "epoch": 7.03, "learning_rate": 6.783980742715819e-06, "loss": 0.2046, "step": 11733000 }, { "epoch": 7.03, "learning_rate": 6.781880777155255e-06, "loss": 0.209, "step": 11733500 }, { "epoch": 7.04, "learning_rate": 6.77978081159469e-06, "loss": 0.2001, "step": 11734000 }, { "epoch": 7.04, "learning_rate": 6.777685045965246e-06, "loss": 0.2073, "step": 11734500 }, { "epoch": 7.04, "learning_rate": 6.775585080404681e-06, "loss": 0.2057, "step": 11735000 }, { "epoch": 7.04, "learning_rate": 6.773485114844116e-06, "loss": 0.2102, "step": 11735500 }, { "epoch": 7.04, "learning_rate": 6.771385149283552e-06, "loss": 0.2019, "step": 11736000 }, { "epoch": 7.04, "learning_rate": 6.7692851837229865e-06, "loss": 0.2031, "step": 11736500 }, { "epoch": 7.04, "learning_rate": 6.767185218162422e-06, "loss": 0.2044, "step": 11737000 }, { "epoch": 7.04, "learning_rate": 6.765085252601857e-06, "loss": 0.2063, "step": 11737500 }, { "epoch": 7.04, "learning_rate": 6.762989486972414e-06, "loss": 0.1954, "step": 11738000 }, { "epoch": 7.04, "learning_rate": 6.760889521411848e-06, "loss": 0.2073, "step": 11738500 }, { "epoch": 7.04, "learning_rate": 6.758789555851284e-06, "loss": 0.209, "step": 11739000 }, { "epoch": 7.04, "learning_rate": 6.756689590290719e-06, "loss": 0.2024, "step": 11739500 }, { "epoch": 7.04, "learning_rate": 6.754593824661275e-06, "loss": 0.2079, "step": 11740000 }, { "epoch": 7.04, "learning_rate": 6.7524938591007105e-06, "loss": 0.2009, "step": 11740500 }, { "epoch": 7.04, "learning_rate": 6.750393893540146e-06, "loss": 0.2026, "step": 11741000 }, { "epoch": 7.04, "learning_rate": 6.748293927979582e-06, "loss": 0.2062, "step": 11741500 }, { "epoch": 7.04, "learning_rate": 6.746198162350137e-06, "loss": 0.2055, "step": 11742000 }, { "epoch": 7.04, "learning_rate": 6.744098196789573e-06, "loss": 0.2071, "step": 11742500 }, { "epoch": 7.04, "learning_rate": 6.741998231229008e-06, "loss": 0.2045, "step": 11743000 }, { "epoch": 7.04, "learning_rate": 6.739898265668443e-06, "loss": 0.2087, "step": 11743500 }, { "epoch": 7.04, "learning_rate": 6.7377983001078776e-06, "loss": 0.2042, "step": 11744000 }, { "epoch": 7.04, "learning_rate": 6.7356983345473136e-06, "loss": 0.2068, "step": 11744500 }, { "epoch": 7.04, "learning_rate": 6.733598368986749e-06, "loss": 0.2066, "step": 11745000 }, { "epoch": 7.04, "learning_rate": 6.731502603357305e-06, "loss": 0.2078, "step": 11745500 }, { "epoch": 7.04, "learning_rate": 6.72940263779674e-06, "loss": 0.2026, "step": 11746000 }, { "epoch": 7.04, "learning_rate": 6.727302672236175e-06, "loss": 0.2045, "step": 11746500 }, { "epoch": 7.04, "learning_rate": 6.72520270667561e-06, "loss": 0.2039, "step": 11747000 }, { "epoch": 7.04, "learning_rate": 6.7231027411150454e-06, "loss": 0.2008, "step": 11747500 }, { "epoch": 7.04, "learning_rate": 6.721002775554481e-06, "loss": 0.204, "step": 11748000 }, { "epoch": 7.04, "learning_rate": 6.718902809993916e-06, "loss": 0.2055, "step": 11748500 }, { "epoch": 7.04, "learning_rate": 6.716802844433352e-06, "loss": 0.202, "step": 11749000 }, { "epoch": 7.04, "learning_rate": 6.714707078803907e-06, "loss": 0.2057, "step": 11749500 }, { "epoch": 7.04, "learning_rate": 6.712607113243343e-06, "loss": 0.2057, "step": 11750000 }, { "epoch": 7.04, "learning_rate": 6.710511347613899e-06, "loss": 0.2039, "step": 11750500 }, { "epoch": 7.05, "learning_rate": 6.708411382053334e-06, "loss": 0.2065, "step": 11751000 }, { "epoch": 7.05, "learning_rate": 6.7063114164927694e-06, "loss": 0.2015, "step": 11751500 }, { "epoch": 7.05, "learning_rate": 6.704211450932205e-06, "loss": 0.2079, "step": 11752000 }, { "epoch": 7.05, "learning_rate": 6.70211148537164e-06, "loss": 0.2111, "step": 11752500 }, { "epoch": 7.05, "learning_rate": 6.700011519811075e-06, "loss": 0.2096, "step": 11753000 }, { "epoch": 7.05, "learning_rate": 6.697915754181631e-06, "loss": 0.2067, "step": 11753500 }, { "epoch": 7.05, "learning_rate": 6.695815788621067e-06, "loss": 0.2064, "step": 11754000 }, { "epoch": 7.05, "learning_rate": 6.693715823060501e-06, "loss": 0.2054, "step": 11754500 }, { "epoch": 7.05, "learning_rate": 6.6916158574999365e-06, "loss": 0.2035, "step": 11755000 }, { "epoch": 7.05, "learning_rate": 6.6895158919393725e-06, "loss": 0.2053, "step": 11755500 }, { "epoch": 7.05, "learning_rate": 6.687415926378808e-06, "loss": 0.2055, "step": 11756000 }, { "epoch": 7.05, "learning_rate": 6.685315960818243e-06, "loss": 0.2052, "step": 11756500 }, { "epoch": 7.05, "learning_rate": 6.683215995257677e-06, "loss": 0.2107, "step": 11757000 }, { "epoch": 7.05, "learning_rate": 6.681120229628234e-06, "loss": 0.2004, "step": 11757500 }, { "epoch": 7.05, "learning_rate": 6.679020264067669e-06, "loss": 0.2044, "step": 11758000 }, { "epoch": 7.05, "learning_rate": 6.676924498438225e-06, "loss": 0.2024, "step": 11758500 }, { "epoch": 7.05, "learning_rate": 6.6748245328776605e-06, "loss": 0.2033, "step": 11759000 }, { "epoch": 7.05, "learning_rate": 6.6727245673170965e-06, "loss": 0.2044, "step": 11759500 }, { "epoch": 7.05, "learning_rate": 6.670624601756531e-06, "loss": 0.2035, "step": 11760000 }, { "epoch": 7.05, "learning_rate": 6.668524636195966e-06, "loss": 0.2076, "step": 11760500 }, { "epoch": 7.05, "learning_rate": 6.666428870566523e-06, "loss": 0.2021, "step": 11761000 }, { "epoch": 7.05, "learning_rate": 6.664328905005958e-06, "loss": 0.2081, "step": 11761500 }, { "epoch": 7.05, "learning_rate": 6.662228939445392e-06, "loss": 0.2061, "step": 11762000 }, { "epoch": 7.05, "learning_rate": 6.660128973884828e-06, "loss": 0.2024, "step": 11762500 }, { "epoch": 7.05, "learning_rate": 6.6580332082553845e-06, "loss": 0.2049, "step": 11763000 }, { "epoch": 7.05, "learning_rate": 6.65593324269482e-06, "loss": 0.2056, "step": 11763500 }, { "epoch": 7.05, "learning_rate": 6.653833277134255e-06, "loss": 0.2055, "step": 11764000 }, { "epoch": 7.05, "learning_rate": 6.65173331157369e-06, "loss": 0.2026, "step": 11764500 }, { "epoch": 7.05, "learning_rate": 6.649633346013126e-06, "loss": 0.206, "step": 11765000 }, { "epoch": 7.05, "learning_rate": 6.64753338045256e-06, "loss": 0.2055, "step": 11765500 }, { "epoch": 7.05, "learning_rate": 6.645433414891995e-06, "loss": 0.2075, "step": 11766000 }, { "epoch": 7.05, "learning_rate": 6.643337649262552e-06, "loss": 0.2067, "step": 11766500 }, { "epoch": 7.05, "learning_rate": 6.6412376837019875e-06, "loss": 0.2053, "step": 11767000 }, { "epoch": 7.06, "learning_rate": 6.639137718141422e-06, "loss": 0.2019, "step": 11767500 }, { "epoch": 7.06, "learning_rate": 6.637037752580858e-06, "loss": 0.2038, "step": 11768000 }, { "epoch": 7.06, "learning_rate": 6.634937787020293e-06, "loss": 0.2033, "step": 11768500 }, { "epoch": 7.06, "learning_rate": 6.632842021390849e-06, "loss": 0.2044, "step": 11769000 }, { "epoch": 7.06, "learning_rate": 6.630742055830284e-06, "loss": 0.2032, "step": 11769500 }, { "epoch": 7.06, "learning_rate": 6.628642090269719e-06, "loss": 0.2081, "step": 11770000 }, { "epoch": 7.06, "learning_rate": 6.626542124709155e-06, "loss": 0.2057, "step": 11770500 }, { "epoch": 7.06, "learning_rate": 6.62444215914859e-06, "loss": 0.2087, "step": 11771000 }, { "epoch": 7.06, "learning_rate": 6.622342193588025e-06, "loss": 0.2077, "step": 11771500 }, { "epoch": 7.06, "learning_rate": 6.62024222802746e-06, "loss": 0.213, "step": 11772000 }, { "epoch": 7.06, "learning_rate": 6.618146462398016e-06, "loss": 0.21, "step": 11772500 }, { "epoch": 7.06, "learning_rate": 6.616046496837451e-06, "loss": 0.2047, "step": 11773000 }, { "epoch": 7.06, "learning_rate": 6.613946531276887e-06, "loss": 0.2102, "step": 11773500 }, { "epoch": 7.06, "learning_rate": 6.611846565716322e-06, "loss": 0.2023, "step": 11774000 }, { "epoch": 7.06, "learning_rate": 6.6097508000868785e-06, "loss": 0.2038, "step": 11774500 }, { "epoch": 7.06, "learning_rate": 6.607650834526314e-06, "loss": 0.2068, "step": 11775000 }, { "epoch": 7.06, "learning_rate": 6.605550868965749e-06, "loss": 0.2062, "step": 11775500 }, { "epoch": 7.06, "learning_rate": 6.603450903405184e-06, "loss": 0.2011, "step": 11776000 }, { "epoch": 7.06, "learning_rate": 6.601350937844619e-06, "loss": 0.2086, "step": 11776500 }, { "epoch": 7.06, "learning_rate": 6.599255172215175e-06, "loss": 0.2034, "step": 11777000 }, { "epoch": 7.06, "learning_rate": 6.597155206654611e-06, "loss": 0.2028, "step": 11777500 }, { "epoch": 7.06, "learning_rate": 6.5950552410940456e-06, "loss": 0.2049, "step": 11778000 }, { "epoch": 7.06, "learning_rate": 6.592955275533481e-06, "loss": 0.2123, "step": 11778500 }, { "epoch": 7.06, "learning_rate": 6.590859509904038e-06, "loss": 0.2033, "step": 11779000 }, { "epoch": 7.06, "learning_rate": 6.588759544343473e-06, "loss": 0.2021, "step": 11779500 }, { "epoch": 7.06, "learning_rate": 6.586659578782908e-06, "loss": 0.2068, "step": 11780000 }, { "epoch": 7.06, "learning_rate": 6.584559613222343e-06, "loss": 0.2105, "step": 11780500 }, { "epoch": 7.06, "learning_rate": 6.582459647661778e-06, "loss": 0.2012, "step": 11781000 }, { "epoch": 7.06, "learning_rate": 6.580363882032334e-06, "loss": 0.2061, "step": 11781500 }, { "epoch": 7.06, "learning_rate": 6.5782639164717696e-06, "loss": 0.2024, "step": 11782000 }, { "epoch": 7.06, "learning_rate": 6.576163950911205e-06, "loss": 0.2061, "step": 11782500 }, { "epoch": 7.06, "learning_rate": 6.574063985350641e-06, "loss": 0.2031, "step": 11783000 }, { "epoch": 7.06, "learning_rate": 6.571964019790075e-06, "loss": 0.2014, "step": 11783500 }, { "epoch": 7.06, "learning_rate": 6.569868254160632e-06, "loss": 0.2097, "step": 11784000 }, { "epoch": 7.07, "learning_rate": 6.567768288600067e-06, "loss": 0.2036, "step": 11784500 }, { "epoch": 7.07, "learning_rate": 6.565668323039502e-06, "loss": 0.2078, "step": 11785000 }, { "epoch": 7.07, "learning_rate": 6.563568357478937e-06, "loss": 0.201, "step": 11785500 }, { "epoch": 7.07, "learning_rate": 6.561468391918373e-06, "loss": 0.2023, "step": 11786000 }, { "epoch": 7.07, "learning_rate": 6.559372626288929e-06, "loss": 0.2037, "step": 11786500 }, { "epoch": 7.07, "learning_rate": 6.557272660728364e-06, "loss": 0.2011, "step": 11787000 }, { "epoch": 7.07, "learning_rate": 6.555172695167799e-06, "loss": 0.2068, "step": 11787500 }, { "epoch": 7.07, "learning_rate": 6.553072729607234e-06, "loss": 0.2021, "step": 11788000 }, { "epoch": 7.07, "learning_rate": 6.55097276404667e-06, "loss": 0.2045, "step": 11788500 }, { "epoch": 7.07, "learning_rate": 6.5488727984861045e-06, "loss": 0.2066, "step": 11789000 }, { "epoch": 7.07, "learning_rate": 6.5467770328566614e-06, "loss": 0.2034, "step": 11789500 }, { "epoch": 7.07, "learning_rate": 6.544677067296097e-06, "loss": 0.2051, "step": 11790000 }, { "epoch": 7.07, "learning_rate": 6.542577101735532e-06, "loss": 0.2044, "step": 11790500 }, { "epoch": 7.07, "learning_rate": 6.540477136174966e-06, "loss": 0.2043, "step": 11791000 }, { "epoch": 7.07, "learning_rate": 6.538377170614402e-06, "loss": 0.204, "step": 11791500 }, { "epoch": 7.07, "learning_rate": 6.536277205053837e-06, "loss": 0.203, "step": 11792000 }, { "epoch": 7.07, "learning_rate": 6.534181439424393e-06, "loss": 0.2025, "step": 11792500 }, { "epoch": 7.07, "learning_rate": 6.5320814738638285e-06, "loss": 0.2054, "step": 11793000 }, { "epoch": 7.07, "learning_rate": 6.529981508303264e-06, "loss": 0.2068, "step": 11793500 }, { "epoch": 7.07, "learning_rate": 6.5278815427427e-06, "loss": 0.2081, "step": 11794000 }, { "epoch": 7.07, "learning_rate": 6.525785777113255e-06, "loss": 0.2088, "step": 11794500 }, { "epoch": 7.07, "learning_rate": 6.523685811552691e-06, "loss": 0.204, "step": 11795000 }, { "epoch": 7.07, "learning_rate": 6.521585845992126e-06, "loss": 0.2009, "step": 11795500 }, { "epoch": 7.07, "learning_rate": 6.51948588043156e-06, "loss": 0.2097, "step": 11796000 }, { "epoch": 7.07, "learning_rate": 6.5173859148709955e-06, "loss": 0.2046, "step": 11796500 }, { "epoch": 7.07, "learning_rate": 6.5152859493104315e-06, "loss": 0.2005, "step": 11797000 }, { "epoch": 7.07, "learning_rate": 6.513190183680988e-06, "loss": 0.2074, "step": 11797500 }, { "epoch": 7.07, "learning_rate": 6.511090218120423e-06, "loss": 0.2071, "step": 11798000 }, { "epoch": 7.07, "learning_rate": 6.508990252559858e-06, "loss": 0.1996, "step": 11798500 }, { "epoch": 7.07, "learning_rate": 6.506890286999293e-06, "loss": 0.2049, "step": 11799000 }, { "epoch": 7.07, "learning_rate": 6.504794521369849e-06, "loss": 0.2049, "step": 11799500 }, { "epoch": 7.07, "learning_rate": 6.502694555809284e-06, "loss": 0.203, "step": 11800000 }, { "epoch": 7.07, "eval_loss": 0.20080356299877167, "eval_runtime": 1459.6968, "eval_samples_per_second": 360.842, "eval_steps_per_second": 60.141, "step": 11800000 }, { "epoch": 7.07, "learning_rate": 6.500598790179841e-06, "loss": 0.201, "step": 11800500 }, { "epoch": 7.08, "learning_rate": 6.498498824619276e-06, "loss": 0.2007, "step": 11801000 }, { "epoch": 7.08, "learning_rate": 6.496398859058711e-06, "loss": 0.203, "step": 11801500 }, { "epoch": 7.08, "learning_rate": 6.494298893498147e-06, "loss": 0.2045, "step": 11802000 }, { "epoch": 7.08, "learning_rate": 6.492198927937582e-06, "loss": 0.205, "step": 11802500 }, { "epoch": 7.08, "learning_rate": 6.490098962377017e-06, "loss": 0.2067, "step": 11803000 }, { "epoch": 7.08, "learning_rate": 6.487998996816452e-06, "loss": 0.2031, "step": 11803500 }, { "epoch": 7.08, "learning_rate": 6.485899031255887e-06, "loss": 0.2096, "step": 11804000 }, { "epoch": 7.08, "learning_rate": 6.483803265626444e-06, "loss": 0.2065, "step": 11804500 }, { "epoch": 7.08, "learning_rate": 6.481703300065879e-06, "loss": 0.2047, "step": 11805000 }, { "epoch": 7.08, "learning_rate": 6.479603334505314e-06, "loss": 0.2038, "step": 11805500 }, { "epoch": 7.08, "learning_rate": 6.477503368944749e-06, "loss": 0.2042, "step": 11806000 }, { "epoch": 7.08, "learning_rate": 6.475403403384185e-06, "loss": 0.2026, "step": 11806500 }, { "epoch": 7.08, "learning_rate": 6.473303437823619e-06, "loss": 0.2013, "step": 11807000 }, { "epoch": 7.08, "learning_rate": 6.471203472263054e-06, "loss": 0.1992, "step": 11807500 }, { "epoch": 7.08, "learning_rate": 6.469107706633611e-06, "loss": 0.2091, "step": 11808000 }, { "epoch": 7.08, "learning_rate": 6.4670077410730465e-06, "loss": 0.2051, "step": 11808500 }, { "epoch": 7.08, "learning_rate": 6.464907775512482e-06, "loss": 0.2065, "step": 11809000 }, { "epoch": 7.08, "learning_rate": 6.462807809951917e-06, "loss": 0.2048, "step": 11809500 }, { "epoch": 7.08, "learning_rate": 6.460707844391352e-06, "loss": 0.2049, "step": 11810000 }, { "epoch": 7.08, "learning_rate": 6.458607878830787e-06, "loss": 0.2022, "step": 11810500 }, { "epoch": 7.08, "learning_rate": 6.456507913270222e-06, "loss": 0.2036, "step": 11811000 }, { "epoch": 7.08, "learning_rate": 6.454412147640778e-06, "loss": 0.201, "step": 11811500 }, { "epoch": 7.08, "learning_rate": 6.452312182080214e-06, "loss": 0.207, "step": 11812000 }, { "epoch": 7.08, "learning_rate": 6.450212216519649e-06, "loss": 0.204, "step": 11812500 }, { "epoch": 7.08, "learning_rate": 6.448112250959084e-06, "loss": 0.2004, "step": 11813000 }, { "epoch": 7.08, "learning_rate": 6.446012285398519e-06, "loss": 0.2027, "step": 11813500 }, { "epoch": 7.08, "learning_rate": 6.443912319837955e-06, "loss": 0.2086, "step": 11814000 }, { "epoch": 7.08, "learning_rate": 6.44181655420851e-06, "loss": 0.2003, "step": 11814500 }, { "epoch": 7.08, "learning_rate": 6.439716588647946e-06, "loss": 0.2059, "step": 11815000 }, { "epoch": 7.08, "learning_rate": 6.4376166230873814e-06, "loss": 0.2037, "step": 11815500 }, { "epoch": 7.08, "learning_rate": 6.435516657526817e-06, "loss": 0.2087, "step": 11816000 }, { "epoch": 7.08, "learning_rate": 6.433416691966252e-06, "loss": 0.2051, "step": 11816500 }, { "epoch": 7.08, "learning_rate": 6.431316726405687e-06, "loss": 0.2065, "step": 11817000 }, { "epoch": 7.09, "learning_rate": 6.429220960776244e-06, "loss": 0.2031, "step": 11817500 }, { "epoch": 7.09, "learning_rate": 6.427120995215678e-06, "loss": 0.2032, "step": 11818000 }, { "epoch": 7.09, "learning_rate": 6.425021029655113e-06, "loss": 0.2045, "step": 11818500 }, { "epoch": 7.09, "learning_rate": 6.4229210640945485e-06, "loss": 0.2026, "step": 11819000 }, { "epoch": 7.09, "learning_rate": 6.4208210985339845e-06, "loss": 0.2023, "step": 11819500 }, { "epoch": 7.09, "learning_rate": 6.41872533290454e-06, "loss": 0.205, "step": 11820000 }, { "epoch": 7.09, "learning_rate": 6.416625367343976e-06, "loss": 0.2039, "step": 11820500 }, { "epoch": 7.09, "learning_rate": 6.414525401783411e-06, "loss": 0.2021, "step": 11821000 }, { "epoch": 7.09, "learning_rate": 6.412425436222846e-06, "loss": 0.2056, "step": 11821500 }, { "epoch": 7.09, "learning_rate": 6.41032547066228e-06, "loss": 0.2017, "step": 11822000 }, { "epoch": 7.09, "learning_rate": 6.408225505101716e-06, "loss": 0.2026, "step": 11822500 }, { "epoch": 7.09, "learning_rate": 6.4061297394722725e-06, "loss": 0.2008, "step": 11823000 }, { "epoch": 7.09, "learning_rate": 6.404029773911708e-06, "loss": 0.2051, "step": 11823500 }, { "epoch": 7.09, "learning_rate": 6.401929808351143e-06, "loss": 0.2112, "step": 11824000 }, { "epoch": 7.09, "learning_rate": 6.399829842790578e-06, "loss": 0.2009, "step": 11824500 }, { "epoch": 7.09, "learning_rate": 6.397734077161134e-06, "loss": 0.1985, "step": 11825000 }, { "epoch": 7.09, "learning_rate": 6.395634111600569e-06, "loss": 0.2086, "step": 11825500 }, { "epoch": 7.09, "learning_rate": 6.393534146040005e-06, "loss": 0.2031, "step": 11826000 }, { "epoch": 7.09, "learning_rate": 6.39143418047944e-06, "loss": 0.205, "step": 11826500 }, { "epoch": 7.09, "learning_rate": 6.3893342149188755e-06, "loss": 0.2053, "step": 11827000 }, { "epoch": 7.09, "learning_rate": 6.387238449289432e-06, "loss": 0.2058, "step": 11827500 }, { "epoch": 7.09, "learning_rate": 6.385138483728867e-06, "loss": 0.2061, "step": 11828000 }, { "epoch": 7.09, "learning_rate": 6.383038518168302e-06, "loss": 0.203, "step": 11828500 }, { "epoch": 7.09, "learning_rate": 6.380938552607737e-06, "loss": 0.2023, "step": 11829000 }, { "epoch": 7.09, "learning_rate": 6.378838587047172e-06, "loss": 0.2035, "step": 11829500 }, { "epoch": 7.09, "learning_rate": 6.376738621486607e-06, "loss": 0.2098, "step": 11830000 }, { "epoch": 7.09, "learning_rate": 6.3746428558571635e-06, "loss": 0.2082, "step": 11830500 }, { "epoch": 7.09, "learning_rate": 6.372542890296599e-06, "loss": 0.2054, "step": 11831000 }, { "epoch": 7.09, "learning_rate": 6.370442924736035e-06, "loss": 0.204, "step": 11831500 }, { "epoch": 7.09, "learning_rate": 6.36834295917547e-06, "loss": 0.2018, "step": 11832000 }, { "epoch": 7.09, "learning_rate": 6.366242993614905e-06, "loss": 0.2047, "step": 11832500 }, { "epoch": 7.09, "learning_rate": 6.364147227985461e-06, "loss": 0.2064, "step": 11833000 }, { "epoch": 7.09, "learning_rate": 6.362047262424896e-06, "loss": 0.2032, "step": 11833500 }, { "epoch": 7.09, "learning_rate": 6.359947296864331e-06, "loss": 0.2038, "step": 11834000 }, { "epoch": 7.1, "learning_rate": 6.3578473313037665e-06, "loss": 0.2004, "step": 11834500 }, { "epoch": 7.1, "learning_rate": 6.355747365743202e-06, "loss": 0.2047, "step": 11835000 }, { "epoch": 7.1, "learning_rate": 6.353647400182637e-06, "loss": 0.2046, "step": 11835500 }, { "epoch": 7.1, "learning_rate": 6.351551634553193e-06, "loss": 0.2087, "step": 11836000 }, { "epoch": 7.1, "learning_rate": 6.349451668992628e-06, "loss": 0.2011, "step": 11836500 }, { "epoch": 7.1, "learning_rate": 6.347351703432063e-06, "loss": 0.2019, "step": 11837000 }, { "epoch": 7.1, "learning_rate": 6.345251737871499e-06, "loss": 0.204, "step": 11837500 }, { "epoch": 7.1, "learning_rate": 6.343151772310934e-06, "loss": 0.2116, "step": 11838000 }, { "epoch": 7.1, "learning_rate": 6.3410560066814905e-06, "loss": 0.2014, "step": 11838500 }, { "epoch": 7.1, "learning_rate": 6.338956041120926e-06, "loss": 0.2053, "step": 11839000 }, { "epoch": 7.1, "learning_rate": 6.336856075560361e-06, "loss": 0.2074, "step": 11839500 }, { "epoch": 7.1, "learning_rate": 6.334756109999796e-06, "loss": 0.2001, "step": 11840000 }, { "epoch": 7.1, "learning_rate": 6.332660344370352e-06, "loss": 0.2067, "step": 11840500 }, { "epoch": 7.1, "learning_rate": 6.330560378809788e-06, "loss": 0.2082, "step": 11841000 }, { "epoch": 7.1, "learning_rate": 6.3284604132492224e-06, "loss": 0.202, "step": 11841500 }, { "epoch": 7.1, "learning_rate": 6.326360447688658e-06, "loss": 0.2104, "step": 11842000 }, { "epoch": 7.1, "learning_rate": 6.324260482128093e-06, "loss": 0.2057, "step": 11842500 }, { "epoch": 7.1, "learning_rate": 6.322160516567529e-06, "loss": 0.2059, "step": 11843000 }, { "epoch": 7.1, "learning_rate": 6.320064750938084e-06, "loss": 0.2051, "step": 11843500 }, { "epoch": 7.1, "learning_rate": 6.31796478537752e-06, "loss": 0.2024, "step": 11844000 }, { "epoch": 7.1, "learning_rate": 6.315864819816955e-06, "loss": 0.2019, "step": 11844500 }, { "epoch": 7.1, "learning_rate": 6.31376485425639e-06, "loss": 0.2028, "step": 11845000 }, { "epoch": 7.1, "learning_rate": 6.3116648886958255e-06, "loss": 0.2012, "step": 11845500 }, { "epoch": 7.1, "learning_rate": 6.3095691230663816e-06, "loss": 0.2047, "step": 11846000 }, { "epoch": 7.1, "learning_rate": 6.307473357436938e-06, "loss": 0.2101, "step": 11846500 }, { "epoch": 7.1, "learning_rate": 6.305373391876373e-06, "loss": 0.2044, "step": 11847000 }, { "epoch": 7.1, "learning_rate": 6.303273426315809e-06, "loss": 0.2006, "step": 11847500 }, { "epoch": 7.1, "learning_rate": 6.301173460755244e-06, "loss": 0.2074, "step": 11848000 }, { "epoch": 7.1, "learning_rate": 6.299073495194678e-06, "loss": 0.2024, "step": 11848500 }, { "epoch": 7.1, "learning_rate": 6.2969735296341135e-06, "loss": 0.2081, "step": 11849000 }, { "epoch": 7.1, "learning_rate": 6.2948735640735495e-06, "loss": 0.2036, "step": 11849500 }, { "epoch": 7.1, "learning_rate": 6.292773598512985e-06, "loss": 0.205, "step": 11850000 }, { "epoch": 7.1, "learning_rate": 6.29067363295242e-06, "loss": 0.197, "step": 11850500 }, { "epoch": 7.11, "learning_rate": 6.288573667391854e-06, "loss": 0.2056, "step": 11851000 }, { "epoch": 7.11, "learning_rate": 6.286477901762411e-06, "loss": 0.206, "step": 11851500 }, { "epoch": 7.11, "learning_rate": 6.284377936201846e-06, "loss": 0.2093, "step": 11852000 }, { "epoch": 7.11, "learning_rate": 6.282277970641281e-06, "loss": 0.2099, "step": 11852500 }, { "epoch": 7.11, "learning_rate": 6.2801780050807165e-06, "loss": 0.205, "step": 11853000 }, { "epoch": 7.11, "learning_rate": 6.278078039520152e-06, "loss": 0.2071, "step": 11853500 }, { "epoch": 7.11, "learning_rate": 6.275978073959588e-06, "loss": 0.1996, "step": 11854000 }, { "epoch": 7.11, "learning_rate": 6.273878108399022e-06, "loss": 0.2102, "step": 11854500 }, { "epoch": 7.11, "learning_rate": 6.271778142838457e-06, "loss": 0.1992, "step": 11855000 }, { "epoch": 7.11, "learning_rate": 6.269686577140134e-06, "loss": 0.2113, "step": 11855500 }, { "epoch": 7.11, "learning_rate": 6.26758661157957e-06, "loss": 0.2084, "step": 11856000 }, { "epoch": 7.11, "learning_rate": 6.265486646019005e-06, "loss": 0.2017, "step": 11856500 }, { "epoch": 7.11, "learning_rate": 6.2633866804584405e-06, "loss": 0.2059, "step": 11857000 }, { "epoch": 7.11, "learning_rate": 6.261286714897876e-06, "loss": 0.205, "step": 11857500 }, { "epoch": 7.11, "learning_rate": 6.259186749337311e-06, "loss": 0.1991, "step": 11858000 }, { "epoch": 7.11, "learning_rate": 6.257086783776746e-06, "loss": 0.2064, "step": 11858500 }, { "epoch": 7.11, "learning_rate": 6.254991018147303e-06, "loss": 0.2055, "step": 11859000 }, { "epoch": 7.11, "learning_rate": 6.252891052586737e-06, "loss": 0.2052, "step": 11859500 }, { "epoch": 7.11, "learning_rate": 6.250791087026172e-06, "loss": 0.2016, "step": 11860000 }, { "epoch": 7.11, "learning_rate": 6.248691121465608e-06, "loss": 0.2039, "step": 11860500 }, { "epoch": 7.11, "learning_rate": 6.2465911559050435e-06, "loss": 0.2105, "step": 11861000 }, { "epoch": 7.11, "learning_rate": 6.244491190344478e-06, "loss": 0.2008, "step": 11861500 }, { "epoch": 7.11, "learning_rate": 6.242395424715035e-06, "loss": 0.2037, "step": 11862000 }, { "epoch": 7.11, "learning_rate": 6.24029545915447e-06, "loss": 0.2048, "step": 11862500 }, { "epoch": 7.11, "learning_rate": 6.238195493593905e-06, "loss": 0.205, "step": 11863000 }, { "epoch": 7.11, "learning_rate": 6.23609552803334e-06, "loss": 0.2033, "step": 11863500 }, { "epoch": 7.11, "learning_rate": 6.233995562472775e-06, "loss": 0.201, "step": 11864000 }, { "epoch": 7.11, "learning_rate": 6.2318955969122106e-06, "loss": 0.1993, "step": 11864500 }, { "epoch": 7.11, "learning_rate": 6.229799831282767e-06, "loss": 0.2076, "step": 11865000 }, { "epoch": 7.11, "learning_rate": 6.227699865722202e-06, "loss": 0.2055, "step": 11865500 }, { "epoch": 7.11, "learning_rate": 6.225599900161637e-06, "loss": 0.1989, "step": 11866000 }, { "epoch": 7.11, "learning_rate": 6.223499934601073e-06, "loss": 0.2018, "step": 11866500 }, { "epoch": 7.11, "learning_rate": 6.221404168971628e-06, "loss": 0.2037, "step": 11867000 }, { "epoch": 7.12, "learning_rate": 6.219304203411064e-06, "loss": 0.2077, "step": 11867500 }, { "epoch": 7.12, "learning_rate": 6.217204237850499e-06, "loss": 0.1997, "step": 11868000 }, { "epoch": 7.12, "learning_rate": 6.2151042722899346e-06, "loss": 0.2005, "step": 11868500 }, { "epoch": 7.12, "learning_rate": 6.213008506660491e-06, "loss": 0.198, "step": 11869000 }, { "epoch": 7.12, "learning_rate": 6.210908541099926e-06, "loss": 0.2013, "step": 11869500 }, { "epoch": 7.12, "learning_rate": 6.208808575539362e-06, "loss": 0.2046, "step": 11870000 }, { "epoch": 7.12, "learning_rate": 6.206708609978796e-06, "loss": 0.2094, "step": 11870500 }, { "epoch": 7.12, "learning_rate": 6.204608644418231e-06, "loss": 0.2096, "step": 11871000 }, { "epoch": 7.12, "learning_rate": 6.2025086788576664e-06, "loss": 0.2035, "step": 11871500 }, { "epoch": 7.12, "learning_rate": 6.2004129132282226e-06, "loss": 0.2012, "step": 11872000 }, { "epoch": 7.12, "learning_rate": 6.198312947667658e-06, "loss": 0.2013, "step": 11872500 }, { "epoch": 7.12, "learning_rate": 6.196212982107094e-06, "loss": 0.2039, "step": 11873000 }, { "epoch": 7.12, "learning_rate": 6.194113016546529e-06, "loss": 0.2081, "step": 11873500 }, { "epoch": 7.12, "learning_rate": 6.192013050985963e-06, "loss": 0.2072, "step": 11874000 }, { "epoch": 7.12, "learning_rate": 6.189913085425398e-06, "loss": 0.2078, "step": 11874500 }, { "epoch": 7.12, "learning_rate": 6.187813119864834e-06, "loss": 0.2063, "step": 11875000 }, { "epoch": 7.12, "learning_rate": 6.1857131543042695e-06, "loss": 0.209, "step": 11875500 }, { "epoch": 7.12, "learning_rate": 6.183617388674826e-06, "loss": 0.2011, "step": 11876000 }, { "epoch": 7.12, "learning_rate": 6.181517423114261e-06, "loss": 0.2024, "step": 11876500 }, { "epoch": 7.12, "learning_rate": 6.179417457553696e-06, "loss": 0.2078, "step": 11877000 }, { "epoch": 7.12, "learning_rate": 6.177321691924252e-06, "loss": 0.2015, "step": 11877500 }, { "epoch": 7.12, "learning_rate": 6.175221726363687e-06, "loss": 0.1993, "step": 11878000 }, { "epoch": 7.12, "learning_rate": 6.173121760803123e-06, "loss": 0.2043, "step": 11878500 }, { "epoch": 7.12, "learning_rate": 6.171021795242558e-06, "loss": 0.2038, "step": 11879000 }, { "epoch": 7.12, "learning_rate": 6.168921829681993e-06, "loss": 0.2007, "step": 11879500 }, { "epoch": 7.12, "learning_rate": 6.16682606405255e-06, "loss": 0.2013, "step": 11880000 }, { "epoch": 7.12, "learning_rate": 6.164726098491985e-06, "loss": 0.2083, "step": 11880500 }, { "epoch": 7.12, "learning_rate": 6.16262613293142e-06, "loss": 0.2079, "step": 11881000 }, { "epoch": 7.12, "learning_rate": 6.160526167370855e-06, "loss": 0.2038, "step": 11881500 }, { "epoch": 7.12, "learning_rate": 6.15842620181029e-06, "loss": 0.2053, "step": 11882000 }, { "epoch": 7.12, "learning_rate": 6.156326236249725e-06, "loss": 0.2024, "step": 11882500 }, { "epoch": 7.12, "learning_rate": 6.1542304706202815e-06, "loss": 0.2002, "step": 11883000 }, { "epoch": 7.12, "learning_rate": 6.152130505059717e-06, "loss": 0.2021, "step": 11883500 }, { "epoch": 7.12, "learning_rate": 6.150030539499153e-06, "loss": 0.2029, "step": 11884000 }, { "epoch": 7.13, "learning_rate": 6.147930573938588e-06, "loss": 0.2062, "step": 11884500 }, { "epoch": 7.13, "learning_rate": 6.145830608378022e-06, "loss": 0.2045, "step": 11885000 }, { "epoch": 7.13, "learning_rate": 6.143730642817457e-06, "loss": 0.2074, "step": 11885500 }, { "epoch": 7.13, "learning_rate": 6.141630677256893e-06, "loss": 0.2, "step": 11886000 }, { "epoch": 7.13, "learning_rate": 6.139530711696328e-06, "loss": 0.2036, "step": 11886500 }, { "epoch": 7.13, "learning_rate": 6.1374349460668845e-06, "loss": 0.2047, "step": 11887000 }, { "epoch": 7.13, "learning_rate": 6.13533498050632e-06, "loss": 0.2096, "step": 11887500 }, { "epoch": 7.13, "learning_rate": 6.133239214876877e-06, "loss": 0.2079, "step": 11888000 }, { "epoch": 7.13, "learning_rate": 6.131139249316311e-06, "loss": 0.2064, "step": 11888500 }, { "epoch": 7.13, "learning_rate": 6.129039283755746e-06, "loss": 0.202, "step": 11889000 }, { "epoch": 7.13, "learning_rate": 6.126939318195181e-06, "loss": 0.2081, "step": 11889500 }, { "epoch": 7.13, "learning_rate": 6.124843552565737e-06, "loss": 0.2084, "step": 11890000 }, { "epoch": 7.13, "learning_rate": 6.1227435870051725e-06, "loss": 0.2055, "step": 11890500 }, { "epoch": 7.13, "learning_rate": 6.1206436214446085e-06, "loss": 0.2054, "step": 11891000 }, { "epoch": 7.13, "learning_rate": 6.118543655884044e-06, "loss": 0.2017, "step": 11891500 }, { "epoch": 7.13, "learning_rate": 6.116443690323478e-06, "loss": 0.2086, "step": 11892000 }, { "epoch": 7.13, "learning_rate": 6.114347924694035e-06, "loss": 0.2032, "step": 11892500 }, { "epoch": 7.13, "learning_rate": 6.11224795913347e-06, "loss": 0.2026, "step": 11893000 }, { "epoch": 7.13, "learning_rate": 6.110147993572905e-06, "loss": 0.2076, "step": 11893500 }, { "epoch": 7.13, "learning_rate": 6.10804802801234e-06, "loss": 0.2049, "step": 11894000 }, { "epoch": 7.13, "learning_rate": 6.1059480624517755e-06, "loss": 0.2032, "step": 11894500 }, { "epoch": 7.13, "learning_rate": 6.103848096891211e-06, "loss": 0.2045, "step": 11895000 }, { "epoch": 7.13, "learning_rate": 6.101748131330647e-06, "loss": 0.2027, "step": 11895500 }, { "epoch": 7.13, "learning_rate": 6.099648165770081e-06, "loss": 0.206, "step": 11896000 }, { "epoch": 7.13, "learning_rate": 6.097552400140638e-06, "loss": 0.2016, "step": 11896500 }, { "epoch": 7.13, "learning_rate": 6.095452434580073e-06, "loss": 0.2028, "step": 11897000 }, { "epoch": 7.13, "learning_rate": 6.093352469019507e-06, "loss": 0.2005, "step": 11897500 }, { "epoch": 7.13, "learning_rate": 6.091252503458943e-06, "loss": 0.2062, "step": 11898000 }, { "epoch": 7.13, "learning_rate": 6.0891567378294995e-06, "loss": 0.2073, "step": 11898500 }, { "epoch": 7.13, "learning_rate": 6.087056772268935e-06, "loss": 0.2057, "step": 11899000 }, { "epoch": 7.13, "learning_rate": 6.084961006639491e-06, "loss": 0.2008, "step": 11899500 }, { "epoch": 7.13, "learning_rate": 6.082861041078927e-06, "loss": 0.2079, "step": 11900000 }, { "epoch": 7.13, "eval_loss": 0.20063015818595886, "eval_runtime": 1459.7353, "eval_samples_per_second": 360.833, "eval_steps_per_second": 60.139, "step": 11900000 }, { "epoch": 7.13, "learning_rate": 6.080761075518362e-06, "loss": 0.2083, "step": 11900500 }, { "epoch": 7.14, "learning_rate": 6.078661109957796e-06, "loss": 0.2099, "step": 11901000 }, { "epoch": 7.14, "learning_rate": 6.076561144397231e-06, "loss": 0.2054, "step": 11901500 }, { "epoch": 7.14, "learning_rate": 6.074461178836667e-06, "loss": 0.2092, "step": 11902000 }, { "epoch": 7.14, "learning_rate": 6.0723612132761026e-06, "loss": 0.2066, "step": 11902500 }, { "epoch": 7.14, "learning_rate": 6.070261247715537e-06, "loss": 0.2008, "step": 11903000 }, { "epoch": 7.14, "learning_rate": 6.068161282154972e-06, "loss": 0.206, "step": 11903500 }, { "epoch": 7.14, "learning_rate": 6.066061316594408e-06, "loss": 0.202, "step": 11904000 }, { "epoch": 7.14, "learning_rate": 6.063961351033843e-06, "loss": 0.2074, "step": 11904500 }, { "epoch": 7.14, "learning_rate": 6.061865585404399e-06, "loss": 0.2072, "step": 11905000 }, { "epoch": 7.14, "learning_rate": 6.0597656198438344e-06, "loss": 0.2081, "step": 11905500 }, { "epoch": 7.14, "learning_rate": 6.05766565428327e-06, "loss": 0.2079, "step": 11906000 }, { "epoch": 7.14, "learning_rate": 6.055565688722706e-06, "loss": 0.2018, "step": 11906500 }, { "epoch": 7.14, "learning_rate": 6.05346572316214e-06, "loss": 0.2079, "step": 11907000 }, { "epoch": 7.14, "learning_rate": 6.051369957532697e-06, "loss": 0.2089, "step": 11907500 }, { "epoch": 7.14, "learning_rate": 6.049269991972132e-06, "loss": 0.2042, "step": 11908000 }, { "epoch": 7.14, "learning_rate": 6.047170026411566e-06, "loss": 0.2028, "step": 11908500 }, { "epoch": 7.14, "learning_rate": 6.0450700608510015e-06, "loss": 0.2016, "step": 11909000 }, { "epoch": 7.14, "learning_rate": 6.0429742952215584e-06, "loss": 0.2033, "step": 11909500 }, { "epoch": 7.14, "learning_rate": 6.040874329660993e-06, "loss": 0.2004, "step": 11910000 }, { "epoch": 7.14, "learning_rate": 6.038774364100429e-06, "loss": 0.2007, "step": 11910500 }, { "epoch": 7.14, "learning_rate": 6.036674398539864e-06, "loss": 0.2043, "step": 11911000 }, { "epoch": 7.14, "learning_rate": 6.034574432979299e-06, "loss": 0.2024, "step": 11911500 }, { "epoch": 7.14, "learning_rate": 6.032478667349855e-06, "loss": 0.2043, "step": 11912000 }, { "epoch": 7.14, "learning_rate": 6.03037870178929e-06, "loss": 0.2027, "step": 11912500 }, { "epoch": 7.14, "learning_rate": 6.028278736228726e-06, "loss": 0.2031, "step": 11913000 }, { "epoch": 7.14, "learning_rate": 6.0261787706681615e-06, "loss": 0.2037, "step": 11913500 }, { "epoch": 7.14, "learning_rate": 6.024078805107596e-06, "loss": 0.2062, "step": 11914000 }, { "epoch": 7.14, "learning_rate": 6.021983039478153e-06, "loss": 0.2053, "step": 11914500 }, { "epoch": 7.14, "learning_rate": 6.019883073917588e-06, "loss": 0.2043, "step": 11915000 }, { "epoch": 7.14, "learning_rate": 6.017783108357022e-06, "loss": 0.2118, "step": 11915500 }, { "epoch": 7.14, "learning_rate": 6.015683142796458e-06, "loss": 0.2027, "step": 11916000 }, { "epoch": 7.14, "learning_rate": 6.013587377167014e-06, "loss": 0.2057, "step": 11916500 }, { "epoch": 7.14, "learning_rate": 6.0114874116064495e-06, "loss": 0.2066, "step": 11917000 }, { "epoch": 7.15, "learning_rate": 6.009387446045885e-06, "loss": 0.2054, "step": 11917500 }, { "epoch": 7.15, "learning_rate": 6.00728748048532e-06, "loss": 0.2081, "step": 11918000 }, { "epoch": 7.15, "learning_rate": 6.005191714855877e-06, "loss": 0.2065, "step": 11918500 }, { "epoch": 7.15, "learning_rate": 6.003091749295311e-06, "loss": 0.2071, "step": 11919000 }, { "epoch": 7.15, "learning_rate": 6.000991783734746e-06, "loss": 0.2067, "step": 11919500 }, { "epoch": 7.15, "learning_rate": 5.998891818174182e-06, "loss": 0.2057, "step": 11920000 }, { "epoch": 7.15, "learning_rate": 5.996791852613617e-06, "loss": 0.2093, "step": 11920500 }, { "epoch": 7.15, "learning_rate": 5.994691887053052e-06, "loss": 0.1992, "step": 11921000 }, { "epoch": 7.15, "learning_rate": 5.992591921492488e-06, "loss": 0.2007, "step": 11921500 }, { "epoch": 7.15, "learning_rate": 5.990496155863044e-06, "loss": 0.2037, "step": 11922000 }, { "epoch": 7.15, "learning_rate": 5.988396190302479e-06, "loss": 0.2038, "step": 11922500 }, { "epoch": 7.15, "learning_rate": 5.986300424673035e-06, "loss": 0.2028, "step": 11923000 }, { "epoch": 7.15, "learning_rate": 5.984200459112471e-06, "loss": 0.2081, "step": 11923500 }, { "epoch": 7.15, "learning_rate": 5.982100493551906e-06, "loss": 0.2056, "step": 11924000 }, { "epoch": 7.15, "learning_rate": 5.9800005279913405e-06, "loss": 0.2039, "step": 11924500 }, { "epoch": 7.15, "learning_rate": 5.977900562430776e-06, "loss": 0.2037, "step": 11925000 }, { "epoch": 7.15, "learning_rate": 5.975804796801333e-06, "loss": 0.2079, "step": 11925500 }, { "epoch": 7.15, "learning_rate": 5.973704831240767e-06, "loss": 0.205, "step": 11926000 }, { "epoch": 7.15, "learning_rate": 5.971604865680203e-06, "loss": 0.2045, "step": 11926500 }, { "epoch": 7.15, "learning_rate": 5.969504900119638e-06, "loss": 0.2048, "step": 11927000 }, { "epoch": 7.15, "learning_rate": 5.967404934559073e-06, "loss": 0.2035, "step": 11927500 }, { "epoch": 7.15, "learning_rate": 5.9653049689985075e-06, "loss": 0.2044, "step": 11928000 }, { "epoch": 7.15, "learning_rate": 5.9632050034379435e-06, "loss": 0.2076, "step": 11928500 }, { "epoch": 7.15, "learning_rate": 5.961105037877379e-06, "loss": 0.2057, "step": 11929000 }, { "epoch": 7.15, "learning_rate": 5.959005072316814e-06, "loss": 0.2007, "step": 11929500 }, { "epoch": 7.15, "learning_rate": 5.95690510675625e-06, "loss": 0.2027, "step": 11930000 }, { "epoch": 7.15, "learning_rate": 5.954805141195684e-06, "loss": 0.1992, "step": 11930500 }, { "epoch": 7.15, "learning_rate": 5.952709375566241e-06, "loss": 0.2016, "step": 11931000 }, { "epoch": 7.15, "learning_rate": 5.950613609936796e-06, "loss": 0.2068, "step": 11931500 }, { "epoch": 7.15, "learning_rate": 5.948513644376232e-06, "loss": 0.2013, "step": 11932000 }, { "epoch": 7.15, "learning_rate": 5.9464136788156675e-06, "loss": 0.2029, "step": 11932500 }, { "epoch": 7.15, "learning_rate": 5.944313713255103e-06, "loss": 0.2052, "step": 11933000 }, { "epoch": 7.15, "learning_rate": 5.942213747694537e-06, "loss": 0.2017, "step": 11933500 }, { "epoch": 7.15, "learning_rate": 5.940113782133973e-06, "loss": 0.202, "step": 11934000 }, { "epoch": 7.16, "learning_rate": 5.938013816573408e-06, "loss": 0.2023, "step": 11934500 }, { "epoch": 7.16, "learning_rate": 5.935913851012843e-06, "loss": 0.2034, "step": 11935000 }, { "epoch": 7.16, "learning_rate": 5.9338138854522784e-06, "loss": 0.2053, "step": 11935500 }, { "epoch": 7.16, "learning_rate": 5.931713919891714e-06, "loss": 0.2103, "step": 11936000 }, { "epoch": 7.16, "learning_rate": 5.929613954331149e-06, "loss": 0.2024, "step": 11936500 }, { "epoch": 7.16, "learning_rate": 5.927513988770584e-06, "loss": 0.2075, "step": 11937000 }, { "epoch": 7.16, "learning_rate": 5.92541822314114e-06, "loss": 0.2087, "step": 11937500 }, { "epoch": 7.16, "learning_rate": 5.923322457511697e-06, "loss": 0.2021, "step": 11938000 }, { "epoch": 7.16, "learning_rate": 5.921222491951132e-06, "loss": 0.2078, "step": 11938500 }, { "epoch": 7.16, "learning_rate": 5.9191225263905664e-06, "loss": 0.2011, "step": 11939000 }, { "epoch": 7.16, "learning_rate": 5.9170225608300024e-06, "loss": 0.209, "step": 11939500 }, { "epoch": 7.16, "learning_rate": 5.914922595269438e-06, "loss": 0.2061, "step": 11940000 }, { "epoch": 7.16, "learning_rate": 5.912822629708873e-06, "loss": 0.2043, "step": 11940500 }, { "epoch": 7.16, "learning_rate": 5.910726864079429e-06, "loss": 0.2021, "step": 11941000 }, { "epoch": 7.16, "learning_rate": 5.908626898518864e-06, "loss": 0.2034, "step": 11941500 }, { "epoch": 7.16, "learning_rate": 5.906526932958299e-06, "loss": 0.2052, "step": 11942000 }, { "epoch": 7.16, "learning_rate": 5.904426967397735e-06, "loss": 0.2034, "step": 11942500 }, { "epoch": 7.16, "learning_rate": 5.9023270018371695e-06, "loss": 0.2032, "step": 11943000 }, { "epoch": 7.16, "learning_rate": 5.900227036276605e-06, "loss": 0.2022, "step": 11943500 }, { "epoch": 7.16, "learning_rate": 5.898127070716041e-06, "loss": 0.2022, "step": 11944000 }, { "epoch": 7.16, "learning_rate": 5.896027105155476e-06, "loss": 0.2064, "step": 11944500 }, { "epoch": 7.16, "learning_rate": 5.893935539457153e-06, "loss": 0.2045, "step": 11945000 }, { "epoch": 7.16, "learning_rate": 5.891835573896588e-06, "loss": 0.1994, "step": 11945500 }, { "epoch": 7.16, "learning_rate": 5.889735608336023e-06, "loss": 0.2057, "step": 11946000 }, { "epoch": 7.16, "learning_rate": 5.887635642775458e-06, "loss": 0.2018, "step": 11946500 }, { "epoch": 7.16, "learning_rate": 5.8855356772148935e-06, "loss": 0.2054, "step": 11947000 }, { "epoch": 7.16, "learning_rate": 5.883435711654329e-06, "loss": 0.2031, "step": 11947500 }, { "epoch": 7.16, "learning_rate": 5.881335746093765e-06, "loss": 0.2018, "step": 11948000 }, { "epoch": 7.16, "learning_rate": 5.879235780533199e-06, "loss": 0.2041, "step": 11948500 }, { "epoch": 7.16, "learning_rate": 5.877140014903756e-06, "loss": 0.2041, "step": 11949000 }, { "epoch": 7.16, "learning_rate": 5.875044249274311e-06, "loss": 0.2068, "step": 11949500 }, { "epoch": 7.16, "learning_rate": 5.872944283713747e-06, "loss": 0.204, "step": 11950000 }, { "epoch": 7.16, "learning_rate": 5.870844318153182e-06, "loss": 0.2076, "step": 11950500 }, { "epoch": 7.17, "learning_rate": 5.8687443525926175e-06, "loss": 0.2067, "step": 11951000 }, { "epoch": 7.17, "learning_rate": 5.866644387032053e-06, "loss": 0.2068, "step": 11951500 }, { "epoch": 7.17, "learning_rate": 5.864544421471488e-06, "loss": 0.2099, "step": 11952000 }, { "epoch": 7.17, "learning_rate": 5.862448655842045e-06, "loss": 0.2017, "step": 11952500 }, { "epoch": 7.17, "learning_rate": 5.860348690281479e-06, "loss": 0.2019, "step": 11953000 }, { "epoch": 7.17, "learning_rate": 5.858248724720914e-06, "loss": 0.2066, "step": 11953500 }, { "epoch": 7.17, "learning_rate": 5.856148759160349e-06, "loss": 0.2023, "step": 11954000 }, { "epoch": 7.17, "learning_rate": 5.854048793599785e-06, "loss": 0.2033, "step": 11954500 }, { "epoch": 7.17, "learning_rate": 5.8519488280392205e-06, "loss": 0.2036, "step": 11955000 }, { "epoch": 7.17, "learning_rate": 5.849848862478655e-06, "loss": 0.21, "step": 11955500 }, { "epoch": 7.17, "learning_rate": 5.847757296780333e-06, "loss": 0.2067, "step": 11956000 }, { "epoch": 7.17, "learning_rate": 5.845657331219768e-06, "loss": 0.2084, "step": 11956500 }, { "epoch": 7.17, "learning_rate": 5.843557365659203e-06, "loss": 0.2011, "step": 11957000 }, { "epoch": 7.17, "learning_rate": 5.841457400098638e-06, "loss": 0.2074, "step": 11957500 }, { "epoch": 7.17, "learning_rate": 5.839357434538073e-06, "loss": 0.2072, "step": 11958000 }, { "epoch": 7.17, "learning_rate": 5.8372574689775085e-06, "loss": 0.2032, "step": 11958500 }, { "epoch": 7.17, "learning_rate": 5.835157503416944e-06, "loss": 0.2087, "step": 11959000 }, { "epoch": 7.17, "learning_rate": 5.833057537856379e-06, "loss": 0.2061, "step": 11959500 }, { "epoch": 7.17, "learning_rate": 5.830957572295815e-06, "loss": 0.2052, "step": 11960000 }, { "epoch": 7.17, "learning_rate": 5.82885760673525e-06, "loss": 0.2036, "step": 11960500 }, { "epoch": 7.17, "learning_rate": 5.826757641174684e-06, "loss": 0.198, "step": 11961000 }, { "epoch": 7.17, "learning_rate": 5.824657675614119e-06, "loss": 0.2042, "step": 11961500 }, { "epoch": 7.17, "learning_rate": 5.822561909984676e-06, "loss": 0.2024, "step": 11962000 }, { "epoch": 7.17, "learning_rate": 5.820461944424111e-06, "loss": 0.206, "step": 11962500 }, { "epoch": 7.17, "learning_rate": 5.818361978863547e-06, "loss": 0.202, "step": 11963000 }, { "epoch": 7.17, "learning_rate": 5.816262013302982e-06, "loss": 0.2074, "step": 11963500 }, { "epoch": 7.17, "learning_rate": 5.814162047742417e-06, "loss": 0.2087, "step": 11964000 }, { "epoch": 7.17, "learning_rate": 5.812062082181852e-06, "loss": 0.2021, "step": 11964500 }, { "epoch": 7.17, "learning_rate": 5.809962116621287e-06, "loss": 0.2019, "step": 11965000 }, { "epoch": 7.17, "learning_rate": 5.807866350991844e-06, "loss": 0.2018, "step": 11965500 }, { "epoch": 7.17, "learning_rate": 5.805766385431279e-06, "loss": 0.2038, "step": 11966000 }, { "epoch": 7.17, "learning_rate": 5.803666419870714e-06, "loss": 0.2088, "step": 11966500 }, { "epoch": 7.17, "learning_rate": 5.801566454310149e-06, "loss": 0.2031, "step": 11967000 }, { "epoch": 7.18, "learning_rate": 5.799470688680706e-06, "loss": 0.2031, "step": 11967500 }, { "epoch": 7.18, "learning_rate": 5.79737072312014e-06, "loss": 0.2077, "step": 11968000 }, { "epoch": 7.18, "learning_rate": 5.795270757559576e-06, "loss": 0.2043, "step": 11968500 }, { "epoch": 7.18, "learning_rate": 5.793170791999011e-06, "loss": 0.201, "step": 11969000 }, { "epoch": 7.18, "learning_rate": 5.791075026369567e-06, "loss": 0.207, "step": 11969500 }, { "epoch": 7.18, "learning_rate": 5.7889750608090026e-06, "loss": 0.206, "step": 11970000 }, { "epoch": 7.18, "learning_rate": 5.786875095248438e-06, "loss": 0.2041, "step": 11970500 }, { "epoch": 7.18, "learning_rate": 5.784775129687873e-06, "loss": 0.1995, "step": 11971000 }, { "epoch": 7.18, "learning_rate": 5.782675164127308e-06, "loss": 0.2082, "step": 11971500 }, { "epoch": 7.18, "learning_rate": 5.780579398497864e-06, "loss": 0.2034, "step": 11972000 }, { "epoch": 7.18, "learning_rate": 5.7784794329373e-06, "loss": 0.2065, "step": 11972500 }, { "epoch": 7.18, "learning_rate": 5.776379467376735e-06, "loss": 0.2065, "step": 11973000 }, { "epoch": 7.18, "learning_rate": 5.77427950181617e-06, "loss": 0.2048, "step": 11973500 }, { "epoch": 7.18, "learning_rate": 5.772179536255606e-06, "loss": 0.2008, "step": 11974000 }, { "epoch": 7.18, "learning_rate": 5.770083770626162e-06, "loss": 0.2046, "step": 11974500 }, { "epoch": 7.18, "learning_rate": 5.767983805065597e-06, "loss": 0.2005, "step": 11975000 }, { "epoch": 7.18, "learning_rate": 5.765883839505032e-06, "loss": 0.2055, "step": 11975500 }, { "epoch": 7.18, "learning_rate": 5.763783873944467e-06, "loss": 0.2011, "step": 11976000 }, { "epoch": 7.18, "learning_rate": 5.761683908383902e-06, "loss": 0.2043, "step": 11976500 }, { "epoch": 7.18, "learning_rate": 5.7595839428233375e-06, "loss": 0.1988, "step": 11977000 }, { "epoch": 7.18, "learning_rate": 5.757483977262773e-06, "loss": 0.2022, "step": 11977500 }, { "epoch": 7.18, "learning_rate": 5.755384011702208e-06, "loss": 0.2036, "step": 11978000 }, { "epoch": 7.18, "learning_rate": 5.753288246072765e-06, "loss": 0.2047, "step": 11978500 }, { "epoch": 7.18, "learning_rate": 5.751192480443321e-06, "loss": 0.2092, "step": 11979000 }, { "epoch": 7.18, "learning_rate": 5.749092514882756e-06, "loss": 0.2058, "step": 11979500 }, { "epoch": 7.18, "learning_rate": 5.746992549322191e-06, "loss": 0.2066, "step": 11980000 }, { "epoch": 7.18, "learning_rate": 5.7448925837616255e-06, "loss": 0.2043, "step": 11980500 }, { "epoch": 7.18, "learning_rate": 5.7427926182010615e-06, "loss": 0.2026, "step": 11981000 }, { "epoch": 7.18, "learning_rate": 5.740692652640497e-06, "loss": 0.2046, "step": 11981500 }, { "epoch": 7.18, "learning_rate": 5.738592687079932e-06, "loss": 0.2075, "step": 11982000 }, { "epoch": 7.18, "learning_rate": 5.736492721519367e-06, "loss": 0.2071, "step": 11982500 }, { "epoch": 7.18, "learning_rate": 5.734396955889923e-06, "loss": 0.2015, "step": 11983000 }, { "epoch": 7.18, "learning_rate": 5.732296990329359e-06, "loss": 0.2092, "step": 11983500 }, { "epoch": 7.18, "learning_rate": 5.730197024768794e-06, "loss": 0.2006, "step": 11984000 }, { "epoch": 7.19, "learning_rate": 5.7280970592082285e-06, "loss": 0.2046, "step": 11984500 }, { "epoch": 7.19, "learning_rate": 5.725997093647664e-06, "loss": 0.2065, "step": 11985000 }, { "epoch": 7.19, "learning_rate": 5.723901328018221e-06, "loss": 0.207, "step": 11985500 }, { "epoch": 7.19, "learning_rate": 5.721801362457655e-06, "loss": 0.2064, "step": 11986000 }, { "epoch": 7.19, "learning_rate": 5.719701396897091e-06, "loss": 0.2025, "step": 11986500 }, { "epoch": 7.19, "learning_rate": 5.717601431336526e-06, "loss": 0.2059, "step": 11987000 }, { "epoch": 7.19, "learning_rate": 5.715505665707082e-06, "loss": 0.2061, "step": 11987500 }, { "epoch": 7.19, "learning_rate": 5.713405700146517e-06, "loss": 0.2091, "step": 11988000 }, { "epoch": 7.19, "learning_rate": 5.7113057345859525e-06, "loss": 0.2027, "step": 11988500 }, { "epoch": 7.19, "learning_rate": 5.7092057690253885e-06, "loss": 0.2026, "step": 11989000 }, { "epoch": 7.19, "learning_rate": 5.707105803464823e-06, "loss": 0.2045, "step": 11989500 }, { "epoch": 7.19, "learning_rate": 5.705005837904258e-06, "loss": 0.2111, "step": 11990000 }, { "epoch": 7.19, "learning_rate": 5.702910072274815e-06, "loss": 0.202, "step": 11990500 }, { "epoch": 7.19, "learning_rate": 5.70081010671425e-06, "loss": 0.2016, "step": 11991000 }, { "epoch": 7.19, "learning_rate": 5.698710141153684e-06, "loss": 0.2054, "step": 11991500 }, { "epoch": 7.19, "learning_rate": 5.69661017559312e-06, "loss": 0.2041, "step": 11992000 }, { "epoch": 7.19, "learning_rate": 5.6945144099636765e-06, "loss": 0.2033, "step": 11992500 }, { "epoch": 7.19, "learning_rate": 5.692414444403112e-06, "loss": 0.2079, "step": 11993000 }, { "epoch": 7.19, "learning_rate": 5.690314478842547e-06, "loss": 0.211, "step": 11993500 }, { "epoch": 7.19, "learning_rate": 5.688214513281982e-06, "loss": 0.2114, "step": 11994000 }, { "epoch": 7.19, "learning_rate": 5.686118747652538e-06, "loss": 0.2041, "step": 11994500 }, { "epoch": 7.19, "learning_rate": 5.684018782091973e-06, "loss": 0.206, "step": 11995000 }, { "epoch": 7.19, "learning_rate": 5.681918816531408e-06, "loss": 0.2044, "step": 11995500 }, { "epoch": 7.19, "learning_rate": 5.679818850970844e-06, "loss": 0.2017, "step": 11996000 }, { "epoch": 7.19, "learning_rate": 5.6777188854102795e-06, "loss": 0.206, "step": 11996500 }, { "epoch": 7.19, "learning_rate": 5.675623119780836e-06, "loss": 0.2089, "step": 11997000 }, { "epoch": 7.19, "learning_rate": 5.673523154220271e-06, "loss": 0.2043, "step": 11997500 }, { "epoch": 7.19, "learning_rate": 5.671423188659706e-06, "loss": 0.2105, "step": 11998000 }, { "epoch": 7.19, "learning_rate": 5.669323223099141e-06, "loss": 0.2059, "step": 11998500 }, { "epoch": 7.19, "learning_rate": 5.667223257538576e-06, "loss": 0.2082, "step": 11999000 }, { "epoch": 7.19, "learning_rate": 5.665127491909133e-06, "loss": 0.2067, "step": 11999500 }, { "epoch": 7.19, "learning_rate": 5.6630275263485675e-06, "loss": 0.2089, "step": 12000000 }, { "epoch": 7.19, "eval_loss": 0.20064257085323334, "eval_runtime": 1457.5703, "eval_samples_per_second": 361.369, "eval_steps_per_second": 60.228, "step": 12000000 }, { "epoch": 7.19, "learning_rate": 5.660927560788003e-06, "loss": 0.2029, "step": 12000500 }, { "epoch": 7.2, "learning_rate": 5.658827595227438e-06, "loss": 0.2085, "step": 12001000 }, { "epoch": 7.2, "learning_rate": 5.656731829597995e-06, "loss": 0.2036, "step": 12001500 }, { "epoch": 7.2, "learning_rate": 5.654631864037429e-06, "loss": 0.2027, "step": 12002000 }, { "epoch": 7.2, "learning_rate": 5.652531898476865e-06, "loss": 0.206, "step": 12002500 }, { "epoch": 7.2, "learning_rate": 5.6504319329163e-06, "loss": 0.2023, "step": 12003000 }, { "epoch": 7.2, "learning_rate": 5.648336167286856e-06, "loss": 0.2072, "step": 12003500 }, { "epoch": 7.2, "learning_rate": 5.6462362017262915e-06, "loss": 0.2086, "step": 12004000 }, { "epoch": 7.2, "learning_rate": 5.644136236165727e-06, "loss": 0.2072, "step": 12004500 }, { "epoch": 7.2, "learning_rate": 5.642036270605163e-06, "loss": 0.2038, "step": 12005000 }, { "epoch": 7.2, "learning_rate": 5.639936305044597e-06, "loss": 0.2064, "step": 12005500 }, { "epoch": 7.2, "learning_rate": 5.637836339484032e-06, "loss": 0.205, "step": 12006000 }, { "epoch": 7.2, "learning_rate": 5.635740573854589e-06, "loss": 0.2034, "step": 12006500 }, { "epoch": 7.2, "learning_rate": 5.633640608294023e-06, "loss": 0.2051, "step": 12007000 }, { "epoch": 7.2, "learning_rate": 5.6315406427334586e-06, "loss": 0.2052, "step": 12007500 }, { "epoch": 7.2, "learning_rate": 5.6294406771728946e-06, "loss": 0.2017, "step": 12008000 }, { "epoch": 7.2, "learning_rate": 5.627344911543451e-06, "loss": 0.2038, "step": 12008500 }, { "epoch": 7.2, "learning_rate": 5.625244945982886e-06, "loss": 0.2053, "step": 12009000 }, { "epoch": 7.2, "learning_rate": 5.623144980422321e-06, "loss": 0.2044, "step": 12009500 }, { "epoch": 7.2, "learning_rate": 5.621045014861756e-06, "loss": 0.2055, "step": 12010000 }, { "epoch": 7.2, "learning_rate": 5.618945049301191e-06, "loss": 0.2072, "step": 12010500 }, { "epoch": 7.2, "learning_rate": 5.6168450837406265e-06, "loss": 0.1995, "step": 12011000 }, { "epoch": 7.2, "learning_rate": 5.6147493181111826e-06, "loss": 0.2114, "step": 12011500 }, { "epoch": 7.2, "learning_rate": 5.6126493525506186e-06, "loss": 0.2052, "step": 12012000 }, { "epoch": 7.2, "learning_rate": 5.610549386990053e-06, "loss": 0.2073, "step": 12012500 }, { "epoch": 7.2, "learning_rate": 5.608449421429488e-06, "loss": 0.2018, "step": 12013000 }, { "epoch": 7.2, "learning_rate": 5.606349455868924e-06, "loss": 0.2051, "step": 12013500 }, { "epoch": 7.2, "learning_rate": 5.60425369023948e-06, "loss": 0.2024, "step": 12014000 }, { "epoch": 7.2, "learning_rate": 5.602153724678915e-06, "loss": 0.201, "step": 12014500 }, { "epoch": 7.2, "learning_rate": 5.6000537591183505e-06, "loss": 0.2039, "step": 12015000 }, { "epoch": 7.2, "learning_rate": 5.597953793557786e-06, "loss": 0.2034, "step": 12015500 }, { "epoch": 7.2, "learning_rate": 5.595853827997221e-06, "loss": 0.2051, "step": 12016000 }, { "epoch": 7.2, "learning_rate": 5.593753862436656e-06, "loss": 0.2079, "step": 12016500 }, { "epoch": 7.2, "learning_rate": 5.591653896876091e-06, "loss": 0.2048, "step": 12017000 }, { "epoch": 7.2, "learning_rate": 5.589558131246648e-06, "loss": 0.2038, "step": 12017500 }, { "epoch": 7.21, "learning_rate": 5.587458165686082e-06, "loss": 0.2061, "step": 12018000 }, { "epoch": 7.21, "learning_rate": 5.5853582001255175e-06, "loss": 0.208, "step": 12018500 }, { "epoch": 7.21, "learning_rate": 5.5832582345649535e-06, "loss": 0.2028, "step": 12019000 }, { "epoch": 7.21, "learning_rate": 5.581158269004389e-06, "loss": 0.2074, "step": 12019500 }, { "epoch": 7.21, "learning_rate": 5.579058303443824e-06, "loss": 0.2072, "step": 12020000 }, { "epoch": 7.21, "learning_rate": 5.57696253781438e-06, "loss": 0.2008, "step": 12020500 }, { "epoch": 7.21, "learning_rate": 5.574862572253815e-06, "loss": 0.2044, "step": 12021000 }, { "epoch": 7.21, "learning_rate": 5.57276260669325e-06, "loss": 0.2024, "step": 12021500 }, { "epoch": 7.21, "learning_rate": 5.570662641132685e-06, "loss": 0.2029, "step": 12022000 }, { "epoch": 7.21, "learning_rate": 5.5685626755721205e-06, "loss": 0.2079, "step": 12022500 }, { "epoch": 7.21, "learning_rate": 5.566462710011556e-06, "loss": 0.2049, "step": 12023000 }, { "epoch": 7.21, "learning_rate": 5.564362744450991e-06, "loss": 0.2022, "step": 12023500 }, { "epoch": 7.21, "learning_rate": 5.562266978821547e-06, "loss": 0.2084, "step": 12024000 }, { "epoch": 7.21, "learning_rate": 5.560167013260982e-06, "loss": 0.2031, "step": 12024500 }, { "epoch": 7.21, "learning_rate": 5.558067047700418e-06, "loss": 0.1992, "step": 12025000 }, { "epoch": 7.21, "learning_rate": 5.555971282070973e-06, "loss": 0.2101, "step": 12025500 }, { "epoch": 7.21, "learning_rate": 5.553871316510409e-06, "loss": 0.2066, "step": 12026000 }, { "epoch": 7.21, "learning_rate": 5.5517713509498445e-06, "loss": 0.1979, "step": 12026500 }, { "epoch": 7.21, "learning_rate": 5.54967138538928e-06, "loss": 0.2033, "step": 12027000 }, { "epoch": 7.21, "learning_rate": 5.547571419828715e-06, "loss": 0.208, "step": 12027500 }, { "epoch": 7.21, "learning_rate": 5.54547145426815e-06, "loss": 0.206, "step": 12028000 }, { "epoch": 7.21, "learning_rate": 5.543371488707585e-06, "loss": 0.2033, "step": 12028500 }, { "epoch": 7.21, "learning_rate": 5.54127152314702e-06, "loss": 0.2028, "step": 12029000 }, { "epoch": 7.21, "learning_rate": 5.5391715575864554e-06, "loss": 0.2094, "step": 12029500 }, { "epoch": 7.21, "learning_rate": 5.537071592025891e-06, "loss": 0.2084, "step": 12030000 }, { "epoch": 7.21, "learning_rate": 5.534971626465326e-06, "loss": 0.202, "step": 12030500 }, { "epoch": 7.21, "learning_rate": 5.532875860835882e-06, "loss": 0.2084, "step": 12031000 }, { "epoch": 7.21, "learning_rate": 5.530775895275317e-06, "loss": 0.2021, "step": 12031500 }, { "epoch": 7.21, "learning_rate": 5.528675929714752e-06, "loss": 0.2118, "step": 12032000 }, { "epoch": 7.21, "learning_rate": 5.526575964154188e-06, "loss": 0.2069, "step": 12032500 }, { "epoch": 7.21, "learning_rate": 5.5244801985247434e-06, "loss": 0.2056, "step": 12033000 }, { "epoch": 7.21, "learning_rate": 5.5223802329641794e-06, "loss": 0.2015, "step": 12033500 }, { "epoch": 7.21, "learning_rate": 5.520280267403615e-06, "loss": 0.2029, "step": 12034000 }, { "epoch": 7.22, "learning_rate": 5.51818030184305e-06, "loss": 0.2062, "step": 12034500 }, { "epoch": 7.22, "learning_rate": 5.516080336282485e-06, "loss": 0.2005, "step": 12035000 }, { "epoch": 7.22, "learning_rate": 5.513984570653041e-06, "loss": 0.2069, "step": 12035500 }, { "epoch": 7.22, "learning_rate": 5.511884605092477e-06, "loss": 0.2022, "step": 12036000 }, { "epoch": 7.22, "learning_rate": 5.509784639531911e-06, "loss": 0.1994, "step": 12036500 }, { "epoch": 7.22, "learning_rate": 5.5076846739713465e-06, "loss": 0.2088, "step": 12037000 }, { "epoch": 7.22, "learning_rate": 5.5055889083419034e-06, "loss": 0.2073, "step": 12037500 }, { "epoch": 7.22, "learning_rate": 5.503488942781339e-06, "loss": 0.2016, "step": 12038000 }, { "epoch": 7.22, "learning_rate": 5.501388977220773e-06, "loss": 0.2024, "step": 12038500 }, { "epoch": 7.22, "learning_rate": 5.499289011660209e-06, "loss": 0.2057, "step": 12039000 }, { "epoch": 7.22, "learning_rate": 5.497189046099644e-06, "loss": 0.199, "step": 12039500 }, { "epoch": 7.22, "learning_rate": 5.4950932804702e-06, "loss": 0.2044, "step": 12040000 }, { "epoch": 7.22, "learning_rate": 5.492993314909635e-06, "loss": 0.2035, "step": 12040500 }, { "epoch": 7.22, "learning_rate": 5.4908933493490705e-06, "loss": 0.211, "step": 12041000 }, { "epoch": 7.22, "learning_rate": 5.4887933837885065e-06, "loss": 0.2044, "step": 12041500 }, { "epoch": 7.22, "learning_rate": 5.486693418227941e-06, "loss": 0.2096, "step": 12042000 }, { "epoch": 7.22, "learning_rate": 5.484593452667376e-06, "loss": 0.2022, "step": 12042500 }, { "epoch": 7.22, "learning_rate": 5.482493487106811e-06, "loss": 0.2027, "step": 12043000 }, { "epoch": 7.22, "learning_rate": 5.480393521546247e-06, "loss": 0.203, "step": 12043500 }, { "epoch": 7.22, "learning_rate": 5.478297755916802e-06, "loss": 0.2024, "step": 12044000 }, { "epoch": 7.22, "learning_rate": 5.476197790356238e-06, "loss": 0.2061, "step": 12044500 }, { "epoch": 7.22, "learning_rate": 5.4741020247267945e-06, "loss": 0.2025, "step": 12045000 }, { "epoch": 7.22, "learning_rate": 5.47200205916623e-06, "loss": 0.2049, "step": 12045500 }, { "epoch": 7.22, "learning_rate": 5.469902093605665e-06, "loss": 0.2027, "step": 12046000 }, { "epoch": 7.22, "learning_rate": 5.4678021280451e-06, "loss": 0.2031, "step": 12046500 }, { "epoch": 7.22, "learning_rate": 5.465706362415656e-06, "loss": 0.199, "step": 12047000 }, { "epoch": 7.22, "learning_rate": 5.463606396855091e-06, "loss": 0.2013, "step": 12047500 }, { "epoch": 7.22, "learning_rate": 5.461506431294526e-06, "loss": 0.2052, "step": 12048000 }, { "epoch": 7.22, "learning_rate": 5.459406465733962e-06, "loss": 0.2044, "step": 12048500 }, { "epoch": 7.22, "learning_rate": 5.457306500173397e-06, "loss": 0.2095, "step": 12049000 }, { "epoch": 7.22, "learning_rate": 5.455210734543954e-06, "loss": 0.2048, "step": 12049500 }, { "epoch": 7.22, "learning_rate": 5.453110768983389e-06, "loss": 0.2042, "step": 12050000 }, { "epoch": 7.22, "learning_rate": 5.451010803422824e-06, "loss": 0.2056, "step": 12050500 }, { "epoch": 7.23, "learning_rate": 5.448910837862259e-06, "loss": 0.2052, "step": 12051000 }, { "epoch": 7.23, "learning_rate": 5.446815072232815e-06, "loss": 0.2033, "step": 12051500 }, { "epoch": 7.23, "learning_rate": 5.444719306603371e-06, "loss": 0.202, "step": 12052000 }, { "epoch": 7.23, "learning_rate": 5.4426193410428065e-06, "loss": 0.2049, "step": 12052500 }, { "epoch": 7.23, "learning_rate": 5.4405193754822425e-06, "loss": 0.2073, "step": 12053000 }, { "epoch": 7.23, "learning_rate": 5.438419409921678e-06, "loss": 0.2041, "step": 12053500 }, { "epoch": 7.23, "learning_rate": 5.436319444361112e-06, "loss": 0.2046, "step": 12054000 }, { "epoch": 7.23, "learning_rate": 5.434219478800547e-06, "loss": 0.2002, "step": 12054500 }, { "epoch": 7.23, "learning_rate": 5.432119513239983e-06, "loss": 0.2, "step": 12055000 }, { "epoch": 7.23, "learning_rate": 5.430019547679418e-06, "loss": 0.2034, "step": 12055500 }, { "epoch": 7.23, "learning_rate": 5.427919582118853e-06, "loss": 0.2048, "step": 12056000 }, { "epoch": 7.23, "learning_rate": 5.4258196165582885e-06, "loss": 0.2004, "step": 12056500 }, { "epoch": 7.23, "learning_rate": 5.423719650997724e-06, "loss": 0.2028, "step": 12057000 }, { "epoch": 7.23, "learning_rate": 5.421623885368281e-06, "loss": 0.1995, "step": 12057500 }, { "epoch": 7.23, "learning_rate": 5.419523919807715e-06, "loss": 0.1987, "step": 12058000 }, { "epoch": 7.23, "learning_rate": 5.41742395424715e-06, "loss": 0.2054, "step": 12058500 }, { "epoch": 7.23, "learning_rate": 5.415323988686585e-06, "loss": 0.2053, "step": 12059000 }, { "epoch": 7.23, "learning_rate": 5.413224023126021e-06, "loss": 0.2145, "step": 12059500 }, { "epoch": 7.23, "learning_rate": 5.4111282574965765e-06, "loss": 0.2062, "step": 12060000 }, { "epoch": 7.23, "learning_rate": 5.4090282919360125e-06, "loss": 0.2085, "step": 12060500 }, { "epoch": 7.23, "learning_rate": 5.406928326375448e-06, "loss": 0.2054, "step": 12061000 }, { "epoch": 7.23, "learning_rate": 5.404828360814883e-06, "loss": 0.2025, "step": 12061500 }, { "epoch": 7.23, "learning_rate": 5.402728395254317e-06, "loss": 0.2084, "step": 12062000 }, { "epoch": 7.23, "learning_rate": 5.400632629624874e-06, "loss": 0.208, "step": 12062500 }, { "epoch": 7.23, "learning_rate": 5.398532664064309e-06, "loss": 0.2077, "step": 12063000 }, { "epoch": 7.23, "learning_rate": 5.396432698503744e-06, "loss": 0.2039, "step": 12063500 }, { "epoch": 7.23, "learning_rate": 5.3943327329431796e-06, "loss": 0.2042, "step": 12064000 }, { "epoch": 7.23, "learning_rate": 5.392232767382615e-06, "loss": 0.2031, "step": 12064500 }, { "epoch": 7.23, "learning_rate": 5.390132801822051e-06, "loss": 0.2094, "step": 12065000 }, { "epoch": 7.23, "learning_rate": 5.388037036192606e-06, "loss": 0.2046, "step": 12065500 }, { "epoch": 7.23, "learning_rate": 5.385937070632042e-06, "loss": 0.2028, "step": 12066000 }, { "epoch": 7.23, "learning_rate": 5.383837105071477e-06, "loss": 0.2065, "step": 12066500 }, { "epoch": 7.23, "learning_rate": 5.3817371395109114e-06, "loss": 0.199, "step": 12067000 }, { "epoch": 7.23, "learning_rate": 5.379641373881468e-06, "loss": 0.2071, "step": 12067500 }, { "epoch": 7.24, "learning_rate": 5.3775414083209036e-06, "loss": 0.2076, "step": 12068000 }, { "epoch": 7.24, "learning_rate": 5.375441442760339e-06, "loss": 0.2035, "step": 12068500 }, { "epoch": 7.24, "learning_rate": 5.373341477199774e-06, "loss": 0.2036, "step": 12069000 }, { "epoch": 7.24, "learning_rate": 5.371241511639209e-06, "loss": 0.1996, "step": 12069500 }, { "epoch": 7.24, "learning_rate": 5.369141546078644e-06, "loss": 0.2, "step": 12070000 }, { "epoch": 7.24, "learning_rate": 5.3670457804492e-06, "loss": 0.2058, "step": 12070500 }, { "epoch": 7.24, "learning_rate": 5.3649458148886354e-06, "loss": 0.2052, "step": 12071000 }, { "epoch": 7.24, "learning_rate": 5.3628458493280714e-06, "loss": 0.2071, "step": 12071500 }, { "epoch": 7.24, "learning_rate": 5.360745883767507e-06, "loss": 0.2032, "step": 12072000 }, { "epoch": 7.24, "learning_rate": 5.358650118138063e-06, "loss": 0.2023, "step": 12072500 }, { "epoch": 7.24, "learning_rate": 5.356550152577498e-06, "loss": 0.2034, "step": 12073000 }, { "epoch": 7.24, "learning_rate": 5.354450187016933e-06, "loss": 0.2083, "step": 12073500 }, { "epoch": 7.24, "learning_rate": 5.352350221456368e-06, "loss": 0.2064, "step": 12074000 }, { "epoch": 7.24, "learning_rate": 5.350250255895803e-06, "loss": 0.2047, "step": 12074500 }, { "epoch": 7.24, "learning_rate": 5.3481502903352385e-06, "loss": 0.2037, "step": 12075000 }, { "epoch": 7.24, "learning_rate": 5.346050324774674e-06, "loss": 0.206, "step": 12075500 }, { "epoch": 7.24, "learning_rate": 5.343950359214109e-06, "loss": 0.2062, "step": 12076000 }, { "epoch": 7.24, "learning_rate": 5.341854593584665e-06, "loss": 0.2036, "step": 12076500 }, { "epoch": 7.24, "learning_rate": 5.3397546280241e-06, "loss": 0.2059, "step": 12077000 }, { "epoch": 7.24, "learning_rate": 5.337654662463536e-06, "loss": 0.204, "step": 12077500 }, { "epoch": 7.24, "learning_rate": 5.33555469690297e-06, "loss": 0.2001, "step": 12078000 }, { "epoch": 7.24, "learning_rate": 5.333463131204648e-06, "loss": 0.2072, "step": 12078500 }, { "epoch": 7.24, "learning_rate": 5.331363165644083e-06, "loss": 0.202, "step": 12079000 }, { "epoch": 7.24, "learning_rate": 5.3292674000146395e-06, "loss": 0.2047, "step": 12079500 }, { "epoch": 7.24, "learning_rate": 5.327167434454075e-06, "loss": 0.2051, "step": 12080000 }, { "epoch": 7.24, "learning_rate": 5.32506746889351e-06, "loss": 0.2025, "step": 12080500 }, { "epoch": 7.24, "learning_rate": 5.322967503332945e-06, "loss": 0.2039, "step": 12081000 }, { "epoch": 7.24, "learning_rate": 5.32086753777238e-06, "loss": 0.2047, "step": 12081500 }, { "epoch": 7.24, "learning_rate": 5.318767572211816e-06, "loss": 0.2037, "step": 12082000 }, { "epoch": 7.24, "learning_rate": 5.316667606651251e-06, "loss": 0.2035, "step": 12082500 }, { "epoch": 7.24, "learning_rate": 5.314567641090686e-06, "loss": 0.2077, "step": 12083000 }, { "epoch": 7.24, "learning_rate": 5.312471875461243e-06, "loss": 0.2103, "step": 12083500 }, { "epoch": 7.24, "learning_rate": 5.310371909900678e-06, "loss": 0.2072, "step": 12084000 }, { "epoch": 7.25, "learning_rate": 5.308271944340112e-06, "loss": 0.2054, "step": 12084500 }, { "epoch": 7.25, "learning_rate": 5.306171978779548e-06, "loss": 0.2039, "step": 12085000 }, { "epoch": 7.25, "learning_rate": 5.304072013218983e-06, "loss": 0.2064, "step": 12085500 }, { "epoch": 7.25, "learning_rate": 5.301972047658418e-06, "loss": 0.2061, "step": 12086000 }, { "epoch": 7.25, "learning_rate": 5.2998720820978535e-06, "loss": 0.2064, "step": 12086500 }, { "epoch": 7.25, "learning_rate": 5.297772116537289e-06, "loss": 0.2076, "step": 12087000 }, { "epoch": 7.25, "learning_rate": 5.295680550838967e-06, "loss": 0.2074, "step": 12087500 }, { "epoch": 7.25, "learning_rate": 5.293580585278401e-06, "loss": 0.2068, "step": 12088000 }, { "epoch": 7.25, "learning_rate": 5.291480619717837e-06, "loss": 0.2052, "step": 12088500 }, { "epoch": 7.25, "learning_rate": 5.289380654157272e-06, "loss": 0.1998, "step": 12089000 }, { "epoch": 7.25, "learning_rate": 5.287280688596707e-06, "loss": 0.2016, "step": 12089500 }, { "epoch": 7.25, "learning_rate": 5.2851807230361415e-06, "loss": 0.2017, "step": 12090000 }, { "epoch": 7.25, "learning_rate": 5.2830849574066985e-06, "loss": 0.2032, "step": 12090500 }, { "epoch": 7.25, "learning_rate": 5.280984991846134e-06, "loss": 0.2085, "step": 12091000 }, { "epoch": 7.25, "learning_rate": 5.278885026285569e-06, "loss": 0.2072, "step": 12091500 }, { "epoch": 7.25, "learning_rate": 5.276785060725004e-06, "loss": 0.2058, "step": 12092000 }, { "epoch": 7.25, "learning_rate": 5.274685095164439e-06, "loss": 0.2054, "step": 12092500 }, { "epoch": 7.25, "learning_rate": 5.272585129603874e-06, "loss": 0.2037, "step": 12093000 }, { "epoch": 7.25, "learning_rate": 5.27048516404331e-06, "loss": 0.2031, "step": 12093500 }, { "epoch": 7.25, "learning_rate": 5.2683851984827445e-06, "loss": 0.2054, "step": 12094000 }, { "epoch": 7.25, "learning_rate": 5.26628523292218e-06, "loss": 0.2091, "step": 12094500 }, { "epoch": 7.25, "learning_rate": 5.264189467292737e-06, "loss": 0.2045, "step": 12095000 }, { "epoch": 7.25, "learning_rate": 5.262089501732171e-06, "loss": 0.2082, "step": 12095500 }, { "epoch": 7.25, "learning_rate": 5.259989536171607e-06, "loss": 0.2005, "step": 12096000 }, { "epoch": 7.25, "learning_rate": 5.257889570611042e-06, "loss": 0.2017, "step": 12096500 }, { "epoch": 7.25, "learning_rate": 5.255793804981598e-06, "loss": 0.2103, "step": 12097000 }, { "epoch": 7.25, "learning_rate": 5.253693839421033e-06, "loss": 0.2028, "step": 12097500 }, { "epoch": 7.25, "learning_rate": 5.2515938738604685e-06, "loss": 0.2046, "step": 12098000 }, { "epoch": 7.25, "learning_rate": 5.249493908299904e-06, "loss": 0.2071, "step": 12098500 }, { "epoch": 7.25, "learning_rate": 5.24739394273934e-06, "loss": 0.2059, "step": 12099000 }, { "epoch": 7.25, "learning_rate": 5.245293977178774e-06, "loss": 0.207, "step": 12099500 }, { "epoch": 7.25, "learning_rate": 5.243194011618209e-06, "loss": 0.2018, "step": 12100000 }, { "epoch": 7.25, "eval_loss": 0.19976554811000824, "eval_runtime": 1460.1965, "eval_samples_per_second": 360.719, "eval_steps_per_second": 60.12, "step": 12100000 }, { "epoch": 7.25, "learning_rate": 5.241094046057644e-06, "loss": 0.2052, "step": 12100500 }, { "epoch": 7.26, "learning_rate": 5.23899408049708e-06, "loss": 0.207, "step": 12101000 }, { "epoch": 7.26, "learning_rate": 5.2368983148676356e-06, "loss": 0.207, "step": 12101500 }, { "epoch": 7.26, "learning_rate": 5.2347983493070716e-06, "loss": 0.2073, "step": 12102000 }, { "epoch": 7.26, "learning_rate": 5.232698383746507e-06, "loss": 0.2049, "step": 12102500 }, { "epoch": 7.26, "learning_rate": 5.230598418185941e-06, "loss": 0.2007, "step": 12103000 }, { "epoch": 7.26, "learning_rate": 5.228502652556498e-06, "loss": 0.2064, "step": 12103500 }, { "epoch": 7.26, "learning_rate": 5.226402686995933e-06, "loss": 0.2034, "step": 12104000 }, { "epoch": 7.26, "learning_rate": 5.224302721435368e-06, "loss": 0.2086, "step": 12104500 }, { "epoch": 7.26, "learning_rate": 5.2222027558748034e-06, "loss": 0.2034, "step": 12105000 }, { "epoch": 7.26, "learning_rate": 5.22010699024536e-06, "loss": 0.208, "step": 12105500 }, { "epoch": 7.26, "learning_rate": 5.2180070246847956e-06, "loss": 0.2085, "step": 12106000 }, { "epoch": 7.26, "learning_rate": 5.21590705912423e-06, "loss": 0.2161, "step": 12106500 }, { "epoch": 7.26, "learning_rate": 5.213807093563665e-06, "loss": 0.2015, "step": 12107000 }, { "epoch": 7.26, "learning_rate": 5.211711327934222e-06, "loss": 0.2123, "step": 12107500 }, { "epoch": 7.26, "learning_rate": 5.209611362373656e-06, "loss": 0.2067, "step": 12108000 }, { "epoch": 7.26, "learning_rate": 5.207511396813092e-06, "loss": 0.2063, "step": 12108500 }, { "epoch": 7.26, "learning_rate": 5.2054114312525274e-06, "loss": 0.2001, "step": 12109000 }, { "epoch": 7.26, "learning_rate": 5.203311465691963e-06, "loss": 0.2036, "step": 12109500 }, { "epoch": 7.26, "learning_rate": 5.201211500131398e-06, "loss": 0.2052, "step": 12110000 }, { "epoch": 7.26, "learning_rate": 5.199111534570833e-06, "loss": 0.2058, "step": 12110500 }, { "epoch": 7.26, "learning_rate": 5.197011569010268e-06, "loss": 0.2043, "step": 12111000 }, { "epoch": 7.26, "learning_rate": 5.194911603449703e-06, "loss": 0.2032, "step": 12111500 }, { "epoch": 7.26, "learning_rate": 5.192811637889139e-06, "loss": 0.2008, "step": 12112000 }, { "epoch": 7.26, "learning_rate": 5.1907116723285735e-06, "loss": 0.2015, "step": 12112500 }, { "epoch": 7.26, "learning_rate": 5.188611706768009e-06, "loss": 0.2046, "step": 12113000 }, { "epoch": 7.26, "learning_rate": 5.186515941138566e-06, "loss": 0.2013, "step": 12113500 }, { "epoch": 7.26, "learning_rate": 5.184415975578e-06, "loss": 0.204, "step": 12114000 }, { "epoch": 7.26, "learning_rate": 5.182316010017435e-06, "loss": 0.2113, "step": 12114500 }, { "epoch": 7.26, "learning_rate": 5.180216044456871e-06, "loss": 0.2051, "step": 12115000 }, { "epoch": 7.26, "learning_rate": 5.178120278827427e-06, "loss": 0.2069, "step": 12115500 }, { "epoch": 7.26, "learning_rate": 5.176020313266862e-06, "loss": 0.2069, "step": 12116000 }, { "epoch": 7.26, "learning_rate": 5.1739203477062975e-06, "loss": 0.2, "step": 12116500 }, { "epoch": 7.26, "learning_rate": 5.171820382145733e-06, "loss": 0.2035, "step": 12117000 }, { "epoch": 7.26, "learning_rate": 5.169720416585169e-06, "loss": 0.2061, "step": 12117500 }, { "epoch": 7.27, "learning_rate": 5.167620451024603e-06, "loss": 0.207, "step": 12118000 }, { "epoch": 7.27, "learning_rate": 5.16552468539516e-06, "loss": 0.2032, "step": 12118500 }, { "epoch": 7.27, "learning_rate": 5.163424719834595e-06, "loss": 0.2037, "step": 12119000 }, { "epoch": 7.27, "learning_rate": 5.161324754274029e-06, "loss": 0.2068, "step": 12119500 }, { "epoch": 7.27, "learning_rate": 5.1592247887134645e-06, "loss": 0.2022, "step": 12120000 }, { "epoch": 7.27, "learning_rate": 5.1571290230840215e-06, "loss": 0.2032, "step": 12120500 }, { "epoch": 7.27, "learning_rate": 5.155029057523456e-06, "loss": 0.2012, "step": 12121000 }, { "epoch": 7.27, "learning_rate": 5.152929091962892e-06, "loss": 0.2053, "step": 12121500 }, { "epoch": 7.27, "learning_rate": 5.150829126402327e-06, "loss": 0.1957, "step": 12122000 }, { "epoch": 7.27, "learning_rate": 5.148729160841762e-06, "loss": 0.2118, "step": 12122500 }, { "epoch": 7.27, "learning_rate": 5.146633395212318e-06, "loss": 0.2036, "step": 12123000 }, { "epoch": 7.27, "learning_rate": 5.144533429651753e-06, "loss": 0.2049, "step": 12123500 }, { "epoch": 7.27, "learning_rate": 5.142433464091189e-06, "loss": 0.2069, "step": 12124000 }, { "epoch": 7.27, "learning_rate": 5.1403334985306245e-06, "loss": 0.198, "step": 12124500 }, { "epoch": 7.27, "learning_rate": 5.138233532970059e-06, "loss": 0.2059, "step": 12125000 }, { "epoch": 7.27, "learning_rate": 5.136137767340616e-06, "loss": 0.2042, "step": 12125500 }, { "epoch": 7.27, "learning_rate": 5.134037801780051e-06, "loss": 0.2059, "step": 12126000 }, { "epoch": 7.27, "learning_rate": 5.131937836219485e-06, "loss": 0.2066, "step": 12126500 }, { "epoch": 7.27, "learning_rate": 5.129837870658921e-06, "loss": 0.203, "step": 12127000 }, { "epoch": 7.27, "learning_rate": 5.127737905098356e-06, "loss": 0.2013, "step": 12127500 }, { "epoch": 7.27, "learning_rate": 5.1256379395377916e-06, "loss": 0.2015, "step": 12128000 }, { "epoch": 7.27, "learning_rate": 5.123542173908348e-06, "loss": 0.2062, "step": 12128500 }, { "epoch": 7.27, "learning_rate": 5.121442208347783e-06, "loss": 0.2084, "step": 12129000 }, { "epoch": 7.27, "learning_rate": 5.119342242787218e-06, "loss": 0.2036, "step": 12129500 }, { "epoch": 7.27, "learning_rate": 5.117242277226654e-06, "loss": 0.2066, "step": 12130000 }, { "epoch": 7.27, "learning_rate": 5.115146511597209e-06, "loss": 0.2012, "step": 12130500 }, { "epoch": 7.27, "learning_rate": 5.113046546036645e-06, "loss": 0.2019, "step": 12131000 }, { "epoch": 7.27, "learning_rate": 5.11094658047608e-06, "loss": 0.2005, "step": 12131500 }, { "epoch": 7.27, "learning_rate": 5.108846614915515e-06, "loss": 0.2014, "step": 12132000 }, { "epoch": 7.27, "learning_rate": 5.106746649354951e-06, "loss": 0.2049, "step": 12132500 }, { "epoch": 7.27, "learning_rate": 5.104650883725507e-06, "loss": 0.2008, "step": 12133000 }, { "epoch": 7.27, "learning_rate": 5.102550918164942e-06, "loss": 0.2074, "step": 12133500 }, { "epoch": 7.27, "learning_rate": 5.100450952604377e-06, "loss": 0.1975, "step": 12134000 }, { "epoch": 7.28, "learning_rate": 5.098350987043812e-06, "loss": 0.2128, "step": 12134500 }, { "epoch": 7.28, "learning_rate": 5.096255221414369e-06, "loss": 0.2072, "step": 12135000 }, { "epoch": 7.28, "learning_rate": 5.0941552558538036e-06, "loss": 0.2001, "step": 12135500 }, { "epoch": 7.28, "learning_rate": 5.092055290293239e-06, "loss": 0.2046, "step": 12136000 }, { "epoch": 7.28, "learning_rate": 5.089955324732675e-06, "loss": 0.2036, "step": 12136500 }, { "epoch": 7.28, "learning_rate": 5.08785535917211e-06, "loss": 0.2035, "step": 12137000 }, { "epoch": 7.28, "learning_rate": 5.085759593542666e-06, "loss": 0.2056, "step": 12137500 }, { "epoch": 7.28, "learning_rate": 5.083659627982101e-06, "loss": 0.2045, "step": 12138000 }, { "epoch": 7.28, "learning_rate": 5.081559662421536e-06, "loss": 0.2093, "step": 12138500 }, { "epoch": 7.28, "learning_rate": 5.079459696860971e-06, "loss": 0.2055, "step": 12139000 }, { "epoch": 7.28, "learning_rate": 5.0773639312315276e-06, "loss": 0.2024, "step": 12139500 }, { "epoch": 7.28, "learning_rate": 5.0752639656709636e-06, "loss": 0.2014, "step": 12140000 }, { "epoch": 7.28, "learning_rate": 5.073164000110398e-06, "loss": 0.2019, "step": 12140500 }, { "epoch": 7.28, "learning_rate": 5.071064034549833e-06, "loss": 0.2054, "step": 12141000 }, { "epoch": 7.28, "learning_rate": 5.068964068989268e-06, "loss": 0.2041, "step": 12141500 }, { "epoch": 7.28, "learning_rate": 5.066864103428704e-06, "loss": 0.2036, "step": 12142000 }, { "epoch": 7.28, "learning_rate": 5.0647683377992594e-06, "loss": 0.2017, "step": 12142500 }, { "epoch": 7.28, "learning_rate": 5.0626683722386954e-06, "loss": 0.2064, "step": 12143000 }, { "epoch": 7.28, "learning_rate": 5.060568406678131e-06, "loss": 0.205, "step": 12143500 }, { "epoch": 7.28, "learning_rate": 5.058468441117566e-06, "loss": 0.2031, "step": 12144000 }, { "epoch": 7.28, "learning_rate": 5.056368475557e-06, "loss": 0.2092, "step": 12144500 }, { "epoch": 7.28, "learning_rate": 5.054268509996436e-06, "loss": 0.2073, "step": 12145000 }, { "epoch": 7.28, "learning_rate": 5.052172744366992e-06, "loss": 0.2134, "step": 12145500 }, { "epoch": 7.28, "learning_rate": 5.050072778806427e-06, "loss": 0.206, "step": 12146000 }, { "epoch": 7.28, "learning_rate": 5.0479728132458625e-06, "loss": 0.1997, "step": 12146500 }, { "epoch": 7.28, "learning_rate": 5.045872847685298e-06, "loss": 0.2024, "step": 12147000 }, { "epoch": 7.28, "learning_rate": 5.043772882124734e-06, "loss": 0.2053, "step": 12147500 }, { "epoch": 7.28, "learning_rate": 5.041677116495289e-06, "loss": 0.2049, "step": 12148000 }, { "epoch": 7.28, "learning_rate": 5.039577150934725e-06, "loss": 0.2011, "step": 12148500 }, { "epoch": 7.28, "learning_rate": 5.03747718537416e-06, "loss": 0.2009, "step": 12149000 }, { "epoch": 7.28, "learning_rate": 5.035377219813595e-06, "loss": 0.2074, "step": 12149500 }, { "epoch": 7.28, "learning_rate": 5.0332772542530295e-06, "loss": 0.2064, "step": 12150000 }, { "epoch": 7.28, "learning_rate": 5.0311772886924655e-06, "loss": 0.2091, "step": 12150500 }, { "epoch": 7.29, "learning_rate": 5.029077323131901e-06, "loss": 0.2, "step": 12151000 }, { "epoch": 7.29, "learning_rate": 5.026981557502457e-06, "loss": 0.2042, "step": 12151500 }, { "epoch": 7.29, "learning_rate": 5.024881591941892e-06, "loss": 0.1992, "step": 12152000 }, { "epoch": 7.29, "learning_rate": 5.022781626381327e-06, "loss": 0.2014, "step": 12152500 }, { "epoch": 7.29, "learning_rate": 5.020681660820762e-06, "loss": 0.2041, "step": 12153000 }, { "epoch": 7.29, "learning_rate": 5.018585895191318e-06, "loss": 0.2075, "step": 12153500 }, { "epoch": 7.29, "learning_rate": 5.0164859296307535e-06, "loss": 0.2024, "step": 12154000 }, { "epoch": 7.29, "learning_rate": 5.0143859640701895e-06, "loss": 0.2071, "step": 12154500 }, { "epoch": 7.29, "learning_rate": 5.012285998509625e-06, "loss": 0.2063, "step": 12155000 }, { "epoch": 7.29, "learning_rate": 5.010186032949059e-06, "loss": 0.2057, "step": 12155500 }, { "epoch": 7.29, "learning_rate": 5.008086067388495e-06, "loss": 0.2039, "step": 12156000 }, { "epoch": 7.29, "learning_rate": 5.00598610182793e-06, "loss": 0.2041, "step": 12156500 }, { "epoch": 7.29, "learning_rate": 5.003886136267365e-06, "loss": 0.2039, "step": 12157000 }, { "epoch": 7.29, "learning_rate": 5.001790370637921e-06, "loss": 0.2058, "step": 12157500 }, { "epoch": 7.29, "learning_rate": 4.999694605008478e-06, "loss": 0.2023, "step": 12158000 }, { "epoch": 7.29, "learning_rate": 4.997594639447913e-06, "loss": 0.2025, "step": 12158500 }, { "epoch": 7.29, "learning_rate": 4.995494673887348e-06, "loss": 0.2055, "step": 12159000 }, { "epoch": 7.29, "learning_rate": 4.993394708326783e-06, "loss": 0.207, "step": 12159500 }, { "epoch": 7.29, "learning_rate": 4.991294742766219e-06, "loss": 0.2034, "step": 12160000 }, { "epoch": 7.29, "learning_rate": 4.989198977136774e-06, "loss": 0.2011, "step": 12160500 }, { "epoch": 7.29, "learning_rate": 4.98709901157621e-06, "loss": 0.2014, "step": 12161000 }, { "epoch": 7.29, "learning_rate": 4.984999046015645e-06, "loss": 0.2042, "step": 12161500 }, { "epoch": 7.29, "learning_rate": 4.9828990804550805e-06, "loss": 0.21, "step": 12162000 }, { "epoch": 7.29, "learning_rate": 4.980799114894516e-06, "loss": 0.2012, "step": 12162500 }, { "epoch": 7.29, "learning_rate": 4.978699149333951e-06, "loss": 0.2045, "step": 12163000 }, { "epoch": 7.29, "learning_rate": 4.976603383704508e-06, "loss": 0.205, "step": 12163500 }, { "epoch": 7.29, "learning_rate": 4.974503418143942e-06, "loss": 0.2079, "step": 12164000 }, { "epoch": 7.29, "learning_rate": 4.972403452583377e-06, "loss": 0.2024, "step": 12164500 }, { "epoch": 7.29, "learning_rate": 4.9703034870228124e-06, "loss": 0.204, "step": 12165000 }, { "epoch": 7.29, "learning_rate": 4.968207721393369e-06, "loss": 0.2097, "step": 12165500 }, { "epoch": 7.29, "learning_rate": 4.966107755832804e-06, "loss": 0.2039, "step": 12166000 }, { "epoch": 7.29, "learning_rate": 4.96400779027224e-06, "loss": 0.2066, "step": 12166500 }, { "epoch": 7.29, "learning_rate": 4.961907824711675e-06, "loss": 0.2077, "step": 12167000 }, { "epoch": 7.29, "learning_rate": 4.95980785915111e-06, "loss": 0.2044, "step": 12167500 }, { "epoch": 7.3, "learning_rate": 4.957712093521666e-06, "loss": 0.2073, "step": 12168000 }, { "epoch": 7.3, "learning_rate": 4.955612127961101e-06, "loss": 0.2056, "step": 12168500 }, { "epoch": 7.3, "learning_rate": 4.953512162400536e-06, "loss": 0.2088, "step": 12169000 }, { "epoch": 7.3, "learning_rate": 4.9514121968399716e-06, "loss": 0.2018, "step": 12169500 }, { "epoch": 7.3, "learning_rate": 4.949316431210528e-06, "loss": 0.2026, "step": 12170000 }, { "epoch": 7.3, "learning_rate": 4.947216465649964e-06, "loss": 0.2032, "step": 12170500 }, { "epoch": 7.3, "learning_rate": 4.945116500089399e-06, "loss": 0.2021, "step": 12171000 }, { "epoch": 7.3, "learning_rate": 4.943016534528833e-06, "loss": 0.2003, "step": 12171500 }, { "epoch": 7.3, "learning_rate": 4.940916568968269e-06, "loss": 0.205, "step": 12172000 }, { "epoch": 7.3, "learning_rate": 4.938820803338825e-06, "loss": 0.2004, "step": 12172500 }, { "epoch": 7.3, "learning_rate": 4.93672083777826e-06, "loss": 0.2067, "step": 12173000 }, { "epoch": 7.3, "learning_rate": 4.9346208722176956e-06, "loss": 0.2039, "step": 12173500 }, { "epoch": 7.3, "learning_rate": 4.932520906657131e-06, "loss": 0.2017, "step": 12174000 }, { "epoch": 7.3, "learning_rate": 4.930420941096566e-06, "loss": 0.2036, "step": 12174500 }, { "epoch": 7.3, "learning_rate": 4.928325175467122e-06, "loss": 0.2118, "step": 12175000 }, { "epoch": 7.3, "learning_rate": 4.926225209906557e-06, "loss": 0.2044, "step": 12175500 }, { "epoch": 7.3, "learning_rate": 4.924125244345993e-06, "loss": 0.1989, "step": 12176000 }, { "epoch": 7.3, "learning_rate": 4.9220252787854275e-06, "loss": 0.2017, "step": 12176500 }, { "epoch": 7.3, "learning_rate": 4.919929513155984e-06, "loss": 0.2049, "step": 12177000 }, { "epoch": 7.3, "learning_rate": 4.9178295475954196e-06, "loss": 0.2009, "step": 12177500 }, { "epoch": 7.3, "learning_rate": 4.915729582034855e-06, "loss": 0.2024, "step": 12178000 }, { "epoch": 7.3, "learning_rate": 4.91362961647429e-06, "loss": 0.2008, "step": 12178500 }, { "epoch": 7.3, "learning_rate": 4.911533850844846e-06, "loss": 0.2014, "step": 12179000 }, { "epoch": 7.3, "learning_rate": 4.909433885284282e-06, "loss": 0.207, "step": 12179500 }, { "epoch": 7.3, "learning_rate": 4.907333919723716e-06, "loss": 0.2101, "step": 12180000 }, { "epoch": 7.3, "learning_rate": 4.9052339541631515e-06, "loss": 0.2043, "step": 12180500 }, { "epoch": 7.3, "learning_rate": 4.903133988602587e-06, "loss": 0.2045, "step": 12181000 }, { "epoch": 7.3, "learning_rate": 4.901034023042023e-06, "loss": 0.2066, "step": 12181500 }, { "epoch": 7.3, "learning_rate": 4.898934057481457e-06, "loss": 0.205, "step": 12182000 }, { "epoch": 7.3, "learning_rate": 4.896838291852014e-06, "loss": 0.2019, "step": 12182500 }, { "epoch": 7.3, "learning_rate": 4.894738326291449e-06, "loss": 0.1952, "step": 12183000 }, { "epoch": 7.3, "learning_rate": 4.892638360730884e-06, "loss": 0.2025, "step": 12183500 }, { "epoch": 7.3, "learning_rate": 4.8905383951703185e-06, "loss": 0.2015, "step": 12184000 }, { "epoch": 7.31, "learning_rate": 4.8884426295408754e-06, "loss": 0.2058, "step": 12184500 }, { "epoch": 7.31, "learning_rate": 4.886342663980311e-06, "loss": 0.203, "step": 12185000 }, { "epoch": 7.31, "learning_rate": 4.884242698419746e-06, "loss": 0.2043, "step": 12185500 }, { "epoch": 7.31, "learning_rate": 4.882142732859181e-06, "loss": 0.2098, "step": 12186000 }, { "epoch": 7.31, "learning_rate": 4.880042767298616e-06, "loss": 0.2021, "step": 12186500 }, { "epoch": 7.31, "learning_rate": 4.877947001669172e-06, "loss": 0.2037, "step": 12187000 }, { "epoch": 7.31, "learning_rate": 4.875847036108607e-06, "loss": 0.2053, "step": 12187500 }, { "epoch": 7.31, "learning_rate": 4.873747070548043e-06, "loss": 0.2093, "step": 12188000 }, { "epoch": 7.31, "learning_rate": 4.8716471049874785e-06, "loss": 0.2047, "step": 12188500 }, { "epoch": 7.31, "learning_rate": 4.869547139426914e-06, "loss": 0.2014, "step": 12189000 }, { "epoch": 7.31, "learning_rate": 4.867447173866348e-06, "loss": 0.2044, "step": 12189500 }, { "epoch": 7.31, "learning_rate": 4.865347208305784e-06, "loss": 0.2028, "step": 12190000 }, { "epoch": 7.31, "learning_rate": 4.86325144267634e-06, "loss": 0.2056, "step": 12190500 }, { "epoch": 7.31, "learning_rate": 4.861151477115775e-06, "loss": 0.204, "step": 12191000 }, { "epoch": 7.31, "learning_rate": 4.85905151155521e-06, "loss": 0.2043, "step": 12191500 }, { "epoch": 7.31, "learning_rate": 4.8569515459946455e-06, "loss": 0.2005, "step": 12192000 }, { "epoch": 7.31, "learning_rate": 4.854851580434081e-06, "loss": 0.2014, "step": 12192500 }, { "epoch": 7.31, "learning_rate": 4.852751614873516e-06, "loss": 0.2076, "step": 12193000 }, { "epoch": 7.31, "learning_rate": 4.850651649312951e-06, "loss": 0.2039, "step": 12193500 }, { "epoch": 7.31, "learning_rate": 4.848551683752386e-06, "loss": 0.2026, "step": 12194000 }, { "epoch": 7.31, "learning_rate": 4.846455918122943e-06, "loss": 0.2094, "step": 12194500 }, { "epoch": 7.31, "learning_rate": 4.844355952562377e-06, "loss": 0.2091, "step": 12195000 }, { "epoch": 7.31, "learning_rate": 4.842255987001813e-06, "loss": 0.2039, "step": 12195500 }, { "epoch": 7.31, "learning_rate": 4.8401560214412485e-06, "loss": 0.2106, "step": 12196000 }, { "epoch": 7.31, "learning_rate": 4.838060255811805e-06, "loss": 0.206, "step": 12196500 }, { "epoch": 7.31, "learning_rate": 4.83596029025124e-06, "loss": 0.2045, "step": 12197000 }, { "epoch": 7.31, "learning_rate": 4.833860324690675e-06, "loss": 0.2048, "step": 12197500 }, { "epoch": 7.31, "learning_rate": 4.83176035913011e-06, "loss": 0.2094, "step": 12198000 }, { "epoch": 7.31, "learning_rate": 4.829660393569545e-06, "loss": 0.2006, "step": 12198500 }, { "epoch": 7.31, "learning_rate": 4.827564627940101e-06, "loss": 0.2012, "step": 12199000 }, { "epoch": 7.31, "learning_rate": 4.825464662379537e-06, "loss": 0.2066, "step": 12199500 }, { "epoch": 7.31, "learning_rate": 4.823364696818972e-06, "loss": 0.2037, "step": 12200000 }, { "epoch": 7.31, "eval_loss": 0.19901762902736664, "eval_runtime": 1455.8104, "eval_samples_per_second": 361.805, "eval_steps_per_second": 60.301, "step": 12200000 } ], "max_steps": 13343552, "num_train_epochs": 8, "total_flos": 2.4899756453449114e+18, "trial_name": null, "trial_params": null }