{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.955603327413776, "eval_steps": 500, "global_step": 267000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.9906533320871115e-05, "loss": 6.5864, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.981306664174222e-05, "loss": 5.4802, "step": 1000 }, { "epoch": 0.14, "learning_rate": 4.9719599962613325e-05, "loss": 4.7221, "step": 1500 }, { "epoch": 0.19, "learning_rate": 4.962613328348444e-05, "loss": 4.2025, "step": 2000 }, { "epoch": 0.23, "learning_rate": 4.953266660435555e-05, "loss": 3.8489, "step": 2500 }, { "epoch": 0.28, "learning_rate": 4.943919992522666e-05, "loss": 3.5856, "step": 3000 }, { "epoch": 0.33, "learning_rate": 4.934573324609777e-05, "loss": 3.3865, "step": 3500 }, { "epoch": 0.37, "learning_rate": 4.925226656696888e-05, "loss": 3.2125, "step": 4000 }, { "epoch": 0.42, "learning_rate": 4.9158799887839984e-05, "loss": 3.0721, "step": 4500 }, { "epoch": 0.47, "learning_rate": 4.9065333208711096e-05, "loss": 2.9681, "step": 5000 }, { "epoch": 0.51, "learning_rate": 4.897186652958221e-05, "loss": 2.8801, "step": 5500 }, { "epoch": 0.56, "learning_rate": 4.8878399850453314e-05, "loss": 2.782, "step": 6000 }, { "epoch": 0.61, "learning_rate": 4.8784933171324426e-05, "loss": 2.7237, "step": 6500 }, { "epoch": 0.65, "learning_rate": 4.869146649219554e-05, "loss": 2.6715, "step": 7000 }, { "epoch": 0.7, "learning_rate": 4.859799981306664e-05, "loss": 2.637, "step": 7500 }, { "epoch": 0.75, "learning_rate": 4.850453313393775e-05, "loss": 2.5817, "step": 8000 }, { "epoch": 0.79, "learning_rate": 4.841106645480886e-05, "loss": 2.5346, "step": 8500 }, { "epoch": 0.84, "learning_rate": 4.831759977567997e-05, "loss": 2.49, "step": 9000 }, { "epoch": 0.89, "learning_rate": 4.8224133096551085e-05, "loss": 2.4597, "step": 9500 }, { "epoch": 0.93, "learning_rate": 4.813066641742219e-05, "loss": 2.4381, "step": 10000 }, { "epoch": 0.98, "learning_rate": 4.8037199738293296e-05, "loss": 2.3995, "step": 10500 }, { "epoch": 1.03, "learning_rate": 4.794373305916441e-05, "loss": 2.385, "step": 11000 }, { "epoch": 1.07, "learning_rate": 4.785026638003552e-05, "loss": 2.3463, "step": 11500 }, { "epoch": 1.12, "learning_rate": 4.775679970090663e-05, "loss": 2.3033, "step": 12000 }, { "epoch": 1.17, "learning_rate": 4.766333302177774e-05, "loss": 2.2915, "step": 12500 }, { "epoch": 1.22, "learning_rate": 4.756986634264885e-05, "loss": 2.2722, "step": 13000 }, { "epoch": 1.26, "learning_rate": 4.7476399663519955e-05, "loss": 2.2443, "step": 13500 }, { "epoch": 1.31, "learning_rate": 4.738293298439107e-05, "loss": 2.2412, "step": 14000 }, { "epoch": 1.36, "learning_rate": 4.728946630526218e-05, "loss": 2.22, "step": 14500 }, { "epoch": 1.4, "learning_rate": 4.7195999626133284e-05, "loss": 2.19, "step": 15000 }, { "epoch": 1.45, "learning_rate": 4.7102532947004396e-05, "loss": 2.1646, "step": 15500 }, { "epoch": 1.5, "learning_rate": 4.700906626787551e-05, "loss": 2.1665, "step": 16000 }, { "epoch": 1.54, "learning_rate": 4.6915599588746614e-05, "loss": 2.1406, "step": 16500 }, { "epoch": 1.59, "learning_rate": 4.682213290961772e-05, "loss": 2.1191, "step": 17000 }, { "epoch": 1.64, "learning_rate": 4.672866623048883e-05, "loss": 2.1071, "step": 17500 }, { "epoch": 1.68, "learning_rate": 4.663519955135994e-05, "loss": 2.0976, "step": 18000 }, { "epoch": 1.73, "learning_rate": 4.6541732872231055e-05, "loss": 2.0935, "step": 18500 }, { "epoch": 1.78, "learning_rate": 4.644826619310216e-05, "loss": 2.0682, "step": 19000 }, { "epoch": 1.82, "learning_rate": 4.6354799513973266e-05, "loss": 2.0487, "step": 19500 }, { "epoch": 1.87, "learning_rate": 4.626133283484438e-05, "loss": 2.0384, "step": 20000 }, { "epoch": 1.92, "learning_rate": 4.616786615571549e-05, "loss": 2.0184, "step": 20500 }, { "epoch": 1.96, "learning_rate": 4.60743994765866e-05, "loss": 2.0046, "step": 21000 }, { "epoch": 2.01, "learning_rate": 4.598093279745771e-05, "loss": 1.9972, "step": 21500 }, { "epoch": 2.06, "learning_rate": 4.588746611832882e-05, "loss": 1.9796, "step": 22000 }, { "epoch": 2.1, "learning_rate": 4.5793999439199925e-05, "loss": 1.9909, "step": 22500 }, { "epoch": 2.15, "learning_rate": 4.570053276007104e-05, "loss": 1.9776, "step": 23000 }, { "epoch": 2.2, "learning_rate": 4.560706608094214e-05, "loss": 1.9556, "step": 23500 }, { "epoch": 2.24, "learning_rate": 4.5513599401813255e-05, "loss": 1.9427, "step": 24000 }, { "epoch": 2.29, "learning_rate": 4.542013272268437e-05, "loss": 1.9452, "step": 24500 }, { "epoch": 2.34, "learning_rate": 4.532666604355548e-05, "loss": 1.9365, "step": 25000 }, { "epoch": 2.38, "learning_rate": 4.5233199364426584e-05, "loss": 1.925, "step": 25500 }, { "epoch": 2.43, "learning_rate": 4.513973268529769e-05, "loss": 1.9045, "step": 26000 }, { "epoch": 2.48, "learning_rate": 4.50462660061688e-05, "loss": 1.8886, "step": 26500 }, { "epoch": 2.52, "learning_rate": 4.4952799327039914e-05, "loss": 1.9086, "step": 27000 }, { "epoch": 2.57, "learning_rate": 4.4859332647911026e-05, "loss": 1.882, "step": 27500 }, { "epoch": 2.62, "learning_rate": 4.476586596878213e-05, "loss": 1.8831, "step": 28000 }, { "epoch": 2.66, "learning_rate": 4.4672399289653237e-05, "loss": 1.8745, "step": 28500 }, { "epoch": 2.71, "learning_rate": 4.457893261052435e-05, "loss": 1.8645, "step": 29000 }, { "epoch": 2.76, "learning_rate": 4.448546593139546e-05, "loss": 1.8481, "step": 29500 }, { "epoch": 2.8, "learning_rate": 4.439199925226657e-05, "loss": 1.8524, "step": 30000 }, { "epoch": 2.85, "learning_rate": 4.429853257313768e-05, "loss": 1.8299, "step": 30500 }, { "epoch": 2.9, "learning_rate": 4.420506589400879e-05, "loss": 1.835, "step": 31000 }, { "epoch": 2.94, "learning_rate": 4.4111599214879896e-05, "loss": 1.8246, "step": 31500 }, { "epoch": 2.99, "learning_rate": 4.401813253575101e-05, "loss": 1.8154, "step": 32000 }, { "epoch": 3.04, "learning_rate": 4.392466585662211e-05, "loss": 1.8097, "step": 32500 }, { "epoch": 3.08, "learning_rate": 4.3831199177493225e-05, "loss": 1.7977, "step": 33000 }, { "epoch": 3.13, "learning_rate": 4.373773249836434e-05, "loss": 1.7857, "step": 33500 }, { "epoch": 3.18, "learning_rate": 4.364426581923545e-05, "loss": 1.7933, "step": 34000 }, { "epoch": 3.22, "learning_rate": 4.3550799140106555e-05, "loss": 1.774, "step": 34500 }, { "epoch": 3.27, "learning_rate": 4.345733246097766e-05, "loss": 1.7747, "step": 35000 }, { "epoch": 3.32, "learning_rate": 4.336386578184877e-05, "loss": 1.7663, "step": 35500 }, { "epoch": 3.36, "learning_rate": 4.3270399102719884e-05, "loss": 1.7746, "step": 36000 }, { "epoch": 3.41, "learning_rate": 4.3176932423590996e-05, "loss": 1.7586, "step": 36500 }, { "epoch": 3.46, "learning_rate": 4.30834657444621e-05, "loss": 1.7545, "step": 37000 }, { "epoch": 3.51, "learning_rate": 4.298999906533321e-05, "loss": 1.7424, "step": 37500 }, { "epoch": 3.55, "learning_rate": 4.289653238620432e-05, "loss": 1.7472, "step": 38000 }, { "epoch": 3.6, "learning_rate": 4.280306570707543e-05, "loss": 1.7587, "step": 38500 }, { "epoch": 3.65, "learning_rate": 4.2709599027946537e-05, "loss": 1.7486, "step": 39000 }, { "epoch": 3.69, "learning_rate": 4.261613234881765e-05, "loss": 1.7288, "step": 39500 }, { "epoch": 3.74, "learning_rate": 4.252266566968876e-05, "loss": 1.7361, "step": 40000 }, { "epoch": 3.79, "learning_rate": 4.2429198990559866e-05, "loss": 1.7089, "step": 40500 }, { "epoch": 3.83, "learning_rate": 4.233573231143098e-05, "loss": 1.7253, "step": 41000 }, { "epoch": 3.88, "learning_rate": 4.2242265632302084e-05, "loss": 1.7134, "step": 41500 }, { "epoch": 3.93, "learning_rate": 4.2148798953173196e-05, "loss": 1.6936, "step": 42000 }, { "epoch": 3.97, "learning_rate": 4.205533227404431e-05, "loss": 1.7115, "step": 42500 }, { "epoch": 4.02, "learning_rate": 4.196186559491542e-05, "loss": 1.6981, "step": 43000 }, { "epoch": 4.07, "learning_rate": 4.1868398915786525e-05, "loss": 1.6913, "step": 43500 }, { "epoch": 4.11, "learning_rate": 4.177493223665763e-05, "loss": 1.6916, "step": 44000 }, { "epoch": 4.16, "learning_rate": 4.168146555752874e-05, "loss": 1.6953, "step": 44500 }, { "epoch": 4.21, "learning_rate": 4.1587998878399855e-05, "loss": 1.6778, "step": 45000 }, { "epoch": 4.25, "learning_rate": 4.149453219927097e-05, "loss": 1.6706, "step": 45500 }, { "epoch": 4.3, "learning_rate": 4.140106552014207e-05, "loss": 1.6703, "step": 46000 }, { "epoch": 4.35, "learning_rate": 4.130759884101318e-05, "loss": 1.6639, "step": 46500 }, { "epoch": 4.39, "learning_rate": 4.121413216188429e-05, "loss": 1.6728, "step": 47000 }, { "epoch": 4.44, "learning_rate": 4.11206654827554e-05, "loss": 1.6553, "step": 47500 }, { "epoch": 4.49, "learning_rate": 4.102719880362651e-05, "loss": 1.6433, "step": 48000 }, { "epoch": 4.53, "learning_rate": 4.093373212449762e-05, "loss": 1.6499, "step": 48500 }, { "epoch": 4.58, "learning_rate": 4.084026544536873e-05, "loss": 1.6553, "step": 49000 }, { "epoch": 4.63, "learning_rate": 4.0746798766239837e-05, "loss": 1.6401, "step": 49500 }, { "epoch": 4.67, "learning_rate": 4.065333208711095e-05, "loss": 1.6444, "step": 50000 }, { "epoch": 4.72, "learning_rate": 4.0559865407982054e-05, "loss": 1.6398, "step": 50500 }, { "epoch": 4.77, "learning_rate": 4.0466398728853166e-05, "loss": 1.6338, "step": 51000 }, { "epoch": 4.81, "learning_rate": 4.037293204972428e-05, "loss": 1.6194, "step": 51500 }, { "epoch": 4.86, "learning_rate": 4.027946537059539e-05, "loss": 1.6327, "step": 52000 }, { "epoch": 4.91, "learning_rate": 4.018599869146649e-05, "loss": 1.6232, "step": 52500 }, { "epoch": 4.95, "learning_rate": 4.00925320123376e-05, "loss": 1.6296, "step": 53000 }, { "epoch": 5.0, "learning_rate": 3.999906533320871e-05, "loss": 1.6152, "step": 53500 }, { "epoch": 5.05, "learning_rate": 3.9905598654079825e-05, "loss": 1.6012, "step": 54000 }, { "epoch": 5.09, "learning_rate": 3.981213197495093e-05, "loss": 1.6087, "step": 54500 }, { "epoch": 5.14, "learning_rate": 3.971866529582204e-05, "loss": 1.5971, "step": 55000 }, { "epoch": 5.19, "learning_rate": 3.962519861669315e-05, "loss": 1.5956, "step": 55500 }, { "epoch": 5.23, "learning_rate": 3.953173193756426e-05, "loss": 1.5947, "step": 56000 }, { "epoch": 5.28, "learning_rate": 3.943826525843537e-05, "loss": 1.5993, "step": 56500 }, { "epoch": 5.33, "learning_rate": 3.934479857930648e-05, "loss": 1.5816, "step": 57000 }, { "epoch": 5.37, "learning_rate": 3.925133190017759e-05, "loss": 1.5837, "step": 57500 }, { "epoch": 5.42, "learning_rate": 3.91578652210487e-05, "loss": 1.5854, "step": 58000 }, { "epoch": 5.47, "learning_rate": 3.906439854191981e-05, "loss": 1.5734, "step": 58500 }, { "epoch": 5.51, "learning_rate": 3.897093186279092e-05, "loss": 1.578, "step": 59000 }, { "epoch": 5.56, "learning_rate": 3.8877465183662024e-05, "loss": 1.5817, "step": 59500 }, { "epoch": 5.61, "learning_rate": 3.8783998504533137e-05, "loss": 1.578, "step": 60000 }, { "epoch": 5.65, "learning_rate": 3.869053182540425e-05, "loss": 1.5732, "step": 60500 }, { "epoch": 5.7, "learning_rate": 3.8597065146275354e-05, "loss": 1.5606, "step": 61000 }, { "epoch": 5.75, "learning_rate": 3.850359846714646e-05, "loss": 1.5709, "step": 61500 }, { "epoch": 5.79, "learning_rate": 3.841013178801757e-05, "loss": 1.5607, "step": 62000 }, { "epoch": 5.84, "learning_rate": 3.8316665108888684e-05, "loss": 1.5687, "step": 62500 }, { "epoch": 5.89, "learning_rate": 3.8223198429759796e-05, "loss": 1.5488, "step": 63000 }, { "epoch": 5.94, "learning_rate": 3.81297317506309e-05, "loss": 1.5601, "step": 63500 }, { "epoch": 5.98, "learning_rate": 3.803626507150201e-05, "loss": 1.5611, "step": 64000 }, { "epoch": 6.03, "learning_rate": 3.794279839237312e-05, "loss": 1.5515, "step": 64500 }, { "epoch": 6.08, "learning_rate": 3.784933171324423e-05, "loss": 1.5412, "step": 65000 }, { "epoch": 6.12, "learning_rate": 3.775586503411534e-05, "loss": 1.5434, "step": 65500 }, { "epoch": 6.17, "learning_rate": 3.766239835498645e-05, "loss": 1.5372, "step": 66000 }, { "epoch": 6.22, "learning_rate": 3.756893167585756e-05, "loss": 1.5293, "step": 66500 }, { "epoch": 6.26, "learning_rate": 3.7475464996728665e-05, "loss": 1.5335, "step": 67000 }, { "epoch": 6.31, "learning_rate": 3.738199831759978e-05, "loss": 1.5296, "step": 67500 }, { "epoch": 6.36, "learning_rate": 3.728853163847088e-05, "loss": 1.5238, "step": 68000 }, { "epoch": 6.4, "learning_rate": 3.7195064959341995e-05, "loss": 1.5269, "step": 68500 }, { "epoch": 6.45, "learning_rate": 3.710159828021311e-05, "loss": 1.5233, "step": 69000 }, { "epoch": 6.5, "learning_rate": 3.700813160108422e-05, "loss": 1.5234, "step": 69500 }, { "epoch": 6.54, "learning_rate": 3.6914664921955325e-05, "loss": 1.5277, "step": 70000 }, { "epoch": 6.59, "learning_rate": 3.682119824282643e-05, "loss": 1.5185, "step": 70500 }, { "epoch": 6.64, "learning_rate": 3.672773156369754e-05, "loss": 1.5185, "step": 71000 }, { "epoch": 6.68, "learning_rate": 3.6634264884568654e-05, "loss": 1.5316, "step": 71500 }, { "epoch": 6.73, "learning_rate": 3.6540798205439766e-05, "loss": 1.5165, "step": 72000 }, { "epoch": 6.78, "learning_rate": 3.644733152631087e-05, "loss": 1.5067, "step": 72500 }, { "epoch": 6.82, "learning_rate": 3.635386484718198e-05, "loss": 1.5108, "step": 73000 }, { "epoch": 6.87, "learning_rate": 3.626039816805309e-05, "loss": 1.4999, "step": 73500 }, { "epoch": 6.92, "learning_rate": 3.61669314889242e-05, "loss": 1.4997, "step": 74000 }, { "epoch": 6.96, "learning_rate": 3.607346480979531e-05, "loss": 1.513, "step": 74500 }, { "epoch": 7.01, "learning_rate": 3.597999813066642e-05, "loss": 1.5012, "step": 75000 }, { "epoch": 7.06, "learning_rate": 3.588653145153753e-05, "loss": 1.4837, "step": 75500 }, { "epoch": 7.1, "learning_rate": 3.5793064772408636e-05, "loss": 1.4824, "step": 76000 }, { "epoch": 7.15, "learning_rate": 3.569959809327975e-05, "loss": 1.4896, "step": 76500 }, { "epoch": 7.2, "learning_rate": 3.560613141415085e-05, "loss": 1.4865, "step": 77000 }, { "epoch": 7.24, "learning_rate": 3.5512664735021965e-05, "loss": 1.4937, "step": 77500 }, { "epoch": 7.29, "learning_rate": 3.541919805589308e-05, "loss": 1.4828, "step": 78000 }, { "epoch": 7.34, "learning_rate": 3.532573137676419e-05, "loss": 1.4767, "step": 78500 }, { "epoch": 7.38, "learning_rate": 3.5232264697635295e-05, "loss": 1.4878, "step": 79000 }, { "epoch": 7.43, "learning_rate": 3.51387980185064e-05, "loss": 1.4946, "step": 79500 }, { "epoch": 7.48, "learning_rate": 3.504533133937751e-05, "loss": 1.4756, "step": 80000 }, { "epoch": 7.52, "learning_rate": 3.4951864660248625e-05, "loss": 1.464, "step": 80500 }, { "epoch": 7.57, "learning_rate": 3.485839798111974e-05, "loss": 1.4754, "step": 81000 }, { "epoch": 7.62, "learning_rate": 3.476493130199084e-05, "loss": 1.472, "step": 81500 }, { "epoch": 7.66, "learning_rate": 3.467146462286195e-05, "loss": 1.4716, "step": 82000 }, { "epoch": 7.71, "learning_rate": 3.457799794373306e-05, "loss": 1.4888, "step": 82500 }, { "epoch": 7.76, "learning_rate": 3.448453126460417e-05, "loss": 1.4678, "step": 83000 }, { "epoch": 7.8, "learning_rate": 3.439106458547528e-05, "loss": 1.4694, "step": 83500 }, { "epoch": 7.85, "learning_rate": 3.429759790634639e-05, "loss": 1.4729, "step": 84000 }, { "epoch": 7.9, "learning_rate": 3.42041312272175e-05, "loss": 1.4558, "step": 84500 }, { "epoch": 7.94, "learning_rate": 3.4110664548088606e-05, "loss": 1.4597, "step": 85000 }, { "epoch": 7.99, "learning_rate": 3.401719786895972e-05, "loss": 1.449, "step": 85500 }, { "epoch": 8.04, "learning_rate": 3.3923731189830824e-05, "loss": 1.4543, "step": 86000 }, { "epoch": 8.08, "learning_rate": 3.3830264510701936e-05, "loss": 1.4437, "step": 86500 }, { "epoch": 8.13, "learning_rate": 3.373679783157305e-05, "loss": 1.4423, "step": 87000 }, { "epoch": 8.18, "learning_rate": 3.364333115244416e-05, "loss": 1.4417, "step": 87500 }, { "epoch": 8.23, "learning_rate": 3.3549864473315265e-05, "loss": 1.4505, "step": 88000 }, { "epoch": 8.27, "learning_rate": 3.345639779418637e-05, "loss": 1.4426, "step": 88500 }, { "epoch": 8.32, "learning_rate": 3.336293111505748e-05, "loss": 1.4359, "step": 89000 }, { "epoch": 8.37, "learning_rate": 3.3269464435928595e-05, "loss": 1.4463, "step": 89500 }, { "epoch": 8.41, "learning_rate": 3.317599775679971e-05, "loss": 1.4395, "step": 90000 }, { "epoch": 8.46, "learning_rate": 3.308253107767081e-05, "loss": 1.4388, "step": 90500 }, { "epoch": 8.51, "learning_rate": 3.298906439854192e-05, "loss": 1.4395, "step": 91000 }, { "epoch": 8.55, "learning_rate": 3.289559771941303e-05, "loss": 1.4335, "step": 91500 }, { "epoch": 8.6, "learning_rate": 3.280213104028414e-05, "loss": 1.4334, "step": 92000 }, { "epoch": 8.65, "learning_rate": 3.270866436115525e-05, "loss": 1.4457, "step": 92500 }, { "epoch": 8.69, "learning_rate": 3.261519768202636e-05, "loss": 1.4371, "step": 93000 }, { "epoch": 8.74, "learning_rate": 3.252173100289747e-05, "loss": 1.4299, "step": 93500 }, { "epoch": 8.79, "learning_rate": 3.242826432376858e-05, "loss": 1.4414, "step": 94000 }, { "epoch": 8.83, "learning_rate": 3.233479764463969e-05, "loss": 1.4191, "step": 94500 }, { "epoch": 8.88, "learning_rate": 3.2241330965510794e-05, "loss": 1.4261, "step": 95000 }, { "epoch": 8.93, "learning_rate": 3.2147864286381906e-05, "loss": 1.4292, "step": 95500 }, { "epoch": 8.97, "learning_rate": 3.205439760725302e-05, "loss": 1.4245, "step": 96000 }, { "epoch": 9.02, "learning_rate": 3.196093092812413e-05, "loss": 1.4169, "step": 96500 }, { "epoch": 9.07, "learning_rate": 3.186746424899523e-05, "loss": 1.4099, "step": 97000 }, { "epoch": 9.11, "learning_rate": 3.177399756986634e-05, "loss": 1.4087, "step": 97500 }, { "epoch": 9.16, "learning_rate": 3.168053089073745e-05, "loss": 1.4289, "step": 98000 }, { "epoch": 9.21, "learning_rate": 3.1587064211608565e-05, "loss": 1.4251, "step": 98500 }, { "epoch": 9.25, "learning_rate": 3.149359753247967e-05, "loss": 1.4228, "step": 99000 }, { "epoch": 9.3, "learning_rate": 3.140013085335078e-05, "loss": 1.4062, "step": 99500 }, { "epoch": 9.35, "learning_rate": 3.130666417422189e-05, "loss": 1.4032, "step": 100000 }, { "epoch": 9.39, "learning_rate": 3.1213197495093e-05, "loss": 1.4143, "step": 100500 }, { "epoch": 9.44, "learning_rate": 3.111973081596411e-05, "loss": 1.4038, "step": 101000 }, { "epoch": 9.49, "learning_rate": 3.102626413683522e-05, "loss": 1.3984, "step": 101500 }, { "epoch": 9.53, "learning_rate": 3.093279745770633e-05, "loss": 1.4098, "step": 102000 }, { "epoch": 9.58, "learning_rate": 3.083933077857744e-05, "loss": 1.4021, "step": 102500 }, { "epoch": 9.63, "learning_rate": 3.074586409944855e-05, "loss": 1.4041, "step": 103000 }, { "epoch": 9.67, "learning_rate": 3.065239742031966e-05, "loss": 1.3972, "step": 103500 }, { "epoch": 9.72, "learning_rate": 3.0558930741190765e-05, "loss": 1.3955, "step": 104000 }, { "epoch": 9.77, "learning_rate": 3.0465464062061877e-05, "loss": 1.4066, "step": 104500 }, { "epoch": 9.81, "learning_rate": 3.037199738293299e-05, "loss": 1.4019, "step": 105000 }, { "epoch": 9.86, "learning_rate": 3.0278530703804098e-05, "loss": 1.3893, "step": 105500 }, { "epoch": 9.91, "learning_rate": 3.0185064024675203e-05, "loss": 1.3995, "step": 106000 }, { "epoch": 9.95, "learning_rate": 3.0091597345546312e-05, "loss": 1.3958, "step": 106500 }, { "epoch": 10.0, "learning_rate": 2.9998130666417424e-05, "loss": 1.3839, "step": 107000 }, { "epoch": 10.05, "learning_rate": 2.9904663987288533e-05, "loss": 1.3878, "step": 107500 }, { "epoch": 10.09, "learning_rate": 2.9811197308159645e-05, "loss": 1.3848, "step": 108000 }, { "epoch": 10.14, "learning_rate": 2.9717730629030753e-05, "loss": 1.3804, "step": 108500 }, { "epoch": 10.19, "learning_rate": 2.962426394990186e-05, "loss": 1.3841, "step": 109000 }, { "epoch": 10.23, "learning_rate": 2.953079727077297e-05, "loss": 1.3878, "step": 109500 }, { "epoch": 10.28, "learning_rate": 2.943733059164408e-05, "loss": 1.3662, "step": 110000 }, { "epoch": 10.33, "learning_rate": 2.934386391251519e-05, "loss": 1.3775, "step": 110500 }, { "epoch": 10.37, "learning_rate": 2.92503972333863e-05, "loss": 1.3757, "step": 111000 }, { "epoch": 10.42, "learning_rate": 2.9156930554257412e-05, "loss": 1.3816, "step": 111500 }, { "epoch": 10.47, "learning_rate": 2.9063463875128514e-05, "loss": 1.3769, "step": 112000 }, { "epoch": 10.52, "learning_rate": 2.8969997195999626e-05, "loss": 1.3824, "step": 112500 }, { "epoch": 10.56, "learning_rate": 2.8876530516870735e-05, "loss": 1.3753, "step": 113000 }, { "epoch": 10.61, "learning_rate": 2.8783063837741847e-05, "loss": 1.3728, "step": 113500 }, { "epoch": 10.66, "learning_rate": 2.8689597158612956e-05, "loss": 1.3699, "step": 114000 }, { "epoch": 10.7, "learning_rate": 2.8596130479484068e-05, "loss": 1.3758, "step": 114500 }, { "epoch": 10.75, "learning_rate": 2.8502663800355173e-05, "loss": 1.3763, "step": 115000 }, { "epoch": 10.8, "learning_rate": 2.8409197121226282e-05, "loss": 1.3677, "step": 115500 }, { "epoch": 10.84, "learning_rate": 2.8315730442097394e-05, "loss": 1.3578, "step": 116000 }, { "epoch": 10.89, "learning_rate": 2.8222263762968503e-05, "loss": 1.3693, "step": 116500 }, { "epoch": 10.94, "learning_rate": 2.8128797083839615e-05, "loss": 1.3726, "step": 117000 }, { "epoch": 10.98, "learning_rate": 2.8035330404710724e-05, "loss": 1.3772, "step": 117500 }, { "epoch": 11.03, "learning_rate": 2.794186372558183e-05, "loss": 1.3472, "step": 118000 }, { "epoch": 11.08, "learning_rate": 2.784839704645294e-05, "loss": 1.3638, "step": 118500 }, { "epoch": 11.12, "learning_rate": 2.775493036732405e-05, "loss": 1.3556, "step": 119000 }, { "epoch": 11.17, "learning_rate": 2.7661463688195162e-05, "loss": 1.3598, "step": 119500 }, { "epoch": 11.22, "learning_rate": 2.756799700906627e-05, "loss": 1.344, "step": 120000 }, { "epoch": 11.26, "learning_rate": 2.7474530329937383e-05, "loss": 1.3532, "step": 120500 }, { "epoch": 11.31, "learning_rate": 2.7381063650808485e-05, "loss": 1.351, "step": 121000 }, { "epoch": 11.36, "learning_rate": 2.7287596971679597e-05, "loss": 1.3555, "step": 121500 }, { "epoch": 11.4, "learning_rate": 2.7194130292550706e-05, "loss": 1.361, "step": 122000 }, { "epoch": 11.45, "learning_rate": 2.7100663613421818e-05, "loss": 1.3472, "step": 122500 }, { "epoch": 11.5, "learning_rate": 2.7007196934292926e-05, "loss": 1.3462, "step": 123000 }, { "epoch": 11.54, "learning_rate": 2.691373025516404e-05, "loss": 1.3539, "step": 123500 }, { "epoch": 11.59, "learning_rate": 2.6820263576035144e-05, "loss": 1.3493, "step": 124000 }, { "epoch": 11.64, "learning_rate": 2.6726796896906253e-05, "loss": 1.3504, "step": 124500 }, { "epoch": 11.68, "learning_rate": 2.6633330217777365e-05, "loss": 1.3548, "step": 125000 }, { "epoch": 11.73, "learning_rate": 2.6539863538648473e-05, "loss": 1.3373, "step": 125500 }, { "epoch": 11.78, "learning_rate": 2.6446396859519586e-05, "loss": 1.3506, "step": 126000 }, { "epoch": 11.82, "learning_rate": 2.6352930180390694e-05, "loss": 1.3431, "step": 126500 }, { "epoch": 11.87, "learning_rate": 2.62594635012618e-05, "loss": 1.3458, "step": 127000 }, { "epoch": 11.92, "learning_rate": 2.616599682213291e-05, "loss": 1.345, "step": 127500 }, { "epoch": 11.96, "learning_rate": 2.607253014300402e-05, "loss": 1.3478, "step": 128000 }, { "epoch": 12.01, "learning_rate": 2.597906346387513e-05, "loss": 1.3453, "step": 128500 }, { "epoch": 12.06, "learning_rate": 2.588559678474624e-05, "loss": 1.3339, "step": 129000 }, { "epoch": 12.1, "learning_rate": 2.5792130105617347e-05, "loss": 1.3325, "step": 129500 }, { "epoch": 12.15, "learning_rate": 2.5698663426488455e-05, "loss": 1.339, "step": 130000 }, { "epoch": 12.2, "learning_rate": 2.5605196747359567e-05, "loss": 1.3329, "step": 130500 }, { "epoch": 12.24, "learning_rate": 2.5511730068230676e-05, "loss": 1.3341, "step": 131000 }, { "epoch": 12.29, "learning_rate": 2.5418263389101788e-05, "loss": 1.3396, "step": 131500 }, { "epoch": 12.34, "learning_rate": 2.5324796709972897e-05, "loss": 1.3341, "step": 132000 }, { "epoch": 12.38, "learning_rate": 2.5231330030844002e-05, "loss": 1.3358, "step": 132500 }, { "epoch": 12.43, "learning_rate": 2.5137863351715114e-05, "loss": 1.3294, "step": 133000 }, { "epoch": 12.48, "learning_rate": 2.5044396672586223e-05, "loss": 1.3339, "step": 133500 }, { "epoch": 12.52, "learning_rate": 2.4950929993457335e-05, "loss": 1.3338, "step": 134000 }, { "epoch": 12.57, "learning_rate": 2.4857463314328444e-05, "loss": 1.324, "step": 134500 }, { "epoch": 12.62, "learning_rate": 2.4763996635199553e-05, "loss": 1.3188, "step": 135000 }, { "epoch": 12.66, "learning_rate": 2.467052995607066e-05, "loss": 1.3244, "step": 135500 }, { "epoch": 12.71, "learning_rate": 2.4577063276941773e-05, "loss": 1.3296, "step": 136000 }, { "epoch": 12.76, "learning_rate": 2.448359659781288e-05, "loss": 1.3148, "step": 136500 }, { "epoch": 12.8, "learning_rate": 2.439012991868399e-05, "loss": 1.3261, "step": 137000 }, { "epoch": 12.85, "learning_rate": 2.42966632395551e-05, "loss": 1.3166, "step": 137500 }, { "epoch": 12.9, "learning_rate": 2.420319656042621e-05, "loss": 1.3137, "step": 138000 }, { "epoch": 12.95, "learning_rate": 2.410972988129732e-05, "loss": 1.3217, "step": 138500 }, { "epoch": 12.99, "learning_rate": 2.401626320216843e-05, "loss": 1.3341, "step": 139000 }, { "epoch": 13.04, "learning_rate": 2.3922796523039538e-05, "loss": 1.3248, "step": 139500 }, { "epoch": 13.09, "learning_rate": 2.3829329843910647e-05, "loss": 1.3087, "step": 140000 }, { "epoch": 13.13, "learning_rate": 2.373586316478176e-05, "loss": 1.3049, "step": 140500 }, { "epoch": 13.18, "learning_rate": 2.3642396485652864e-05, "loss": 1.3074, "step": 141000 }, { "epoch": 13.23, "learning_rate": 2.3548929806523976e-05, "loss": 1.3133, "step": 141500 }, { "epoch": 13.27, "learning_rate": 2.3455463127395085e-05, "loss": 1.3221, "step": 142000 }, { "epoch": 13.32, "learning_rate": 2.3361996448266194e-05, "loss": 1.3113, "step": 142500 }, { "epoch": 13.37, "learning_rate": 2.3268529769137302e-05, "loss": 1.3138, "step": 143000 }, { "epoch": 13.41, "learning_rate": 2.3175063090008414e-05, "loss": 1.3091, "step": 143500 }, { "epoch": 13.46, "learning_rate": 2.3081596410879523e-05, "loss": 1.3132, "step": 144000 }, { "epoch": 13.51, "learning_rate": 2.2988129731750632e-05, "loss": 1.3095, "step": 144500 }, { "epoch": 13.55, "learning_rate": 2.2894663052621744e-05, "loss": 1.3046, "step": 145000 }, { "epoch": 13.6, "learning_rate": 2.280119637349285e-05, "loss": 1.3136, "step": 145500 }, { "epoch": 13.65, "learning_rate": 2.270772969436396e-05, "loss": 1.3067, "step": 146000 }, { "epoch": 13.69, "learning_rate": 2.261426301523507e-05, "loss": 1.3025, "step": 146500 }, { "epoch": 13.74, "learning_rate": 2.252079633610618e-05, "loss": 1.3085, "step": 147000 }, { "epoch": 13.79, "learning_rate": 2.2427329656977288e-05, "loss": 1.2976, "step": 147500 }, { "epoch": 13.83, "learning_rate": 2.23338629778484e-05, "loss": 1.3007, "step": 148000 }, { "epoch": 13.88, "learning_rate": 2.224039629871951e-05, "loss": 1.3138, "step": 148500 }, { "epoch": 13.93, "learning_rate": 2.2146929619590617e-05, "loss": 1.3143, "step": 149000 }, { "epoch": 13.97, "learning_rate": 2.205346294046173e-05, "loss": 1.3029, "step": 149500 }, { "epoch": 14.02, "learning_rate": 2.1959996261332835e-05, "loss": 1.2919, "step": 150000 }, { "epoch": 14.07, "learning_rate": 2.1866529582203947e-05, "loss": 1.2982, "step": 150500 }, { "epoch": 14.11, "learning_rate": 2.1773062903075055e-05, "loss": 1.3012, "step": 151000 }, { "epoch": 14.16, "learning_rate": 2.1679596223946164e-05, "loss": 1.2841, "step": 151500 }, { "epoch": 14.21, "learning_rate": 2.1586129544817273e-05, "loss": 1.3044, "step": 152000 }, { "epoch": 14.25, "learning_rate": 2.1492662865688385e-05, "loss": 1.2973, "step": 152500 }, { "epoch": 14.3, "learning_rate": 2.1399196186559494e-05, "loss": 1.2884, "step": 153000 }, { "epoch": 14.35, "learning_rate": 2.1305729507430602e-05, "loss": 1.2883, "step": 153500 }, { "epoch": 14.39, "learning_rate": 2.1212262828301714e-05, "loss": 1.2993, "step": 154000 }, { "epoch": 14.44, "learning_rate": 2.111879614917282e-05, "loss": 1.2919, "step": 154500 }, { "epoch": 14.49, "learning_rate": 2.1025329470043932e-05, "loss": 1.3026, "step": 155000 }, { "epoch": 14.53, "learning_rate": 2.093186279091504e-05, "loss": 1.2882, "step": 155500 }, { "epoch": 14.58, "learning_rate": 2.083839611178615e-05, "loss": 1.289, "step": 156000 }, { "epoch": 14.63, "learning_rate": 2.0744929432657258e-05, "loss": 1.2917, "step": 156500 }, { "epoch": 14.67, "learning_rate": 2.065146275352837e-05, "loss": 1.2897, "step": 157000 }, { "epoch": 14.72, "learning_rate": 2.0557996074399475e-05, "loss": 1.2859, "step": 157500 }, { "epoch": 14.77, "learning_rate": 2.0464529395270588e-05, "loss": 1.2924, "step": 158000 }, { "epoch": 14.81, "learning_rate": 2.0371062716141696e-05, "loss": 1.2873, "step": 158500 }, { "epoch": 14.86, "learning_rate": 2.0277596037012805e-05, "loss": 1.29, "step": 159000 }, { "epoch": 14.91, "learning_rate": 2.0184129357883917e-05, "loss": 1.2848, "step": 159500 }, { "epoch": 14.95, "learning_rate": 2.0090662678755026e-05, "loss": 1.2831, "step": 160000 }, { "epoch": 15.0, "learning_rate": 1.9997195999626135e-05, "loss": 1.2841, "step": 160500 }, { "epoch": 15.05, "learning_rate": 1.9903729320497243e-05, "loss": 1.2716, "step": 161000 }, { "epoch": 15.09, "learning_rate": 1.9810262641368352e-05, "loss": 1.2795, "step": 161500 }, { "epoch": 15.14, "learning_rate": 1.971679596223946e-05, "loss": 1.2836, "step": 162000 }, { "epoch": 15.19, "learning_rate": 1.9623329283110573e-05, "loss": 1.2854, "step": 162500 }, { "epoch": 15.24, "learning_rate": 1.952986260398168e-05, "loss": 1.2819, "step": 163000 }, { "epoch": 15.28, "learning_rate": 1.943639592485279e-05, "loss": 1.2762, "step": 163500 }, { "epoch": 15.33, "learning_rate": 1.9342929245723902e-05, "loss": 1.2638, "step": 164000 }, { "epoch": 15.38, "learning_rate": 1.9249462566595008e-05, "loss": 1.269, "step": 164500 }, { "epoch": 15.42, "learning_rate": 1.915599588746612e-05, "loss": 1.2691, "step": 165000 }, { "epoch": 15.47, "learning_rate": 1.906252920833723e-05, "loss": 1.2802, "step": 165500 }, { "epoch": 15.52, "learning_rate": 1.8969062529208337e-05, "loss": 1.275, "step": 166000 }, { "epoch": 15.56, "learning_rate": 1.8875595850079446e-05, "loss": 1.278, "step": 166500 }, { "epoch": 15.61, "learning_rate": 1.8782129170950558e-05, "loss": 1.2768, "step": 167000 }, { "epoch": 15.66, "learning_rate": 1.8688662491821667e-05, "loss": 1.2761, "step": 167500 }, { "epoch": 15.7, "learning_rate": 1.8595195812692775e-05, "loss": 1.271, "step": 168000 }, { "epoch": 15.75, "learning_rate": 1.8501729133563888e-05, "loss": 1.2687, "step": 168500 }, { "epoch": 15.8, "learning_rate": 1.8408262454434993e-05, "loss": 1.2644, "step": 169000 }, { "epoch": 15.84, "learning_rate": 1.8314795775306105e-05, "loss": 1.2732, "step": 169500 }, { "epoch": 15.89, "learning_rate": 1.8221329096177214e-05, "loss": 1.2742, "step": 170000 }, { "epoch": 15.94, "learning_rate": 1.8127862417048322e-05, "loss": 1.266, "step": 170500 }, { "epoch": 15.98, "learning_rate": 1.803439573791943e-05, "loss": 1.27, "step": 171000 }, { "epoch": 16.03, "learning_rate": 1.7940929058790543e-05, "loss": 1.2682, "step": 171500 }, { "epoch": 16.08, "learning_rate": 1.784746237966165e-05, "loss": 1.2584, "step": 172000 }, { "epoch": 16.12, "learning_rate": 1.775399570053276e-05, "loss": 1.2702, "step": 172500 }, { "epoch": 16.17, "learning_rate": 1.766052902140387e-05, "loss": 1.2602, "step": 173000 }, { "epoch": 16.22, "learning_rate": 1.7567062342274978e-05, "loss": 1.2595, "step": 173500 }, { "epoch": 16.26, "learning_rate": 1.747359566314609e-05, "loss": 1.261, "step": 174000 }, { "epoch": 16.31, "learning_rate": 1.73801289840172e-05, "loss": 1.2556, "step": 174500 }, { "epoch": 16.36, "learning_rate": 1.7286662304888308e-05, "loss": 1.2722, "step": 175000 }, { "epoch": 16.4, "learning_rate": 1.7193195625759416e-05, "loss": 1.2553, "step": 175500 }, { "epoch": 16.45, "learning_rate": 1.709972894663053e-05, "loss": 1.2577, "step": 176000 }, { "epoch": 16.5, "learning_rate": 1.7006262267501634e-05, "loss": 1.2607, "step": 176500 }, { "epoch": 16.54, "learning_rate": 1.6912795588372746e-05, "loss": 1.2646, "step": 177000 }, { "epoch": 16.59, "learning_rate": 1.6819328909243855e-05, "loss": 1.267, "step": 177500 }, { "epoch": 16.64, "learning_rate": 1.6725862230114963e-05, "loss": 1.2596, "step": 178000 }, { "epoch": 16.68, "learning_rate": 1.6632395550986075e-05, "loss": 1.2553, "step": 178500 }, { "epoch": 16.73, "learning_rate": 1.6538928871857184e-05, "loss": 1.2538, "step": 179000 }, { "epoch": 16.78, "learning_rate": 1.6445462192728293e-05, "loss": 1.2626, "step": 179500 }, { "epoch": 16.82, "learning_rate": 1.63519955135994e-05, "loss": 1.2551, "step": 180000 }, { "epoch": 16.87, "learning_rate": 1.6258528834470514e-05, "loss": 1.2569, "step": 180500 }, { "epoch": 16.92, "learning_rate": 1.616506215534162e-05, "loss": 1.2535, "step": 181000 }, { "epoch": 16.96, "learning_rate": 1.607159547621273e-05, "loss": 1.2591, "step": 181500 }, { "epoch": 17.01, "learning_rate": 1.597812879708384e-05, "loss": 1.2508, "step": 182000 }, { "epoch": 17.06, "learning_rate": 1.588466211795495e-05, "loss": 1.2517, "step": 182500 }, { "epoch": 17.1, "learning_rate": 1.579119543882606e-05, "loss": 1.2546, "step": 183000 }, { "epoch": 17.15, "learning_rate": 1.569772875969717e-05, "loss": 1.241, "step": 183500 }, { "epoch": 17.2, "learning_rate": 1.5604262080568278e-05, "loss": 1.2421, "step": 184000 }, { "epoch": 17.24, "learning_rate": 1.5510795401439387e-05, "loss": 1.243, "step": 184500 }, { "epoch": 17.29, "learning_rate": 1.54173287223105e-05, "loss": 1.2459, "step": 185000 }, { "epoch": 17.34, "learning_rate": 1.5323862043181604e-05, "loss": 1.2497, "step": 185500 }, { "epoch": 17.38, "learning_rate": 1.5230395364052716e-05, "loss": 1.2433, "step": 186000 }, { "epoch": 17.43, "learning_rate": 1.5136928684923827e-05, "loss": 1.2497, "step": 186500 }, { "epoch": 17.48, "learning_rate": 1.5043462005794934e-05, "loss": 1.2416, "step": 187000 }, { "epoch": 17.53, "learning_rate": 1.4949995326666044e-05, "loss": 1.2457, "step": 187500 }, { "epoch": 17.57, "learning_rate": 1.4856528647537155e-05, "loss": 1.2516, "step": 188000 }, { "epoch": 17.62, "learning_rate": 1.4763061968408262e-05, "loss": 1.2492, "step": 188500 }, { "epoch": 17.67, "learning_rate": 1.4669595289279372e-05, "loss": 1.2462, "step": 189000 }, { "epoch": 17.71, "learning_rate": 1.4576128610150482e-05, "loss": 1.2485, "step": 189500 }, { "epoch": 17.76, "learning_rate": 1.4482661931021591e-05, "loss": 1.2398, "step": 190000 }, { "epoch": 17.81, "learning_rate": 1.4389195251892702e-05, "loss": 1.2464, "step": 190500 }, { "epoch": 17.85, "learning_rate": 1.4295728572763812e-05, "loss": 1.2444, "step": 191000 }, { "epoch": 17.9, "learning_rate": 1.4202261893634919e-05, "loss": 1.2464, "step": 191500 }, { "epoch": 17.95, "learning_rate": 1.410879521450603e-05, "loss": 1.2507, "step": 192000 }, { "epoch": 17.99, "learning_rate": 1.401532853537714e-05, "loss": 1.2374, "step": 192500 }, { "epoch": 18.04, "learning_rate": 1.3921861856248247e-05, "loss": 1.2439, "step": 193000 }, { "epoch": 18.09, "learning_rate": 1.3828395177119357e-05, "loss": 1.2455, "step": 193500 }, { "epoch": 18.13, "learning_rate": 1.3734928497990468e-05, "loss": 1.2368, "step": 194000 }, { "epoch": 18.18, "learning_rate": 1.3641461818861575e-05, "loss": 1.2434, "step": 194500 }, { "epoch": 18.23, "learning_rate": 1.3547995139732685e-05, "loss": 1.2292, "step": 195000 }, { "epoch": 18.27, "learning_rate": 1.3454528460603796e-05, "loss": 1.2316, "step": 195500 }, { "epoch": 18.32, "learning_rate": 1.3361061781474904e-05, "loss": 1.2312, "step": 196000 }, { "epoch": 18.37, "learning_rate": 1.3267595102346015e-05, "loss": 1.229, "step": 196500 }, { "epoch": 18.41, "learning_rate": 1.3174128423217125e-05, "loss": 1.2375, "step": 197000 }, { "epoch": 18.46, "learning_rate": 1.3080661744088232e-05, "loss": 1.2346, "step": 197500 }, { "epoch": 18.51, "learning_rate": 1.2987195064959343e-05, "loss": 1.2334, "step": 198000 }, { "epoch": 18.55, "learning_rate": 1.2893728385830453e-05, "loss": 1.2317, "step": 198500 }, { "epoch": 18.6, "learning_rate": 1.280026170670156e-05, "loss": 1.2342, "step": 199000 }, { "epoch": 18.65, "learning_rate": 1.270679502757267e-05, "loss": 1.2327, "step": 199500 }, { "epoch": 18.69, "learning_rate": 1.261332834844378e-05, "loss": 1.2303, "step": 200000 }, { "epoch": 18.74, "learning_rate": 1.251986166931489e-05, "loss": 1.2406, "step": 200500 }, { "epoch": 18.79, "learning_rate": 1.2426394990186e-05, "loss": 1.2306, "step": 201000 }, { "epoch": 18.83, "learning_rate": 1.2332928311057109e-05, "loss": 1.2289, "step": 201500 }, { "epoch": 18.88, "learning_rate": 1.2239461631928219e-05, "loss": 1.231, "step": 202000 }, { "epoch": 18.93, "learning_rate": 1.2145994952799328e-05, "loss": 1.227, "step": 202500 }, { "epoch": 18.97, "learning_rate": 1.2052528273670437e-05, "loss": 1.2329, "step": 203000 }, { "epoch": 19.02, "learning_rate": 1.1959061594541547e-05, "loss": 1.2278, "step": 203500 }, { "epoch": 19.07, "learning_rate": 1.1865594915412656e-05, "loss": 1.2342, "step": 204000 }, { "epoch": 19.11, "learning_rate": 1.1772128236283764e-05, "loss": 1.2174, "step": 204500 }, { "epoch": 19.16, "learning_rate": 1.1678661557154875e-05, "loss": 1.2299, "step": 205000 }, { "epoch": 19.21, "learning_rate": 1.1585194878025985e-05, "loss": 1.2276, "step": 205500 }, { "epoch": 19.25, "learning_rate": 1.1491728198897094e-05, "loss": 1.2266, "step": 206000 }, { "epoch": 19.3, "learning_rate": 1.1398261519768204e-05, "loss": 1.2229, "step": 206500 }, { "epoch": 19.35, "learning_rate": 1.1304794840639313e-05, "loss": 1.2258, "step": 207000 }, { "epoch": 19.39, "learning_rate": 1.1211328161510422e-05, "loss": 1.2275, "step": 207500 }, { "epoch": 19.44, "learning_rate": 1.1117861482381532e-05, "loss": 1.2148, "step": 208000 }, { "epoch": 19.49, "learning_rate": 1.1024394803252641e-05, "loss": 1.2229, "step": 208500 }, { "epoch": 19.53, "learning_rate": 1.093092812412375e-05, "loss": 1.2218, "step": 209000 }, { "epoch": 19.58, "learning_rate": 1.083746144499486e-05, "loss": 1.2114, "step": 209500 }, { "epoch": 19.63, "learning_rate": 1.0743994765865969e-05, "loss": 1.2243, "step": 210000 }, { "epoch": 19.67, "learning_rate": 1.0650528086737079e-05, "loss": 1.2211, "step": 210500 }, { "epoch": 19.72, "learning_rate": 1.0557061407608188e-05, "loss": 1.2223, "step": 211000 }, { "epoch": 19.77, "learning_rate": 1.0463594728479298e-05, "loss": 1.2263, "step": 211500 }, { "epoch": 19.81, "learning_rate": 1.0370128049350407e-05, "loss": 1.2184, "step": 212000 }, { "epoch": 19.86, "learning_rate": 1.0276661370221516e-05, "loss": 1.2193, "step": 212500 }, { "epoch": 19.91, "learning_rate": 1.0183194691092626e-05, "loss": 1.2147, "step": 213000 }, { "epoch": 19.96, "learning_rate": 1.0089728011963735e-05, "loss": 1.2179, "step": 213500 }, { "epoch": 20.0, "learning_rate": 9.996261332834844e-06, "loss": 1.2093, "step": 214000 }, { "epoch": 20.05, "learning_rate": 9.902794653705954e-06, "loss": 1.2069, "step": 214500 }, { "epoch": 20.1, "learning_rate": 9.809327974577064e-06, "loss": 1.2147, "step": 215000 }, { "epoch": 20.14, "learning_rate": 9.715861295448173e-06, "loss": 1.2125, "step": 215500 }, { "epoch": 20.19, "learning_rate": 9.622394616319283e-06, "loss": 1.2221, "step": 216000 }, { "epoch": 20.24, "learning_rate": 9.528927937190392e-06, "loss": 1.2056, "step": 216500 }, { "epoch": 20.28, "learning_rate": 9.435461258061501e-06, "loss": 1.211, "step": 217000 }, { "epoch": 20.33, "learning_rate": 9.341994578932611e-06, "loss": 1.2146, "step": 217500 }, { "epoch": 20.38, "learning_rate": 9.24852789980372e-06, "loss": 1.2152, "step": 218000 }, { "epoch": 20.42, "learning_rate": 9.155061220674829e-06, "loss": 1.2146, "step": 218500 }, { "epoch": 20.47, "learning_rate": 9.06159454154594e-06, "loss": 1.2256, "step": 219000 }, { "epoch": 20.52, "learning_rate": 8.968127862417048e-06, "loss": 1.2058, "step": 219500 }, { "epoch": 20.56, "learning_rate": 8.874661183288158e-06, "loss": 1.2128, "step": 220000 }, { "epoch": 20.61, "learning_rate": 8.781194504159269e-06, "loss": 1.2137, "step": 220500 }, { "epoch": 20.66, "learning_rate": 8.687727825030377e-06, "loss": 1.2129, "step": 221000 }, { "epoch": 20.7, "learning_rate": 8.594261145901486e-06, "loss": 1.2181, "step": 221500 }, { "epoch": 20.75, "learning_rate": 8.500794466772597e-06, "loss": 1.201, "step": 222000 }, { "epoch": 20.8, "learning_rate": 8.407327787643705e-06, "loss": 1.2162, "step": 222500 }, { "epoch": 20.84, "learning_rate": 8.313861108514814e-06, "loss": 1.2077, "step": 223000 }, { "epoch": 20.89, "learning_rate": 8.220394429385924e-06, "loss": 1.2098, "step": 223500 }, { "epoch": 20.94, "learning_rate": 8.126927750257033e-06, "loss": 1.2092, "step": 224000 }, { "epoch": 20.98, "learning_rate": 8.033461071128144e-06, "loss": 1.2132, "step": 224500 }, { "epoch": 21.03, "learning_rate": 7.939994391999252e-06, "loss": 1.2166, "step": 225000 }, { "epoch": 21.08, "learning_rate": 7.846527712870363e-06, "loss": 1.2063, "step": 225500 }, { "epoch": 21.12, "learning_rate": 7.753061033741471e-06, "loss": 1.2029, "step": 226000 }, { "epoch": 21.17, "learning_rate": 7.659594354612582e-06, "loss": 1.201, "step": 226500 }, { "epoch": 21.22, "learning_rate": 7.5661276754836905e-06, "loss": 1.2006, "step": 227000 }, { "epoch": 21.26, "learning_rate": 7.472660996354799e-06, "loss": 1.2028, "step": 227500 }, { "epoch": 21.31, "learning_rate": 7.37919431722591e-06, "loss": 1.1959, "step": 228000 }, { "epoch": 21.36, "learning_rate": 7.285727638097019e-06, "loss": 1.2126, "step": 228500 }, { "epoch": 21.4, "learning_rate": 7.192260958968128e-06, "loss": 1.208, "step": 229000 }, { "epoch": 21.45, "learning_rate": 7.098794279839238e-06, "loss": 1.2026, "step": 229500 }, { "epoch": 21.5, "learning_rate": 7.005327600710347e-06, "loss": 1.2069, "step": 230000 }, { "epoch": 21.54, "learning_rate": 6.911860921581456e-06, "loss": 1.2042, "step": 230500 }, { "epoch": 21.59, "learning_rate": 6.818394242452566e-06, "loss": 1.2057, "step": 231000 }, { "epoch": 21.64, "learning_rate": 6.724927563323676e-06, "loss": 1.212, "step": 231500 }, { "epoch": 21.68, "learning_rate": 6.6314608841947845e-06, "loss": 1.2012, "step": 232000 }, { "epoch": 21.73, "learning_rate": 6.537994205065895e-06, "loss": 1.2066, "step": 232500 }, { "epoch": 21.78, "learning_rate": 6.444527525937004e-06, "loss": 1.2042, "step": 233000 }, { "epoch": 21.82, "learning_rate": 6.351060846808113e-06, "loss": 1.2101, "step": 233500 }, { "epoch": 21.87, "learning_rate": 6.257594167679224e-06, "loss": 1.1961, "step": 234000 }, { "epoch": 21.92, "learning_rate": 6.164127488550332e-06, "loss": 1.1994, "step": 234500 }, { "epoch": 21.96, "learning_rate": 6.070660809421441e-06, "loss": 1.2034, "step": 235000 }, { "epoch": 22.01, "learning_rate": 5.977194130292551e-06, "loss": 1.1977, "step": 235500 }, { "epoch": 22.06, "learning_rate": 5.883727451163661e-06, "loss": 1.1961, "step": 236000 }, { "epoch": 22.1, "learning_rate": 5.79026077203477e-06, "loss": 1.1929, "step": 236500 }, { "epoch": 22.15, "learning_rate": 5.696794092905879e-06, "loss": 1.1968, "step": 237000 }, { "epoch": 22.2, "learning_rate": 5.603327413776989e-06, "loss": 1.1946, "step": 237500 }, { "epoch": 22.25, "learning_rate": 5.5098607346480976e-06, "loss": 1.1971, "step": 238000 }, { "epoch": 22.29, "learning_rate": 5.416394055519208e-06, "loss": 1.2018, "step": 238500 }, { "epoch": 22.34, "learning_rate": 5.3229273763903175e-06, "loss": 1.1986, "step": 239000 }, { "epoch": 22.39, "learning_rate": 5.229460697261426e-06, "loss": 1.1877, "step": 239500 }, { "epoch": 22.43, "learning_rate": 5.135994018132536e-06, "loss": 1.1955, "step": 240000 }, { "epoch": 22.48, "learning_rate": 5.042527339003645e-06, "loss": 1.1989, "step": 240500 }, { "epoch": 22.53, "learning_rate": 4.949060659874755e-06, "loss": 1.1888, "step": 241000 }, { "epoch": 22.57, "learning_rate": 4.8555939807458645e-06, "loss": 1.2003, "step": 241500 }, { "epoch": 22.62, "learning_rate": 4.762127301616974e-06, "loss": 1.1945, "step": 242000 }, { "epoch": 22.67, "learning_rate": 4.668660622488083e-06, "loss": 1.1833, "step": 242500 }, { "epoch": 22.71, "learning_rate": 4.575193943359192e-06, "loss": 1.1866, "step": 243000 }, { "epoch": 22.76, "learning_rate": 4.481727264230303e-06, "loss": 1.1924, "step": 243500 }, { "epoch": 22.81, "learning_rate": 4.3882605851014115e-06, "loss": 1.1924, "step": 244000 }, { "epoch": 22.85, "learning_rate": 4.294793905972521e-06, "loss": 1.1894, "step": 244500 }, { "epoch": 22.9, "learning_rate": 4.201327226843631e-06, "loss": 1.2008, "step": 245000 }, { "epoch": 22.95, "learning_rate": 4.107860547714739e-06, "loss": 1.1959, "step": 245500 }, { "epoch": 22.99, "learning_rate": 4.01439386858585e-06, "loss": 1.1996, "step": 246000 }, { "epoch": 23.04, "learning_rate": 3.920927189456959e-06, "loss": 1.1928, "step": 246500 }, { "epoch": 23.09, "learning_rate": 3.827460510328068e-06, "loss": 1.195, "step": 247000 }, { "epoch": 23.13, "learning_rate": 3.7339938311991776e-06, "loss": 1.1873, "step": 247500 }, { "epoch": 23.18, "learning_rate": 3.6405271520702876e-06, "loss": 1.1977, "step": 248000 }, { "epoch": 23.23, "learning_rate": 3.5470604729413963e-06, "loss": 1.1909, "step": 248500 }, { "epoch": 23.27, "learning_rate": 3.4535937938125063e-06, "loss": 1.1881, "step": 249000 }, { "epoch": 23.32, "learning_rate": 3.360127114683616e-06, "loss": 1.1914, "step": 249500 }, { "epoch": 23.37, "learning_rate": 3.266660435554725e-06, "loss": 1.1813, "step": 250000 }, { "epoch": 23.41, "learning_rate": 3.1731937564258346e-06, "loss": 1.2004, "step": 250500 }, { "epoch": 23.46, "learning_rate": 3.0797270772969437e-06, "loss": 1.1867, "step": 251000 }, { "epoch": 23.51, "learning_rate": 2.9862603981680533e-06, "loss": 1.1933, "step": 251500 }, { "epoch": 23.55, "learning_rate": 2.892793719039163e-06, "loss": 1.1834, "step": 252000 }, { "epoch": 23.6, "learning_rate": 2.799327039910272e-06, "loss": 1.18, "step": 252500 }, { "epoch": 23.65, "learning_rate": 2.7058603607813815e-06, "loss": 1.1923, "step": 253000 }, { "epoch": 23.69, "learning_rate": 2.612393681652491e-06, "loss": 1.1912, "step": 253500 }, { "epoch": 23.74, "learning_rate": 2.5189270025236007e-06, "loss": 1.1888, "step": 254000 }, { "epoch": 23.79, "learning_rate": 2.42546032339471e-06, "loss": 1.1843, "step": 254500 }, { "epoch": 23.83, "learning_rate": 2.3319936442658194e-06, "loss": 1.1911, "step": 255000 }, { "epoch": 23.88, "learning_rate": 2.238526965136929e-06, "loss": 1.1943, "step": 255500 }, { "epoch": 23.93, "learning_rate": 2.145060286008038e-06, "loss": 1.1811, "step": 256000 }, { "epoch": 23.97, "learning_rate": 2.0515936068791476e-06, "loss": 1.1904, "step": 256500 }, { "epoch": 24.02, "learning_rate": 1.958126927750257e-06, "loss": 1.1752, "step": 257000 }, { "epoch": 24.07, "learning_rate": 1.8646602486213666e-06, "loss": 1.1851, "step": 257500 }, { "epoch": 24.11, "learning_rate": 1.771193569492476e-06, "loss": 1.1874, "step": 258000 }, { "epoch": 24.16, "learning_rate": 1.6777268903635857e-06, "loss": 1.1904, "step": 258500 }, { "epoch": 24.21, "learning_rate": 1.5842602112346948e-06, "loss": 1.188, "step": 259000 }, { "epoch": 24.25, "learning_rate": 1.4907935321058044e-06, "loss": 1.1808, "step": 259500 }, { "epoch": 24.3, "learning_rate": 1.3973268529769137e-06, "loss": 1.1855, "step": 260000 }, { "epoch": 24.35, "learning_rate": 1.3038601738480233e-06, "loss": 1.1856, "step": 260500 }, { "epoch": 24.39, "learning_rate": 1.2103934947191327e-06, "loss": 1.1943, "step": 261000 }, { "epoch": 24.44, "learning_rate": 1.1169268155902422e-06, "loss": 1.19, "step": 261500 }, { "epoch": 24.49, "learning_rate": 1.0234601364613516e-06, "loss": 1.1819, "step": 262000 }, { "epoch": 24.54, "learning_rate": 9.299934573324609e-07, "loss": 1.1836, "step": 262500 }, { "epoch": 24.58, "learning_rate": 8.365267782035705e-07, "loss": 1.182, "step": 263000 }, { "epoch": 24.63, "learning_rate": 7.4306009907468e-07, "loss": 1.1844, "step": 263500 }, { "epoch": 24.68, "learning_rate": 6.495934199457893e-07, "loss": 1.1928, "step": 264000 }, { "epoch": 24.72, "learning_rate": 5.561267408168989e-07, "loss": 1.1838, "step": 264500 }, { "epoch": 24.77, "learning_rate": 4.626600616880083e-07, "loss": 1.1849, "step": 265000 }, { "epoch": 24.82, "learning_rate": 3.691933825591177e-07, "loss": 1.1851, "step": 265500 }, { "epoch": 24.86, "learning_rate": 2.7572670343022714e-07, "loss": 1.1919, "step": 266000 }, { "epoch": 24.91, "learning_rate": 1.8226002430133658e-07, "loss": 1.1752, "step": 266500 }, { "epoch": 24.96, "learning_rate": 8.879334517244602e-08, "loss": 1.1807, "step": 267000 } ], "logging_steps": 500, "max_steps": 267475, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "total_flos": 1.1243414742110208e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }