diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,94240 +1,51960 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 4.6764159947816, - "global_step": 7800000, + "epoch": 2.5780242022513944, + "global_step": 4300000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 2.7664e-06, - "loss": 5.2774, + "learning_rate": 2.7608e-06, + "loss": 3.1459, "step": 500 }, { "epoch": 0.0, - "learning_rate": 5.5664e-06, - "loss": 2.8368, + "learning_rate": 5.5608e-06, + "loss": 2.2923, "step": 1000 }, { "epoch": 0.0, - "learning_rate": 8.3664e-06, - "loss": 2.7102, + "learning_rate": 8.3608e-06, + "loss": 2.1518, "step": 1500 }, { "epoch": 0.0, - "learning_rate": 1.1166399999999999e-05, - "loss": 2.563, + "learning_rate": 1.11608e-05, + "loss": 2.0375, "step": 2000 }, { "epoch": 0.0, - "learning_rate": 1.39664e-05, - "loss": 2.4577, + "learning_rate": 1.39552e-05, + "loss": 1.9593, "step": 2500 }, { "epoch": 0.0, - "learning_rate": 1.67664e-05, - "loss": 2.3862, + "learning_rate": 1.6755200000000002e-05, + "loss": 1.8917, "step": 3000 }, { "epoch": 0.0, - "learning_rate": 1.95664e-05, - "loss": 2.3317, + "learning_rate": 1.95552e-05, + "loss": 1.8308, "step": 3500 }, { "epoch": 0.0, - "learning_rate": 2.23664e-05, - "loss": 2.2799, + "learning_rate": 2.23552e-05, + "loss": 1.771, "step": 4000 }, { "epoch": 0.0, - "learning_rate": 2.51664e-05, - "loss": 2.2089, + "learning_rate": 2.51552e-05, + "loss": 1.7222, "step": 4500 }, { "epoch": 0.0, - "learning_rate": 2.79664e-05, - "loss": 2.1503, + "learning_rate": 2.79552e-05, + "loss": 1.6701, "step": 5000 }, { "epoch": 0.0, - "learning_rate": 3.07664e-05, - "loss": 2.1188, + "learning_rate": 3.07552e-05, + "loss": 1.6394, "step": 5500 }, { "epoch": 0.0, - "learning_rate": 3.3566400000000004e-05, - "loss": 2.0651, + "learning_rate": 3.35552e-05, + "loss": 1.6025, "step": 6000 }, { "epoch": 0.0, - "learning_rate": 3.63664e-05, - "loss": 2.01, + "learning_rate": 3.63552e-05, + "loss": 1.558, "step": 6500 }, { "epoch": 0.0, - "learning_rate": 3.91664e-05, - "loss": 1.952, + "learning_rate": 3.91552e-05, + "loss": 1.5174, "step": 7000 }, { "epoch": 0.0, - "learning_rate": 4.19664e-05, - "loss": 1.8711, + "learning_rate": 4.19552e-05, + "loss": 1.4765, "step": 7500 }, { "epoch": 0.0, - "learning_rate": 4.47664e-05, - "loss": 1.8241, + "learning_rate": 4.47552e-05, + "loss": 1.4487, "step": 8000 }, { "epoch": 0.01, - "learning_rate": 4.75664e-05, - "loss": 1.7604, + "learning_rate": 4.7555199999999995e-05, + "loss": 1.407, "step": 8500 }, { "epoch": 0.01, - "learning_rate": 5.03664e-05, - "loss": 1.7357, + "learning_rate": 5.03552e-05, + "loss": 1.3904, "step": 9000 }, { "epoch": 0.01, - "learning_rate": 5.31664e-05, - "loss": 1.6838, + "learning_rate": 5.31552e-05, + "loss": 1.3567, "step": 9500 }, { "epoch": 0.01, - "learning_rate": 5.59664e-05, - "loss": 1.6501, + "learning_rate": 5.5955199999999996e-05, + "loss": 1.3242, "step": 10000 }, { "epoch": 0.01, - "learning_rate": 5.599792523402617e-05, - "loss": 1.5994, + "learning_rate": 5.59979336338884e-05, + "loss": 1.2825, "step": 10500 }, { "epoch": 0.01, - "learning_rate": 5.5995825268465594e-05, - "loss": 1.5364, + "learning_rate": 5.5995833668327834e-05, + "loss": 1.2554, "step": 11000 }, { "epoch": 0.01, - "learning_rate": 5.599372530290503e-05, - "loss": 1.5108, + "learning_rate": 5.5993733702767274e-05, + "loss": 1.2237, "step": 11500 }, { "epoch": 0.01, - "learning_rate": 5.599162533734447e-05, - "loss": 1.4591, + "learning_rate": 5.599163373720671e-05, + "loss": 1.191, "step": 12000 }, { "epoch": 0.01, - "learning_rate": 5.59895253717839e-05, - "loss": 1.4329, + "learning_rate": 5.598953377164614e-05, + "loss": 1.1583, "step": 12500 }, { "epoch": 0.01, - "learning_rate": 5.5987425406223334e-05, - "loss": 1.3767, + "learning_rate": 5.598744220594782e-05, + "loss": 1.1338, "step": 13000 }, { "epoch": 0.01, - "learning_rate": 5.5985325440662774e-05, - "loss": 1.3685, + "learning_rate": 5.598534224038726e-05, + "loss": 1.1141, "step": 13500 }, { "epoch": 0.01, - "learning_rate": 5.598322967503333e-05, - "loss": 1.3409, + "learning_rate": 5.5983242274826695e-05, + "loss": 1.0981, "step": 14000 }, { "epoch": 0.01, - "learning_rate": 5.598112970947276e-05, - "loss": 1.2896, + "learning_rate": 5.598114230926613e-05, + "loss": 1.0626, "step": 14500 }, { "epoch": 0.01, - "learning_rate": 5.5979029743912195e-05, - "loss": 1.2794, + "learning_rate": 5.597904234370556e-05, + "loss": 1.0491, "step": 15000 }, { "epoch": 0.01, - "learning_rate": 5.5976929778351635e-05, - "loss": 1.267, + "learning_rate": 5.5976942378144995e-05, + "loss": 1.0267, "step": 15500 }, { "epoch": 0.01, - "learning_rate": 5.597483401272219e-05, - "loss": 1.2646, + "learning_rate": 5.597484241258443e-05, + "loss": 1.0133, "step": 16000 }, { "epoch": 0.01, - "learning_rate": 5.597273404716162e-05, - "loss": 1.2289, + "learning_rate": 5.597274244702387e-05, + "loss": 0.9943, "step": 16500 }, { "epoch": 0.01, - "learning_rate": 5.597063828153218e-05, - "loss": 1.2068, + "learning_rate": 5.59706424814633e-05, + "loss": 0.9781, "step": 17000 }, { "epoch": 0.01, - "learning_rate": 5.596853831597162e-05, - "loss": 1.1855, + "learning_rate": 5.5968546715833856e-05, + "loss": 0.962, "step": 17500 }, { "epoch": 0.01, - "learning_rate": 5.5966442550342176e-05, - "loss": 1.157, + "learning_rate": 5.5966450950204417e-05, + "loss": 0.932, "step": 18000 }, { "epoch": 0.01, - "learning_rate": 5.596434258478161e-05, - "loss": 1.1478, + "learning_rate": 5.596435098464385e-05, + "loss": 0.9308, "step": 18500 }, { "epoch": 0.01, - "learning_rate": 5.596224261922104e-05, - "loss": 1.1547, + "learning_rate": 5.596225101908329e-05, + "loss": 0.9272, "step": 19000 }, { "epoch": 0.01, - "learning_rate": 5.5960142653660483e-05, - "loss": 1.1175, + "learning_rate": 5.596015105352272e-05, + "loss": 0.891, "step": 19500 }, { "epoch": 0.01, - "learning_rate": 5.595804268809992e-05, - "loss": 1.1145, + "learning_rate": 5.595805108796216e-05, + "loss": 0.892, "step": 20000 }, { "epoch": 0.01, - "learning_rate": 5.5955942722539344e-05, - "loss": 1.1134, + "learning_rate": 5.595595112240159e-05, + "loss": 0.8786, "step": 20500 }, { "epoch": 0.01, - "learning_rate": 5.5953842756978784e-05, - "loss": 1.0724, + "learning_rate": 5.5953851156841024e-05, + "loss": 0.8616, "step": 21000 }, { "epoch": 0.01, - "learning_rate": 5.595174279141822e-05, - "loss": 1.0697, + "learning_rate": 5.5951755391211584e-05, + "loss": 0.8534, "step": 21500 }, { "epoch": 0.01, - "learning_rate": 5.594964282585766e-05, - "loss": 1.0638, + "learning_rate": 5.594965542565102e-05, + "loss": 0.8456, "step": 22000 }, { "epoch": 0.01, - "learning_rate": 5.594754286029709e-05, - "loss": 1.0493, + "learning_rate": 5.594755546009045e-05, + "loss": 0.8451, "step": 22500 }, { "epoch": 0.01, - "learning_rate": 5.5945442894736524e-05, - "loss": 1.0498, + "learning_rate": 5.5945455494529885e-05, + "loss": 0.8221, "step": 23000 }, { "epoch": 0.01, - "learning_rate": 5.5943342929175965e-05, - "loss": 1.0211, + "learning_rate": 5.5943355528969325e-05, + "loss": 0.819, "step": 23500 }, { "epoch": 0.01, - "learning_rate": 5.594124716354652e-05, - "loss": 1.0341, + "learning_rate": 5.594125556340876e-05, + "loss": 0.8183, "step": 24000 }, { "epoch": 0.01, - "learning_rate": 5.593914719798595e-05, - "loss": 1.0237, + "learning_rate": 5.593915559784819e-05, + "loss": 0.7892, "step": 24500 }, { "epoch": 0.01, - "learning_rate": 5.5937047232425385e-05, - "loss": 1.0052, + "learning_rate": 5.593705563228763e-05, + "loss": 0.7881, "step": 25000 }, { "epoch": 0.02, - "learning_rate": 5.5934947266864825e-05, - "loss": 0.9896, + "learning_rate": 5.5934955666727066e-05, + "loss": 0.7828, "step": 25500 }, { "epoch": 0.02, - "learning_rate": 5.593285150123538e-05, - "loss": 1.0035, + "learning_rate": 5.59328557011665e-05, + "loss": 0.7862, "step": 26000 }, { "epoch": 0.02, - "learning_rate": 5.593075153567481e-05, - "loss": 0.971, + "learning_rate": 5.593075573560594e-05, + "loss": 0.7678, "step": 26500 }, { "epoch": 0.02, "learning_rate": 5.592865577004537e-05, - "loss": 0.9709, + "loss": 0.7619, "step": 27000 }, { "epoch": 0.02, - "learning_rate": 5.59265558044848e-05, - "loss": 0.9857, + "learning_rate": 5.5926560004415926e-05, + "loss": 0.7673, "step": 27500 }, { "epoch": 0.02, - "learning_rate": 5.592445583892424e-05, - "loss": 0.9686, + "learning_rate": 5.592446003885537e-05, + "loss": 0.7457, "step": 28000 }, { "epoch": 0.02, - "learning_rate": 5.592235587336367e-05, - "loss": 0.9634, + "learning_rate": 5.59223600732948e-05, + "loss": 0.7518, "step": 28500 }, { "epoch": 0.02, "learning_rate": 5.5920260107734234e-05, - "loss": 0.9445, + "loss": 0.7346, "step": 29000 }, { "epoch": 0.02, "learning_rate": 5.5918160142173674e-05, - "loss": 0.9376, + "loss": 0.7264, "step": 29500 }, { "epoch": 0.02, "learning_rate": 5.59160601766131e-05, - "loss": 0.9412, + "loss": 0.7292, "step": 30000 }, { "epoch": 0.02, "learning_rate": 5.5913960211052534e-05, - "loss": 0.9431, + "loss": 0.7319, "step": 30500 }, { "epoch": 0.02, "learning_rate": 5.5911860245491974e-05, - "loss": 0.9063, + "loss": 0.7061, "step": 31000 }, { "epoch": 0.02, "learning_rate": 5.590976027993141e-05, - "loss": 0.9193, + "loss": 0.7116, "step": 31500 }, { "epoch": 0.02, - "learning_rate": 5.590766031437084e-05, - "loss": 0.9074, + "learning_rate": 5.590766451430196e-05, + "loss": 0.699, "step": 32000 }, { "epoch": 0.02, - "learning_rate": 5.590556034881028e-05, - "loss": 0.9163, + "learning_rate": 5.5905564548741395e-05, + "loss": 0.71, "step": 32500 }, { "epoch": 0.02, - "learning_rate": 5.5903460383249715e-05, - "loss": 0.8946, + "learning_rate": 5.5903472983043075e-05, + "loss": 0.6935, "step": 33000 }, { "epoch": 0.02, - "learning_rate": 5.590136041768915e-05, - "loss": 0.8955, + "learning_rate": 5.590137301748251e-05, + "loss": 0.6926, "step": 33500 }, { "epoch": 0.02, - "learning_rate": 5.589926045212859e-05, - "loss": 0.886, + "learning_rate": 5.589927305192195e-05, + "loss": 0.6876, "step": 34000 }, { "epoch": 0.02, - "learning_rate": 5.589716048656802e-05, - "loss": 0.8956, + "learning_rate": 5.589717308636138e-05, + "loss": 0.6931, "step": 34500 }, { "epoch": 0.02, - "learning_rate": 5.5895060521007455e-05, - "loss": 0.8857, + "learning_rate": 5.589507312080082e-05, + "loss": 0.6736, "step": 35000 }, { "epoch": 0.02, - "learning_rate": 5.589296055544689e-05, - "loss": 0.891, + "learning_rate": 5.5892973155240256e-05, + "loss": 0.6877, "step": 35500 }, { "epoch": 0.02, - "learning_rate": 5.589086058988632e-05, - "loss": 0.8747, + "learning_rate": 5.589087318967969e-05, + "loss": 0.6748, "step": 36000 }, { "epoch": 0.02, - "learning_rate": 5.5888760624325756e-05, - "loss": 0.8796, + "learning_rate": 5.588877322411913e-05, + "loss": 0.6819, "step": 36500 }, { "epoch": 0.02, - "learning_rate": 5.5886664858696316e-05, - "loss": 0.8558, + "learning_rate": 5.5886673258558556e-05, + "loss": 0.6536, "step": 37000 }, { "epoch": 0.02, - "learning_rate": 5.588456489313575e-05, - "loss": 0.8488, + "learning_rate": 5.588457329299799e-05, + "loss": 0.6594, "step": 37500 }, { "epoch": 0.02, - "learning_rate": 5.588246492757518e-05, - "loss": 0.8656, + "learning_rate": 5.588247332743743e-05, + "loss": 0.6567, "step": 38000 }, { "epoch": 0.02, - "learning_rate": 5.588036496201462e-05, - "loss": 0.8456, + "learning_rate": 5.5880373361876863e-05, + "loss": 0.6501, "step": 38500 }, { "epoch": 0.02, - "learning_rate": 5.5878269196385184e-05, - "loss": 0.8532, + "learning_rate": 5.58782733963163e-05, + "loss": 0.6611, "step": 39000 }, { "epoch": 0.02, - "learning_rate": 5.587616923082462e-05, - "loss": 0.8454, + "learning_rate": 5.587617343075574e-05, + "loss": 0.6472, "step": 39500 }, { "epoch": 0.02, "learning_rate": 5.587407346519517e-05, - "loss": 0.8487, + "loss": 0.6428, "step": 40000 }, { "epoch": 0.02, "learning_rate": 5.5871973499634604e-05, - "loss": 0.839, + "loss": 0.6432, "step": 40500 }, { "epoch": 0.02, "learning_rate": 5.5869873534074044e-05, - "loss": 0.8438, + "loss": 0.6463, "step": 41000 }, { "epoch": 0.02, - "learning_rate": 5.586777356851348e-05, - "loss": 0.8217, + "learning_rate": 5.58677777684446e-05, + "loss": 0.6352, "step": 41500 }, { "epoch": 0.03, - "learning_rate": 5.586567360295291e-05, - "loss": 0.8215, + "learning_rate": 5.586567780288403e-05, + "loss": 0.6305, "step": 42000 }, { "epoch": 0.03, - "learning_rate": 5.5863573637392345e-05, - "loss": 0.8145, + "learning_rate": 5.5863577837323465e-05, + "loss": 0.6295, "step": 42500 }, { "epoch": 0.03, - "learning_rate": 5.586147367183178e-05, - "loss": 0.825, + "learning_rate": 5.5861477871762905e-05, + "loss": 0.6243, "step": 43000 }, { "epoch": 0.03, - "learning_rate": 5.585937370627121e-05, - "loss": 0.8251, + "learning_rate": 5.585937790620234e-05, + "loss": 0.6334, "step": 43500 }, { "epoch": 0.03, - "learning_rate": 5.585727374071065e-05, - "loss": 0.8294, + "learning_rate": 5.585727794064177e-05, + "loss": 0.6281, "step": 44000 }, { "epoch": 0.03, - "learning_rate": 5.5855173775150085e-05, - "loss": 0.83, + "learning_rate": 5.585517797508121e-05, + "loss": 0.6229, "step": 44500 }, { "epoch": 0.03, - "learning_rate": 5.585307380958952e-05, - "loss": 0.813, + "learning_rate": 5.585307800952064e-05, + "loss": 0.6205, "step": 45000 }, { "epoch": 0.03, - "learning_rate": 5.585097384402896e-05, - "loss": 0.8084, + "learning_rate": 5.585097804396008e-05, + "loss": 0.608, "step": 45500 }, { "epoch": 0.03, "learning_rate": 5.584888227833064e-05, - "loss": 0.7944, + "loss": 0.6021, "step": 46000 }, { "epoch": 0.03, - "learning_rate": 5.584678651270119e-05, - "loss": 0.8026, + "learning_rate": 5.584678231277007e-05, + "loss": 0.6154, "step": 46500 }, { "epoch": 0.03, - "learning_rate": 5.5844686547140627e-05, - "loss": 0.8034, + "learning_rate": 5.5844682347209506e-05, + "loss": 0.6113, "step": 47000 }, { "epoch": 0.03, - "learning_rate": 5.584258658158006e-05, - "loss": 0.7919, + "learning_rate": 5.584259078151118e-05, + "loss": 0.5978, "step": 47500 }, { "epoch": 0.03, "learning_rate": 5.5840490815950614e-05, - "loss": 0.7927, + "loss": 0.6058, "step": 48000 }, { "epoch": 0.03, "learning_rate": 5.5838390850390054e-05, - "loss": 0.8037, + "loss": 0.6041, "step": 48500 }, { "epoch": 0.03, "learning_rate": 5.583629088482949e-05, - "loss": 0.7751, + "loss": 0.587, "step": 49000 }, { "epoch": 0.03, - "learning_rate": 5.583419091926892e-05, - "loss": 0.7858, + "learning_rate": 5.583419931913117e-05, + "loss": 0.5985, "step": 49500 }, { "epoch": 0.03, - "learning_rate": 5.583209095370836e-05, - "loss": 0.7772, + "learning_rate": 5.58320993535706e-05, + "loss": 0.5861, "step": 50000 }, { "epoch": 0.03, - "learning_rate": 5.5829995188078915e-05, - "loss": 0.7833, + "learning_rate": 5.582999938801004e-05, + "loss": 0.5957, "step": 50500 }, { "epoch": 0.03, - "learning_rate": 5.582789522251835e-05, - "loss": 0.7686, + "learning_rate": 5.5827899422449475e-05, + "loss": 0.5793, "step": 51000 }, { "epoch": 0.03, - "learning_rate": 5.582579525695779e-05, - "loss": 0.7825, + "learning_rate": 5.58257994568889e-05, + "loss": 0.5888, "step": 51500 }, { "epoch": 0.03, - "learning_rate": 5.582369529139722e-05, - "loss": 0.7745, + "learning_rate": 5.582369949132834e-05, + "loss": 0.5848, "step": 52000 }, { "epoch": 0.03, - "learning_rate": 5.5821595325836655e-05, - "loss": 0.7813, + "learning_rate": 5.5821599525767775e-05, + "loss": 0.5887, "step": 52500 }, { "epoch": 0.03, - "learning_rate": 5.5819495360276095e-05, - "loss": 0.7818, + "learning_rate": 5.581949956020721e-05, + "loss": 0.5913, "step": 53000 }, { "epoch": 0.03, - "learning_rate": 5.581739539471553e-05, - "loss": 0.7688, + "learning_rate": 5.581739959464665e-05, + "loss": 0.5806, "step": 53500 }, { "epoch": 0.03, - "learning_rate": 5.581529542915496e-05, - "loss": 0.7534, + "learning_rate": 5.581529962908608e-05, + "loss": 0.5742, "step": 54000 }, { "epoch": 0.03, - "learning_rate": 5.5813195463594396e-05, - "loss": 0.7563, + "learning_rate": 5.5813203863456636e-05, + "loss": 0.5717, "step": 54500 }, { "epoch": 0.03, - "learning_rate": 5.581109549803383e-05, - "loss": 0.7565, + "learning_rate": 5.581110389789607e-05, + "loss": 0.5705, "step": 55000 }, { "epoch": 0.03, - "learning_rate": 5.580899553247326e-05, - "loss": 0.7614, + "learning_rate": 5.580900393233551e-05, + "loss": 0.5651, "step": 55500 }, { "epoch": 0.03, - "learning_rate": 5.58068955669127e-05, - "loss": 0.7446, + "learning_rate": 5.580690396677494e-05, + "loss": 0.566, "step": 56000 }, { "epoch": 0.03, - "learning_rate": 5.5804795601352136e-05, - "loss": 0.7602, + "learning_rate": 5.5804804001214377e-05, + "loss": 0.5639, "step": 56500 }, { "epoch": 0.03, - "learning_rate": 5.580269563579157e-05, - "loss": 0.7522, + "learning_rate": 5.580270403565382e-05, + "loss": 0.5625, "step": 57000 }, { "epoch": 0.03, - "learning_rate": 5.5800599870162123e-05, - "loss": 0.7555, + "learning_rate": 5.580060827002437e-05, + "loss": 0.5732, "step": 57500 }, { "epoch": 0.03, - "learning_rate": 5.5798499904601564e-05, - "loss": 0.7548, + "learning_rate": 5.5798508304463804e-05, + "loss": 0.5654, "step": 58000 }, { "epoch": 0.04, - "learning_rate": 5.5796399939041e-05, - "loss": 0.7483, + "learning_rate": 5.5796408338903244e-05, + "loss": 0.5634, "step": 58500 }, { "epoch": 0.04, - "learning_rate": 5.579429997348043e-05, - "loss": 0.7502, + "learning_rate": 5.579430837334268e-05, + "loss": 0.5601, "step": 59000 }, { "epoch": 0.04, - "learning_rate": 5.579220000791987e-05, - "loss": 0.7493, + "learning_rate": 5.579221260771323e-05, + "loss": 0.5543, "step": 59500 }, { "epoch": 0.04, - "learning_rate": 5.5790100042359304e-05, - "loss": 0.7501, + "learning_rate": 5.5790112642152665e-05, + "loss": 0.5553, "step": 60000 }, { "epoch": 0.04, - "learning_rate": 5.578800007679874e-05, - "loss": 0.7415, + "learning_rate": 5.5788012676592105e-05, + "loss": 0.5601, "step": 60500 }, { "epoch": 0.04, - "learning_rate": 5.578590011123818e-05, - "loss": 0.7266, + "learning_rate": 5.578591271103154e-05, + "loss": 0.5546, "step": 61000 }, { "epoch": 0.04, - "learning_rate": 5.578380434560873e-05, - "loss": 0.7446, + "learning_rate": 5.578381694540209e-05, + "loss": 0.5519, "step": 61500 }, { "epoch": 0.04, - "learning_rate": 5.5781704380048165e-05, - "loss": 0.7204, + "learning_rate": 5.5781716979841525e-05, + "loss": 0.5419, "step": 62000 }, { "epoch": 0.04, - "learning_rate": 5.5779604414487605e-05, - "loss": 0.7352, + "learning_rate": 5.5779617014280966e-05, + "loss": 0.5539, "step": 62500 }, { "epoch": 0.04, - "learning_rate": 5.577750444892704e-05, - "loss": 0.7359, + "learning_rate": 5.57775170487204e-05, + "loss": 0.5537, "step": 63000 }, { "epoch": 0.04, - "learning_rate": 5.577540448336647e-05, - "loss": 0.725, + "learning_rate": 5.577541708315983e-05, + "loss": 0.5453, "step": 63500 }, { "epoch": 0.04, - "learning_rate": 5.577330451780591e-05, - "loss": 0.7196, + "learning_rate": 5.577331711759927e-05, + "loss": 0.5351, "step": 64000 }, { "epoch": 0.04, - "learning_rate": 5.5771208752176466e-05, - "loss": 0.7326, + "learning_rate": 5.5771217152038706e-05, + "loss": 0.5424, "step": 64500 }, { "epoch": 0.04, - "learning_rate": 5.57691087866159e-05, - "loss": 0.7366, + "learning_rate": 5.576911718647814e-05, + "loss": 0.5449, "step": 65000 }, { "epoch": 0.04, - "learning_rate": 5.576700882105533e-05, - "loss": 0.7191, + "learning_rate": 5.576701722091758e-05, + "loss": 0.5305, "step": 65500 }, { "epoch": 0.04, - "learning_rate": 5.576490885549477e-05, - "loss": 0.7332, + "learning_rate": 5.576491725535701e-05, + "loss": 0.5493, "step": 66000 }, { "epoch": 0.04, - "learning_rate": 5.576281308986533e-05, - "loss": 0.72, + "learning_rate": 5.576281728979645e-05, + "loss": 0.5389, "step": 66500 }, { "epoch": 0.04, - "learning_rate": 5.576071312430476e-05, - "loss": 0.7274, + "learning_rate": 5.576071732423588e-05, + "loss": 0.5438, "step": 67000 }, { "epoch": 0.04, - "learning_rate": 5.5758613158744194e-05, - "loss": 0.7141, + "learning_rate": 5.5758617358675314e-05, + "loss": 0.5312, "step": 67500 }, { "epoch": 0.04, - "learning_rate": 5.5756513193183634e-05, - "loss": 0.7202, + "learning_rate": 5.5756517393114754e-05, + "loss": 0.5406, "step": 68000 }, { "epoch": 0.04, - "learning_rate": 5.575441322762307e-05, - "loss": 0.7082, + "learning_rate": 5.575441742755419e-05, + "loss": 0.5324, "step": 68500 }, { "epoch": 0.04, - "learning_rate": 5.575231326206251e-05, - "loss": 0.708, + "learning_rate": 5.575231746199362e-05, + "loss": 0.525, "step": 69000 }, { "epoch": 0.04, - "learning_rate": 5.5750213296501934e-05, - "loss": 0.7177, + "learning_rate": 5.575021749643306e-05, + "loss": 0.5379, "step": 69500 }, { "epoch": 0.04, - "learning_rate": 5.574811333094137e-05, - "loss": 0.7094, + "learning_rate": 5.5748117530872495e-05, + "loss": 0.525, "step": 70000 }, { "epoch": 0.04, - "learning_rate": 5.574601336538081e-05, - "loss": 0.7085, + "learning_rate": 5.574602176524305e-05, + "loss": 0.5259, "step": 70500 }, { "epoch": 0.04, - "learning_rate": 5.574391759975137e-05, - "loss": 0.7188, + "learning_rate": 5.574392179968248e-05, + "loss": 0.5382, "step": 71000 }, { "epoch": 0.04, - "learning_rate": 5.57418176341908e-05, - "loss": 0.7026, + "learning_rate": 5.574182183412192e-05, + "loss": 0.5267, "step": 71500 }, { "epoch": 0.04, - "learning_rate": 5.573971766863023e-05, - "loss": 0.7047, + "learning_rate": 5.5739721868561355e-05, + "loss": 0.5234, "step": 72000 }, { "epoch": 0.04, - "learning_rate": 5.573761770306967e-05, - "loss": 0.6978, + "learning_rate": 5.573762190300079e-05, + "loss": 0.5185, "step": 72500 }, { "epoch": 0.04, - "learning_rate": 5.57355177375091e-05, - "loss": 0.6972, + "learning_rate": 5.573552193744023e-05, + "loss": 0.5199, "step": 73000 }, { "epoch": 0.04, - "learning_rate": 5.5733417771948536e-05, - "loss": 0.6942, + "learning_rate": 5.573342617181078e-05, + "loss": 0.516, "step": 73500 }, { "epoch": 0.04, - "learning_rate": 5.573132200631909e-05, - "loss": 0.7024, + "learning_rate": 5.5731326206250216e-05, + "loss": 0.5283, "step": 74000 }, { "epoch": 0.04, - "learning_rate": 5.572922204075853e-05, - "loss": 0.6919, + "learning_rate": 5.5729226240689656e-05, + "loss": 0.5129, "step": 74500 }, { "epoch": 0.04, - "learning_rate": 5.572712207519796e-05, - "loss": 0.7045, + "learning_rate": 5.572712627512909e-05, + "loss": 0.5237, "step": 75000 }, { "epoch": 0.05, - "learning_rate": 5.57250221096374e-05, - "loss": 0.7054, + "learning_rate": 5.572502630956852e-05, + "loss": 0.5169, "step": 75500 }, { "epoch": 0.05, "learning_rate": 5.572292634400796e-05, - "loss": 0.6966, + "loss": 0.5173, "step": 76000 }, { "epoch": 0.05, - "learning_rate": 5.572082637844739e-05, - "loss": 0.6926, + "learning_rate": 5.572083057837852e-05, + "loss": 0.5154, "step": 76500 }, { "epoch": 0.05, - "learning_rate": 5.5718726412886824e-05, - "loss": 0.6988, + "learning_rate": 5.571873061281795e-05, + "loss": 0.5201, "step": 77000 }, { "epoch": 0.05, - "learning_rate": 5.5716626447326264e-05, - "loss": 0.7083, + "learning_rate": 5.5716630647257384e-05, + "loss": 0.5189, "step": 77500 }, { "epoch": 0.05, - "learning_rate": 5.57145264817657e-05, - "loss": 0.7026, + "learning_rate": 5.5714530681696824e-05, + "loss": 0.5188, "step": 78000 }, { "epoch": 0.05, - "learning_rate": 5.571242651620513e-05, - "loss": 0.6944, + "learning_rate": 5.571243071613626e-05, + "loss": 0.5234, "step": 78500 }, { "epoch": 0.05, - "learning_rate": 5.571032655064457e-05, - "loss": 0.696, + "learning_rate": 5.5710330750575684e-05, + "loss": 0.5079, "step": 79000 }, { "epoch": 0.05, - "learning_rate": 5.5708226585084004e-05, - "loss": 0.6893, + "learning_rate": 5.5708230785015124e-05, + "loss": 0.5155, "step": 79500 }, { "epoch": 0.05, "learning_rate": 5.570613081945456e-05, - "loss": 0.6844, + "loss": 0.51, "step": 80000 }, { "epoch": 0.05, - "learning_rate": 5.570403085389399e-05, - "loss": 0.7073, + "learning_rate": 5.570403505382512e-05, + "loss": 0.5124, "step": 80500 }, { "epoch": 0.05, - "learning_rate": 5.570193088833343e-05, - "loss": 0.6991, + "learning_rate": 5.570193508826455e-05, + "loss": 0.5136, "step": 81000 }, { "epoch": 0.05, "learning_rate": 5.5699835122703985e-05, - "loss": 0.684, + "loss": 0.5012, "step": 81500 }, { "epoch": 0.05, "learning_rate": 5.569773515714342e-05, - "loss": 0.678, + "loss": 0.5043, "step": 82000 }, { "epoch": 0.05, - "learning_rate": 5.569563519158286e-05, - "loss": 0.6667, + "learning_rate": 5.569563939151398e-05, + "loss": 0.5009, "step": 82500 }, { "epoch": 0.05, - "learning_rate": 5.569353522602229e-05, - "loss": 0.6921, + "learning_rate": 5.569353942595342e-05, + "loss": 0.5163, "step": 83000 }, { "epoch": 0.05, - "learning_rate": 5.5691435260461726e-05, - "loss": 0.6854, + "learning_rate": 5.569143946039285e-05, + "loss": 0.4996, "step": 83500 }, { "epoch": 0.05, - "learning_rate": 5.5689335294901166e-05, - "loss": 0.6604, + "learning_rate": 5.568933949483228e-05, + "loss": 0.4944, "step": 84000 }, { "epoch": 0.05, - "learning_rate": 5.56872353293406e-05, - "loss": 0.6793, + "learning_rate": 5.568723952927172e-05, + "loss": 0.502, "step": 84500 }, { "epoch": 0.05, - "learning_rate": 5.568513536378003e-05, - "loss": 0.6776, + "learning_rate": 5.568514376364228e-05, + "loss": 0.5044, "step": 85000 }, { "epoch": 0.05, - "learning_rate": 5.568303539821947e-05, - "loss": 0.6625, + "learning_rate": 5.5683047998012834e-05, + "loss": 0.4984, "step": 85500 }, { "epoch": 0.05, - "learning_rate": 5.568093963259003e-05, - "loss": 0.6535, + "learning_rate": 5.568094803245227e-05, + "loss": 0.4897, "step": 86000 }, { "epoch": 0.05, - "learning_rate": 5.567883966702946e-05, - "loss": 0.6705, + "learning_rate": 5.56788480668917e-05, + "loss": 0.4983, "step": 86500 }, { "epoch": 0.05, - "learning_rate": 5.5676743901400014e-05, - "loss": 0.6665, + "learning_rate": 5.567674810133114e-05, + "loss": 0.4933, "step": 87000 }, { "epoch": 0.05, - "learning_rate": 5.567464393583945e-05, - "loss": 0.6646, + "learning_rate": 5.5674648135770574e-05, + "loss": 0.4931, "step": 87500 }, { "epoch": 0.05, - "learning_rate": 5.567254817021001e-05, - "loss": 0.6637, + "learning_rate": 5.567255237014113e-05, + "loss": 0.4891, "step": 88000 }, { "epoch": 0.05, - "learning_rate": 5.567044820464944e-05, - "loss": 0.6533, + "learning_rate": 5.567045660451168e-05, + "loss": 0.4873, "step": 88500 }, { "epoch": 0.05, - "learning_rate": 5.5668348239088875e-05, - "loss": 0.6846, + "learning_rate": 5.566835663895112e-05, + "loss": 0.5022, "step": 89000 }, { "epoch": 0.05, - "learning_rate": 5.5666248273528315e-05, - "loss": 0.653, + "learning_rate": 5.5666256673390555e-05, + "loss": 0.4927, "step": 89500 }, { "epoch": 0.05, - "learning_rate": 5.566414830796775e-05, - "loss": 0.6661, + "learning_rate": 5.566415670782999e-05, + "loss": 0.4897, "step": 90000 }, { "epoch": 0.05, - "learning_rate": 5.566204834240718e-05, - "loss": 0.6696, + "learning_rate": 5.566205674226943e-05, + "loss": 0.4911, "step": 90500 }, { "epoch": 0.05, - "learning_rate": 5.565994837684662e-05, - "loss": 0.6534, + "learning_rate": 5.565995677670886e-05, + "loss": 0.4928, "step": 91000 }, { "epoch": 0.05, - "learning_rate": 5.5657848411286055e-05, - "loss": 0.6579, + "learning_rate": 5.5657856811148296e-05, + "loss": 0.4867, "step": 91500 }, { "epoch": 0.06, - "learning_rate": 5.565574844572549e-05, - "loss": 0.6549, + "learning_rate": 5.5655756845587736e-05, + "loss": 0.4917, "step": 92000 }, { "epoch": 0.06, - "learning_rate": 5.565364848016493e-05, - "loss": 0.6649, + "learning_rate": 5.565365688002717e-05, + "loss": 0.4985, "step": 92500 }, { "epoch": 0.06, - "learning_rate": 5.565154851460436e-05, - "loss": 0.6772, + "learning_rate": 5.56515569144666e-05, + "loss": 0.4989, "step": 93000 }, { "epoch": 0.06, - "learning_rate": 5.5649448549043796e-05, - "loss": 0.641, + "learning_rate": 5.5649456948906036e-05, + "loss": 0.4795, "step": 93500 }, { "epoch": 0.06, - "learning_rate": 5.564735278341435e-05, - "loss": 0.6538, + "learning_rate": 5.564735698334547e-05, + "loss": 0.4883, "step": 94000 }, { "epoch": 0.06, - "learning_rate": 5.564525281785379e-05, - "loss": 0.6657, + "learning_rate": 5.56452570177849e-05, + "loss": 0.4935, "step": 94500 }, { "epoch": 0.06, - "learning_rate": 5.564315285229322e-05, - "loss": 0.6514, + "learning_rate": 5.5643157052224343e-05, + "loss": 0.4823, "step": 95000 }, { "epoch": 0.06, - "learning_rate": 5.564105288673266e-05, - "loss": 0.6577, + "learning_rate": 5.564105708666378e-05, + "loss": 0.4875, "step": 95500 }, { "epoch": 0.06, "learning_rate": 5.563895712110321e-05, - "loss": 0.6627, + "loss": 0.4897, "step": 96000 }, { "epoch": 0.06, "learning_rate": 5.563685715554265e-05, - "loss": 0.6378, + "loss": 0.4853, "step": 96500 }, { "epoch": 0.06, "learning_rate": 5.5634757189982084e-05, - "loss": 0.6515, + "loss": 0.4821, "step": 97000 }, { "epoch": 0.06, "learning_rate": 5.563265722442152e-05, - "loss": 0.6565, + "loss": 0.4917, "step": 97500 }, { "epoch": 0.06, - "learning_rate": 5.563056145879208e-05, - "loss": 0.6562, + "learning_rate": 5.563055725886096e-05, + "loss": 0.4819, "step": 98000 }, { "epoch": 0.06, "learning_rate": 5.562846149323151e-05, - "loss": 0.6596, + "loss": 0.4833, "step": 98500 }, { "epoch": 0.06, - "learning_rate": 5.5626365727602065e-05, - "loss": 0.645, + "learning_rate": 5.5626361527670945e-05, + "loss": 0.4791, "step": 99000 }, { "epoch": 0.06, - "learning_rate": 5.56242657620415e-05, - "loss": 0.6391, + "learning_rate": 5.5624261562110385e-05, + "loss": 0.4753, "step": 99500 }, { "epoch": 0.06, - "learning_rate": 5.562216999641206e-05, - "loss": 0.643, + "learning_rate": 5.562216579648094e-05, + "loss": 0.4758, "step": 100000 }, { "epoch": 0.06, - "eval_loss": 0.5988019108772278, - "eval_runtime": 1105.0166, - "eval_samples_per_second": 476.663, - "eval_steps_per_second": 79.444, + "eval_loss": 0.4188617467880249, + "eval_runtime": 1463.8783, + "eval_samples_per_second": 359.811, + "eval_steps_per_second": 59.969, "step": 100000 }, { "epoch": 0.06, - "learning_rate": 5.562007003085149e-05, - "loss": 0.651, + "learning_rate": 5.562006583092037e-05, + "loss": 0.4766, "step": 100500 }, { "epoch": 0.06, - "learning_rate": 5.5617970065290926e-05, - "loss": 0.6459, + "learning_rate": 5.5617965865359805e-05, + "loss": 0.4784, "step": 101000 }, { "epoch": 0.06, - "learning_rate": 5.561587009973036e-05, - "loss": 0.6533, + "learning_rate": 5.5615865899799246e-05, + "loss": 0.4848, "step": 101500 }, { "epoch": 0.06, - "learning_rate": 5.56137701341698e-05, - "loss": 0.6464, + "learning_rate": 5.561376593423868e-05, + "loss": 0.4756, "step": 102000 }, { "epoch": 0.06, "learning_rate": 5.561167016860923e-05, - "loss": 0.6404, + "loss": 0.4712, "step": 102500 }, { "epoch": 0.06, "learning_rate": 5.5609570203048666e-05, - "loss": 0.6339, + "loss": 0.4643, "step": 103000 }, { "epoch": 0.06, "learning_rate": 5.5607470237488106e-05, - "loss": 0.6304, + "loss": 0.4696, "step": 103500 }, { "epoch": 0.06, "learning_rate": 5.560537027192754e-05, - "loss": 0.6243, + "loss": 0.4663, "step": 104000 }, { "epoch": 0.06, - "learning_rate": 5.560327870622922e-05, - "loss": 0.6351, + "learning_rate": 5.560327030636697e-05, + "loss": 0.4673, "step": 104500 }, { "epoch": 0.06, - "learning_rate": 5.560117874066865e-05, - "loss": 0.6334, + "learning_rate": 5.5601174540737534e-05, + "loss": 0.4698, "step": 105000 }, { "epoch": 0.06, - "learning_rate": 5.559907877510809e-05, - "loss": 0.653, + "learning_rate": 5.559907457517697e-05, + "loss": 0.4769, "step": 105500 }, { "epoch": 0.06, "learning_rate": 5.559697880954752e-05, - "loss": 0.629, + "loss": 0.4661, "step": 106000 }, { "epoch": 0.06, "learning_rate": 5.5594878843986954e-05, - "loss": 0.6343, + "loss": 0.4669, "step": 106500 }, { "epoch": 0.06, "learning_rate": 5.5592778878426394e-05, - "loss": 0.6432, + "loss": 0.4755, "step": 107000 }, { "epoch": 0.06, "learning_rate": 5.559067891286583e-05, - "loss": 0.6456, + "loss": 0.4788, "step": 107500 }, { "epoch": 0.06, - "learning_rate": 5.558857894730526e-05, - "loss": 0.6396, + "learning_rate": 5.558858314723638e-05, + "loss": 0.4656, "step": 108000 }, { "epoch": 0.07, - "learning_rate": 5.55864789817447e-05, - "loss": 0.6394, + "learning_rate": 5.5586483181675815e-05, + "loss": 0.4668, "step": 108500 }, { "epoch": 0.07, - "learning_rate": 5.5584379016184135e-05, - "loss": 0.6285, + "learning_rate": 5.5584383216115255e-05, + "loss": 0.4627, "step": 109000 }, { "epoch": 0.07, - "learning_rate": 5.558227905062357e-05, - "loss": 0.6253, + "learning_rate": 5.558228325055469e-05, + "loss": 0.4587, "step": 109500 }, { "epoch": 0.07, - "learning_rate": 5.558017908506301e-05, - "loss": 0.6239, + "learning_rate": 5.558018328499412e-05, + "loss": 0.4613, "step": 110000 }, { "epoch": 0.07, - "learning_rate": 5.5578079119502435e-05, - "loss": 0.6322, + "learning_rate": 5.557808331943356e-05, + "loss": 0.4696, "step": 110500 }, { "epoch": 0.07, "learning_rate": 5.5575983353872996e-05, - "loss": 0.628, + "loss": 0.46, "step": 111000 }, { "epoch": 0.07, "learning_rate": 5.557388338831243e-05, - "loss": 0.6487, + "loss": 0.4726, "step": 111500 }, { "epoch": 0.07, - "learning_rate": 5.557178342275187e-05, - "loss": 0.636, + "learning_rate": 5.557179182261411e-05, + "loss": 0.4659, "step": 112000 }, { "epoch": 0.07, - "learning_rate": 5.55696834571913e-05, - "loss": 0.6543, + "learning_rate": 5.556969185705354e-05, + "loss": 0.4703, "step": 112500 }, { "epoch": 0.07, - "learning_rate": 5.5567587691561856e-05, - "loss": 0.6227, + "learning_rate": 5.5567591891492977e-05, + "loss": 0.4658, "step": 113000 }, { "epoch": 0.07, - "learning_rate": 5.55654877260013e-05, - "loss": 0.6214, + "learning_rate": 5.556549192593241e-05, + "loss": 0.4621, "step": 113500 }, { "epoch": 0.07, - "learning_rate": 5.556338776044073e-05, - "loss": 0.6391, + "learning_rate": 5.556339196037185e-05, + "loss": 0.4674, "step": 114000 }, { "epoch": 0.07, - "learning_rate": 5.5561287794880164e-05, - "loss": 0.6198, + "learning_rate": 5.5561291994811284e-05, + "loss": 0.458, "step": 114500 }, { "epoch": 0.07, - "learning_rate": 5.555919622918184e-05, - "loss": 0.625, + "learning_rate": 5.555919202925072e-05, + "loss": 0.4681, "step": 115000 }, { "epoch": 0.07, - "learning_rate": 5.555709626362127e-05, - "loss": 0.6129, + "learning_rate": 5.555709206369016e-05, + "loss": 0.4529, "step": 115500 }, { "epoch": 0.07, - "learning_rate": 5.555499629806071e-05, - "loss": 0.6249, + "learning_rate": 5.555499209812959e-05, + "loss": 0.4601, "step": 116000 }, { "epoch": 0.07, "learning_rate": 5.5552896332500144e-05, - "loss": 0.6214, + "loss": 0.4605, "step": 116500 }, { "epoch": 0.07, - "learning_rate": 5.55508005668707e-05, - "loss": 0.6385, + "learning_rate": 5.555079636693958e-05, + "loss": 0.4651, "step": 117000 }, { "epoch": 0.07, - "learning_rate": 5.554870060131014e-05, - "loss": 0.614, + "learning_rate": 5.554869640137902e-05, + "loss": 0.4543, "step": 117500 }, { "epoch": 0.07, - "learning_rate": 5.554660063574957e-05, - "loss": 0.623, + "learning_rate": 5.55466048356807e-05, + "loss": 0.4616, "step": 118000 }, { "epoch": 0.07, "learning_rate": 5.554450487012013e-05, - "loss": 0.6309, + "loss": 0.462, "step": 118500 }, { "epoch": 0.07, "learning_rate": 5.5542404904559566e-05, - "loss": 0.6279, + "loss": 0.4563, "step": 119000 }, { "epoch": 0.07, "learning_rate": 5.5540304938999e-05, - "loss": 0.6299, + "loss": 0.4565, "step": 119500 }, { "epoch": 0.07, "learning_rate": 5.553820497343843e-05, - "loss": 0.6167, + "loss": 0.4569, "step": 120000 }, { "epoch": 0.07, "learning_rate": 5.5536105007877866e-05, - "loss": 0.6138, + "loss": 0.4499, "step": 120500 }, { "epoch": 0.07, "learning_rate": 5.5534005042317306e-05, - "loss": 0.619, + "loss": 0.4561, "step": 121000 }, { "epoch": 0.07, "learning_rate": 5.553190507675674e-05, - "loss": 0.617, + "loss": 0.4622, "step": 121500 }, { "epoch": 0.07, "learning_rate": 5.552980511119617e-05, - "loss": 0.6174, + "loss": 0.4593, "step": 122000 }, { "epoch": 0.07, "learning_rate": 5.552770514563561e-05, - "loss": 0.6095, + "loss": 0.4452, "step": 122500 }, { "epoch": 0.07, "learning_rate": 5.552560518007505e-05, - "loss": 0.6124, + "loss": 0.4466, "step": 123000 }, { "epoch": 0.07, "learning_rate": 5.552350521451448e-05, - "loss": 0.6111, + "loss": 0.4473, "step": 123500 }, { "epoch": 0.07, "learning_rate": 5.552140524895392e-05, - "loss": 0.6188, + "loss": 0.4585, "step": 124000 }, { "epoch": 0.07, - "learning_rate": 5.5519309483324474e-05, - "loss": 0.6072, + "learning_rate": 5.5519305283393354e-05, + "loss": 0.4428, "step": 124500 }, { "epoch": 0.07, - "learning_rate": 5.551720951776391e-05, - "loss": 0.6157, + "learning_rate": 5.551720531783278e-05, + "loss": 0.457, "step": 125000 }, { "epoch": 0.08, - "learning_rate": 5.551510955220334e-05, - "loss": 0.6249, + "learning_rate": 5.551510535227222e-05, + "loss": 0.4548, "step": 125500 }, { "epoch": 0.08, "learning_rate": 5.551300958664278e-05, - "loss": 0.6178, + "loss": 0.4563, "step": 126000 }, { "epoch": 0.08, - "learning_rate": 5.5510913821013335e-05, - "loss": 0.5944, + "learning_rate": 5.5510909621082215e-05, + "loss": 0.4404, "step": 126500 }, { "epoch": 0.08, - "learning_rate": 5.550881805538389e-05, - "loss": 0.6196, + "learning_rate": 5.5508809655521655e-05, + "loss": 0.4466, "step": 127000 }, { "epoch": 0.08, - "learning_rate": 5.550671808982332e-05, - "loss": 0.6001, + "learning_rate": 5.550670968996108e-05, + "loss": 0.4426, "step": 127500 }, { "epoch": 0.08, - "learning_rate": 5.550461812426276e-05, - "loss": 0.6155, + "learning_rate": 5.5504609724400515e-05, + "loss": 0.4459, "step": 128000 }, { "epoch": 0.08, - "learning_rate": 5.5502518158702196e-05, - "loss": 0.6237, + "learning_rate": 5.5502513958771075e-05, + "loss": 0.4522, "step": 128500 }, { "epoch": 0.08, - "learning_rate": 5.550041819314163e-05, - "loss": 0.614, + "learning_rate": 5.5500413993210516e-05, + "loss": 0.4473, "step": 129000 }, { "epoch": 0.08, - "learning_rate": 5.549831822758107e-05, - "loss": 0.6185, + "learning_rate": 5.549831402764995e-05, + "loss": 0.4513, "step": 129500 }, { "epoch": 0.08, - "learning_rate": 5.54962182620205e-05, - "loss": 0.6114, + "learning_rate": 5.5496214062089376e-05, + "loss": 0.4486, "step": 130000 }, { "epoch": 0.08, - "learning_rate": 5.5494118296459936e-05, - "loss": 0.6114, + "learning_rate": 5.5494114096528816e-05, + "loss": 0.4447, "step": 130500 }, { "epoch": 0.08, - "learning_rate": 5.549202253083049e-05, - "loss": 0.6071, + "learning_rate": 5.5492018330899376e-05, + "loss": 0.4521, "step": 131000 }, { "epoch": 0.08, - "learning_rate": 5.548992256526993e-05, - "loss": 0.6046, + "learning_rate": 5.548991836533881e-05, + "loss": 0.4408, "step": 131500 }, { "epoch": 0.08, - "learning_rate": 5.5487822599709363e-05, - "loss": 0.5886, + "learning_rate": 5.5487818399778237e-05, + "loss": 0.4317, "step": 132000 }, { "epoch": 0.08, - "learning_rate": 5.54857226341488e-05, - "loss": 0.6095, + "learning_rate": 5.548571843421768e-05, + "loss": 0.4462, "step": 132500 }, { "epoch": 0.08, - "learning_rate": 5.548362686851936e-05, - "loss": 0.6063, + "learning_rate": 5.548361846865711e-05, + "loss": 0.4425, "step": 133000 }, { "epoch": 0.08, - "learning_rate": 5.548152690295879e-05, - "loss": 0.6083, + "learning_rate": 5.5481518503096544e-05, + "loss": 0.4411, "step": 133500 }, { "epoch": 0.08, - "learning_rate": 5.5479426937398224e-05, - "loss": 0.6127, + "learning_rate": 5.5479418537535984e-05, + "loss": 0.4425, "step": 134000 }, { "epoch": 0.08, - "learning_rate": 5.5477326971837664e-05, - "loss": 0.5944, + "learning_rate": 5.547731857197542e-05, + "loss": 0.4325, "step": 134500 }, { "epoch": 0.08, - "learning_rate": 5.547523120620822e-05, - "loss": 0.6011, + "learning_rate": 5.547522280634597e-05, + "loss": 0.4416, "step": 135000 }, { "epoch": 0.08, - "learning_rate": 5.547313124064765e-05, - "loss": 0.6024, + "learning_rate": 5.547312704071653e-05, + "loss": 0.4449, "step": 135500 }, { "epoch": 0.08, "learning_rate": 5.5471031275087085e-05, - "loss": 0.5883, + "loss": 0.4314, "step": 136000 }, { "epoch": 0.08, "learning_rate": 5.5468931309526525e-05, - "loss": 0.6046, + "loss": 0.4423, "step": 136500 }, { "epoch": 0.08, - "learning_rate": 5.546683554389708e-05, - "loss": 0.6, + "learning_rate": 5.546683134396596e-05, + "loss": 0.4397, "step": 137000 }, { "epoch": 0.08, - "learning_rate": 5.546473977826764e-05, - "loss": 0.6035, + "learning_rate": 5.546473137840539e-05, + "loss": 0.451, "step": 137500 }, { "epoch": 0.08, - "learning_rate": 5.546263981270707e-05, - "loss": 0.5914, + "learning_rate": 5.546263141284483e-05, + "loss": 0.4381, "step": 138000 }, { "epoch": 0.08, - "learning_rate": 5.5460539847146506e-05, - "loss": 0.5996, + "learning_rate": 5.5460531447284266e-05, + "loss": 0.4419, "step": 138500 }, { "epoch": 0.08, - "learning_rate": 5.545843988158594e-05, - "loss": 0.5983, + "learning_rate": 5.54584314817237e-05, + "loss": 0.4423, "step": 139000 }, { "epoch": 0.08, - "learning_rate": 5.545633991602537e-05, - "loss": 0.6085, + "learning_rate": 5.545633151616313e-05, + "loss": 0.4388, "step": 139500 }, { "epoch": 0.08, - "learning_rate": 5.545423995046481e-05, - "loss": 0.6016, + "learning_rate": 5.5454231550602566e-05, + "loss": 0.439, "step": 140000 }, { "epoch": 0.08, - "learning_rate": 5.5452139984904247e-05, - "loss": 0.6105, + "learning_rate": 5.5452135784973126e-05, + "loss": 0.4423, "step": 140500 }, { "epoch": 0.08, "learning_rate": 5.545004001934368e-05, - "loss": 0.586, + "loss": 0.4337, "step": 141000 }, { "epoch": 0.08, - "learning_rate": 5.5447944253714234e-05, - "loss": 0.5941, + "learning_rate": 5.544794005378312e-05, + "loss": 0.433, "step": 141500 }, { "epoch": 0.09, - "learning_rate": 5.5445844288153674e-05, - "loss": 0.5863, + "learning_rate": 5.5445840088222554e-05, + "loss": 0.4321, "step": 142000 }, { "epoch": 0.09, - "learning_rate": 5.544374432259311e-05, - "loss": 0.5876, + "learning_rate": 5.544374012266199e-05, + "loss": 0.4354, "step": 142500 }, { "epoch": 0.09, - "learning_rate": 5.544164855696367e-05, - "loss": 0.5826, + "learning_rate": 5.544164015710143e-05, + "loss": 0.4414, "step": 143000 }, { "epoch": 0.09, - "learning_rate": 5.5439548591403094e-05, - "loss": 0.6028, + "learning_rate": 5.543954019154086e-05, + "loss": 0.4383, "step": 143500 }, { "epoch": 0.09, - "learning_rate": 5.5437448625842535e-05, - "loss": 0.5828, + "learning_rate": 5.543744022598029e-05, + "loss": 0.4277, "step": 144000 }, { "epoch": 0.09, - "learning_rate": 5.543534866028197e-05, - "loss": 0.593, + "learning_rate": 5.543534026041973e-05, + "loss": 0.4329, "step": 144500 }, { "epoch": 0.09, - "learning_rate": 5.54332486947214e-05, - "loss": 0.603, + "learning_rate": 5.543324449479029e-05, + "loss": 0.4383, "step": 145000 }, { "epoch": 0.09, - "learning_rate": 5.543115292909196e-05, - "loss": 0.5927, + "learning_rate": 5.543114452922972e-05, + "loss": 0.4345, "step": 145500 }, { "epoch": 0.09, - "learning_rate": 5.5429052963531395e-05, - "loss": 0.586, + "learning_rate": 5.5429044563669155e-05, + "loss": 0.4318, "step": 146000 }, { "epoch": 0.09, - "learning_rate": 5.542695299797083e-05, - "loss": 0.5799, + "learning_rate": 5.542694459810859e-05, + "loss": 0.4278, "step": 146500 }, { "epoch": 0.09, - "learning_rate": 5.542485303241027e-05, - "loss": 0.5945, + "learning_rate": 5.542484883247915e-05, + "loss": 0.4366, "step": 147000 }, { "epoch": 0.09, - "learning_rate": 5.54227530668497e-05, - "loss": 0.5945, + "learning_rate": 5.542274886691858e-05, + "loss": 0.4337, "step": 147500 }, { "epoch": 0.09, - "learning_rate": 5.5420653101289136e-05, - "loss": 0.5845, + "learning_rate": 5.542064890135802e-05, + "loss": 0.4334, "step": 148000 }, { "epoch": 0.09, - "learning_rate": 5.5418553135728576e-05, - "loss": 0.5927, + "learning_rate": 5.5418548935797456e-05, + "loss": 0.4361, "step": 148500 }, { "epoch": 0.09, - "learning_rate": 5.541645317016801e-05, - "loss": 0.5795, + "learning_rate": 5.541644897023688e-05, + "loss": 0.4277, "step": 149000 }, { "epoch": 0.09, - "learning_rate": 5.541435740453856e-05, - "loss": 0.5943, + "learning_rate": 5.541434900467632e-05, + "loss": 0.4323, "step": 149500 }, { "epoch": 0.09, - "learning_rate": 5.5412257438978e-05, - "loss": 0.5831, + "learning_rate": 5.5412249039115756e-05, + "loss": 0.4315, "step": 150000 }, { "epoch": 0.09, - "learning_rate": 5.541016167334855e-05, - "loss": 0.5892, + "learning_rate": 5.541014907355519e-05, + "loss": 0.4296, "step": 150500 }, { "epoch": 0.09, - "learning_rate": 5.540806170778799e-05, - "loss": 0.5704, + "learning_rate": 5.540805330792575e-05, + "loss": 0.4193, "step": 151000 }, { "epoch": 0.09, - "learning_rate": 5.5405961742227424e-05, - "loss": 0.5815, + "learning_rate": 5.5405953342365184e-05, + "loss": 0.4244, "step": 151500 }, { "epoch": 0.09, - "learning_rate": 5.540386177666686e-05, - "loss": 0.5885, + "learning_rate": 5.540385337680462e-05, + "loss": 0.4283, "step": 152000 }, { "epoch": 0.09, - "learning_rate": 5.54017618111063e-05, - "loss": 0.5842, + "learning_rate": 5.540175341124405e-05, + "loss": 0.4335, "step": 152500 }, { "epoch": 0.09, - "learning_rate": 5.539966184554573e-05, - "loss": 0.5934, + "learning_rate": 5.539965344568349e-05, + "loss": 0.43, "step": 153000 }, { "epoch": 0.09, - "learning_rate": 5.539756187998517e-05, - "loss": 0.5923, + "learning_rate": 5.5397557680054044e-05, + "loss": 0.4356, "step": 153500 }, { "epoch": 0.09, - "learning_rate": 5.5395466114355725e-05, - "loss": 0.5913, + "learning_rate": 5.539545771449348e-05, + "loss": 0.4342, "step": 154000 }, { "epoch": 0.09, - "learning_rate": 5.539336614879516e-05, - "loss": 0.5814, + "learning_rate": 5.539335774893292e-05, + "loss": 0.4295, "step": 154500 }, { "epoch": 0.09, - "learning_rate": 5.539126618323459e-05, - "loss": 0.5746, + "learning_rate": 5.539125778337235e-05, + "loss": 0.4247, "step": 155000 }, { "epoch": 0.09, - "learning_rate": 5.538916621767403e-05, - "loss": 0.5825, + "learning_rate": 5.538916201774291e-05, + "loss": 0.4325, "step": 155500 }, { "epoch": 0.09, - "learning_rate": 5.5387066252113465e-05, - "loss": 0.5735, + "learning_rate": 5.538706205218234e-05, + "loss": 0.4211, "step": 156000 }, { "epoch": 0.09, - "learning_rate": 5.53849662865529e-05, - "loss": 0.5981, + "learning_rate": 5.538496208662178e-05, + "loss": 0.4278, "step": 156500 }, { "epoch": 0.09, "learning_rate": 5.538286632099234e-05, - "loss": 0.577, + "loss": 0.4199, "step": 157000 }, { "epoch": 0.09, "learning_rate": 5.538076635543177e-05, - "loss": 0.5785, + "loss": 0.426, "step": 157500 }, { "epoch": 0.09, - "learning_rate": 5.5378670589802326e-05, - "loss": 0.5823, + "learning_rate": 5.5378666389871206e-05, + "loss": 0.4256, "step": 158000 }, { "epoch": 0.1, - "learning_rate": 5.537657062424176e-05, - "loss": 0.585, + "learning_rate": 5.537656642431064e-05, + "loss": 0.4297, "step": 158500 }, { "epoch": 0.1, - "learning_rate": 5.53744706586812e-05, - "loss": 0.5801, + "learning_rate": 5.537446645875007e-05, + "loss": 0.422, "step": 159000 }, { "epoch": 0.1, - "learning_rate": 5.537237069312063e-05, - "loss": 0.5874, + "learning_rate": 5.5372366493189506e-05, + "loss": 0.427, "step": 159500 }, { "epoch": 0.1, - "learning_rate": 5.537027072756007e-05, - "loss": 0.5864, + "learning_rate": 5.537026652762895e-05, + "loss": 0.4286, "step": 160000 }, { "epoch": 0.1, - "learning_rate": 5.536817076199951e-05, - "loss": 0.5837, + "learning_rate": 5.536816656206838e-05, + "loss": 0.4255, "step": 160500 }, { "epoch": 0.1, - "learning_rate": 5.5366070796438934e-05, - "loss": 0.575, + "learning_rate": 5.5366066596507814e-05, + "loss": 0.4217, "step": 161000 }, { "epoch": 0.1, - "learning_rate": 5.5363970830878374e-05, - "loss": 0.5878, + "learning_rate": 5.5363966630947254e-05, + "loss": 0.4265, "step": 161500 }, { "epoch": 0.1, - "learning_rate": 5.5361875065248934e-05, - "loss": 0.5764, + "learning_rate": 5.536186666538669e-05, + "loss": 0.4225, "step": 162000 }, { "epoch": 0.1, - "learning_rate": 5.535977509968837e-05, - "loss": 0.578, + "learning_rate": 5.535976669982612e-05, + "loss": 0.4129, "step": 162500 }, { "epoch": 0.1, "learning_rate": 5.5357675134127794e-05, - "loss": 0.577, + "loss": 0.4189, "step": 163000 }, { "epoch": 0.1, - "learning_rate": 5.5355575168567235e-05, - "loss": 0.589, + "learning_rate": 5.5355579368498355e-05, + "loss": 0.4356, "step": 163500 }, { "epoch": 0.1, - "learning_rate": 5.5353479402937795e-05, - "loss": 0.5692, + "learning_rate": 5.535348360286891e-05, + "loss": 0.4138, "step": 164000 }, { "epoch": 0.1, - "learning_rate": 5.535137943737723e-05, - "loss": 0.5749, + "learning_rate": 5.535138363730835e-05, + "loss": 0.4174, "step": 164500 }, { "epoch": 0.1, - "learning_rate": 5.534927947181666e-05, - "loss": 0.5844, + "learning_rate": 5.534928367174778e-05, + "loss": 0.4203, "step": 165000 }, { "epoch": 0.1, - "learning_rate": 5.5347179506256095e-05, - "loss": 0.5632, + "learning_rate": 5.5347183706187216e-05, + "loss": 0.416, "step": 165500 }, { "epoch": 0.1, - "learning_rate": 5.534507954069553e-05, - "loss": 0.5652, + "learning_rate": 5.5345083740626656e-05, + "loss": 0.418, "step": 166000 }, { "epoch": 0.1, - "learning_rate": 5.534297957513496e-05, - "loss": 0.5785, + "learning_rate": 5.534298377506609e-05, + "loss": 0.4276, "step": 166500 }, { "epoch": 0.1, - "learning_rate": 5.53408796095744e-05, - "loss": 0.5914, + "learning_rate": 5.534088800943664e-05, + "loss": 0.426, "step": 167000 }, { "epoch": 0.1, - "learning_rate": 5.5338779644013836e-05, - "loss": 0.5692, + "learning_rate": 5.533878804387608e-05, + "loss": 0.421, "step": 167500 }, { "epoch": 0.1, - "learning_rate": 5.533668387838439e-05, - "loss": 0.5671, + "learning_rate": 5.5336688078315516e-05, + "loss": 0.4202, "step": 168000 }, { "epoch": 0.1, - "learning_rate": 5.533458391282383e-05, - "loss": 0.5776, + "learning_rate": 5.533458811275495e-05, + "loss": 0.4176, "step": 168500 }, { "epoch": 0.1, - "learning_rate": 5.533248394726326e-05, - "loss": 0.5768, + "learning_rate": 5.533248814719439e-05, + "loss": 0.418, "step": 169000 }, { "epoch": 0.1, - "learning_rate": 5.53303839817027e-05, - "loss": 0.5734, + "learning_rate": 5.5330388181633824e-05, + "loss": 0.4193, "step": 169500 }, { "epoch": 0.1, "learning_rate": 5.532828821607326e-05, - "loss": 0.5654, + "loss": 0.4184, "step": 170000 }, { "epoch": 0.1, "learning_rate": 5.532618825051269e-05, - "loss": 0.5818, + "loss": 0.4254, "step": 170500 }, { "epoch": 0.1, "learning_rate": 5.5324088284952124e-05, - "loss": 0.5662, + "loss": 0.4183, "step": 171000 }, { "epoch": 0.1, "learning_rate": 5.532198831939156e-05, - "loss": 0.5571, + "loss": 0.4116, "step": 171500 }, { "epoch": 0.1, "learning_rate": 5.5319888353831e-05, - "loss": 0.5798, + "loss": 0.4253, "step": 172000 }, { "epoch": 0.1, "learning_rate": 5.531778838827043e-05, - "loss": 0.575, + "loss": 0.4157, "step": 172500 }, { "epoch": 0.1, "learning_rate": 5.5315688422709865e-05, - "loss": 0.5658, + "loss": 0.4147, "step": 173000 }, { "epoch": 0.1, "learning_rate": 5.5313588457149305e-05, - "loss": 0.5768, + "loss": 0.418, "step": 173500 }, { "epoch": 0.1, "learning_rate": 5.531149269151986e-05, - "loss": 0.5787, + "loss": 0.4159, "step": 174000 }, { "epoch": 0.1, - "learning_rate": 5.530939272595929e-05, - "loss": 0.569, + "learning_rate": 5.530939692589042e-05, + "loss": 0.4249, "step": 174500 }, { "epoch": 0.1, - "learning_rate": 5.5307292760398725e-05, - "loss": 0.5707, + "learning_rate": 5.5307296960329845e-05, + "loss": 0.4197, "step": 175000 }, { "epoch": 0.11, - "learning_rate": 5.5305196994769286e-05, - "loss": 0.5553, + "learning_rate": 5.5305201194700406e-05, + "loss": 0.4136, "step": 175500 }, { "epoch": 0.11, - "learning_rate": 5.530309702920872e-05, - "loss": 0.5654, + "learning_rate": 5.5303101229139846e-05, + "loss": 0.4135, "step": 176000 }, { "epoch": 0.11, - "learning_rate": 5.530099706364815e-05, - "loss": 0.5799, + "learning_rate": 5.530100126357928e-05, + "loss": 0.4221, "step": 176500 }, { "epoch": 0.11, - "learning_rate": 5.529889709808759e-05, - "loss": 0.562, + "learning_rate": 5.529890129801871e-05, + "loss": 0.4109, "step": 177000 }, { "epoch": 0.11, - "learning_rate": 5.5296797132527026e-05, - "loss": 0.5681, + "learning_rate": 5.5296801332458146e-05, + "loss": 0.4158, "step": 177500 }, { "epoch": 0.11, - "learning_rate": 5.529469716696646e-05, - "loss": 0.5594, + "learning_rate": 5.529470136689758e-05, + "loss": 0.4116, "step": 178000 }, { "epoch": 0.11, - "learning_rate": 5.52925972014059e-05, - "loss": 0.5675, + "learning_rate": 5.529260140133701e-05, + "loss": 0.4113, "step": 178500 }, { "epoch": 0.11, - "learning_rate": 5.5290497235845333e-05, - "loss": 0.5614, + "learning_rate": 5.5290505635707574e-05, + "loss": 0.4084, "step": 179000 }, { "epoch": 0.11, - "learning_rate": 5.528840147021589e-05, - "loss": 0.572, + "learning_rate": 5.5288405670147014e-05, + "loss": 0.4126, "step": 179500 }, { "epoch": 0.11, - "learning_rate": 5.528630150465532e-05, - "loss": 0.5761, + "learning_rate": 5.528630570458644e-05, + "loss": 0.4181, "step": 180000 }, { "epoch": 0.11, - "learning_rate": 5.528420153909476e-05, - "loss": 0.564, + "learning_rate": 5.5284205739025874e-05, + "loss": 0.4165, "step": 180500 }, { "epoch": 0.11, - "learning_rate": 5.5282101573534194e-05, - "loss": 0.5656, + "learning_rate": 5.5282105773465314e-05, + "loss": 0.4147, "step": 181000 }, { "epoch": 0.11, "learning_rate": 5.528000580790475e-05, - "loss": 0.5641, + "loss": 0.4092, "step": 181500 }, { "epoch": 0.11, "learning_rate": 5.527790584234418e-05, - "loss": 0.55, + "loss": 0.4088, "step": 182000 }, { "epoch": 0.11, - "learning_rate": 5.527580587678362e-05, - "loss": 0.563, + "learning_rate": 5.527581007671474e-05, + "loss": 0.412, "step": 182500 }, { "epoch": 0.11, - "learning_rate": 5.5273705911223055e-05, - "loss": 0.5729, + "learning_rate": 5.5273710111154175e-05, + "loss": 0.419, "step": 183000 }, { "epoch": 0.11, "learning_rate": 5.527161014559361e-05, - "loss": 0.5712, + "loss": 0.4161, "step": 183500 }, { "epoch": 0.11, "learning_rate": 5.526951018003305e-05, - "loss": 0.564, + "loss": 0.4086, "step": 184000 }, { "epoch": 0.11, "learning_rate": 5.526741021447248e-05, - "loss": 0.5646, + "loss": 0.4079, "step": 184500 }, { "epoch": 0.11, - "learning_rate": 5.5265314448843036e-05, - "loss": 0.5621, + "learning_rate": 5.5265310248911916e-05, + "loss": 0.4116, "step": 185000 }, { "epoch": 0.11, "learning_rate": 5.526321448328247e-05, - "loss": 0.5797, + "loss": 0.4101, "step": 185500 }, { "epoch": 0.11, "learning_rate": 5.526111451772191e-05, - "loss": 0.5627, + "loss": 0.4155, "step": 186000 }, { "epoch": 0.11, "learning_rate": 5.525901455216134e-05, - "loss": 0.5543, + "loss": 0.4071, "step": 186500 }, { "epoch": 0.11, "learning_rate": 5.5256914586600776e-05, - "loss": 0.5549, + "loss": 0.4119, "step": 187000 }, { "epoch": 0.11, "learning_rate": 5.525481462104022e-05, - "loss": 0.561, + "loss": 0.4115, "step": 187500 }, { "epoch": 0.11, "learning_rate": 5.525271465547965e-05, - "loss": 0.5621, + "loss": 0.4148, "step": 188000 }, { "epoch": 0.11, "learning_rate": 5.5250614689919084e-05, - "loss": 0.5659, + "loss": 0.4143, "step": 188500 }, { "epoch": 0.11, - "learning_rate": 5.524851892428964e-05, - "loss": 0.5729, + "learning_rate": 5.5248514724358524e-05, + "loss": 0.4145, "step": 189000 }, { "epoch": 0.11, - "learning_rate": 5.524641895872908e-05, - "loss": 0.5564, + "learning_rate": 5.524641475879796e-05, + "loss": 0.4082, "step": 189500 }, { "epoch": 0.11, - "learning_rate": 5.524431899316851e-05, - "loss": 0.5502, + "learning_rate": 5.5244314793237384e-05, + "loss": 0.4026, "step": 190000 }, { "epoch": 0.11, - "learning_rate": 5.5242219027607944e-05, - "loss": 0.5512, + "learning_rate": 5.5242214827676824e-05, + "loss": 0.4091, "step": 190500 }, { "epoch": 0.11, - "learning_rate": 5.5240123261978505e-05, - "loss": 0.573, + "learning_rate": 5.524011486211626e-05, + "loss": 0.4181, "step": 191000 }, { "epoch": 0.11, - "learning_rate": 5.523802329641794e-05, - "loss": 0.5524, + "learning_rate": 5.523801489655569e-05, + "loss": 0.4103, "step": 191500 }, { "epoch": 0.12, - "learning_rate": 5.523592333085737e-05, - "loss": 0.5603, + "learning_rate": 5.523591493099513e-05, + "loss": 0.4039, "step": 192000 }, { "epoch": 0.12, - "learning_rate": 5.523382336529681e-05, - "loss": 0.5545, + "learning_rate": 5.5233814965434565e-05, + "loss": 0.4091, "step": 192500 }, { "epoch": 0.12, - "learning_rate": 5.5231723399736245e-05, - "loss": 0.5635, + "learning_rate": 5.523171919980512e-05, + "loss": 0.4057, "step": 193000 }, { "epoch": 0.12, - "learning_rate": 5.522962343417568e-05, - "loss": 0.5609, + "learning_rate": 5.522961923424456e-05, + "loss": 0.406, "step": 193500 }, { "epoch": 0.12, - "learning_rate": 5.522752346861512e-05, - "loss": 0.5582, + "learning_rate": 5.522751926868399e-05, + "loss": 0.4119, "step": 194000 }, { "epoch": 0.12, - "learning_rate": 5.522542770298567e-05, - "loss": 0.5688, + "learning_rate": 5.5225419303123425e-05, + "loss": 0.4022, "step": 194500 }, { "epoch": 0.12, - "learning_rate": 5.5223331937356226e-05, - "loss": 0.558, + "learning_rate": 5.5223319337562866e-05, + "loss": 0.4067, "step": 195000 }, { "epoch": 0.12, - "learning_rate": 5.522123197179566e-05, - "loss": 0.5551, + "learning_rate": 5.52212193720023e-05, + "loss": 0.405, "step": 195500 }, { "epoch": 0.12, - "learning_rate": 5.521913200623509e-05, - "loss": 0.5647, + "learning_rate": 5.521911940644173e-05, + "loss": 0.4099, "step": 196000 }, { "epoch": 0.12, - "learning_rate": 5.521703204067453e-05, - "loss": 0.5512, + "learning_rate": 5.5217023640812286e-05, + "loss": 0.4029, "step": 196500 }, { "epoch": 0.12, - "learning_rate": 5.521493207511397e-05, - "loss": 0.5618, + "learning_rate": 5.5214923675251726e-05, + "loss": 0.4078, "step": 197000 }, { "epoch": 0.12, - "learning_rate": 5.52128321095534e-05, - "loss": 0.5632, + "learning_rate": 5.521282370969116e-05, + "loss": 0.4075, "step": 197500 }, { "epoch": 0.12, - "learning_rate": 5.521073214399284e-05, - "loss": 0.5519, + "learning_rate": 5.521072374413059e-05, + "loss": 0.4093, "step": 198000 }, { "epoch": 0.12, - "learning_rate": 5.5208632178432274e-05, - "loss": 0.5499, + "learning_rate": 5.5208623778570034e-05, + "loss": 0.4008, "step": 198500 }, { "epoch": 0.12, - "learning_rate": 5.5206532212871714e-05, - "loss": 0.5545, + "learning_rate": 5.520652801294059e-05, + "loss": 0.4099, "step": 199000 }, { "epoch": 0.12, "learning_rate": 5.520443224731114e-05, - "loss": 0.5632, + "loss": 0.4116, "step": 199500 }, { "epoch": 0.12, "learning_rate": 5.5202332281750574e-05, - "loss": 0.5529, + "loss": 0.4025, "step": 200000 }, { "epoch": 0.12, - "eval_loss": 0.5203812718391418, - "eval_runtime": 1102.9266, - "eval_samples_per_second": 477.566, - "eval_steps_per_second": 79.595, + "eval_loss": 0.3602813482284546, + "eval_runtime": 1456.1894, + "eval_samples_per_second": 361.711, + "eval_steps_per_second": 60.285, "step": 200000 }, { "epoch": 0.12, "learning_rate": 5.5200232316190014e-05, - "loss": 0.5682, + "loss": 0.4132, "step": 200500 }, { "epoch": 0.12, "learning_rate": 5.519813235062945e-05, - "loss": 0.552, + "loss": 0.4045, "step": 201000 }, { "epoch": 0.12, - "learning_rate": 5.519603658500001e-05, - "loss": 0.5572, + "learning_rate": 5.519603238506888e-05, + "loss": 0.4019, "step": 201500 }, { "epoch": 0.12, - "learning_rate": 5.5193936619439435e-05, - "loss": 0.5527, + "learning_rate": 5.519393241950832e-05, + "loss": 0.4016, "step": 202000 }, { "epoch": 0.12, - "learning_rate": 5.5191836653878875e-05, - "loss": 0.558, + "learning_rate": 5.5191832453947755e-05, + "loss": 0.4016, "step": 202500 }, { "epoch": 0.12, - "learning_rate": 5.518973668831831e-05, - "loss": 0.5685, + "learning_rate": 5.518973248838719e-05, + "loss": 0.4096, "step": 203000 }, { "epoch": 0.12, - "learning_rate": 5.518764092268887e-05, - "loss": 0.552, + "learning_rate": 5.518763672275774e-05, + "loss": 0.4073, "step": 203500 }, { "epoch": 0.12, - "learning_rate": 5.51855409571283e-05, - "loss": 0.5501, + "learning_rate": 5.518553675719718e-05, + "loss": 0.3978, "step": 204000 }, { "epoch": 0.12, - "learning_rate": 5.5183440991567736e-05, - "loss": 0.5613, + "learning_rate": 5.5183436791636616e-05, + "loss": 0.4116, "step": 204500 }, { "epoch": 0.12, - "learning_rate": 5.518134102600717e-05, - "loss": 0.5478, + "learning_rate": 5.5181345225938296e-05, + "loss": 0.3938, "step": 205000 }, { "epoch": 0.12, "learning_rate": 5.517924526037773e-05, - "loss": 0.5551, + "loss": 0.4072, "step": 205500 }, { "epoch": 0.12, "learning_rate": 5.517714529481717e-05, - "loss": 0.562, + "loss": 0.4074, "step": 206000 }, { "epoch": 0.12, - "learning_rate": 5.5175049529187724e-05, - "loss": 0.5332, + "learning_rate": 5.5175045329256603e-05, + "loss": 0.3947, "step": 206500 }, { "epoch": 0.12, - "learning_rate": 5.517295376355828e-05, - "loss": 0.5469, + "learning_rate": 5.517294536369603e-05, + "loss": 0.4071, "step": 207000 }, { "epoch": 0.12, - "learning_rate": 5.517085379799771e-05, - "loss": 0.5506, + "learning_rate": 5.517084539813547e-05, + "loss": 0.401, "step": 207500 }, { "epoch": 0.12, - "learning_rate": 5.5168753832437144e-05, - "loss": 0.553, + "learning_rate": 5.5168745432574904e-05, + "loss": 0.4028, "step": 208000 }, { "epoch": 0.13, - "learning_rate": 5.5166653866876584e-05, - "loss": 0.5563, + "learning_rate": 5.516664546701434e-05, + "loss": 0.402, "step": 208500 }, { "epoch": 0.13, - "learning_rate": 5.516455810124714e-05, - "loss": 0.5437, + "learning_rate": 5.516454550145378e-05, + "loss": 0.404, "step": 209000 }, { "epoch": 0.13, - "learning_rate": 5.516245813568657e-05, - "loss": 0.5451, + "learning_rate": 5.516244553589321e-05, + "loss": 0.4008, "step": 209500 }, { "epoch": 0.13, - "learning_rate": 5.5160358170126005e-05, - "loss": 0.5472, + "learning_rate": 5.5160345570332644e-05, + "loss": 0.3951, "step": 210000 }, { "epoch": 0.13, - "learning_rate": 5.5158258204565445e-05, - "loss": 0.5546, + "learning_rate": 5.5158245604772085e-05, + "loss": 0.4028, "step": 210500 }, { "epoch": 0.13, - "learning_rate": 5.515615823900488e-05, - "loss": 0.5433, + "learning_rate": 5.515614563921152e-05, + "loss": 0.3955, "step": 211000 }, { "epoch": 0.13, - "learning_rate": 5.515405827344431e-05, - "loss": 0.5362, + "learning_rate": 5.515404987358207e-05, + "loss": 0.3963, "step": 211500 }, { "epoch": 0.13, - "learning_rate": 5.515195830788375e-05, - "loss": 0.5573, + "learning_rate": 5.5151954107952625e-05, + "loss": 0.4061, "step": 212000 }, { "epoch": 0.13, - "learning_rate": 5.5149858342323186e-05, - "loss": 0.5527, + "learning_rate": 5.5149854142392065e-05, + "loss": 0.3977, "step": 212500 }, { "epoch": 0.13, - "learning_rate": 5.5147758376762626e-05, - "loss": 0.5508, + "learning_rate": 5.51477541768315e-05, + "loss": 0.3988, "step": 213000 }, { "epoch": 0.13, - "learning_rate": 5.514565841120206e-05, - "loss": 0.5515, + "learning_rate": 5.514565421127093e-05, + "loss": 0.4016, "step": 213500 }, { "epoch": 0.13, - "learning_rate": 5.5143558445641486e-05, - "loss": 0.5471, + "learning_rate": 5.514355424571037e-05, + "loss": 0.3964, "step": 214000 }, { "epoch": 0.13, - "learning_rate": 5.5141458480080926e-05, - "loss": 0.544, + "learning_rate": 5.5141454280149806e-05, + "loss": 0.3928, "step": 214500 }, { "epoch": 0.13, - "learning_rate": 5.5139362714451487e-05, - "loss": 0.5456, + "learning_rate": 5.513935431458924e-05, + "loss": 0.3944, "step": 215000 }, { "epoch": 0.13, - "learning_rate": 5.513726274889092e-05, - "loss": 0.5522, + "learning_rate": 5.513725434902868e-05, + "loss": 0.3999, "step": 215500 }, { "epoch": 0.13, - "learning_rate": 5.5135162783330353e-05, - "loss": 0.5364, + "learning_rate": 5.513515438346811e-05, + "loss": 0.3927, "step": 216000 }, { "epoch": 0.13, - "learning_rate": 5.513306281776979e-05, - "loss": 0.5532, + "learning_rate": 5.513305861783867e-05, + "loss": 0.4025, "step": 216500 }, { "epoch": 0.13, - "learning_rate": 5.513096705214035e-05, - "loss": 0.5588, + "learning_rate": 5.51309586522781e-05, + "loss": 0.403, "step": 217000 }, { "epoch": 0.13, - "learning_rate": 5.512886708657978e-05, - "loss": 0.5398, + "learning_rate": 5.512885868671754e-05, + "loss": 0.3986, "step": 217500 }, { "epoch": 0.13, - "learning_rate": 5.5126767121019214e-05, - "loss": 0.5508, + "learning_rate": 5.5126758721156974e-05, + "loss": 0.4009, "step": 218000 }, { "epoch": 0.13, - "learning_rate": 5.512466715545865e-05, - "loss": 0.5418, + "learning_rate": 5.512465875559641e-05, + "loss": 0.3935, "step": 218500 }, { "epoch": 0.13, - "learning_rate": 5.512257138982921e-05, - "loss": 0.5413, + "learning_rate": 5.512255879003585e-05, + "loss": 0.3926, "step": 219000 }, { "epoch": 0.13, - "learning_rate": 5.512047142426864e-05, - "loss": 0.5562, + "learning_rate": 5.5120458824475274e-05, + "loss": 0.397, "step": 219500 }, { "epoch": 0.13, - "learning_rate": 5.511837145870808e-05, - "loss": 0.5446, + "learning_rate": 5.511835885891471e-05, + "loss": 0.3966, "step": 220000 }, { "epoch": 0.13, - "learning_rate": 5.5116271493147515e-05, - "loss": 0.5451, + "learning_rate": 5.511626729321639e-05, + "loss": 0.3969, "step": 220500 }, { "epoch": 0.13, - "learning_rate": 5.511417152758694e-05, - "loss": 0.5441, + "learning_rate": 5.511416732765583e-05, + "loss": 0.3954, "step": 221000 }, { "epoch": 0.13, - "learning_rate": 5.51120757619575e-05, - "loss": 0.54, + "learning_rate": 5.511206736209526e-05, + "loss": 0.395, "step": 221500 }, { "epoch": 0.13, - "learning_rate": 5.510997579639694e-05, - "loss": 0.5524, + "learning_rate": 5.5109967396534695e-05, + "loss": 0.4024, "step": 222000 }, { "epoch": 0.13, - "learning_rate": 5.5107875830836376e-05, - "loss": 0.5398, + "learning_rate": 5.5107867430974136e-05, + "loss": 0.3966, "step": 222500 }, { "epoch": 0.13, - "learning_rate": 5.510577586527581e-05, - "loss": 0.556, + "learning_rate": 5.510576746541357e-05, + "loss": 0.4017, "step": 223000 }, { "epoch": 0.13, - "learning_rate": 5.510368009964636e-05, - "loss": 0.5415, + "learning_rate": 5.5103667499853e-05, + "loss": 0.3943, "step": 223500 }, { "epoch": 0.13, - "learning_rate": 5.5101584334016917e-05, - "loss": 0.5344, + "learning_rate": 5.510156753429244e-05, + "loss": 0.3931, "step": 224000 }, { "epoch": 0.13, - "learning_rate": 5.509948436845636e-05, - "loss": 0.5388, + "learning_rate": 5.5099471768662996e-05, + "loss": 0.3857, "step": 224500 }, { "epoch": 0.13, - "learning_rate": 5.509738440289579e-05, - "loss": 0.5386, + "learning_rate": 5.509737180310243e-05, + "loss": 0.3974, "step": 225000 }, { "epoch": 0.14, - "learning_rate": 5.509528443733523e-05, - "loss": 0.5383, + "learning_rate": 5.509527183754186e-05, + "loss": 0.3892, "step": 225500 }, { "epoch": 0.14, - "learning_rate": 5.5093184471774664e-05, - "loss": 0.5424, + "learning_rate": 5.5093171871981304e-05, + "loss": 0.3923, "step": 226000 }, { "epoch": 0.14, - "learning_rate": 5.50910845062141e-05, - "loss": 0.5463, + "learning_rate": 5.509107610635186e-05, + "loss": 0.3939, "step": 226500 }, { "epoch": 0.14, - "learning_rate": 5.508898454065354e-05, - "loss": 0.5426, + "learning_rate": 5.508897614079129e-05, + "loss": 0.391, "step": 227000 }, { "epoch": 0.14, - "learning_rate": 5.508688457509297e-05, - "loss": 0.5406, + "learning_rate": 5.5086876175230724e-05, + "loss": 0.3907, "step": 227500 }, { "epoch": 0.14, - "learning_rate": 5.5084788809463525e-05, - "loss": 0.5405, + "learning_rate": 5.5084776209670164e-05, + "loss": 0.4009, "step": 228000 }, { "epoch": 0.14, - "learning_rate": 5.508268884390296e-05, - "loss": 0.5319, + "learning_rate": 5.50826762441096e-05, + "loss": 0.3896, "step": 228500 }, { "epoch": 0.14, - "learning_rate": 5.50805888783424e-05, - "loss": 0.5378, + "learning_rate": 5.508058047848015e-05, + "loss": 0.3965, "step": 229000 }, { "epoch": 0.14, - "learning_rate": 5.507848891278183e-05, - "loss": 0.5393, + "learning_rate": 5.507848051291959e-05, + "loss": 0.3958, "step": 229500 }, { "epoch": 0.14, - "learning_rate": 5.5076393147152385e-05, - "loss": 0.5587, + "learning_rate": 5.5076380547359025e-05, + "loss": 0.4004, "step": 230000 }, { "epoch": 0.14, - "learning_rate": 5.507429318159182e-05, - "loss": 0.545, + "learning_rate": 5.507428058179846e-05, + "loss": 0.3997, "step": 230500 }, { "epoch": 0.14, - "learning_rate": 5.507219321603126e-05, - "loss": 0.5439, + "learning_rate": 5.507218481616901e-05, + "loss": 0.3907, "step": 231000 }, { "epoch": 0.14, - "learning_rate": 5.507009325047069e-05, - "loss": 0.5283, + "learning_rate": 5.507008485060845e-05, + "loss": 0.3905, "step": 231500 }, { "epoch": 0.14, - "learning_rate": 5.5067997484841246e-05, - "loss": 0.538, + "learning_rate": 5.5067984885047886e-05, + "loss": 0.3881, "step": 232000 }, { "epoch": 0.14, - "learning_rate": 5.5065897519280686e-05, - "loss": 0.5309, + "learning_rate": 5.506588491948732e-05, + "loss": 0.3871, "step": 232500 }, { "epoch": 0.14, - "learning_rate": 5.506379755372012e-05, - "loss": 0.5331, + "learning_rate": 5.506378915385787e-05, + "loss": 0.3885, "step": 233000 }, { "epoch": 0.14, - "learning_rate": 5.506169758815955e-05, - "loss": 0.5485, + "learning_rate": 5.506169338822843e-05, + "loss": 0.3996, "step": 233500 }, { "epoch": 0.14, - "learning_rate": 5.505960182253011e-05, - "loss": 0.5486, + "learning_rate": 5.5059593422667867e-05, + "loss": 0.3862, "step": 234000 }, { "epoch": 0.14, - "learning_rate": 5.505750185696955e-05, - "loss": 0.5277, + "learning_rate": 5.50574934571073e-05, + "loss": 0.3844, "step": 234500 }, { "epoch": 0.14, - "learning_rate": 5.505540189140898e-05, - "loss": 0.5366, + "learning_rate": 5.505539349154674e-05, + "loss": 0.3904, "step": 235000 }, { "epoch": 0.14, - "learning_rate": 5.5053301925848414e-05, - "loss": 0.5368, + "learning_rate": 5.5053293525986174e-05, + "loss": 0.389, "step": 235500 }, { "epoch": 0.14, - "learning_rate": 5.5051201960287854e-05, - "loss": 0.5366, + "learning_rate": 5.505119356042561e-05, + "loss": 0.3865, "step": 236000 }, { "epoch": 0.14, - "learning_rate": 5.504910619465841e-05, - "loss": 0.5314, + "learning_rate": 5.504909359486505e-05, + "loss": 0.386, "step": 236500 }, { "epoch": 0.14, - "learning_rate": 5.504700622909784e-05, - "loss": 0.5275, + "learning_rate": 5.504699362930448e-05, + "loss": 0.3911, "step": 237000 }, { "epoch": 0.14, - "learning_rate": 5.5044910463468395e-05, - "loss": 0.5336, + "learning_rate": 5.5044893663743914e-05, + "loss": 0.3926, "step": 237500 }, { "epoch": 0.14, - "learning_rate": 5.5042810497907835e-05, - "loss": 0.5381, + "learning_rate": 5.504279789811447e-05, + "loss": 0.3917, "step": 238000 }, { "epoch": 0.14, - "learning_rate": 5.504071053234727e-05, - "loss": 0.5449, + "learning_rate": 5.504069793255391e-05, + "loss": 0.3949, "step": 238500 }, { "epoch": 0.14, - "learning_rate": 5.50386105667867e-05, - "loss": 0.5279, + "learning_rate": 5.503859796699334e-05, + "loss": 0.386, "step": 239000 }, { "epoch": 0.14, - "learning_rate": 5.503651060122614e-05, - "loss": 0.5159, + "learning_rate": 5.5036498001432775e-05, + "loss": 0.3814, "step": 239500 }, { "epoch": 0.14, - "learning_rate": 5.5034410635665576e-05, - "loss": 0.5428, + "learning_rate": 5.503440223580333e-05, + "loss": 0.3902, "step": 240000 }, { "epoch": 0.14, - "learning_rate": 5.503231067010501e-05, - "loss": 0.5263, + "learning_rate": 5.503230227024277e-05, + "loss": 0.3831, "step": 240500 }, { "epoch": 0.14, - "learning_rate": 5.503021070454445e-05, - "loss": 0.5401, + "learning_rate": 5.50302023046822e-05, + "loss": 0.3918, "step": 241000 }, { "epoch": 0.14, - "learning_rate": 5.5028114938915e-05, - "loss": 0.5342, + "learning_rate": 5.5028102339121636e-05, + "loss": 0.3876, "step": 241500 }, { "epoch": 0.15, - "learning_rate": 5.5026014973354436e-05, - "loss": 0.5334, + "learning_rate": 5.5026002373561076e-05, + "loss": 0.3936, "step": 242000 }, { "epoch": 0.15, - "learning_rate": 5.502391500779387e-05, - "loss": 0.5385, + "learning_rate": 5.502390240800051e-05, + "loss": 0.3855, "step": 242500 }, { "epoch": 0.15, - "learning_rate": 5.502181504223331e-05, - "loss": 0.5353, + "learning_rate": 5.502180244243994e-05, + "loss": 0.3869, "step": 243000 }, { "epoch": 0.15, - "learning_rate": 5.5019719276603864e-05, - "loss": 0.533, + "learning_rate": 5.5019702476879376e-05, + "loss": 0.3872, "step": 243500 }, { "epoch": 0.15, - "learning_rate": 5.50176193110433e-05, - "loss": 0.5379, + "learning_rate": 5.501760671124994e-05, + "loss": 0.3844, "step": 244000 }, { "epoch": 0.15, - "learning_rate": 5.501551934548273e-05, - "loss": 0.5242, + "learning_rate": 5.501550674568937e-05, + "loss": 0.3823, "step": 244500 }, { "epoch": 0.15, - "learning_rate": 5.501341937992217e-05, - "loss": 0.5278, + "learning_rate": 5.501340678012881e-05, + "loss": 0.3898, "step": 245000 }, { "epoch": 0.15, - "learning_rate": 5.5011323614292724e-05, - "loss": 0.5375, + "learning_rate": 5.501130681456824e-05, + "loss": 0.3866, "step": 245500 }, { "epoch": 0.15, - "learning_rate": 5.500922364873216e-05, - "loss": 0.5347, + "learning_rate": 5.50092110489388e-05, + "loss": 0.3907, "step": 246000 }, { "epoch": 0.15, - "learning_rate": 5.500712788310271e-05, - "loss": 0.5258, + "learning_rate": 5.500711108337823e-05, + "loss": 0.3872, "step": 246500 }, { "epoch": 0.15, - "learning_rate": 5.500502791754215e-05, - "loss": 0.5321, + "learning_rate": 5.500501111781767e-05, + "loss": 0.3857, "step": 247000 }, { "epoch": 0.15, - "learning_rate": 5.5002927951981585e-05, - "loss": 0.5381, + "learning_rate": 5.5002911152257105e-05, + "loss": 0.3876, "step": 247500 }, { "epoch": 0.15, - "learning_rate": 5.500082798642102e-05, - "loss": 0.5345, + "learning_rate": 5.500081118669653e-05, + "loss": 0.3908, "step": 248000 }, { "epoch": 0.15, - "learning_rate": 5.499872802086046e-05, - "loss": 0.5367, + "learning_rate": 5.499871542106709e-05, + "loss": 0.3881, "step": 248500 }, { "epoch": 0.15, - "learning_rate": 5.499662805529989e-05, - "loss": 0.54, + "learning_rate": 5.499661545550653e-05, + "loss": 0.3939, "step": 249000 }, { "epoch": 0.15, - "learning_rate": 5.4994528089739326e-05, - "loss": 0.5262, + "learning_rate": 5.4994515489945965e-05, + "loss": 0.386, "step": 249500 }, { "epoch": 0.15, - "learning_rate": 5.499243232410988e-05, - "loss": 0.5303, + "learning_rate": 5.49924155243854e-05, + "loss": 0.3843, "step": 250000 }, { "epoch": 0.15, - "learning_rate": 5.499033235854932e-05, - "loss": 0.5282, + "learning_rate": 5.499031555882483e-05, + "loss": 0.3861, "step": 250500 }, { "epoch": 0.15, - "learning_rate": 5.498823239298875e-05, - "loss": 0.5223, + "learning_rate": 5.4988215593264266e-05, + "loss": 0.3855, "step": 251000 }, { "epoch": 0.15, - "learning_rate": 5.4986136627359307e-05, - "loss": 0.5503, + "learning_rate": 5.4986119827634826e-05, + "loss": 0.4056, "step": 251500 }, { "epoch": 0.15, - "learning_rate": 5.498403666179875e-05, - "loss": 0.5315, + "learning_rate": 5.4984019862074266e-05, + "loss": 0.3836, "step": 252000 }, { "epoch": 0.15, - "learning_rate": 5.498193669623818e-05, - "loss": 0.5339, + "learning_rate": 5.49819198965137e-05, + "loss": 0.3884, "step": 252500 }, { "epoch": 0.15, - "learning_rate": 5.4979836730677614e-05, - "loss": 0.5316, + "learning_rate": 5.4979819930953126e-05, + "loss": 0.3852, "step": 253000 }, { "epoch": 0.15, - "learning_rate": 5.4977736765117054e-05, - "loss": 0.5194, + "learning_rate": 5.497771996539257e-05, + "loss": 0.3754, "step": 253500 }, { "epoch": 0.15, - "learning_rate": 5.497563679955649e-05, - "loss": 0.5397, + "learning_rate": 5.4975619999832e-05, + "loss": 0.3869, "step": 254000 }, { "epoch": 0.15, - "learning_rate": 5.497353683399592e-05, - "loss": 0.521, + "learning_rate": 5.497352423420256e-05, + "loss": 0.3813, "step": 254500 }, { "epoch": 0.15, - "learning_rate": 5.497143686843536e-05, - "loss": 0.5335, + "learning_rate": 5.4971424268641994e-05, + "loss": 0.3857, "step": 255000 }, { "epoch": 0.15, - "learning_rate": 5.4969336902874795e-05, - "loss": 0.5279, + "learning_rate": 5.496932850301255e-05, + "loss": 0.3887, "step": 255500 }, { "epoch": 0.15, - "learning_rate": 5.496723693731423e-05, - "loss": 0.5201, + "learning_rate": 5.496722853745199e-05, + "loss": 0.3822, "step": 256000 }, { "epoch": 0.15, - "learning_rate": 5.496513697175367e-05, - "loss": 0.5292, + "learning_rate": 5.496512857189142e-05, + "loss": 0.3857, "step": 256500 }, { "epoch": 0.15, - "learning_rate": 5.4963037006193095e-05, - "loss": 0.5234, + "learning_rate": 5.4963028606330855e-05, + "loss": 0.3863, "step": 257000 }, { "epoch": 0.15, - "learning_rate": 5.4960941240563655e-05, - "loss": 0.5251, + "learning_rate": 5.496092864077029e-05, + "loss": 0.3857, "step": 257500 }, { "epoch": 0.15, - "learning_rate": 5.495884127500309e-05, - "loss": 0.5207, + "learning_rate": 5.495882867520972e-05, + "loss": 0.3786, "step": 258000 }, { "epoch": 0.15, - "learning_rate": 5.495674130944253e-05, - "loss": 0.5232, + "learning_rate": 5.495672870964916e-05, + "loss": 0.3852, "step": 258500 }, { "epoch": 0.16, - "learning_rate": 5.495464134388196e-05, - "loss": 0.5224, + "learning_rate": 5.4954628744088595e-05, + "loss": 0.3803, "step": 259000 }, { "epoch": 0.16, - "learning_rate": 5.4952545578252516e-05, - "loss": 0.515, + "learning_rate": 5.495252877852803e-05, + "loss": 0.3777, "step": 259500 }, { "epoch": 0.16, - "learning_rate": 5.495044561269195e-05, - "loss": 0.5319, + "learning_rate": 5.495043301289858e-05, + "loss": 0.3912, "step": 260000 }, { "epoch": 0.16, - "learning_rate": 5.494834564713139e-05, - "loss": 0.5267, + "learning_rate": 5.494833304733802e-05, + "loss": 0.3779, "step": 260500 }, { "epoch": 0.16, - "learning_rate": 5.494624568157082e-05, - "loss": 0.5378, + "learning_rate": 5.4946233081777456e-05, + "loss": 0.3847, "step": 261000 }, { "epoch": 0.16, - "learning_rate": 5.494414991594138e-05, - "loss": 0.5316, + "learning_rate": 5.494413311621689e-05, + "loss": 0.3845, "step": 261500 }, { "epoch": 0.16, - "learning_rate": 5.494204995038082e-05, - "loss": 0.5228, + "learning_rate": 5.494203735058745e-05, + "loss": 0.3814, "step": 262000 }, { "epoch": 0.16, - "learning_rate": 5.493995418475137e-05, - "loss": 0.5419, + "learning_rate": 5.4939941584958003e-05, + "loss": 0.3919, "step": 262500 }, { "epoch": 0.16, - "learning_rate": 5.4937854219190804e-05, - "loss": 0.5258, + "learning_rate": 5.4937841619397444e-05, + "loss": 0.3812, "step": 263000 }, { "epoch": 0.16, - "learning_rate": 5.493575425363024e-05, - "loss": 0.529, + "learning_rate": 5.493574165383688e-05, + "loss": 0.3784, "step": 263500 }, { "epoch": 0.16, - "learning_rate": 5.493365428806968e-05, - "loss": 0.523, + "learning_rate": 5.493364168827631e-05, + "loss": 0.3857, "step": 264000 }, { "epoch": 0.16, - "learning_rate": 5.493155432250911e-05, - "loss": 0.5303, + "learning_rate": 5.493154172271575e-05, + "loss": 0.3831, "step": 264500 }, { "epoch": 0.16, - "learning_rate": 5.4929454356948545e-05, - "loss": 0.5383, + "learning_rate": 5.492944175715518e-05, + "loss": 0.3902, "step": 265000 }, { "epoch": 0.16, - "learning_rate": 5.49273585913191e-05, - "loss": 0.5379, + "learning_rate": 5.492734179159462e-05, + "loss": 0.3797, "step": 265500 }, { "epoch": 0.16, - "learning_rate": 5.492525862575854e-05, - "loss": 0.5254, + "learning_rate": 5.492524182603405e-05, + "loss": 0.3822, "step": 266000 }, { "epoch": 0.16, - "learning_rate": 5.492315866019797e-05, - "loss": 0.5297, + "learning_rate": 5.4923141860473485e-05, + "loss": 0.3855, "step": 266500 }, { "epoch": 0.16, - "learning_rate": 5.4921058694637405e-05, - "loss": 0.5203, + "learning_rate": 5.4921041894912925e-05, + "loss": 0.3745, "step": 267000 }, { "epoch": 0.16, - "learning_rate": 5.4918958729076846e-05, - "loss": 0.5228, + "learning_rate": 5.491894192935236e-05, + "loss": 0.3789, "step": 267500 }, { "epoch": 0.16, - "learning_rate": 5.491685876351628e-05, - "loss": 0.5192, + "learning_rate": 5.491684196379179e-05, + "loss": 0.3772, "step": 268000 }, { "epoch": 0.16, - "learning_rate": 5.491475879795571e-05, - "loss": 0.5326, + "learning_rate": 5.4914746198162345e-05, + "loss": 0.3891, "step": 268500 }, { "epoch": 0.16, - "learning_rate": 5.4912658832395146e-05, - "loss": 0.5255, + "learning_rate": 5.4912654632464026e-05, + "loss": 0.3847, "step": 269000 }, { "epoch": 0.16, - "learning_rate": 5.4910563066765706e-05, - "loss": 0.5241, + "learning_rate": 5.491055466690346e-05, + "loss": 0.3832, "step": 269500 }, { "epoch": 0.16, - "learning_rate": 5.490846310120514e-05, - "loss": 0.5347, + "learning_rate": 5.49084547013429e-05, + "loss": 0.3845, "step": 270000 }, { "epoch": 0.16, - "learning_rate": 5.490636733557569e-05, - "loss": 0.5259, + "learning_rate": 5.490635473578233e-05, + "loss": 0.3781, "step": 270500 }, { "epoch": 0.16, - "learning_rate": 5.490427156994625e-05, - "loss": 0.5275, + "learning_rate": 5.490425477022177e-05, + "loss": 0.3854, "step": 271000 }, { "epoch": 0.16, - "learning_rate": 5.490217160438569e-05, - "loss": 0.5208, + "learning_rate": 5.490215480466121e-05, + "loss": 0.3885, "step": 271500 }, { "epoch": 0.16, - "learning_rate": 5.490007163882512e-05, - "loss": 0.5212, + "learning_rate": 5.490005483910063e-05, + "loss": 0.3785, "step": 272000 }, { "epoch": 0.16, - "learning_rate": 5.4897971673264554e-05, - "loss": 0.519, + "learning_rate": 5.4897954873540074e-05, + "loss": 0.3795, "step": 272500 }, { "epoch": 0.16, - "learning_rate": 5.4895871707703994e-05, - "loss": 0.523, + "learning_rate": 5.489585490797951e-05, + "loss": 0.3781, "step": 273000 }, { "epoch": 0.16, - "learning_rate": 5.489377594207455e-05, - "loss": 0.5421, + "learning_rate": 5.489375914235007e-05, + "loss": 0.3885, "step": 273500 }, { "epoch": 0.16, - "learning_rate": 5.489167597651398e-05, - "loss": 0.5139, + "learning_rate": 5.48916591767895e-05, + "loss": 0.3703, "step": 274000 }, { "epoch": 0.16, - "learning_rate": 5.488957601095342e-05, - "loss": 0.5294, + "learning_rate": 5.4889559211228934e-05, + "loss": 0.3871, "step": 274500 }, { "epoch": 0.16, - "learning_rate": 5.4887476045392855e-05, - "loss": 0.5096, + "learning_rate": 5.488745924566837e-05, + "loss": 0.3716, "step": 275000 }, { "epoch": 0.17, - "learning_rate": 5.488537607983229e-05, - "loss": 0.5301, + "learning_rate": 5.48853592801078e-05, + "loss": 0.3856, "step": 275500 }, { "epoch": 0.17, - "learning_rate": 5.488327611427173e-05, - "loss": 0.529, + "learning_rate": 5.488325931454724e-05, + "loss": 0.3807, "step": 276000 }, { "epoch": 0.17, - "learning_rate": 5.488117614871116e-05, - "loss": 0.5144, + "learning_rate": 5.4881159348986675e-05, + "loss": 0.3749, "step": 276500 }, { "epoch": 0.17, - "learning_rate": 5.4879076183150596e-05, - "loss": 0.5175, + "learning_rate": 5.487906358335723e-05, + "loss": 0.3797, "step": 277000 }, { "epoch": 0.17, - "learning_rate": 5.4876976217590036e-05, - "loss": 0.5146, + "learning_rate": 5.487696361779667e-05, + "loss": 0.3742, "step": 277500 }, { "epoch": 0.17, - "learning_rate": 5.487487625202947e-05, - "loss": 0.5327, + "learning_rate": 5.48748636522361e-05, + "loss": 0.3765, "step": 278000 }, { "epoch": 0.17, - "learning_rate": 5.4872776286468896e-05, - "loss": 0.5261, + "learning_rate": 5.4872763686675536e-05, + "loss": 0.3789, "step": 278500 }, { "epoch": 0.17, - "learning_rate": 5.4870676320908336e-05, - "loss": 0.5025, + "learning_rate": 5.487066792104609e-05, + "loss": 0.3689, "step": 279000 }, { "epoch": 0.17, - "learning_rate": 5.48685805552789e-05, - "loss": 0.532, + "learning_rate": 5.486857215541665e-05, + "loss": 0.3921, "step": 279500 }, { "epoch": 0.17, - "learning_rate": 5.486648058971833e-05, - "loss": 0.5191, + "learning_rate": 5.486647218985609e-05, + "loss": 0.3782, "step": 280000 }, { "epoch": 0.17, - "learning_rate": 5.486438062415776e-05, - "loss": 0.5193, + "learning_rate": 5.486437222429552e-05, + "loss": 0.381, "step": 280500 }, { "epoch": 0.17, - "learning_rate": 5.48622806585972e-05, - "loss": 0.5327, + "learning_rate": 5.486227225873496e-05, + "loss": 0.3824, "step": 281000 }, { "epoch": 0.17, - "learning_rate": 5.486018069303663e-05, - "loss": 0.5121, + "learning_rate": 5.486017229317439e-05, + "loss": 0.3788, "step": 281500 }, { "epoch": 0.17, - "learning_rate": 5.485808492740719e-05, - "loss": 0.5212, + "learning_rate": 5.4858072327613824e-05, + "loss": 0.3762, "step": 282000 }, { "epoch": 0.17, - "learning_rate": 5.4855984961846624e-05, - "loss": 0.5116, + "learning_rate": 5.485597236205326e-05, + "loss": 0.3735, "step": 282500 }, { "epoch": 0.17, - "learning_rate": 5.4853889196217185e-05, - "loss": 0.5175, + "learning_rate": 5.48538723964927e-05, + "loss": 0.3736, "step": 283000 }, { "epoch": 0.17, - "learning_rate": 5.485178923065662e-05, - "loss": 0.5248, + "learning_rate": 5.485177243093213e-05, + "loss": 0.3799, "step": 283500 }, { "epoch": 0.17, - "learning_rate": 5.484968926509605e-05, - "loss": 0.5194, + "learning_rate": 5.4849672465371564e-05, + "loss": 0.376, "step": 284000 }, { "epoch": 0.17, - "learning_rate": 5.484758929953549e-05, - "loss": 0.518, + "learning_rate": 5.4847576699742125e-05, + "loss": 0.3807, "step": 284500 }, { "epoch": 0.17, - "learning_rate": 5.4845489333974925e-05, - "loss": 0.5274, + "learning_rate": 5.484547673418156e-05, + "loss": 0.3782, "step": 285000 }, { "epoch": 0.17, - "learning_rate": 5.484338936841435e-05, - "loss": 0.5129, + "learning_rate": 5.484337676862099e-05, + "loss": 0.371, "step": 285500 }, { "epoch": 0.17, - "learning_rate": 5.484128940285379e-05, - "loss": 0.5142, + "learning_rate": 5.484127680306043e-05, + "loss": 0.3756, "step": 286000 }, { "epoch": 0.17, - "learning_rate": 5.4839189437293226e-05, - "loss": 0.5305, + "learning_rate": 5.4839176837499865e-05, + "loss": 0.3862, "step": 286500 }, { "epoch": 0.17, - "learning_rate": 5.4837093671663786e-05, - "loss": 0.5205, + "learning_rate": 5.48370768719393e-05, + "loss": 0.3722, "step": 287000 }, { "epoch": 0.17, - "learning_rate": 5.483499370610322e-05, - "loss": 0.5129, + "learning_rate": 5.483498110630985e-05, + "loss": 0.3739, "step": 287500 }, { "epoch": 0.17, - "learning_rate": 5.483289794047377e-05, - "loss": 0.505, + "learning_rate": 5.483288114074929e-05, + "loss": 0.3694, "step": 288000 }, { "epoch": 0.17, - "learning_rate": 5.483079797491321e-05, - "loss": 0.5062, + "learning_rate": 5.4830781175188726e-05, + "loss": 0.3744, "step": 288500 }, { "epoch": 0.17, - "learning_rate": 5.482869800935265e-05, - "loss": 0.5175, + "learning_rate": 5.482868120962816e-05, + "loss": 0.3743, "step": 289000 }, { "epoch": 0.17, - "learning_rate": 5.482659804379208e-05, - "loss": 0.5239, + "learning_rate": 5.48265812440676e-05, + "loss": 0.3812, "step": 289500 }, { "epoch": 0.17, - "learning_rate": 5.482449807823152e-05, - "loss": 0.5136, + "learning_rate": 5.482448127850703e-05, + "loss": 0.3712, "step": 290000 }, { "epoch": 0.17, - "learning_rate": 5.482239811267095e-05, - "loss": 0.5101, + "learning_rate": 5.482238551287759e-05, + "loss": 0.3719, "step": 290500 }, { "epoch": 0.17, - "learning_rate": 5.482029814711039e-05, - "loss": 0.5223, + "learning_rate": 5.482028974724814e-05, + "loss": 0.393, "step": 291000 }, { "epoch": 0.17, - "learning_rate": 5.481819818154982e-05, - "loss": 0.5275, + "learning_rate": 5.481818978168758e-05, + "loss": 0.3833, "step": 291500 }, { "epoch": 0.18, - "learning_rate": 5.481610241592038e-05, - "loss": 0.5201, + "learning_rate": 5.4816089816127014e-05, + "loss": 0.3784, "step": 292000 }, { "epoch": 0.18, - "learning_rate": 5.481400245035981e-05, - "loss": 0.504, + "learning_rate": 5.481398985056645e-05, + "loss": 0.3692, "step": 292500 }, { "epoch": 0.18, - "learning_rate": 5.481190668473037e-05, - "loss": 0.5262, + "learning_rate": 5.481188988500589e-05, + "loss": 0.3795, "step": 293000 }, { "epoch": 0.18, - "learning_rate": 5.480980671916981e-05, - "loss": 0.5258, + "learning_rate": 5.480978991944532e-05, + "loss": 0.3779, "step": 293500 }, { "epoch": 0.18, - "learning_rate": 5.480770675360924e-05, - "loss": 0.5064, + "learning_rate": 5.4807689953884755e-05, + "loss": 0.3659, "step": 294000 }, { "epoch": 0.18, - "learning_rate": 5.4805606788048675e-05, - "loss": 0.5209, + "learning_rate": 5.4805589988324195e-05, + "loss": 0.3801, "step": 294500 }, { "epoch": 0.18, - "learning_rate": 5.480350682248811e-05, - "loss": 0.5096, + "learning_rate": 5.480349002276363e-05, + "loss": 0.3774, "step": 295000 }, { "epoch": 0.18, - "learning_rate": 5.480140685692754e-05, - "loss": 0.5157, + "learning_rate": 5.480139005720306e-05, + "loss": 0.3727, "step": 295500 }, { "epoch": 0.18, - "learning_rate": 5.4799306891366976e-05, - "loss": 0.5102, + "learning_rate": 5.47992900916425e-05, + "loss": 0.3762, "step": 296000 }, { "epoch": 0.18, - "learning_rate": 5.4797206925806416e-05, - "loss": 0.5095, + "learning_rate": 5.479719012608193e-05, + "loss": 0.3696, "step": 296500 }, { "epoch": 0.18, - "learning_rate": 5.4795111160176976e-05, - "loss": 0.5077, + "learning_rate": 5.479509016052136e-05, + "loss": 0.375, "step": 297000 }, { "epoch": 0.18, - "learning_rate": 5.47930111946164e-05, - "loss": 0.5097, + "learning_rate": 5.47929901949608e-05, + "loss": 0.3706, "step": 297500 }, { "epoch": 0.18, - "learning_rate": 5.479091122905584e-05, - "loss": 0.5095, + "learning_rate": 5.4790890229400236e-05, + "loss": 0.3694, "step": 298000 }, { "epoch": 0.18, - "learning_rate": 5.478881126349528e-05, - "loss": 0.502, + "learning_rate": 5.478879026383967e-05, + "loss": 0.3695, "step": 298500 }, { "epoch": 0.18, - "learning_rate": 5.478671129793471e-05, - "loss": 0.526, + "learning_rate": 5.478669029827911e-05, + "loss": 0.3763, "step": 299000 }, { "epoch": 0.18, - "learning_rate": 5.478461553230527e-05, - "loss": 0.5113, + "learning_rate": 5.478459033271854e-05, + "loss": 0.3744, "step": 299500 }, { "epoch": 0.18, - "learning_rate": 5.4782515566744704e-05, - "loss": 0.5203, + "learning_rate": 5.4782494567089097e-05, + "loss": 0.3787, "step": 300000 }, { "epoch": 0.18, - "eval_loss": 0.4822831451892853, - "eval_runtime": 1102.9313, - "eval_samples_per_second": 477.564, - "eval_steps_per_second": 79.594, + "eval_loss": 0.3329967260360718, + "eval_runtime": 1465.3298, + "eval_samples_per_second": 359.455, + "eval_steps_per_second": 59.909, "step": 300000 }, { "epoch": 0.18, - "learning_rate": 5.478041560118414e-05, - "loss": 0.5233, + "learning_rate": 5.478039880145966e-05, + "loss": 0.3786, "step": 300500 }, { "epoch": 0.18, - "learning_rate": 5.477831563562357e-05, - "loss": 0.5089, + "learning_rate": 5.477829883589909e-05, + "loss": 0.3774, "step": 301000 }, { "epoch": 0.18, - "learning_rate": 5.477621986999413e-05, - "loss": 0.5203, + "learning_rate": 5.4776198870338524e-05, + "loss": 0.3757, "step": 301500 }, { "epoch": 0.18, - "learning_rate": 5.4774119904433565e-05, - "loss": 0.5129, + "learning_rate": 5.4774103104709084e-05, + "loss": 0.3754, "step": 302000 }, { "epoch": 0.18, - "learning_rate": 5.4772019938873e-05, - "loss": 0.5119, + "learning_rate": 5.477200313914852e-05, + "loss": 0.3731, "step": 302500 }, { "epoch": 0.18, - "learning_rate": 5.476991997331244e-05, - "loss": 0.5185, + "learning_rate": 5.476990317358796e-05, + "loss": 0.3757, "step": 303000 }, { "epoch": 0.18, - "learning_rate": 5.4767824207683e-05, - "loss": 0.5022, + "learning_rate": 5.4767803208027385e-05, + "loss": 0.3702, "step": 303500 }, { "epoch": 0.18, - "learning_rate": 5.476572424212243e-05, - "loss": 0.5123, + "learning_rate": 5.476570324246682e-05, + "loss": 0.3719, "step": 304000 }, { "epoch": 0.18, - "learning_rate": 5.476362427656186e-05, - "loss": 0.5113, + "learning_rate": 5.476360747683738e-05, + "loss": 0.3744, "step": 304500 }, { "epoch": 0.18, - "learning_rate": 5.47615243110013e-05, - "loss": 0.515, + "learning_rate": 5.476150751127682e-05, + "loss": 0.3691, "step": 305000 }, { "epoch": 0.18, - "learning_rate": 5.475942854537186e-05, - "loss": 0.511, + "learning_rate": 5.475940754571625e-05, + "loss": 0.366, "step": 305500 }, { "epoch": 0.18, - "learning_rate": 5.475732857981129e-05, - "loss": 0.5163, + "learning_rate": 5.475730758015568e-05, + "loss": 0.3794, "step": 306000 }, { "epoch": 0.18, - "learning_rate": 5.4755232814181846e-05, - "loss": 0.5108, + "learning_rate": 5.475520761459512e-05, + "loss": 0.3715, "step": 306500 }, { "epoch": 0.18, - "learning_rate": 5.475313284862128e-05, - "loss": 0.5065, + "learning_rate": 5.475310764903455e-05, + "loss": 0.3741, "step": 307000 }, { "epoch": 0.18, - "learning_rate": 5.475103288306072e-05, - "loss": 0.5166, + "learning_rate": 5.4751007683473986e-05, + "loss": 0.3776, "step": 307500 }, { "epoch": 0.18, - "learning_rate": 5.4748932917500154e-05, - "loss": 0.5103, + "learning_rate": 5.4748907717913426e-05, + "loss": 0.3714, "step": 308000 }, { "epoch": 0.18, - "learning_rate": 5.474683295193959e-05, - "loss": 0.5047, + "learning_rate": 5.474680775235286e-05, + "loss": 0.3667, "step": 308500 }, { "epoch": 0.19, - "learning_rate": 5.474473298637903e-05, - "loss": 0.5089, + "learning_rate": 5.474471198672341e-05, + "loss": 0.3753, "step": 309000 }, { "epoch": 0.19, - "learning_rate": 5.4742633020818454e-05, - "loss": 0.523, + "learning_rate": 5.474261202116285e-05, + "loss": 0.3742, "step": 309500 }, { "epoch": 0.19, - "learning_rate": 5.4740533055257894e-05, - "loss": 0.5083, + "learning_rate": 5.4740516255533414e-05, + "loss": 0.3634, "step": 310000 }, { "epoch": 0.19, - "learning_rate": 5.4738437289628455e-05, - "loss": 0.5164, + "learning_rate": 5.473841628997285e-05, + "loss": 0.377, "step": 310500 }, { "epoch": 0.19, - "learning_rate": 5.473633732406789e-05, - "loss": 0.5302, + "learning_rate": 5.4736316324412274e-05, + "loss": 0.3744, "step": 311000 }, { "epoch": 0.19, - "learning_rate": 5.4734237358507315e-05, - "loss": 0.5004, + "learning_rate": 5.4734216358851714e-05, + "loss": 0.3654, "step": 311500 }, { "epoch": 0.19, - "learning_rate": 5.4732137392946755e-05, - "loss": 0.5141, + "learning_rate": 5.473211639329115e-05, + "loss": 0.3715, "step": 312000 }, { "epoch": 0.19, - "learning_rate": 5.4730041627317315e-05, - "loss": 0.5105, + "learning_rate": 5.473001642773058e-05, + "loss": 0.3699, "step": 312500 }, { "epoch": 0.19, - "learning_rate": 5.472794166175675e-05, - "loss": 0.5103, + "learning_rate": 5.472791646217002e-05, + "loss": 0.3721, "step": 313000 }, { "epoch": 0.19, - "learning_rate": 5.472584169619618e-05, - "loss": 0.5096, + "learning_rate": 5.4725816496609455e-05, + "loss": 0.3746, "step": 313500 }, { "epoch": 0.19, - "learning_rate": 5.4723741730635616e-05, - "loss": 0.5134, + "learning_rate": 5.472371653104889e-05, + "loss": 0.3657, "step": 314000 }, { "epoch": 0.19, - "learning_rate": 5.4721645965006176e-05, - "loss": 0.4984, + "learning_rate": 5.472162076541944e-05, + "loss": 0.3635, "step": 314500 }, { "epoch": 0.19, - "learning_rate": 5.471954599944561e-05, - "loss": 0.5093, + "learning_rate": 5.471952079985888e-05, + "loss": 0.3697, "step": 315000 }, { "epoch": 0.19, - "learning_rate": 5.471744603388504e-05, - "loss": 0.5057, + "learning_rate": 5.4717420834298315e-05, + "loss": 0.3674, "step": 315500 }, { "epoch": 0.19, - "learning_rate": 5.471534606832448e-05, - "loss": 0.5081, + "learning_rate": 5.471532086873775e-05, + "loss": 0.3707, "step": 316000 }, { "epoch": 0.19, - "learning_rate": 5.471325030269504e-05, - "loss": 0.5256, + "learning_rate": 5.471322510310831e-05, + "loss": 0.3764, "step": 316500 }, { "epoch": 0.19, - "learning_rate": 5.471115033713447e-05, - "loss": 0.5071, + "learning_rate": 5.471112513754774e-05, + "loss": 0.375, "step": 317000 }, { "epoch": 0.19, - "learning_rate": 5.470905037157391e-05, - "loss": 0.5014, + "learning_rate": 5.4709025171987176e-05, + "loss": 0.3674, "step": 317500 }, { "epoch": 0.19, - "learning_rate": 5.4706954605944464e-05, - "loss": 0.5024, + "learning_rate": 5.4706925206426616e-05, + "loss": 0.3647, "step": 318000 }, { "epoch": 0.19, - "learning_rate": 5.470485884031502e-05, - "loss": 0.503, + "learning_rate": 5.470482944079717e-05, + "loss": 0.3663, "step": 318500 }, { "epoch": 0.19, - "learning_rate": 5.470275887475445e-05, - "loss": 0.5149, + "learning_rate": 5.4702729475236603e-05, + "loss": 0.3693, "step": 319000 }, { "epoch": 0.19, - "learning_rate": 5.4700658909193884e-05, - "loss": 0.5047, + "learning_rate": 5.470062950967604e-05, + "loss": 0.3781, "step": 319500 }, { "epoch": 0.19, - "learning_rate": 5.4698558943633325e-05, - "loss": 0.5071, + "learning_rate": 5.469852954411548e-05, + "loss": 0.3702, "step": 320000 }, { "epoch": 0.19, - "learning_rate": 5.469645897807276e-05, - "loss": 0.5055, + "learning_rate": 5.469642957855491e-05, + "loss": 0.3675, "step": 320500 }, { "epoch": 0.19, - "learning_rate": 5.469435901251219e-05, - "loss": 0.4914, + "learning_rate": 5.4694329612994344e-05, + "loss": 0.3659, "step": 321000 }, { "epoch": 0.19, - "learning_rate": 5.469225904695163e-05, - "loss": 0.5039, + "learning_rate": 5.4692229647433784e-05, + "loss": 0.3669, "step": 321500 }, { "epoch": 0.19, - "learning_rate": 5.4690159081391065e-05, - "loss": 0.514, + "learning_rate": 5.469013388180434e-05, + "loss": 0.3713, "step": 322000 }, { "epoch": 0.19, - "learning_rate": 5.46880591158305e-05, - "loss": 0.511, + "learning_rate": 5.468803391624377e-05, + "loss": 0.3712, "step": 322500 }, { "epoch": 0.19, - "learning_rate": 5.468595915026994e-05, - "loss": 0.505, + "learning_rate": 5.4685933950683205e-05, + "loss": 0.3718, "step": 323000 }, { "epoch": 0.19, - "learning_rate": 5.4683859184709366e-05, - "loss": 0.5123, + "learning_rate": 5.4683833985122645e-05, + "loss": 0.3672, "step": 323500 }, { "epoch": 0.19, - "learning_rate": 5.4681759219148806e-05, - "loss": 0.5041, + "learning_rate": 5.468173401956208e-05, + "loss": 0.3652, "step": 324000 }, { "epoch": 0.19, - "learning_rate": 5.4679663453519366e-05, - "loss": 0.5104, + "learning_rate": 5.467963825393263e-05, + "loss": 0.3686, "step": 324500 }, { "epoch": 0.19, - "learning_rate": 5.46775634879588e-05, - "loss": 0.511, + "learning_rate": 5.467753828837207e-05, + "loss": 0.3673, "step": 325000 }, { "epoch": 0.2, - "learning_rate": 5.467546352239823e-05, - "loss": 0.5168, + "learning_rate": 5.4675438322811506e-05, + "loss": 0.3822, "step": 325500 }, { "epoch": 0.2, - "learning_rate": 5.467336355683767e-05, - "loss": 0.5044, + "learning_rate": 5.467333835725094e-05, + "loss": 0.3672, "step": 326000 }, { "epoch": 0.2, - "learning_rate": 5.467126779120823e-05, - "loss": 0.5054, + "learning_rate": 5.467123839169038e-05, + "loss": 0.3695, "step": 326500 }, { "epoch": 0.2, - "learning_rate": 5.466917202557878e-05, - "loss": 0.5026, + "learning_rate": 5.466913842612981e-05, + "loss": 0.3709, "step": 327000 }, { "epoch": 0.2, - "learning_rate": 5.4667072060018214e-05, - "loss": 0.5107, + "learning_rate": 5.4667038460569246e-05, + "loss": 0.3718, "step": 327500 }, { "epoch": 0.2, - "learning_rate": 5.466497209445765e-05, - "loss": 0.508, + "learning_rate": 5.4664938495008687e-05, + "loss": 0.3711, "step": 328000 }, { "epoch": 0.2, - "learning_rate": 5.466287212889709e-05, - "loss": 0.5086, + "learning_rate": 5.4662846929310354e-05, + "loss": 0.3711, "step": 328500 }, { "epoch": 0.2, - "learning_rate": 5.466077216333652e-05, - "loss": 0.505, + "learning_rate": 5.4660751163680914e-05, + "loss": 0.3679, "step": 329000 }, { "epoch": 0.2, - "learning_rate": 5.4658672197775955e-05, - "loss": 0.5076, + "learning_rate": 5.4658651198120354e-05, + "loss": 0.3683, "step": 329500 }, { "epoch": 0.2, - "learning_rate": 5.4656572232215395e-05, - "loss": 0.5088, + "learning_rate": 5.465655123255978e-05, + "loss": 0.3712, "step": 330000 }, { "epoch": 0.2, - "learning_rate": 5.465447226665483e-05, - "loss": 0.5191, + "learning_rate": 5.465445126699922e-05, + "loss": 0.3713, "step": 330500 }, { "epoch": 0.2, - "learning_rate": 5.465237650102538e-05, - "loss": 0.5025, + "learning_rate": 5.4652351301438654e-05, + "loss": 0.3659, "step": 331000 }, { "epoch": 0.2, - "learning_rate": 5.465027653546482e-05, - "loss": 0.4919, + "learning_rate": 5.465025133587809e-05, + "loss": 0.3661, "step": 331500 }, { "epoch": 0.2, - "learning_rate": 5.4648176569904256e-05, - "loss": 0.5094, + "learning_rate": 5.464815137031753e-05, + "loss": 0.369, "step": 332000 }, { "epoch": 0.2, - "learning_rate": 5.464607660434369e-05, - "loss": 0.5123, + "learning_rate": 5.464605560468808e-05, + "loss": 0.3666, "step": 332500 }, { "epoch": 0.2, - "learning_rate": 5.464398083871424e-05, - "loss": 0.5047, + "learning_rate": 5.4643955639127515e-05, + "loss": 0.3719, "step": 333000 }, { "epoch": 0.2, - "learning_rate": 5.464188087315368e-05, - "loss": 0.5108, + "learning_rate": 5.464185567356695e-05, + "loss": 0.3717, "step": 333500 }, { "epoch": 0.2, - "learning_rate": 5.4639780907593116e-05, - "loss": 0.4939, + "learning_rate": 5.463975570800639e-05, + "loss": 0.3639, "step": 334000 }, { "epoch": 0.2, - "learning_rate": 5.463768094203255e-05, - "loss": 0.4956, + "learning_rate": 5.463765574244582e-05, + "loss": 0.3648, "step": 334500 }, { "epoch": 0.2, - "learning_rate": 5.463558097647199e-05, - "loss": 0.5011, + "learning_rate": 5.4635555776885256e-05, + "loss": 0.3643, "step": 335000 }, { "epoch": 0.2, - "learning_rate": 5.4633485210842544e-05, - "loss": 0.5076, + "learning_rate": 5.4633455811324696e-05, + "loss": 0.363, "step": 335500 }, { "epoch": 0.2, - "learning_rate": 5.463138524528198e-05, - "loss": 0.5078, + "learning_rate": 5.463135584576413e-05, + "loss": 0.3619, "step": 336000 }, { "epoch": 0.2, - "learning_rate": 5.462928527972141e-05, - "loss": 0.4944, + "learning_rate": 5.462925588020356e-05, + "loss": 0.3591, "step": 336500 }, { "epoch": 0.2, - "learning_rate": 5.462718531416085e-05, - "loss": 0.5047, + "learning_rate": 5.4627155914643e-05, + "loss": 0.3638, "step": 337000 }, { "epoch": 0.2, - "learning_rate": 5.4625089548531404e-05, - "loss": 0.5066, + "learning_rate": 5.462506014901356e-05, + "loss": 0.3715, "step": 337500 }, { "epoch": 0.2, - "learning_rate": 5.462298958297084e-05, - "loss": 0.5039, + "learning_rate": 5.462296018345299e-05, + "loss": 0.3618, "step": 338000 }, { "epoch": 0.2, - "learning_rate": 5.462088961741028e-05, - "loss": 0.5119, + "learning_rate": 5.462086021789243e-05, + "loss": 0.3635, "step": 338500 }, { "epoch": 0.2, - "learning_rate": 5.461878965184971e-05, - "loss": 0.5187, + "learning_rate": 5.4618760252331864e-05, + "loss": 0.3717, "step": 339000 }, { "epoch": 0.2, - "learning_rate": 5.4616693886220265e-05, - "loss": 0.4974, + "learning_rate": 5.461666448670242e-05, + "loss": 0.3628, "step": 339500 }, { "epoch": 0.2, - "learning_rate": 5.46145939206597e-05, - "loss": 0.4936, + "learning_rate": 5.461456452114185e-05, + "loss": 0.3569, "step": 340000 }, { "epoch": 0.2, - "learning_rate": 5.461249395509914e-05, - "loss": 0.5076, + "learning_rate": 5.461246455558129e-05, + "loss": 0.3649, "step": 340500 }, { "epoch": 0.2, - "learning_rate": 5.461039398953857e-05, - "loss": 0.5111, + "learning_rate": 5.4610364590020725e-05, + "loss": 0.371, "step": 341000 }, { "epoch": 0.2, - "learning_rate": 5.4608298223909126e-05, - "loss": 0.5019, + "learning_rate": 5.460826462446016e-05, + "loss": 0.3657, "step": 341500 }, { "epoch": 0.21, - "learning_rate": 5.460620245827968e-05, - "loss": 0.5021, + "learning_rate": 5.46061646588996e-05, + "loss": 0.3606, "step": 342000 }, { "epoch": 0.21, - "learning_rate": 5.460410249271912e-05, - "loss": 0.5026, + "learning_rate": 5.460406889327015e-05, + "loss": 0.366, "step": 342500 }, { "epoch": 0.21, - "learning_rate": 5.460200252715855e-05, - "loss": 0.4984, + "learning_rate": 5.4601968927709585e-05, + "loss": 0.362, "step": 343000 }, { "epoch": 0.21, - "learning_rate": 5.4599902561597987e-05, - "loss": 0.502, + "learning_rate": 5.459987316208014e-05, + "loss": 0.3658, "step": 343500 }, { "epoch": 0.21, - "learning_rate": 5.459780259603743e-05, - "loss": 0.4965, + "learning_rate": 5.459777319651958e-05, + "loss": 0.3565, "step": 344000 }, { "epoch": 0.21, - "learning_rate": 5.459570263047686e-05, - "loss": 0.5082, + "learning_rate": 5.459567323095901e-05, + "loss": 0.3699, "step": 344500 }, { "epoch": 0.21, - "learning_rate": 5.4593602664916294e-05, - "loss": 0.503, + "learning_rate": 5.4593577465329566e-05, + "loss": 0.3666, "step": 345000 }, { "epoch": 0.21, - "learning_rate": 5.4591502699355734e-05, - "loss": 0.505, + "learning_rate": 5.4591477499769e-05, + "loss": 0.3637, "step": 345500 }, { "epoch": 0.21, - "learning_rate": 5.458940693372629e-05, - "loss": 0.5069, + "learning_rate": 5.458937753420844e-05, + "loss": 0.3631, "step": 346000 }, { "epoch": 0.21, - "learning_rate": 5.458730696816572e-05, - "loss": 0.5096, + "learning_rate": 5.458727756864787e-05, + "loss": 0.3739, "step": 346500 }, { "epoch": 0.21, - "learning_rate": 5.4585207002605154e-05, - "loss": 0.504, + "learning_rate": 5.458517760308731e-05, + "loss": 0.3715, "step": 347000 }, { "epoch": 0.21, - "learning_rate": 5.4583107037044595e-05, - "loss": 0.4978, + "learning_rate": 5.458307763752675e-05, + "loss": 0.3645, "step": 347500 }, { "epoch": 0.21, - "learning_rate": 5.458101127141515e-05, - "loss": 0.5064, + "learning_rate": 5.458097767196618e-05, + "loss": 0.3679, "step": 348000 }, { "epoch": 0.21, - "learning_rate": 5.457891130585458e-05, - "loss": 0.5108, + "learning_rate": 5.4578877706405614e-05, + "loss": 0.3615, "step": 348500 }, { "epoch": 0.21, - "learning_rate": 5.4576811340294015e-05, - "loss": 0.5069, + "learning_rate": 5.4576777740845054e-05, + "loss": 0.3662, "step": 349000 }, { "epoch": 0.21, - "learning_rate": 5.4574711374733455e-05, - "loss": 0.4942, + "learning_rate": 5.457467777528448e-05, + "loss": 0.3636, "step": 349500 }, { "epoch": 0.21, - "learning_rate": 5.457261560910401e-05, - "loss": 0.4912, + "learning_rate": 5.4572577809723914e-05, + "loss": 0.3647, "step": 350000 }, { "epoch": 0.21, - "learning_rate": 5.457051564354344e-05, - "loss": 0.4906, + "learning_rate": 5.4570477844163355e-05, + "loss": 0.3608, "step": 350500 }, { "epoch": 0.21, - "learning_rate": 5.456841567798288e-05, - "loss": 0.5016, + "learning_rate": 5.4568382078533915e-05, + "loss": 0.3698, "step": 351000 }, { "epoch": 0.21, - "learning_rate": 5.4566315712422316e-05, - "loss": 0.4988, + "learning_rate": 5.456628211297335e-05, + "loss": 0.3604, "step": 351500 }, { "epoch": 0.21, - "learning_rate": 5.456421994679287e-05, - "loss": 0.4995, + "learning_rate": 5.456418214741278e-05, + "loss": 0.361, "step": 352000 }, { "epoch": 0.21, - "learning_rate": 5.45621199812323e-05, - "loss": 0.5008, + "learning_rate": 5.4562082181852215e-05, + "loss": 0.365, "step": 352500 }, { "epoch": 0.21, - "learning_rate": 5.4560020015671743e-05, - "loss": 0.5023, + "learning_rate": 5.455998221629165e-05, + "loss": 0.3619, "step": 353000 }, { "epoch": 0.21, - "learning_rate": 5.455792005011118e-05, - "loss": 0.5017, + "learning_rate": 5.455788225073109e-05, + "loss": 0.3649, "step": 353500 }, { "epoch": 0.21, - "learning_rate": 5.455582428448173e-05, - "loss": 0.5088, + "learning_rate": 5.455578228517052e-05, + "loss": 0.3712, "step": 354000 }, { "epoch": 0.21, - "learning_rate": 5.4553724318921164e-05, - "loss": 0.4994, + "learning_rate": 5.4553686519541076e-05, + "loss": 0.3674, "step": 354500 }, { "epoch": 0.21, - "learning_rate": 5.4551624353360604e-05, - "loss": 0.5017, + "learning_rate": 5.455158655398051e-05, + "loss": 0.3584, "step": 355000 }, { "epoch": 0.21, - "learning_rate": 5.454952438780004e-05, - "loss": 0.498, + "learning_rate": 5.454948658841995e-05, + "loss": 0.3614, "step": 355500 }, { "epoch": 0.21, - "learning_rate": 5.454742442223947e-05, - "loss": 0.4991, + "learning_rate": 5.454738662285938e-05, + "loss": 0.3642, "step": 356000 }, { "epoch": 0.21, - "learning_rate": 5.454532445667891e-05, - "loss": 0.5074, + "learning_rate": 5.454528665729882e-05, + "loss": 0.369, "step": 356500 }, { "epoch": 0.21, - "learning_rate": 5.4543224491118345e-05, - "loss": 0.4835, + "learning_rate": 5.454318669173826e-05, + "loss": 0.3577, "step": 357000 }, { "epoch": 0.21, - "learning_rate": 5.454112452555778e-05, - "loss": 0.4909, + "learning_rate": 5.454108672617769e-05, + "loss": 0.35, "step": 357500 }, { "epoch": 0.21, - "learning_rate": 5.453902875992834e-05, - "loss": 0.501, + "learning_rate": 5.4538986760617124e-05, + "loss": 0.3592, "step": 358000 }, { "epoch": 0.21, - "learning_rate": 5.453693299429889e-05, - "loss": 0.4967, + "learning_rate": 5.4536886795056564e-05, + "loss": 0.3635, "step": 358500 }, { "epoch": 0.22, - "learning_rate": 5.4534833028738326e-05, - "loss": 0.5016, + "learning_rate": 5.453479102942712e-05, + "loss": 0.3661, "step": 359000 }, { "epoch": 0.22, - "learning_rate": 5.453273306317776e-05, - "loss": 0.5087, + "learning_rate": 5.453269106386655e-05, + "loss": 0.3712, "step": 359500 }, { "epoch": 0.22, - "learning_rate": 5.45306330976172e-05, - "loss": 0.4931, + "learning_rate": 5.4530591098305985e-05, + "loss": 0.3597, "step": 360000 }, { "epoch": 0.22, - "learning_rate": 5.452853313205663e-05, - "loss": 0.5016, + "learning_rate": 5.4528491132745425e-05, + "loss": 0.3683, "step": 360500 }, { "epoch": 0.22, - "learning_rate": 5.4526433166496066e-05, - "loss": 0.4924, + "learning_rate": 5.452639116718486e-05, + "loss": 0.3638, "step": 361000 }, { "epoch": 0.22, - "learning_rate": 5.4524333200935506e-05, - "loss": 0.4941, + "learning_rate": 5.452429120162429e-05, + "loss": 0.3637, "step": 361500 }, { "epoch": 0.22, - "learning_rate": 5.452223743530606e-05, - "loss": 0.5112, + "learning_rate": 5.452219123606373e-05, + "loss": 0.3693, "step": 362000 }, { "epoch": 0.22, - "learning_rate": 5.4520137469745493e-05, - "loss": 0.5016, + "learning_rate": 5.452009127050316e-05, + "loss": 0.3613, "step": 362500 }, { "epoch": 0.22, - "learning_rate": 5.451803750418493e-05, - "loss": 0.4971, + "learning_rate": 5.45179913049426e-05, + "loss": 0.3637, "step": 363000 }, { "epoch": 0.22, - "learning_rate": 5.451593753862437e-05, - "loss": 0.4935, + "learning_rate": 5.451589553931316e-05, + "loss": 0.3619, "step": 363500 }, { "epoch": 0.22, - "learning_rate": 5.45138375730638e-05, - "loss": 0.4995, + "learning_rate": 5.451379557375259e-05, + "loss": 0.3588, "step": 364000 }, { "epoch": 0.22, - "learning_rate": 5.4511737607503234e-05, - "loss": 0.4911, + "learning_rate": 5.4511695608192026e-05, + "loss": 0.3617, "step": 364500 }, { "epoch": 0.22, - "learning_rate": 5.4509637641942674e-05, - "loss": 0.4947, + "learning_rate": 5.450959564263146e-05, + "loss": 0.3627, "step": 365000 }, { "epoch": 0.22, - "learning_rate": 5.450753767638211e-05, - "loss": 0.4967, + "learning_rate": 5.450749567707089e-05, + "loss": 0.3573, "step": 365500 }, { "epoch": 0.22, - "learning_rate": 5.450543771082154e-05, - "loss": 0.4976, + "learning_rate": 5.4505395711510327e-05, + "loss": 0.3626, "step": 366000 }, { "epoch": 0.22, - "learning_rate": 5.45033419451921e-05, - "loss": 0.4922, + "learning_rate": 5.450329994588089e-05, + "loss": 0.3587, "step": 366500 }, { "epoch": 0.22, - "learning_rate": 5.4501241979631535e-05, - "loss": 0.5062, + "learning_rate": 5.450119998032032e-05, + "loss": 0.3673, "step": 367000 }, { "epoch": 0.22, - "learning_rate": 5.449914201407097e-05, - "loss": 0.4856, + "learning_rate": 5.4499100014759754e-05, + "loss": 0.3589, "step": 367500 }, { "epoch": 0.22, - "learning_rate": 5.449704204851041e-05, - "loss": 0.4983, + "learning_rate": 5.449700004919919e-05, + "loss": 0.3619, "step": 368000 }, { "epoch": 0.22, - "learning_rate": 5.449494628288096e-05, - "loss": 0.4996, + "learning_rate": 5.449490008363863e-05, + "loss": 0.3616, "step": 368500 }, { "epoch": 0.22, - "learning_rate": 5.4492846317320396e-05, - "loss": 0.5002, + "learning_rate": 5.449280851794031e-05, + "loss": 0.3582, "step": 369000 }, { "epoch": 0.22, - "learning_rate": 5.449074635175983e-05, - "loss": 0.4971, + "learning_rate": 5.449070855237974e-05, + "loss": 0.3641, "step": 369500 }, { "epoch": 0.22, - "learning_rate": 5.448864638619927e-05, - "loss": 0.5007, + "learning_rate": 5.4488608586819175e-05, + "loss": 0.3615, "step": 370000 }, { "epoch": 0.22, - "learning_rate": 5.448655062056982e-05, - "loss": 0.5089, + "learning_rate": 5.448651282118973e-05, + "loss": 0.3647, "step": 370500 }, { "epoch": 0.22, - "learning_rate": 5.4484450655009256e-05, - "loss": 0.4842, + "learning_rate": 5.448441285562917e-05, + "loss": 0.356, "step": 371000 }, { "epoch": 0.22, - "learning_rate": 5.448235068944869e-05, - "loss": 0.4895, + "learning_rate": 5.44823128900686e-05, + "loss": 0.354, "step": 371500 }, { "epoch": 0.22, - "learning_rate": 5.448025072388813e-05, - "loss": 0.4975, + "learning_rate": 5.4480212924508036e-05, + "loss": 0.3575, "step": 372000 }, { "epoch": 0.22, - "learning_rate": 5.4478154958258684e-05, - "loss": 0.5121, + "learning_rate": 5.4478112958947476e-05, + "loss": 0.3749, "step": 372500 }, { "epoch": 0.22, - "learning_rate": 5.447605499269812e-05, - "loss": 0.5017, + "learning_rate": 5.447601719331803e-05, + "loss": 0.3622, "step": 373000 }, { "epoch": 0.22, - "learning_rate": 5.447395502713756e-05, - "loss": 0.4865, + "learning_rate": 5.447391722775746e-05, + "loss": 0.3557, "step": 373500 }, { "epoch": 0.22, - "learning_rate": 5.447185926150811e-05, - "loss": 0.5054, + "learning_rate": 5.4471817262196896e-05, + "loss": 0.3646, "step": 374000 }, { "epoch": 0.22, - "learning_rate": 5.4469759295947544e-05, - "loss": 0.5035, + "learning_rate": 5.4469717296636337e-05, + "loss": 0.3649, "step": 374500 }, { "epoch": 0.22, - "learning_rate": 5.446765933038698e-05, - "loss": 0.4888, + "learning_rate": 5.446761733107577e-05, + "loss": 0.356, "step": 375000 }, { "epoch": 0.23, - "learning_rate": 5.446555936482642e-05, - "loss": 0.5082, + "learning_rate": 5.4465517365515203e-05, + "loss": 0.3658, "step": 375500 }, { "epoch": 0.23, - "learning_rate": 5.446345939926585e-05, - "loss": 0.49, + "learning_rate": 5.4463417399954644e-05, + "loss": 0.3608, "step": 376000 }, { "epoch": 0.23, - "learning_rate": 5.4461359433705285e-05, - "loss": 0.4938, + "learning_rate": 5.446131743439407e-05, + "loss": 0.3575, "step": 376500 }, { "epoch": 0.23, - "learning_rate": 5.445926366807584e-05, - "loss": 0.5101, + "learning_rate": 5.445922166876463e-05, + "loss": 0.3704, "step": 377000 }, { "epoch": 0.23, - "learning_rate": 5.445716370251528e-05, - "loss": 0.5033, + "learning_rate": 5.445712170320407e-05, + "loss": 0.3624, "step": 377500 }, { "epoch": 0.23, - "learning_rate": 5.445506373695471e-05, - "loss": 0.4911, + "learning_rate": 5.4455021737643504e-05, + "loss": 0.3571, "step": 378000 }, { "epoch": 0.23, - "learning_rate": 5.4452963771394146e-05, - "loss": 0.4924, + "learning_rate": 5.445292177208294e-05, + "loss": 0.36, "step": 378500 }, { "epoch": 0.23, - "learning_rate": 5.4450863805833586e-05, - "loss": 0.4894, + "learning_rate": 5.445082180652237e-05, + "loss": 0.3559, "step": 379000 }, { "epoch": 0.23, - "learning_rate": 5.444876384027302e-05, - "loss": 0.4997, + "learning_rate": 5.4448721840961805e-05, + "loss": 0.3652, "step": 379500 }, { "epoch": 0.23, - "learning_rate": 5.444666387471245e-05, - "loss": 0.4942, + "learning_rate": 5.4446626075332365e-05, + "loss": 0.3612, "step": 380000 }, { "epoch": 0.23, - "learning_rate": 5.444456390915189e-05, - "loss": 0.4815, + "learning_rate": 5.44445261097718e-05, + "loss": 0.3486, "step": 380500 }, { "epoch": 0.23, - "learning_rate": 5.444246814352245e-05, - "loss": 0.4945, + "learning_rate": 5.444243034414235e-05, + "loss": 0.3596, "step": 381000 }, { "epoch": 0.23, - "learning_rate": 5.444036817796188e-05, - "loss": 0.501, + "learning_rate": 5.444033037858179e-05, + "loss": 0.3626, "step": 381500 }, { "epoch": 0.23, - "learning_rate": 5.443826821240132e-05, - "loss": 0.4842, + "learning_rate": 5.4438234612952346e-05, + "loss": 0.3498, "step": 382000 }, { "epoch": 0.23, - "learning_rate": 5.4436172446771874e-05, - "loss": 0.5063, + "learning_rate": 5.443613464739178e-05, + "loss": 0.3715, "step": 382500 }, { "epoch": 0.23, - "learning_rate": 5.443407248121131e-05, - "loss": 0.4981, + "learning_rate": 5.443403468183122e-05, + "loss": 0.3568, "step": 383000 }, { "epoch": 0.23, - "learning_rate": 5.443197251565074e-05, - "loss": 0.4929, + "learning_rate": 5.443193471627065e-05, + "loss": 0.3632, "step": 383500 }, { "epoch": 0.23, - "learning_rate": 5.442987255009018e-05, - "loss": 0.4935, + "learning_rate": 5.442983475071009e-05, + "loss": 0.3594, "step": 384000 }, { "epoch": 0.23, - "learning_rate": 5.4427772584529615e-05, - "loss": 0.4904, + "learning_rate": 5.442773478514953e-05, + "loss": 0.3655, "step": 384500 }, { "epoch": 0.23, - "learning_rate": 5.442567681890017e-05, - "loss": 0.5077, + "learning_rate": 5.442563481958896e-05, + "loss": 0.3589, "step": 385000 }, { "epoch": 0.23, - "learning_rate": 5.44235768533396e-05, - "loss": 0.4849, + "learning_rate": 5.4423534854028394e-05, + "loss": 0.3531, "step": 385500 }, { "epoch": 0.23, - "learning_rate": 5.442147688777904e-05, - "loss": 0.4817, + "learning_rate": 5.442143488846783e-05, + "loss": 0.3503, "step": 386000 }, { "epoch": 0.23, - "learning_rate": 5.4419376922218475e-05, - "loss": 0.4919, + "learning_rate": 5.441933492290726e-05, + "loss": 0.3562, "step": 386500 }, { "epoch": 0.23, - "learning_rate": 5.4417276956657916e-05, - "loss": 0.4935, + "learning_rate": 5.441723915727782e-05, + "loss": 0.3564, "step": 387000 }, { "epoch": 0.23, - "learning_rate": 5.441517699109735e-05, - "loss": 0.4954, + "learning_rate": 5.4415139191717254e-05, + "loss": 0.3556, "step": 387500 }, { "epoch": 0.23, - "learning_rate": 5.4413077025536776e-05, - "loss": 0.4895, + "learning_rate": 5.4413039226156695e-05, + "loss": 0.3569, "step": 388000 }, { "epoch": 0.23, - "learning_rate": 5.4410977059976216e-05, - "loss": 0.4913, + "learning_rate": 5.441093926059612e-05, + "loss": 0.3555, "step": 388500 }, { "epoch": 0.23, - "learning_rate": 5.440887709441565e-05, - "loss": 0.4849, + "learning_rate": 5.4408839295035555e-05, + "loss": 0.3543, "step": 389000 }, { "epoch": 0.23, - "learning_rate": 5.440678132878621e-05, - "loss": 0.4769, + "learning_rate": 5.4406739329474995e-05, + "loss": 0.3523, "step": 389500 }, { "epoch": 0.23, - "learning_rate": 5.440468136322564e-05, - "loss": 0.4958, + "learning_rate": 5.440463936391443e-05, + "loss": 0.3565, "step": 390000 }, { "epoch": 0.23, - "learning_rate": 5.44025855975962e-05, - "loss": 0.4968, + "learning_rate": 5.440253939835387e-05, + "loss": 0.3564, "step": 390500 }, { "epoch": 0.23, - "learning_rate": 5.440048563203564e-05, - "loss": 0.4991, + "learning_rate": 5.440044363272442e-05, + "loss": 0.355, "step": 391000 }, { "epoch": 0.23, - "learning_rate": 5.439838566647507e-05, - "loss": 0.4975, + "learning_rate": 5.4398343667163856e-05, + "loss": 0.358, "step": 391500 }, { "epoch": 0.24, - "learning_rate": 5.4396285700914504e-05, - "loss": 0.5008, + "learning_rate": 5.439624370160329e-05, + "loss": 0.3632, "step": 392000 }, { "epoch": 0.24, - "learning_rate": 5.4394185735353944e-05, - "loss": 0.4803, + "learning_rate": 5.439414373604273e-05, + "loss": 0.3554, "step": 392500 }, { "epoch": 0.24, - "learning_rate": 5.439208576979337e-05, - "loss": 0.4931, + "learning_rate": 5.439204377048216e-05, + "loss": 0.3567, "step": 393000 }, { "epoch": 0.24, - "learning_rate": 5.438998580423281e-05, - "loss": 0.4944, + "learning_rate": 5.4389943804921596e-05, + "loss": 0.3581, "step": 393500 }, { "epoch": 0.24, - "learning_rate": 5.4387885838672245e-05, - "loss": 0.4975, + "learning_rate": 5.438784383936104e-05, + "loss": 0.3623, "step": 394000 }, { "epoch": 0.24, - "learning_rate": 5.438578587311168e-05, - "loss": 0.4966, + "learning_rate": 5.438574387380047e-05, + "loss": 0.357, "step": 394500 }, { "epoch": 0.24, - "learning_rate": 5.438369010748223e-05, - "loss": 0.4939, + "learning_rate": 5.4383643908239904e-05, + "loss": 0.3534, "step": 395000 }, { "epoch": 0.24, - "learning_rate": 5.438159014192167e-05, - "loss": 0.4853, + "learning_rate": 5.4381543942679344e-05, + "loss": 0.3556, "step": 395500 }, { "epoch": 0.24, - "learning_rate": 5.4379490176361105e-05, - "loss": 0.4925, + "learning_rate": 5.437944397711878e-05, + "loss": 0.3583, "step": 396000 }, { "epoch": 0.24, - "learning_rate": 5.437739021080054e-05, - "loss": 0.4904, + "learning_rate": 5.4377344011558204e-05, + "loss": 0.3553, "step": 396500 }, { "epoch": 0.24, - "learning_rate": 5.43752944451711e-05, - "loss": 0.5037, + "learning_rate": 5.4375248245928764e-05, + "loss": 0.3625, "step": 397000 }, { "epoch": 0.24, - "learning_rate": 5.437319447961053e-05, - "loss": 0.4943, + "learning_rate": 5.4373148280368205e-05, + "loss": 0.3607, "step": 397500 }, { "epoch": 0.24, - "learning_rate": 5.437109871398109e-05, - "loss": 0.4858, + "learning_rate": 5.437104831480764e-05, + "loss": 0.3545, "step": 398000 }, { "epoch": 0.24, - "learning_rate": 5.4368998748420526e-05, - "loss": 0.4913, + "learning_rate": 5.436894834924707e-05, + "loss": 0.3546, "step": 398500 }, { "epoch": 0.24, - "learning_rate": 5.436689878285996e-05, - "loss": 0.4948, + "learning_rate": 5.4366848383686505e-05, + "loss": 0.3554, "step": 399000 }, { "epoch": 0.24, - "learning_rate": 5.43647988172994e-05, - "loss": 0.4953, + "learning_rate": 5.4364752618057065e-05, + "loss": 0.3625, "step": 399500 }, { "epoch": 0.24, - "learning_rate": 5.436269885173883e-05, - "loss": 0.4932, + "learning_rate": 5.436265685242762e-05, + "loss": 0.364, "step": 400000 }, { "epoch": 0.24, - "eval_loss": 0.4640251398086548, - "eval_runtime": 1106.9436, - "eval_samples_per_second": 475.833, - "eval_steps_per_second": 79.306, + "eval_loss": 0.31898367404937744, + "eval_runtime": 1466.0508, + "eval_samples_per_second": 359.278, + "eval_steps_per_second": 59.88, "step": 400000 }, { "epoch": 0.24, - "learning_rate": 5.436059888617827e-05, - "loss": 0.4821, + "learning_rate": 5.436055688686705e-05, + "loss": 0.3497, "step": 400500 }, { "epoch": 0.24, - "learning_rate": 5.43584989206177e-05, - "loss": 0.4923, + "learning_rate": 5.435845692130649e-05, + "loss": 0.3553, "step": 401000 }, { "epoch": 0.24, - "learning_rate": 5.4356398955057134e-05, - "loss": 0.4936, + "learning_rate": 5.4356356955745926e-05, + "loss": 0.3582, "step": 401500 }, { "epoch": 0.24, - "learning_rate": 5.4354298989496574e-05, - "loss": 0.4886, + "learning_rate": 5.435425699018536e-05, + "loss": 0.3496, "step": 402000 }, { "epoch": 0.24, - "learning_rate": 5.435220322386713e-05, - "loss": 0.4754, + "learning_rate": 5.43521570246248e-05, + "loss": 0.3493, "step": 402500 }, { "epoch": 0.24, - "learning_rate": 5.435010325830656e-05, - "loss": 0.5005, + "learning_rate": 5.435005705906423e-05, + "loss": 0.3627, "step": 403000 }, { "epoch": 0.24, - "learning_rate": 5.4348003292745995e-05, - "loss": 0.4906, + "learning_rate": 5.434795709350366e-05, + "loss": 0.3577, "step": 403500 }, { "epoch": 0.24, - "learning_rate": 5.4345903327185435e-05, - "loss": 0.5004, + "learning_rate": 5.43458571279431e-05, + "loss": 0.3596, "step": 404000 }, { "epoch": 0.24, - "learning_rate": 5.434380756155599e-05, - "loss": 0.4803, + "learning_rate": 5.4343757162382534e-05, + "loss": 0.3509, "step": 404500 }, { "epoch": 0.24, - "learning_rate": 5.434171179592655e-05, - "loss": 0.4901, + "learning_rate": 5.434165719682197e-05, + "loss": 0.3561, "step": 405000 }, { "epoch": 0.24, - "learning_rate": 5.433961183036598e-05, - "loss": 0.5002, + "learning_rate": 5.433956143119253e-05, + "loss": 0.3629, "step": 405500 }, { "epoch": 0.24, - "learning_rate": 5.4337511864805416e-05, - "loss": 0.479, + "learning_rate": 5.433746146563196e-05, + "loss": 0.3584, "step": 406000 }, { "epoch": 0.24, - "learning_rate": 5.4335411899244856e-05, - "loss": 0.5007, + "learning_rate": 5.4335361500071394e-05, + "loss": 0.3595, "step": 406500 }, { "epoch": 0.24, - "learning_rate": 5.433331193368428e-05, - "loss": 0.4883, + "learning_rate": 5.4333265734441955e-05, + "loss": 0.3509, "step": 407000 }, { "epoch": 0.24, - "learning_rate": 5.433121196812372e-05, - "loss": 0.4812, + "learning_rate": 5.4331165768881395e-05, + "loss": 0.3509, "step": 407500 }, { "epoch": 0.24, - "learning_rate": 5.4329112002563156e-05, - "loss": 0.4963, + "learning_rate": 5.432906580332083e-05, + "loss": 0.3563, "step": 408000 }, { "epoch": 0.24, - "learning_rate": 5.432701203700259e-05, - "loss": 0.4825, + "learning_rate": 5.4326965837760255e-05, + "loss": 0.3517, "step": 408500 }, { "epoch": 0.25, - "learning_rate": 5.432491627137315e-05, - "loss": 0.5059, + "learning_rate": 5.4324870072130815e-05, + "loss": 0.3583, "step": 409000 }, { "epoch": 0.25, - "learning_rate": 5.4322816305812584e-05, - "loss": 0.4886, + "learning_rate": 5.4322770106570256e-05, + "loss": 0.3623, "step": 409500 }, { "epoch": 0.25, - "learning_rate": 5.432071634025202e-05, - "loss": 0.4886, + "learning_rate": 5.432067014100969e-05, + "loss": 0.356, "step": 410000 }, { "epoch": 0.25, - "learning_rate": 5.431861637469145e-05, - "loss": 0.4839, + "learning_rate": 5.431857017544912e-05, + "loss": 0.3522, "step": 410500 }, { "epoch": 0.25, - "learning_rate": 5.431651640913089e-05, - "loss": 0.49, + "learning_rate": 5.4316470209888556e-05, + "loss": 0.3565, "step": 411000 }, { "epoch": 0.25, - "learning_rate": 5.4314416443570324e-05, - "loss": 0.5029, + "learning_rate": 5.431437024432799e-05, + "loss": 0.3629, "step": 411500 }, { "epoch": 0.25, - "learning_rate": 5.431231647800976e-05, - "loss": 0.4902, + "learning_rate": 5.431227027876742e-05, + "loss": 0.3601, "step": 412000 }, { "epoch": 0.25, - "learning_rate": 5.43102165124492e-05, - "loss": 0.4839, + "learning_rate": 5.431017031320686e-05, + "loss": 0.3565, "step": 412500 }, { "epoch": 0.25, - "learning_rate": 5.430812074681975e-05, - "loss": 0.4861, + "learning_rate": 5.4308070347646297e-05, + "loss": 0.3532, "step": 413000 }, { "epoch": 0.25, - "learning_rate": 5.4306020781259185e-05, - "loss": 0.494, + "learning_rate": 5.430597038208573e-05, + "loss": 0.356, "step": 413500 }, { "epoch": 0.25, - "learning_rate": 5.430392081569862e-05, - "loss": 0.4821, + "learning_rate": 5.430387041652517e-05, + "loss": 0.3574, "step": 414000 }, { "epoch": 0.25, - "learning_rate": 5.430182085013806e-05, - "loss": 0.4898, + "learning_rate": 5.4301770450964604e-05, + "loss": 0.3574, "step": 414500 }, { "epoch": 0.25, - "learning_rate": 5.429972508450861e-05, - "loss": 0.4891, + "learning_rate": 5.429967048540404e-05, + "loss": 0.3529, "step": 415000 }, { "epoch": 0.25, - "learning_rate": 5.4297625118948046e-05, - "loss": 0.4907, + "learning_rate": 5.42975747197746e-05, + "loss": 0.3621, "step": 415500 }, { "epoch": 0.25, - "learning_rate": 5.4295525153387486e-05, - "loss": 0.4775, + "learning_rate": 5.429547475421403e-05, + "loss": 0.348, "step": 416000 }, { "epoch": 0.25, - "learning_rate": 5.429342518782692e-05, - "loss": 0.4801, + "learning_rate": 5.4293374788653464e-05, + "loss": 0.3517, "step": 416500 }, { "epoch": 0.25, - "learning_rate": 5.429132942219747e-05, - "loss": 0.4925, + "learning_rate": 5.4291274823092905e-05, + "loss": 0.3523, "step": 417000 }, { "epoch": 0.25, - "learning_rate": 5.4289229456636906e-05, - "loss": 0.487, + "learning_rate": 5.428917485753234e-05, + "loss": 0.3594, "step": 417500 }, { "epoch": 0.25, - "learning_rate": 5.428712949107635e-05, - "loss": 0.4831, + "learning_rate": 5.428707489197177e-05, + "loss": 0.3508, "step": 418000 }, { "epoch": 0.25, - "learning_rate": 5.428502952551578e-05, - "loss": 0.4823, + "learning_rate": 5.4284974926411205e-05, + "loss": 0.3483, "step": 418500 }, { "epoch": 0.25, - "learning_rate": 5.4282933759886334e-05, - "loss": 0.4925, + "learning_rate": 5.428287496085064e-05, + "loss": 0.3598, "step": 419000 }, { "epoch": 0.25, - "learning_rate": 5.428083379432577e-05, - "loss": 0.4827, + "learning_rate": 5.428077499529007e-05, + "loss": 0.3486, "step": 419500 }, { "epoch": 0.25, - "learning_rate": 5.427873382876521e-05, - "loss": 0.4905, + "learning_rate": 5.427867922966063e-05, + "loss": 0.3598, "step": 420000 }, { "epoch": 0.25, - "learning_rate": 5.427663386320464e-05, - "loss": 0.4942, + "learning_rate": 5.427657926410007e-05, + "loss": 0.3594, "step": 420500 }, { "epoch": 0.25, - "learning_rate": 5.42745380975752e-05, - "loss": 0.488, + "learning_rate": 5.42744792985395e-05, + "loss": 0.358, "step": 421000 }, { "epoch": 0.25, - "learning_rate": 5.4272438132014635e-05, - "loss": 0.484, + "learning_rate": 5.427237933297893e-05, + "loss": 0.3543, "step": 421500 }, { "epoch": 0.25, - "learning_rate": 5.4270342366385195e-05, - "loss": 0.4881, + "learning_rate": 5.427028356734949e-05, + "loss": 0.3528, "step": 422000 }, { "epoch": 0.25, - "learning_rate": 5.426824240082463e-05, - "loss": 0.4892, + "learning_rate": 5.4268187801720053e-05, + "loss": 0.3497, "step": 422500 }, { "epoch": 0.25, - "learning_rate": 5.426614243526406e-05, - "loss": 0.4962, + "learning_rate": 5.426608783615949e-05, + "loss": 0.3595, "step": 423000 }, { "epoch": 0.25, - "learning_rate": 5.4264042469703495e-05, - "loss": 0.5008, + "learning_rate": 5.426398787059892e-05, + "loss": 0.3567, "step": 423500 }, { "epoch": 0.25, - "learning_rate": 5.426194250414293e-05, - "loss": 0.4817, + "learning_rate": 5.426188790503836e-05, + "loss": 0.3518, "step": 424000 }, { "epoch": 0.25, - "learning_rate": 5.425984253858236e-05, - "loss": 0.4851, + "learning_rate": 5.4259787939477794e-05, + "loss": 0.35, "step": 424500 }, { "epoch": 0.25, - "learning_rate": 5.42577425730218e-05, - "loss": 0.4779, + "learning_rate": 5.425768797391723e-05, + "loss": 0.3449, "step": 425000 }, { "epoch": 0.26, - "learning_rate": 5.4255642607461236e-05, - "loss": 0.4865, + "learning_rate": 5.425558800835667e-05, + "loss": 0.3556, "step": 425500 }, { "epoch": 0.26, - "learning_rate": 5.425354684183179e-05, - "loss": 0.4877, + "learning_rate": 5.4253488042796094e-05, + "loss": 0.3536, "step": 426000 }, { "epoch": 0.26, - "learning_rate": 5.425144687627122e-05, - "loss": 0.4845, + "learning_rate": 5.4251392277166655e-05, + "loss": 0.3491, "step": 426500 }, { "epoch": 0.26, - "learning_rate": 5.424934691071066e-05, - "loss": 0.4875, + "learning_rate": 5.424929231160609e-05, + "loss": 0.3608, "step": 427000 }, { "epoch": 0.26, - "learning_rate": 5.42472469451501e-05, - "loss": 0.485, + "learning_rate": 5.424719654597664e-05, + "loss": 0.3584, "step": 427500 }, { "epoch": 0.26, - "learning_rate": 5.424515117952066e-05, - "loss": 0.488, + "learning_rate": 5.424509658041608e-05, + "loss": 0.3547, "step": 428000 }, { "epoch": 0.26, - "learning_rate": 5.424305121396009e-05, - "loss": 0.4798, + "learning_rate": 5.4242996614855515e-05, + "loss": 0.3474, "step": 428500 }, { "epoch": 0.26, - "learning_rate": 5.4240951248399524e-05, - "loss": 0.4893, + "learning_rate": 5.424089664929495e-05, + "loss": 0.3491, "step": 429000 }, { "epoch": 0.26, - "learning_rate": 5.423885128283896e-05, - "loss": 0.4907, + "learning_rate": 5.423879668373439e-05, + "loss": 0.3552, "step": 429500 }, { "epoch": 0.26, - "learning_rate": 5.423675551720952e-05, - "loss": 0.4784, + "learning_rate": 5.423669671817382e-05, + "loss": 0.3483, "step": 430000 }, { "epoch": 0.26, - "learning_rate": 5.423465555164896e-05, - "loss": 0.4802, + "learning_rate": 5.4234596752613256e-05, + "loss": 0.3523, "step": 430500 }, { "epoch": 0.26, - "learning_rate": 5.4232555586088385e-05, - "loss": 0.4829, + "learning_rate": 5.423249678705269e-05, + "loss": 0.3489, "step": 431000 }, { "epoch": 0.26, - "learning_rate": 5.423045562052782e-05, - "loss": 0.4844, + "learning_rate": 5.423039682149212e-05, + "loss": 0.3491, "step": 431500 }, { "epoch": 0.26, - "learning_rate": 5.422835985489838e-05, - "loss": 0.493, + "learning_rate": 5.422830105586268e-05, + "loss": 0.3508, "step": 432000 }, { "epoch": 0.26, - "learning_rate": 5.422625988933782e-05, - "loss": 0.4844, + "learning_rate": 5.4226201090302124e-05, + "loss": 0.348, "step": 432500 }, { "epoch": 0.26, - "learning_rate": 5.422415992377725e-05, - "loss": 0.475, + "learning_rate": 5.422410532467268e-05, + "loss": 0.3478, "step": 433000 }, { "epoch": 0.26, - "learning_rate": 5.422205995821668e-05, - "loss": 0.5024, + "learning_rate": 5.422200535911211e-05, + "loss": 0.3565, "step": 433500 }, { "epoch": 0.26, - "learning_rate": 5.421995999265612e-05, - "loss": 0.4863, + "learning_rate": 5.4219905393551544e-05, + "loss": 0.3568, "step": 434000 }, { "epoch": 0.26, - "learning_rate": 5.421786422702668e-05, - "loss": 0.4783, + "learning_rate": 5.4217805427990984e-05, + "loss": 0.3507, "step": 434500 }, { "epoch": 0.26, - "learning_rate": 5.421576426146611e-05, - "loss": 0.4873, + "learning_rate": 5.421570546243042e-05, + "loss": 0.3543, "step": 435000 }, { "epoch": 0.26, - "learning_rate": 5.4213664295905546e-05, - "loss": 0.4879, + "learning_rate": 5.421360969680097e-05, + "loss": 0.3492, "step": 435500 }, { "epoch": 0.26, - "learning_rate": 5.421156433034498e-05, - "loss": 0.4712, + "learning_rate": 5.421150973124041e-05, + "loss": 0.3478, "step": 436000 }, { "epoch": 0.26, - "learning_rate": 5.420946436478441e-05, - "loss": 0.4924, + "learning_rate": 5.4209409765679845e-05, + "loss": 0.3625, "step": 436500 }, { "epoch": 0.26, - "learning_rate": 5.4207364399223854e-05, - "loss": 0.4926, + "learning_rate": 5.420730980011928e-05, + "loss": 0.3557, "step": 437000 }, { "epoch": 0.26, - "learning_rate": 5.420526443366329e-05, - "loss": 0.4782, + "learning_rate": 5.420521403448983e-05, + "loss": 0.3535, "step": 437500 }, { "epoch": 0.26, - "learning_rate": 5.420316446810272e-05, - "loss": 0.4805, + "learning_rate": 5.420311406892927e-05, + "loss": 0.3502, "step": 438000 }, { "epoch": 0.26, - "learning_rate": 5.4201068702473274e-05, - "loss": 0.4836, + "learning_rate": 5.4201014103368706e-05, + "loss": 0.3534, "step": 438500 }, { "epoch": 0.26, - "learning_rate": 5.4198968736912714e-05, - "loss": 0.5047, + "learning_rate": 5.419891413780814e-05, + "loss": 0.3572, "step": 439000 }, { "epoch": 0.26, - "learning_rate": 5.419686877135215e-05, - "loss": 0.4813, + "learning_rate": 5.419681417224758e-05, + "loss": 0.3513, "step": 439500 }, { "epoch": 0.26, - "learning_rate": 5.419476880579158e-05, - "loss": 0.4868, + "learning_rate": 5.419471840661813e-05, + "loss": 0.3471, "step": 440000 }, { "epoch": 0.26, - "learning_rate": 5.419266884023102e-05, - "loss": 0.4884, + "learning_rate": 5.4192618441057567e-05, + "loss": 0.3487, "step": 440500 }, { "epoch": 0.26, - "learning_rate": 5.4190573074601575e-05, - "loss": 0.493, + "learning_rate": 5.4190518475497e-05, + "loss": 0.3612, "step": 441000 }, { "epoch": 0.26, - "learning_rate": 5.4188477308972135e-05, - "loss": 0.4917, + "learning_rate": 5.418841850993644e-05, + "loss": 0.3537, "step": 441500 }, { "epoch": 0.26, - "learning_rate": 5.418637734341157e-05, - "loss": 0.4769, + "learning_rate": 5.4186318544375874e-05, + "loss": 0.3503, "step": 442000 }, { "epoch": 0.27, - "learning_rate": 5.4184277377851e-05, - "loss": 0.4872, + "learning_rate": 5.418421857881531e-05, + "loss": 0.3592, "step": 442500 }, { "epoch": 0.27, - "learning_rate": 5.4182177412290436e-05, - "loss": 0.4839, + "learning_rate": 5.418211861325474e-05, + "loss": 0.3494, "step": 443000 }, { "epoch": 0.27, - "learning_rate": 5.418007744672987e-05, - "loss": 0.473, + "learning_rate": 5.4180018647694174e-05, + "loss": 0.3446, "step": 443500 }, { "epoch": 0.27, - "learning_rate": 5.417797748116931e-05, - "loss": 0.4828, + "learning_rate": 5.4177918682133614e-05, + "loss": 0.3485, "step": 444000 }, { "epoch": 0.27, - "learning_rate": 5.417587751560874e-05, - "loss": 0.4802, + "learning_rate": 5.4175822916504175e-05, + "loss": 0.3562, "step": 444500 }, { "epoch": 0.27, - "learning_rate": 5.4173777550048176e-05, - "loss": 0.481, + "learning_rate": 5.41737229509436e-05, + "loss": 0.3433, "step": 445000 }, { "epoch": 0.27, - "learning_rate": 5.417168178441873e-05, - "loss": 0.4829, + "learning_rate": 5.4171622985383035e-05, + "loss": 0.3502, "step": 445500 }, { "epoch": 0.27, - "learning_rate": 5.416958181885817e-05, - "loss": 0.4798, + "learning_rate": 5.4169523019822475e-05, + "loss": 0.3526, "step": 446000 }, { "epoch": 0.27, - "learning_rate": 5.4167481853297604e-05, - "loss": 0.4819, + "learning_rate": 5.4167427254193035e-05, + "loss": 0.3463, "step": 446500 }, { "epoch": 0.27, - "learning_rate": 5.416538188773704e-05, - "loss": 0.4796, + "learning_rate": 5.416532728863247e-05, + "loss": 0.3475, "step": 447000 }, { "epoch": 0.27, - "learning_rate": 5.416328612210759e-05, - "loss": 0.4809, + "learning_rate": 5.4163227323071896e-05, + "loss": 0.3464, "step": 447500 }, { "epoch": 0.27, - "learning_rate": 5.416118615654703e-05, - "loss": 0.488, + "learning_rate": 5.4161131557442456e-05, + "loss": 0.3536, "step": 448000 }, { "epoch": 0.27, - "learning_rate": 5.415909039091759e-05, - "loss": 0.4986, + "learning_rate": 5.4159031591881896e-05, + "loss": 0.3517, "step": 448500 }, { "epoch": 0.27, - "learning_rate": 5.4156990425357025e-05, - "loss": 0.4803, + "learning_rate": 5.415693162632133e-05, + "loss": 0.3493, "step": 449000 }, { "epoch": 0.27, - "learning_rate": 5.415489045979646e-05, - "loss": 0.4834, + "learning_rate": 5.415483166076076e-05, + "loss": 0.3502, "step": 449500 }, { "epoch": 0.27, - "learning_rate": 5.415279049423589e-05, - "loss": 0.4773, + "learning_rate": 5.4152731695200196e-05, + "loss": 0.345, "step": 450000 }, { "epoch": 0.27, - "learning_rate": 5.4150690528675325e-05, - "loss": 0.4835, + "learning_rate": 5.415063172963963e-05, + "loss": 0.3545, "step": 450500 }, { "epoch": 0.27, - "learning_rate": 5.4148590563114765e-05, - "loss": 0.4915, + "learning_rate": 5.414853176407907e-05, + "loss": 0.3547, "step": 451000 }, { "epoch": 0.27, - "learning_rate": 5.41464905975542e-05, - "loss": 0.4809, + "learning_rate": 5.4146431798518504e-05, + "loss": 0.3523, "step": 451500 }, { "epoch": 0.27, - "learning_rate": 5.414439483192476e-05, - "loss": 0.492, + "learning_rate": 5.414433183295794e-05, + "loss": 0.3562, "step": 452000 }, { "epoch": 0.27, - "learning_rate": 5.4142294866364186e-05, - "loss": 0.4793, + "learning_rate": 5.414223606732849e-05, + "loss": 0.3474, "step": 452500 }, { "epoch": 0.27, - "learning_rate": 5.4140194900803626e-05, - "loss": 0.4843, + "learning_rate": 5.414013610176793e-05, + "loss": 0.3541, "step": 453000 }, { "epoch": 0.27, - "learning_rate": 5.413809493524306e-05, - "loss": 0.4828, + "learning_rate": 5.413804033613849e-05, + "loss": 0.349, "step": 453500 }, { "epoch": 0.27, - "learning_rate": 5.413599496968249e-05, - "loss": 0.4858, + "learning_rate": 5.4135940370577925e-05, + "loss": 0.3521, "step": 454000 }, { "epoch": 0.27, - "learning_rate": 5.4133899204053047e-05, - "loss": 0.4831, + "learning_rate": 5.413384040501735e-05, + "loss": 0.3545, "step": 454500 }, { "epoch": 0.27, - "learning_rate": 5.413179923849249e-05, - "loss": 0.4877, + "learning_rate": 5.413174043945679e-05, + "loss": 0.3503, "step": 455000 }, { "epoch": 0.27, - "learning_rate": 5.412969927293192e-05, - "loss": 0.4842, + "learning_rate": 5.4129640473896225e-05, + "loss": 0.3522, "step": 455500 }, { "epoch": 0.27, - "learning_rate": 5.4127599307371354e-05, - "loss": 0.4774, + "learning_rate": 5.412754050833566e-05, + "loss": 0.3523, "step": 456000 }, { "epoch": 0.27, - "learning_rate": 5.4125503541741914e-05, - "loss": 0.4777, + "learning_rate": 5.41254405427751e-05, + "loss": 0.3512, "step": 456500 }, { "epoch": 0.27, - "learning_rate": 5.412340357618135e-05, - "loss": 0.4871, + "learning_rate": 5.412334057721453e-05, + "loss": 0.3523, "step": 457000 }, { "epoch": 0.27, - "learning_rate": 5.412130361062078e-05, - "loss": 0.485, + "learning_rate": 5.4121244811585086e-05, + "loss": 0.3544, "step": 457500 }, { "epoch": 0.27, - "learning_rate": 5.411920364506022e-05, - "loss": 0.4824, + "learning_rate": 5.4119144846024526e-05, + "loss": 0.3506, "step": 458000 }, { "epoch": 0.27, - "learning_rate": 5.4117103679499655e-05, - "loss": 0.4786, + "learning_rate": 5.4117049080395086e-05, + "loss": 0.3506, "step": 458500 }, { "epoch": 0.28, - "learning_rate": 5.411500371393909e-05, - "loss": 0.4775, + "learning_rate": 5.411494911483451e-05, + "loss": 0.3468, "step": 459000 }, { "epoch": 0.28, - "learning_rate": 5.411290374837853e-05, - "loss": 0.4765, + "learning_rate": 5.4112849149273947e-05, + "loss": 0.3465, "step": 459500 }, { "epoch": 0.28, - "learning_rate": 5.411080378281796e-05, - "loss": 0.468, + "learning_rate": 5.411074918371339e-05, + "loss": 0.3462, "step": 460000 }, { "epoch": 0.28, - "learning_rate": 5.410871221711964e-05, - "loss": 0.4827, + "learning_rate": 5.410864921815282e-05, + "loss": 0.3436, "step": 460500 }, { "epoch": 0.28, - "learning_rate": 5.4106612251559076e-05, - "loss": 0.4798, + "learning_rate": 5.4106549252592254e-05, + "loss": 0.3466, "step": 461000 }, { "epoch": 0.28, - "learning_rate": 5.410451228599851e-05, - "loss": 0.484, + "learning_rate": 5.4104449287031694e-05, + "loss": 0.3527, "step": 461500 }, { "epoch": 0.28, - "learning_rate": 5.410241232043794e-05, - "loss": 0.4744, + "learning_rate": 5.410234932147113e-05, + "loss": 0.3473, "step": 462000 }, { "epoch": 0.28, - "learning_rate": 5.4100312354877376e-05, - "loss": 0.4888, + "learning_rate": 5.410025355584168e-05, + "loss": 0.3535, "step": 462500 }, { "epoch": 0.28, - "learning_rate": 5.409821238931681e-05, - "loss": 0.4786, + "learning_rate": 5.4098153590281114e-05, + "loss": 0.3463, "step": 463000 }, { "epoch": 0.28, - "learning_rate": 5.409611242375625e-05, - "loss": 0.4777, + "learning_rate": 5.4096053624720555e-05, + "loss": 0.3507, "step": 463500 }, { "epoch": 0.28, - "learning_rate": 5.409401245819568e-05, - "loss": 0.4813, + "learning_rate": 5.409395365915999e-05, + "loss": 0.3453, "step": 464000 }, { "epoch": 0.28, - "learning_rate": 5.409191669256624e-05, - "loss": 0.486, + "learning_rate": 5.409185369359942e-05, + "loss": 0.3498, "step": 464500 }, { "epoch": 0.28, - "learning_rate": 5.408981672700568e-05, - "loss": 0.4769, + "learning_rate": 5.408975372803886e-05, + "loss": 0.3447, "step": 465000 }, { "epoch": 0.28, - "learning_rate": 5.408772096137624e-05, - "loss": 0.4723, + "learning_rate": 5.4087657962409415e-05, + "loss": 0.3464, "step": 465500 }, { "epoch": 0.28, - "learning_rate": 5.408562099581567e-05, - "loss": 0.4762, + "learning_rate": 5.408555799684885e-05, + "loss": 0.3474, "step": 466000 }, { "epoch": 0.28, - "learning_rate": 5.40835210302551e-05, - "loss": 0.4793, + "learning_rate": 5.408345803128829e-05, + "loss": 0.348, "step": 466500 }, { "epoch": 0.28, - "learning_rate": 5.408142106469454e-05, - "loss": 0.4789, + "learning_rate": 5.408136226565884e-05, + "loss": 0.349, "step": 467000 }, { "epoch": 0.28, - "learning_rate": 5.407932109913397e-05, - "loss": 0.4817, + "learning_rate": 5.4079262300098276e-05, + "loss": 0.344, "step": 467500 }, { "epoch": 0.28, - "learning_rate": 5.4077221133573405e-05, - "loss": 0.469, + "learning_rate": 5.407716233453771e-05, + "loss": 0.3418, "step": 468000 }, { "epoch": 0.28, - "learning_rate": 5.4075121168012845e-05, - "loss": 0.4803, + "learning_rate": 5.407506236897715e-05, + "loss": 0.3586, "step": 468500 }, { "epoch": 0.28, - "learning_rate": 5.40730254023834e-05, - "loss": 0.4751, + "learning_rate": 5.407296240341658e-05, + "loss": 0.3443, "step": 469000 }, { "epoch": 0.28, - "learning_rate": 5.407092543682283e-05, - "loss": 0.4776, + "learning_rate": 5.407086243785602e-05, + "loss": 0.35, "step": 469500 }, { "epoch": 0.28, - "learning_rate": 5.4068825471262265e-05, - "loss": 0.4821, + "learning_rate": 5.406876247229546e-05, + "loss": 0.345, "step": 470000 }, { "epoch": 0.28, - "learning_rate": 5.4066725505701706e-05, - "loss": 0.4766, + "learning_rate": 5.406666250673489e-05, + "loss": 0.3478, "step": 470500 }, { "epoch": 0.28, - "learning_rate": 5.406462554014114e-05, - "loss": 0.4848, + "learning_rate": 5.4064562541174324e-05, + "loss": 0.3472, "step": 471000 }, { "epoch": 0.28, - "learning_rate": 5.406252557458058e-05, - "loss": 0.4782, + "learning_rate": 5.4062462575613764e-05, + "loss": 0.347, "step": 471500 }, { "epoch": 0.28, - "learning_rate": 5.406042560902001e-05, - "loss": 0.4766, + "learning_rate": 5.406036261005319e-05, + "loss": 0.3518, "step": 472000 }, { "epoch": 0.28, - "learning_rate": 5.4058325643459446e-05, - "loss": 0.4727, + "learning_rate": 5.4058262644492624e-05, + "loss": 0.3423, "step": 472500 }, { "epoch": 0.28, - "learning_rate": 5.405623407776113e-05, - "loss": 0.4806, + "learning_rate": 5.4056166878863185e-05, + "loss": 0.352, "step": 473000 }, { "epoch": 0.28, - "learning_rate": 5.405413411220056e-05, - "loss": 0.4798, + "learning_rate": 5.4054066913302625e-05, + "loss": 0.3452, "step": 473500 }, { "epoch": 0.28, - "learning_rate": 5.4052034146639994e-05, - "loss": 0.4663, + "learning_rate": 5.405196694774206e-05, + "loss": 0.3427, "step": 474000 }, { "epoch": 0.28, - "learning_rate": 5.404993418107943e-05, - "loss": 0.4745, + "learning_rate": 5.404986698218149e-05, + "loss": 0.3433, "step": 474500 }, { "epoch": 0.28, - "learning_rate": 5.404783421551886e-05, - "loss": 0.4759, + "learning_rate": 5.4047767016620925e-05, + "loss": 0.3472, "step": 475000 }, { "epoch": 0.29, - "learning_rate": 5.40457342499583e-05, - "loss": 0.4731, + "learning_rate": 5.404566705106036e-05, + "loss": 0.3435, "step": 475500 }, { "epoch": 0.29, - "learning_rate": 5.4043634284397734e-05, - "loss": 0.4838, + "learning_rate": 5.404357128543092e-05, + "loss": 0.35, "step": 476000 }, { "epoch": 0.29, - "learning_rate": 5.404153431883717e-05, - "loss": 0.4786, + "learning_rate": 5.404147131987035e-05, + "loss": 0.3495, "step": 476500 }, { "epoch": 0.29, - "learning_rate": 5.403943855320772e-05, - "loss": 0.4884, + "learning_rate": 5.4039371354309786e-05, + "loss": 0.3529, "step": 477000 }, { "epoch": 0.29, - "learning_rate": 5.403734278757829e-05, - "loss": 0.4771, + "learning_rate": 5.403727138874922e-05, + "loss": 0.3486, "step": 477500 }, { "epoch": 0.29, - "learning_rate": 5.403524282201772e-05, - "loss": 0.4685, + "learning_rate": 5.403517142318866e-05, + "loss": 0.3381, "step": 478000 }, { "epoch": 0.29, - "learning_rate": 5.403314285645715e-05, - "loss": 0.4844, + "learning_rate": 5.403307145762809e-05, + "loss": 0.3462, "step": 478500 }, { "epoch": 0.29, - "learning_rate": 5.403104289089659e-05, - "loss": 0.4735, + "learning_rate": 5.4030971492067527e-05, + "loss": 0.3507, "step": 479000 }, { "epoch": 0.29, - "learning_rate": 5.402894292533602e-05, - "loss": 0.4749, + "learning_rate": 5.402887152650697e-05, + "loss": 0.3463, "step": 479500 }, { "epoch": 0.29, - "learning_rate": 5.402684715970658e-05, - "loss": 0.4834, + "learning_rate": 5.40267715609464e-05, + "loss": 0.3469, "step": 480000 }, { "epoch": 0.29, - "learning_rate": 5.4024747194146016e-05, - "loss": 0.4872, + "learning_rate": 5.4024675795316954e-05, + "loss": 0.3508, "step": 480500 }, { "epoch": 0.29, - "learning_rate": 5.402264722858545e-05, - "loss": 0.4707, + "learning_rate": 5.4022580029687514e-05, + "loss": 0.3426, "step": 481000 }, { "epoch": 0.29, - "learning_rate": 5.402054726302488e-05, - "loss": 0.4784, + "learning_rate": 5.402048006412695e-05, + "loss": 0.3445, "step": 481500 }, { "epoch": 0.29, - "learning_rate": 5.4018447297464317e-05, - "loss": 0.4714, + "learning_rate": 5.401838009856638e-05, + "loss": 0.3404, "step": 482000 }, { "epoch": 0.29, - "learning_rate": 5.401634733190376e-05, - "loss": 0.4916, + "learning_rate": 5.4016280133005815e-05, + "loss": 0.3503, "step": 482500 }, { "epoch": 0.29, - "learning_rate": 5.401424736634319e-05, - "loss": 0.4772, + "learning_rate": 5.4014184367376375e-05, + "loss": 0.3468, "step": 483000 }, { "epoch": 0.29, - "learning_rate": 5.4012151600713744e-05, - "loss": 0.4774, + "learning_rate": 5.4012084401815815e-05, + "loss": 0.3455, "step": 483500 }, { "epoch": 0.29, - "learning_rate": 5.401005163515318e-05, - "loss": 0.4721, + "learning_rate": 5.400998443625524e-05, + "loss": 0.3447, "step": 484000 }, { "epoch": 0.29, - "learning_rate": 5.400795166959262e-05, - "loss": 0.4684, + "learning_rate": 5.4007884470694675e-05, + "loss": 0.3442, "step": 484500 }, { "epoch": 0.29, - "learning_rate": 5.400585170403205e-05, - "loss": 0.4806, + "learning_rate": 5.4005788705065236e-05, + "loss": 0.3501, "step": 485000 }, { "epoch": 0.29, - "learning_rate": 5.400375173847149e-05, - "loss": 0.474, + "learning_rate": 5.4003688739504676e-05, + "loss": 0.3419, "step": 485500 }, { "epoch": 0.29, - "learning_rate": 5.4001651772910925e-05, - "loss": 0.4765, + "learning_rate": 5.40015887739441e-05, + "loss": 0.3401, "step": 486000 }, { "epoch": 0.29, - "learning_rate": 5.399955180735036e-05, - "loss": 0.4791, + "learning_rate": 5.3999488808383536e-05, + "loss": 0.3468, "step": 486500 }, { "epoch": 0.29, - "learning_rate": 5.39974518417898e-05, - "loss": 0.4719, + "learning_rate": 5.3997388842822976e-05, + "loss": 0.346, "step": 487000 }, { "epoch": 0.29, - "learning_rate": 5.399535187622923e-05, - "loss": 0.4824, + "learning_rate": 5.399528887726241e-05, + "loss": 0.3479, "step": 487500 }, { "epoch": 0.29, - "learning_rate": 5.3993256110599785e-05, - "loss": 0.4636, + "learning_rate": 5.399318891170184e-05, + "loss": 0.3379, "step": 488000 }, { "epoch": 0.29, - "learning_rate": 5.399115614503922e-05, - "loss": 0.4664, + "learning_rate": 5.3991088946141283e-05, + "loss": 0.3453, "step": 488500 }, { "epoch": 0.29, - "learning_rate": 5.398906037940977e-05, - "loss": 0.4824, + "learning_rate": 5.398899318051184e-05, + "loss": 0.348, "step": 489000 }, { "epoch": 0.29, - "learning_rate": 5.398696041384921e-05, - "loss": 0.4752, + "learning_rate": 5.398689321495127e-05, + "loss": 0.3451, "step": 489500 }, { "epoch": 0.29, - "learning_rate": 5.3984860448288646e-05, - "loss": 0.4637, + "learning_rate": 5.398479324939071e-05, + "loss": 0.3372, "step": 490000 }, { "epoch": 0.29, - "learning_rate": 5.398276048272808e-05, - "loss": 0.4761, + "learning_rate": 5.3982693283830144e-05, + "loss": 0.3503, "step": 490500 }, { "epoch": 0.29, - "learning_rate": 5.398066051716752e-05, - "loss": 0.4694, + "learning_rate": 5.39805975182007e-05, + "loss": 0.3513, "step": 491000 }, { "epoch": 0.29, - "learning_rate": 5.397856475153807e-05, - "loss": 0.462, + "learning_rate": 5.397850175257126e-05, + "loss": 0.3357, "step": 491500 }, { "epoch": 0.29, - "learning_rate": 5.397646478597751e-05, - "loss": 0.4713, + "learning_rate": 5.397640178701069e-05, + "loss": 0.3456, "step": 492000 }, { "epoch": 0.3, - "learning_rate": 5.397436482041695e-05, - "loss": 0.4831, + "learning_rate": 5.397430182145013e-05, + "loss": 0.3508, "step": 492500 }, { "epoch": 0.3, - "learning_rate": 5.397226485485638e-05, - "loss": 0.486, + "learning_rate": 5.3972201855889565e-05, + "loss": 0.3477, "step": 493000 }, { "epoch": 0.3, - "learning_rate": 5.3970164889295814e-05, - "loss": 0.4889, + "learning_rate": 5.397010189032899e-05, + "loss": 0.3526, "step": 493500 }, { "epoch": 0.3, - "learning_rate": 5.3968064923735254e-05, - "loss": 0.4764, + "learning_rate": 5.396800192476843e-05, + "loss": 0.3409, "step": 494000 }, { "epoch": 0.3, - "learning_rate": 5.396596495817469e-05, - "loss": 0.4812, + "learning_rate": 5.3965901959207866e-05, + "loss": 0.3465, "step": 494500 }, { "epoch": 0.3, - "learning_rate": 5.396386499261412e-05, - "loss": 0.4633, + "learning_rate": 5.3963801993647306e-05, + "loss": 0.341, "step": 495000 }, { "epoch": 0.3, - "learning_rate": 5.3961769226984675e-05, - "loss": 0.4737, + "learning_rate": 5.396170622801786e-05, + "loss": 0.3488, "step": 495500 }, { "epoch": 0.3, - "learning_rate": 5.395967346135523e-05, - "loss": 0.4946, + "learning_rate": 5.395960626245729e-05, + "loss": 0.3548, "step": 496000 }, { "epoch": 0.3, - "learning_rate": 5.395757349579467e-05, - "loss": 0.4614, + "learning_rate": 5.3957506296896726e-05, + "loss": 0.3409, "step": 496500 }, { "epoch": 0.3, - "learning_rate": 5.39554735302341e-05, - "loss": 0.4759, + "learning_rate": 5.3955406331336167e-05, + "loss": 0.3457, "step": 497000 }, { "epoch": 0.3, - "learning_rate": 5.3953373564673535e-05, - "loss": 0.469, + "learning_rate": 5.39533063657756e-05, + "loss": 0.3428, "step": 497500 }, { "epoch": 0.3, - "learning_rate": 5.3951273599112976e-05, - "loss": 0.4852, + "learning_rate": 5.3951206400215033e-05, + "loss": 0.3547, "step": 498000 }, { "epoch": 0.3, - "learning_rate": 5.394917363355241e-05, - "loss": 0.4739, + "learning_rate": 5.3949106434654474e-05, + "loss": 0.344, "step": 498500 }, { "epoch": 0.3, - "learning_rate": 5.394707366799184e-05, - "loss": 0.4711, + "learning_rate": 5.394700646909391e-05, + "loss": 0.3415, "step": 499000 }, { "epoch": 0.3, - "learning_rate": 5.394497370243128e-05, - "loss": 0.4733, + "learning_rate": 5.394491070346446e-05, + "loss": 0.3391, "step": 499500 }, { "epoch": 0.3, - "learning_rate": 5.3942877936801836e-05, - "loss": 0.4654, + "learning_rate": 5.3942810737903894e-05, + "loss": 0.3393, "step": 500000 }, { "epoch": 0.3, - "eval_loss": 0.44848358631134033, - "eval_runtime": 1105.3231, - "eval_samples_per_second": 476.53, - "eval_steps_per_second": 79.422, + "eval_loss": 0.31016305088996887, + "eval_runtime": 1464.0628, + "eval_samples_per_second": 359.766, + "eval_steps_per_second": 59.961, "step": 500000 }, { "epoch": 0.3, - "learning_rate": 5.394077797124127e-05, - "loss": 0.4744, + "learning_rate": 5.3940710772343334e-05, + "loss": 0.3476, "step": 500500 }, { "epoch": 0.3, - "learning_rate": 5.393867800568071e-05, - "loss": 0.4725, + "learning_rate": 5.393861080678277e-05, + "loss": 0.344, "step": 501000 }, { "epoch": 0.3, - "learning_rate": 5.3936578040120144e-05, - "loss": 0.4721, + "learning_rate": 5.393651504115332e-05, + "loss": 0.3406, "step": 501500 }, { "epoch": 0.3, - "learning_rate": 5.393447807455958e-05, - "loss": 0.4745, + "learning_rate": 5.393441507559276e-05, + "loss": 0.3436, "step": 502000 }, { "epoch": 0.3, - "learning_rate": 5.393238230893013e-05, - "loss": 0.4814, + "learning_rate": 5.3932315110032195e-05, + "loss": 0.3441, "step": 502500 }, { "epoch": 0.3, - "learning_rate": 5.393028234336957e-05, - "loss": 0.47, + "learning_rate": 5.393021514447163e-05, + "loss": 0.3473, "step": 503000 }, { "epoch": 0.3, - "learning_rate": 5.3928182377809004e-05, - "loss": 0.4721, + "learning_rate": 5.392811517891107e-05, + "loss": 0.3426, "step": 503500 }, { "epoch": 0.3, - "learning_rate": 5.392608241224844e-05, - "loss": 0.4668, + "learning_rate": 5.392602361321274e-05, + "loss": 0.3406, "step": 504000 }, { "epoch": 0.3, - "learning_rate": 5.392398664661899e-05, - "loss": 0.4679, + "learning_rate": 5.392392364765218e-05, + "loss": 0.3442, "step": 504500 }, { "epoch": 0.3, - "learning_rate": 5.392188668105843e-05, - "loss": 0.4823, + "learning_rate": 5.3921823682091616e-05, + "loss": 0.3489, "step": 505000 }, { "epoch": 0.3, - "learning_rate": 5.3919786715497865e-05, - "loss": 0.4748, + "learning_rate": 5.391972791646217e-05, + "loss": 0.3446, "step": 505500 }, { "epoch": 0.3, - "learning_rate": 5.39176867499373e-05, - "loss": 0.4733, + "learning_rate": 5.39176279509016e-05, + "loss": 0.3443, "step": 506000 }, { "epoch": 0.3, - "learning_rate": 5.391559098430786e-05, - "loss": 0.4842, + "learning_rate": 5.3915527985341043e-05, + "loss": 0.3462, "step": 506500 }, { "epoch": 0.3, - "learning_rate": 5.391349101874729e-05, - "loss": 0.476, + "learning_rate": 5.391342801978048e-05, + "loss": 0.3441, "step": 507000 }, { "epoch": 0.3, - "learning_rate": 5.3911391053186726e-05, - "loss": 0.4767, + "learning_rate": 5.3911328054219904e-05, + "loss": 0.3474, "step": 507500 }, { "epoch": 0.3, - "learning_rate": 5.3909291087626166e-05, - "loss": 0.4652, + "learning_rate": 5.3909228088659344e-05, + "loss": 0.3399, "step": 508000 }, { "epoch": 0.3, - "learning_rate": 5.390719952192784e-05, - "loss": 0.476, + "learning_rate": 5.390712812309878e-05, + "loss": 0.3452, "step": 508500 }, { "epoch": 0.31, - "learning_rate": 5.390509955636728e-05, - "loss": 0.4767, + "learning_rate": 5.390502815753822e-05, + "loss": 0.3486, "step": 509000 }, { "epoch": 0.31, - "learning_rate": 5.3902999590806707e-05, - "loss": 0.4742, + "learning_rate": 5.390293239190878e-05, + "loss": 0.3441, "step": 509500 }, { "epoch": 0.31, - "learning_rate": 5.390089962524614e-05, - "loss": 0.4767, + "learning_rate": 5.3900832426348205e-05, + "loss": 0.3524, "step": 510000 }, { "epoch": 0.31, - "learning_rate": 5.389879965968558e-05, - "loss": 0.4709, + "learning_rate": 5.389873246078764e-05, + "loss": 0.347, "step": 510500 }, { "epoch": 0.31, - "learning_rate": 5.3896699694125014e-05, - "loss": 0.4737, + "learning_rate": 5.389663249522708e-05, + "loss": 0.3465, "step": 511000 }, { "epoch": 0.31, - "learning_rate": 5.389459972856445e-05, - "loss": 0.4719, + "learning_rate": 5.389453252966651e-05, + "loss": 0.3455, "step": 511500 }, { "epoch": 0.31, - "learning_rate": 5.389249976300389e-05, - "loss": 0.4696, + "learning_rate": 5.3892432564105945e-05, + "loss": 0.3445, "step": 512000 }, { "epoch": 0.31, - "learning_rate": 5.389039979744332e-05, - "loss": 0.4794, + "learning_rate": 5.3890332598545385e-05, + "loss": 0.3451, "step": 512500 }, { "epoch": 0.31, - "learning_rate": 5.3888304031813874e-05, - "loss": 0.4704, + "learning_rate": 5.388823263298482e-05, + "loss": 0.3411, "step": 513000 }, { "epoch": 0.31, - "learning_rate": 5.3886204066253315e-05, - "loss": 0.4682, + "learning_rate": 5.388613686735537e-05, + "loss": 0.3462, "step": 513500 }, { "epoch": 0.31, - "learning_rate": 5.388410410069275e-05, - "loss": 0.4628, + "learning_rate": 5.388404110172593e-05, + "loss": 0.3414, "step": 514000 }, { "epoch": 0.31, - "learning_rate": 5.388200413513218e-05, - "loss": 0.474, + "learning_rate": 5.3881941136165366e-05, + "loss": 0.3442, "step": 514500 }, { "epoch": 0.31, - "learning_rate": 5.387991256943386e-05, - "loss": 0.471, + "learning_rate": 5.38798411706048e-05, + "loss": 0.3457, "step": 515000 }, { "epoch": 0.31, - "learning_rate": 5.3877816803804416e-05, - "loss": 0.4628, + "learning_rate": 5.387774120504423e-05, + "loss": 0.3445, "step": 515500 }, { "epoch": 0.31, - "learning_rate": 5.387571683824385e-05, - "loss": 0.4751, + "learning_rate": 5.3875641239483673e-05, + "loss": 0.347, "step": 516000 }, { "epoch": 0.31, - "learning_rate": 5.387361687268329e-05, - "loss": 0.4658, + "learning_rate": 5.387354127392311e-05, + "loss": 0.3434, "step": 516500 }, { "epoch": 0.31, - "learning_rate": 5.387151690712272e-05, - "loss": 0.4581, + "learning_rate": 5.387144130836254e-05, + "loss": 0.3344, "step": 517000 }, { "epoch": 0.31, - "learning_rate": 5.3869416941562156e-05, - "loss": 0.4749, + "learning_rate": 5.386934134280198e-05, + "loss": 0.3484, "step": 517500 }, { "epoch": 0.31, - "learning_rate": 5.3867316976001596e-05, - "loss": 0.4818, + "learning_rate": 5.3867241377241414e-05, + "loss": 0.3452, "step": 518000 }, { "epoch": 0.31, - "learning_rate": 5.386521701044103e-05, - "loss": 0.4768, + "learning_rate": 5.386514561161197e-05, + "loss": 0.3443, "step": 518500 }, { "epoch": 0.31, - "learning_rate": 5.3863117044880463e-05, - "loss": 0.4707, + "learning_rate": 5.38630456460514e-05, + "loss": 0.3428, "step": 519000 }, { "epoch": 0.31, - "learning_rate": 5.38610170793199e-05, - "loss": 0.4678, + "learning_rate": 5.386094568049084e-05, + "loss": 0.3349, "step": 519500 }, { "epoch": 0.31, - "learning_rate": 5.385891711375933e-05, - "loss": 0.4559, + "learning_rate": 5.3858845714930275e-05, + "loss": 0.3314, "step": 520000 }, { "epoch": 0.31, - "learning_rate": 5.385681714819877e-05, - "loss": 0.4681, + "learning_rate": 5.385674574936971e-05, + "loss": 0.3411, "step": 520500 }, { "epoch": 0.31, - "learning_rate": 5.3854717182638204e-05, - "loss": 0.4787, + "learning_rate": 5.385464578380915e-05, + "loss": 0.3522, "step": 521000 }, { "epoch": 0.31, - "learning_rate": 5.385262141700876e-05, - "loss": 0.4673, + "learning_rate": 5.38525500181797e-05, + "loss": 0.3409, "step": 521500 }, { "epoch": 0.31, - "learning_rate": 5.385052145144819e-05, - "loss": 0.4693, + "learning_rate": 5.3850454252550256e-05, + "loss": 0.3401, "step": 522000 }, { "epoch": 0.31, - "learning_rate": 5.384842148588763e-05, - "loss": 0.4629, + "learning_rate": 5.384835428698969e-05, + "loss": 0.3385, "step": 522500 }, { "epoch": 0.31, - "learning_rate": 5.3846321520327065e-05, - "loss": 0.4697, + "learning_rate": 5.384625432142913e-05, + "loss": 0.3392, "step": 523000 }, { "epoch": 0.31, - "learning_rate": 5.3844225754697625e-05, - "loss": 0.4682, + "learning_rate": 5.384415435586856e-05, + "loss": 0.3432, "step": 523500 }, { "epoch": 0.31, - "learning_rate": 5.384212578913705e-05, - "loss": 0.4765, + "learning_rate": 5.3842054390307996e-05, + "loss": 0.3456, "step": 524000 }, { "epoch": 0.31, - "learning_rate": 5.384002582357649e-05, - "loss": 0.4713, + "learning_rate": 5.3839954424747436e-05, + "loss": 0.3407, "step": 524500 }, { "epoch": 0.31, - "learning_rate": 5.3837925858015925e-05, - "loss": 0.462, + "learning_rate": 5.383785445918687e-05, + "loss": 0.3427, "step": 525000 }, { "epoch": 0.32, - "learning_rate": 5.3835830092386486e-05, - "loss": 0.487, + "learning_rate": 5.3835758693557424e-05, + "loss": 0.3549, "step": 525500 }, { "epoch": 0.32, - "learning_rate": 5.383373012682592e-05, - "loss": 0.4732, + "learning_rate": 5.383365872799686e-05, + "loss": 0.346, "step": 526000 }, { "epoch": 0.32, - "learning_rate": 5.383163016126535e-05, - "loss": 0.4689, + "learning_rate": 5.38315587624363e-05, + "loss": 0.3425, "step": 526500 }, { "epoch": 0.32, - "learning_rate": 5.3829530195704786e-05, - "loss": 0.4776, + "learning_rate": 5.382945879687573e-05, + "loss": 0.3371, "step": 527000 }, { "epoch": 0.32, - "learning_rate": 5.3827430230144226e-05, - "loss": 0.4669, + "learning_rate": 5.3827358831315164e-05, + "loss": 0.3431, "step": 527500 }, { "epoch": 0.32, - "learning_rate": 5.382533026458366e-05, - "loss": 0.4559, + "learning_rate": 5.3825258865754604e-05, + "loss": 0.3384, "step": 528000 }, { "epoch": 0.32, - "learning_rate": 5.382323029902309e-05, - "loss": 0.4742, + "learning_rate": 5.382315890019404e-05, + "loss": 0.3465, "step": 528500 }, { "epoch": 0.32, - "learning_rate": 5.3821130333462534e-05, - "loss": 0.4723, + "learning_rate": 5.382105893463347e-05, + "loss": 0.348, "step": 529000 }, { "epoch": 0.32, - "learning_rate": 5.381903456783309e-05, - "loss": 0.4701, + "learning_rate": 5.381895896907291e-05, + "loss": 0.3429, "step": 529500 }, { "epoch": 0.32, - "learning_rate": 5.381693460227252e-05, - "loss": 0.4639, + "learning_rate": 5.381685900351234e-05, + "loss": 0.3412, "step": 530000 }, { "epoch": 0.32, - "learning_rate": 5.381483883664308e-05, - "loss": 0.4688, + "learning_rate": 5.381475903795177e-05, + "loss": 0.3397, "step": 530500 }, { "epoch": 0.32, - "learning_rate": 5.381273887108251e-05, - "loss": 0.4621, + "learning_rate": 5.381265907239121e-05, + "loss": 0.3356, "step": 531000 }, { "epoch": 0.32, - "learning_rate": 5.381063890552195e-05, - "loss": 0.4747, + "learning_rate": 5.3810559106830645e-05, + "loss": 0.3397, "step": 531500 }, { "epoch": 0.32, - "learning_rate": 5.380853893996138e-05, - "loss": 0.4659, + "learning_rate": 5.380845914127008e-05, + "loss": 0.3378, "step": 532000 }, { "epoch": 0.32, - "learning_rate": 5.3806438974400815e-05, - "loss": 0.4702, + "learning_rate": 5.380636337564064e-05, + "loss": 0.3428, "step": 532500 }, { "epoch": 0.32, - "learning_rate": 5.3804339008840255e-05, - "loss": 0.4744, + "learning_rate": 5.38042676100112e-05, + "loss": 0.346, "step": 533000 }, { "epoch": 0.32, - "learning_rate": 5.380223904327969e-05, - "loss": 0.4646, + "learning_rate": 5.380217184438175e-05, + "loss": 0.3395, "step": 533500 }, { "epoch": 0.32, - "learning_rate": 5.380013907771912e-05, - "loss": 0.4655, + "learning_rate": 5.3800071878821187e-05, + "loss": 0.3407, "step": 534000 }, { "epoch": 0.32, - "learning_rate": 5.379804331208968e-05, - "loss": 0.4674, + "learning_rate": 5.379797191326062e-05, + "loss": 0.3414, "step": 534500 }, { "epoch": 0.32, - "learning_rate": 5.379594754646024e-05, - "loss": 0.4582, + "learning_rate": 5.379587194770006e-05, + "loss": 0.3386, "step": 535000 }, { "epoch": 0.32, - "learning_rate": 5.3793851780830796e-05, - "loss": 0.4678, + "learning_rate": 5.3793771982139494e-05, + "loss": 0.3458, "step": 535500 }, { "epoch": 0.32, - "learning_rate": 5.379175181527023e-05, - "loss": 0.4647, + "learning_rate": 5.379167201657893e-05, + "loss": 0.3365, "step": 536000 }, { "epoch": 0.32, - "learning_rate": 5.378965184970966e-05, - "loss": 0.4683, + "learning_rate": 5.378957205101837e-05, + "loss": 0.3437, "step": 536500 }, { "epoch": 0.32, - "learning_rate": 5.3787551884149103e-05, - "loss": 0.4691, + "learning_rate": 5.3787472085457794e-05, + "loss": 0.3361, "step": 537000 }, { "epoch": 0.32, - "learning_rate": 5.378545191858854e-05, - "loss": 0.4705, + "learning_rate": 5.378537211989723e-05, + "loss": 0.3445, "step": 537500 }, { "epoch": 0.32, - "learning_rate": 5.378335615295909e-05, - "loss": 0.4659, + "learning_rate": 5.378327215433667e-05, + "loss": 0.3364, "step": 538000 }, { "epoch": 0.32, - "learning_rate": 5.3781256187398524e-05, - "loss": 0.4598, + "learning_rate": 5.37811721887761e-05, + "loss": 0.3447, "step": 538500 }, { "epoch": 0.32, - "learning_rate": 5.3779156221837964e-05, - "loss": 0.4767, + "learning_rate": 5.377907642314666e-05, + "loss": 0.3427, "step": 539000 }, { "epoch": 0.32, - "learning_rate": 5.37770562562774e-05, - "loss": 0.4741, + "learning_rate": 5.3776976457586095e-05, + "loss": 0.3441, "step": 539500 }, { "epoch": 0.32, - "learning_rate": 5.377495629071683e-05, - "loss": 0.4665, + "learning_rate": 5.377487649202553e-05, + "loss": 0.3394, "step": 540000 }, { "epoch": 0.32, - "learning_rate": 5.3772856325156264e-05, - "loss": 0.4662, + "learning_rate": 5.377277652646496e-05, + "loss": 0.338, "step": 540500 }, { "epoch": 0.32, - "learning_rate": 5.37707563595957e-05, - "loss": 0.4796, + "learning_rate": 5.37706765609044e-05, + "loss": 0.3483, "step": 541000 }, { "epoch": 0.32, - "learning_rate": 5.376865639403514e-05, - "loss": 0.473, + "learning_rate": 5.376858079527496e-05, + "loss": 0.3408, "step": 541500 }, { "epoch": 0.32, - "learning_rate": 5.376655642847457e-05, - "loss": 0.4599, + "learning_rate": 5.376648082971439e-05, + "loss": 0.3306, "step": 542000 }, { "epoch": 0.33, - "learning_rate": 5.3764456462914005e-05, - "loss": 0.4725, + "learning_rate": 5.376438086415382e-05, + "loss": 0.3398, "step": 542500 }, { "epoch": 0.33, - "learning_rate": 5.3762356497353445e-05, - "loss": 0.47, + "learning_rate": 5.376228089859326e-05, + "loss": 0.3455, "step": 543000 }, { "epoch": 0.33, - "learning_rate": 5.3760260731724e-05, - "loss": 0.4776, + "learning_rate": 5.3760180933032696e-05, + "loss": 0.3403, "step": 543500 }, { "epoch": 0.33, - "learning_rate": 5.375816076616343e-05, - "loss": 0.4765, + "learning_rate": 5.375808096747213e-05, + "loss": 0.3473, "step": 544000 }, { "epoch": 0.33, - "learning_rate": 5.3756060800602866e-05, - "loss": 0.4611, + "learning_rate": 5.375598100191157e-05, + "loss": 0.3398, "step": 544500 }, { "epoch": 0.33, - "learning_rate": 5.3753960835042306e-05, - "loss": 0.4681, + "learning_rate": 5.3753881036351004e-05, + "loss": 0.3404, "step": 545000 }, { "epoch": 0.33, - "learning_rate": 5.375186086948174e-05, - "loss": 0.4627, + "learning_rate": 5.375178527072156e-05, + "loss": 0.3386, "step": 545500 }, { "epoch": 0.33, - "learning_rate": 5.374976090392117e-05, - "loss": 0.4707, + "learning_rate": 5.374968530516099e-05, + "loss": 0.3413, "step": 546000 }, { "epoch": 0.33, - "learning_rate": 5.374766093836061e-05, - "loss": 0.4587, + "learning_rate": 5.374758533960043e-05, + "loss": 0.3399, "step": 546500 }, { "epoch": 0.33, - "learning_rate": 5.374556097280005e-05, - "loss": 0.4564, + "learning_rate": 5.3745485374039864e-05, + "loss": 0.3379, "step": 547000 }, { "epoch": 0.33, - "learning_rate": 5.37434652071706e-05, - "loss": 0.4745, + "learning_rate": 5.374338960841042e-05, + "loss": 0.3456, "step": 547500 }, { "epoch": 0.33, - "learning_rate": 5.3741365241610034e-05, - "loss": 0.4713, + "learning_rate": 5.374129384278098e-05, + "loss": 0.3391, "step": 548000 }, { "epoch": 0.33, - "learning_rate": 5.3739269475980594e-05, - "loss": 0.4714, + "learning_rate": 5.373919387722042e-05, + "loss": 0.3468, "step": 548500 }, { "epoch": 0.33, - "learning_rate": 5.373716951042003e-05, - "loss": 0.4618, + "learning_rate": 5.3737093911659845e-05, + "loss": 0.3407, "step": 549000 }, { "epoch": 0.33, - "learning_rate": 5.373507374479059e-05, - "loss": 0.4738, + "learning_rate": 5.373499394609928e-05, + "loss": 0.3426, "step": 549500 }, { "epoch": 0.33, - "learning_rate": 5.3732973779230015e-05, - "loss": 0.4768, + "learning_rate": 5.373289398053872e-05, + "loss": 0.3468, "step": 550000 }, { "epoch": 0.33, - "learning_rate": 5.3730873813669455e-05, - "loss": 0.4796, + "learning_rate": 5.373079401497815e-05, + "loss": 0.3456, "step": 550500 }, { "epoch": 0.33, - "learning_rate": 5.372877384810889e-05, - "loss": 0.4629, + "learning_rate": 5.3728694049417586e-05, + "loss": 0.3349, "step": 551000 }, { "epoch": 0.33, - "learning_rate": 5.372667388254832e-05, - "loss": 0.4726, + "learning_rate": 5.3726594083857026e-05, + "loss": 0.3405, "step": 551500 }, { "epoch": 0.33, - "learning_rate": 5.372457391698776e-05, - "loss": 0.4692, + "learning_rate": 5.372449411829646e-05, + "loss": 0.3367, "step": 552000 }, { "epoch": 0.33, - "learning_rate": 5.3722473951427195e-05, - "loss": 0.4632, + "learning_rate": 5.372239415273589e-05, + "loss": 0.3322, "step": 552500 }, { "epoch": 0.33, - "learning_rate": 5.372037398586663e-05, - "loss": 0.4545, + "learning_rate": 5.372029418717533e-05, + "loss": 0.3339, "step": 553000 }, { "epoch": 0.33, - "learning_rate": 5.371827402030607e-05, - "loss": 0.458, + "learning_rate": 5.3718194221614767e-05, + "loss": 0.3393, "step": 553500 }, { "epoch": 0.33, - "learning_rate": 5.37161740547455e-05, - "loss": 0.4636, + "learning_rate": 5.371609845598532e-05, + "loss": 0.3393, "step": 554000 }, { "epoch": 0.33, - "learning_rate": 5.3714078289116056e-05, - "loss": 0.4788, + "learning_rate": 5.371399849042476e-05, + "loss": 0.3424, "step": 554500 }, { "epoch": 0.33, - "learning_rate": 5.371197832355549e-05, - "loss": 0.4675, + "learning_rate": 5.3711902724795314e-05, + "loss": 0.3378, "step": 555000 }, { "epoch": 0.33, - "learning_rate": 5.370987835799493e-05, - "loss": 0.4563, + "learning_rate": 5.3709806959165874e-05, + "loss": 0.3321, "step": 555500 }, { "epoch": 0.33, - "learning_rate": 5.370777839243436e-05, - "loss": 0.4649, + "learning_rate": 5.37077069936053e-05, + "loss": 0.338, "step": 556000 }, { "epoch": 0.33, - "learning_rate": 5.3705678426873804e-05, - "loss": 0.4569, + "learning_rate": 5.3705607028044734e-05, + "loss": 0.3405, "step": 556500 }, { "epoch": 0.33, - "learning_rate": 5.370357846131324e-05, - "loss": 0.4584, + "learning_rate": 5.3703507062484175e-05, + "loss": 0.3359, "step": 557000 }, { "epoch": 0.33, - "learning_rate": 5.370147849575267e-05, - "loss": 0.463, + "learning_rate": 5.370140709692361e-05, + "loss": 0.3328, "step": 557500 }, { "epoch": 0.33, - "learning_rate": 5.3699378530192104e-05, - "loss": 0.4636, + "learning_rate": 5.369930713136304e-05, + "loss": 0.3401, "step": 558000 }, { "epoch": 0.33, - "learning_rate": 5.3697282764562664e-05, - "loss": 0.4629, + "learning_rate": 5.369720716580248e-05, + "loss": 0.3353, "step": 558500 }, { "epoch": 0.34, - "learning_rate": 5.36951827990021e-05, - "loss": 0.4681, + "learning_rate": 5.3695107200241915e-05, + "loss": 0.3417, "step": 559000 }, { "epoch": 0.34, - "learning_rate": 5.369308283344153e-05, - "loss": 0.4707, + "learning_rate": 5.369300723468135e-05, + "loss": 0.342, "step": 559500 }, { "epoch": 0.34, - "learning_rate": 5.3690987067812085e-05, - "loss": 0.4832, + "learning_rate": 5.369090726912079e-05, + "loss": 0.3388, "step": 560000 }, { "epoch": 0.34, - "learning_rate": 5.3688887102251525e-05, - "loss": 0.46, + "learning_rate": 5.368881150349134e-05, + "loss": 0.3374, "step": 560500 }, { "epoch": 0.34, - "learning_rate": 5.368678713669096e-05, - "loss": 0.4679, + "learning_rate": 5.3686711537930776e-05, + "loss": 0.3384, "step": 561000 }, { "epoch": 0.34, - "learning_rate": 5.368468717113039e-05, - "loss": 0.4642, + "learning_rate": 5.3684611572370216e-05, + "loss": 0.3416, "step": 561500 }, { "epoch": 0.34, - "learning_rate": 5.368258720556983e-05, - "loss": 0.4681, + "learning_rate": 5.368251160680965e-05, + "loss": 0.3421, "step": 562000 }, { "epoch": 0.34, - "learning_rate": 5.368048724000926e-05, - "loss": 0.4787, + "learning_rate": 5.368041164124908e-05, + "loss": 0.3452, "step": 562500 }, { "epoch": 0.34, - "learning_rate": 5.36783872744487e-05, - "loss": 0.4644, + "learning_rate": 5.3678311675688523e-05, + "loss": 0.3362, "step": 563000 }, { "epoch": 0.34, - "learning_rate": 5.367628730888813e-05, - "loss": 0.4619, + "learning_rate": 5.367621171012796e-05, + "loss": 0.3416, "step": 563500 }, { "epoch": 0.34, - "learning_rate": 5.367419154325869e-05, - "loss": 0.4637, + "learning_rate": 5.3674111744567384e-05, + "loss": 0.3382, "step": 564000 }, { "epoch": 0.34, - "learning_rate": 5.3672091577698126e-05, - "loss": 0.4603, + "learning_rate": 5.3672011779006824e-05, + "loss": 0.3376, "step": 564500 }, { "epoch": 0.34, - "learning_rate": 5.366999161213756e-05, - "loss": 0.4748, + "learning_rate": 5.366991181344626e-05, + "loss": 0.3418, "step": 565000 }, { "epoch": 0.34, - "learning_rate": 5.366789584650812e-05, - "loss": 0.4696, + "learning_rate": 5.366781604781682e-05, + "loss": 0.3372, "step": 565500 }, { "epoch": 0.34, - "learning_rate": 5.3665795880947554e-05, - "loss": 0.4617, + "learning_rate": 5.366571608225625e-05, + "loss": 0.3395, "step": 566000 }, { "epoch": 0.34, - "learning_rate": 5.366369591538699e-05, - "loss": 0.4513, + "learning_rate": 5.3663616116695685e-05, + "loss": 0.3324, "step": 566500 }, { "epoch": 0.34, - "learning_rate": 5.366159594982643e-05, - "loss": 0.4597, + "learning_rate": 5.3661520351066245e-05, + "loss": 0.3376, "step": 567000 }, { "epoch": 0.34, - "learning_rate": 5.3659495984265854e-05, - "loss": 0.4643, + "learning_rate": 5.365942038550568e-05, + "loss": 0.3368, "step": 567500 }, { "epoch": 0.34, - "learning_rate": 5.365739601870529e-05, - "loss": 0.4605, + "learning_rate": 5.365732041994511e-05, + "loss": 0.3353, "step": 568000 }, { "epoch": 0.34, - "learning_rate": 5.365529605314473e-05, - "loss": 0.4665, + "learning_rate": 5.3655220454384545e-05, + "loss": 0.3351, "step": 568500 }, { "epoch": 0.34, - "learning_rate": 5.365319608758416e-05, - "loss": 0.4612, + "learning_rate": 5.3653124688755106e-05, + "loss": 0.3382, "step": 569000 }, { "epoch": 0.34, - "learning_rate": 5.365110032195472e-05, - "loss": 0.4624, + "learning_rate": 5.365102472319454e-05, + "loss": 0.3459, "step": 569500 }, { "epoch": 0.34, - "learning_rate": 5.3649000356394155e-05, - "loss": 0.4799, + "learning_rate": 5.364892475763398e-05, + "loss": 0.3476, "step": 570000 }, { "epoch": 0.34, - "learning_rate": 5.364690039083359e-05, - "loss": 0.469, + "learning_rate": 5.364682479207341e-05, + "loss": 0.3425, "step": 570500 }, { "epoch": 0.34, - "learning_rate": 5.364480042527302e-05, - "loss": 0.4845, + "learning_rate": 5.364472482651284e-05, + "loss": 0.3465, "step": 571000 }, { "epoch": 0.34, - "learning_rate": 5.364270465964358e-05, - "loss": 0.4607, + "learning_rate": 5.364262486095228e-05, + "loss": 0.3382, "step": 571500 }, { "epoch": 0.34, - "learning_rate": 5.364060469408302e-05, - "loss": 0.4676, + "learning_rate": 5.364052489539171e-05, + "loss": 0.3376, "step": 572000 }, { "epoch": 0.34, - "learning_rate": 5.3638508928453576e-05, - "loss": 0.4709, + "learning_rate": 5.3638429129762273e-05, + "loss": 0.3391, "step": 572500 }, { "epoch": 0.34, - "learning_rate": 5.363640896289301e-05, - "loss": 0.4588, + "learning_rate": 5.363632916420171e-05, + "loss": 0.3417, "step": 573000 }, { "epoch": 0.34, - "learning_rate": 5.363430899733244e-05, - "loss": 0.4667, + "learning_rate": 5.363422919864114e-05, + "loss": 0.3398, "step": 573500 }, { "epoch": 0.34, - "learning_rate": 5.363220903177188e-05, - "loss": 0.4727, + "learning_rate": 5.3632129233080574e-05, + "loss": 0.3443, "step": 574000 }, { "epoch": 0.34, - "learning_rate": 5.363011326614244e-05, - "loss": 0.4633, + "learning_rate": 5.363002926752001e-05, + "loss": 0.3443, "step": 574500 }, { "epoch": 0.34, - "learning_rate": 5.362801330058187e-05, - "loss": 0.4648, + "learning_rate": 5.362792930195945e-05, + "loss": 0.3383, "step": 575000 }, { "epoch": 0.35, - "learning_rate": 5.3625913335021304e-05, - "loss": 0.4565, + "learning_rate": 5.362583353633001e-05, + "loss": 0.3344, "step": 575500 }, { "epoch": 0.35, - "learning_rate": 5.3623813369460744e-05, - "loss": 0.4652, + "learning_rate": 5.3623733570769435e-05, + "loss": 0.3373, "step": 576000 }, { "epoch": 0.35, - "learning_rate": 5.362171340390018e-05, - "loss": 0.4649, + "learning_rate": 5.3621633605208875e-05, + "loss": 0.3413, "step": 576500 }, { "epoch": 0.35, - "learning_rate": 5.361961343833961e-05, - "loss": 0.4619, + "learning_rate": 5.361953363964831e-05, + "loss": 0.3378, "step": 577000 }, { "epoch": 0.35, - "learning_rate": 5.3617513472779044e-05, - "loss": 0.4586, + "learning_rate": 5.361743367408774e-05, + "loss": 0.3397, "step": 577500 }, { "epoch": 0.35, - "learning_rate": 5.361541350721848e-05, - "loss": 0.4515, + "learning_rate": 5.361533370852718e-05, + "loss": 0.3262, "step": 578000 }, { "epoch": 0.35, - "learning_rate": 5.361331354165792e-05, - "loss": 0.4511, + "learning_rate": 5.3613237942897736e-05, + "loss": 0.331, "step": 578500 }, { "epoch": 0.35, - "learning_rate": 5.361121777602848e-05, - "loss": 0.479, + "learning_rate": 5.361113797733717e-05, + "loss": 0.34, "step": 579000 }, { "epoch": 0.35, - "learning_rate": 5.3609117810467905e-05, - "loss": 0.4606, + "learning_rate": 5.36090380117766e-05, + "loss": 0.3359, "step": 579500 }, { "epoch": 0.35, - "learning_rate": 5.360701784490734e-05, - "loss": 0.4543, + "learning_rate": 5.360693804621604e-05, + "loss": 0.3371, "step": 580000 }, { "epoch": 0.35, - "learning_rate": 5.360491787934678e-05, - "loss": 0.4661, + "learning_rate": 5.3604838080655476e-05, + "loss": 0.3355, "step": 580500 }, { "epoch": 0.35, - "learning_rate": 5.360281791378621e-05, - "loss": 0.47, + "learning_rate": 5.360273811509491e-05, + "loss": 0.3384, "step": 581000 }, { "epoch": 0.35, - "learning_rate": 5.3600717948225646e-05, - "loss": 0.4654, + "learning_rate": 5.360064234946546e-05, + "loss": 0.3433, "step": 581500 }, { "epoch": 0.35, - "learning_rate": 5.3598617982665086e-05, - "loss": 0.4672, + "learning_rate": 5.3598542383904903e-05, + "loss": 0.3334, "step": 582000 }, { "epoch": 0.35, - "learning_rate": 5.359651801710452e-05, - "loss": 0.4666, + "learning_rate": 5.359644241834434e-05, + "loss": 0.3467, "step": 582500 }, { "epoch": 0.35, - "learning_rate": 5.359442225147507e-05, - "loss": 0.4505, + "learning_rate": 5.359434245278377e-05, + "loss": 0.3259, "step": 583000 }, { "epoch": 0.35, - "learning_rate": 5.359232648584563e-05, - "loss": 0.4659, + "learning_rate": 5.359224248722321e-05, + "loss": 0.341, "step": 583500 }, { "epoch": 0.35, - "learning_rate": 5.359022652028507e-05, - "loss": 0.4575, + "learning_rate": 5.3590142521662644e-05, + "loss": 0.3343, "step": 584000 }, { "epoch": 0.35, - "learning_rate": 5.35881265547245e-05, - "loss": 0.4572, + "learning_rate": 5.358804255610208e-05, + "loss": 0.3329, "step": 584500 }, { "epoch": 0.35, - "learning_rate": 5.3586026589163934e-05, - "loss": 0.46, + "learning_rate": 5.358594259054152e-05, + "loss": 0.3418, "step": 585000 }, { "epoch": 0.35, - "learning_rate": 5.3583926623603374e-05, - "loss": 0.463, + "learning_rate": 5.358384262498095e-05, + "loss": 0.3393, "step": 585500 }, { "epoch": 0.35, - "learning_rate": 5.358182665804281e-05, - "loss": 0.4617, + "learning_rate": 5.3581746859351505e-05, + "loss": 0.336, "step": 586000 }, { "epoch": 0.35, - "learning_rate": 5.357972669248224e-05, - "loss": 0.4695, + "learning_rate": 5.3579646893790945e-05, + "loss": 0.3365, "step": 586500 }, { "epoch": 0.35, - "learning_rate": 5.3577630926852794e-05, - "loss": 0.4552, + "learning_rate": 5.357754692823038e-05, + "loss": 0.3343, "step": 587000 }, { "epoch": 0.35, - "learning_rate": 5.3575530961292235e-05, - "loss": 0.471, + "learning_rate": 5.357544696266981e-05, + "loss": 0.344, "step": 587500 }, { "epoch": 0.35, - "learning_rate": 5.357343099573167e-05, - "loss": 0.4603, + "learning_rate": 5.3573351197040365e-05, + "loss": 0.3414, "step": 588000 }, { "epoch": 0.35, - "learning_rate": 5.35713310301711e-05, - "loss": 0.4541, + "learning_rate": 5.3571251231479806e-05, + "loss": 0.3369, "step": 588500 }, { "epoch": 0.35, - "learning_rate": 5.356923106461054e-05, - "loss": 0.4583, + "learning_rate": 5.356915126591924e-05, + "loss": 0.3313, "step": 589000 }, { "epoch": 0.35, - "learning_rate": 5.3567131099049975e-05, - "loss": 0.4849, + "learning_rate": 5.356705130035867e-05, + "loss": 0.3492, "step": 589500 }, { "epoch": 0.35, - "learning_rate": 5.356503113348941e-05, - "loss": 0.4642, + "learning_rate": 5.356495133479811e-05, + "loss": 0.3327, "step": 590000 }, { "epoch": 0.35, - "learning_rate": 5.356293536785996e-05, - "loss": 0.4594, + "learning_rate": 5.3562851369237546e-05, + "loss": 0.3395, "step": 590500 }, { "epoch": 0.35, - "learning_rate": 5.35608354022994e-05, - "loss": 0.4704, + "learning_rate": 5.356075140367697e-05, + "loss": 0.3431, "step": 591000 }, { "epoch": 0.35, - "learning_rate": 5.3558735436738836e-05, - "loss": 0.4581, + "learning_rate": 5.355865563804753e-05, + "loss": 0.336, "step": 591500 }, { "epoch": 0.35, - "learning_rate": 5.355663547117827e-05, - "loss": 0.4688, + "learning_rate": 5.3556555672486974e-05, + "loss": 0.3447, "step": 592000 }, { "epoch": 0.36, - "learning_rate": 5.355453970554883e-05, - "loss": 0.4616, + "learning_rate": 5.355445570692641e-05, + "loss": 0.336, "step": 592500 }, { "epoch": 0.36, - "learning_rate": 5.355243973998826e-05, - "loss": 0.4545, + "learning_rate": 5.355235574136584e-05, + "loss": 0.3355, "step": 593000 }, { "epoch": 0.36, - "learning_rate": 5.35503397744277e-05, - "loss": 0.4565, + "learning_rate": 5.3550255775805274e-05, + "loss": 0.3286, "step": 593500 }, { "epoch": 0.36, - "learning_rate": 5.354823980886714e-05, - "loss": 0.4676, + "learning_rate": 5.354815581024471e-05, + "loss": 0.3431, "step": 594000 }, { "epoch": 0.36, - "learning_rate": 5.354613984330657e-05, - "loss": 0.46, + "learning_rate": 5.354605584468415e-05, + "loss": 0.3408, "step": 594500 }, { "epoch": 0.36, - "learning_rate": 5.3544039877746004e-05, - "loss": 0.4544, + "learning_rate": 5.354396007905471e-05, + "loss": 0.333, "step": 595000 }, { "epoch": 0.36, - "learning_rate": 5.3541939912185444e-05, - "loss": 0.4461, + "learning_rate": 5.3541860113494135e-05, + "loss": 0.3283, "step": 595500 }, { "epoch": 0.36, - "learning_rate": 5.3539844146556e-05, - "loss": 0.4712, + "learning_rate": 5.353976014793357e-05, + "loss": 0.3428, "step": 596000 }, { "epoch": 0.36, - "learning_rate": 5.353774418099543e-05, - "loss": 0.467, + "learning_rate": 5.353766018237301e-05, + "loss": 0.3404, "step": 596500 }, { "epoch": 0.36, - "learning_rate": 5.3535644215434865e-05, - "loss": 0.4713, + "learning_rate": 5.353556021681244e-05, + "loss": 0.3362, "step": 597000 }, { "epoch": 0.36, - "learning_rate": 5.3533544249874305e-05, - "loss": 0.4754, + "learning_rate": 5.3533464451183e-05, + "loss": 0.3443, "step": 597500 }, { "epoch": 0.36, - "learning_rate": 5.353144428431374e-05, - "loss": 0.4569, + "learning_rate": 5.353136448562243e-05, + "loss": 0.3321, "step": 598000 }, { "epoch": 0.36, - "learning_rate": 5.352934431875317e-05, - "loss": 0.4629, + "learning_rate": 5.352926452006187e-05, + "loss": 0.3367, "step": 598500 }, { "epoch": 0.36, - "learning_rate": 5.3527244353192605e-05, - "loss": 0.463, + "learning_rate": 5.35271645545013e-05, + "loss": 0.3361, "step": 599000 }, { "epoch": 0.36, - "learning_rate": 5.352514438763204e-05, - "loss": 0.4675, + "learning_rate": 5.3525064588940736e-05, + "loss": 0.3367, "step": 599500 }, { "epoch": 0.36, - "learning_rate": 5.35230486220026e-05, - "loss": 0.4593, + "learning_rate": 5.3522968823311296e-05, + "loss": 0.3344, "step": 600000 }, { "epoch": 0.36, - "eval_loss": 0.43542608618736267, - "eval_runtime": 1110.5829, - "eval_samples_per_second": 474.273, - "eval_steps_per_second": 79.046, + "eval_loss": 0.3018554449081421, + "eval_runtime": 1471.2095, + "eval_samples_per_second": 358.018, + "eval_steps_per_second": 59.67, "step": 600000 }, { "epoch": 0.36, - "learning_rate": 5.352094865644203e-05, - "loss": 0.4673, + "learning_rate": 5.352086885775073e-05, + "loss": 0.3345, "step": 600500 }, { "epoch": 0.36, - "learning_rate": 5.351884869088147e-05, - "loss": 0.468, + "learning_rate": 5.351876889219016e-05, + "loss": 0.3411, "step": 601000 }, { "epoch": 0.36, - "learning_rate": 5.35167487253209e-05, - "loss": 0.4468, + "learning_rate": 5.3516673126560724e-05, + "loss": 0.33, "step": 601500 }, { "epoch": 0.36, - "learning_rate": 5.351465295969146e-05, - "loss": 0.4703, + "learning_rate": 5.3514573161000164e-05, + "loss": 0.3389, "step": 602000 }, { "epoch": 0.36, - "learning_rate": 5.35125529941309e-05, - "loss": 0.4562, + "learning_rate": 5.35124731954396e-05, + "loss": 0.3292, "step": 602500 }, { "epoch": 0.36, - "learning_rate": 5.351045302857033e-05, - "loss": 0.4582, + "learning_rate": 5.3510373229879024e-05, + "loss": 0.3317, "step": 603000 }, { "epoch": 0.36, - "learning_rate": 5.350835306300977e-05, - "loss": 0.4558, + "learning_rate": 5.3508277464249584e-05, + "loss": 0.3332, "step": 603500 }, { "epoch": 0.36, - "learning_rate": 5.350625729738032e-05, - "loss": 0.4562, + "learning_rate": 5.3506177498689025e-05, + "loss": 0.3353, "step": 604000 }, { "epoch": 0.36, - "learning_rate": 5.350415733181976e-05, - "loss": 0.4624, + "learning_rate": 5.350407753312846e-05, + "loss": 0.3349, "step": 604500 }, { "epoch": 0.36, - "learning_rate": 5.3502057366259194e-05, - "loss": 0.4595, + "learning_rate": 5.350198176749901e-05, + "loss": 0.3346, "step": 605000 }, { "epoch": 0.36, - "learning_rate": 5.349996160062975e-05, - "loss": 0.4726, + "learning_rate": 5.3499881801938445e-05, + "loss": 0.3395, "step": 605500 }, { "epoch": 0.36, - "learning_rate": 5.349786163506918e-05, - "loss": 0.4626, + "learning_rate": 5.3497781836377885e-05, + "loss": 0.3384, "step": 606000 }, { "epoch": 0.36, - "learning_rate": 5.349576166950862e-05, - "loss": 0.464, + "learning_rate": 5.349568187081732e-05, + "loss": 0.3355, "step": 606500 }, { "epoch": 0.36, - "learning_rate": 5.3493661703948055e-05, - "loss": 0.4572, + "learning_rate": 5.349358190525676e-05, + "loss": 0.3329, "step": 607000 }, { "epoch": 0.36, - "learning_rate": 5.349156173838749e-05, - "loss": 0.46, + "learning_rate": 5.3491481939696186e-05, + "loss": 0.3383, "step": 607500 }, { "epoch": 0.36, - "learning_rate": 5.348946177282693e-05, - "loss": 0.4675, + "learning_rate": 5.348938197413562e-05, + "loss": 0.334, "step": 608000 }, { "epoch": 0.36, - "learning_rate": 5.348736180726636e-05, - "loss": 0.4633, + "learning_rate": 5.348728200857506e-05, + "loss": 0.3349, "step": 608500 }, { "epoch": 0.37, - "learning_rate": 5.3485261841705795e-05, - "loss": 0.4541, + "learning_rate": 5.348518204301449e-05, + "loss": 0.3348, "step": 609000 }, { "epoch": 0.37, - "learning_rate": 5.3483166076076356e-05, - "loss": 0.4592, + "learning_rate": 5.3483082077453926e-05, + "loss": 0.3373, "step": 609500 }, { "epoch": 0.37, - "learning_rate": 5.348107031044691e-05, - "loss": 0.4706, + "learning_rate": 5.3480982111893367e-05, + "loss": 0.3386, "step": 610000 }, { "epoch": 0.37, - "learning_rate": 5.347897034488634e-05, - "loss": 0.4518, + "learning_rate": 5.34788821463328e-05, + "loss": 0.3317, "step": 610500 }, { "epoch": 0.37, - "learning_rate": 5.3476870379325776e-05, - "loss": 0.4584, + "learning_rate": 5.3476782180772233e-05, + "loss": 0.338, "step": 611000 }, { "epoch": 0.37, - "learning_rate": 5.3474770413765217e-05, - "loss": 0.466, + "learning_rate": 5.3474682215211674e-05, + "loss": 0.3368, "step": 611500 }, { "epoch": 0.37, - "learning_rate": 5.347267044820465e-05, - "loss": 0.4534, + "learning_rate": 5.347258644958223e-05, + "loss": 0.3364, "step": 612000 }, { "epoch": 0.37, - "learning_rate": 5.3470570482644083e-05, - "loss": 0.4687, + "learning_rate": 5.347048648402166e-05, + "loss": 0.3381, "step": 612500 }, { "epoch": 0.37, - "learning_rate": 5.3468470517083524e-05, - "loss": 0.4647, + "learning_rate": 5.3468386518461094e-05, + "loss": 0.3405, "step": 613000 }, { "epoch": 0.37, - "learning_rate": 5.346637055152295e-05, - "loss": 0.4613, + "learning_rate": 5.3466290752831655e-05, + "loss": 0.3431, "step": 613500 }, { "epoch": 0.37, - "learning_rate": 5.346427478589351e-05, - "loss": 0.4587, + "learning_rate": 5.346419078727109e-05, + "loss": 0.3371, "step": 614000 }, { "epoch": 0.37, - "learning_rate": 5.3462174820332944e-05, - "loss": 0.4569, + "learning_rate": 5.346209082171052e-05, + "loss": 0.3281, "step": 614500 }, { "epoch": 0.37, - "learning_rate": 5.3460074854772384e-05, - "loss": 0.4707, + "learning_rate": 5.345999085614996e-05, + "loss": 0.3402, "step": 615000 }, { "epoch": 0.37, - "learning_rate": 5.345797488921182e-05, - "loss": 0.464, + "learning_rate": 5.3457890890589395e-05, + "loss": 0.3384, "step": 615500 }, { "epoch": 0.37, - "learning_rate": 5.345587492365125e-05, - "loss": 0.4617, + "learning_rate": 5.345579092502883e-05, + "loss": 0.3394, "step": 616000 }, { "epoch": 0.37, - "learning_rate": 5.345377915802181e-05, - "loss": 0.4549, + "learning_rate": 5.345369095946827e-05, + "loss": 0.3293, "step": 616500 }, { "epoch": 0.37, - "learning_rate": 5.3451679192461245e-05, - "loss": 0.4706, + "learning_rate": 5.34515909939077e-05, + "loss": 0.3409, "step": 617000 }, { "epoch": 0.37, - "learning_rate": 5.344957922690068e-05, - "loss": 0.4793, + "learning_rate": 5.3449491028347136e-05, + "loss": 0.3431, "step": 617500 }, { "epoch": 0.37, - "learning_rate": 5.344747926134012e-05, - "loss": 0.4565, + "learning_rate": 5.344739106278657e-05, + "loss": 0.3363, "step": 618000 }, { "epoch": 0.37, - "learning_rate": 5.3445379295779545e-05, - "loss": 0.4618, + "learning_rate": 5.3445291097226e-05, + "loss": 0.3309, "step": 618500 }, { "epoch": 0.37, - "learning_rate": 5.344327933021898e-05, - "loss": 0.4499, + "learning_rate": 5.3443191131665436e-05, + "loss": 0.3252, "step": 619000 }, { "epoch": 0.37, - "learning_rate": 5.344117936465842e-05, - "loss": 0.4606, + "learning_rate": 5.3441095366035997e-05, + "loss": 0.3312, "step": 619500 }, { "epoch": 0.37, - "learning_rate": 5.343908359902898e-05, - "loss": 0.4765, + "learning_rate": 5.343899540047544e-05, + "loss": 0.3406, "step": 620000 }, { "epoch": 0.37, - "learning_rate": 5.3436983633468406e-05, - "loss": 0.4644, + "learning_rate": 5.3436895434914863e-05, + "loss": 0.3358, "step": 620500 }, { "epoch": 0.37, - "learning_rate": 5.343488366790784e-05, - "loss": 0.4584, + "learning_rate": 5.34347954693543e-05, + "loss": 0.3334, "step": 621000 }, { "epoch": 0.37, - "learning_rate": 5.343278370234728e-05, - "loss": 0.4528, + "learning_rate": 5.343269970372486e-05, + "loss": 0.3307, "step": 621500 }, { "epoch": 0.37, - "learning_rate": 5.343068373678671e-05, - "loss": 0.4561, + "learning_rate": 5.34305997381643e-05, + "loss": 0.3293, "step": 622000 }, { "epoch": 0.37, - "learning_rate": 5.3428587971157274e-05, - "loss": 0.459, + "learning_rate": 5.3428499772603724e-05, + "loss": 0.3379, "step": 622500 }, { "epoch": 0.37, - "learning_rate": 5.342648800559671e-05, - "loss": 0.4615, + "learning_rate": 5.3426404006974285e-05, + "loss": 0.3325, "step": 623000 }, { "epoch": 0.37, - "learning_rate": 5.342438804003614e-05, - "loss": 0.4616, + "learning_rate": 5.342430824134484e-05, + "loss": 0.337, "step": 623500 }, { "epoch": 0.37, - "learning_rate": 5.3422288074475574e-05, - "loss": 0.4645, + "learning_rate": 5.342220827578428e-05, + "loss": 0.3376, "step": 624000 }, { "epoch": 0.37, - "learning_rate": 5.3420188108915014e-05, - "loss": 0.4638, + "learning_rate": 5.342010831022371e-05, + "loss": 0.3374, "step": 624500 }, { "epoch": 0.37, - "learning_rate": 5.341808814335445e-05, - "loss": 0.4588, + "learning_rate": 5.3418008344663145e-05, + "loss": 0.3396, "step": 625000 }, { "epoch": 0.38, - "learning_rate": 5.341598817779388e-05, - "loss": 0.4619, + "learning_rate": 5.3415908379102585e-05, + "loss": 0.3309, "step": 625500 }, { "epoch": 0.38, - "learning_rate": 5.341388821223332e-05, - "loss": 0.4573, + "learning_rate": 5.341381261347314e-05, + "loss": 0.3394, "step": 626000 }, { "epoch": 0.38, - "learning_rate": 5.3411792446603875e-05, - "loss": 0.4668, + "learning_rate": 5.341171264791257e-05, + "loss": 0.3352, "step": 626500 }, { "epoch": 0.38, - "learning_rate": 5.340969248104331e-05, - "loss": 0.4602, + "learning_rate": 5.3409612682352006e-05, + "loss": 0.3335, "step": 627000 }, { "epoch": 0.38, - "learning_rate": 5.340759251548274e-05, - "loss": 0.4508, + "learning_rate": 5.3407512716791446e-05, + "loss": 0.3283, "step": 627500 }, { "epoch": 0.38, - "learning_rate": 5.340549254992218e-05, - "loss": 0.4435, + "learning_rate": 5.340541275123088e-05, + "loss": 0.3292, "step": 628000 }, { "epoch": 0.38, - "learning_rate": 5.3403392584361616e-05, - "loss": 0.4509, + "learning_rate": 5.340331278567031e-05, + "loss": 0.3258, "step": 628500 }, { "epoch": 0.38, - "learning_rate": 5.3401301018663296e-05, - "loss": 0.4595, + "learning_rate": 5.340121282010975e-05, + "loss": 0.3297, "step": 629000 }, { "epoch": 0.38, - "learning_rate": 5.339920105310273e-05, - "loss": 0.4596, + "learning_rate": 5.339911285454919e-05, + "loss": 0.3314, "step": 629500 }, { "epoch": 0.38, - "learning_rate": 5.339710108754216e-05, - "loss": 0.4466, + "learning_rate": 5.339701288898862e-05, + "loss": 0.3289, "step": 630000 }, { "epoch": 0.38, - "learning_rate": 5.3395001121981597e-05, - "loss": 0.4616, + "learning_rate": 5.339491712335918e-05, + "loss": 0.337, "step": 630500 }, { "epoch": 0.38, - "learning_rate": 5.339290115642103e-05, - "loss": 0.4483, + "learning_rate": 5.3392817157798614e-05, + "loss": 0.3302, "step": 631000 }, { "epoch": 0.38, - "learning_rate": 5.339080119086047e-05, - "loss": 0.4559, + "learning_rate": 5.339071719223805e-05, + "loss": 0.336, "step": 631500 }, { "epoch": 0.38, - "learning_rate": 5.338870542523103e-05, - "loss": 0.4635, + "learning_rate": 5.338861722667748e-05, + "loss": 0.3332, "step": 632000 }, { "epoch": 0.38, - "learning_rate": 5.338660545967046e-05, - "loss": 0.444, + "learning_rate": 5.3386517261116914e-05, + "loss": 0.3272, "step": 632500 }, { "epoch": 0.38, - "learning_rate": 5.338450549410989e-05, - "loss": 0.4643, + "learning_rate": 5.338441729555635e-05, + "loss": 0.3353, "step": 633000 }, { "epoch": 0.38, - "learning_rate": 5.338240552854933e-05, - "loss": 0.4559, + "learning_rate": 5.338231732999579e-05, + "loss": 0.3297, "step": 633500 }, { "epoch": 0.38, - "learning_rate": 5.3380305562988764e-05, - "loss": 0.4588, + "learning_rate": 5.338021736443522e-05, + "loss": 0.335, "step": 634000 }, { "epoch": 0.38, - "learning_rate": 5.33782055974282e-05, - "loss": 0.4573, + "learning_rate": 5.3378117398874655e-05, + "loss": 0.3331, "step": 634500 }, { "epoch": 0.38, - "learning_rate": 5.337610563186764e-05, - "loss": 0.4722, + "learning_rate": 5.3376017433314095e-05, + "loss": 0.3415, "step": 635000 }, { "epoch": 0.38, - "learning_rate": 5.337400566630707e-05, - "loss": 0.4547, + "learning_rate": 5.337392166768465e-05, + "loss": 0.3307, "step": 635500 }, { "epoch": 0.38, - "learning_rate": 5.3371909900677625e-05, - "loss": 0.4554, + "learning_rate": 5.337182170212408e-05, + "loss": 0.3378, "step": 636000 }, { "epoch": 0.38, - "learning_rate": 5.3369809935117065e-05, - "loss": 0.4545, + "learning_rate": 5.3369721736563516e-05, + "loss": 0.3404, "step": 636500 }, { "epoch": 0.38, - "learning_rate": 5.33677099695565e-05, - "loss": 0.4566, + "learning_rate": 5.3367621771002956e-05, + "loss": 0.3393, "step": 637000 }, { "epoch": 0.38, - "learning_rate": 5.336561000399593e-05, - "loss": 0.4576, + "learning_rate": 5.336552180544239e-05, + "loss": 0.3296, "step": 637500 }, { "epoch": 0.38, - "learning_rate": 5.336351003843537e-05, - "loss": 0.4555, + "learning_rate": 5.336342603981294e-05, + "loss": 0.3383, "step": 638000 }, { "epoch": 0.38, - "learning_rate": 5.3361414272805926e-05, - "loss": 0.4577, + "learning_rate": 5.3361330274183503e-05, + "loss": 0.3328, "step": 638500 }, { "epoch": 0.38, - "learning_rate": 5.335931430724536e-05, - "loss": 0.4507, + "learning_rate": 5.3359230308622944e-05, + "loss": 0.3279, "step": 639000 }, { "epoch": 0.38, - "learning_rate": 5.335721434168479e-05, - "loss": 0.4561, + "learning_rate": 5.335713034306237e-05, + "loss": 0.3275, "step": 639500 }, { "epoch": 0.38, - "learning_rate": 5.335511437612423e-05, - "loss": 0.4609, + "learning_rate": 5.3355030377501804e-05, + "loss": 0.3291, "step": 640000 }, { "epoch": 0.38, - "learning_rate": 5.335301861049479e-05, - "loss": 0.461, + "learning_rate": 5.3352930411941244e-05, + "loss": 0.333, "step": 640500 }, { "epoch": 0.38, - "learning_rate": 5.335091864493422e-05, - "loss": 0.4559, + "learning_rate": 5.3350834646311804e-05, + "loss": 0.33, "step": 641000 }, { "epoch": 0.38, - "learning_rate": 5.3348818679373654e-05, - "loss": 0.463, + "learning_rate": 5.334873468075123e-05, + "loss": 0.3325, "step": 641500 }, { "epoch": 0.38, - "learning_rate": 5.3346718713813094e-05, - "loss": 0.4551, + "learning_rate": 5.3346634715190665e-05, + "loss": 0.3306, "step": 642000 }, { "epoch": 0.39, - "learning_rate": 5.334462294818365e-05, - "loss": 0.4758, + "learning_rate": 5.3344534749630105e-05, + "loss": 0.3379, "step": 642500 }, { "epoch": 0.39, - "learning_rate": 5.334252298262308e-05, - "loss": 0.4667, + "learning_rate": 5.334243478406954e-05, + "loss": 0.3378, "step": 643000 }, { "epoch": 0.39, - "learning_rate": 5.334042301706252e-05, - "loss": 0.4464, + "learning_rate": 5.33403390184401e-05, + "loss": 0.3299, "step": 643500 }, { "epoch": 0.39, - "learning_rate": 5.3338323051501955e-05, - "loss": 0.4622, + "learning_rate": 5.333824325281065e-05, + "loss": 0.3298, "step": 644000 }, { "epoch": 0.39, - "learning_rate": 5.333622308594139e-05, - "loss": 0.4475, + "learning_rate": 5.333614328725009e-05, + "loss": 0.3349, "step": 644500 }, { "epoch": 0.39, - "learning_rate": 5.333412732031194e-05, - "loss": 0.4538, + "learning_rate": 5.3334043321689526e-05, + "loss": 0.3281, "step": 645000 }, { "epoch": 0.39, - "learning_rate": 5.333202735475138e-05, - "loss": 0.454, + "learning_rate": 5.333194335612896e-05, + "loss": 0.3355, "step": 645500 }, { "epoch": 0.39, - "learning_rate": 5.3329927389190815e-05, - "loss": 0.4572, + "learning_rate": 5.33298433905684e-05, + "loss": 0.3338, "step": 646000 }, { "epoch": 0.39, - "learning_rate": 5.332782742363025e-05, - "loss": 0.4578, + "learning_rate": 5.3327743425007826e-05, + "loss": 0.3355, "step": 646500 }, { "epoch": 0.39, - "learning_rate": 5.332572745806969e-05, - "loss": 0.4602, + "learning_rate": 5.332564345944726e-05, + "loss": 0.3365, "step": 647000 }, { "epoch": 0.39, - "learning_rate": 5.332362749250912e-05, - "loss": 0.4535, + "learning_rate": 5.33235434938867e-05, + "loss": 0.3314, "step": 647500 }, { "epoch": 0.39, - "learning_rate": 5.3321527526948556e-05, - "loss": 0.4637, + "learning_rate": 5.332144352832613e-05, + "loss": 0.3373, "step": 648000 }, { "epoch": 0.39, - "learning_rate": 5.3319427561387996e-05, - "loss": 0.4508, + "learning_rate": 5.331934356276557e-05, + "loss": 0.333, "step": 648500 }, { "epoch": 0.39, - "learning_rate": 5.331733179575855e-05, - "loss": 0.45, + "learning_rate": 5.331724359720501e-05, + "loss": 0.3287, "step": 649000 }, { "epoch": 0.39, - "learning_rate": 5.331523183019798e-05, - "loss": 0.4555, + "learning_rate": 5.331514363164444e-05, + "loss": 0.3282, "step": 649500 }, { "epoch": 0.39, - "learning_rate": 5.331313186463742e-05, - "loss": 0.4494, + "learning_rate": 5.3313047866014994e-05, + "loss": 0.3305, "step": 650000 }, { "epoch": 0.39, - "learning_rate": 5.331103189907686e-05, - "loss": 0.4599, + "learning_rate": 5.331094790045443e-05, + "loss": 0.3437, "step": 650500 }, { "epoch": 0.39, - "learning_rate": 5.330893613344741e-05, - "loss": 0.4592, + "learning_rate": 5.330884793489387e-05, + "loss": 0.3306, "step": 651000 }, { "epoch": 0.39, - "learning_rate": 5.3306836167886844e-05, - "loss": 0.4575, + "learning_rate": 5.33067479693333e-05, + "loss": 0.3242, "step": 651500 }, { "epoch": 0.39, - "learning_rate": 5.3304736202326284e-05, - "loss": 0.4576, + "learning_rate": 5.3304652203703855e-05, + "loss": 0.3332, "step": 652000 }, { "epoch": 0.39, - "learning_rate": 5.330263623676572e-05, - "loss": 0.4569, + "learning_rate": 5.3302552238143295e-05, + "loss": 0.3338, "step": 652500 }, { "epoch": 0.39, - "learning_rate": 5.330054047113627e-05, - "loss": 0.4508, + "learning_rate": 5.330045227258273e-05, + "loss": 0.3315, "step": 653000 }, { "epoch": 0.39, - "learning_rate": 5.3298440505575705e-05, - "loss": 0.4563, + "learning_rate": 5.329835230702216e-05, + "loss": 0.3349, "step": 653500 }, { "epoch": 0.39, - "learning_rate": 5.3296340540015145e-05, - "loss": 0.4494, + "learning_rate": 5.32962523414616e-05, + "loss": 0.3296, "step": 654000 }, { "epoch": 0.39, - "learning_rate": 5.329424057445458e-05, - "loss": 0.4573, + "learning_rate": 5.3294152375901036e-05, + "loss": 0.3336, "step": 654500 }, { "epoch": 0.39, - "learning_rate": 5.329214060889401e-05, - "loss": 0.4545, + "learning_rate": 5.329205241034047e-05, + "loss": 0.3356, "step": 655000 }, { "epoch": 0.39, - "learning_rate": 5.3290044843264565e-05, - "loss": 0.4551, + "learning_rate": 5.328995244477991e-05, + "loss": 0.3296, "step": 655500 }, { "epoch": 0.39, - "learning_rate": 5.3287944877704006e-05, - "loss": 0.4568, + "learning_rate": 5.328785247921934e-05, + "loss": 0.3317, "step": 656000 }, { "epoch": 0.39, - "learning_rate": 5.328584491214344e-05, - "loss": 0.4645, + "learning_rate": 5.3285752513658776e-05, + "loss": 0.3324, "step": 656500 }, { "epoch": 0.39, - "learning_rate": 5.328374914651399e-05, - "loss": 0.4496, + "learning_rate": 5.328365254809821e-05, + "loss": 0.3304, "step": 657000 }, { "epoch": 0.39, - "learning_rate": 5.328164918095343e-05, - "loss": 0.4406, + "learning_rate": 5.328155258253764e-05, + "loss": 0.3304, "step": 657500 }, { "epoch": 0.39, - "learning_rate": 5.3279549215392866e-05, - "loss": 0.4547, + "learning_rate": 5.3279461016839324e-05, + "loss": 0.3276, "step": 658000 }, { "epoch": 0.39, - "learning_rate": 5.32774492498323e-05, - "loss": 0.4476, + "learning_rate": 5.327736105127876e-05, + "loss": 0.3247, "step": 658500 }, { "epoch": 0.4, - "learning_rate": 5.327534928427174e-05, - "loss": 0.4633, + "learning_rate": 5.327526108571819e-05, + "loss": 0.3327, "step": 659000 }, { "epoch": 0.4, - "learning_rate": 5.3273249318711174e-05, - "loss": 0.4662, + "learning_rate": 5.327316112015763e-05, + "loss": 0.3378, "step": 659500 }, { "epoch": 0.4, - "learning_rate": 5.327114935315061e-05, - "loss": 0.4619, + "learning_rate": 5.3271061154597064e-05, + "loss": 0.3342, "step": 660000 }, { "epoch": 0.4, - "learning_rate": 5.326904938759005e-05, - "loss": 0.4683, + "learning_rate": 5.3268961189036505e-05, + "loss": 0.3361, "step": 660500 }, { "epoch": 0.4, - "learning_rate": 5.32669536219606e-05, - "loss": 0.4612, + "learning_rate": 5.326686542340706e-05, + "loss": 0.3297, "step": 661000 }, { "epoch": 0.4, - "learning_rate": 5.3264853656400034e-05, - "loss": 0.4559, + "learning_rate": 5.326476545784649e-05, + "loss": 0.3375, "step": 661500 }, { "epoch": 0.4, - "learning_rate": 5.326275369083947e-05, - "loss": 0.4529, + "learning_rate": 5.3262665492285925e-05, + "loss": 0.332, "step": 662000 }, { "epoch": 0.4, - "learning_rate": 5.326065372527891e-05, - "loss": 0.4654, + "learning_rate": 5.3260565526725365e-05, + "loss": 0.3308, "step": 662500 }, { "epoch": 0.4, - "learning_rate": 5.325855375971834e-05, - "loss": 0.4486, + "learning_rate": 5.32584655611648e-05, + "loss": 0.3258, "step": 663000 }, { "epoch": 0.4, - "learning_rate": 5.3256453794157775e-05, - "loss": 0.4569, + "learning_rate": 5.325636559560423e-05, + "loss": 0.3351, "step": 663500 }, { "epoch": 0.4, - "learning_rate": 5.3254353828597215e-05, - "loss": 0.4553, + "learning_rate": 5.3254265630043666e-05, + "loss": 0.326, "step": 664000 }, { "epoch": 0.4, - "learning_rate": 5.325225386303664e-05, - "loss": 0.4501, + "learning_rate": 5.32521656644831e-05, + "loss": 0.3252, "step": 664500 }, { "epoch": 0.4, - "learning_rate": 5.32501580974072e-05, - "loss": 0.4534, + "learning_rate": 5.325006989885366e-05, + "loss": 0.3341, "step": 665000 }, { "epoch": 0.4, - "learning_rate": 5.3248062331777756e-05, - "loss": 0.4603, + "learning_rate": 5.324796993329309e-05, + "loss": 0.3363, "step": 665500 }, { "epoch": 0.4, - "learning_rate": 5.3245962366217196e-05, - "loss": 0.4658, + "learning_rate": 5.324586996773253e-05, + "loss": 0.3406, "step": 666000 }, { "epoch": 0.4, - "learning_rate": 5.324386240065663e-05, - "loss": 0.4492, + "learning_rate": 5.324377000217196e-05, + "loss": 0.3266, "step": 666500 }, { "epoch": 0.4, - "learning_rate": 5.324176243509606e-05, - "loss": 0.4495, + "learning_rate": 5.324167423654252e-05, + "loss": 0.3271, "step": 667000 }, { "epoch": 0.4, - "learning_rate": 5.32396624695355e-05, - "loss": 0.4616, + "learning_rate": 5.3239578470913074e-05, + "loss": 0.3371, "step": 667500 }, { "epoch": 0.4, - "learning_rate": 5.323756250397494e-05, - "loss": 0.462, + "learning_rate": 5.3237478505352514e-05, + "loss": 0.3361, "step": 668000 }, { "epoch": 0.4, - "learning_rate": 5.323546673834549e-05, - "loss": 0.4633, + "learning_rate": 5.323537853979195e-05, + "loss": 0.3404, "step": 668500 }, { "epoch": 0.4, - "learning_rate": 5.3233366772784924e-05, - "loss": 0.4594, + "learning_rate": 5.323327857423138e-05, + "loss": 0.3238, "step": 669000 }, { "epoch": 0.4, - "learning_rate": 5.3231266807224364e-05, - "loss": 0.4669, + "learning_rate": 5.323117860867082e-05, + "loss": 0.3314, "step": 669500 }, { "epoch": 0.4, - "learning_rate": 5.32291668416638e-05, - "loss": 0.4514, + "learning_rate": 5.3229078643110255e-05, + "loss": 0.3328, "step": 670000 }, { "epoch": 0.4, - "learning_rate": 5.322706687610323e-05, - "loss": 0.4523, + "learning_rate": 5.322697867754969e-05, + "loss": 0.333, "step": 670500 }, { "epoch": 0.4, - "learning_rate": 5.322496691054267e-05, - "loss": 0.4481, + "learning_rate": 5.322487871198912e-05, + "loss": 0.335, "step": 671000 }, { "epoch": 0.4, - "learning_rate": 5.32228669449821e-05, - "loss": 0.4509, + "learning_rate": 5.3222778746428555e-05, + "loss": 0.3266, "step": 671500 }, { "epoch": 0.4, - "learning_rate": 5.322077117935266e-05, - "loss": 0.4422, + "learning_rate": 5.3220682980799115e-05, + "loss": 0.329, "step": 672000 }, { "epoch": 0.4, - "learning_rate": 5.321867121379209e-05, - "loss": 0.4521, + "learning_rate": 5.321858301523855e-05, + "loss": 0.3301, "step": 672500 }, { "epoch": 0.4, - "learning_rate": 5.321657124823153e-05, - "loss": 0.4576, + "learning_rate": 5.321648304967799e-05, + "loss": 0.3256, "step": 673000 }, { "epoch": 0.4, - "learning_rate": 5.3214471282670965e-05, - "loss": 0.4526, + "learning_rate": 5.3214383084117416e-05, + "loss": 0.3267, "step": 673500 }, { "epoch": 0.4, - "learning_rate": 5.321237551704152e-05, - "loss": 0.4625, + "learning_rate": 5.3212287318487976e-05, + "loss": 0.3315, "step": 674000 }, { "epoch": 0.4, - "learning_rate": 5.321027555148096e-05, - "loss": 0.4474, + "learning_rate": 5.3210187352927416e-05, + "loss": 0.3283, "step": 674500 }, { "epoch": 0.4, - "learning_rate": 5.320817558592039e-05, - "loss": 0.4539, + "learning_rate": 5.320809158729797e-05, + "loss": 0.3326, "step": 675000 }, { "epoch": 0.4, - "learning_rate": 5.3206075620359826e-05, - "loss": 0.4556, + "learning_rate": 5.32059916217374e-05, + "loss": 0.3291, "step": 675500 }, { "epoch": 0.41, - "learning_rate": 5.3203975654799266e-05, - "loss": 0.4568, + "learning_rate": 5.320389165617684e-05, + "loss": 0.3332, "step": 676000 }, { "epoch": 0.41, - "learning_rate": 5.320187568923869e-05, - "loss": 0.4549, + "learning_rate": 5.320179169061628e-05, + "loss": 0.3316, "step": 676500 }, { "epoch": 0.41, - "learning_rate": 5.3199775723678126e-05, - "loss": 0.4563, + "learning_rate": 5.319969172505571e-05, + "loss": 0.3366, "step": 677000 }, { "epoch": 0.41, - "learning_rate": 5.3197675758117567e-05, - "loss": 0.4653, + "learning_rate": 5.3197591759495144e-05, + "loss": 0.3398, "step": 677500 }, { "epoch": 0.41, - "learning_rate": 5.319558419241924e-05, - "loss": 0.4474, + "learning_rate": 5.319549179393458e-05, + "loss": 0.3257, "step": 678000 }, { "epoch": 0.41, - "learning_rate": 5.319348422685868e-05, - "loss": 0.4481, + "learning_rate": 5.319339182837401e-05, + "loss": 0.335, "step": 678500 }, { "epoch": 0.41, - "learning_rate": 5.3191384261298114e-05, - "loss": 0.4532, + "learning_rate": 5.3191291862813444e-05, + "loss": 0.3217, "step": 679000 }, { "epoch": 0.41, - "learning_rate": 5.318928429573755e-05, - "loss": 0.4522, + "learning_rate": 5.3189196097184005e-05, + "loss": 0.3294, "step": 679500 }, { "epoch": 0.41, - "learning_rate": 5.318718433017699e-05, - "loss": 0.4516, + "learning_rate": 5.3187096131623445e-05, + "loss": 0.3327, "step": 680000 }, { "epoch": 0.41, - "learning_rate": 5.318508856454754e-05, - "loss": 0.4476, + "learning_rate": 5.318499616606287e-05, + "loss": 0.3303, "step": 680500 }, { "epoch": 0.41, - "learning_rate": 5.3182988598986975e-05, - "loss": 0.4529, + "learning_rate": 5.318289620050231e-05, + "loss": 0.3216, "step": 681000 }, { "epoch": 0.41, - "learning_rate": 5.318089283335753e-05, - "loss": 0.4511, + "learning_rate": 5.3180796234941745e-05, + "loss": 0.326, "step": 681500 }, { "epoch": 0.41, - "learning_rate": 5.317879706772809e-05, - "loss": 0.4487, + "learning_rate": 5.317869626938118e-05, + "loss": 0.3292, "step": 682000 }, { "epoch": 0.41, - "learning_rate": 5.317669710216752e-05, - "loss": 0.4599, + "learning_rate": 5.317659630382062e-05, + "loss": 0.3343, "step": 682500 }, { "epoch": 0.41, - "learning_rate": 5.3174597136606956e-05, - "loss": 0.4508, + "learning_rate": 5.317450053819117e-05, + "loss": 0.3236, "step": 683000 }, { "epoch": 0.41, - "learning_rate": 5.317249717104639e-05, - "loss": 0.459, + "learning_rate": 5.3172400572630606e-05, + "loss": 0.3307, "step": 683500 }, { "epoch": 0.41, - "learning_rate": 5.317039720548583e-05, - "loss": 0.448, + "learning_rate": 5.317030060707004e-05, + "loss": 0.3311, "step": 684000 }, { "epoch": 0.41, - "learning_rate": 5.316829723992526e-05, - "loss": 0.4419, + "learning_rate": 5.316820064150948e-05, + "loss": 0.3242, "step": 684500 }, { "epoch": 0.41, - "learning_rate": 5.3166197274364696e-05, - "loss": 0.448, + "learning_rate": 5.316610487588004e-05, + "loss": 0.3297, "step": 685000 }, { "epoch": 0.41, - "learning_rate": 5.3164097308804136e-05, - "loss": 0.4495, + "learning_rate": 5.316400491031947e-05, + "loss": 0.3259, "step": 685500 }, { "epoch": 0.41, - "learning_rate": 5.316199734324357e-05, - "loss": 0.4488, + "learning_rate": 5.31619049447589e-05, + "loss": 0.3293, "step": 686000 }, { "epoch": 0.41, - "learning_rate": 5.3159897377683e-05, - "loss": 0.4545, + "learning_rate": 5.315980497919834e-05, + "loss": 0.3326, "step": 686500 }, { "epoch": 0.41, - "learning_rate": 5.3157797412122444e-05, - "loss": 0.4565, + "learning_rate": 5.3157705013637774e-05, + "loss": 0.3336, "step": 687000 }, { "epoch": 0.41, - "learning_rate": 5.315569744656188e-05, - "loss": 0.4453, + "learning_rate": 5.3155609248008334e-05, + "loss": 0.3361, "step": 687500 }, { "epoch": 0.41, - "learning_rate": 5.315359748100131e-05, - "loss": 0.4494, + "learning_rate": 5.315350928244777e-05, + "loss": 0.3315, "step": 688000 }, { "epoch": 0.41, - "learning_rate": 5.3151497515440744e-05, - "loss": 0.4491, + "learning_rate": 5.315141351681833e-05, + "loss": 0.3355, "step": 688500 }, { "epoch": 0.41, - "learning_rate": 5.3149401749811304e-05, - "loss": 0.4695, + "learning_rate": 5.314931355125776e-05, + "loss": 0.3379, "step": 689000 }, { "epoch": 0.41, - "learning_rate": 5.314730178425074e-05, - "loss": 0.4485, + "learning_rate": 5.3147213585697195e-05, + "loss": 0.3265, "step": 689500 }, { "epoch": 0.41, - "learning_rate": 5.314520181869018e-05, - "loss": 0.4554, + "learning_rate": 5.314511362013663e-05, + "loss": 0.3354, "step": 690000 }, { "epoch": 0.41, - "learning_rate": 5.3143101853129605e-05, - "loss": 0.4438, + "learning_rate": 5.314301365457606e-05, + "loss": 0.3246, "step": 690500 }, { "epoch": 0.41, - "learning_rate": 5.314100188756904e-05, - "loss": 0.4545, + "learning_rate": 5.3140913689015495e-05, + "loss": 0.3309, "step": 691000 }, { "epoch": 0.41, - "learning_rate": 5.313890192200848e-05, - "loss": 0.4571, + "learning_rate": 5.3138813723454936e-05, + "loss": 0.3353, "step": 691500 }, { "epoch": 0.41, - "learning_rate": 5.313680195644791e-05, - "loss": 0.458, + "learning_rate": 5.313671375789437e-05, + "loss": 0.3314, "step": 692000 }, { "epoch": 0.42, - "learning_rate": 5.3134701990887345e-05, - "loss": 0.454, + "learning_rate": 5.31346137923338e-05, + "loss": 0.3287, "step": 692500 }, { "epoch": 0.42, - "learning_rate": 5.31326062252579e-05, - "loss": 0.4603, + "learning_rate": 5.3132518026704356e-05, + "loss": 0.3408, "step": 693000 }, { "epoch": 0.42, - "learning_rate": 5.3130510459628466e-05, - "loss": 0.453, + "learning_rate": 5.3130418061143796e-05, + "loss": 0.3333, "step": 693500 }, { "epoch": 0.42, - "learning_rate": 5.31284104940679e-05, - "loss": 0.4468, + "learning_rate": 5.312831809558323e-05, + "loss": 0.326, "step": 694000 }, { "epoch": 0.42, - "learning_rate": 5.312631052850733e-05, - "loss": 0.4517, + "learning_rate": 5.312621813002266e-05, + "loss": 0.328, "step": 694500 }, { "epoch": 0.42, - "learning_rate": 5.312421056294677e-05, - "loss": 0.452, + "learning_rate": 5.3124118164462103e-05, + "loss": 0.3326, "step": 695000 }, { "epoch": 0.42, - "learning_rate": 5.312211479731733e-05, - "loss": 0.4458, + "learning_rate": 5.312201819890154e-05, + "loss": 0.325, "step": 695500 }, { "epoch": 0.42, - "learning_rate": 5.312001483175676e-05, - "loss": 0.4548, + "learning_rate": 5.311992243327209e-05, + "loss": 0.3272, "step": 696000 }, { "epoch": 0.42, - "learning_rate": 5.3117914866196194e-05, - "loss": 0.4561, + "learning_rate": 5.311782246771153e-05, + "loss": 0.3303, "step": 696500 }, { "epoch": 0.42, - "learning_rate": 5.3115814900635634e-05, - "loss": 0.4511, + "learning_rate": 5.3115722502150964e-05, + "loss": 0.3323, "step": 697000 }, { "epoch": 0.42, - "learning_rate": 5.311371493507506e-05, - "loss": 0.4478, + "learning_rate": 5.31136225365904e-05, + "loss": 0.3288, "step": 697500 }, { "epoch": 0.42, - "learning_rate": 5.3111614969514494e-05, - "loss": 0.4571, + "learning_rate": 5.311152257102984e-05, + "loss": 0.3301, "step": 698000 }, { "epoch": 0.42, - "learning_rate": 5.3109515003953934e-05, - "loss": 0.4482, + "learning_rate": 5.310942260546927e-05, + "loss": 0.3222, "step": 698500 }, { "epoch": 0.42, - "learning_rate": 5.310741503839337e-05, - "loss": 0.4553, + "learning_rate": 5.3107322639908705e-05, + "loss": 0.3273, "step": 699000 }, { "epoch": 0.42, - "learning_rate": 5.31053150728328e-05, - "loss": 0.4543, + "learning_rate": 5.310522687427926e-05, + "loss": 0.3328, "step": 699500 }, { "epoch": 0.42, - "learning_rate": 5.310321510727224e-05, - "loss": 0.4582, + "learning_rate": 5.310313110864981e-05, + "loss": 0.3345, "step": 700000 }, { "epoch": 0.42, - "eval_loss": 0.4269881248474121, - "eval_runtime": 1108.4352, - "eval_samples_per_second": 475.192, - "eval_steps_per_second": 79.199, + "eval_loss": 0.29597923159599304, + "eval_runtime": 1469.5457, + "eval_samples_per_second": 358.424, + "eval_steps_per_second": 59.738, "step": 700000 }, { "epoch": 0.42, - "learning_rate": 5.3101115141711675e-05, - "loss": 0.4571, + "learning_rate": 5.310103114308925e-05, + "loss": 0.3313, "step": 700500 }, { "epoch": 0.42, - "learning_rate": 5.309901937608223e-05, - "loss": 0.4621, + "learning_rate": 5.3098931177528686e-05, + "loss": 0.336, "step": 701000 }, { "epoch": 0.42, - "learning_rate": 5.309691941052167e-05, - "loss": 0.4606, + "learning_rate": 5.309683121196812e-05, + "loss": 0.3305, "step": 701500 }, { "epoch": 0.42, - "learning_rate": 5.30948194449611e-05, - "loss": 0.4538, + "learning_rate": 5.309473124640756e-05, + "loss": 0.3345, "step": 702000 }, { "epoch": 0.42, - "learning_rate": 5.3092719479400536e-05, - "loss": 0.4627, + "learning_rate": 5.309263548077811e-05, + "loss": 0.3288, "step": 702500 }, { "epoch": 0.42, - "learning_rate": 5.3090619513839976e-05, - "loss": 0.4532, + "learning_rate": 5.3090535515217546e-05, + "loss": 0.3291, "step": 703000 }, { "epoch": 0.42, - "learning_rate": 5.308851954827941e-05, - "loss": 0.4618, + "learning_rate": 5.3088435549656987e-05, + "loss": 0.3327, "step": 703500 }, { "epoch": 0.42, - "learning_rate": 5.308642378264996e-05, - "loss": 0.455, + "learning_rate": 5.308633558409642e-05, + "loss": 0.328, "step": 704000 }, { "epoch": 0.42, - "learning_rate": 5.3084323817089396e-05, - "loss": 0.4536, + "learning_rate": 5.3084235618535854e-05, + "loss": 0.3292, "step": 704500 }, { "epoch": 0.42, - "learning_rate": 5.3082223851528837e-05, - "loss": 0.4464, + "learning_rate": 5.3082135652975294e-05, + "loss": 0.3257, "step": 705000 }, { "epoch": 0.42, - "learning_rate": 5.308012388596827e-05, - "loss": 0.4518, + "learning_rate": 5.308003568741473e-05, + "loss": 0.335, "step": 705500 }, { "epoch": 0.42, - "learning_rate": 5.3078023920407703e-05, - "loss": 0.4437, + "learning_rate": 5.307793572185416e-05, + "loss": 0.3255, "step": 706000 }, { "epoch": 0.42, - "learning_rate": 5.3075923954847144e-05, - "loss": 0.4384, + "learning_rate": 5.30758357562936e-05, + "loss": 0.3195, "step": 706500 }, { "epoch": 0.42, - "learning_rate": 5.307382398928658e-05, - "loss": 0.4546, + "learning_rate": 5.3073739990664154e-05, + "loss": 0.3236, "step": 707000 }, { "epoch": 0.42, - "learning_rate": 5.307172402372601e-05, - "loss": 0.4364, + "learning_rate": 5.307164002510359e-05, + "loss": 0.3232, "step": 707500 }, { "epoch": 0.42, - "learning_rate": 5.3069628258096564e-05, - "loss": 0.4505, + "learning_rate": 5.306954005954302e-05, + "loss": 0.3292, "step": 708000 }, { "epoch": 0.42, - "learning_rate": 5.3067532492467125e-05, - "loss": 0.4478, + "learning_rate": 5.306744009398246e-05, + "loss": 0.3243, "step": 708500 }, { "epoch": 0.43, - "learning_rate": 5.306543252690656e-05, - "loss": 0.4562, + "learning_rate": 5.3065340128421895e-05, + "loss": 0.3323, "step": 709000 }, { "epoch": 0.43, - "learning_rate": 5.306333256134599e-05, - "loss": 0.4514, + "learning_rate": 5.306324436279245e-05, + "loss": 0.3312, "step": 709500 }, { "epoch": 0.43, - "learning_rate": 5.306123259578543e-05, - "loss": 0.454, + "learning_rate": 5.306114439723188e-05, + "loss": 0.3272, "step": 710000 }, { "epoch": 0.43, - "learning_rate": 5.3059132630224865e-05, - "loss": 0.4432, + "learning_rate": 5.305904443167132e-05, + "loss": 0.3238, "step": 710500 }, { "epoch": 0.43, - "learning_rate": 5.30570326646643e-05, - "loss": 0.4397, + "learning_rate": 5.3056944466110756e-05, + "loss": 0.3236, "step": 711000 }, { "epoch": 0.43, - "learning_rate": 5.305493269910374e-05, - "loss": 0.4571, + "learning_rate": 5.305484450055019e-05, + "loss": 0.3285, "step": 711500 }, { "epoch": 0.43, - "learning_rate": 5.305283273354317e-05, - "loss": 0.4442, + "learning_rate": 5.305274453498963e-05, + "loss": 0.3244, "step": 712000 }, { "epoch": 0.43, - "learning_rate": 5.3050736967913726e-05, - "loss": 0.445, + "learning_rate": 5.3050644569429056e-05, + "loss": 0.3238, "step": 712500 }, { "epoch": 0.43, - "learning_rate": 5.304864120228428e-05, - "loss": 0.4491, + "learning_rate": 5.3048544603868496e-05, + "loss": 0.3308, "step": 713000 }, { "epoch": 0.43, - "learning_rate": 5.304654123672371e-05, - "loss": 0.4447, + "learning_rate": 5.304644463830793e-05, + "loss": 0.3269, "step": 713500 }, { "epoch": 0.43, - "learning_rate": 5.304444127116315e-05, - "loss": 0.4656, + "learning_rate": 5.304434887267849e-05, + "loss": 0.3323, "step": 714000 }, { "epoch": 0.43, - "learning_rate": 5.3042341305602587e-05, - "loss": 0.4451, + "learning_rate": 5.304224890711792e-05, + "loss": 0.3229, "step": 714500 }, { "epoch": 0.43, - "learning_rate": 5.304024134004202e-05, - "loss": 0.444, + "learning_rate": 5.304015314148848e-05, + "loss": 0.3265, "step": 715000 }, { "epoch": 0.43, - "learning_rate": 5.303814137448146e-05, - "loss": 0.4479, + "learning_rate": 5.303805317592792e-05, + "loss": 0.325, "step": 715500 }, { "epoch": 0.43, - "learning_rate": 5.3036041408920894e-05, - "loss": 0.4543, + "learning_rate": 5.303595321036735e-05, + "loss": 0.3275, "step": 716000 }, { "epoch": 0.43, - "learning_rate": 5.303394564329145e-05, - "loss": 0.4549, + "learning_rate": 5.3033853244806784e-05, + "loss": 0.3336, "step": 716500 }, { "epoch": 0.43, - "learning_rate": 5.303184567773089e-05, - "loss": 0.4589, + "learning_rate": 5.303175327924622e-05, + "loss": 0.3361, "step": 717000 }, { "epoch": 0.43, - "learning_rate": 5.302974571217032e-05, - "loss": 0.4537, + "learning_rate": 5.302965751361678e-05, + "loss": 0.3331, "step": 717500 }, { "epoch": 0.43, - "learning_rate": 5.3027645746609754e-05, - "loss": 0.4489, + "learning_rate": 5.302755754805621e-05, + "loss": 0.3347, "step": 718000 }, { "epoch": 0.43, - "learning_rate": 5.3025545781049195e-05, - "loss": 0.4559, + "learning_rate": 5.3025461782426765e-05, + "loss": 0.3362, "step": 718500 }, { "epoch": 0.43, - "learning_rate": 5.302344581548863e-05, - "loss": 0.4496, + "learning_rate": 5.3023361816866206e-05, + "loss": 0.3261, "step": 719000 }, { "epoch": 0.43, - "learning_rate": 5.302134584992806e-05, - "loss": 0.4671, + "learning_rate": 5.302126185130564e-05, + "loss": 0.3369, "step": 719500 }, { "epoch": 0.43, - "learning_rate": 5.3019245884367495e-05, - "loss": 0.4498, + "learning_rate": 5.301916188574507e-05, + "loss": 0.33, "step": 720000 }, { "epoch": 0.43, - "learning_rate": 5.3017150118738055e-05, - "loss": 0.4406, + "learning_rate": 5.301706192018451e-05, + "loss": 0.3248, "step": 720500 }, { "epoch": 0.43, - "learning_rate": 5.301505015317749e-05, - "loss": 0.4509, + "learning_rate": 5.3014961954623946e-05, + "loss": 0.3338, "step": 721000 }, { "epoch": 0.43, - "learning_rate": 5.301295018761692e-05, - "loss": 0.4578, + "learning_rate": 5.301286198906338e-05, + "loss": 0.3355, "step": 721500 }, { "epoch": 0.43, - "learning_rate": 5.301085022205636e-05, - "loss": 0.4481, + "learning_rate": 5.301076202350281e-05, + "loss": 0.326, "step": 722000 }, { "epoch": 0.43, - "learning_rate": 5.3008754456426916e-05, - "loss": 0.4471, + "learning_rate": 5.3008662057942246e-05, + "loss": 0.3249, "step": 722500 }, { "epoch": 0.43, - "learning_rate": 5.300665449086635e-05, - "loss": 0.4489, + "learning_rate": 5.300656209238168e-05, + "loss": 0.3276, "step": 723000 }, { "epoch": 0.43, - "learning_rate": 5.300455452530578e-05, - "loss": 0.4407, + "learning_rate": 5.300446212682112e-05, + "loss": 0.3284, "step": 723500 }, { "epoch": 0.43, - "learning_rate": 5.300245455974522e-05, - "loss": 0.4565, + "learning_rate": 5.3002362161260554e-05, + "loss": 0.3316, "step": 724000 }, { "epoch": 0.43, - "learning_rate": 5.300035879411578e-05, - "loss": 0.4565, + "learning_rate": 5.3000270595562234e-05, + "loss": 0.3282, "step": 724500 }, { "epoch": 0.43, - "learning_rate": 5.299825882855521e-05, - "loss": 0.447, + "learning_rate": 5.299817063000167e-05, + "loss": 0.3303, "step": 725000 }, { "epoch": 0.43, - "learning_rate": 5.299615886299465e-05, - "loss": 0.4546, + "learning_rate": 5.299607066444111e-05, + "loss": 0.3337, "step": 725500 }, { "epoch": 0.44, - "learning_rate": 5.2994063097365204e-05, - "loss": 0.4453, + "learning_rate": 5.299397489881166e-05, + "loss": 0.3235, "step": 726000 }, { "epoch": 0.44, - "learning_rate": 5.299196313180464e-05, - "loss": 0.4492, + "learning_rate": 5.2991874933251095e-05, + "loss": 0.3282, "step": 726500 }, { "epoch": 0.44, - "learning_rate": 5.298986316624407e-05, - "loss": 0.4428, + "learning_rate": 5.298977496769053e-05, + "loss": 0.3264, "step": 727000 }, { "epoch": 0.44, - "learning_rate": 5.298776320068351e-05, - "loss": 0.4514, + "learning_rate": 5.298767500212997e-05, + "loss": 0.3348, "step": 727500 }, { "epoch": 0.44, - "learning_rate": 5.2985663235122945e-05, - "loss": 0.4514, + "learning_rate": 5.29855750365694e-05, + "loss": 0.3255, "step": 728000 }, { "epoch": 0.44, - "learning_rate": 5.298356326956238e-05, - "loss": 0.4422, + "learning_rate": 5.2983475071008835e-05, + "loss": 0.3303, "step": 728500 }, { "epoch": 0.44, - "learning_rate": 5.298146330400182e-05, - "loss": 0.441, + "learning_rate": 5.298137510544827e-05, + "loss": 0.3232, "step": 729000 }, { "epoch": 0.44, - "learning_rate": 5.2979363338441245e-05, - "loss": 0.4472, + "learning_rate": 5.29792751398877e-05, + "loss": 0.3225, "step": 729500 }, { "epoch": 0.44, - "learning_rate": 5.2977267572811806e-05, - "loss": 0.4494, + "learning_rate": 5.297717937425826e-05, + "loss": 0.3305, "step": 730000 }, { "epoch": 0.44, - "learning_rate": 5.297516760725124e-05, - "loss": 0.4444, + "learning_rate": 5.2975079408697696e-05, + "loss": 0.3245, "step": 730500 }, { "epoch": 0.44, - "learning_rate": 5.297306764169068e-05, - "loss": 0.4507, + "learning_rate": 5.2972979443137136e-05, + "loss": 0.3263, "step": 731000 }, { "epoch": 0.44, - "learning_rate": 5.297097187606123e-05, - "loss": 0.4621, + "learning_rate": 5.297087947757656e-05, + "loss": 0.327, "step": 731500 }, { "epoch": 0.44, - "learning_rate": 5.2968871910500666e-05, - "loss": 0.4545, + "learning_rate": 5.2968779512016e-05, + "loss": 0.3261, "step": 732000 }, { "epoch": 0.44, - "learning_rate": 5.2966771944940106e-05, - "loss": 0.4473, + "learning_rate": 5.296667954645544e-05, + "loss": 0.3301, "step": 732500 }, { "epoch": 0.44, - "learning_rate": 5.296467197937954e-05, - "loss": 0.4438, + "learning_rate": 5.296457958089487e-05, + "loss": 0.3223, "step": 733000 }, { "epoch": 0.44, - "learning_rate": 5.296257201381897e-05, - "loss": 0.4627, + "learning_rate": 5.296247961533431e-05, + "loss": 0.3382, "step": 733500 }, { "epoch": 0.44, - "learning_rate": 5.296047204825841e-05, - "loss": 0.4542, + "learning_rate": 5.2960379649773744e-05, + "loss": 0.3252, "step": 734000 }, { "epoch": 0.44, - "learning_rate": 5.295837208269784e-05, - "loss": 0.4528, + "learning_rate": 5.29582838841443e-05, + "loss": 0.331, "step": 734500 }, { "epoch": 0.44, - "learning_rate": 5.2956272117137274e-05, - "loss": 0.44, + "learning_rate": 5.295618391858373e-05, + "loss": 0.3215, "step": 735000 }, { "epoch": 0.44, - "learning_rate": 5.2954172151576714e-05, - "loss": 0.4465, + "learning_rate": 5.295408815295429e-05, + "loss": 0.3293, "step": 735500 }, { "epoch": 0.44, - "learning_rate": 5.2952076385947274e-05, - "loss": 0.4552, + "learning_rate": 5.2951988187393725e-05, + "loss": 0.3329, "step": 736000 }, { "epoch": 0.44, - "learning_rate": 5.29499764203867e-05, - "loss": 0.4448, + "learning_rate": 5.294988822183316e-05, + "loss": 0.3253, "step": 736500 }, { "epoch": 0.44, - "learning_rate": 5.2947876454826134e-05, - "loss": 0.4532, + "learning_rate": 5.294778825627259e-05, + "loss": 0.3258, "step": 737000 }, { "epoch": 0.44, - "learning_rate": 5.2945776489265575e-05, - "loss": 0.4518, + "learning_rate": 5.294568829071203e-05, + "loss": 0.3254, "step": 737500 }, { "epoch": 0.44, - "learning_rate": 5.2943684923567255e-05, - "loss": 0.4466, + "learning_rate": 5.2943588325151465e-05, + "loss": 0.334, "step": 738000 }, { "epoch": 0.44, - "learning_rate": 5.294158495800669e-05, - "loss": 0.4488, + "learning_rate": 5.29414883595909e-05, + "loss": 0.3301, "step": 738500 }, { "epoch": 0.44, - "learning_rate": 5.293948499244612e-05, - "loss": 0.4484, + "learning_rate": 5.293938839403034e-05, + "loss": 0.327, "step": 739000 }, { "epoch": 0.44, - "learning_rate": 5.293738502688556e-05, - "loss": 0.4589, + "learning_rate": 5.293729262840089e-05, + "loss": 0.3295, "step": 739500 }, { "epoch": 0.44, - "learning_rate": 5.2935285061324996e-05, - "loss": 0.4436, + "learning_rate": 5.2935192662840326e-05, + "loss": 0.3198, "step": 740000 }, { "epoch": 0.44, - "learning_rate": 5.293318929569555e-05, - "loss": 0.4546, + "learning_rate": 5.2933096897210886e-05, + "loss": 0.331, "step": 740500 }, { "epoch": 0.44, - "learning_rate": 5.293108933013498e-05, - "loss": 0.4572, + "learning_rate": 5.293100113158144e-05, + "loss": 0.3222, "step": 741000 }, { "epoch": 0.44, - "learning_rate": 5.292898936457442e-05, - "loss": 0.4454, + "learning_rate": 5.292890116602088e-05, + "loss": 0.3216, "step": 741500 }, { "epoch": 0.44, - "learning_rate": 5.2926889399013857e-05, - "loss": 0.4524, + "learning_rate": 5.2926805400391434e-05, + "loss": 0.3259, "step": 742000 }, { "epoch": 0.45, - "learning_rate": 5.292478943345329e-05, - "loss": 0.4532, + "learning_rate": 5.292470543483087e-05, + "loss": 0.3311, "step": 742500 }, { "epoch": 0.45, - "learning_rate": 5.292268946789273e-05, - "loss": 0.4563, + "learning_rate": 5.29226054692703e-05, + "loss": 0.3258, "step": 743000 }, { "epoch": 0.45, - "learning_rate": 5.2920589502332164e-05, - "loss": 0.4524, + "learning_rate": 5.292050550370974e-05, + "loss": 0.3283, "step": 743500 }, { "epoch": 0.45, - "learning_rate": 5.291848953677159e-05, - "loss": 0.4459, + "learning_rate": 5.2918405538149174e-05, + "loss": 0.3328, "step": 744000 }, { "epoch": 0.45, - "learning_rate": 5.291639377114215e-05, - "loss": 0.4542, + "learning_rate": 5.291630977251973e-05, + "loss": 0.3297, "step": 744500 }, { "epoch": 0.45, - "learning_rate": 5.291429380558159e-05, - "loss": 0.4523, + "learning_rate": 5.291420980695917e-05, + "loss": 0.326, "step": 745000 }, { "epoch": 0.45, - "learning_rate": 5.2912193840021024e-05, - "loss": 0.4449, + "learning_rate": 5.29121098413986e-05, + "loss": 0.3237, "step": 745500 }, { "epoch": 0.45, - "learning_rate": 5.291009387446046e-05, - "loss": 0.4411, + "learning_rate": 5.2910009875838035e-05, + "loss": 0.3216, "step": 746000 }, { "epoch": 0.45, - "learning_rate": 5.290799810883102e-05, - "loss": 0.4444, + "learning_rate": 5.2907909910277475e-05, + "loss": 0.3296, "step": 746500 }, { "epoch": 0.45, - "learning_rate": 5.290589814327045e-05, - "loss": 0.4446, + "learning_rate": 5.290580994471691e-05, + "loss": 0.3271, "step": 747000 }, { "epoch": 0.45, - "learning_rate": 5.2903798177709885e-05, - "loss": 0.4558, + "learning_rate": 5.290370997915634e-05, + "loss": 0.3305, "step": 747500 }, { "epoch": 0.45, - "learning_rate": 5.2901698212149325e-05, - "loss": 0.4553, + "learning_rate": 5.2901610013595776e-05, + "loss": 0.3307, "step": 748000 }, { "epoch": 0.45, - "learning_rate": 5.289959824658875e-05, - "loss": 0.4408, + "learning_rate": 5.289951004803521e-05, + "loss": 0.3245, "step": 748500 }, { "epoch": 0.45, - "learning_rate": 5.2897498281028186e-05, - "loss": 0.4449, + "learning_rate": 5.289741008247464e-05, + "loss": 0.3245, "step": 749000 }, { "epoch": 0.45, - "learning_rate": 5.2895398315467626e-05, - "loss": 0.4527, + "learning_rate": 5.289531011691408e-05, + "loss": 0.333, "step": 749500 }, { "epoch": 0.45, - "learning_rate": 5.289329834990706e-05, - "loss": 0.4487, + "learning_rate": 5.2893210151353516e-05, + "loss": 0.3245, "step": 750000 }, { "epoch": 0.45, - "learning_rate": 5.289119838434649e-05, - "loss": 0.4545, + "learning_rate": 5.289111018579295e-05, + "loss": 0.3305, "step": 750500 }, { "epoch": 0.45, - "learning_rate": 5.2889102618717046e-05, - "loss": 0.4516, + "learning_rate": 5.288901022023239e-05, + "loss": 0.3293, "step": 751000 }, { "epoch": 0.45, - "learning_rate": 5.2887002653156486e-05, - "loss": 0.4579, + "learning_rate": 5.2886910254671824e-05, + "loss": 0.3316, "step": 751500 }, { "epoch": 0.45, - "learning_rate": 5.288490268759592e-05, - "loss": 0.4509, + "learning_rate": 5.288481028911126e-05, + "loss": 0.3315, "step": 752000 }, { "epoch": 0.45, - "learning_rate": 5.288280272203536e-05, - "loss": 0.4582, + "learning_rate": 5.288271452348181e-05, + "loss": 0.336, "step": 752500 }, { "epoch": 0.45, - "learning_rate": 5.2880711156337034e-05, - "loss": 0.4493, + "learning_rate": 5.288061875785237e-05, + "loss": 0.3277, "step": 753000 }, { "epoch": 0.45, - "learning_rate": 5.2878611190776474e-05, - "loss": 0.4605, + "learning_rate": 5.2878518792291804e-05, + "loss": 0.3366, "step": 753500 }, { "epoch": 0.45, - "learning_rate": 5.287651122521591e-05, - "loss": 0.4387, + "learning_rate": 5.287641882673124e-05, + "loss": 0.3277, "step": 754000 }, { "epoch": 0.45, - "learning_rate": 5.287441125965534e-05, - "loss": 0.4578, + "learning_rate": 5.287431886117068e-05, + "loss": 0.3304, "step": 754500 }, { "epoch": 0.45, - "learning_rate": 5.287231129409478e-05, - "loss": 0.4476, + "learning_rate": 5.287221889561011e-05, + "loss": 0.3248, "step": 755000 }, { "epoch": 0.45, - "learning_rate": 5.287021972839645e-05, - "loss": 0.4494, + "learning_rate": 5.2870118930049545e-05, + "loss": 0.3234, "step": 755500 }, { "epoch": 0.45, - "learning_rate": 5.286811976283589e-05, - "loss": 0.4319, + "learning_rate": 5.2868018964488985e-05, + "loss": 0.3198, "step": 756000 }, { "epoch": 0.45, - "learning_rate": 5.286601979727532e-05, - "loss": 0.4443, + "learning_rate": 5.286591899892842e-05, + "loss": 0.3313, "step": 756500 }, { "epoch": 0.45, - "learning_rate": 5.2863919831714755e-05, - "loss": 0.4436, + "learning_rate": 5.286382323329897e-05, + "loss": 0.3204, "step": 757000 }, { "epoch": 0.45, - "learning_rate": 5.2861819866154196e-05, - "loss": 0.4432, + "learning_rate": 5.2861723267738406e-05, + "loss": 0.322, "step": 757500 }, { "epoch": 0.45, - "learning_rate": 5.285971990059363e-05, - "loss": 0.4387, + "learning_rate": 5.2859623302177846e-05, + "loss": 0.3185, "step": 758000 }, { "epoch": 0.45, - "learning_rate": 5.285761993503306e-05, - "loss": 0.4475, + "learning_rate": 5.285752333661728e-05, + "loss": 0.328, "step": 758500 }, { "epoch": 0.46, - "learning_rate": 5.28555199694725e-05, - "loss": 0.4494, + "learning_rate": 5.285542757098783e-05, + "loss": 0.331, "step": 759000 }, { "epoch": 0.46, - "learning_rate": 5.2853420003911936e-05, - "loss": 0.4418, + "learning_rate": 5.2853331805358393e-05, + "loss": 0.3228, "step": 759500 }, { "epoch": 0.46, - "learning_rate": 5.2851320038351376e-05, - "loss": 0.4497, + "learning_rate": 5.285123183979783e-05, + "loss": 0.3271, "step": 760000 }, { "epoch": 0.46, - "learning_rate": 5.28492200727908e-05, - "loss": 0.4469, + "learning_rate": 5.284913187423726e-05, + "loss": 0.3274, "step": 760500 }, { "epoch": 0.46, - "learning_rate": 5.2847120107230237e-05, - "loss": 0.4565, + "learning_rate": 5.2847031908676694e-05, + "loss": 0.3265, "step": 761000 }, { "epoch": 0.46, - "learning_rate": 5.28450243416008e-05, - "loss": 0.4353, + "learning_rate": 5.2844931943116134e-05, + "loss": 0.3193, "step": 761500 }, { "epoch": 0.46, - "learning_rate": 5.284292857597135e-05, - "loss": 0.4496, + "learning_rate": 5.284283197755557e-05, + "loss": 0.3278, "step": 762000 }, { "epoch": 0.46, - "learning_rate": 5.284082861041079e-05, - "loss": 0.4501, + "learning_rate": 5.2840732011995e-05, + "loss": 0.3272, "step": 762500 }, { "epoch": 0.46, - "learning_rate": 5.2838728644850224e-05, - "loss": 0.4437, + "learning_rate": 5.283863204643444e-05, + "loss": 0.323, "step": 763000 }, { "epoch": 0.46, - "learning_rate": 5.283662867928966e-05, - "loss": 0.4548, + "learning_rate": 5.2836532080873875e-05, + "loss": 0.329, "step": 763500 }, { "epoch": 0.46, - "learning_rate": 5.28345287137291e-05, - "loss": 0.4443, + "learning_rate": 5.283443631524443e-05, + "loss": 0.3248, "step": 764000 }, { "epoch": 0.46, - "learning_rate": 5.283242874816853e-05, - "loss": 0.4521, + "learning_rate": 5.283233634968386e-05, + "loss": 0.3248, "step": 764500 }, { "epoch": 0.46, - "learning_rate": 5.283032878260796e-05, - "loss": 0.4486, + "learning_rate": 5.28302363841233e-05, + "loss": 0.3242, "step": 765000 }, { "epoch": 0.46, - "learning_rate": 5.28282288170474e-05, - "loss": 0.461, + "learning_rate": 5.2828136418562735e-05, + "loss": 0.3271, "step": 765500 }, { "epoch": 0.46, - "learning_rate": 5.282613305141796e-05, - "loss": 0.4415, + "learning_rate": 5.282604065293329e-05, + "loss": 0.322, "step": 766000 }, { "epoch": 0.46, - "learning_rate": 5.282403308585739e-05, - "loss": 0.4386, + "learning_rate": 5.282394068737272e-05, + "loss": 0.3235, "step": 766500 }, { "epoch": 0.46, - "learning_rate": 5.2821937320227946e-05, - "loss": 0.4413, + "learning_rate": 5.282184492174328e-05, + "loss": 0.3258, "step": 767000 }, { "epoch": 0.46, - "learning_rate": 5.2819837354667386e-05, - "loss": 0.4485, + "learning_rate": 5.2819744956182716e-05, + "loss": 0.3233, "step": 767500 }, { "epoch": 0.46, - "learning_rate": 5.281773738910682e-05, - "loss": 0.4467, + "learning_rate": 5.281764499062215e-05, + "loss": 0.3274, "step": 768000 }, { "epoch": 0.46, - "learning_rate": 5.281563742354625e-05, - "loss": 0.4419, + "learning_rate": 5.281554502506159e-05, + "loss": 0.3273, "step": 768500 }, { "epoch": 0.46, - "learning_rate": 5.281353745798569e-05, - "loss": 0.4469, + "learning_rate": 5.281344925943215e-05, + "loss": 0.3311, "step": 769000 }, { "epoch": 0.46, - "learning_rate": 5.2811437492425126e-05, - "loss": 0.4462, + "learning_rate": 5.281134929387158e-05, + "loss": 0.3256, "step": 769500 }, { "epoch": 0.46, - "learning_rate": 5.280933752686455e-05, - "loss": 0.4403, + "learning_rate": 5.280925352824214e-05, + "loss": 0.3238, "step": 770000 }, { "epoch": 0.46, - "learning_rate": 5.2807241761235114e-05, - "loss": 0.4349, + "learning_rate": 5.280715356268157e-05, + "loss": 0.3227, "step": 770500 }, { "epoch": 0.46, - "learning_rate": 5.2805141795674554e-05, - "loss": 0.4533, + "learning_rate": 5.280505359712101e-05, + "loss": 0.3307, "step": 771000 }, { "epoch": 0.46, - "learning_rate": 5.280304183011399e-05, - "loss": 0.449, + "learning_rate": 5.2802953631560444e-05, + "loss": 0.323, "step": 771500 }, { "epoch": 0.46, - "learning_rate": 5.280094186455342e-05, - "loss": 0.4477, + "learning_rate": 5.280085366599987e-05, + "loss": 0.3309, "step": 772000 }, { "epoch": 0.46, - "learning_rate": 5.2798841898992854e-05, - "loss": 0.4441, + "learning_rate": 5.279875370043931e-05, + "loss": 0.3226, "step": 772500 }, { "epoch": 0.46, - "learning_rate": 5.2796746133363414e-05, - "loss": 0.4492, + "learning_rate": 5.2796653734878745e-05, + "loss": 0.3269, "step": 773000 }, { "epoch": 0.46, - "learning_rate": 5.279464616780285e-05, - "loss": 0.4552, + "learning_rate": 5.279455376931818e-05, + "loss": 0.3299, "step": 773500 }, { "epoch": 0.46, - "learning_rate": 5.279254620224229e-05, - "loss": 0.4507, + "learning_rate": 5.279245380375762e-05, + "loss": 0.3248, "step": 774000 }, { "epoch": 0.46, - "learning_rate": 5.2790446236681715e-05, - "loss": 0.4546, + "learning_rate": 5.279035383819705e-05, + "loss": 0.3275, "step": 774500 }, { "epoch": 0.46, - "learning_rate": 5.278834627112115e-05, - "loss": 0.4438, + "learning_rate": 5.2788253872636485e-05, + "loss": 0.3262, "step": 775000 }, { "epoch": 0.46, - "learning_rate": 5.278624630556059e-05, - "loss": 0.4503, + "learning_rate": 5.2786153907075926e-05, + "loss": 0.3221, "step": 775500 }, { "epoch": 0.47, - "learning_rate": 5.278414634000002e-05, - "loss": 0.4452, + "learning_rate": 5.278405394151536e-05, + "loss": 0.3238, "step": 776000 }, { "epoch": 0.47, - "learning_rate": 5.2782046374439455e-05, - "loss": 0.4427, + "learning_rate": 5.278195817588591e-05, + "loss": 0.3312, "step": 776500 }, { "epoch": 0.47, - "learning_rate": 5.277995060881001e-05, - "loss": 0.4503, + "learning_rate": 5.277985821032535e-05, + "loss": 0.3299, "step": 777000 }, { "epoch": 0.47, - "learning_rate": 5.277785064324945e-05, - "loss": 0.4369, + "learning_rate": 5.2777762444695906e-05, + "loss": 0.3187, "step": 777500 }, { "epoch": 0.47, - "learning_rate": 5.277575067768888e-05, - "loss": 0.4345, + "learning_rate": 5.277566247913534e-05, + "loss": 0.3143, "step": 778000 }, { "epoch": 0.47, - "learning_rate": 5.2773650712128316e-05, - "loss": 0.4388, + "learning_rate": 5.2773562513574773e-05, + "loss": 0.3281, "step": 778500 }, { "epoch": 0.47, - "learning_rate": 5.2771550746567756e-05, - "loss": 0.4378, + "learning_rate": 5.2771462548014214e-05, + "loss": 0.3228, "step": 779000 }, { "epoch": 0.47, - "learning_rate": 5.276945078100719e-05, - "loss": 0.4496, + "learning_rate": 5.276936258245365e-05, + "loss": 0.3253, "step": 779500 }, { "epoch": 0.47, - "learning_rate": 5.276735081544662e-05, - "loss": 0.4411, + "learning_rate": 5.276726261689308e-05, + "loss": 0.324, "step": 780000 }, { "epoch": 0.47, - "learning_rate": 5.2765250849886064e-05, - "loss": 0.4408, + "learning_rate": 5.276516265133252e-05, + "loss": 0.3268, "step": 780500 }, { "epoch": 0.47, - "learning_rate": 5.276315508425662e-05, - "loss": 0.4326, + "learning_rate": 5.2763066885703074e-05, + "loss": 0.3154, "step": 781000 }, { "epoch": 0.47, - "learning_rate": 5.276105931862718e-05, - "loss": 0.4496, + "learning_rate": 5.276096692014251e-05, + "loss": 0.3221, "step": 781500 }, { "epoch": 0.47, - "learning_rate": 5.2758959353066604e-05, - "loss": 0.4357, + "learning_rate": 5.275886695458194e-05, + "loss": 0.3193, "step": 782000 }, { "epoch": 0.47, - "learning_rate": 5.2756859387506044e-05, - "loss": 0.4396, + "learning_rate": 5.275676698902138e-05, + "loss": 0.3239, "step": 782500 }, { "epoch": 0.47, - "learning_rate": 5.275475942194548e-05, - "loss": 0.4417, + "learning_rate": 5.2754667023460815e-05, + "loss": 0.3247, "step": 783000 }, { "epoch": 0.47, - "learning_rate": 5.275265945638491e-05, - "loss": 0.4503, + "learning_rate": 5.275256705790025e-05, + "loss": 0.3242, "step": 783500 }, { "epoch": 0.47, - "learning_rate": 5.275055949082435e-05, - "loss": 0.4496, + "learning_rate": 5.275046709233969e-05, + "loss": 0.3275, "step": 784000 }, { "epoch": 0.47, - "learning_rate": 5.2748459525263785e-05, - "loss": 0.4564, + "learning_rate": 5.2748367126779115e-05, + "loss": 0.33, "step": 784500 }, { "epoch": 0.47, - "learning_rate": 5.274635955970322e-05, - "loss": 0.445, + "learning_rate": 5.2746271361149676e-05, + "loss": 0.3237, "step": 785000 }, { "epoch": 0.47, - "learning_rate": 5.274425959414266e-05, - "loss": 0.4512, + "learning_rate": 5.2744171395589116e-05, + "loss": 0.3287, "step": 785500 }, { "epoch": 0.47, - "learning_rate": 5.274216382851321e-05, - "loss": 0.4452, + "learning_rate": 5.274207143002855e-05, + "loss": 0.3216, "step": 786000 }, { "epoch": 0.47, - "learning_rate": 5.2740063862952646e-05, - "loss": 0.4423, + "learning_rate": 5.27399756643991e-05, + "loss": 0.324, "step": 786500 }, { "epoch": 0.47, - "learning_rate": 5.273796389739208e-05, - "loss": 0.4485, + "learning_rate": 5.2737875698838536e-05, + "loss": 0.3276, "step": 787000 }, { "epoch": 0.47, - "learning_rate": 5.273586393183152e-05, - "loss": 0.4407, + "learning_rate": 5.273577573327798e-05, + "loss": 0.3208, "step": 787500 }, { "epoch": 0.47, - "learning_rate": 5.273376816620207e-05, - "loss": 0.4463, + "learning_rate": 5.273367576771741e-05, + "loss": 0.3243, "step": 788000 }, { "epoch": 0.47, - "learning_rate": 5.273167240057263e-05, - "loss": 0.4499, + "learning_rate": 5.2731575802156844e-05, + "loss": 0.3286, "step": 788500 }, { "epoch": 0.47, - "learning_rate": 5.272957243501206e-05, - "loss": 0.4491, + "learning_rate": 5.2729475836596284e-05, + "loss": 0.3257, "step": 789000 }, { "epoch": 0.47, - "learning_rate": 5.27274724694515e-05, - "loss": 0.4454, + "learning_rate": 5.272737587103571e-05, + "loss": 0.3169, "step": 789500 }, { "epoch": 0.47, - "learning_rate": 5.2725372503890934e-05, - "loss": 0.4461, + "learning_rate": 5.272528010540627e-05, + "loss": 0.3241, "step": 790000 }, { "epoch": 0.47, - "learning_rate": 5.2723276738261494e-05, - "loss": 0.4367, + "learning_rate": 5.272318013984571e-05, + "loss": 0.3186, "step": 790500 }, { "epoch": 0.47, - "learning_rate": 5.272117677270093e-05, - "loss": 0.4494, + "learning_rate": 5.2721080174285145e-05, + "loss": 0.3265, "step": 791000 }, { "epoch": 0.47, - "learning_rate": 5.271907680714036e-05, - "loss": 0.4404, + "learning_rate": 5.271898020872458e-05, + "loss": 0.3267, "step": 791500 }, { "epoch": 0.47, - "learning_rate": 5.2716976841579794e-05, - "loss": 0.4535, + "learning_rate": 5.271688024316401e-05, + "loss": 0.329, "step": 792000 }, { "epoch": 0.48, - "learning_rate": 5.271487687601923e-05, - "loss": 0.4564, + "learning_rate": 5.2714780277603445e-05, + "loss": 0.3283, "step": 792500 }, { "epoch": 0.48, - "learning_rate": 5.271277691045867e-05, - "loss": 0.4513, + "learning_rate": 5.271268031204288e-05, + "loss": 0.3274, "step": 793000 }, { "epoch": 0.48, - "learning_rate": 5.27106769448981e-05, - "loss": 0.4486, + "learning_rate": 5.271058034648232e-05, + "loss": 0.3288, "step": 793500 }, { "epoch": 0.48, - "learning_rate": 5.2708576979337535e-05, - "loss": 0.4489, + "learning_rate": 5.270848038092175e-05, + "loss": 0.3315, "step": 794000 }, { "epoch": 0.48, - "learning_rate": 5.2706481213708095e-05, - "loss": 0.436, + "learning_rate": 5.2706384615292306e-05, + "loss": 0.323, "step": 794500 }, { "epoch": 0.48, - "learning_rate": 5.270438124814753e-05, - "loss": 0.4431, + "learning_rate": 5.270428464973174e-05, + "loss": 0.3158, "step": 795000 }, { "epoch": 0.48, - "learning_rate": 5.270228128258696e-05, - "loss": 0.4457, + "learning_rate": 5.270218468417118e-05, + "loss": 0.3256, "step": 795500 }, { "epoch": 0.48, - "learning_rate": 5.27001813170264e-05, - "loss": 0.4371, + "learning_rate": 5.270008471861061e-05, + "loss": 0.3223, "step": 796000 }, { "epoch": 0.48, - "learning_rate": 5.2698089751328076e-05, - "loss": 0.4413, + "learning_rate": 5.2697984753050046e-05, + "loss": 0.3255, "step": 796500 }, { "epoch": 0.48, - "learning_rate": 5.2695989785767517e-05, - "loss": 0.4503, + "learning_rate": 5.2695884787489487e-05, + "loss": 0.3241, "step": 797000 }, { "epoch": 0.48, - "learning_rate": 5.269388982020695e-05, - "loss": 0.4531, + "learning_rate": 5.269378482192892e-05, + "loss": 0.3237, "step": 797500 }, { "epoch": 0.48, - "learning_rate": 5.2691789854646383e-05, - "loss": 0.4475, + "learning_rate": 5.2691684856368353e-05, + "loss": 0.3238, "step": 798000 }, { "epoch": 0.48, - "learning_rate": 5.268968988908582e-05, - "loss": 0.4366, + "learning_rate": 5.2689589090738914e-05, + "loss": 0.3155, "step": 798500 }, { "epoch": 0.48, - "learning_rate": 5.268758992352525e-05, - "loss": 0.4336, + "learning_rate": 5.268749332510947e-05, + "loss": 0.3214, "step": 799000 }, { "epoch": 0.48, - "learning_rate": 5.268549415789581e-05, - "loss": 0.4513, + "learning_rate": 5.26853933595489e-05, + "loss": 0.3271, "step": 799500 }, { "epoch": 0.48, - "learning_rate": 5.2683394192335244e-05, - "loss": 0.4385, + "learning_rate": 5.2683293393988334e-05, + "loss": 0.3183, "step": 800000 }, { "epoch": 0.48, - "eval_loss": 0.4203203022480011, - "eval_runtime": 1112.9021, - "eval_samples_per_second": 473.285, - "eval_steps_per_second": 78.881, + "eval_loss": 0.2911185920238495, + "eval_runtime": 1475.0852, + "eval_samples_per_second": 357.078, + "eval_steps_per_second": 59.513, "step": 800000 }, { "epoch": 0.48, - "learning_rate": 5.2681294226774684e-05, - "loss": 0.4351, + "learning_rate": 5.2681193428427775e-05, + "loss": 0.3193, "step": 800500 }, { "epoch": 0.48, - "learning_rate": 5.267919426121411e-05, - "loss": 0.4367, + "learning_rate": 5.267909346286721e-05, + "loss": 0.3228, "step": 801000 }, { "epoch": 0.48, - "learning_rate": 5.267709429565355e-05, - "loss": 0.4444, + "learning_rate": 5.267699769723776e-05, + "loss": 0.326, "step": 801500 }, { "epoch": 0.48, - "learning_rate": 5.267499853002411e-05, - "loss": 0.4397, + "learning_rate": 5.2674897731677195e-05, + "loss": 0.3274, "step": 802000 }, { "epoch": 0.48, - "learning_rate": 5.2672898564463545e-05, - "loss": 0.4394, + "learning_rate": 5.2672797766116635e-05, + "loss": 0.319, "step": 802500 }, { "epoch": 0.48, - "learning_rate": 5.267079859890298e-05, - "loss": 0.438, + "learning_rate": 5.267069780055607e-05, + "loss": 0.3261, "step": 803000 }, { "epoch": 0.48, - "learning_rate": 5.266869863334241e-05, - "loss": 0.4468, + "learning_rate": 5.266860203492662e-05, + "loss": 0.3203, "step": 803500 }, { "epoch": 0.48, - "learning_rate": 5.2666598667781846e-05, - "loss": 0.441, + "learning_rate": 5.266650206936606e-05, + "loss": 0.3198, "step": 804000 }, { "epoch": 0.48, - "learning_rate": 5.266449870222128e-05, - "loss": 0.4408, + "learning_rate": 5.2664402103805496e-05, + "loss": 0.3241, "step": 804500 }, { "epoch": 0.48, - "learning_rate": 5.266239873666072e-05, - "loss": 0.4369, + "learning_rate": 5.266230213824493e-05, + "loss": 0.3182, "step": 805000 }, { "epoch": 0.48, - "learning_rate": 5.266030297103127e-05, - "loss": 0.4508, + "learning_rate": 5.266020217268437e-05, + "loss": 0.3303, "step": 805500 }, { "epoch": 0.48, - "learning_rate": 5.2658203005470706e-05, - "loss": 0.4282, + "learning_rate": 5.26581022071238e-05, + "loss": 0.3145, "step": 806000 }, { "epoch": 0.48, - "learning_rate": 5.265610303991014e-05, - "loss": 0.436, + "learning_rate": 5.2656002241563237e-05, + "loss": 0.3257, "step": 806500 }, { "epoch": 0.48, - "learning_rate": 5.265400307434958e-05, - "loss": 0.4386, + "learning_rate": 5.265390227600268e-05, + "loss": 0.3196, "step": 807000 }, { "epoch": 0.48, - "learning_rate": 5.2651903108789013e-05, - "loss": 0.4434, + "learning_rate": 5.265180231044211e-05, + "loss": 0.3237, "step": 807500 }, { "epoch": 0.48, - "learning_rate": 5.264980314322845e-05, - "loss": 0.4483, + "learning_rate": 5.2649702344881544e-05, + "loss": 0.3217, "step": 808000 }, { "epoch": 0.48, - "learning_rate": 5.264770317766789e-05, - "loss": 0.4415, + "learning_rate": 5.26476065792521e-05, + "loss": 0.3257, "step": 808500 }, { "epoch": 0.49, - "learning_rate": 5.264560321210732e-05, - "loss": 0.449, + "learning_rate": 5.264550661369154e-05, + "loss": 0.3278, "step": 809000 }, { "epoch": 0.49, - "learning_rate": 5.2643503246546754e-05, - "loss": 0.4413, + "learning_rate": 5.264340664813097e-05, + "loss": 0.3225, "step": 809500 }, { "epoch": 0.49, - "learning_rate": 5.2641407480917314e-05, - "loss": 0.4402, + "learning_rate": 5.2641306682570404e-05, + "loss": 0.3194, "step": 810000 }, { "epoch": 0.49, - "learning_rate": 5.263930751535675e-05, - "loss": 0.4547, + "learning_rate": 5.2639206717009845e-05, + "loss": 0.3305, "step": 810500 }, { "epoch": 0.49, - "learning_rate": 5.263720754979618e-05, - "loss": 0.4462, + "learning_rate": 5.26371109513804e-05, + "loss": 0.3257, "step": 811000 }, { "epoch": 0.49, - "learning_rate": 5.263510758423562e-05, - "loss": 0.4455, + "learning_rate": 5.263501098581983e-05, + "loss": 0.3257, "step": 811500 }, { "epoch": 0.49, - "learning_rate": 5.2633011818606175e-05, - "loss": 0.4442, + "learning_rate": 5.2632911020259265e-05, + "loss": 0.326, "step": 812000 }, { "epoch": 0.49, - "learning_rate": 5.263091185304561e-05, - "loss": 0.4463, + "learning_rate": 5.2630815254629826e-05, + "loss": 0.3236, "step": 812500 }, { "epoch": 0.49, - "learning_rate": 5.262881188748504e-05, - "loss": 0.4529, + "learning_rate": 5.262871528906926e-05, + "loss": 0.3247, "step": 813000 }, { "epoch": 0.49, - "learning_rate": 5.262671192192448e-05, - "loss": 0.4384, + "learning_rate": 5.262661952343981e-05, + "loss": 0.3185, "step": 813500 }, { "epoch": 0.49, - "learning_rate": 5.2624616156295036e-05, - "loss": 0.4476, + "learning_rate": 5.2624519557879246e-05, + "loss": 0.3239, "step": 814000 }, { "epoch": 0.49, - "learning_rate": 5.262251619073447e-05, - "loss": 0.4426, + "learning_rate": 5.2622419592318686e-05, + "loss": 0.3231, "step": 814500 }, { "epoch": 0.49, - "learning_rate": 5.26204162251739e-05, - "loss": 0.4441, + "learning_rate": 5.262031962675812e-05, + "loss": 0.3233, "step": 815000 }, { "epoch": 0.49, - "learning_rate": 5.261831625961334e-05, - "loss": 0.4477, + "learning_rate": 5.261821966119755e-05, + "loss": 0.3222, "step": 815500 }, { "epoch": 0.49, - "learning_rate": 5.2616220493983897e-05, - "loss": 0.4424, + "learning_rate": 5.2616119695636993e-05, + "loss": 0.32, "step": 816000 }, { "epoch": 0.49, - "learning_rate": 5.261412472835446e-05, - "loss": 0.4465, + "learning_rate": 5.261401973007643e-05, + "loss": 0.3304, "step": 816500 }, { "epoch": 0.49, - "learning_rate": 5.261202476279389e-05, - "loss": 0.4302, + "learning_rate": 5.261191976451586e-05, + "loss": 0.3195, "step": 817000 }, { "epoch": 0.49, - "learning_rate": 5.2609924797233324e-05, - "loss": 0.4305, + "learning_rate": 5.26098197989553e-05, + "loss": 0.3212, "step": 817500 }, { "epoch": 0.49, - "learning_rate": 5.260782483167276e-05, - "loss": 0.452, + "learning_rate": 5.2607724033325854e-05, + "loss": 0.324, "step": 818000 }, { "epoch": 0.49, - "learning_rate": 5.260572486611219e-05, - "loss": 0.4447, + "learning_rate": 5.260562406776529e-05, + "loss": 0.3186, "step": 818500 }, { "epoch": 0.49, - "learning_rate": 5.260362490055163e-05, - "loss": 0.4432, + "learning_rate": 5.260352830213584e-05, + "loss": 0.3231, "step": 819000 }, { "epoch": 0.49, - "learning_rate": 5.2601524934991064e-05, - "loss": 0.434, + "learning_rate": 5.260142833657528e-05, + "loss": 0.3154, "step": 819500 }, { "epoch": 0.49, - "learning_rate": 5.25994249694305e-05, - "loss": 0.4301, + "learning_rate": 5.2599328371014715e-05, + "loss": 0.3173, "step": 820000 }, { "epoch": 0.49, - "learning_rate": 5.259732920380105e-05, - "loss": 0.4529, + "learning_rate": 5.259722840545415e-05, + "loss": 0.3272, "step": 820500 }, { "epoch": 0.49, - "learning_rate": 5.259523343817161e-05, - "loss": 0.4438, + "learning_rate": 5.259512843989359e-05, + "loss": 0.3194, "step": 821000 }, { "epoch": 0.49, - "learning_rate": 5.259313347261105e-05, - "loss": 0.4324, + "learning_rate": 5.259302847433302e-05, + "loss": 0.3153, "step": 821500 }, { "epoch": 0.49, - "learning_rate": 5.2591033507050486e-05, - "loss": 0.4466, + "learning_rate": 5.2590928508772455e-05, + "loss": 0.323, "step": 822000 }, { "epoch": 0.49, - "learning_rate": 5.258893354148992e-05, - "loss": 0.4382, + "learning_rate": 5.2588828543211896e-05, + "loss": 0.3209, "step": 822500 }, { "epoch": 0.49, - "learning_rate": 5.258683777586048e-05, - "loss": 0.4459, + "learning_rate": 5.258673277758245e-05, + "loss": 0.3313, "step": 823000 }, { "epoch": 0.49, - "learning_rate": 5.258473781029991e-05, - "loss": 0.4475, + "learning_rate": 5.258463281202188e-05, + "loss": 0.322, "step": 823500 }, { "epoch": 0.49, - "learning_rate": 5.2582637844739346e-05, - "loss": 0.4397, + "learning_rate": 5.2582532846461316e-05, + "loss": 0.3218, "step": 824000 }, { "epoch": 0.49, - "learning_rate": 5.2580537879178786e-05, - "loss": 0.4388, + "learning_rate": 5.2580432880900756e-05, + "loss": 0.3189, "step": 824500 }, { "epoch": 0.49, - "learning_rate": 5.257843791361821e-05, - "loss": 0.4389, + "learning_rate": 5.257833291534019e-05, + "loss": 0.3211, "step": 825000 }, { "epoch": 0.49, - "learning_rate": 5.2576337948057647e-05, - "loss": 0.4387, + "learning_rate": 5.257623294977962e-05, + "loss": 0.3239, "step": 825500 }, { "epoch": 0.5, - "learning_rate": 5.257423798249709e-05, - "loss": 0.4586, + "learning_rate": 5.257413298421906e-05, + "loss": 0.3363, "step": 826000 }, { "epoch": 0.5, - "learning_rate": 5.257213801693652e-05, - "loss": 0.4368, + "learning_rate": 5.257203301865849e-05, + "loss": 0.3193, "step": 826500 }, { "epoch": 0.5, - "learning_rate": 5.2570042251307074e-05, - "loss": 0.4423, + "learning_rate": 5.2569933053097924e-05, + "loss": 0.3219, "step": 827000 }, { "epoch": 0.5, - "learning_rate": 5.256794228574651e-05, - "loss": 0.4436, + "learning_rate": 5.2567833087537364e-05, + "loss": 0.3188, "step": 827500 }, { "epoch": 0.5, - "learning_rate": 5.256584232018595e-05, - "loss": 0.4402, + "learning_rate": 5.25657331219768e-05, + "loss": 0.3207, "step": 828000 }, { "epoch": 0.5, - "learning_rate": 5.256374655455651e-05, - "loss": 0.449, + "learning_rate": 5.256363315641623e-05, + "loss": 0.3271, "step": 828500 }, { "epoch": 0.5, - "learning_rate": 5.256164658899594e-05, - "loss": 0.4468, + "learning_rate": 5.256153739078679e-05, + "loss": 0.3245, "step": 829000 }, { "epoch": 0.5, - "learning_rate": 5.2559546623435375e-05, - "loss": 0.4408, + "learning_rate": 5.255944162515735e-05, + "loss": 0.3193, "step": 829500 }, { "epoch": 0.5, - "learning_rate": 5.255744665787481e-05, - "loss": 0.4328, + "learning_rate": 5.2557341659596785e-05, + "loss": 0.3198, "step": 830000 }, { "epoch": 0.5, - "learning_rate": 5.255535089224537e-05, - "loss": 0.4445, + "learning_rate": 5.255524169403621e-05, + "loss": 0.3173, "step": 830500 }, { "epoch": 0.5, - "learning_rate": 5.25532509266848e-05, - "loss": 0.4388, + "learning_rate": 5.255314172847565e-05, + "loss": 0.3202, "step": 831000 }, { "epoch": 0.5, - "learning_rate": 5.255115096112424e-05, - "loss": 0.4425, + "learning_rate": 5.2551041762915085e-05, + "loss": 0.3216, "step": 831500 }, { "epoch": 0.5, - "learning_rate": 5.254905099556367e-05, - "loss": 0.4386, + "learning_rate": 5.254894179735452e-05, + "loss": 0.318, "step": 832000 }, { "epoch": 0.5, - "learning_rate": 5.254695522993423e-05, - "loss": 0.4458, + "learning_rate": 5.254684183179396e-05, + "loss": 0.3207, "step": 832500 }, { "epoch": 0.5, - "learning_rate": 5.254485526437366e-05, - "loss": 0.4364, + "learning_rate": 5.254474186623339e-05, + "loss": 0.32, "step": 833000 }, { "epoch": 0.5, - "learning_rate": 5.25427552988131e-05, - "loss": 0.447, + "learning_rate": 5.2542646100603946e-05, + "loss": 0.3261, "step": 833500 }, { "epoch": 0.5, - "learning_rate": 5.2540655333252537e-05, - "loss": 0.4501, + "learning_rate": 5.254054613504338e-05, + "loss": 0.3271, "step": 834000 }, { "epoch": 0.5, - "learning_rate": 5.253855536769196e-05, - "loss": 0.4403, + "learning_rate": 5.253844616948282e-05, + "loss": 0.3223, "step": 834500 }, { "epoch": 0.5, - "learning_rate": 5.2536455402131403e-05, - "loss": 0.449, + "learning_rate": 5.253634620392225e-05, + "loss": 0.3255, "step": 835000 }, { "epoch": 0.5, - "learning_rate": 5.253435543657084e-05, - "loss": 0.4283, + "learning_rate": 5.253424623836169e-05, + "loss": 0.3175, "step": 835500 }, { "epoch": 0.5, - "learning_rate": 5.253225547101027e-05, - "loss": 0.4387, + "learning_rate": 5.253214627280113e-05, + "loss": 0.3251, "step": 836000 }, { "epoch": 0.5, - "learning_rate": 5.253015550544971e-05, - "loss": 0.449, + "learning_rate": 5.253004630724056e-05, + "loss": 0.3253, "step": 836500 }, { "epoch": 0.5, - "learning_rate": 5.2528055539889144e-05, - "loss": 0.4357, + "learning_rate": 5.2527946341679994e-05, + "loss": 0.3216, "step": 837000 }, { "epoch": 0.5, - "learning_rate": 5.2525955574328584e-05, - "loss": 0.4258, + "learning_rate": 5.2525846376119434e-05, + "loss": 0.3166, "step": 837500 }, { "epoch": 0.5, - "learning_rate": 5.252385560876802e-05, - "loss": 0.4414, + "learning_rate": 5.252375061048999e-05, + "loss": 0.3242, "step": 838000 }, { "epoch": 0.5, - "learning_rate": 5.252175984313857e-05, - "loss": 0.4506, + "learning_rate": 5.252165064492942e-05, + "loss": 0.327, "step": 838500 }, { "epoch": 0.5, - "learning_rate": 5.2519659877578005e-05, - "loss": 0.442, + "learning_rate": 5.2519554879299975e-05, + "loss": 0.3241, "step": 839000 }, { "epoch": 0.5, - "learning_rate": 5.2517559912017445e-05, - "loss": 0.439, + "learning_rate": 5.2517454913739415e-05, + "loss": 0.3284, "step": 839500 }, { "epoch": 0.5, - "learning_rate": 5.2515464146388e-05, - "loss": 0.4314, + "learning_rate": 5.251535494817885e-05, + "loss": 0.3199, "step": 840000 }, { "epoch": 0.5, - "learning_rate": 5.251336418082743e-05, - "loss": 0.4413, + "learning_rate": 5.251325498261828e-05, + "loss": 0.3219, "step": 840500 }, { "epoch": 0.5, - "learning_rate": 5.2511264215266866e-05, - "loss": 0.4541, + "learning_rate": 5.251115501705772e-05, + "loss": 0.3276, "step": 841000 }, { "epoch": 0.5, - "learning_rate": 5.2509164249706306e-05, - "loss": 0.4392, + "learning_rate": 5.2509055051497156e-05, + "loss": 0.3207, "step": 841500 }, { "epoch": 0.5, - "learning_rate": 5.250706428414574e-05, - "loss": 0.4559, + "learning_rate": 5.250695508593659e-05, + "loss": 0.3241, "step": 842000 }, { "epoch": 0.51, - "learning_rate": 5.250496431858517e-05, - "loss": 0.4517, + "learning_rate": 5.250485512037603e-05, + "loss": 0.3277, "step": 842500 }, { "epoch": 0.51, - "learning_rate": 5.250286435302461e-05, - "loss": 0.4498, + "learning_rate": 5.250275515481546e-05, + "loss": 0.3295, "step": 843000 }, { "epoch": 0.51, - "learning_rate": 5.2500764387464046e-05, - "loss": 0.4395, + "learning_rate": 5.2500659389186016e-05, + "loss": 0.3209, "step": 843500 }, { "epoch": 0.51, - "learning_rate": 5.24986686218346e-05, - "loss": 0.4561, + "learning_rate": 5.249856362355657e-05, + "loss": 0.3282, "step": 844000 }, { "epoch": 0.51, - "learning_rate": 5.249656865627404e-05, - "loss": 0.4453, + "learning_rate": 5.249646365799601e-05, + "loss": 0.3262, "step": 844500 }, { "epoch": 0.51, - "learning_rate": 5.2494472890644594e-05, - "loss": 0.4404, + "learning_rate": 5.2494363692435444e-05, + "loss": 0.3214, "step": 845000 }, { "epoch": 0.51, - "learning_rate": 5.249237292508403e-05, - "loss": 0.4414, + "learning_rate": 5.249226372687488e-05, + "loss": 0.3266, "step": 845500 }, { "epoch": 0.51, - "learning_rate": 5.249027295952346e-05, - "loss": 0.4364, + "learning_rate": 5.249016376131432e-05, + "loss": 0.3222, "step": 846000 }, { "epoch": 0.51, - "learning_rate": 5.24881729939629e-05, - "loss": 0.4453, + "learning_rate": 5.248806799568487e-05, + "loss": 0.3235, "step": 846500 }, { "epoch": 0.51, - "learning_rate": 5.2486073028402334e-05, - "loss": 0.4529, + "learning_rate": 5.2485968030124304e-05, + "loss": 0.3266, "step": 847000 }, { "epoch": 0.51, - "learning_rate": 5.248397726277289e-05, - "loss": 0.4461, + "learning_rate": 5.248386806456374e-05, + "loss": 0.3253, "step": 847500 }, { "epoch": 0.51, - "learning_rate": 5.248187729721232e-05, - "loss": 0.4478, + "learning_rate": 5.248176809900318e-05, + "loss": 0.3305, "step": 848000 }, { "epoch": 0.51, - "learning_rate": 5.247977733165176e-05, - "loss": 0.4356, + "learning_rate": 5.247966813344261e-05, + "loss": 0.3212, "step": 848500 }, { "epoch": 0.51, - "learning_rate": 5.2477677366091195e-05, - "loss": 0.4385, + "learning_rate": 5.2477568167882045e-05, + "loss": 0.3206, "step": 849000 }, { "epoch": 0.51, - "learning_rate": 5.247557740053063e-05, - "loss": 0.4217, + "learning_rate": 5.2475468202321485e-05, + "loss": 0.3138, "step": 849500 }, { "epoch": 0.51, - "learning_rate": 5.247347743497007e-05, - "loss": 0.4523, + "learning_rate": 5.247336823676092e-05, + "loss": 0.3256, "step": 850000 }, { "epoch": 0.51, - "learning_rate": 5.24713774694095e-05, - "loss": 0.4332, + "learning_rate": 5.247127247113147e-05, + "loss": 0.3231, "step": 850500 }, { "epoch": 0.51, - "learning_rate": 5.2469277503848936e-05, - "loss": 0.43, + "learning_rate": 5.2469176705502026e-05, + "loss": 0.3209, "step": 851000 }, { "epoch": 0.51, - "learning_rate": 5.2467181738219496e-05, - "loss": 0.4423, + "learning_rate": 5.2467076739941466e-05, + "loss": 0.33, "step": 851500 }, { "epoch": 0.51, - "learning_rate": 5.246508177265893e-05, - "loss": 0.4496, + "learning_rate": 5.24649767743809e-05, + "loss": 0.3226, "step": 852000 }, { "epoch": 0.51, - "learning_rate": 5.246298180709836e-05, - "loss": 0.4419, + "learning_rate": 5.246287680882033e-05, + "loss": 0.3185, "step": 852500 }, { "epoch": 0.51, - "learning_rate": 5.24608818415378e-05, - "loss": 0.4351, + "learning_rate": 5.246077684325977e-05, + "loss": 0.3155, "step": 853000 }, { "epoch": 0.51, - "learning_rate": 5.245878607590836e-05, - "loss": 0.4407, + "learning_rate": 5.245867687769921e-05, + "loss": 0.3231, "step": 853500 }, { "epoch": 0.51, - "learning_rate": 5.245668611034779e-05, - "loss": 0.447, + "learning_rate": 5.245657691213864e-05, + "loss": 0.3219, "step": 854000 }, { "epoch": 0.51, - "learning_rate": 5.2454586144787224e-05, - "loss": 0.4428, + "learning_rate": 5.245447694657808e-05, + "loss": 0.3242, "step": 854500 }, { "epoch": 0.51, - "learning_rate": 5.2452486179226664e-05, - "loss": 0.4479, + "learning_rate": 5.245237698101751e-05, + "loss": 0.3194, "step": 855000 }, { "epoch": 0.51, - "learning_rate": 5.24503862136661e-05, - "loss": 0.4365, + "learning_rate": 5.245027701545694e-05, + "loss": 0.3206, "step": 855500 }, { "epoch": 0.51, - "learning_rate": 5.244828624810553e-05, - "loss": 0.4379, + "learning_rate": 5.244817704989638e-05, + "loss": 0.3172, "step": 856000 }, { "epoch": 0.51, - "learning_rate": 5.2446190482476084e-05, - "loss": 0.4265, + "learning_rate": 5.244608128426694e-05, + "loss": 0.3131, "step": 856500 }, { "epoch": 0.51, - "learning_rate": 5.2444090516915525e-05, - "loss": 0.4365, + "learning_rate": 5.2443981318706375e-05, + "loss": 0.3163, "step": 857000 }, { "epoch": 0.51, - "learning_rate": 5.244199055135496e-05, - "loss": 0.4359, + "learning_rate": 5.244188135314581e-05, + "loss": 0.3212, "step": 857500 }, { "epoch": 0.51, - "learning_rate": 5.243989058579439e-05, - "loss": 0.4384, + "learning_rate": 5.243978138758524e-05, + "loss": 0.32, "step": 858000 }, { "epoch": 0.51, - "learning_rate": 5.243779062023383e-05, - "loss": 0.4293, + "learning_rate": 5.2437681422024675e-05, + "loss": 0.3149, "step": 858500 }, { "epoch": 0.52, - "learning_rate": 5.243569065467326e-05, - "loss": 0.441, + "learning_rate": 5.2435581456464115e-05, + "loss": 0.3208, "step": 859000 }, { "epoch": 0.52, - "learning_rate": 5.24335906891127e-05, - "loss": 0.4328, + "learning_rate": 5.2433485690834675e-05, + "loss": 0.319, "step": 859500 }, { "epoch": 0.52, - "learning_rate": 5.243149072355213e-05, - "loss": 0.4431, + "learning_rate": 5.24313857252741e-05, + "loss": 0.3204, "step": 860000 }, { "epoch": 0.52, - "learning_rate": 5.242939495792269e-05, - "loss": 0.4451, + "learning_rate": 5.242928995964466e-05, + "loss": 0.3247, "step": 860500 }, { "epoch": 0.52, - "learning_rate": 5.2427299192293246e-05, - "loss": 0.431, + "learning_rate": 5.2427189994084096e-05, + "loss": 0.3161, "step": 861000 }, { "epoch": 0.52, - "learning_rate": 5.242519922673268e-05, - "loss": 0.4271, + "learning_rate": 5.2425090028523536e-05, + "loss": 0.3186, "step": 861500 }, { "epoch": 0.52, - "learning_rate": 5.242309926117212e-05, - "loss": 0.4258, + "learning_rate": 5.242299006296297e-05, + "loss": 0.3158, "step": 862000 }, { "epoch": 0.52, - "learning_rate": 5.242099929561155e-05, - "loss": 0.4463, + "learning_rate": 5.2420890097402396e-05, + "loss": 0.3239, "step": 862500 }, { "epoch": 0.52, - "learning_rate": 5.241889933005099e-05, - "loss": 0.4352, + "learning_rate": 5.2418790131841837e-05, + "loss": 0.3235, "step": 863000 }, { "epoch": 0.52, - "learning_rate": 5.241679936449042e-05, - "loss": 0.4383, + "learning_rate": 5.24166943662124e-05, + "loss": 0.3205, "step": 863500 }, { "epoch": 0.52, - "learning_rate": 5.24147077987921e-05, - "loss": 0.4434, + "learning_rate": 5.241459440065183e-05, + "loss": 0.3247, "step": 864000 }, { "epoch": 0.52, - "learning_rate": 5.2412607833231534e-05, - "loss": 0.4448, + "learning_rate": 5.2412494435091264e-05, + "loss": 0.3225, "step": 864500 }, { "epoch": 0.52, - "learning_rate": 5.241050786767097e-05, - "loss": 0.4315, + "learning_rate": 5.24103944695307e-05, + "loss": 0.3165, "step": 865000 }, { "epoch": 0.52, - "learning_rate": 5.240840790211041e-05, - "loss": 0.4317, + "learning_rate": 5.240829450397013e-05, + "loss": 0.3205, "step": 865500 }, { "epoch": 0.52, - "learning_rate": 5.240630793654984e-05, - "loss": 0.4381, + "learning_rate": 5.240619453840957e-05, + "loss": 0.3207, "step": 866000 }, { "epoch": 0.52, - "learning_rate": 5.2404207970989275e-05, - "loss": 0.4474, + "learning_rate": 5.2404094572849004e-05, + "loss": 0.325, "step": 866500 }, { "epoch": 0.52, - "learning_rate": 5.2402108005428715e-05, - "loss": 0.4266, + "learning_rate": 5.240199460728844e-05, + "loss": 0.3127, "step": 867000 }, { "epoch": 0.52, - "learning_rate": 5.240000803986815e-05, - "loss": 0.4413, + "learning_rate": 5.239989464172788e-05, + "loss": 0.3184, "step": 867500 }, { "epoch": 0.52, - "learning_rate": 5.239790807430758e-05, - "loss": 0.4382, + "learning_rate": 5.239779887609843e-05, + "loss": 0.3203, "step": 868000 }, { "epoch": 0.52, - "learning_rate": 5.2395808108747015e-05, - "loss": 0.4455, + "learning_rate": 5.2395698910537865e-05, + "loss": 0.3219, "step": 868500 }, { "epoch": 0.52, - "learning_rate": 5.239370814318645e-05, - "loss": 0.4281, + "learning_rate": 5.23935989449773e-05, + "loss": 0.3139, "step": 869000 }, { "epoch": 0.52, - "learning_rate": 5.239161237755701e-05, - "loss": 0.4289, + "learning_rate": 5.239149897941674e-05, + "loss": 0.3155, "step": 869500 }, { "epoch": 0.52, - "learning_rate": 5.238951241199644e-05, - "loss": 0.4458, + "learning_rate": 5.238939901385617e-05, + "loss": 0.3219, "step": 870000 }, { "epoch": 0.52, - "learning_rate": 5.238741244643588e-05, - "loss": 0.4403, + "learning_rate": 5.2387303248226726e-05, + "loss": 0.3226, "step": 870500 }, { "epoch": 0.52, - "learning_rate": 5.238531248087531e-05, - "loss": 0.442, + "learning_rate": 5.238520328266616e-05, + "loss": 0.3251, "step": 871000 }, { "epoch": 0.52, - "learning_rate": 5.238321251531474e-05, - "loss": 0.4279, + "learning_rate": 5.238310751703672e-05, + "loss": 0.3197, "step": 871500 }, { "epoch": 0.52, - "learning_rate": 5.238111254975418e-05, - "loss": 0.4286, + "learning_rate": 5.238100755147615e-05, + "loss": 0.32, "step": 872000 }, { "epoch": 0.52, - "learning_rate": 5.2379016784124744e-05, - "loss": 0.4269, + "learning_rate": 5.237890758591559e-05, + "loss": 0.317, "step": 872500 }, { "epoch": 0.52, - "learning_rate": 5.237691681856417e-05, - "loss": 0.438, + "learning_rate": 5.237680762035503e-05, + "loss": 0.3206, "step": 873000 }, { "epoch": 0.52, - "learning_rate": 5.237481685300361e-05, - "loss": 0.4402, + "learning_rate": 5.237470765479446e-05, + "loss": 0.3251, "step": 873500 }, { "epoch": 0.52, - "learning_rate": 5.2372716887443044e-05, - "loss": 0.4438, + "learning_rate": 5.2372607689233894e-05, + "loss": 0.323, "step": 874000 }, { "epoch": 0.52, - "learning_rate": 5.237061692188248e-05, - "loss": 0.4337, + "learning_rate": 5.2370507723673334e-05, + "loss": 0.3244, "step": 874500 }, { "epoch": 0.52, - "learning_rate": 5.236851695632192e-05, - "loss": 0.4355, + "learning_rate": 5.236840775811277e-05, + "loss": 0.3184, "step": 875000 }, { "epoch": 0.52, - "learning_rate": 5.236641699076135e-05, - "loss": 0.4369, + "learning_rate": 5.23663077925522e-05, + "loss": 0.318, "step": 875500 }, { "epoch": 0.53, - "learning_rate": 5.2364317025200785e-05, - "loss": 0.4411, + "learning_rate": 5.2364212026922755e-05, + "loss": 0.3223, "step": 876000 }, { "epoch": 0.53, - "learning_rate": 5.236222125957134e-05, - "loss": 0.4394, + "learning_rate": 5.2362112061362195e-05, + "loss": 0.3206, "step": 876500 }, { "epoch": 0.53, - "learning_rate": 5.236012129401078e-05, - "loss": 0.4257, + "learning_rate": 5.236001209580163e-05, + "loss": 0.314, "step": 877000 }, { "epoch": 0.53, - "learning_rate": 5.235802552838134e-05, - "loss": 0.4427, + "learning_rate": 5.235791213024106e-05, + "loss": 0.3226, "step": 877500 }, { "epoch": 0.53, - "learning_rate": 5.2355925562820765e-05, - "loss": 0.4423, + "learning_rate": 5.23558121646805e-05, + "loss": 0.3239, "step": 878000 }, { "epoch": 0.53, - "learning_rate": 5.23538255972602e-05, - "loss": 0.448, + "learning_rate": 5.2353720598982176e-05, + "loss": 0.3267, "step": 878500 }, { "epoch": 0.53, - "learning_rate": 5.235172563169964e-05, - "loss": 0.4434, + "learning_rate": 5.235162063342161e-05, + "loss": 0.3231, "step": 879000 }, { "epoch": 0.53, - "learning_rate": 5.234962566613907e-05, - "loss": 0.4384, + "learning_rate": 5.234952066786104e-05, + "loss": 0.3279, "step": 879500 }, { "epoch": 0.53, - "learning_rate": 5.2347525700578506e-05, - "loss": 0.444, + "learning_rate": 5.234742070230048e-05, + "loss": 0.3226, "step": 880000 }, { "epoch": 0.53, - "learning_rate": 5.2345425735017946e-05, - "loss": 0.4334, + "learning_rate": 5.2345320736739916e-05, + "loss": 0.3159, "step": 880500 }, { "epoch": 0.53, - "learning_rate": 5.234332576945738e-05, - "loss": 0.4278, + "learning_rate": 5.2343224971110477e-05, + "loss": 0.3184, "step": 881000 }, { "epoch": 0.53, - "learning_rate": 5.234123000382793e-05, - "loss": 0.4432, + "learning_rate": 5.23411250055499e-05, + "loss": 0.3219, "step": 881500 }, { "epoch": 0.53, - "learning_rate": 5.2339134238198494e-05, - "loss": 0.439, + "learning_rate": 5.2339025039989344e-05, + "loss": 0.32, "step": 882000 }, { "epoch": 0.53, - "learning_rate": 5.2337034272637934e-05, - "loss": 0.4417, + "learning_rate": 5.2336929274359904e-05, + "loss": 0.3156, "step": 882500 }, { "epoch": 0.53, - "learning_rate": 5.233493430707736e-05, - "loss": 0.4345, + "learning_rate": 5.233482930879934e-05, + "loss": 0.3199, "step": 883000 }, { "epoch": 0.53, - "learning_rate": 5.2332834341516794e-05, - "loss": 0.4369, + "learning_rate": 5.233272934323877e-05, + "loss": 0.3205, "step": 883500 }, { "epoch": 0.53, - "learning_rate": 5.2330734375956234e-05, - "loss": 0.4323, + "learning_rate": 5.2330629377678204e-05, + "loss": 0.318, "step": 884000 }, { "epoch": 0.53, - "learning_rate": 5.232863441039567e-05, - "loss": 0.4316, + "learning_rate": 5.232852941211764e-05, + "loss": 0.3181, "step": 884500 }, { "epoch": 0.53, - "learning_rate": 5.23265344448351e-05, - "loss": 0.4399, + "learning_rate": 5.232642944655707e-05, + "loss": 0.3189, "step": 885000 }, { "epoch": 0.53, - "learning_rate": 5.232443447927454e-05, - "loss": 0.4341, + "learning_rate": 5.232433368092763e-05, + "loss": 0.3145, "step": 885500 }, { "epoch": 0.53, - "learning_rate": 5.2322342913576215e-05, - "loss": 0.435, + "learning_rate": 5.2322233715367065e-05, + "loss": 0.3129, "step": 886000 }, { "epoch": 0.53, - "learning_rate": 5.2320242948015655e-05, - "loss": 0.4418, + "learning_rate": 5.23201337498065e-05, + "loss": 0.3222, "step": 886500 }, { "epoch": 0.53, - "learning_rate": 5.231814298245509e-05, - "loss": 0.4267, + "learning_rate": 5.231803378424594e-05, + "loss": 0.3188, "step": 887000 }, { "epoch": 0.53, - "learning_rate": 5.231604301689452e-05, - "loss": 0.4392, + "learning_rate": 5.231593381868537e-05, + "loss": 0.3249, "step": 887500 }, { "epoch": 0.53, - "learning_rate": 5.2313943051333956e-05, - "loss": 0.444, + "learning_rate": 5.2313833853124806e-05, + "loss": 0.3163, "step": 888000 }, { "epoch": 0.53, - "learning_rate": 5.231184308577339e-05, - "loss": 0.4307, + "learning_rate": 5.2311733887564246e-05, + "loss": 0.3156, "step": 888500 }, { "epoch": 0.53, - "learning_rate": 5.230974312021283e-05, - "loss": 0.4339, + "learning_rate": 5.230963392200368e-05, + "loss": 0.316, "step": 889000 }, { "epoch": 0.53, - "learning_rate": 5.230764315465226e-05, - "loss": 0.4391, + "learning_rate": 5.230753395644311e-05, + "loss": 0.3189, "step": 889500 }, { "epoch": 0.53, - "learning_rate": 5.2305547389022816e-05, - "loss": 0.4305, + "learning_rate": 5.230543399088255e-05, + "loss": 0.3152, "step": 890000 }, { "epoch": 0.53, - "learning_rate": 5.230344742346225e-05, - "loss": 0.4453, + "learning_rate": 5.2303338225253107e-05, + "loss": 0.3225, "step": 890500 }, { "epoch": 0.53, - "learning_rate": 5.230134745790169e-05, - "loss": 0.4355, + "learning_rate": 5.230123825969254e-05, + "loss": 0.3169, "step": 891000 }, { "epoch": 0.53, - "learning_rate": 5.2299247492341124e-05, - "loss": 0.4408, + "learning_rate": 5.2299138294131973e-05, + "loss": 0.3194, "step": 891500 }, { "epoch": 0.53, - "learning_rate": 5.2297155926642804e-05, - "loss": 0.4354, + "learning_rate": 5.2297038328571414e-05, + "loss": 0.3181, "step": 892000 }, { "epoch": 0.54, - "learning_rate": 5.229505596108224e-05, - "loss": 0.443, + "learning_rate": 5.229493836301085e-05, + "loss": 0.3202, "step": 892500 }, { "epoch": 0.54, - "learning_rate": 5.229295599552167e-05, - "loss": 0.4412, + "learning_rate": 5.229283839745028e-05, + "loss": 0.3192, "step": 893000 }, { "epoch": 0.54, - "learning_rate": 5.229085602996111e-05, - "loss": 0.4339, + "learning_rate": 5.229073843188972e-05, + "loss": 0.3131, "step": 893500 }, { "epoch": 0.54, - "learning_rate": 5.2288756064400545e-05, - "loss": 0.4364, + "learning_rate": 5.228863846632915e-05, + "loss": 0.3185, "step": 894000 }, { "epoch": 0.54, - "learning_rate": 5.22866602987711e-05, - "loss": 0.4447, + "learning_rate": 5.228653850076858e-05, + "loss": 0.3267, "step": 894500 }, { "epoch": 0.54, - "learning_rate": 5.228456033321054e-05, - "loss": 0.4455, + "learning_rate": 5.228443853520802e-05, + "loss": 0.322, "step": 895000 }, { "epoch": 0.54, - "learning_rate": 5.228246036764997e-05, - "loss": 0.4344, + "learning_rate": 5.2282338569647455e-05, + "loss": 0.3216, "step": 895500 }, { "epoch": 0.54, - "learning_rate": 5.2280360402089405e-05, - "loss": 0.4252, + "learning_rate": 5.2280242804018015e-05, + "loss": 0.3121, "step": 896000 }, { "epoch": 0.54, - "learning_rate": 5.2278260436528846e-05, - "loss": 0.4314, + "learning_rate": 5.227814283845745e-05, + "loss": 0.3185, "step": 896500 }, { "epoch": 0.54, - "learning_rate": 5.227616047096827e-05, - "loss": 0.437, + "learning_rate": 5.227604287289688e-05, + "loss": 0.3203, "step": 897000 }, { "epoch": 0.54, - "learning_rate": 5.2274060505407706e-05, - "loss": 0.4309, + "learning_rate": 5.2273942907336315e-05, + "loss": 0.3218, "step": 897500 }, { "epoch": 0.54, - "learning_rate": 5.2271960539847146e-05, - "loss": 0.4412, + "learning_rate": 5.2271842941775756e-05, + "loss": 0.3215, "step": 898000 }, { "epoch": 0.54, - "learning_rate": 5.2269864774217706e-05, - "loss": 0.4329, + "learning_rate": 5.2269747176146316e-05, + "loss": 0.3233, "step": 898500 }, { "epoch": 0.54, - "learning_rate": 5.226776480865714e-05, - "loss": 0.4397, + "learning_rate": 5.226765141051687e-05, + "loss": 0.3237, "step": 899000 }, { "epoch": 0.54, - "learning_rate": 5.2265664843096566e-05, - "loss": 0.4336, + "learning_rate": 5.226555564488742e-05, + "loss": 0.3287, "step": 899500 }, { "epoch": 0.54, - "learning_rate": 5.226356907746713e-05, - "loss": 0.4423, + "learning_rate": 5.2263455679326857e-05, + "loss": 0.3241, "step": 900000 }, { "epoch": 0.54, - "eval_loss": 0.41384953260421753, - "eval_runtime": 1113.6597, - "eval_samples_per_second": 472.963, - "eval_steps_per_second": 78.827, + "eval_loss": 0.2881280183792114, + "eval_runtime": 1457.6063, + "eval_samples_per_second": 361.36, + "eval_steps_per_second": 60.227, "step": 900000 }, { "epoch": 0.54, - "learning_rate": 5.226146911190657e-05, - "loss": 0.4501, + "learning_rate": 5.226135571376629e-05, + "loss": 0.3225, "step": 900500 }, { "epoch": 0.54, - "learning_rate": 5.2259369146346e-05, - "loss": 0.4412, + "learning_rate": 5.225925574820573e-05, + "loss": 0.3167, "step": 901000 }, { "epoch": 0.54, - "learning_rate": 5.2257269180785434e-05, - "loss": 0.4312, + "learning_rate": 5.2257155782645164e-05, + "loss": 0.3186, "step": 901500 }, { "epoch": 0.54, - "learning_rate": 5.225516921522487e-05, - "loss": 0.4328, + "learning_rate": 5.22550558170846e-05, + "loss": 0.3203, "step": 902000 }, { "epoch": 0.54, - "learning_rate": 5.225307344959543e-05, - "loss": 0.4359, + "learning_rate": 5.225295585152404e-05, + "loss": 0.3126, "step": 902500 }, { "epoch": 0.54, - "learning_rate": 5.225097348403486e-05, - "loss": 0.4201, + "learning_rate": 5.225085588596347e-05, + "loss": 0.3093, "step": 903000 }, { "epoch": 0.54, - "learning_rate": 5.22488735184743e-05, - "loss": 0.4353, + "learning_rate": 5.2248755920402904e-05, + "loss": 0.3177, "step": 903500 }, { "epoch": 0.54, - "learning_rate": 5.224677355291373e-05, - "loss": 0.4411, + "learning_rate": 5.2246660154773465e-05, + "loss": 0.3165, "step": 904000 }, { "epoch": 0.54, - "learning_rate": 5.224467778728429e-05, - "loss": 0.4302, + "learning_rate": 5.22445601892129e-05, + "loss": 0.3156, "step": 904500 }, { "epoch": 0.54, - "learning_rate": 5.224257782172372e-05, - "loss": 0.4395, + "learning_rate": 5.224246022365233e-05, + "loss": 0.3164, "step": 905000 }, { "epoch": 0.54, - "learning_rate": 5.224047785616316e-05, - "loss": 0.428, + "learning_rate": 5.2240364458022885e-05, + "loss": 0.3179, "step": 905500 }, { "epoch": 0.54, - "learning_rate": 5.2238377890602596e-05, - "loss": 0.4344, + "learning_rate": 5.2238264492462325e-05, + "loss": 0.3209, "step": 906000 }, { "epoch": 0.54, - "learning_rate": 5.223627792504202e-05, - "loss": 0.4424, + "learning_rate": 5.223616452690176e-05, + "loss": 0.3196, "step": 906500 }, { "epoch": 0.54, - "learning_rate": 5.223417795948146e-05, - "loss": 0.4332, + "learning_rate": 5.223406456134119e-05, + "loss": 0.3225, "step": 907000 }, { "epoch": 0.54, - "learning_rate": 5.2232077993920896e-05, - "loss": 0.434, + "learning_rate": 5.223196459578063e-05, + "loss": 0.3176, "step": 907500 }, { "epoch": 0.54, - "learning_rate": 5.222997802836033e-05, - "loss": 0.4299, + "learning_rate": 5.2229864630220066e-05, + "loss": 0.3192, "step": 908000 }, { "epoch": 0.54, - "learning_rate": 5.22278822627309e-05, - "loss": 0.4345, + "learning_rate": 5.222776466465949e-05, + "loss": 0.315, "step": 908500 }, { "epoch": 0.54, - "learning_rate": 5.222578229717032e-05, - "loss": 0.4472, + "learning_rate": 5.222566469909893e-05, + "loss": 0.3209, "step": 909000 }, { "epoch": 0.55, - "learning_rate": 5.2223686531540884e-05, - "loss": 0.4365, + "learning_rate": 5.2223564733538366e-05, + "loss": 0.3115, "step": 909500 }, { "epoch": 0.55, - "learning_rate": 5.222158656598032e-05, - "loss": 0.4291, + "learning_rate": 5.222146476797781e-05, + "loss": 0.3159, "step": 910000 }, { "epoch": 0.55, - "learning_rate": 5.221948660041976e-05, - "loss": 0.4303, + "learning_rate": 5.221936480241724e-05, + "loss": 0.3172, "step": 910500 }, { "epoch": 0.55, - "learning_rate": 5.221738663485919e-05, - "loss": 0.4381, + "learning_rate": 5.2217264836856674e-05, + "loss": 0.3207, "step": 911000 }, { "epoch": 0.55, - "learning_rate": 5.221528666929862e-05, - "loss": 0.4342, + "learning_rate": 5.221516907122723e-05, + "loss": 0.3204, "step": 911500 }, { "epoch": 0.55, - "learning_rate": 5.221318670373806e-05, - "loss": 0.4281, + "learning_rate": 5.221307330559779e-05, + "loss": 0.3136, "step": 912000 }, { "epoch": 0.55, - "learning_rate": 5.221108673817749e-05, - "loss": 0.4367, + "learning_rate": 5.221097334003723e-05, + "loss": 0.3188, "step": 912500 }, { "epoch": 0.55, - "learning_rate": 5.2208986772616925e-05, - "loss": 0.4348, + "learning_rate": 5.2208873374476654e-05, + "loss": 0.3172, "step": 913000 }, { "epoch": 0.55, - "learning_rate": 5.220689100698748e-05, - "loss": 0.452, + "learning_rate": 5.220677340891609e-05, + "loss": 0.3236, "step": 913500 }, { "epoch": 0.55, - "learning_rate": 5.220479104142692e-05, - "loss": 0.432, + "learning_rate": 5.220467344335553e-05, + "loss": 0.3202, "step": 914000 }, { "epoch": 0.55, - "learning_rate": 5.220269107586635e-05, - "loss": 0.4419, + "learning_rate": 5.220257347779496e-05, + "loss": 0.32, "step": 914500 }, { "epoch": 0.55, - "learning_rate": 5.2200591110305785e-05, - "loss": 0.4372, + "learning_rate": 5.2200473512234395e-05, + "loss": 0.3214, "step": 915000 }, { "epoch": 0.55, - "learning_rate": 5.219849534467635e-05, - "loss": 0.438, + "learning_rate": 5.2198373546673835e-05, + "loss": 0.3159, "step": 915500 }, { "epoch": 0.55, - "learning_rate": 5.219639537911578e-05, - "loss": 0.4308, + "learning_rate": 5.219627358111327e-05, + "loss": 0.3108, "step": 916000 }, { "epoch": 0.55, - "learning_rate": 5.219429541355521e-05, - "loss": 0.4305, + "learning_rate": 5.21941736155527e-05, + "loss": 0.3189, "step": 916500 }, { "epoch": 0.55, - "learning_rate": 5.219219544799465e-05, - "loss": 0.4293, + "learning_rate": 5.219207364999214e-05, + "loss": 0.3143, "step": 917000 }, { "epoch": 0.55, - "learning_rate": 5.219009968236521e-05, - "loss": 0.4417, + "learning_rate": 5.2189973684431576e-05, + "loss": 0.322, "step": 917500 }, { "epoch": 0.55, - "learning_rate": 5.218799971680465e-05, - "loss": 0.4457, + "learning_rate": 5.218787791880213e-05, + "loss": 0.3211, "step": 918000 }, { "epoch": 0.55, - "learning_rate": 5.2185899751244073e-05, - "loss": 0.4206, + "learning_rate": 5.218577795324157e-05, + "loss": 0.3136, "step": 918500 }, { "epoch": 0.55, - "learning_rate": 5.2183799785683514e-05, - "loss": 0.438, + "learning_rate": 5.218368218761212e-05, + "loss": 0.3213, "step": 919000 }, { "epoch": 0.55, - "learning_rate": 5.2181704020054074e-05, - "loss": 0.4321, + "learning_rate": 5.218158222205156e-05, + "loss": 0.317, "step": 919500 }, { "epoch": 0.55, - "learning_rate": 5.217960405449351e-05, - "loss": 0.4331, + "learning_rate": 5.217948225649099e-05, + "loss": 0.3207, "step": 920000 }, { "epoch": 0.55, - "learning_rate": 5.217750408893294e-05, - "loss": 0.4373, + "learning_rate": 5.217738229093043e-05, + "loss": 0.3203, "step": 920500 }, { "epoch": 0.55, - "learning_rate": 5.2175404123372374e-05, - "loss": 0.4381, + "learning_rate": 5.2175290725232104e-05, + "loss": 0.3193, "step": 921000 }, { "epoch": 0.55, - "learning_rate": 5.217330415781181e-05, - "loss": 0.435, + "learning_rate": 5.2173190759671544e-05, + "loss": 0.3176, "step": 921500 }, { "epoch": 0.55, - "learning_rate": 5.217120419225125e-05, - "loss": 0.4228, + "learning_rate": 5.217109079411098e-05, + "loss": 0.3093, "step": 922000 }, { "epoch": 0.55, - "learning_rate": 5.216910422669068e-05, - "loss": 0.4324, + "learning_rate": 5.216899082855041e-05, + "loss": 0.3196, "step": 922500 }, { "epoch": 0.55, - "learning_rate": 5.216700846106124e-05, - "loss": 0.4335, + "learning_rate": 5.2166890862989845e-05, + "loss": 0.3148, "step": 923000 }, { "epoch": 0.55, - "learning_rate": 5.216490849550067e-05, - "loss": 0.4376, + "learning_rate": 5.216479089742928e-05, + "loss": 0.3262, "step": 923500 }, { "epoch": 0.55, - "learning_rate": 5.216280852994011e-05, - "loss": 0.4229, + "learning_rate": 5.216269093186872e-05, + "loss": 0.3146, "step": 924000 }, { "epoch": 0.55, - "learning_rate": 5.216070856437954e-05, - "loss": 0.4261, + "learning_rate": 5.216059096630815e-05, + "loss": 0.3134, "step": 924500 }, { "epoch": 0.55, - "learning_rate": 5.2158608598818976e-05, - "loss": 0.4385, + "learning_rate": 5.2158491000747585e-05, + "loss": 0.3196, "step": 925000 }, { "epoch": 0.55, - "learning_rate": 5.215651283318953e-05, - "loss": 0.4417, + "learning_rate": 5.215639523511814e-05, + "loss": 0.3192, "step": 925500 }, { "epoch": 0.56, - "learning_rate": 5.215441286762897e-05, - "loss": 0.4465, + "learning_rate": 5.215429526955758e-05, + "loss": 0.3251, "step": 926000 }, { "epoch": 0.56, - "learning_rate": 5.21523129020684e-05, - "loss": 0.4265, + "learning_rate": 5.215219530399701e-05, + "loss": 0.313, "step": 926500 }, { "epoch": 0.56, - "learning_rate": 5.215021713643896e-05, - "loss": 0.4352, + "learning_rate": 5.2150095338436446e-05, + "loss": 0.3165, "step": 927000 }, { "epoch": 0.56, - "learning_rate": 5.21481171708784e-05, - "loss": 0.4338, + "learning_rate": 5.2147999572807e-05, + "loss": 0.3179, "step": 927500 }, { "epoch": 0.56, - "learning_rate": 5.214601720531783e-05, - "loss": 0.4342, + "learning_rate": 5.214589960724644e-05, + "loss": 0.321, "step": 928000 }, { "epoch": 0.56, - "learning_rate": 5.2143917239757264e-05, - "loss": 0.4419, + "learning_rate": 5.214379964168587e-05, + "loss": 0.3255, "step": 928500 }, { "epoch": 0.56, - "learning_rate": 5.2141817274196704e-05, - "loss": 0.4385, + "learning_rate": 5.214169967612531e-05, + "loss": 0.3183, "step": 929000 }, { "epoch": 0.56, - "learning_rate": 5.2139721508567264e-05, - "loss": 0.4368, + "learning_rate": 5.213959971056475e-05, + "loss": 0.3181, "step": 929500 }, { "epoch": 0.56, - "learning_rate": 5.21376215430067e-05, - "loss": 0.4364, + "learning_rate": 5.213749974500418e-05, + "loss": 0.3193, "step": 930000 }, { "epoch": 0.56, - "learning_rate": 5.2135521577446124e-05, - "loss": 0.4367, + "learning_rate": 5.2135399779443614e-05, + "loss": 0.3176, "step": 930500 }, { "epoch": 0.56, - "learning_rate": 5.2133421611885565e-05, - "loss": 0.4379, + "learning_rate": 5.2133299813883054e-05, + "loss": 0.3234, "step": 931000 }, { "epoch": 0.56, - "learning_rate": 5.2131321646325e-05, - "loss": 0.4332, + "learning_rate": 5.213119984832249e-05, + "loss": 0.3171, "step": 931500 }, { "epoch": 0.56, - "learning_rate": 5.212922168076443e-05, - "loss": 0.437, + "learning_rate": 5.212909988276192e-05, + "loss": 0.3203, "step": 932000 }, { "epoch": 0.56, - "learning_rate": 5.212712171520387e-05, - "loss": 0.4465, + "learning_rate": 5.2127008317063595e-05, + "loss": 0.328, "step": 932500 }, { "epoch": 0.56, - "learning_rate": 5.2125021749643305e-05, - "loss": 0.4364, + "learning_rate": 5.2124908351503035e-05, + "loss": 0.3223, "step": 933000 }, { "epoch": 0.56, - "learning_rate": 5.212292178408274e-05, - "loss": 0.4251, + "learning_rate": 5.212280838594247e-05, + "loss": 0.3091, "step": 933500 }, { "epoch": 0.56, - "learning_rate": 5.212082181852218e-05, - "loss": 0.4339, + "learning_rate": 5.21207084203819e-05, + "loss": 0.3178, "step": 934000 }, { "epoch": 0.56, - "learning_rate": 5.211872185296161e-05, - "loss": 0.4384, + "learning_rate": 5.211860845482134e-05, + "loss": 0.3188, "step": 934500 }, { "epoch": 0.56, - "learning_rate": 5.2116621887401046e-05, - "loss": 0.4342, + "learning_rate": 5.2116508489260776e-05, + "loss": 0.3149, "step": 935000 }, { "epoch": 0.56, - "learning_rate": 5.21145261217716e-05, - "loss": 0.4378, + "learning_rate": 5.211440852370021e-05, + "loss": 0.3193, "step": 935500 }, { "epoch": 0.56, - "learning_rate": 5.211242615621104e-05, - "loss": 0.432, + "learning_rate": 5.211230855813965e-05, + "loss": 0.3173, "step": 936000 }, { "epoch": 0.56, - "learning_rate": 5.211032619065047e-05, - "loss": 0.4215, + "learning_rate": 5.211020859257908e-05, + "loss": 0.3103, "step": 936500 }, { "epoch": 0.56, - "learning_rate": 5.210822622508991e-05, - "loss": 0.4411, + "learning_rate": 5.2108112826949636e-05, + "loss": 0.3221, "step": 937000 }, { "epoch": 0.56, - "learning_rate": 5.210612625952935e-05, - "loss": 0.4399, + "learning_rate": 5.210601286138907e-05, + "loss": 0.3249, "step": 937500 }, { "epoch": 0.56, - "learning_rate": 5.210402629396878e-05, - "loss": 0.437, + "learning_rate": 5.210391289582851e-05, + "loss": 0.3149, "step": 938000 }, { "epoch": 0.56, - "learning_rate": 5.2101926328408214e-05, - "loss": 0.437, + "learning_rate": 5.2101812930267944e-05, + "loss": 0.3195, "step": 938500 }, { "epoch": 0.56, - "learning_rate": 5.2099830562778774e-05, - "loss": 0.4355, + "learning_rate": 5.2099721364569624e-05, + "loss": 0.3157, "step": 939000 }, { "epoch": 0.56, - "learning_rate": 5.209773059721821e-05, - "loss": 0.4399, + "learning_rate": 5.209762139900905e-05, + "loss": 0.3134, "step": 939500 }, { "epoch": 0.56, - "learning_rate": 5.209563063165764e-05, - "loss": 0.4376, + "learning_rate": 5.209552143344849e-05, + "loss": 0.323, "step": 940000 }, { "epoch": 0.56, - "learning_rate": 5.2093530666097075e-05, - "loss": 0.4273, + "learning_rate": 5.2093421467887924e-05, + "loss": 0.3121, "step": 940500 }, { "epoch": 0.56, - "learning_rate": 5.209143070053651e-05, - "loss": 0.4274, + "learning_rate": 5.209132150232736e-05, + "loss": 0.3184, "step": 941000 }, { "epoch": 0.56, - "learning_rate": 5.208933493490707e-05, - "loss": 0.4377, + "learning_rate": 5.208922573669792e-05, + "loss": 0.3199, "step": 941500 }, { "epoch": 0.56, - "learning_rate": 5.20872349693465e-05, - "loss": 0.4497, + "learning_rate": 5.208712577113735e-05, + "loss": 0.3276, "step": 942000 }, { "epoch": 0.57, - "learning_rate": 5.208513500378594e-05, - "loss": 0.4283, + "learning_rate": 5.2085025805576785e-05, + "loss": 0.3163, "step": 942500 }, { "epoch": 0.57, - "learning_rate": 5.208303503822537e-05, - "loss": 0.4406, + "learning_rate": 5.208292584001622e-05, + "loss": 0.315, "step": 943000 }, { "epoch": 0.57, - "learning_rate": 5.20809350726648e-05, - "loss": 0.423, + "learning_rate": 5.208082587445566e-05, + "loss": 0.3161, "step": 943500 }, { "epoch": 0.57, - "learning_rate": 5.207883510710424e-05, - "loss": 0.437, + "learning_rate": 5.207872590889509e-05, + "loss": 0.3254, "step": 944000 }, { "epoch": 0.57, - "learning_rate": 5.2076735141543676e-05, - "loss": 0.4338, + "learning_rate": 5.2076625943334526e-05, + "loss": 0.3158, "step": 944500 }, { "epoch": 0.57, - "learning_rate": 5.2074639375914236e-05, - "loss": 0.4248, + "learning_rate": 5.2074525977773966e-05, + "loss": 0.3167, "step": 945000 }, { "epoch": 0.57, - "learning_rate": 5.207253941035367e-05, - "loss": 0.4395, + "learning_rate": 5.20724260122134e-05, + "loss": 0.3213, "step": 945500 }, { "epoch": 0.57, - "learning_rate": 5.20704394447931e-05, - "loss": 0.4367, + "learning_rate": 5.207032604665283e-05, + "loss": 0.3212, "step": 946000 }, { "epoch": 0.57, - "learning_rate": 5.2068339479232537e-05, - "loss": 0.4402, + "learning_rate": 5.206822608109227e-05, + "loss": 0.3164, "step": 946500 }, { "epoch": 0.57, - "learning_rate": 5.206623951367198e-05, - "loss": 0.4373, + "learning_rate": 5.206613031546283e-05, + "loss": 0.3205, "step": 947000 }, { "epoch": 0.57, - "learning_rate": 5.206413954811141e-05, - "loss": 0.4383, + "learning_rate": 5.206403034990226e-05, + "loss": 0.3217, "step": 947500 }, { "epoch": 0.57, - "learning_rate": 5.2062043782481964e-05, - "loss": 0.4357, + "learning_rate": 5.2061934584272814e-05, + "loss": 0.3198, "step": 948000 }, { "epoch": 0.57, - "learning_rate": 5.20599438169214e-05, - "loss": 0.4258, + "learning_rate": 5.2059834618712254e-05, + "loss": 0.3113, "step": 948500 }, { "epoch": 0.57, - "learning_rate": 5.205784385136084e-05, - "loss": 0.4272, + "learning_rate": 5.205773465315169e-05, + "loss": 0.3134, "step": 949000 }, { "epoch": 0.57, - "learning_rate": 5.205574388580027e-05, - "loss": 0.4341, + "learning_rate": 5.205563468759112e-05, + "loss": 0.3171, "step": 949500 }, { "epoch": 0.57, - "learning_rate": 5.2053648120170825e-05, - "loss": 0.4331, + "learning_rate": 5.2053538921961674e-05, + "loss": 0.3171, "step": 950000 }, { "epoch": 0.57, - "learning_rate": 5.205154815461026e-05, - "loss": 0.4322, + "learning_rate": 5.2051438956401115e-05, + "loss": 0.3122, "step": 950500 }, { "epoch": 0.57, - "learning_rate": 5.20494481890497e-05, - "loss": 0.4364, + "learning_rate": 5.204933899084055e-05, + "loss": 0.3179, "step": 951000 }, { "epoch": 0.57, - "learning_rate": 5.204734822348913e-05, - "loss": 0.4312, + "learning_rate": 5.204723902527998e-05, + "loss": 0.3206, "step": 951500 }, { "epoch": 0.57, - "learning_rate": 5.2045248257928565e-05, - "loss": 0.4386, + "learning_rate": 5.204513905971942e-05, + "loss": 0.3144, "step": 952000 }, { "epoch": 0.57, - "learning_rate": 5.2043148292368005e-05, - "loss": 0.4341, + "learning_rate": 5.2043039094158855e-05, + "loss": 0.3161, "step": 952500 }, { "epoch": 0.57, - "learning_rate": 5.204104832680744e-05, - "loss": 0.4412, + "learning_rate": 5.204093912859829e-05, + "loss": 0.3208, "step": 953000 }, { "epoch": 0.57, - "learning_rate": 5.203894836124687e-05, - "loss": 0.4374, + "learning_rate": 5.203884336296885e-05, + "loss": 0.321, "step": 953500 }, { "epoch": 0.57, - "learning_rate": 5.203685259561743e-05, - "loss": 0.4242, + "learning_rate": 5.203674339740828e-05, + "loss": 0.3122, "step": 954000 }, { "epoch": 0.57, - "learning_rate": 5.2034752630056866e-05, - "loss": 0.4296, + "learning_rate": 5.2034643431847716e-05, + "loss": 0.3143, "step": 954500 }, { "epoch": 0.57, - "learning_rate": 5.20326526644963e-05, - "loss": 0.4266, + "learning_rate": 5.2032543466287156e-05, + "loss": 0.3162, "step": 955000 }, { "epoch": 0.57, - "learning_rate": 5.203055269893574e-05, - "loss": 0.4289, + "learning_rate": 5.203044350072659e-05, + "loss": 0.3071, "step": 955500 }, { "epoch": 0.57, - "learning_rate": 5.202845273337517e-05, - "loss": 0.4276, + "learning_rate": 5.202834773509714e-05, + "loss": 0.3168, "step": 956000 }, { "epoch": 0.57, - "learning_rate": 5.202635276781461e-05, - "loss": 0.441, + "learning_rate": 5.202624776953658e-05, + "loss": 0.3183, "step": 956500 }, { "epoch": 0.57, - "learning_rate": 5.202425280225405e-05, - "loss": 0.4311, + "learning_rate": 5.202414780397602e-05, + "loss": 0.3192, "step": 957000 }, { "epoch": 0.57, - "learning_rate": 5.202215283669348e-05, - "loss": 0.4326, + "learning_rate": 5.202204783841545e-05, + "loss": 0.3114, "step": 957500 }, { "epoch": 0.57, - "learning_rate": 5.2020057071064034e-05, - "loss": 0.431, + "learning_rate": 5.2019947872854884e-05, + "loss": 0.313, "step": 958000 }, { "epoch": 0.57, - "learning_rate": 5.201795710550347e-05, - "loss": 0.4346, + "learning_rate": 5.2017847907294324e-05, + "loss": 0.3179, "step": 958500 }, { "epoch": 0.57, - "learning_rate": 5.201585713994291e-05, - "loss": 0.4277, + "learning_rate": 5.201575214166488e-05, + "loss": 0.315, "step": 959000 }, { "epoch": 0.58, - "learning_rate": 5.201376137431346e-05, - "loss": 0.4418, + "learning_rate": 5.201365217610431e-05, + "loss": 0.3243, "step": 959500 }, { "epoch": 0.58, - "learning_rate": 5.2011661408752895e-05, - "loss": 0.4346, + "learning_rate": 5.2011552210543745e-05, + "loss": 0.3177, "step": 960000 }, { "epoch": 0.58, - "learning_rate": 5.200956144319233e-05, - "loss": 0.4239, + "learning_rate": 5.2009452244983185e-05, + "loss": 0.3108, "step": 960500 }, { "epoch": 0.58, - "learning_rate": 5.200746147763177e-05, - "loss": 0.4354, + "learning_rate": 5.200735227942262e-05, + "loss": 0.3222, "step": 961000 }, { "epoch": 0.58, - "learning_rate": 5.20053615120712e-05, - "loss": 0.426, + "learning_rate": 5.200525231386205e-05, + "loss": 0.3141, "step": 961500 }, { "epoch": 0.58, - "learning_rate": 5.200326154651064e-05, - "loss": 0.4412, + "learning_rate": 5.2003152348301485e-05, + "loss": 0.3237, "step": 962000 }, { "epoch": 0.58, - "learning_rate": 5.2001161580950076e-05, - "loss": 0.4387, + "learning_rate": 5.200105238274092e-05, + "loss": 0.3198, "step": 962500 }, { "epoch": 0.58, - "learning_rate": 5.19990616153895e-05, - "loss": 0.4223, + "learning_rate": 5.199895241718036e-05, + "loss": 0.3106, "step": 963000 }, { "epoch": 0.58, - "learning_rate": 5.199697004969118e-05, - "loss": 0.4381, + "learning_rate": 5.199685665155092e-05, + "loss": 0.3217, "step": 963500 }, { "epoch": 0.58, - "learning_rate": 5.1994870084130616e-05, - "loss": 0.4403, + "learning_rate": 5.1994756685990346e-05, + "loss": 0.3202, "step": 964000 }, { "epoch": 0.58, - "learning_rate": 5.1992770118570056e-05, - "loss": 0.4336, + "learning_rate": 5.199265672042978e-05, + "loss": 0.3145, "step": 964500 }, { "epoch": 0.58, - "learning_rate": 5.199067015300949e-05, - "loss": 0.4251, + "learning_rate": 5.199055675486922e-05, + "loss": 0.3106, "step": 965000 }, { "epoch": 0.58, - "learning_rate": 5.198857018744892e-05, - "loss": 0.4273, + "learning_rate": 5.198845678930865e-05, + "loss": 0.3159, "step": 965500 }, { "epoch": 0.58, - "learning_rate": 5.1986470221888364e-05, - "loss": 0.4305, + "learning_rate": 5.1986365223610334e-05, + "loss": 0.3174, "step": 966000 }, { "epoch": 0.58, - "learning_rate": 5.19843702563278e-05, - "loss": 0.4311, + "learning_rate": 5.198426525804977e-05, + "loss": 0.3155, "step": 966500 }, { "epoch": 0.58, - "learning_rate": 5.198227449069835e-05, - "loss": 0.4429, + "learning_rate": 5.19821652924892e-05, + "loss": 0.3246, "step": 967000 }, { "epoch": 0.58, - "learning_rate": 5.1980174525137784e-05, - "loss": 0.4323, + "learning_rate": 5.198006952685976e-05, + "loss": 0.3127, "step": 967500 }, { "epoch": 0.58, - "learning_rate": 5.1978074559577224e-05, - "loss": 0.4343, + "learning_rate": 5.1977969561299194e-05, + "loss": 0.3173, "step": 968000 }, { "epoch": 0.58, - "learning_rate": 5.197597459401666e-05, - "loss": 0.4308, + "learning_rate": 5.197586959573863e-05, + "loss": 0.3133, "step": 968500 }, { "epoch": 0.58, - "learning_rate": 5.19738746284561e-05, - "loss": 0.4362, + "learning_rate": 5.197376963017807e-05, + "loss": 0.3183, "step": 969000 }, { "epoch": 0.58, - "learning_rate": 5.197177886282665e-05, - "loss": 0.4365, + "learning_rate": 5.19716696646175e-05, + "loss": 0.3213, "step": 969500 }, { "epoch": 0.58, - "learning_rate": 5.1969678897266085e-05, - "loss": 0.4323, + "learning_rate": 5.1969569699056935e-05, + "loss": 0.3175, "step": 970000 }, { "epoch": 0.58, - "learning_rate": 5.196758313163664e-05, - "loss": 0.4396, + "learning_rate": 5.1967469733496375e-05, + "loss": 0.3268, "step": 970500 }, { "epoch": 0.58, - "learning_rate": 5.196548316607607e-05, - "loss": 0.434, + "learning_rate": 5.19653697679358e-05, + "loss": 0.3177, "step": 971000 }, { "epoch": 0.58, - "learning_rate": 5.196338320051551e-05, - "loss": 0.4144, + "learning_rate": 5.1963269802375235e-05, + "loss": 0.3098, "step": 971500 }, { "epoch": 0.58, - "learning_rate": 5.1961283234954946e-05, - "loss": 0.4275, + "learning_rate": 5.1961174036745796e-05, + "loss": 0.3135, "step": 972000 }, { "epoch": 0.58, - "learning_rate": 5.195918326939438e-05, - "loss": 0.4501, + "learning_rate": 5.1959074071185236e-05, + "loss": 0.3217, "step": 972500 }, { "epoch": 0.58, - "learning_rate": 5.195708330383382e-05, - "loss": 0.4291, + "learning_rate": 5.195697410562467e-05, + "loss": 0.3145, "step": 973000 }, { "epoch": 0.58, - "learning_rate": 5.195498333827325e-05, - "loss": 0.4297, + "learning_rate": 5.1954874140064096e-05, + "loss": 0.3152, "step": 973500 }, { "epoch": 0.58, - "learning_rate": 5.1952883372712686e-05, - "loss": 0.4263, + "learning_rate": 5.1952774174503536e-05, + "loss": 0.3128, "step": 974000 }, { "epoch": 0.58, - "learning_rate": 5.195078760708325e-05, - "loss": 0.4368, + "learning_rate": 5.1950678408874097e-05, + "loss": 0.3215, "step": 974500 }, { "epoch": 0.58, - "learning_rate": 5.194868764152268e-05, - "loss": 0.4304, + "learning_rate": 5.194857844331353e-05, + "loss": 0.3141, "step": 975000 }, { "epoch": 0.58, - "learning_rate": 5.1946587675962114e-05, - "loss": 0.4361, + "learning_rate": 5.194647847775297e-05, + "loss": 0.3213, "step": 975500 }, { "epoch": 0.59, - "learning_rate": 5.1944487710401554e-05, - "loss": 0.4307, + "learning_rate": 5.19443785121924e-05, + "loss": 0.311, "step": 976000 }, { "epoch": 0.59, - "learning_rate": 5.194238774484099e-05, - "loss": 0.4344, + "learning_rate": 5.194227854663183e-05, + "loss": 0.3137, "step": 976500 }, { "epoch": 0.59, - "learning_rate": 5.1940287779280414e-05, - "loss": 0.4342, + "learning_rate": 5.194017858107127e-05, + "loss": 0.3218, "step": 977000 }, { "epoch": 0.59, - "learning_rate": 5.1938187813719854e-05, - "loss": 0.433, + "learning_rate": 5.1938078615510704e-05, + "loss": 0.3156, "step": 977500 }, { "epoch": 0.59, - "learning_rate": 5.193608784815929e-05, - "loss": 0.4325, + "learning_rate": 5.193597864995014e-05, + "loss": 0.3214, "step": 978000 }, { "epoch": 0.59, - "learning_rate": 5.193398788259872e-05, - "loss": 0.4247, + "learning_rate": 5.193387868438958e-05, + "loss": 0.3106, "step": 978500 }, { "epoch": 0.59, - "learning_rate": 5.19318963169004e-05, - "loss": 0.4297, + "learning_rate": 5.193178291876013e-05, + "loss": 0.3125, "step": 979000 }, { "epoch": 0.59, - "learning_rate": 5.1929796351339835e-05, - "loss": 0.4373, + "learning_rate": 5.1929682953199565e-05, + "loss": 0.3204, "step": 979500 }, { "epoch": 0.59, - "learning_rate": 5.1927696385779275e-05, - "loss": 0.4279, + "learning_rate": 5.1927582987639e-05, + "loss": 0.3127, "step": 980000 }, { "epoch": 0.59, - "learning_rate": 5.192559642021871e-05, - "loss": 0.4307, + "learning_rate": 5.192548302207844e-05, + "loss": 0.3123, "step": 980500 }, { "epoch": 0.59, - "learning_rate": 5.192349645465814e-05, - "loss": 0.439, + "learning_rate": 5.192338725644899e-05, + "loss": 0.3205, "step": 981000 }, { "epoch": 0.59, - "learning_rate": 5.192139648909758e-05, - "loss": 0.4377, + "learning_rate": 5.1921287290888426e-05, + "loss": 0.3155, "step": 981500 }, { "epoch": 0.59, - "learning_rate": 5.191929652353701e-05, - "loss": 0.4369, + "learning_rate": 5.1919187325327866e-05, + "loss": 0.3169, "step": 982000 }, { "epoch": 0.59, - "learning_rate": 5.191719655797645e-05, - "loss": 0.4363, + "learning_rate": 5.19170873597673e-05, + "loss": 0.3176, "step": 982500 }, { "epoch": 0.59, - "learning_rate": 5.191510079234701e-05, - "loss": 0.4242, + "learning_rate": 5.191499159413785e-05, + "loss": 0.3186, "step": 983000 }, { "epoch": 0.59, - "learning_rate": 5.191300082678644e-05, - "loss": 0.4264, + "learning_rate": 5.1912891628577286e-05, + "loss": 0.3134, "step": 983500 }, { "epoch": 0.59, - "learning_rate": 5.191090086122588e-05, - "loss": 0.4293, + "learning_rate": 5.191079586294785e-05, + "loss": 0.3211, "step": 984000 }, { "epoch": 0.59, - "learning_rate": 5.190880089566531e-05, - "loss": 0.4302, + "learning_rate": 5.190869589738729e-05, + "loss": 0.3154, "step": 984500 }, { "epoch": 0.59, - "learning_rate": 5.190670513003587e-05, - "loss": 0.4386, + "learning_rate": 5.190659593182672e-05, + "loss": 0.3178, "step": 985000 }, { "epoch": 0.59, - "learning_rate": 5.1904605164475304e-05, - "loss": 0.4313, + "learning_rate": 5.190449596626615e-05, + "loss": 0.3165, "step": 985500 }, { "epoch": 0.59, - "learning_rate": 5.190250519891474e-05, - "loss": 0.4399, + "learning_rate": 5.190239600070559e-05, + "loss": 0.3192, "step": 986000 }, { "epoch": 0.59, - "learning_rate": 5.190040523335417e-05, - "loss": 0.4361, + "learning_rate": 5.190030023507615e-05, + "loss": 0.321, "step": 986500 }, { "epoch": 0.59, - "learning_rate": 5.189830946772473e-05, - "loss": 0.4386, + "learning_rate": 5.189820026951558e-05, + "loss": 0.32, "step": 987000 }, { "epoch": 0.59, - "learning_rate": 5.1896209502164165e-05, - "loss": 0.4256, + "learning_rate": 5.1896100303955015e-05, + "loss": 0.3086, "step": 987500 }, { "epoch": 0.59, - "learning_rate": 5.18941095366036e-05, - "loss": 0.4304, + "learning_rate": 5.189400033839445e-05, + "loss": 0.3134, "step": 988000 }, { "epoch": 0.59, - "learning_rate": 5.189200957104304e-05, - "loss": 0.4324, + "learning_rate": 5.189190037283388e-05, + "loss": 0.3164, "step": 988500 }, { "epoch": 0.59, - "learning_rate": 5.188991380541359e-05, - "loss": 0.4414, + "learning_rate": 5.188980040727332e-05, + "loss": 0.3171, "step": 989000 }, { "epoch": 0.59, - "learning_rate": 5.1887813839853025e-05, - "loss": 0.4335, + "learning_rate": 5.1887700441712755e-05, + "loss": 0.3129, "step": 989500 }, { "epoch": 0.59, - "learning_rate": 5.1885713874292466e-05, - "loss": 0.4296, + "learning_rate": 5.188560047615219e-05, + "loss": 0.3158, "step": 990000 }, { "epoch": 0.59, - "learning_rate": 5.18836139087319e-05, - "loss": 0.4324, + "learning_rate": 5.188350051059163e-05, + "loss": 0.3164, "step": 990500 }, { "epoch": 0.59, - "learning_rate": 5.188151814310245e-05, - "loss": 0.4354, + "learning_rate": 5.188140054503106e-05, + "loss": 0.3218, "step": 991000 }, { "epoch": 0.59, - "learning_rate": 5.1879418177541886e-05, - "loss": 0.4305, + "learning_rate": 5.1879300579470496e-05, + "loss": 0.3109, "step": 991500 }, { "epoch": 0.59, - "learning_rate": 5.1877318211981326e-05, - "loss": 0.4283, + "learning_rate": 5.1877200613909936e-05, + "loss": 0.314, "step": 992000 }, { "epoch": 0.6, - "learning_rate": 5.187522244635188e-05, - "loss": 0.4307, + "learning_rate": 5.187510484828049e-05, + "loss": 0.3153, "step": 992500 }, { "epoch": 0.6, - "learning_rate": 5.1873122480791313e-05, - "loss": 0.4306, + "learning_rate": 5.187300908265104e-05, + "loss": 0.3181, "step": 993000 }, { "epoch": 0.6, - "learning_rate": 5.187102251523075e-05, - "loss": 0.4329, + "learning_rate": 5.187090911709048e-05, + "loss": 0.3145, "step": 993500 }, { "epoch": 0.6, - "learning_rate": 5.186892254967019e-05, - "loss": 0.4255, + "learning_rate": 5.186880915152991e-05, + "loss": 0.3111, "step": 994000 }, { "epoch": 0.6, - "learning_rate": 5.186682258410962e-05, - "loss": 0.4252, + "learning_rate": 5.186670918596935e-05, + "loss": 0.3148, "step": 994500 }, { "epoch": 0.6, - "learning_rate": 5.1864722618549054e-05, - "loss": 0.4255, + "learning_rate": 5.1864609220408784e-05, + "loss": 0.3115, "step": 995000 }, { "epoch": 0.6, - "learning_rate": 5.1862626852919614e-05, - "loss": 0.4262, + "learning_rate": 5.1862517654710464e-05, + "loss": 0.3162, "step": 995500 }, { "epoch": 0.6, - "learning_rate": 5.186052688735905e-05, - "loss": 0.4298, + "learning_rate": 5.18604176891499e-05, + "loss": 0.3192, "step": 996000 }, { "epoch": 0.6, - "learning_rate": 5.185842692179848e-05, - "loss": 0.433, + "learning_rate": 5.185831772358934e-05, + "loss": 0.3137, "step": 996500 }, { "epoch": 0.6, - "learning_rate": 5.185632695623792e-05, - "loss": 0.4348, + "learning_rate": 5.185621775802877e-05, + "loss": 0.3114, "step": 997000 }, { "epoch": 0.6, - "learning_rate": 5.1854231190608475e-05, - "loss": 0.4303, + "learning_rate": 5.18541177924682e-05, + "loss": 0.3118, "step": 997500 }, { "epoch": 0.6, - "learning_rate": 5.185213122504791e-05, - "loss": 0.4285, + "learning_rate": 5.185201782690764e-05, + "loss": 0.3152, "step": 998000 }, { "epoch": 0.6, - "learning_rate": 5.185003125948734e-05, - "loss": 0.4397, + "learning_rate": 5.184991786134707e-05, + "loss": 0.3165, "step": 998500 }, { "epoch": 0.6, - "learning_rate": 5.184793129392678e-05, - "loss": 0.4273, + "learning_rate": 5.1847817895786505e-05, + "loss": 0.3139, "step": 999000 }, { "epoch": 0.6, - "learning_rate": 5.1845831328366216e-05, - "loss": 0.4321, + "learning_rate": 5.184572213015706e-05, + "loss": 0.3118, "step": 999500 }, { "epoch": 0.6, - "learning_rate": 5.184373136280565e-05, - "loss": 0.4303, + "learning_rate": 5.18436221645965e-05, + "loss": 0.3179, "step": 1000000 }, { "epoch": 0.6, - "eval_loss": 0.4063897132873535, - "eval_runtime": 1123.4797, - "eval_samples_per_second": 468.829, - "eval_steps_per_second": 78.138, + "eval_loss": 0.2849758267402649, + "eval_runtime": 1458.9307, + "eval_samples_per_second": 361.032, + "eval_steps_per_second": 60.172, "step": 1000000 }, { "epoch": 0.6, - "learning_rate": 5.184163139724509e-05, - "loss": 0.4253, + "learning_rate": 5.184152219903593e-05, + "loss": 0.3121, "step": 1000500 }, { "epoch": 0.6, - "learning_rate": 5.183953563161564e-05, - "loss": 0.4352, + "learning_rate": 5.1839422233475366e-05, + "loss": 0.3218, "step": 1001000 }, { "epoch": 0.6, - "learning_rate": 5.1837435666055076e-05, - "loss": 0.4338, + "learning_rate": 5.1837322267914806e-05, + "loss": 0.3122, "step": 1001500 }, { "epoch": 0.6, - "learning_rate": 5.183533570049451e-05, - "loss": 0.442, + "learning_rate": 5.183522230235424e-05, + "loss": 0.316, "step": 1002000 }, { "epoch": 0.6, - "learning_rate": 5.183323573493395e-05, - "loss": 0.4312, + "learning_rate": 5.183312653672479e-05, + "loss": 0.3163, "step": 1002500 }, { "epoch": 0.6, - "learning_rate": 5.1831135769373384e-05, - "loss": 0.4391, + "learning_rate": 5.1831026571164233e-05, + "loss": 0.3145, "step": 1003000 }, { "epoch": 0.6, - "learning_rate": 5.182903580381282e-05, - "loss": 0.4255, + "learning_rate": 5.182892660560367e-05, + "loss": 0.3097, "step": 1003500 }, { "epoch": 0.6, - "learning_rate": 5.182693583825225e-05, - "loss": 0.4237, + "learning_rate": 5.18268266400431e-05, + "loss": 0.3137, "step": 1004000 }, { "epoch": 0.6, - "learning_rate": 5.1824835872691684e-05, - "loss": 0.4345, + "learning_rate": 5.1824730874413654e-05, + "loss": 0.3163, "step": 1004500 }, { "epoch": 0.6, - "learning_rate": 5.1822740107062244e-05, - "loss": 0.4283, + "learning_rate": 5.1822630908853094e-05, + "loss": 0.3151, "step": 1005000 }, { "epoch": 0.6, - "learning_rate": 5.1820640141501685e-05, - "loss": 0.4265, + "learning_rate": 5.182053094329253e-05, + "loss": 0.3147, "step": 1005500 }, { "epoch": 0.6, - "learning_rate": 5.181854017594111e-05, - "loss": 0.4265, + "learning_rate": 5.181843097773196e-05, + "loss": 0.3123, "step": 1006000 }, { "epoch": 0.6, - "learning_rate": 5.1816440210380545e-05, - "loss": 0.4351, + "learning_rate": 5.181633521210252e-05, + "loss": 0.3127, "step": 1006500 }, { "epoch": 0.6, - "learning_rate": 5.1814348644682225e-05, - "loss": 0.4318, + "learning_rate": 5.1814235246541955e-05, + "loss": 0.3151, "step": 1007000 }, { "epoch": 0.6, - "learning_rate": 5.181224867912166e-05, - "loss": 0.4328, + "learning_rate": 5.181213528098139e-05, + "loss": 0.3167, "step": 1007500 }, { "epoch": 0.6, - "learning_rate": 5.18101487135611e-05, - "loss": 0.4298, + "learning_rate": 5.181003531542082e-05, + "loss": 0.3149, "step": 1008000 }, { "epoch": 0.6, - "learning_rate": 5.180804874800053e-05, - "loss": 0.4331, + "learning_rate": 5.180793534986026e-05, + "loss": 0.3133, "step": 1008500 }, { "epoch": 0.6, - "learning_rate": 5.1805948782439966e-05, - "loss": 0.4421, + "learning_rate": 5.1805835384299696e-05, + "loss": 0.3243, "step": 1009000 }, { "epoch": 0.61, - "learning_rate": 5.1803848816879406e-05, - "loss": 0.4348, + "learning_rate": 5.180373541873913e-05, + "loss": 0.3156, "step": 1009500 }, { "epoch": 0.61, - "learning_rate": 5.180174885131884e-05, - "loss": 0.4194, + "learning_rate": 5.180163965310969e-05, + "loss": 0.3135, "step": 1010000 }, { "epoch": 0.61, - "learning_rate": 5.179964888575827e-05, - "loss": 0.4284, + "learning_rate": 5.179953968754912e-05, + "loss": 0.3114, "step": 1010500 }, { "epoch": 0.61, - "learning_rate": 5.179755312012883e-05, - "loss": 0.4319, + "learning_rate": 5.1797439721988556e-05, + "loss": 0.3175, "step": 1011000 }, { "epoch": 0.61, - "learning_rate": 5.179545315456827e-05, - "loss": 0.4284, + "learning_rate": 5.1795339756427996e-05, + "loss": 0.3087, "step": 1011500 }, { "epoch": 0.61, - "learning_rate": 5.179335738893882e-05, - "loss": 0.4334, + "learning_rate": 5.179323979086743e-05, + "loss": 0.3127, "step": 1012000 }, { "epoch": 0.61, - "learning_rate": 5.1791257423378254e-05, - "loss": 0.4372, + "learning_rate": 5.1791144025237984e-05, + "loss": 0.3209, "step": 1012500 }, { "epoch": 0.61, - "learning_rate": 5.1789157457817694e-05, - "loss": 0.418, + "learning_rate": 5.178904405967742e-05, + "loss": 0.3122, "step": 1013000 }, { "epoch": 0.61, - "learning_rate": 5.178705749225713e-05, - "loss": 0.4341, + "learning_rate": 5.178694409411686e-05, + "loss": 0.3198, "step": 1013500 }, { "epoch": 0.61, - "learning_rate": 5.178495752669656e-05, - "loss": 0.4225, + "learning_rate": 5.178484412855629e-05, + "loss": 0.3149, "step": 1014000 }, { "epoch": 0.61, - "learning_rate": 5.1782857561136e-05, - "loss": 0.4301, + "learning_rate": 5.1782744162995724e-05, + "loss": 0.3099, "step": 1014500 }, { "epoch": 0.61, - "learning_rate": 5.1780757595575435e-05, - "loss": 0.4302, + "learning_rate": 5.178064839736628e-05, + "loss": 0.3154, "step": 1015000 }, { "epoch": 0.61, - "learning_rate": 5.177865763001486e-05, - "loss": 0.4269, + "learning_rate": 5.177854843180572e-05, + "loss": 0.3121, "step": 1015500 }, { "epoch": 0.61, - "learning_rate": 5.177656186438542e-05, - "loss": 0.4262, + "learning_rate": 5.177645266617628e-05, + "loss": 0.313, "step": 1016000 }, { "epoch": 0.61, - "learning_rate": 5.177446609875598e-05, - "loss": 0.4248, + "learning_rate": 5.1774352700615705e-05, + "loss": 0.3139, "step": 1016500 }, { "epoch": 0.61, - "learning_rate": 5.1772366133195415e-05, - "loss": 0.4266, + "learning_rate": 5.1772252735055145e-05, + "loss": 0.3102, "step": 1017000 }, { "epoch": 0.61, - "learning_rate": 5.177026616763485e-05, - "loss": 0.4268, + "learning_rate": 5.177015276949458e-05, + "loss": 0.3121, "step": 1017500 }, { "epoch": 0.61, - "learning_rate": 5.176816620207429e-05, - "loss": 0.4256, + "learning_rate": 5.176805280393401e-05, + "loss": 0.3102, "step": 1018000 }, { "epoch": 0.61, - "learning_rate": 5.176606623651372e-05, - "loss": 0.4295, + "learning_rate": 5.176595283837345e-05, + "loss": 0.3169, "step": 1018500 }, { "epoch": 0.61, - "learning_rate": 5.1763966270953156e-05, - "loss": 0.4351, + "learning_rate": 5.1763852872812886e-05, + "loss": 0.3161, "step": 1019000 }, { "epoch": 0.61, - "learning_rate": 5.1761866305392596e-05, - "loss": 0.4265, + "learning_rate": 5.176175290725232e-05, + "loss": 0.3148, "step": 1019500 }, { "epoch": 0.61, - "learning_rate": 5.175976633983202e-05, - "loss": 0.4316, + "learning_rate": 5.175965294169176e-05, + "loss": 0.3205, "step": 1020000 }, { "epoch": 0.61, - "learning_rate": 5.1757674774133703e-05, - "loss": 0.4287, + "learning_rate": 5.175755717606231e-05, + "loss": 0.3111, "step": 1020500 }, { "epoch": 0.61, - "learning_rate": 5.175557480857314e-05, - "loss": 0.4393, + "learning_rate": 5.1755457210501747e-05, + "loss": 0.3156, "step": 1021000 }, { "epoch": 0.61, - "learning_rate": 5.175347484301257e-05, - "loss": 0.4313, + "learning_rate": 5.175335724494118e-05, + "loss": 0.3183, "step": 1021500 }, { "epoch": 0.61, - "learning_rate": 5.175137487745201e-05, - "loss": 0.4253, + "learning_rate": 5.175125727938062e-05, + "loss": 0.3149, "step": 1022000 }, { "epoch": 0.61, - "learning_rate": 5.1749274911891444e-05, - "loss": 0.4292, + "learning_rate": 5.1749161513751174e-05, + "loss": 0.3101, "step": 1022500 }, { "epoch": 0.61, - "learning_rate": 5.174717494633088e-05, - "loss": 0.4342, + "learning_rate": 5.1747065748121734e-05, + "loss": 0.3195, "step": 1023000 }, { "epoch": 0.61, - "learning_rate": 5.174507498077032e-05, - "loss": 0.4202, + "learning_rate": 5.174496578256116e-05, + "loss": 0.3147, "step": 1023500 }, { "epoch": 0.61, - "learning_rate": 5.174297501520975e-05, - "loss": 0.4403, + "learning_rate": 5.17428658170006e-05, + "loss": 0.3173, "step": 1024000 }, { "epoch": 0.61, - "learning_rate": 5.1740879249580305e-05, - "loss": 0.4241, + "learning_rate": 5.1740765851440035e-05, + "loss": 0.3065, "step": 1024500 }, { "epoch": 0.61, - "learning_rate": 5.1738779284019745e-05, - "loss": 0.4406, + "learning_rate": 5.173866588587947e-05, + "loss": 0.319, "step": 1025000 }, { "epoch": 0.61, - "learning_rate": 5.173667931845918e-05, - "loss": 0.4216, + "learning_rate": 5.173656592031891e-05, + "loss": 0.3045, "step": 1025500 }, { "epoch": 0.62, - "learning_rate": 5.173457935289861e-05, - "loss": 0.4347, + "learning_rate": 5.173447015468946e-05, + "loss": 0.3135, "step": 1026000 }, { "epoch": 0.62, - "learning_rate": 5.1732483587269166e-05, - "loss": 0.4272, + "learning_rate": 5.1732370189128895e-05, + "loss": 0.3166, "step": 1026500 }, { "epoch": 0.62, - "learning_rate": 5.1730383621708606e-05, - "loss": 0.4202, + "learning_rate": 5.173027022356833e-05, + "loss": 0.3074, "step": 1027000 }, { "epoch": 0.62, - "learning_rate": 5.172828365614804e-05, - "loss": 0.4246, + "learning_rate": 5.172817025800777e-05, + "loss": 0.3127, "step": 1027500 }, { "epoch": 0.62, - "learning_rate": 5.172618369058747e-05, - "loss": 0.44, + "learning_rate": 5.17260702924472e-05, + "loss": 0.3207, "step": 1028000 }, { "epoch": 0.62, - "learning_rate": 5.172409212488915e-05, - "loss": 0.4282, + "learning_rate": 5.1723970326886636e-05, + "loss": 0.3115, "step": 1028500 }, { "epoch": 0.62, - "learning_rate": 5.172199215932859e-05, - "loss": 0.4367, + "learning_rate": 5.1721870361326076e-05, + "loss": 0.3144, "step": 1029000 }, { "epoch": 0.62, - "learning_rate": 5.171989639369915e-05, - "loss": 0.4466, + "learning_rate": 5.171977039576551e-05, + "loss": 0.323, "step": 1029500 }, { "epoch": 0.62, - "learning_rate": 5.171779642813858e-05, - "loss": 0.4307, + "learning_rate": 5.171767463013606e-05, + "loss": 0.3136, "step": 1030000 }, { "epoch": 0.62, - "learning_rate": 5.1715696462578014e-05, - "loss": 0.4184, + "learning_rate": 5.17155746645755e-05, + "loss": 0.3096, "step": 1030500 }, { "epoch": 0.62, - "learning_rate": 5.1713596497017454e-05, - "loss": 0.4456, + "learning_rate": 5.171347469901494e-05, + "loss": 0.3238, "step": 1031000 }, { "epoch": 0.62, - "learning_rate": 5.171149653145688e-05, - "loss": 0.4273, + "learning_rate": 5.171137473345437e-05, + "loss": 0.312, "step": 1031500 }, { "epoch": 0.62, - "learning_rate": 5.1709396565896314e-05, - "loss": 0.4226, + "learning_rate": 5.1709278967824924e-05, + "loss": 0.3137, "step": 1032000 }, { "epoch": 0.62, - "learning_rate": 5.1707296600335755e-05, - "loss": 0.4193, + "learning_rate": 5.1707179002264364e-05, + "loss": 0.3109, "step": 1032500 }, { "epoch": 0.62, - "learning_rate": 5.170519663477519e-05, - "loss": 0.4315, + "learning_rate": 5.17050790367038e-05, + "loss": 0.3175, "step": 1033000 }, { "epoch": 0.62, - "learning_rate": 5.170310086914574e-05, - "loss": 0.4436, + "learning_rate": 5.170297907114323e-05, + "loss": 0.318, "step": 1033500 }, { "epoch": 0.62, - "learning_rate": 5.1701000903585175e-05, - "loss": 0.4384, + "learning_rate": 5.170087910558267e-05, + "loss": 0.3165, "step": 1034000 }, { "epoch": 0.62, - "learning_rate": 5.1698900938024615e-05, - "loss": 0.4245, + "learning_rate": 5.1698779140022105e-05, + "loss": 0.309, "step": 1034500 }, { "epoch": 0.62, - "learning_rate": 5.169680097246405e-05, - "loss": 0.4325, + "learning_rate": 5.169667917446154e-05, + "loss": 0.3158, "step": 1035000 }, { "epoch": 0.62, - "learning_rate": 5.169470100690348e-05, - "loss": 0.4235, + "learning_rate": 5.169457920890098e-05, + "loss": 0.3115, "step": 1035500 }, { "epoch": 0.62, - "learning_rate": 5.169260104134292e-05, - "loss": 0.4347, + "learning_rate": 5.1692479243340405e-05, + "loss": 0.3124, "step": 1036000 }, { "epoch": 0.62, - "learning_rate": 5.1690501075782356e-05, - "loss": 0.4322, + "learning_rate": 5.1690383477710965e-05, + "loss": 0.3179, "step": 1036500 }, { "epoch": 0.62, - "learning_rate": 5.168840111022179e-05, - "loss": 0.4348, + "learning_rate": 5.168828771208152e-05, + "loss": 0.3165, "step": 1037000 }, { "epoch": 0.62, - "learning_rate": 5.168630534459235e-05, - "loss": 0.4265, + "learning_rate": 5.168618774652095e-05, + "loss": 0.3139, "step": 1037500 }, { "epoch": 0.62, - "learning_rate": 5.168420957896291e-05, - "loss": 0.4334, + "learning_rate": 5.168408778096039e-05, + "loss": 0.3198, "step": 1038000 }, { "epoch": 0.62, - "learning_rate": 5.168210961340234e-05, - "loss": 0.4261, + "learning_rate": 5.1681987815399826e-05, + "loss": 0.3141, "step": 1038500 }, { "epoch": 0.62, - "learning_rate": 5.168000964784177e-05, - "loss": 0.4183, + "learning_rate": 5.167989204977038e-05, + "loss": 0.3062, "step": 1039000 }, { "epoch": 0.62, - "learning_rate": 5.167790968228121e-05, - "loss": 0.4369, + "learning_rate": 5.167779208420982e-05, + "loss": 0.3174, "step": 1039500 }, { "epoch": 0.62, - "learning_rate": 5.1675809716720644e-05, - "loss": 0.4241, + "learning_rate": 5.1675692118649253e-05, + "loss": 0.3094, "step": 1040000 }, { "epoch": 0.62, - "learning_rate": 5.1673713951091204e-05, - "loss": 0.4242, + "learning_rate": 5.167359215308869e-05, + "loss": 0.321, "step": 1040500 }, { "epoch": 0.62, - "learning_rate": 5.167161398553063e-05, - "loss": 0.4239, + "learning_rate": 5.167149218752813e-05, + "loss": 0.3086, "step": 1041000 }, { "epoch": 0.62, - "learning_rate": 5.166951401997007e-05, - "loss": 0.4282, + "learning_rate": 5.166939222196756e-05, + "loss": 0.3156, "step": 1041500 }, { "epoch": 0.62, - "learning_rate": 5.1667414054409505e-05, - "loss": 0.4168, + "learning_rate": 5.1667292256406994e-05, + "loss": 0.3035, "step": 1042000 }, { "epoch": 0.63, - "learning_rate": 5.166531408884894e-05, - "loss": 0.4336, + "learning_rate": 5.1665192290846434e-05, + "loss": 0.3144, "step": 1042500 }, { "epoch": 0.63, - "learning_rate": 5.166321412328838e-05, - "loss": 0.4288, + "learning_rate": 5.166309232528587e-05, + "loss": 0.3157, "step": 1043000 }, { "epoch": 0.63, - "learning_rate": 5.166111415772781e-05, - "loss": 0.4299, + "learning_rate": 5.1660992359725294e-05, + "loss": 0.3184, "step": 1043500 }, { "epoch": 0.63, - "learning_rate": 5.1659014192167245e-05, - "loss": 0.4252, + "learning_rate": 5.1658892394164735e-05, + "loss": 0.3147, "step": 1044000 }, { "epoch": 0.63, - "learning_rate": 5.1656918426537806e-05, - "loss": 0.4207, + "learning_rate": 5.165679242860417e-05, + "loss": 0.3066, "step": 1044500 }, { "epoch": 0.63, - "learning_rate": 5.165481846097724e-05, - "loss": 0.4217, + "learning_rate": 5.165469666297473e-05, + "loss": 0.3092, "step": 1045000 }, { "epoch": 0.63, - "learning_rate": 5.165272269534779e-05, - "loss": 0.427, + "learning_rate": 5.165259669741416e-05, + "loss": 0.3107, "step": 1045500 }, { "epoch": 0.63, - "learning_rate": 5.1650622729787226e-05, - "loss": 0.431, + "learning_rate": 5.1650496731853595e-05, + "loss": 0.3141, "step": 1046000 }, { "epoch": 0.63, - "learning_rate": 5.1648522764226666e-05, - "loss": 0.4284, + "learning_rate": 5.164839676629303e-05, + "loss": 0.3151, "step": 1046500 }, { "epoch": 0.63, - "learning_rate": 5.16464227986661e-05, - "loss": 0.4316, + "learning_rate": 5.164629680073247e-05, + "loss": 0.3114, "step": 1047000 }, { "epoch": 0.63, - "learning_rate": 5.164432283310553e-05, - "loss": 0.4263, + "learning_rate": 5.164420103510303e-05, + "loss": 0.3158, "step": 1047500 }, { "epoch": 0.63, - "learning_rate": 5.1642222867544973e-05, - "loss": 0.4364, + "learning_rate": 5.1642101069542456e-05, + "loss": 0.3151, "step": 1048000 }, { "epoch": 0.63, - "learning_rate": 5.164012290198441e-05, - "loss": 0.4186, + "learning_rate": 5.164000110398189e-05, + "loss": 0.3056, "step": 1048500 }, { "epoch": 0.63, - "learning_rate": 5.163802713635496e-05, - "loss": 0.4335, + "learning_rate": 5.163790113842133e-05, + "loss": 0.3151, "step": 1049000 }, { "epoch": 0.63, - "learning_rate": 5.1635927170794394e-05, - "loss": 0.4167, + "learning_rate": 5.163580117286076e-05, + "loss": 0.3057, "step": 1049500 }, { "epoch": 0.63, - "learning_rate": 5.1633827205233834e-05, - "loss": 0.4219, + "learning_rate": 5.16337012073002e-05, + "loss": 0.3133, "step": 1050000 }, { "epoch": 0.63, - "learning_rate": 5.163172723967327e-05, - "loss": 0.4269, + "learning_rate": 5.163160124173964e-05, + "loss": 0.3158, "step": 1050500 }, { "epoch": 0.63, - "learning_rate": 5.16296272741127e-05, - "loss": 0.4346, + "learning_rate": 5.162950127617907e-05, + "loss": 0.3141, "step": 1051000 }, { "epoch": 0.63, - "learning_rate": 5.162752730855214e-05, - "loss": 0.4185, + "learning_rate": 5.1627405510549624e-05, + "loss": 0.314, "step": 1051500 }, { "epoch": 0.63, - "learning_rate": 5.1625427342991575e-05, - "loss": 0.4341, + "learning_rate": 5.162530554498906e-05, + "loss": 0.3164, "step": 1052000 }, { "epoch": 0.63, - "learning_rate": 5.1623327377431015e-05, - "loss": 0.4147, + "learning_rate": 5.16232055794285e-05, + "loss": 0.3042, "step": 1052500 }, { "epoch": 0.63, - "learning_rate": 5.162123161180157e-05, - "loss": 0.4327, + "learning_rate": 5.162110981379905e-05, + "loss": 0.3185, "step": 1053000 }, { "epoch": 0.63, - "learning_rate": 5.1619131646241e-05, - "loss": 0.426, + "learning_rate": 5.1619009848238485e-05, + "loss": 0.3116, "step": 1053500 }, { "epoch": 0.63, - "learning_rate": 5.161704008054268e-05, - "loss": 0.4319, + "learning_rate": 5.1616914082609045e-05, + "loss": 0.3116, "step": 1054000 }, { "epoch": 0.63, - "learning_rate": 5.1614940114982116e-05, - "loss": 0.4247, + "learning_rate": 5.1614814117048485e-05, + "loss": 0.3131, "step": 1054500 }, { "epoch": 0.63, - "learning_rate": 5.161284014942154e-05, - "loss": 0.4279, + "learning_rate": 5.161271415148792e-05, + "loss": 0.314, "step": 1055000 }, { "epoch": 0.63, - "learning_rate": 5.161074018386098e-05, - "loss": 0.4407, + "learning_rate": 5.1610614185927345e-05, + "loss": 0.3151, "step": 1055500 }, { "epoch": 0.63, - "learning_rate": 5.1608640218300416e-05, - "loss": 0.432, + "learning_rate": 5.1608514220366786e-05, + "loss": 0.3162, "step": 1056000 }, { "epoch": 0.63, - "learning_rate": 5.160654025273985e-05, - "loss": 0.4339, + "learning_rate": 5.160641425480622e-05, + "loss": 0.3154, "step": 1056500 }, { "epoch": 0.63, - "learning_rate": 5.160444028717929e-05, - "loss": 0.4185, + "learning_rate": 5.160431428924565e-05, + "loss": 0.3061, "step": 1057000 }, { "epoch": 0.63, - "learning_rate": 5.1602340321618723e-05, - "loss": 0.4159, + "learning_rate": 5.160221432368509e-05, + "loss": 0.3085, "step": 1057500 }, { "epoch": 0.63, - "learning_rate": 5.160024035605816e-05, - "loss": 0.4201, + "learning_rate": 5.1600114358124526e-05, + "loss": 0.3113, "step": 1058000 }, { "epoch": 0.63, - "learning_rate": 5.15981403904976e-05, - "loss": 0.4264, + "learning_rate": 5.159801439256396e-05, + "loss": 0.3115, "step": 1058500 }, { "epoch": 0.63, - "learning_rate": 5.159604042493703e-05, - "loss": 0.4204, + "learning_rate": 5.15959144270034e-05, + "loss": 0.3075, "step": 1059000 }, { "epoch": 0.64, - "learning_rate": 5.159394045937647e-05, - "loss": 0.4253, + "learning_rate": 5.1593814461442833e-05, + "loss": 0.3109, "step": 1059500 }, { "epoch": 0.64, - "learning_rate": 5.1591844693747024e-05, - "loss": 0.421, + "learning_rate": 5.159171449588227e-05, + "loss": 0.3151, "step": 1060000 }, { "epoch": 0.64, - "learning_rate": 5.158974472818646e-05, - "loss": 0.4313, + "learning_rate": 5.158961873025282e-05, + "loss": 0.3147, "step": 1060500 }, { "epoch": 0.64, - "learning_rate": 5.158764896255701e-05, - "loss": 0.4245, + "learning_rate": 5.158751876469226e-05, + "loss": 0.3101, "step": 1061000 }, { "epoch": 0.64, - "learning_rate": 5.1585548996996445e-05, - "loss": 0.4168, + "learning_rate": 5.1585422999062814e-05, + "loss": 0.3085, "step": 1061500 }, { "epoch": 0.64, - "learning_rate": 5.1583449031435885e-05, - "loss": 0.4276, + "learning_rate": 5.158332303350225e-05, + "loss": 0.3131, "step": 1062000 }, { "epoch": 0.64, - "learning_rate": 5.158134906587532e-05, - "loss": 0.4215, + "learning_rate": 5.158122306794169e-05, + "loss": 0.3121, "step": 1062500 }, { "epoch": 0.64, - "learning_rate": 5.157924910031475e-05, - "loss": 0.4236, + "learning_rate": 5.157912310238112e-05, + "loss": 0.3101, "step": 1063000 }, { "epoch": 0.64, - "learning_rate": 5.157714913475419e-05, - "loss": 0.4312, + "learning_rate": 5.1577027336751675e-05, + "loss": 0.3198, "step": 1063500 }, { "epoch": 0.64, - "learning_rate": 5.1575049169193626e-05, - "loss": 0.4229, + "learning_rate": 5.157492737119111e-05, + "loss": 0.3068, "step": 1064000 }, { "epoch": 0.64, - "learning_rate": 5.157294920363306e-05, - "loss": 0.4199, + "learning_rate": 5.157282740563055e-05, + "loss": 0.3135, "step": 1064500 }, { "epoch": 0.64, - "learning_rate": 5.157085343800362e-05, - "loss": 0.424, + "learning_rate": 5.157072744006998e-05, + "loss": 0.3133, "step": 1065000 }, { "epoch": 0.64, - "learning_rate": 5.156875767237417e-05, - "loss": 0.4309, + "learning_rate": 5.1568627474509416e-05, + "loss": 0.3125, "step": 1065500 }, { "epoch": 0.64, - "learning_rate": 5.156665770681361e-05, - "loss": 0.4326, + "learning_rate": 5.1566527508948856e-05, + "loss": 0.3112, "step": 1066000 }, { "epoch": 0.64, - "learning_rate": 5.156455774125304e-05, - "loss": 0.4249, + "learning_rate": 5.156442754338829e-05, + "loss": 0.3089, "step": 1066500 }, { "epoch": 0.64, - "learning_rate": 5.156245777569248e-05, - "loss": 0.4282, + "learning_rate": 5.156232757782772e-05, + "loss": 0.3158, "step": 1067000 }, { "epoch": 0.64, - "learning_rate": 5.1560357810131914e-05, - "loss": 0.4274, + "learning_rate": 5.156022761226716e-05, + "loss": 0.3141, "step": 1067500 }, { "epoch": 0.64, - "learning_rate": 5.155826204450247e-05, - "loss": 0.4345, + "learning_rate": 5.155812764670659e-05, + "loss": 0.3152, "step": 1068000 }, { "epoch": 0.64, - "learning_rate": 5.15561620789419e-05, - "loss": 0.4236, + "learning_rate": 5.155603188107715e-05, + "loss": 0.3103, "step": 1068500 }, { "epoch": 0.64, - "learning_rate": 5.155406211338134e-05, - "loss": 0.4273, + "learning_rate": 5.1553931915516584e-05, + "loss": 0.3163, "step": 1069000 }, { "epoch": 0.64, - "learning_rate": 5.1551962147820775e-05, - "loss": 0.4159, + "learning_rate": 5.1551831949956024e-05, + "loss": 0.3097, "step": 1069500 }, { "epoch": 0.64, - "learning_rate": 5.154986218226021e-05, - "loss": 0.4257, + "learning_rate": 5.154973198439546e-05, + "loss": 0.3117, "step": 1070000 }, { "epoch": 0.64, - "learning_rate": 5.154776641663076e-05, - "loss": 0.4326, + "learning_rate": 5.154763201883489e-05, + "loss": 0.3134, "step": 1070500 }, { "epoch": 0.64, - "learning_rate": 5.15456664510702e-05, - "loss": 0.4267, + "learning_rate": 5.1545532053274324e-05, + "loss": 0.3144, "step": 1071000 }, { "epoch": 0.64, - "learning_rate": 5.1543566485509635e-05, - "loss": 0.4296, + "learning_rate": 5.154343208771376e-05, + "loss": 0.3142, "step": 1071500 }, { "epoch": 0.64, - "learning_rate": 5.1541466519949075e-05, - "loss": 0.4308, + "learning_rate": 5.15413321221532e-05, + "loss": 0.3121, "step": 1072000 }, { "epoch": 0.64, - "learning_rate": 5.153936655438851e-05, - "loss": 0.4332, + "learning_rate": 5.153923215659263e-05, + "loss": 0.3127, "step": 1072500 }, { "epoch": 0.64, - "learning_rate": 5.153726658882794e-05, - "loss": 0.4193, + "learning_rate": 5.1537136390963185e-05, + "loss": 0.313, "step": 1073000 }, { "epoch": 0.64, - "learning_rate": 5.153516662326738e-05, - "loss": 0.4313, + "learning_rate": 5.153503642540262e-05, + "loss": 0.3164, "step": 1073500 }, { "epoch": 0.64, - "learning_rate": 5.1533070857637936e-05, - "loss": 0.4253, + "learning_rate": 5.153293645984206e-05, + "loss": 0.3088, "step": 1074000 }, { "epoch": 0.64, - "learning_rate": 5.153097089207737e-05, - "loss": 0.4305, + "learning_rate": 5.153083649428149e-05, + "loss": 0.3118, "step": 1074500 }, { "epoch": 0.64, - "learning_rate": 5.15288709265168e-05, - "loss": 0.4157, + "learning_rate": 5.1528736528720925e-05, + "loss": 0.3074, "step": 1075000 }, { "epoch": 0.64, - "learning_rate": 5.152677096095624e-05, - "loss": 0.4267, + "learning_rate": 5.1526636563160366e-05, + "loss": 0.3107, "step": 1075500 }, { "epoch": 0.65, - "learning_rate": 5.152467099539568e-05, - "loss": 0.4186, + "learning_rate": 5.152454079753092e-05, + "loss": 0.3087, "step": 1076000 }, { "epoch": 0.65, - "learning_rate": 5.152257522976623e-05, - "loss": 0.4297, + "learning_rate": 5.152244083197035e-05, + "loss": 0.3128, "step": 1076500 }, { "epoch": 0.65, - "learning_rate": 5.1520475264205664e-05, - "loss": 0.4335, + "learning_rate": 5.1520340866409786e-05, + "loss": 0.3179, "step": 1077000 }, { "epoch": 0.65, - "learning_rate": 5.1518375298645104e-05, - "loss": 0.4354, + "learning_rate": 5.1518240900849226e-05, + "loss": 0.3137, "step": 1077500 }, { "epoch": 0.65, - "learning_rate": 5.151627533308454e-05, - "loss": 0.4163, + "learning_rate": 5.151614093528866e-05, + "loss": 0.3038, "step": 1078000 }, { "epoch": 0.65, - "learning_rate": 5.151417536752397e-05, - "loss": 0.4302, + "learning_rate": 5.151404936959034e-05, + "loss": 0.3199, "step": 1078500 }, { "epoch": 0.65, - "learning_rate": 5.151207540196341e-05, - "loss": 0.4207, + "learning_rate": 5.1511949404029774e-05, + "loss": 0.3126, "step": 1079000 }, { "epoch": 0.65, - "learning_rate": 5.1509979636333965e-05, - "loss": 0.4325, + "learning_rate": 5.1509849438469214e-05, + "loss": 0.3148, "step": 1079500 }, { "epoch": 0.65, - "learning_rate": 5.15078796707734e-05, - "loss": 0.4284, + "learning_rate": 5.150774947290864e-05, + "loss": 0.3172, "step": 1080000 }, { "epoch": 0.65, - "learning_rate": 5.150577970521284e-05, - "loss": 0.4343, + "learning_rate": 5.15056537072792e-05, + "loss": 0.3156, "step": 1080500 }, { "epoch": 0.65, - "learning_rate": 5.150367973965227e-05, - "loss": 0.414, + "learning_rate": 5.1503553741718635e-05, + "loss": 0.3042, "step": 1081000 }, { "epoch": 0.65, - "learning_rate": 5.1501579774091705e-05, - "loss": 0.416, + "learning_rate": 5.1501453776158075e-05, + "loss": 0.3101, "step": 1081500 }, { "epoch": 0.65, - "learning_rate": 5.149947980853114e-05, - "loss": 0.438, + "learning_rate": 5.149935381059751e-05, + "loss": 0.3195, "step": 1082000 }, { "epoch": 0.65, - "learning_rate": 5.149737984297057e-05, - "loss": 0.4386, + "learning_rate": 5.1497253845036935e-05, + "loss": 0.3213, "step": 1082500 }, { "epoch": 0.65, - "learning_rate": 5.1495279877410006e-05, - "loss": 0.424, + "learning_rate": 5.1495153879476375e-05, + "loss": 0.3081, "step": 1083000 }, { "epoch": 0.65, - "learning_rate": 5.1493184111780566e-05, - "loss": 0.4227, + "learning_rate": 5.149305391391581e-05, + "loss": 0.3123, "step": 1083500 }, { "epoch": 0.65, - "learning_rate": 5.1491084146220006e-05, - "loss": 0.4279, + "learning_rate": 5.149095394835524e-05, + "loss": 0.312, "step": 1084000 }, { "epoch": 0.65, - "learning_rate": 5.148898418065943e-05, - "loss": 0.442, + "learning_rate": 5.148885398279468e-05, + "loss": 0.319, "step": 1084500 }, { "epoch": 0.65, - "learning_rate": 5.1486884215098867e-05, - "loss": 0.4321, + "learning_rate": 5.1486754017234116e-05, + "loss": 0.3142, "step": 1085000 }, { "epoch": 0.65, - "learning_rate": 5.148478844946943e-05, - "loss": 0.4257, + "learning_rate": 5.148465405167355e-05, + "loss": 0.312, "step": 1085500 }, { "epoch": 0.65, - "learning_rate": 5.148268848390887e-05, - "loss": 0.4245, + "learning_rate": 5.148255408611299e-05, + "loss": 0.3125, "step": 1086000 }, { "epoch": 0.65, - "learning_rate": 5.148059271827942e-05, - "loss": 0.4393, + "learning_rate": 5.148045412055242e-05, + "loss": 0.3194, "step": 1086500 }, { "epoch": 0.65, - "learning_rate": 5.1478492752718854e-05, - "loss": 0.418, + "learning_rate": 5.1478358354922977e-05, + "loss": 0.3063, "step": 1087000 }, { "epoch": 0.65, - "learning_rate": 5.1476392787158294e-05, - "loss": 0.4185, + "learning_rate": 5.147625838936242e-05, + "loss": 0.3133, "step": 1087500 }, { "epoch": 0.65, - "learning_rate": 5.147429282159773e-05, - "loss": 0.4312, + "learning_rate": 5.147415842380185e-05, + "loss": 0.3079, "step": 1088000 }, { "epoch": 0.65, - "learning_rate": 5.147219705596828e-05, - "loss": 0.4259, + "learning_rate": 5.1472058458241284e-05, + "loss": 0.3063, "step": 1088500 }, { "epoch": 0.65, - "learning_rate": 5.1470097090407715e-05, - "loss": 0.4187, + "learning_rate": 5.146996269261184e-05, + "loss": 0.3076, "step": 1089000 }, { "epoch": 0.65, - "learning_rate": 5.1467997124847155e-05, - "loss": 0.4306, + "learning_rate": 5.146786272705128e-05, + "loss": 0.3117, "step": 1089500 }, { "epoch": 0.65, - "learning_rate": 5.146589715928659e-05, - "loss": 0.4247, + "learning_rate": 5.146576276149071e-05, + "loss": 0.3105, "step": 1090000 }, { "epoch": 0.65, - "learning_rate": 5.146379719372602e-05, - "loss": 0.4307, + "learning_rate": 5.1463662795930144e-05, + "loss": 0.3145, "step": 1090500 }, { "epoch": 0.65, - "learning_rate": 5.1461701428096576e-05, - "loss": 0.4234, + "learning_rate": 5.1461562830369585e-05, + "loss": 0.3157, "step": 1091000 }, { "epoch": 0.65, - "learning_rate": 5.1459601462536016e-05, - "loss": 0.4291, + "learning_rate": 5.145946706474014e-05, + "loss": 0.3201, "step": 1091500 }, { "epoch": 0.65, - "learning_rate": 5.145750149697545e-05, - "loss": 0.4307, + "learning_rate": 5.145736709917957e-05, + "loss": 0.3155, "step": 1092000 }, { "epoch": 0.65, - "learning_rate": 5.145540153141488e-05, - "loss": 0.426, + "learning_rate": 5.1455271333550125e-05, + "loss": 0.3092, "step": 1092500 }, { "epoch": 0.66, - "learning_rate": 5.145330156585432e-05, - "loss": 0.4222, + "learning_rate": 5.1453171367989565e-05, + "loss": 0.3105, "step": 1093000 }, { "epoch": 0.66, - "learning_rate": 5.1451201600293756e-05, - "loss": 0.4234, + "learning_rate": 5.1451071402429e-05, + "loss": 0.3106, "step": 1093500 }, { "epoch": 0.66, - "learning_rate": 5.144910583466431e-05, - "loss": 0.435, + "learning_rate": 5.144897143686843e-05, + "loss": 0.32, "step": 1094000 }, { "epoch": 0.66, - "learning_rate": 5.144700586910375e-05, - "loss": 0.4268, + "learning_rate": 5.144687147130787e-05, + "loss": 0.3106, "step": 1094500 }, { "epoch": 0.66, - "learning_rate": 5.1444905903543184e-05, - "loss": 0.4227, + "learning_rate": 5.1444771505747306e-05, + "loss": 0.313, "step": 1095000 }, { "epoch": 0.66, - "learning_rate": 5.144280593798262e-05, - "loss": 0.4119, + "learning_rate": 5.144267154018674e-05, + "loss": 0.3027, "step": 1095500 }, { "epoch": 0.66, - "learning_rate": 5.144070597242206e-05, - "loss": 0.4323, + "learning_rate": 5.144057157462618e-05, + "loss": 0.3123, "step": 1096000 }, { "epoch": 0.66, - "learning_rate": 5.1438606006861484e-05, - "loss": 0.4235, + "learning_rate": 5.143847160906561e-05, + "loss": 0.311, "step": 1096500 }, { "epoch": 0.66, - "learning_rate": 5.1436510241232044e-05, - "loss": 0.4196, + "learning_rate": 5.143637584343617e-05, + "loss": 0.3055, "step": 1097000 }, { "epoch": 0.66, - "learning_rate": 5.143441027567148e-05, - "loss": 0.4246, + "learning_rate": 5.14342758778756e-05, + "loss": 0.3142, "step": 1097500 }, { "epoch": 0.66, - "learning_rate": 5.143231031011092e-05, - "loss": 0.4261, + "learning_rate": 5.1432180112246154e-05, + "loss": 0.3105, "step": 1098000 }, { "epoch": 0.66, - "learning_rate": 5.143021034455035e-05, - "loss": 0.426, + "learning_rate": 5.1430084346616714e-05, + "loss": 0.3139, "step": 1098500 }, { "epoch": 0.66, - "learning_rate": 5.142811037898978e-05, - "loss": 0.4259, + "learning_rate": 5.142798438105615e-05, + "loss": 0.3176, "step": 1099000 }, { "epoch": 0.66, - "learning_rate": 5.142601461336034e-05, - "loss": 0.42, + "learning_rate": 5.142588441549558e-05, + "loss": 0.3127, "step": 1099500 }, { "epoch": 0.66, - "learning_rate": 5.142391464779978e-05, - "loss": 0.4198, + "learning_rate": 5.142378444993502e-05, + "loss": 0.312, "step": 1100000 }, { "epoch": 0.66, - "eval_loss": 0.40232425928115845, - "eval_runtime": 1114.6441, - "eval_samples_per_second": 472.545, - "eval_steps_per_second": 78.758, + "eval_loss": 0.28178706765174866, + "eval_runtime": 1457.3807, + "eval_samples_per_second": 361.416, + "eval_steps_per_second": 60.236, "step": 1100000 }, { "epoch": 0.66, - "learning_rate": 5.142181468223921e-05, - "loss": 0.4262, + "learning_rate": 5.1421684484374455e-05, + "loss": 0.3156, "step": 1100500 }, { "epoch": 0.66, - "learning_rate": 5.1419714716678646e-05, - "loss": 0.4156, + "learning_rate": 5.141958451881389e-05, + "loss": 0.3103, "step": 1101000 }, { "epoch": 0.66, - "learning_rate": 5.1417618951049206e-05, - "loss": 0.4152, + "learning_rate": 5.141748455325333e-05, + "loss": 0.3054, "step": 1101500 }, { "epoch": 0.66, - "learning_rate": 5.141551898548864e-05, - "loss": 0.4241, + "learning_rate": 5.141538458769276e-05, + "loss": 0.3063, "step": 1102000 }, { "epoch": 0.66, - "learning_rate": 5.141341901992807e-05, - "loss": 0.4178, + "learning_rate": 5.1413284622132195e-05, + "loss": 0.3144, "step": 1102500 }, { "epoch": 0.66, - "learning_rate": 5.141131905436751e-05, - "loss": 0.427, + "learning_rate": 5.1411184656571636e-05, + "loss": 0.313, "step": 1103000 }, { "epoch": 0.66, - "learning_rate": 5.140921908880694e-05, - "loss": 0.4144, + "learning_rate": 5.140908469101107e-05, + "loss": 0.3056, "step": 1103500 }, { "epoch": 0.66, - "learning_rate": 5.1407119123246373e-05, - "loss": 0.4336, + "learning_rate": 5.140698892538162e-05, + "loss": 0.3184, "step": 1104000 }, { "epoch": 0.66, - "learning_rate": 5.1405019157685814e-05, - "loss": 0.425, + "learning_rate": 5.1404888959821056e-05, + "loss": 0.307, "step": 1104500 }, { "epoch": 0.66, - "learning_rate": 5.140291919212525e-05, - "loss": 0.4254, + "learning_rate": 5.1402788994260496e-05, + "loss": 0.3122, "step": 1105000 }, { "epoch": 0.66, - "learning_rate": 5.140082342649581e-05, - "loss": 0.4198, + "learning_rate": 5.140068902869993e-05, + "loss": 0.3075, "step": 1105500 }, { "epoch": 0.66, - "learning_rate": 5.1398723460935234e-05, - "loss": 0.4296, + "learning_rate": 5.139858906313936e-05, + "loss": 0.3173, "step": 1106000 }, { "epoch": 0.66, - "learning_rate": 5.1396623495374674e-05, - "loss": 0.4291, + "learning_rate": 5.1396489097578804e-05, + "loss": 0.3109, "step": 1106500 }, { "epoch": 0.66, - "learning_rate": 5.139452352981411e-05, - "loss": 0.4133, + "learning_rate": 5.139438913201823e-05, + "loss": 0.309, "step": 1107000 }, { "epoch": 0.66, - "learning_rate": 5.139242776418467e-05, - "loss": 0.4294, + "learning_rate": 5.139228916645767e-05, + "loss": 0.3131, "step": 1107500 }, { "epoch": 0.66, - "learning_rate": 5.139033199855522e-05, - "loss": 0.4264, + "learning_rate": 5.139019340082823e-05, + "loss": 0.31, "step": 1108000 }, { "epoch": 0.66, - "learning_rate": 5.138823203299466e-05, - "loss": 0.4242, + "learning_rate": 5.1388093435267664e-05, + "loss": 0.3117, "step": 1108500 }, { "epoch": 0.66, - "learning_rate": 5.1386132067434095e-05, - "loss": 0.4334, + "learning_rate": 5.13859934697071e-05, + "loss": 0.3172, "step": 1109000 }, { "epoch": 0.67, - "learning_rate": 5.138403210187353e-05, - "loss": 0.4201, + "learning_rate": 5.138389350414653e-05, + "loss": 0.306, "step": 1109500 }, { "epoch": 0.67, - "learning_rate": 5.138193213631297e-05, - "loss": 0.4229, + "learning_rate": 5.138179773851709e-05, + "loss": 0.3107, "step": 1110000 }, { "epoch": 0.67, - "learning_rate": 5.1379832170752396e-05, - "loss": 0.4126, + "learning_rate": 5.1379697772956525e-05, + "loss": 0.3065, "step": 1110500 }, { "epoch": 0.67, - "learning_rate": 5.137773220519183e-05, - "loss": 0.4253, + "learning_rate": 5.137759780739596e-05, + "loss": 0.3185, "step": 1111000 }, { "epoch": 0.67, - "learning_rate": 5.137563643956239e-05, - "loss": 0.424, + "learning_rate": 5.137549784183539e-05, + "loss": 0.3121, "step": 1111500 }, { "epoch": 0.67, - "learning_rate": 5.137353647400183e-05, - "loss": 0.4263, + "learning_rate": 5.1373397876274825e-05, + "loss": 0.3128, "step": 1112000 }, { "epoch": 0.67, - "learning_rate": 5.137143650844126e-05, - "loss": 0.4231, + "learning_rate": 5.137129791071426e-05, + "loss": 0.3061, "step": 1112500 }, { "epoch": 0.67, - "learning_rate": 5.136934074281182e-05, - "loss": 0.4317, + "learning_rate": 5.13691979451537e-05, + "loss": 0.3191, "step": 1113000 }, { "epoch": 0.67, - "learning_rate": 5.136724077725125e-05, - "loss": 0.4292, + "learning_rate": 5.136709797959313e-05, + "loss": 0.3084, "step": 1113500 }, { "epoch": 0.67, - "learning_rate": 5.136514081169069e-05, - "loss": 0.4216, + "learning_rate": 5.1365002213963686e-05, + "loss": 0.3117, "step": 1114000 }, { "epoch": 0.67, - "learning_rate": 5.1363040846130124e-05, - "loss": 0.416, + "learning_rate": 5.1362906448334246e-05, + "loss": 0.3077, "step": 1114500 }, { "epoch": 0.67, - "learning_rate": 5.136094088056956e-05, - "loss": 0.4285, + "learning_rate": 5.136080648277369e-05, + "loss": 0.3097, "step": 1115000 }, { "epoch": 0.67, - "learning_rate": 5.135884091500899e-05, - "loss": 0.4246, + "learning_rate": 5.135870651721312e-05, + "loss": 0.3114, "step": 1115500 }, { "epoch": 0.67, - "learning_rate": 5.1356740949448424e-05, - "loss": 0.4106, + "learning_rate": 5.1356606551652554e-05, + "loss": 0.3119, "step": 1116000 }, { "epoch": 0.67, - "learning_rate": 5.1354640983887865e-05, - "loss": 0.4361, + "learning_rate": 5.135450658609199e-05, + "loss": 0.3144, "step": 1116500 }, { "epoch": 0.67, - "learning_rate": 5.13525410183273e-05, - "loss": 0.4286, + "learning_rate": 5.135240662053142e-05, + "loss": 0.3177, "step": 1117000 }, { "epoch": 0.67, - "learning_rate": 5.135044525269786e-05, - "loss": 0.4185, + "learning_rate": 5.1350306654970854e-05, + "loss": 0.3086, "step": 1117500 }, { "epoch": 0.67, - "learning_rate": 5.1348345287137285e-05, - "loss": 0.4226, + "learning_rate": 5.1348206689410294e-05, + "loss": 0.3158, "step": 1118000 }, { "epoch": 0.67, - "learning_rate": 5.1346245321576725e-05, - "loss": 0.423, + "learning_rate": 5.134610672384973e-05, + "loss": 0.31, "step": 1118500 }, { "epoch": 0.67, - "learning_rate": 5.134414535601616e-05, - "loss": 0.4265, + "learning_rate": 5.134401095822028e-05, + "loss": 0.3098, "step": 1119000 }, { "epoch": 0.67, - "learning_rate": 5.134204539045559e-05, - "loss": 0.4235, + "learning_rate": 5.1341910992659715e-05, + "loss": 0.3103, "step": 1119500 }, { "epoch": 0.67, - "learning_rate": 5.1339949624826146e-05, - "loss": 0.4245, + "learning_rate": 5.1339815227030275e-05, + "loss": 0.3075, "step": 1120000 }, { "epoch": 0.67, - "learning_rate": 5.1337849659265586e-05, - "loss": 0.4301, + "learning_rate": 5.1337715261469715e-05, + "loss": 0.3166, "step": 1120500 }, { "epoch": 0.67, - "learning_rate": 5.1335753893636146e-05, - "loss": 0.4318, + "learning_rate": 5.133561949584027e-05, + "loss": 0.3116, "step": 1121000 }, { "epoch": 0.67, - "learning_rate": 5.133365392807558e-05, - "loss": 0.4159, + "learning_rate": 5.13335195302797e-05, + "loss": 0.3125, "step": 1121500 }, { "epoch": 0.67, - "learning_rate": 5.1331553962515013e-05, - "loss": 0.4385, + "learning_rate": 5.133141956471914e-05, + "loss": 0.3135, "step": 1122000 }, { "epoch": 0.67, - "learning_rate": 5.132945399695445e-05, - "loss": 0.4205, + "learning_rate": 5.1329319599158576e-05, + "loss": 0.3099, "step": 1122500 }, { "epoch": 0.67, - "learning_rate": 5.132735403139388e-05, - "loss": 0.4231, + "learning_rate": 5.132721963359801e-05, + "loss": 0.3127, "step": 1123000 }, { "epoch": 0.67, - "learning_rate": 5.132525406583332e-05, - "loss": 0.4233, + "learning_rate": 5.132511966803744e-05, + "loss": 0.3086, "step": 1123500 }, { "epoch": 0.67, - "learning_rate": 5.1323154100272754e-05, - "loss": 0.4202, + "learning_rate": 5.1323019702476876e-05, + "loss": 0.3163, "step": 1124000 }, { "epoch": 0.67, - "learning_rate": 5.132105413471219e-05, - "loss": 0.4294, + "learning_rate": 5.132091973691631e-05, + "loss": 0.315, "step": 1124500 }, { "epoch": 0.67, - "learning_rate": 5.131895416915163e-05, - "loss": 0.422, + "learning_rate": 5.131881977135575e-05, + "loss": 0.3104, "step": 1125000 }, { "epoch": 0.67, - "learning_rate": 5.131685420359106e-05, - "loss": 0.4174, + "learning_rate": 5.1316719805795184e-05, + "loss": 0.3093, "step": 1125500 }, { "epoch": 0.68, - "learning_rate": 5.1314754238030495e-05, - "loss": 0.4222, + "learning_rate": 5.131461984023462e-05, + "loss": 0.3106, "step": 1126000 }, { "epoch": 0.68, - "learning_rate": 5.131265847240105e-05, - "loss": 0.413, + "learning_rate": 5.131251987467406e-05, + "loss": 0.3104, "step": 1126500 }, { "epoch": 0.68, - "learning_rate": 5.131056270677161e-05, - "loss": 0.4288, + "learning_rate": 5.131042410904461e-05, + "loss": 0.3086, "step": 1127000 }, { "epoch": 0.68, - "learning_rate": 5.130846274121104e-05, - "loss": 0.4288, + "learning_rate": 5.1308324143484044e-05, + "loss": 0.3109, "step": 1127500 }, { "epoch": 0.68, - "learning_rate": 5.1306362775650475e-05, - "loss": 0.4215, + "learning_rate": 5.1306228377854605e-05, + "loss": 0.3108, "step": 1128000 }, { "epoch": 0.68, - "learning_rate": 5.130426281008991e-05, - "loss": 0.4237, + "learning_rate": 5.130412841229404e-05, + "loss": 0.3085, "step": 1128500 }, { "epoch": 0.68, - "learning_rate": 5.130216284452935e-05, - "loss": 0.4307, + "learning_rate": 5.130202844673347e-05, + "loss": 0.317, "step": 1129000 }, { "epoch": 0.68, - "learning_rate": 5.130006287896878e-05, - "loss": 0.4291, + "learning_rate": 5.1299928481172905e-05, + "loss": 0.3117, "step": 1129500 }, { "epoch": 0.68, - "learning_rate": 5.1297962913408216e-05, - "loss": 0.4185, + "learning_rate": 5.1297828515612345e-05, + "loss": 0.308, "step": 1130000 }, { "epoch": 0.68, - "learning_rate": 5.1295862947847656e-05, - "loss": 0.4271, + "learning_rate": 5.129572855005178e-05, + "loss": 0.3097, "step": 1130500 }, { "epoch": 0.68, - "learning_rate": 5.129376718221821e-05, - "loss": 0.4406, + "learning_rate": 5.129363278442233e-05, + "loss": 0.3237, "step": 1131000 }, { "epoch": 0.68, - "learning_rate": 5.129166721665764e-05, - "loss": 0.4191, + "learning_rate": 5.1291532818861766e-05, + "loss": 0.3097, "step": 1131500 }, { "epoch": 0.68, - "learning_rate": 5.1289567251097084e-05, - "loss": 0.4267, + "learning_rate": 5.1289432853301206e-05, + "loss": 0.311, "step": 1132000 }, { "epoch": 0.68, - "learning_rate": 5.128746728553652e-05, - "loss": 0.4189, + "learning_rate": 5.128733288774064e-05, + "loss": 0.3076, "step": 1132500 }, { "epoch": 0.68, - "learning_rate": 5.128536731997595e-05, - "loss": 0.428, + "learning_rate": 5.128523292218007e-05, + "loss": 0.3133, "step": 1133000 }, { "epoch": 0.68, - "learning_rate": 5.128326735441539e-05, - "loss": 0.4212, + "learning_rate": 5.128313295661951e-05, + "loss": 0.3097, "step": 1133500 }, { "epoch": 0.68, - "learning_rate": 5.1281171588785944e-05, - "loss": 0.4221, + "learning_rate": 5.1281032991058947e-05, + "loss": 0.3105, "step": 1134000 }, { "epoch": 0.68, - "learning_rate": 5.127907162322538e-05, - "loss": 0.432, + "learning_rate": 5.127893302549838e-05, + "loss": 0.3122, "step": 1134500 }, { "epoch": 0.68, - "learning_rate": 5.127697165766481e-05, - "loss": 0.4322, + "learning_rate": 5.127683305993782e-05, + "loss": 0.3134, "step": 1135000 }, { "epoch": 0.68, - "learning_rate": 5.127487169210425e-05, - "loss": 0.412, + "learning_rate": 5.1274733094377254e-05, + "loss": 0.3077, "step": 1135500 }, { "epoch": 0.68, - "learning_rate": 5.1272771726543685e-05, - "loss": 0.4221, + "learning_rate": 5.127263312881668e-05, + "loss": 0.3069, "step": 1136000 }, { "epoch": 0.68, - "learning_rate": 5.127067176098312e-05, - "loss": 0.428, + "learning_rate": 5.127053736318724e-05, + "loss": 0.3087, "step": 1136500 }, { "epoch": 0.68, - "learning_rate": 5.126857599535368e-05, - "loss": 0.4189, + "learning_rate": 5.126843739762668e-05, + "loss": 0.3053, "step": 1137000 }, { "epoch": 0.68, - "learning_rate": 5.126647602979311e-05, - "loss": 0.4214, + "learning_rate": 5.1266337432066114e-05, + "loss": 0.3078, "step": 1137500 }, { "epoch": 0.68, - "learning_rate": 5.1264380264163666e-05, - "loss": 0.4361, + "learning_rate": 5.126423746650555e-05, + "loss": 0.3156, "step": 1138000 }, { "epoch": 0.68, - "learning_rate": 5.12622802986031e-05, - "loss": 0.4225, + "learning_rate": 5.126214170087611e-05, + "loss": 0.3083, "step": 1138500 }, { "epoch": 0.68, - "learning_rate": 5.126018033304254e-05, - "loss": 0.4206, + "learning_rate": 5.126004173531554e-05, + "loss": 0.3085, "step": 1139000 }, { "epoch": 0.68, - "learning_rate": 5.125808036748197e-05, - "loss": 0.4294, + "learning_rate": 5.1257941769754975e-05, + "loss": 0.3128, "step": 1139500 }, { "epoch": 0.68, - "learning_rate": 5.1255980401921406e-05, - "loss": 0.4178, + "learning_rate": 5.1255841804194415e-05, + "loss": 0.3062, "step": 1140000 }, { "epoch": 0.68, - "learning_rate": 5.125388043636085e-05, - "loss": 0.4172, + "learning_rate": 5.125374183863385e-05, + "loss": 0.3112, "step": 1140500 }, { "epoch": 0.68, - "learning_rate": 5.125178047080028e-05, - "loss": 0.4263, + "learning_rate": 5.1251641873073276e-05, + "loss": 0.3126, "step": 1141000 }, { "epoch": 0.68, - "learning_rate": 5.1249680505239714e-05, - "loss": 0.4161, + "learning_rate": 5.1249546107443836e-05, + "loss": 0.3049, "step": 1141500 }, { "epoch": 0.68, - "learning_rate": 5.1247580539679154e-05, - "loss": 0.4191, + "learning_rate": 5.1247446141883276e-05, + "loss": 0.3117, "step": 1142000 }, { "epoch": 0.68, - "learning_rate": 5.124548477404971e-05, - "loss": 0.4238, + "learning_rate": 5.124534617632271e-05, + "loss": 0.3092, "step": 1142500 }, { "epoch": 0.69, - "learning_rate": 5.124338480848914e-05, - "loss": 0.4215, + "learning_rate": 5.124324621076214e-05, + "loss": 0.3056, "step": 1143000 }, { "epoch": 0.69, - "learning_rate": 5.1241284842928574e-05, - "loss": 0.4367, + "learning_rate": 5.1241146245201577e-05, + "loss": 0.3137, "step": 1143500 }, { "epoch": 0.69, - "learning_rate": 5.1239184877368015e-05, - "loss": 0.4106, + "learning_rate": 5.123904627964101e-05, + "loss": 0.3082, "step": 1144000 }, { "epoch": 0.69, - "learning_rate": 5.123708491180745e-05, - "loss": 0.4255, + "learning_rate": 5.1236946314080443e-05, + "loss": 0.3105, "step": 1144500 }, { "epoch": 0.69, - "learning_rate": 5.123498494624688e-05, - "loss": 0.4124, + "learning_rate": 5.1234846348519884e-05, + "loss": 0.3033, "step": 1145000 }, { "epoch": 0.69, - "learning_rate": 5.1232884980686315e-05, - "loss": 0.4257, + "learning_rate": 5.123274638295932e-05, + "loss": 0.3069, "step": 1145500 }, { "epoch": 0.69, - "learning_rate": 5.123078501512575e-05, - "loss": 0.4265, + "learning_rate": 5.123064641739876e-05, + "loss": 0.3128, "step": 1146000 }, { "epoch": 0.69, - "learning_rate": 5.122868924949631e-05, - "loss": 0.4199, + "learning_rate": 5.122855065176931e-05, + "loss": 0.3118, "step": 1146500 }, { "epoch": 0.69, - "learning_rate": 5.122659348386686e-05, - "loss": 0.4251, + "learning_rate": 5.1226450686208744e-05, + "loss": 0.3111, "step": 1147000 }, { "epoch": 0.69, - "learning_rate": 5.12244935183063e-05, - "loss": 0.4153, + "learning_rate": 5.1224354920579305e-05, + "loss": 0.3118, "step": 1147500 }, { "epoch": 0.69, - "learning_rate": 5.1222393552745736e-05, - "loss": 0.4214, + "learning_rate": 5.122225495501873e-05, + "loss": 0.3078, "step": 1148000 }, { "epoch": 0.69, - "learning_rate": 5.122029358718517e-05, - "loss": 0.429, + "learning_rate": 5.122015498945817e-05, + "loss": 0.3228, "step": 1148500 }, { "epoch": 0.69, - "learning_rate": 5.121819362162461e-05, - "loss": 0.4193, + "learning_rate": 5.1218055023897605e-05, + "loss": 0.3066, "step": 1149000 }, { "epoch": 0.69, - "learning_rate": 5.1216093656064036e-05, - "loss": 0.4087, + "learning_rate": 5.121595505833704e-05, + "loss": 0.3039, "step": 1149500 }, { "epoch": 0.69, - "learning_rate": 5.121399369050347e-05, - "loss": 0.4249, + "learning_rate": 5.121385509277648e-05, + "loss": 0.3124, "step": 1150000 }, { "epoch": 0.69, - "learning_rate": 5.121189372494291e-05, - "loss": 0.4277, + "learning_rate": 5.121175512721591e-05, + "loss": 0.3138, "step": 1150500 }, { "epoch": 0.69, - "learning_rate": 5.120979795931347e-05, - "loss": 0.4142, + "learning_rate": 5.1209655161655346e-05, + "loss": 0.3072, "step": 1151000 }, { "epoch": 0.69, - "learning_rate": 5.1207697993752904e-05, - "loss": 0.4337, + "learning_rate": 5.12075593960259e-05, + "loss": 0.3107, "step": 1151500 }, { "epoch": 0.69, - "learning_rate": 5.120559802819234e-05, - "loss": 0.4258, + "learning_rate": 5.120545943046534e-05, + "loss": 0.3102, "step": 1152000 }, { "epoch": 0.69, - "learning_rate": 5.120349806263177e-05, - "loss": 0.4307, + "learning_rate": 5.120335946490477e-05, + "loss": 0.3142, "step": 1152500 }, { "epoch": 0.69, - "learning_rate": 5.120141069686457e-05, - "loss": 0.4306, + "learning_rate": 5.120125949934421e-05, + "loss": 0.3053, "step": 1153000 }, { "epoch": 0.69, - "learning_rate": 5.1199310731304005e-05, - "loss": 0.4171, + "learning_rate": 5.119915953378365e-05, + "loss": 0.3114, "step": 1153500 }, { "epoch": 0.69, - "learning_rate": 5.119721076574344e-05, - "loss": 0.4229, + "learning_rate": 5.11970637681542e-05, + "loss": 0.3053, "step": 1154000 }, { "epoch": 0.69, - "learning_rate": 5.119511080018287e-05, - "loss": 0.4133, + "learning_rate": 5.1194963802593634e-05, + "loss": 0.3085, "step": 1154500 }, { "epoch": 0.69, - "learning_rate": 5.119301083462231e-05, - "loss": 0.4238, + "learning_rate": 5.1192868036964194e-05, + "loss": 0.3102, "step": 1155000 }, { "epoch": 0.69, - "learning_rate": 5.1190910869061745e-05, - "loss": 0.4175, + "learning_rate": 5.119076807140363e-05, + "loss": 0.3066, "step": 1155500 }, { "epoch": 0.69, - "learning_rate": 5.118881090350118e-05, - "loss": 0.4287, + "learning_rate": 5.118866810584306e-05, + "loss": 0.3167, "step": 1156000 }, { "epoch": 0.69, - "learning_rate": 5.118671093794062e-05, - "loss": 0.4244, + "learning_rate": 5.1186568140282494e-05, + "loss": 0.3137, "step": 1156500 }, { "epoch": 0.69, - "learning_rate": 5.118461097238005e-05, - "loss": 0.4269, + "learning_rate": 5.1184468174721935e-05, + "loss": 0.3124, "step": 1157000 }, { "epoch": 0.69, - "learning_rate": 5.1182511006819486e-05, - "loss": 0.4163, + "learning_rate": 5.118236820916137e-05, + "loss": 0.3081, "step": 1157500 }, { "epoch": 0.69, - "learning_rate": 5.1180411041258926e-05, - "loss": 0.4108, + "learning_rate": 5.11802682436008e-05, + "loss": 0.3065, "step": 1158000 }, { "epoch": 0.69, - "learning_rate": 5.117831107569836e-05, - "loss": 0.4283, + "learning_rate": 5.117816827804024e-05, + "loss": 0.3135, "step": 1158500 }, { "epoch": 0.69, - "learning_rate": 5.117621531006891e-05, - "loss": 0.4275, + "learning_rate": 5.1176068312479675e-05, + "loss": 0.3104, "step": 1159000 }, { "epoch": 0.7, - "learning_rate": 5.1174115344508354e-05, - "loss": 0.4307, + "learning_rate": 5.117396834691911e-05, + "loss": 0.3116, "step": 1159500 }, { "epoch": 0.7, - "learning_rate": 5.117201537894779e-05, - "loss": 0.4123, + "learning_rate": 5.117186838135855e-05, + "loss": 0.3018, "step": 1160000 }, { "epoch": 0.7, - "learning_rate": 5.116991541338722e-05, - "loss": 0.4219, + "learning_rate": 5.116976841579798e-05, + "loss": 0.3103, "step": 1160500 }, { "epoch": 0.7, - "learning_rate": 5.116781544782666e-05, - "loss": 0.4217, + "learning_rate": 5.1167672650168536e-05, + "loss": 0.3085, "step": 1161000 }, { "epoch": 0.7, - "learning_rate": 5.116572388212833e-05, - "loss": 0.4191, + "learning_rate": 5.1165572684607976e-05, + "loss": 0.3061, "step": 1161500 }, { "epoch": 0.7, - "learning_rate": 5.116362391656777e-05, - "loss": 0.4208, + "learning_rate": 5.116347271904741e-05, + "loss": 0.3103, "step": 1162000 }, { "epoch": 0.7, - "learning_rate": 5.11615239510072e-05, - "loss": 0.4129, + "learning_rate": 5.116137275348684e-05, + "loss": 0.3013, "step": 1162500 }, { "epoch": 0.7, - "learning_rate": 5.1159423985446635e-05, - "loss": 0.4259, + "learning_rate": 5.115928118778852e-05, + "loss": 0.3129, "step": 1163000 }, { "epoch": 0.7, - "learning_rate": 5.1157324019886075e-05, - "loss": 0.4188, + "learning_rate": 5.115718122222795e-05, + "loss": 0.3103, "step": 1163500 }, { "epoch": 0.7, - "learning_rate": 5.115522405432551e-05, - "loss": 0.4243, + "learning_rate": 5.115508125666739e-05, + "loss": 0.3079, "step": 1164000 }, { "epoch": 0.7, - "learning_rate": 5.115312408876494e-05, - "loss": 0.4228, + "learning_rate": 5.1152981291106824e-05, + "loss": 0.309, "step": 1164500 }, { "epoch": 0.7, - "learning_rate": 5.115102412320438e-05, - "loss": 0.4224, + "learning_rate": 5.115088132554626e-05, + "loss": 0.3125, "step": 1165000 }, { "epoch": 0.7, - "learning_rate": 5.1148928357574936e-05, - "loss": 0.4296, + "learning_rate": 5.11487813599857e-05, + "loss": 0.3099, "step": 1165500 }, { "epoch": 0.7, - "learning_rate": 5.114682839201437e-05, - "loss": 0.4224, + "learning_rate": 5.114668559435625e-05, + "loss": 0.3082, "step": 1166000 }, { "epoch": 0.7, - "learning_rate": 5.114473262638492e-05, - "loss": 0.4255, + "learning_rate": 5.1144585628795685e-05, + "loss": 0.3172, "step": 1166500 }, { "epoch": 0.7, - "learning_rate": 5.114263266082436e-05, - "loss": 0.4274, + "learning_rate": 5.1142485663235125e-05, + "loss": 0.3144, "step": 1167000 }, { "epoch": 0.7, - "learning_rate": 5.1140532695263796e-05, - "loss": 0.416, + "learning_rate": 5.114038569767456e-05, + "loss": 0.3042, "step": 1167500 }, { "epoch": 0.7, - "learning_rate": 5.113843272970323e-05, - "loss": 0.4241, + "learning_rate": 5.113828573211399e-05, + "loss": 0.3104, "step": 1168000 }, { "epoch": 0.7, - "learning_rate": 5.113633276414267e-05, - "loss": 0.4316, + "learning_rate": 5.1136189966484546e-05, + "loss": 0.3148, "step": 1168500 }, { "epoch": 0.7, - "learning_rate": 5.1134236998513224e-05, - "loss": 0.42, + "learning_rate": 5.1134094200855106e-05, + "loss": 0.3128, "step": 1169000 }, { "epoch": 0.7, - "learning_rate": 5.113213703295266e-05, - "loss": 0.4224, + "learning_rate": 5.113199423529454e-05, + "loss": 0.3103, "step": 1169500 }, { "epoch": 0.7, - "learning_rate": 5.113003706739209e-05, - "loss": 0.4198, + "learning_rate": 5.112989426973397e-05, + "loss": 0.3151, "step": 1170000 }, { "epoch": 0.7, - "learning_rate": 5.112793710183153e-05, - "loss": 0.4273, + "learning_rate": 5.1127794304173406e-05, + "loss": 0.3063, "step": 1170500 }, { "epoch": 0.7, - "learning_rate": 5.1125837136270964e-05, - "loss": 0.4149, + "learning_rate": 5.1125694338612846e-05, + "loss": 0.3032, "step": 1171000 }, { "epoch": 0.7, - "learning_rate": 5.112374137064152e-05, - "loss": 0.4163, + "learning_rate": 5.112359437305228e-05, + "loss": 0.3051, "step": 1171500 }, { "epoch": 0.7, - "learning_rate": 5.112164140508096e-05, - "loss": 0.4161, + "learning_rate": 5.1121494407491713e-05, + "loss": 0.31, "step": 1172000 }, { "epoch": 0.7, - "learning_rate": 5.111954143952039e-05, - "loss": 0.4071, + "learning_rate": 5.1119394441931154e-05, + "loss": 0.3002, "step": 1172500 }, { "epoch": 0.7, - "learning_rate": 5.1117441473959825e-05, - "loss": 0.4223, + "learning_rate": 5.111729447637059e-05, + "loss": 0.3129, "step": 1173000 }, { "epoch": 0.7, - "learning_rate": 5.1115341508399265e-05, - "loss": 0.4234, + "learning_rate": 5.111519451081002e-05, + "loss": 0.3099, "step": 1173500 }, { "epoch": 0.7, - "learning_rate": 5.11132415428387e-05, - "loss": 0.4391, + "learning_rate": 5.111309454524946e-05, + "loss": 0.3209, "step": 1174000 }, { "epoch": 0.7, - "learning_rate": 5.111114157727813e-05, - "loss": 0.4234, + "learning_rate": 5.1110994579688894e-05, + "loss": 0.3118, "step": 1174500 }, { "epoch": 0.7, - "learning_rate": 5.1109045811648686e-05, - "loss": 0.4167, + "learning_rate": 5.110889461412833e-05, + "loss": 0.3078, "step": 1175000 }, { "epoch": 0.7, - "learning_rate": 5.1106945846088126e-05, - "loss": 0.4147, + "learning_rate": 5.110679464856776e-05, + "loss": 0.3048, "step": 1175500 }, { "epoch": 0.71, - "learning_rate": 5.110484588052756e-05, - "loss": 0.4323, + "learning_rate": 5.110469888293832e-05, + "loss": 0.3091, "step": 1176000 }, { "epoch": 0.71, - "learning_rate": 5.110274591496699e-05, - "loss": 0.4333, + "learning_rate": 5.1102598917377755e-05, + "loss": 0.3122, "step": 1176500 }, { "epoch": 0.71, - "learning_rate": 5.110064594940643e-05, - "loss": 0.4199, + "learning_rate": 5.110050315174831e-05, + "loss": 0.315, "step": 1177000 }, { "epoch": 0.71, - "learning_rate": 5.109854598384587e-05, - "loss": 0.423, + "learning_rate": 5.109840318618775e-05, + "loss": 0.3084, "step": 1177500 }, { "epoch": 0.71, - "learning_rate": 5.109644601828529e-05, - "loss": 0.4223, + "learning_rate": 5.109630322062718e-05, + "loss": 0.309, "step": 1178000 }, { "epoch": 0.71, - "learning_rate": 5.1094346052724734e-05, - "loss": 0.4351, + "learning_rate": 5.1094203255066616e-05, + "loss": 0.3133, "step": 1178500 }, { "epoch": 0.71, - "learning_rate": 5.1092250287095294e-05, - "loss": 0.4261, + "learning_rate": 5.1092103289506056e-05, + "loss": 0.3147, "step": 1179000 }, { "epoch": 0.71, - "learning_rate": 5.109015032153473e-05, - "loss": 0.4202, + "learning_rate": 5.109000332394549e-05, + "loss": 0.3059, "step": 1179500 }, { "epoch": 0.71, - "learning_rate": 5.108805035597416e-05, - "loss": 0.4222, + "learning_rate": 5.1087903358384916e-05, + "loss": 0.305, "step": 1180000 }, { "epoch": 0.71, - "learning_rate": 5.108595459034472e-05, - "loss": 0.4298, + "learning_rate": 5.1085803392824356e-05, + "loss": 0.3112, "step": 1180500 }, { "epoch": 0.71, - "learning_rate": 5.1083854624784155e-05, - "loss": 0.425, + "learning_rate": 5.108370342726379e-05, + "loss": 0.3084, "step": 1181000 }, { "epoch": 0.71, - "learning_rate": 5.108175465922359e-05, - "loss": 0.4165, + "learning_rate": 5.108160346170322e-05, + "loss": 0.3086, "step": 1181500 }, { "epoch": 0.71, - "learning_rate": 5.107965469366303e-05, - "loss": 0.4253, + "learning_rate": 5.1079507696073784e-05, + "loss": 0.3112, "step": 1182000 }, { "epoch": 0.71, - "learning_rate": 5.107755472810246e-05, - "loss": 0.4171, + "learning_rate": 5.1077411930444344e-05, + "loss": 0.3045, "step": 1182500 }, { "epoch": 0.71, - "learning_rate": 5.107545476254189e-05, - "loss": 0.4211, + "learning_rate": 5.107531196488378e-05, + "loss": 0.3078, "step": 1183000 }, { "epoch": 0.71, - "learning_rate": 5.107335479698133e-05, - "loss": 0.4272, + "learning_rate": 5.107321199932321e-05, + "loss": 0.3076, "step": 1183500 }, { "epoch": 0.71, - "learning_rate": 5.107125483142076e-05, - "loss": 0.4159, + "learning_rate": 5.107111203376265e-05, + "loss": 0.3031, "step": 1184000 }, { "epoch": 0.71, - "learning_rate": 5.106915906579132e-05, - "loss": 0.4126, + "learning_rate": 5.106901206820208e-05, + "loss": 0.3088, "step": 1184500 }, { "epoch": 0.71, - "learning_rate": 5.1067059100230756e-05, - "loss": 0.4183, + "learning_rate": 5.106691210264151e-05, + "loss": 0.3087, "step": 1185000 }, { "epoch": 0.71, - "learning_rate": 5.106495913467019e-05, - "loss": 0.427, + "learning_rate": 5.106481633701207e-05, + "loss": 0.3077, "step": 1185500 }, { "epoch": 0.71, - "learning_rate": 5.106286336904075e-05, - "loss": 0.4245, + "learning_rate": 5.106271637145151e-05, + "loss": 0.3093, "step": 1186000 }, { "epoch": 0.71, - "learning_rate": 5.106076340348018e-05, - "loss": 0.4251, + "learning_rate": 5.1060620605822065e-05, + "loss": 0.3085, "step": 1186500 }, { "epoch": 0.71, - "learning_rate": 5.105866343791962e-05, - "loss": 0.4232, + "learning_rate": 5.10585206402615e-05, + "loss": 0.3093, "step": 1187000 }, { "epoch": 0.71, - "learning_rate": 5.105656347235905e-05, - "loss": 0.4339, + "learning_rate": 5.105642067470093e-05, + "loss": 0.3145, "step": 1187500 }, { "epoch": 0.71, - "learning_rate": 5.1054463506798484e-05, - "loss": 0.4258, + "learning_rate": 5.105432070914037e-05, + "loss": 0.3095, "step": 1188000 }, { "epoch": 0.71, - "learning_rate": 5.1052363541237924e-05, - "loss": 0.4245, + "learning_rate": 5.1052220743579806e-05, + "loss": 0.3068, "step": 1188500 }, { "epoch": 0.71, - "learning_rate": 5.1050267775608484e-05, - "loss": 0.4253, + "learning_rate": 5.105012077801924e-05, + "loss": 0.3135, "step": 1189000 }, { "epoch": 0.71, - "learning_rate": 5.104816781004792e-05, - "loss": 0.4101, + "learning_rate": 5.104802081245867e-05, + "loss": 0.3069, "step": 1189500 }, { "epoch": 0.71, - "learning_rate": 5.1046067844487344e-05, - "loss": 0.4279, + "learning_rate": 5.1045920846898106e-05, + "loss": 0.314, "step": 1190000 }, { "epoch": 0.71, - "learning_rate": 5.1043967878926785e-05, - "loss": 0.4183, + "learning_rate": 5.104382088133755e-05, + "loss": 0.3107, "step": 1190500 }, { "epoch": 0.71, - "learning_rate": 5.104186791336622e-05, - "loss": 0.4224, + "learning_rate": 5.104172091577698e-05, + "loss": 0.3139, "step": 1191000 }, { "epoch": 0.71, - "learning_rate": 5.103977214773678e-05, - "loss": 0.4087, + "learning_rate": 5.1039620950216414e-05, + "loss": 0.3019, "step": 1191500 }, { "epoch": 0.71, - "learning_rate": 5.103767218217621e-05, - "loss": 0.4216, + "learning_rate": 5.1037520984655854e-05, + "loss": 0.3086, "step": 1192000 }, { "epoch": 0.71, - "learning_rate": 5.1035572216615645e-05, - "loss": 0.4271, + "learning_rate": 5.103542101909529e-05, + "loss": 0.3084, "step": 1192500 }, { "epoch": 0.72, - "learning_rate": 5.103347225105508e-05, - "loss": 0.4262, + "learning_rate": 5.103332525346584e-05, + "loss": 0.3104, "step": 1193000 }, { "epoch": 0.72, - "learning_rate": 5.103137228549451e-05, - "loss": 0.4285, + "learning_rate": 5.10312294878364e-05, + "loss": 0.3116, "step": 1193500 }, { "epoch": 0.72, - "learning_rate": 5.102927231993395e-05, - "loss": 0.4259, + "learning_rate": 5.102912952227583e-05, + "loss": 0.3084, "step": 1194000 }, { "epoch": 0.72, - "learning_rate": 5.102717655430451e-05, - "loss": 0.423, + "learning_rate": 5.102702955671527e-05, + "loss": 0.3058, "step": 1194500 }, { "epoch": 0.72, - "learning_rate": 5.102507658874394e-05, - "loss": 0.4205, + "learning_rate": 5.10249295911547e-05, + "loss": 0.314, "step": 1195000 }, { "epoch": 0.72, - "learning_rate": 5.102297662318338e-05, - "loss": 0.4189, + "learning_rate": 5.102283382552526e-05, + "loss": 0.3076, "step": 1195500 }, { "epoch": 0.72, - "learning_rate": 5.102087665762281e-05, - "loss": 0.4218, + "learning_rate": 5.1020733859964695e-05, + "loss": 0.3134, "step": 1196000 }, { "epoch": 0.72, - "learning_rate": 5.1018780891993374e-05, - "loss": 0.4241, + "learning_rate": 5.101863389440413e-05, + "loss": 0.3096, "step": 1196500 }, { "epoch": 0.72, - "learning_rate": 5.101668092643281e-05, - "loss": 0.4068, + "learning_rate": 5.101653392884356e-05, + "loss": 0.308, "step": 1197000 }, { "epoch": 0.72, - "learning_rate": 5.101458096087224e-05, - "loss": 0.4185, + "learning_rate": 5.1014433963283e-05, + "loss": 0.2989, "step": 1197500 }, { "epoch": 0.72, - "learning_rate": 5.1012480995311674e-05, - "loss": 0.419, + "learning_rate": 5.1012333997722436e-05, + "loss": 0.3078, "step": 1198000 }, { "epoch": 0.72, - "learning_rate": 5.101038102975111e-05, - "loss": 0.4192, + "learning_rate": 5.1010238232092996e-05, + "loss": 0.3097, "step": 1198500 }, { "epoch": 0.72, - "learning_rate": 5.100828106419055e-05, - "loss": 0.4205, + "learning_rate": 5.100813826653242e-05, + "loss": 0.3084, "step": 1199000 }, { "epoch": 0.72, - "learning_rate": 5.100618109862998e-05, - "loss": 0.4233, + "learning_rate": 5.100603830097186e-05, + "loss": 0.3068, "step": 1199500 }, { "epoch": 0.72, - "learning_rate": 5.1004081133069415e-05, - "loss": 0.413, + "learning_rate": 5.10039383354113e-05, + "loss": 0.3037, "step": 1200000 }, { "epoch": 0.72, - "eval_loss": 0.4001677334308624, - "eval_runtime": 1118.6829, - "eval_samples_per_second": 470.839, - "eval_steps_per_second": 78.474, + "eval_loss": 0.2797383666038513, + "eval_runtime": 1458.5506, + "eval_samples_per_second": 361.126, + "eval_steps_per_second": 60.188, "step": 1200000 }, { "epoch": 0.72, - "learning_rate": 5.1001981167508855e-05, - "loss": 0.4221, + "learning_rate": 5.100184256978186e-05, + "loss": 0.3071, "step": 1200500 }, { "epoch": 0.72, - "learning_rate": 5.099988120194829e-05, - "loss": 0.4216, + "learning_rate": 5.099974260422129e-05, + "loss": 0.3062, "step": 1201000 }, { "epoch": 0.72, - "learning_rate": 5.099778543631884e-05, - "loss": 0.4179, + "learning_rate": 5.0997646838591844e-05, + "loss": 0.3068, "step": 1201500 }, { "epoch": 0.72, - "learning_rate": 5.0995685470758275e-05, - "loss": 0.4188, + "learning_rate": 5.0995546873031284e-05, + "loss": 0.3068, "step": 1202000 }, { "epoch": 0.72, - "learning_rate": 5.0993585505197715e-05, - "loss": 0.4226, + "learning_rate": 5.099344690747072e-05, + "loss": 0.3054, "step": 1202500 }, { "epoch": 0.72, - "learning_rate": 5.099148553963715e-05, - "loss": 0.4315, + "learning_rate": 5.099134694191015e-05, + "loss": 0.3091, "step": 1203000 }, { "epoch": 0.72, - "learning_rate": 5.09893897740077e-05, - "loss": 0.4292, + "learning_rate": 5.0989246976349585e-05, + "loss": 0.3104, "step": 1203500 }, { "epoch": 0.72, - "learning_rate": 5.098728980844714e-05, - "loss": 0.4196, + "learning_rate": 5.098714701078902e-05, + "loss": 0.3087, "step": 1204000 }, { "epoch": 0.72, - "learning_rate": 5.0985189842886576e-05, - "loss": 0.4163, + "learning_rate": 5.098504704522846e-05, + "loss": 0.3065, "step": 1204500 }, { "epoch": 0.72, - "learning_rate": 5.098308987732601e-05, - "loss": 0.4115, + "learning_rate": 5.098294707966789e-05, + "loss": 0.3012, "step": 1205000 }, { "epoch": 0.72, - "learning_rate": 5.098098991176545e-05, - "loss": 0.4162, + "learning_rate": 5.0980847114107325e-05, + "loss": 0.3059, "step": 1205500 }, { "epoch": 0.72, - "learning_rate": 5.097888994620488e-05, - "loss": 0.4283, + "learning_rate": 5.0978747148546766e-05, + "loss": 0.3122, "step": 1206000 }, { "epoch": 0.72, - "learning_rate": 5.097678998064432e-05, - "loss": 0.4301, + "learning_rate": 5.09766471829862e-05, + "loss": 0.3141, "step": 1206500 }, { "epoch": 0.72, - "learning_rate": 5.097469001508376e-05, - "loss": 0.4174, + "learning_rate": 5.097454721742563e-05, + "loss": 0.3088, "step": 1207000 }, { "epoch": 0.72, - "learning_rate": 5.097259424945431e-05, - "loss": 0.4195, + "learning_rate": 5.097244725186507e-05, + "loss": 0.3075, "step": 1207500 }, { "epoch": 0.72, - "learning_rate": 5.0970494283893744e-05, - "loss": 0.4296, + "learning_rate": 5.0970347286304506e-05, + "loss": 0.3066, "step": 1208000 }, { "epoch": 0.72, - "learning_rate": 5.09683985182643e-05, - "loss": 0.4221, + "learning_rate": 5.096824732074394e-05, + "loss": 0.2994, "step": 1208500 }, { "epoch": 0.72, - "learning_rate": 5.096629855270374e-05, - "loss": 0.4087, + "learning_rate": 5.096614735518337e-05, + "loss": 0.3003, "step": 1209000 }, { "epoch": 0.73, - "learning_rate": 5.096419858714317e-05, - "loss": 0.4188, + "learning_rate": 5.0964047389622807e-05, + "loss": 0.3065, "step": 1209500 }, { "epoch": 0.73, - "learning_rate": 5.0962098621582605e-05, - "loss": 0.4359, + "learning_rate": 5.096194742406224e-05, + "loss": 0.3155, "step": 1210000 }, { "epoch": 0.73, - "learning_rate": 5.0959998656022045e-05, - "loss": 0.4234, + "learning_rate": 5.09598516584328e-05, + "loss": 0.3091, "step": 1210500 }, { "epoch": 0.73, - "learning_rate": 5.095789869046148e-05, - "loss": 0.4074, + "learning_rate": 5.095775169287224e-05, + "loss": 0.3033, "step": 1211000 }, { "epoch": 0.73, - "learning_rate": 5.095579872490091e-05, - "loss": 0.4187, + "learning_rate": 5.095565172731167e-05, + "loss": 0.308, "step": 1211500 }, { "epoch": 0.73, - "learning_rate": 5.095369875934035e-05, - "loss": 0.418, + "learning_rate": 5.095355176175111e-05, + "loss": 0.3097, "step": 1212000 }, { "epoch": 0.73, - "learning_rate": 5.095159879377978e-05, - "loss": 0.419, + "learning_rate": 5.095145599612167e-05, + "loss": 0.3065, "step": 1212500 }, { "epoch": 0.73, - "learning_rate": 5.094949882821921e-05, - "loss": 0.4158, + "learning_rate": 5.09493560305611e-05, + "loss": 0.304, "step": 1213000 }, { "epoch": 0.73, - "learning_rate": 5.094739886265865e-05, - "loss": 0.4067, + "learning_rate": 5.0947256065000535e-05, + "loss": 0.3065, "step": 1213500 }, { "epoch": 0.73, - "learning_rate": 5.0945298897098086e-05, - "loss": 0.4161, + "learning_rate": 5.094515609943997e-05, + "loss": 0.3025, "step": 1214000 }, { "epoch": 0.73, - "learning_rate": 5.094319893153752e-05, - "loss": 0.4167, + "learning_rate": 5.09430561338794e-05, + "loss": 0.3065, "step": 1214500 }, { "epoch": 0.73, - "learning_rate": 5.094110316590807e-05, - "loss": 0.4151, + "learning_rate": 5.0940956168318835e-05, + "loss": 0.3109, "step": 1215000 }, { "epoch": 0.73, - "learning_rate": 5.093900320034751e-05, - "loss": 0.4243, + "learning_rate": 5.0938856202758275e-05, + "loss": 0.3112, "step": 1215500 }, { "epoch": 0.73, - "learning_rate": 5.0936907434718074e-05, - "loss": 0.4097, + "learning_rate": 5.0936760437128836e-05, + "loss": 0.3071, "step": 1216000 }, { "epoch": 0.73, - "learning_rate": 5.093480746915751e-05, - "loss": 0.4176, + "learning_rate": 5.093466047156826e-05, + "loss": 0.3004, "step": 1216500 }, { "epoch": 0.73, - "learning_rate": 5.093270750359694e-05, - "loss": 0.4211, + "learning_rate": 5.093256470593882e-05, + "loss": 0.312, "step": 1217000 }, { "epoch": 0.73, - "learning_rate": 5.0930607538036374e-05, - "loss": 0.4217, + "learning_rate": 5.0930464740378256e-05, + "loss": 0.3049, "step": 1217500 }, { "epoch": 0.73, - "learning_rate": 5.0928511772406934e-05, - "loss": 0.4155, + "learning_rate": 5.0928364774817696e-05, + "loss": 0.3033, "step": 1218000 }, { "epoch": 0.73, - "learning_rate": 5.092641180684637e-05, - "loss": 0.4236, + "learning_rate": 5.092626480925713e-05, + "loss": 0.3089, "step": 1218500 }, { "epoch": 0.73, - "learning_rate": 5.092431184128581e-05, - "loss": 0.4204, + "learning_rate": 5.092416484369656e-05, + "loss": 0.3145, "step": 1219000 }, { "epoch": 0.73, - "learning_rate": 5.0922211875725235e-05, - "loss": 0.4201, + "learning_rate": 5.0922064878136e-05, + "loss": 0.3156, "step": 1219500 }, { "epoch": 0.73, - "learning_rate": 5.092011191016467e-05, - "loss": 0.4295, + "learning_rate": 5.091996911250656e-05, + "loss": 0.315, "step": 1220000 }, { "epoch": 0.73, - "learning_rate": 5.091801614453523e-05, - "loss": 0.4118, + "learning_rate": 5.091786914694599e-05, + "loss": 0.309, "step": 1220500 }, { "epoch": 0.73, - "learning_rate": 5.091591617897467e-05, - "loss": 0.4067, + "learning_rate": 5.0915769181385424e-05, + "loss": 0.3014, "step": 1221000 }, { "epoch": 0.73, - "learning_rate": 5.09138162134141e-05, - "loss": 0.4166, + "learning_rate": 5.091366921582486e-05, + "loss": 0.3049, "step": 1221500 }, { "epoch": 0.73, - "learning_rate": 5.091171624785353e-05, - "loss": 0.4133, + "learning_rate": 5.091156925026429e-05, + "loss": 0.3108, "step": 1222000 }, { "epoch": 0.73, - "learning_rate": 5.090961628229297e-05, - "loss": 0.4106, + "learning_rate": 5.090946928470373e-05, + "loss": 0.303, "step": 1222500 }, { "epoch": 0.73, - "learning_rate": 5.09075163167324e-05, - "loss": 0.4201, + "learning_rate": 5.0907369319143165e-05, + "loss": 0.3097, "step": 1223000 }, { "epoch": 0.73, - "learning_rate": 5.0905416351171836e-05, - "loss": 0.4175, + "learning_rate": 5.090527355351372e-05, + "loss": 0.3059, "step": 1223500 }, { "epoch": 0.73, - "learning_rate": 5.0903320585542396e-05, - "loss": 0.4153, + "learning_rate": 5.090317358795315e-05, + "loss": 0.3095, "step": 1224000 }, { "epoch": 0.73, - "learning_rate": 5.090122061998183e-05, - "loss": 0.4222, + "learning_rate": 5.090107782232371e-05, + "loss": 0.3118, "step": 1224500 }, { "epoch": 0.73, - "learning_rate": 5.089912485435239e-05, - "loss": 0.4162, + "learning_rate": 5.089897785676315e-05, + "loss": 0.3038, "step": 1225000 }, { "epoch": 0.73, - "learning_rate": 5.0897024888791824e-05, - "loss": 0.4311, + "learning_rate": 5.0896877891202586e-05, + "loss": 0.3139, "step": 1225500 }, { "epoch": 0.74, - "learning_rate": 5.0894924923231264e-05, - "loss": 0.4196, + "learning_rate": 5.089477792564202e-05, + "loss": 0.3037, "step": 1226000 }, { "epoch": 0.74, - "learning_rate": 5.089282495767069e-05, - "loss": 0.4123, + "learning_rate": 5.089267796008145e-05, + "loss": 0.305, "step": 1226500 }, { "epoch": 0.74, - "learning_rate": 5.0890724992110124e-05, - "loss": 0.418, + "learning_rate": 5.0890577994520886e-05, + "loss": 0.3096, "step": 1227000 }, { "epoch": 0.74, - "learning_rate": 5.0888625026549564e-05, - "loss": 0.4239, + "learning_rate": 5.0888478028960326e-05, + "loss": 0.3095, "step": 1227500 }, { "epoch": 0.74, - "learning_rate": 5.0886525060989e-05, - "loss": 0.4164, + "learning_rate": 5.088637806339976e-05, + "loss": 0.3084, "step": 1228000 }, { "epoch": 0.74, - "learning_rate": 5.088442509542843e-05, - "loss": 0.4261, + "learning_rate": 5.088427809783919e-05, + "loss": 0.3117, "step": 1228500 }, { "epoch": 0.74, - "learning_rate": 5.088232512986787e-05, - "loss": 0.4121, + "learning_rate": 5.088218233220975e-05, + "loss": 0.3076, "step": 1229000 }, { "epoch": 0.74, - "learning_rate": 5.0880225164307305e-05, - "loss": 0.4165, + "learning_rate": 5.088008236664919e-05, + "loss": 0.3042, "step": 1229500 }, { "epoch": 0.74, - "learning_rate": 5.087812519874674e-05, - "loss": 0.4118, + "learning_rate": 5.087798240108862e-05, + "loss": 0.3043, "step": 1230000 }, { "epoch": 0.74, - "learning_rate": 5.087602523318618e-05, - "loss": 0.4152, + "learning_rate": 5.0875882435528054e-05, + "loss": 0.3102, "step": 1230500 }, { "epoch": 0.74, - "learning_rate": 5.087392946755673e-05, - "loss": 0.4226, + "learning_rate": 5.0873782469967494e-05, + "loss": 0.3092, "step": 1231000 }, { "epoch": 0.74, - "learning_rate": 5.0871829501996166e-05, - "loss": 0.4125, + "learning_rate": 5.087168250440693e-05, + "loss": 0.3038, "step": 1231500 }, { "epoch": 0.74, - "learning_rate": 5.08697295364356e-05, - "loss": 0.4132, + "learning_rate": 5.086958253884636e-05, + "loss": 0.3042, "step": 1232000 }, { "epoch": 0.74, - "learning_rate": 5.086762957087504e-05, - "loss": 0.4197, + "learning_rate": 5.08674825732858e-05, + "loss": 0.3073, "step": 1232500 }, { "epoch": 0.74, - "learning_rate": 5.086552960531447e-05, - "loss": 0.4232, + "learning_rate": 5.0865382607725235e-05, + "loss": 0.3067, "step": 1233000 }, { "epoch": 0.74, - "learning_rate": 5.086343803961615e-05, - "loss": 0.4196, + "learning_rate": 5.086328264216467e-05, + "loss": 0.3048, "step": 1233500 }, { "epoch": 0.74, - "learning_rate": 5.086133807405558e-05, - "loss": 0.4156, + "learning_rate": 5.08611826766041e-05, + "loss": 0.3049, "step": 1234000 }, { "epoch": 0.74, - "learning_rate": 5.085923810849502e-05, - "loss": 0.426, + "learning_rate": 5.0859082711043535e-05, + "loss": 0.3006, "step": 1234500 }, { "epoch": 0.74, - "learning_rate": 5.0857138142934454e-05, - "loss": 0.4141, + "learning_rate": 5.0856986945414096e-05, + "loss": 0.3041, "step": 1235000 }, { "epoch": 0.74, - "learning_rate": 5.085503817737389e-05, - "loss": 0.4254, + "learning_rate": 5.085488697985353e-05, + "loss": 0.3099, "step": 1235500 }, { "epoch": 0.74, - "learning_rate": 5.085293821181333e-05, - "loss": 0.4089, + "learning_rate": 5.085278701429296e-05, + "loss": 0.3002, "step": 1236000 }, { "epoch": 0.74, - "learning_rate": 5.085083824625276e-05, - "loss": 0.4215, + "learning_rate": 5.0850687048732396e-05, + "loss": 0.3018, "step": 1236500 }, { "epoch": 0.74, - "learning_rate": 5.0848738280692194e-05, - "loss": 0.421, + "learning_rate": 5.0848587083171836e-05, + "loss": 0.3062, "step": 1237000 }, { "epoch": 0.74, - "learning_rate": 5.084664251506275e-05, - "loss": 0.4188, + "learning_rate": 5.084648711761127e-05, + "loss": 0.3059, "step": 1237500 }, { "epoch": 0.74, - "learning_rate": 5.084454254950219e-05, - "loss": 0.4194, + "learning_rate": 5.08443871520507e-05, + "loss": 0.3135, "step": 1238000 }, { "epoch": 0.74, - "learning_rate": 5.084244258394162e-05, - "loss": 0.4121, + "learning_rate": 5.084228718649014e-05, + "loss": 0.2999, "step": 1238500 }, { "epoch": 0.74, - "learning_rate": 5.0840346818312175e-05, - "loss": 0.4152, + "learning_rate": 5.08401914208607e-05, + "loss": 0.3073, "step": 1239000 }, { "epoch": 0.74, - "learning_rate": 5.0838246852751615e-05, - "loss": 0.422, + "learning_rate": 5.083809145530013e-05, + "loss": 0.3109, "step": 1239500 }, { "epoch": 0.74, - "learning_rate": 5.083614688719105e-05, - "loss": 0.4217, + "learning_rate": 5.083599568967069e-05, + "loss": 0.3086, "step": 1240000 }, { "epoch": 0.74, - "learning_rate": 5.083404692163048e-05, - "loss": 0.425, + "learning_rate": 5.0833899924041244e-05, + "loss": 0.3141, "step": 1240500 }, { "epoch": 0.74, - "learning_rate": 5.083194695606992e-05, - "loss": 0.4169, + "learning_rate": 5.083179995848068e-05, + "loss": 0.3066, "step": 1241000 }, { "epoch": 0.74, - "learning_rate": 5.0829846990509356e-05, - "loss": 0.4073, + "learning_rate": 5.082969999292012e-05, + "loss": 0.3004, "step": 1241500 }, { "epoch": 0.74, - "learning_rate": 5.082774702494879e-05, - "loss": 0.416, + "learning_rate": 5.082760002735955e-05, + "loss": 0.3075, "step": 1242000 }, { "epoch": 0.74, - "learning_rate": 5.082564705938823e-05, - "loss": 0.4188, + "learning_rate": 5.0825500061798985e-05, + "loss": 0.307, "step": 1242500 }, { "epoch": 0.75, - "learning_rate": 5.082355129375878e-05, - "loss": 0.4249, + "learning_rate": 5.0823400096238425e-05, + "loss": 0.3096, "step": 1243000 }, { "epoch": 0.75, - "learning_rate": 5.082145132819822e-05, - "loss": 0.4148, + "learning_rate": 5.082130013067785e-05, + "loss": 0.3095, "step": 1243500 }, { "epoch": 0.75, - "learning_rate": 5.081935136263765e-05, - "loss": 0.4173, + "learning_rate": 5.081920016511729e-05, + "loss": 0.3122, "step": 1244000 }, { "epoch": 0.75, - "learning_rate": 5.0817255597008204e-05, - "loss": 0.4187, + "learning_rate": 5.081710439948785e-05, + "loss": 0.3042, "step": 1244500 }, { "epoch": 0.75, - "learning_rate": 5.0815155631447644e-05, - "loss": 0.4164, + "learning_rate": 5.0815004433927286e-05, + "loss": 0.309, "step": 1245000 }, { "epoch": 0.75, - "learning_rate": 5.081305566588708e-05, - "loss": 0.4082, + "learning_rate": 5.081290446836671e-05, + "loss": 0.3074, "step": 1245500 }, { "epoch": 0.75, - "learning_rate": 5.081095570032651e-05, - "loss": 0.4085, + "learning_rate": 5.081080450280615e-05, + "loss": 0.3036, "step": 1246000 }, { "epoch": 0.75, - "learning_rate": 5.080885573476595e-05, - "loss": 0.4102, + "learning_rate": 5.0808704537245586e-05, + "loss": 0.3067, "step": 1246500 }, { "epoch": 0.75, - "learning_rate": 5.0806755769205385e-05, - "loss": 0.418, + "learning_rate": 5.080660877161615e-05, + "loss": 0.3151, "step": 1247000 }, { "epoch": 0.75, - "learning_rate": 5.080465580364482e-05, - "loss": 0.4142, + "learning_rate": 5.080450880605558e-05, + "loss": 0.3089, "step": 1247500 }, { "epoch": 0.75, - "learning_rate": 5.080255583808426e-05, - "loss": 0.4135, + "learning_rate": 5.0802408840495014e-05, + "loss": 0.3111, "step": 1248000 }, { "epoch": 0.75, - "learning_rate": 5.080046427238593e-05, - "loss": 0.4023, + "learning_rate": 5.080030887493445e-05, + "loss": 0.305, "step": 1248500 }, { "epoch": 0.75, - "learning_rate": 5.0798364306825365e-05, - "loss": 0.4111, + "learning_rate": 5.079821310930501e-05, + "loss": 0.3057, "step": 1249000 }, { "epoch": 0.75, - "learning_rate": 5.07962643412648e-05, - "loss": 0.4068, + "learning_rate": 5.079611314374444e-05, + "loss": 0.3085, "step": 1249500 }, { "epoch": 0.75, - "learning_rate": 5.079416437570424e-05, - "loss": 0.4075, + "learning_rate": 5.0794017378115e-05, + "loss": 0.3051, "step": 1250000 }, { "epoch": 0.75, - "learning_rate": 5.079206441014367e-05, - "loss": 0.4117, + "learning_rate": 5.0791917412554435e-05, + "loss": 0.3058, "step": 1250500 }, { "epoch": 0.75, - "learning_rate": 5.0789964444583106e-05, - "loss": 0.4054, + "learning_rate": 5.078981744699387e-05, + "loss": 0.3027, "step": 1251000 }, { "epoch": 0.75, - "learning_rate": 5.0787864479022546e-05, - "loss": 0.4194, + "learning_rate": 5.078771748143331e-05, + "loss": 0.3122, "step": 1251500 }, { "epoch": 0.75, - "learning_rate": 5.078576451346198e-05, - "loss": 0.424, + "learning_rate": 5.078561751587274e-05, + "loss": 0.3189, "step": 1252000 }, { "epoch": 0.75, - "learning_rate": 5.078366874783253e-05, - "loss": 0.4109, + "learning_rate": 5.0783517550312175e-05, + "loss": 0.3067, "step": 1252500 }, { "epoch": 0.75, - "learning_rate": 5.078157298220309e-05, - "loss": 0.411, + "learning_rate": 5.078141758475161e-05, + "loss": 0.3064, "step": 1253000 }, { "epoch": 0.75, - "learning_rate": 5.077947301664253e-05, - "loss": 0.4137, + "learning_rate": 5.077931761919104e-05, + "loss": 0.3051, "step": 1253500 }, { "epoch": 0.75, - "learning_rate": 5.077737305108196e-05, - "loss": 0.4032, + "learning_rate": 5.0777217653630476e-05, + "loss": 0.3096, "step": 1254000 }, { "epoch": 0.75, - "learning_rate": 5.0775273085521394e-05, - "loss": 0.4101, + "learning_rate": 5.0775117688069916e-05, + "loss": 0.3073, "step": 1254500 }, { "epoch": 0.75, - "learning_rate": 5.0773173119960834e-05, - "loss": 0.4101, + "learning_rate": 5.0773021922440476e-05, + "loss": 0.3052, "step": 1255000 }, { "epoch": 0.75, - "learning_rate": 5.077108155426251e-05, - "loss": 0.4122, + "learning_rate": 5.07709219568799e-05, + "loss": 0.3092, "step": 1255500 }, { "epoch": 0.75, - "learning_rate": 5.076898158870195e-05, - "loss": 0.4179, + "learning_rate": 5.0768821991319336e-05, + "loss": 0.3083, "step": 1256000 }, { "epoch": 0.75, - "learning_rate": 5.076688162314138e-05, - "loss": 0.41, + "learning_rate": 5.0766722025758777e-05, + "loss": 0.3073, "step": 1256500 }, { "epoch": 0.75, - "learning_rate": 5.0764781657580815e-05, - "loss": 0.4026, + "learning_rate": 5.076462626012934e-05, + "loss": 0.3062, "step": 1257000 }, { "epoch": 0.75, - "learning_rate": 5.076268169202025e-05, - "loss": 0.4138, + "learning_rate": 5.0762526294568764e-05, + "loss": 0.3118, "step": 1257500 }, { "epoch": 0.75, - "learning_rate": 5.076058172645968e-05, - "loss": 0.4016, + "learning_rate": 5.0760426329008204e-05, + "loss": 0.3038, "step": 1258000 }, { "epoch": 0.75, - "learning_rate": 5.0758481760899116e-05, - "loss": 0.4108, + "learning_rate": 5.075832636344764e-05, + "loss": 0.3043, "step": 1258500 }, { "epoch": 0.75, - "learning_rate": 5.0756381795338556e-05, - "loss": 0.403, + "learning_rate": 5.075622639788707e-05, + "loss": 0.3058, "step": 1259000 }, { "epoch": 0.76, - "learning_rate": 5.075428182977799e-05, - "loss": 0.3953, + "learning_rate": 5.075412643232651e-05, + "loss": 0.3022, "step": 1259500 }, { "epoch": 0.76, - "learning_rate": 5.075218606414854e-05, - "loss": 0.4147, + "learning_rate": 5.0752026466765944e-05, + "loss": 0.3155, "step": 1260000 }, { "epoch": 0.76, - "learning_rate": 5.075008609858798e-05, - "loss": 0.4173, + "learning_rate": 5.074992650120538e-05, + "loss": 0.3085, "step": 1260500 }, { "epoch": 0.76, - "learning_rate": 5.0747986133027416e-05, - "loss": 0.4167, + "learning_rate": 5.074783073557593e-05, + "loss": 0.3102, "step": 1261000 }, { "epoch": 0.76, - "learning_rate": 5.074588616746685e-05, - "loss": 0.3962, + "learning_rate": 5.074573077001537e-05, + "loss": 0.2997, "step": 1261500 }, { "epoch": 0.76, - "learning_rate": 5.074379040183741e-05, - "loss": 0.4032, + "learning_rate": 5.0743630804454805e-05, + "loss": 0.3024, "step": 1262000 }, { "epoch": 0.76, - "learning_rate": 5.0741690436276844e-05, - "loss": 0.4167, + "learning_rate": 5.074153083889424e-05, + "loss": 0.3075, "step": 1262500 }, { "epoch": 0.76, - "learning_rate": 5.073959047071628e-05, - "loss": 0.4224, + "learning_rate": 5.073943087333368e-05, + "loss": 0.3153, "step": 1263000 }, { "epoch": 0.76, - "learning_rate": 5.073749050515571e-05, - "loss": 0.4127, + "learning_rate": 5.073733090777311e-05, + "loss": 0.3095, "step": 1263500 }, { "epoch": 0.76, - "learning_rate": 5.073539053959515e-05, - "loss": 0.4167, + "learning_rate": 5.0735230942212546e-05, + "loss": 0.3004, "step": 1264000 }, { "epoch": 0.76, - "learning_rate": 5.0733290574034584e-05, - "loss": 0.4101, + "learning_rate": 5.0733135176583106e-05, + "loss": 0.3087, "step": 1264500 }, { "epoch": 0.76, - "learning_rate": 5.073119060847402e-05, - "loss": 0.4053, + "learning_rate": 5.073103521102254e-05, + "loss": 0.302, "step": 1265000 }, { "epoch": 0.76, - "learning_rate": 5.072909064291346e-05, - "loss": 0.4107, + "learning_rate": 5.072893524546197e-05, + "loss": 0.3073, "step": 1265500 }, { "epoch": 0.76, - "learning_rate": 5.072699487728401e-05, - "loss": 0.4021, + "learning_rate": 5.072683527990141e-05, + "loss": 0.2994, "step": 1266000 }, { "epoch": 0.76, - "learning_rate": 5.072489911165457e-05, - "loss": 0.4078, + "learning_rate": 5.072473951427197e-05, + "loss": 0.3083, "step": 1266500 }, { "epoch": 0.76, - "learning_rate": 5.0722799146094e-05, - "loss": 0.412, + "learning_rate": 5.072264374864252e-05, + "loss": 0.3105, "step": 1267000 }, { "epoch": 0.76, - "learning_rate": 5.072069918053344e-05, - "loss": 0.4141, + "learning_rate": 5.0720543783081954e-05, + "loss": 0.3051, "step": 1267500 }, { "epoch": 0.76, - "learning_rate": 5.071859921497287e-05, - "loss": 0.4115, + "learning_rate": 5.071844381752139e-05, + "loss": 0.3062, "step": 1268000 }, { "epoch": 0.76, - "learning_rate": 5.0716499249412306e-05, - "loss": 0.4152, + "learning_rate": 5.071634805189195e-05, + "loss": 0.307, "step": 1268500 }, { "epoch": 0.76, - "learning_rate": 5.0714399283851746e-05, - "loss": 0.4168, + "learning_rate": 5.071424808633139e-05, + "loss": 0.3117, "step": 1269000 }, { "epoch": 0.76, - "learning_rate": 5.071229931829118e-05, - "loss": 0.4047, + "learning_rate": 5.0712148120770815e-05, + "loss": 0.3058, "step": 1269500 }, { "epoch": 0.76, - "learning_rate": 5.071019935273061e-05, - "loss": 0.4223, + "learning_rate": 5.071004815521025e-05, + "loss": 0.3137, "step": 1270000 }, { "epoch": 0.76, - "learning_rate": 5.0708103587101167e-05, - "loss": 0.3979, + "learning_rate": 5.070794818964969e-05, + "loss": 0.3005, "step": 1270500 }, { "epoch": 0.76, - "learning_rate": 5.070600362154061e-05, - "loss": 0.4087, + "learning_rate": 5.070584822408912e-05, + "loss": 0.3064, "step": 1271000 }, { "epoch": 0.76, - "learning_rate": 5.070390785591117e-05, - "loss": 0.3993, + "learning_rate": 5.070374825852856e-05, + "loss": 0.3022, "step": 1271500 }, { "epoch": 0.76, - "learning_rate": 5.070181209028172e-05, - "loss": 0.4067, + "learning_rate": 5.0701648292967995e-05, + "loss": 0.3023, "step": 1272000 }, { "epoch": 0.76, - "learning_rate": 5.0699712124721154e-05, - "loss": 0.4099, + "learning_rate": 5.069954832740743e-05, + "loss": 0.3125, "step": 1272500 }, { "epoch": 0.76, - "learning_rate": 5.069761215916059e-05, - "loss": 0.4029, + "learning_rate": 5.069744836184687e-05, + "loss": 0.3056, "step": 1273000 }, { "epoch": 0.76, - "learning_rate": 5.069551219360003e-05, - "loss": 0.4029, + "learning_rate": 5.06953483962863e-05, + "loss": 0.3044, "step": 1273500 }, { "epoch": 0.76, - "learning_rate": 5.069341222803946e-05, - "loss": 0.4077, + "learning_rate": 5.0693248430725736e-05, + "loss": 0.307, "step": 1274000 }, { "epoch": 0.76, - "learning_rate": 5.0691312262478895e-05, - "loss": 0.4085, + "learning_rate": 5.0691148465165176e-05, + "loss": 0.3079, "step": 1274500 }, { "epoch": 0.76, - "learning_rate": 5.068921229691833e-05, - "loss": 0.4064, + "learning_rate": 5.06890484996046e-05, + "loss": 0.3096, "step": 1275000 }, { "epoch": 0.76, - "learning_rate": 5.068711233135776e-05, - "loss": 0.4131, + "learning_rate": 5.0686948534044036e-05, + "loss": 0.3052, "step": 1275500 }, { "epoch": 0.77, - "learning_rate": 5.06850123657972e-05, - "loss": 0.4063, + "learning_rate": 5.06848527684146e-05, + "loss": 0.3088, "step": 1276000 }, { "epoch": 0.77, - "learning_rate": 5.0682916600167756e-05, - "loss": 0.403, + "learning_rate": 5.068275280285404e-05, + "loss": 0.3001, "step": 1276500 }, { "epoch": 0.77, - "learning_rate": 5.068081663460719e-05, - "loss": 0.4129, + "learning_rate": 5.068065283729347e-05, + "loss": 0.3082, "step": 1277000 }, { "epoch": 0.77, - "learning_rate": 5.067871666904662e-05, - "loss": 0.4199, + "learning_rate": 5.06785528717329e-05, + "loss": 0.3118, "step": 1277500 }, { "epoch": 0.77, - "learning_rate": 5.067661670348606e-05, - "loss": 0.4191, + "learning_rate": 5.067645290617234e-05, + "loss": 0.3077, "step": 1278000 }, { "epoch": 0.77, - "learning_rate": 5.0674516737925496e-05, - "loss": 0.4061, + "learning_rate": 5.067435294061177e-05, + "loss": 0.3084, "step": 1278500 }, { "epoch": 0.77, - "learning_rate": 5.067241677236493e-05, - "loss": 0.4061, + "learning_rate": 5.0672252975051204e-05, + "loss": 0.3013, "step": 1279000 }, { "epoch": 0.77, - "learning_rate": 5.067031680680437e-05, - "loss": 0.4067, + "learning_rate": 5.0670153009490645e-05, + "loss": 0.3038, "step": 1279500 }, { "epoch": 0.77, - "learning_rate": 5.066822104117492e-05, - "loss": 0.4134, + "learning_rate": 5.066805304393008e-05, + "loss": 0.3115, "step": 1280000 }, { "epoch": 0.77, - "learning_rate": 5.066612107561436e-05, - "loss": 0.4171, + "learning_rate": 5.066595727830063e-05, + "loss": 0.3071, "step": 1280500 }, { "epoch": 0.77, - "learning_rate": 5.06640211100538e-05, - "loss": 0.4077, + "learning_rate": 5.066385731274007e-05, + "loss": 0.3038, "step": 1281000 }, { "epoch": 0.77, - "learning_rate": 5.066192114449323e-05, - "loss": 0.414, + "learning_rate": 5.0661757347179505e-05, + "loss": 0.306, "step": 1281500 }, { "epoch": 0.77, - "learning_rate": 5.0659821178932664e-05, - "loss": 0.4095, + "learning_rate": 5.065965738161894e-05, + "loss": 0.3056, "step": 1282000 }, { "epoch": 0.77, - "learning_rate": 5.0657721213372104e-05, - "loss": 0.4032, + "learning_rate": 5.065756161598949e-05, + "loss": 0.3059, "step": 1282500 }, { "epoch": 0.77, - "learning_rate": 5.065562124781154e-05, - "loss": 0.4105, + "learning_rate": 5.065546585036005e-05, + "loss": 0.3022, "step": 1283000 }, { "epoch": 0.77, - "learning_rate": 5.065352128225097e-05, - "loss": 0.4066, + "learning_rate": 5.065336588479949e-05, + "loss": 0.3048, "step": 1283500 }, { "epoch": 0.77, - "learning_rate": 5.0651425516621525e-05, - "loss": 0.4177, + "learning_rate": 5.0651265919238926e-05, + "loss": 0.3103, "step": 1284000 }, { "epoch": 0.77, - "learning_rate": 5.064932975099208e-05, - "loss": 0.4106, + "learning_rate": 5.064916595367835e-05, + "loss": 0.3044, "step": 1284500 }, { "epoch": 0.77, - "learning_rate": 5.064722978543152e-05, - "loss": 0.413, + "learning_rate": 5.064706598811779e-05, + "loss": 0.3088, "step": 1285000 }, { "epoch": 0.77, - "learning_rate": 5.064512981987095e-05, - "loss": 0.406, + "learning_rate": 5.064496602255723e-05, + "loss": 0.3109, "step": 1285500 }, { "epoch": 0.77, - "learning_rate": 5.0643029854310385e-05, - "loss": 0.4005, + "learning_rate": 5.064287025692779e-05, + "loss": 0.3011, "step": 1286000 }, { "epoch": 0.77, - "learning_rate": 5.0640929888749826e-05, - "loss": 0.4172, + "learning_rate": 5.064077029136722e-05, + "loss": 0.31, "step": 1286500 }, { "epoch": 0.77, - "learning_rate": 5.063883412312038e-05, - "loss": 0.4044, + "learning_rate": 5.0638670325806654e-05, + "loss": 0.3041, "step": 1287000 }, { "epoch": 0.77, - "learning_rate": 5.063673415755981e-05, - "loss": 0.4057, + "learning_rate": 5.063657036024609e-05, + "loss": 0.3038, "step": 1287500 }, { "epoch": 0.77, - "learning_rate": 5.063463419199925e-05, - "loss": 0.4115, + "learning_rate": 5.063447039468553e-05, + "loss": 0.3021, "step": 1288000 }, { "epoch": 0.77, - "learning_rate": 5.0632534226438686e-05, - "loss": 0.3972, + "learning_rate": 5.063237042912496e-05, + "loss": 0.302, "step": 1288500 }, { "epoch": 0.77, - "learning_rate": 5.063043426087812e-05, - "loss": 0.4065, + "learning_rate": 5.0630270463564395e-05, + "loss": 0.3117, "step": 1289000 }, { "epoch": 0.77, - "learning_rate": 5.062833429531756e-05, - "loss": 0.4076, + "learning_rate": 5.0628170498003835e-05, + "loss": 0.305, "step": 1289500 }, { "epoch": 0.77, - "learning_rate": 5.0626234329756994e-05, - "loss": 0.407, + "learning_rate": 5.062607053244327e-05, + "loss": 0.3042, "step": 1290000 }, { "epoch": 0.77, - "learning_rate": 5.062413436419643e-05, - "loss": 0.4129, + "learning_rate": 5.06239705668827e-05, + "loss": 0.3092, "step": 1290500 }, { "epoch": 0.77, - "learning_rate": 5.06220427984981e-05, - "loss": 0.4074, + "learning_rate": 5.062187060132214e-05, + "loss": 0.3024, "step": 1291000 }, { "epoch": 0.77, - "learning_rate": 5.0619942832937534e-05, - "loss": 0.4036, + "learning_rate": 5.0619774835692696e-05, + "loss": 0.3139, "step": 1291500 }, { "epoch": 0.77, - "learning_rate": 5.0617842867376974e-05, - "loss": 0.4116, + "learning_rate": 5.061767487013213e-05, + "loss": 0.3102, "step": 1292000 }, { "epoch": 0.77, - "learning_rate": 5.061574290181641e-05, - "loss": 0.4154, + "learning_rate": 5.061557490457156e-05, + "loss": 0.31, "step": 1292500 }, { "epoch": 0.78, - "learning_rate": 5.061364713618697e-05, - "loss": 0.3998, + "learning_rate": 5.0613474939011e-05, + "loss": 0.3002, "step": 1293000 }, { "epoch": 0.78, - "learning_rate": 5.06115471706264e-05, - "loss": 0.4044, + "learning_rate": 5.0611374973450436e-05, + "loss": 0.3041, "step": 1293500 }, { "epoch": 0.78, - "learning_rate": 5.0609447205065835e-05, - "loss": 0.3992, + "learning_rate": 5.060927500788987e-05, + "loss": 0.3007, "step": 1294000 }, { "epoch": 0.78, - "learning_rate": 5.060734723950527e-05, - "loss": 0.401, + "learning_rate": 5.060717504232931e-05, + "loss": 0.3011, "step": 1294500 }, { "epoch": 0.78, - "learning_rate": 5.060524727394471e-05, - "loss": 0.4153, + "learning_rate": 5.0605079276699864e-05, + "loss": 0.306, "step": 1295000 }, { "epoch": 0.78, - "learning_rate": 5.060314730838414e-05, - "loss": 0.4019, + "learning_rate": 5.060298351107042e-05, + "loss": 0.3082, "step": 1295500 }, { "epoch": 0.78, - "learning_rate": 5.0601047342823576e-05, - "loss": 0.4131, + "learning_rate": 5.060088774544098e-05, + "loss": 0.3014, "step": 1296000 }, { "epoch": 0.78, - "learning_rate": 5.0598947377263016e-05, - "loss": 0.4051, + "learning_rate": 5.0598787779880404e-05, + "loss": 0.3029, "step": 1296500 }, { "epoch": 0.78, - "learning_rate": 5.059684741170245e-05, - "loss": 0.4008, + "learning_rate": 5.0596687814319844e-05, + "loss": 0.299, "step": 1297000 }, { "epoch": 0.78, - "learning_rate": 5.059474744614188e-05, - "loss": 0.4086, + "learning_rate": 5.059458784875928e-05, + "loss": 0.3022, "step": 1297500 }, { "epoch": 0.78, - "learning_rate": 5.059264748058132e-05, - "loss": 0.4184, + "learning_rate": 5.059248788319871e-05, + "loss": 0.3054, "step": 1298000 }, { "epoch": 0.78, - "learning_rate": 5.0590547515020757e-05, - "loss": 0.4122, + "learning_rate": 5.059038791763815e-05, + "loss": 0.3084, "step": 1298500 }, { "epoch": 0.78, - "learning_rate": 5.058845174939131e-05, - "loss": 0.4187, + "learning_rate": 5.0588287952077585e-05, + "loss": 0.3097, "step": 1299000 }, { "epoch": 0.78, - "learning_rate": 5.0586355983761864e-05, - "loss": 0.4066, + "learning_rate": 5.058618798651702e-05, + "loss": 0.3032, "step": 1299500 }, { "epoch": 0.78, - "learning_rate": 5.05842560182013e-05, - "loss": 0.4035, + "learning_rate": 5.058408802095646e-05, + "loss": 0.3023, "step": 1300000 }, { "epoch": 0.78, - "eval_loss": 0.3874254822731018, - "eval_runtime": 1121.4615, - "eval_samples_per_second": 469.673, - "eval_steps_per_second": 78.279, + "eval_loss": 0.2759383022785187, + "eval_runtime": 1465.2871, + "eval_samples_per_second": 359.465, + "eval_steps_per_second": 59.911, "step": 1300000 }, { "epoch": 0.78, - "learning_rate": 5.058215605264074e-05, - "loss": 0.407, + "learning_rate": 5.058199225532701e-05, + "loss": 0.3049, "step": 1300500 }, { "epoch": 0.78, - "learning_rate": 5.058005608708017e-05, - "loss": 0.4223, + "learning_rate": 5.0579892289766446e-05, + "loss": 0.31, "step": 1301000 }, { "epoch": 0.78, - "learning_rate": 5.0577960321450724e-05, - "loss": 0.4132, + "learning_rate": 5.057779232420588e-05, + "loss": 0.3104, "step": 1301500 }, { "epoch": 0.78, - "learning_rate": 5.0575860355890165e-05, - "loss": 0.4059, + "learning_rate": 5.057569235864532e-05, + "loss": 0.3053, "step": 1302000 }, { "epoch": 0.78, - "learning_rate": 5.05737603903296e-05, - "loss": 0.4021, + "learning_rate": 5.057359239308475e-05, + "loss": 0.3023, "step": 1302500 }, { "epoch": 0.78, - "learning_rate": 5.057166042476903e-05, - "loss": 0.4105, + "learning_rate": 5.0571492427524186e-05, + "loss": 0.3058, "step": 1303000 }, { "epoch": 0.78, - "learning_rate": 5.0569564659139585e-05, - "loss": 0.4061, + "learning_rate": 5.0569392461963627e-05, + "loss": 0.3112, "step": 1303500 }, { "epoch": 0.78, - "learning_rate": 5.0567464693579025e-05, - "loss": 0.4021, + "learning_rate": 5.056729249640306e-05, + "loss": 0.2983, "step": 1304000 }, { "epoch": 0.78, - "learning_rate": 5.056536472801846e-05, - "loss": 0.417, + "learning_rate": 5.0565192530842493e-05, + "loss": 0.3077, "step": 1304500 }, { "epoch": 0.78, - "learning_rate": 5.056326476245789e-05, - "loss": 0.402, + "learning_rate": 5.0563096765213054e-05, + "loss": 0.3005, "step": 1305000 }, { "epoch": 0.78, - "learning_rate": 5.056116479689733e-05, - "loss": 0.4079, + "learning_rate": 5.056099679965249e-05, + "loss": 0.3078, "step": 1305500 }, { "epoch": 0.78, - "learning_rate": 5.0559064831336766e-05, - "loss": 0.4027, + "learning_rate": 5.055889683409192e-05, + "loss": 0.3099, "step": 1306000 }, { "epoch": 0.78, - "learning_rate": 5.05569648657762e-05, - "loss": 0.4067, + "learning_rate": 5.0556801068462474e-05, + "loss": 0.2995, "step": 1306500 }, { "epoch": 0.78, - "learning_rate": 5.055486490021564e-05, - "loss": 0.4151, + "learning_rate": 5.0554701102901915e-05, + "loss": 0.3043, "step": 1307000 }, { "epoch": 0.78, - "learning_rate": 5.055276913458619e-05, - "loss": 0.409, + "learning_rate": 5.055260113734135e-05, + "loss": 0.3114, "step": 1307500 }, { "epoch": 0.78, - "learning_rate": 5.055066916902563e-05, - "loss": 0.4003, + "learning_rate": 5.055050117178078e-05, + "loss": 0.2994, "step": 1308000 }, { "epoch": 0.78, - "learning_rate": 5.054856920346506e-05, - "loss": 0.4037, + "learning_rate": 5.054840120622022e-05, + "loss": 0.3025, "step": 1308500 }, { "epoch": 0.78, - "learning_rate": 5.05464692379045e-05, - "loss": 0.4047, + "learning_rate": 5.054630124065965e-05, + "loss": 0.3044, "step": 1309000 }, { "epoch": 0.79, - "learning_rate": 5.0544373472275054e-05, - "loss": 0.4042, + "learning_rate": 5.054420547503021e-05, + "loss": 0.3037, "step": 1309500 }, { "epoch": 0.79, - "learning_rate": 5.054227350671449e-05, - "loss": 0.4024, + "learning_rate": 5.054210550946964e-05, + "loss": 0.2991, "step": 1310000 }, { "epoch": 0.79, - "learning_rate": 5.054017774108504e-05, - "loss": 0.4058, + "learning_rate": 5.054000554390908e-05, + "loss": 0.303, "step": 1310500 }, { "epoch": 0.79, - "learning_rate": 5.053807777552448e-05, - "loss": 0.4074, + "learning_rate": 5.0537909778279636e-05, + "loss": 0.3036, "step": 1311000 }, { "epoch": 0.79, - "learning_rate": 5.0535977809963915e-05, - "loss": 0.4049, + "learning_rate": 5.053580981271907e-05, + "loss": 0.3063, "step": 1311500 }, { "epoch": 0.79, - "learning_rate": 5.053387784440335e-05, - "loss": 0.4063, + "learning_rate": 5.053370984715851e-05, + "loss": 0.3034, "step": 1312000 }, { "epoch": 0.79, - "learning_rate": 5.05317820787739e-05, - "loss": 0.399, + "learning_rate": 5.053160988159794e-05, + "loss": 0.2998, "step": 1312500 }, { "epoch": 0.79, - "learning_rate": 5.052968211321334e-05, - "loss": 0.4023, + "learning_rate": 5.0529509916037377e-05, + "loss": 0.3008, "step": 1313000 }, { "epoch": 0.79, - "learning_rate": 5.05275863475839e-05, - "loss": 0.4061, + "learning_rate": 5.052740995047682e-05, + "loss": 0.307, "step": 1313500 }, { "epoch": 0.79, - "learning_rate": 5.0525486382023336e-05, - "loss": 0.4068, + "learning_rate": 5.0525309984916244e-05, + "loss": 0.304, "step": 1314000 }, { "epoch": 0.79, - "learning_rate": 5.052338641646277e-05, - "loss": 0.4107, + "learning_rate": 5.052321001935568e-05, + "loss": 0.3031, "step": 1314500 }, { "epoch": 0.79, - "learning_rate": 5.05212864509022e-05, - "loss": 0.4019, + "learning_rate": 5.052111005379512e-05, + "loss": 0.2981, "step": 1315000 }, { "epoch": 0.79, - "learning_rate": 5.0519186485341636e-05, - "loss": 0.4154, + "learning_rate": 5.051901428816568e-05, + "loss": 0.3046, "step": 1315500 }, { "epoch": 0.79, - "learning_rate": 5.0517086519781076e-05, - "loss": 0.4081, + "learning_rate": 5.051691432260511e-05, + "loss": 0.3044, "step": 1316000 }, { "epoch": 0.79, - "learning_rate": 5.051498655422051e-05, - "loss": 0.4075, + "learning_rate": 5.0514818556975665e-05, + "loss": 0.3109, "step": 1316500 }, { "epoch": 0.79, - "learning_rate": 5.051288658865994e-05, - "loss": 0.4083, + "learning_rate": 5.0512718591415105e-05, + "loss": 0.3001, "step": 1317000 }, { "epoch": 0.79, - "learning_rate": 5.05107908230305e-05, - "loss": 0.4058, + "learning_rate": 5.051061862585454e-05, + "loss": 0.3049, "step": 1317500 }, { "epoch": 0.79, - "learning_rate": 5.050869085746994e-05, - "loss": 0.4032, + "learning_rate": 5.050851866029397e-05, + "loss": 0.3087, "step": 1318000 }, { "epoch": 0.79, - "learning_rate": 5.050659089190937e-05, - "loss": 0.3966, + "learning_rate": 5.0506418694733405e-05, + "loss": 0.2983, "step": 1318500 }, { "epoch": 0.79, - "learning_rate": 5.0504490926348804e-05, - "loss": 0.4087, + "learning_rate": 5.050431872917284e-05, + "loss": 0.3086, "step": 1319000 }, { "epoch": 0.79, - "learning_rate": 5.0502390960788244e-05, - "loss": 0.4063, + "learning_rate": 5.050221876361227e-05, + "loss": 0.3097, "step": 1319500 }, { "epoch": 0.79, - "learning_rate": 5.050029099522768e-05, - "loss": 0.3991, + "learning_rate": 5.050011879805171e-05, + "loss": 0.3005, "step": 1320000 }, { "epoch": 0.79, - "learning_rate": 5.049819102966711e-05, - "loss": 0.4113, + "learning_rate": 5.0498018832491146e-05, + "loss": 0.3075, "step": 1320500 }, { "epoch": 0.79, - "learning_rate": 5.049609106410655e-05, - "loss": 0.3931, + "learning_rate": 5.049591886693058e-05, + "loss": 0.303, "step": 1321000 }, { "epoch": 0.79, - "learning_rate": 5.0493991098545985e-05, - "loss": 0.4023, + "learning_rate": 5.049381890137002e-05, + "loss": 0.2986, "step": 1321500 }, { "epoch": 0.79, - "learning_rate": 5.049189533291654e-05, - "loss": 0.4125, + "learning_rate": 5.049171893580945e-05, + "loss": 0.3038, "step": 1322000 }, { "epoch": 0.79, - "learning_rate": 5.048979956728709e-05, - "loss": 0.4033, + "learning_rate": 5.0489618970248886e-05, + "loss": 0.3017, "step": 1322500 }, { "epoch": 0.79, - "learning_rate": 5.048769960172653e-05, - "loss": 0.4134, + "learning_rate": 5.048751900468833e-05, + "loss": 0.3043, "step": 1323000 }, { "epoch": 0.79, - "learning_rate": 5.048560383609709e-05, - "loss": 0.4148, + "learning_rate": 5.048541903912776e-05, + "loss": 0.306, "step": 1323500 }, { "epoch": 0.79, - "learning_rate": 5.0483503870536526e-05, - "loss": 0.4131, + "learning_rate": 5.0483323273498314e-05, + "loss": 0.3093, "step": 1324000 }, { "epoch": 0.79, - "learning_rate": 5.048140390497595e-05, - "loss": 0.398, + "learning_rate": 5.048122330793775e-05, + "loss": 0.3047, "step": 1324500 }, { "epoch": 0.79, - "learning_rate": 5.047930393941539e-05, - "loss": 0.4058, + "learning_rate": 5.047912334237719e-05, + "loss": 0.3066, "step": 1325000 }, { "epoch": 0.79, - "learning_rate": 5.0477203973854827e-05, - "loss": 0.4169, + "learning_rate": 5.047702337681662e-05, + "loss": 0.3104, "step": 1325500 }, { "epoch": 0.79, - "learning_rate": 5.047510820822539e-05, - "loss": 0.4119, + "learning_rate": 5.0474927611187174e-05, + "loss": 0.3046, "step": 1326000 }, { "epoch": 0.8, - "learning_rate": 5.0473008242664814e-05, - "loss": 0.3973, + "learning_rate": 5.0472827645626615e-05, + "loss": 0.2993, "step": 1326500 }, { "epoch": 0.8, - "learning_rate": 5.0470908277104254e-05, - "loss": 0.4126, + "learning_rate": 5.047072768006605e-05, + "loss": 0.3079, "step": 1327000 }, { "epoch": 0.8, - "learning_rate": 5.046880831154369e-05, - "loss": 0.4157, + "learning_rate": 5.046862771450548e-05, + "loss": 0.3145, "step": 1327500 }, { "epoch": 0.8, - "learning_rate": 5.046670834598312e-05, - "loss": 0.4021, + "learning_rate": 5.046652774894492e-05, + "loss": 0.3098, "step": 1328000 }, { "epoch": 0.8, - "learning_rate": 5.046460838042256e-05, - "loss": 0.4005, + "learning_rate": 5.0464431983315475e-05, + "loss": 0.3057, "step": 1328500 }, { "epoch": 0.8, - "learning_rate": 5.0462512614793115e-05, - "loss": 0.4148, + "learning_rate": 5.046233201775491e-05, + "loss": 0.309, "step": 1329000 }, { "epoch": 0.8, - "learning_rate": 5.046041264923255e-05, - "loss": 0.4081, + "learning_rate": 5.046023205219434e-05, + "loss": 0.3114, "step": 1329500 }, { "epoch": 0.8, - "learning_rate": 5.045831268367199e-05, - "loss": 0.4098, + "learning_rate": 5.045813208663378e-05, + "loss": 0.3034, "step": 1330000 }, { "epoch": 0.8, - "learning_rate": 5.045621271811142e-05, - "loss": 0.4137, + "learning_rate": 5.0456032121073216e-05, + "loss": 0.3077, "step": 1330500 }, { "epoch": 0.8, - "learning_rate": 5.0454112752550855e-05, - "loss": 0.4131, + "learning_rate": 5.045393215551265e-05, + "loss": 0.3058, "step": 1331000 }, { "epoch": 0.8, - "learning_rate": 5.0452012786990295e-05, - "loss": 0.4041, + "learning_rate": 5.045183218995208e-05, + "loss": 0.304, "step": 1331500 }, { "epoch": 0.8, - "learning_rate": 5.044991282142973e-05, - "loss": 0.3975, + "learning_rate": 5.0449732224391516e-05, + "loss": 0.3049, "step": 1332000 }, { "epoch": 0.8, - "learning_rate": 5.044781285586916e-05, - "loss": 0.4074, + "learning_rate": 5.044763645876208e-05, + "loss": 0.3054, "step": 1332500 }, { "epoch": 0.8, - "learning_rate": 5.04457128903086e-05, - "loss": 0.4013, + "learning_rate": 5.044553649320151e-05, + "loss": 0.3048, "step": 1333000 }, { "epoch": 0.8, - "learning_rate": 5.0443612924748036e-05, - "loss": 0.4122, + "learning_rate": 5.044343652764095e-05, + "loss": 0.3052, "step": 1333500 }, { "epoch": 0.8, - "learning_rate": 5.044151715911859e-05, - "loss": 0.4143, + "learning_rate": 5.044133656208038e-05, + "loss": 0.3097, "step": 1334000 }, { "epoch": 0.8, - "learning_rate": 5.043941719355802e-05, - "loss": 0.4051, + "learning_rate": 5.043924079645094e-05, + "loss": 0.3062, "step": 1334500 }, { "epoch": 0.8, - "learning_rate": 5.043731722799746e-05, - "loss": 0.413, + "learning_rate": 5.043714083089038e-05, + "loss": 0.3103, "step": 1335000 }, { "epoch": 0.8, - "learning_rate": 5.04352172624369e-05, - "loss": 0.4021, + "learning_rate": 5.043504086532981e-05, + "loss": 0.3032, "step": 1335500 }, { "epoch": 0.8, - "learning_rate": 5.043311729687633e-05, - "loss": 0.4134, + "learning_rate": 5.043294089976924e-05, + "loss": 0.311, "step": 1336000 }, { "epoch": 0.8, - "learning_rate": 5.043101733131577e-05, - "loss": 0.4122, + "learning_rate": 5.043084093420868e-05, + "loss": 0.305, "step": 1336500 }, { "epoch": 0.8, - "learning_rate": 5.0428921565686324e-05, - "loss": 0.4096, + "learning_rate": 5.042874516857924e-05, + "loss": 0.3084, "step": 1337000 }, { "epoch": 0.8, - "learning_rate": 5.042682160012576e-05, - "loss": 0.4011, + "learning_rate": 5.042664520301867e-05, + "loss": 0.2982, "step": 1337500 }, { "epoch": 0.8, - "learning_rate": 5.042472163456519e-05, - "loss": 0.4122, + "learning_rate": 5.0424545237458105e-05, + "loss": 0.3082, "step": 1338000 }, { "epoch": 0.8, - "learning_rate": 5.042262166900463e-05, - "loss": 0.404, + "learning_rate": 5.042244527189754e-05, + "loss": 0.303, "step": 1338500 }, { "epoch": 0.8, - "learning_rate": 5.0420521703444065e-05, - "loss": 0.3982, + "learning_rate": 5.04203495062681e-05, + "loss": 0.3045, "step": 1339000 }, { "epoch": 0.8, - "learning_rate": 5.04184217378835e-05, - "loss": 0.3985, + "learning_rate": 5.041824954070753e-05, + "loss": 0.2937, "step": 1339500 }, { "epoch": 0.8, - "learning_rate": 5.041632177232293e-05, - "loss": 0.4128, + "learning_rate": 5.0416153775078086e-05, + "loss": 0.3098, "step": 1340000 }, { "epoch": 0.8, - "learning_rate": 5.0414221806762365e-05, - "loss": 0.4028, + "learning_rate": 5.0414053809517526e-05, + "loss": 0.3043, "step": 1340500 }, { "epoch": 0.8, - "learning_rate": 5.0412130241064045e-05, - "loss": 0.4053, + "learning_rate": 5.041195384395696e-05, + "loss": 0.3033, "step": 1341000 }, { "epoch": 0.8, - "learning_rate": 5.041003027550348e-05, - "loss": 0.3961, + "learning_rate": 5.040985387839639e-05, + "loss": 0.3006, "step": 1341500 }, { "epoch": 0.8, - "learning_rate": 5.040793030994292e-05, - "loss": 0.4104, + "learning_rate": 5.0407753912835834e-05, + "loss": 0.309, "step": 1342000 }, { "epoch": 0.8, - "learning_rate": 5.040583034438235e-05, - "loss": 0.4004, + "learning_rate": 5.040565394727527e-05, + "loss": 0.3073, "step": 1342500 }, { "epoch": 0.81, - "learning_rate": 5.0403734578752906e-05, - "loss": 0.4171, + "learning_rate": 5.04035539817147e-05, + "loss": 0.307, "step": 1343000 }, { "epoch": 0.81, - "learning_rate": 5.040163461319234e-05, - "loss": 0.4146, + "learning_rate": 5.0401454016154134e-05, + "loss": 0.3077, "step": 1343500 }, { "epoch": 0.81, - "learning_rate": 5.039953464763178e-05, - "loss": 0.3999, + "learning_rate": 5.039935405059357e-05, + "loss": 0.3004, "step": 1344000 }, { "epoch": 0.81, - "learning_rate": 5.039743468207121e-05, - "loss": 0.4086, + "learning_rate": 5.0397254085033e-05, + "loss": 0.3079, "step": 1344500 }, { "epoch": 0.81, - "learning_rate": 5.039533471651065e-05, - "loss": 0.3986, + "learning_rate": 5.039515831940356e-05, + "loss": 0.2992, "step": 1345000 }, { "epoch": 0.81, - "learning_rate": 5.039323475095009e-05, - "loss": 0.4019, + "learning_rate": 5.0393058353842995e-05, + "loss": 0.302, "step": 1345500 }, { "epoch": 0.81, - "learning_rate": 5.039113478538952e-05, - "loss": 0.4184, + "learning_rate": 5.0390962588213555e-05, + "loss": 0.3052, "step": 1346000 }, { "epoch": 0.81, - "learning_rate": 5.0389034819828954e-05, - "loss": 0.4062, + "learning_rate": 5.038886262265299e-05, + "loss": 0.3105, "step": 1346500 }, { "epoch": 0.81, - "learning_rate": 5.038693485426839e-05, - "loss": 0.4172, + "learning_rate": 5.038676265709242e-05, + "loss": 0.306, "step": 1347000 }, { "epoch": 0.81, - "learning_rate": 5.038483488870782e-05, - "loss": 0.4052, + "learning_rate": 5.038466269153186e-05, + "loss": 0.3016, "step": 1347500 }, { "epoch": 0.81, - "learning_rate": 5.038273912307838e-05, - "loss": 0.4044, + "learning_rate": 5.038256272597129e-05, + "loss": 0.307, "step": 1348000 }, { "epoch": 0.81, - "learning_rate": 5.038063915751782e-05, - "loss": 0.4091, + "learning_rate": 5.038046696034185e-05, + "loss": 0.3078, "step": 1348500 }, { "epoch": 0.81, - "learning_rate": 5.037853919195725e-05, - "loss": 0.4006, + "learning_rate": 5.037836699478129e-05, + "loss": 0.3048, "step": 1349000 }, { "epoch": 0.81, - "learning_rate": 5.037643922639668e-05, - "loss": 0.4108, + "learning_rate": 5.037626702922072e-05, + "loss": 0.3055, "step": 1349500 }, { "epoch": 0.81, - "learning_rate": 5.037433926083612e-05, - "loss": 0.4125, + "learning_rate": 5.0374167063660156e-05, + "loss": 0.3076, "step": 1350000 }, { "epoch": 0.81, - "learning_rate": 5.0372239295275555e-05, - "loss": 0.4072, + "learning_rate": 5.037206709809959e-05, + "loss": 0.3041, "step": 1350500 }, { "epoch": 0.81, - "learning_rate": 5.037013932971499e-05, - "loss": 0.4081, + "learning_rate": 5.036996713253902e-05, + "loss": 0.3071, "step": 1351000 }, { "epoch": 0.81, - "learning_rate": 5.036804356408554e-05, - "loss": 0.4048, + "learning_rate": 5.036786716697846e-05, + "loss": 0.3024, "step": 1351500 }, { "epoch": 0.81, - "learning_rate": 5.036594359852498e-05, - "loss": 0.4077, + "learning_rate": 5.03657672014179e-05, + "loss": 0.2996, "step": 1352000 }, { "epoch": 0.81, - "learning_rate": 5.0363843632964416e-05, - "loss": 0.4053, + "learning_rate": 5.036366723585733e-05, + "loss": 0.3012, "step": 1352500 }, { "epoch": 0.81, - "learning_rate": 5.0361747867334976e-05, - "loss": 0.4095, + "learning_rate": 5.0361571470227884e-05, + "loss": 0.3062, "step": 1353000 }, { "epoch": 0.81, - "learning_rate": 5.035964790177441e-05, - "loss": 0.4097, + "learning_rate": 5.035947150466732e-05, + "loss": 0.3078, "step": 1353500 }, { "epoch": 0.81, - "learning_rate": 5.035754793621384e-05, - "loss": 0.3952, + "learning_rate": 5.035737153910676e-05, + "loss": 0.2967, "step": 1354000 }, { "epoch": 0.81, - "learning_rate": 5.035544797065328e-05, - "loss": 0.4074, + "learning_rate": 5.035527577347732e-05, + "loss": 0.3052, "step": 1354500 }, { "epoch": 0.81, - "learning_rate": 5.035334800509272e-05, - "loss": 0.3991, + "learning_rate": 5.0353175807916745e-05, + "loss": 0.2992, "step": 1355000 }, { "epoch": 0.81, - "learning_rate": 5.035125223946328e-05, - "loss": 0.4139, + "learning_rate": 5.0351080042287305e-05, + "loss": 0.3068, "step": 1355500 }, { "epoch": 0.81, - "learning_rate": 5.0349152273902704e-05, - "loss": 0.4092, + "learning_rate": 5.0348980076726745e-05, + "loss": 0.3027, "step": 1356000 }, { "epoch": 0.81, - "learning_rate": 5.034705230834214e-05, - "loss": 0.4067, + "learning_rate": 5.034688011116618e-05, + "loss": 0.3026, "step": 1356500 }, { "epoch": 0.81, - "learning_rate": 5.034495234278158e-05, - "loss": 0.4006, + "learning_rate": 5.034478014560561e-05, + "loss": 0.3034, "step": 1357000 }, { "epoch": 0.81, - "learning_rate": 5.034285237722101e-05, - "loss": 0.397, + "learning_rate": 5.0342680180045046e-05, + "loss": 0.2998, "step": 1357500 }, { "epoch": 0.81, - "learning_rate": 5.0340752411660445e-05, - "loss": 0.4086, + "learning_rate": 5.034058021448448e-05, + "loss": 0.3109, "step": 1358000 }, { "epoch": 0.81, - "learning_rate": 5.0338652446099885e-05, - "loss": 0.4042, + "learning_rate": 5.033848024892391e-05, + "loss": 0.3031, "step": 1358500 }, { "epoch": 0.81, - "learning_rate": 5.033655248053932e-05, - "loss": 0.3964, + "learning_rate": 5.033638028336335e-05, + "loss": 0.2982, "step": 1359000 }, { "epoch": 0.82, - "learning_rate": 5.033445251497875e-05, - "loss": 0.4081, + "learning_rate": 5.0334280317802786e-05, + "loss": 0.3013, "step": 1359500 }, { "epoch": 0.82, - "learning_rate": 5.033235674934931e-05, - "loss": 0.4124, + "learning_rate": 5.033218455217334e-05, + "loss": 0.3116, "step": 1360000 }, { "epoch": 0.82, - "learning_rate": 5.0330256783788746e-05, - "loss": 0.411, + "learning_rate": 5.033008458661277e-05, + "loss": 0.3096, "step": 1360500 }, { "epoch": 0.82, - "learning_rate": 5.032815681822818e-05, - "loss": 0.3958, + "learning_rate": 5.0327984621052214e-05, + "loss": 0.299, "step": 1361000 }, { "epoch": 0.82, - "learning_rate": 5.032605685266762e-05, - "loss": 0.4021, + "learning_rate": 5.032588465549165e-05, + "loss": 0.3054, "step": 1361500 }, { "epoch": 0.82, - "learning_rate": 5.032395688710705e-05, - "loss": 0.4041, + "learning_rate": 5.032378468993108e-05, + "loss": 0.3015, "step": 1362000 }, { "epoch": 0.82, - "learning_rate": 5.0321861121477606e-05, - "loss": 0.4099, + "learning_rate": 5.032168472437052e-05, + "loss": 0.3066, "step": 1362500 }, { "epoch": 0.82, - "learning_rate": 5.031976115591704e-05, - "loss": 0.4012, + "learning_rate": 5.0319584758809954e-05, + "loss": 0.3028, "step": 1363000 }, { "epoch": 0.82, - "learning_rate": 5.031766119035648e-05, - "loss": 0.4072, + "learning_rate": 5.0317484793249394e-05, + "loss": 0.306, "step": 1363500 }, { "epoch": 0.82, - "learning_rate": 5.0315561224795913e-05, - "loss": 0.4098, + "learning_rate": 5.031538902761995e-05, + "loss": 0.3102, "step": 1364000 }, { "epoch": 0.82, - "learning_rate": 5.031346125923535e-05, - "loss": 0.4088, + "learning_rate": 5.031328906205938e-05, + "loss": 0.3093, "step": 1364500 }, { "epoch": 0.82, - "learning_rate": 5.03113654936059e-05, - "loss": 0.395, + "learning_rate": 5.0311189096498815e-05, + "loss": 0.303, "step": 1365000 }, { "epoch": 0.82, - "learning_rate": 5.030926552804534e-05, - "loss": 0.4086, + "learning_rate": 5.0309089130938255e-05, + "loss": 0.3084, "step": 1365500 }, { "epoch": 0.82, - "learning_rate": 5.0307165562484774e-05, - "loss": 0.4002, + "learning_rate": 5.030698916537769e-05, + "loss": 0.2974, "step": 1366000 }, { "epoch": 0.82, - "learning_rate": 5.030506559692421e-05, - "loss": 0.4028, + "learning_rate": 5.030489339974824e-05, + "loss": 0.3004, "step": 1366500 }, { "epoch": 0.82, - "learning_rate": 5.030296563136365e-05, - "loss": 0.4034, + "learning_rate": 5.0302793434187676e-05, + "loss": 0.3035, "step": 1367000 }, { "epoch": 0.82, - "learning_rate": 5.030086566580308e-05, - "loss": 0.4054, + "learning_rate": 5.0300693468627116e-05, + "loss": 0.3005, "step": 1367500 }, { "epoch": 0.82, - "learning_rate": 5.0298765700242515e-05, - "loss": 0.4111, + "learning_rate": 5.029859350306655e-05, + "loss": 0.3008, "step": 1368000 }, { "epoch": 0.82, - "learning_rate": 5.0296669934613075e-05, - "loss": 0.4076, + "learning_rate": 5.02964977374371e-05, + "loss": 0.3037, "step": 1368500 }, { "epoch": 0.82, - "learning_rate": 5.029456996905251e-05, - "loss": 0.4053, + "learning_rate": 5.0294397771876536e-05, + "loss": 0.3061, "step": 1369000 }, { "epoch": 0.82, - "learning_rate": 5.029247000349194e-05, - "loss": 0.4098, + "learning_rate": 5.029229780631598e-05, + "loss": 0.3054, "step": 1369500 }, { "epoch": 0.82, - "learning_rate": 5.029037003793138e-05, - "loss": 0.4013, + "learning_rate": 5.029019784075541e-05, + "loss": 0.3011, "step": 1370000 }, { "epoch": 0.82, - "learning_rate": 5.0288270072370816e-05, - "loss": 0.4029, + "learning_rate": 5.028809787519485e-05, + "loss": 0.3031, "step": 1370500 }, { "epoch": 0.82, - "learning_rate": 5.028617010681024e-05, - "loss": 0.41, + "learning_rate": 5.0286002109565404e-05, + "loss": 0.3025, "step": 1371000 }, { "epoch": 0.82, - "learning_rate": 5.028407014124968e-05, - "loss": 0.3986, + "learning_rate": 5.0283906343935964e-05, + "loss": 0.2983, "step": 1371500 }, { "epoch": 0.82, - "learning_rate": 5.028197437562024e-05, - "loss": 0.4092, + "learning_rate": 5.028180637837539e-05, + "loss": 0.3057, "step": 1372000 }, { "epoch": 0.82, - "learning_rate": 5.0279874410059676e-05, - "loss": 0.4187, + "learning_rate": 5.0279706412814824e-05, + "loss": 0.309, "step": 1372500 }, { "epoch": 0.82, - "learning_rate": 5.027777444449911e-05, - "loss": 0.4107, + "learning_rate": 5.0277606447254265e-05, + "loss": 0.3126, "step": 1373000 }, { "epoch": 0.82, - "learning_rate": 5.0275678678869664e-05, - "loss": 0.416, + "learning_rate": 5.02755064816937e-05, + "loss": 0.3077, "step": 1373500 }, { "epoch": 0.82, - "learning_rate": 5.0273578713309104e-05, - "loss": 0.3992, + "learning_rate": 5.027341071606426e-05, + "loss": 0.2997, "step": 1374000 }, { "epoch": 0.82, - "learning_rate": 5.027147874774854e-05, - "loss": 0.4124, + "learning_rate": 5.0271310750503685e-05, + "loss": 0.3055, "step": 1374500 }, { "epoch": 0.82, - "learning_rate": 5.026937878218797e-05, - "loss": 0.4115, + "learning_rate": 5.0269214984874245e-05, + "loss": 0.3148, "step": 1375000 }, { "epoch": 0.82, - "learning_rate": 5.026727881662741e-05, - "loss": 0.3955, + "learning_rate": 5.0267115019313686e-05, + "loss": 0.3027, "step": 1375500 }, { "epoch": 0.82, - "learning_rate": 5.026517885106684e-05, - "loss": 0.4033, + "learning_rate": 5.026501505375312e-05, + "loss": 0.3017, "step": 1376000 }, { "epoch": 0.83, - "learning_rate": 5.026307888550628e-05, - "loss": 0.4079, + "learning_rate": 5.026291508819255e-05, + "loss": 0.3066, "step": 1376500 }, { "epoch": 0.83, - "learning_rate": 5.026097891994571e-05, - "loss": 0.4036, + "learning_rate": 5.0260815122631986e-05, + "loss": 0.2999, "step": 1377000 }, { "epoch": 0.83, - "learning_rate": 5.0258878954385145e-05, - "loss": 0.3996, + "learning_rate": 5.025871515707142e-05, + "loss": 0.3067, "step": 1377500 }, { "epoch": 0.83, - "learning_rate": 5.0256778988824585e-05, - "loss": 0.4056, + "learning_rate": 5.025661519151086e-05, + "loss": 0.305, "step": 1378000 }, { "epoch": 0.83, - "learning_rate": 5.025468322319514e-05, - "loss": 0.3982, + "learning_rate": 5.025451522595029e-05, + "loss": 0.295, "step": 1378500 }, { "epoch": 0.83, - "learning_rate": 5.025258325763457e-05, - "loss": 0.4081, + "learning_rate": 5.025241526038973e-05, + "loss": 0.3043, "step": 1379000 }, { "epoch": 0.83, - "learning_rate": 5.0250483292074005e-05, - "loss": 0.3993, + "learning_rate": 5.025031949476028e-05, + "loss": 0.2964, "step": 1379500 }, { "epoch": 0.83, - "learning_rate": 5.0248383326513446e-05, - "loss": 0.4044, + "learning_rate": 5.024821952919972e-05, + "loss": 0.3034, "step": 1380000 }, { "epoch": 0.83, - "learning_rate": 5.024628336095288e-05, - "loss": 0.3992, + "learning_rate": 5.024612376357028e-05, + "loss": 0.3033, "step": 1380500 }, { "epoch": 0.83, - "learning_rate": 5.024418339539231e-05, - "loss": 0.4105, + "learning_rate": 5.0244023798009714e-05, + "loss": 0.3096, "step": 1381000 }, { "epoch": 0.83, - "learning_rate": 5.0242087629762866e-05, - "loss": 0.4042, + "learning_rate": 5.024192383244914e-05, + "loss": 0.3023, "step": 1381500 }, { "epoch": 0.83, - "learning_rate": 5.0239987664202306e-05, - "loss": 0.4127, + "learning_rate": 5.023982386688858e-05, + "loss": 0.3099, "step": 1382000 }, { "epoch": 0.83, - "learning_rate": 5.023788769864174e-05, - "loss": 0.4054, + "learning_rate": 5.0237723901328015e-05, + "loss": 0.3063, "step": 1382500 }, { "epoch": 0.83, - "learning_rate": 5.023578773308117e-05, - "loss": 0.409, + "learning_rate": 5.0235623935767455e-05, + "loss": 0.3067, "step": 1383000 }, { "epoch": 0.83, - "learning_rate": 5.0233687767520614e-05, - "loss": 0.4057, + "learning_rate": 5.023352397020689e-05, + "loss": 0.3127, "step": 1383500 }, { "epoch": 0.83, - "learning_rate": 5.023159200189117e-05, - "loss": 0.4165, + "learning_rate": 5.023142400464632e-05, + "loss": 0.3057, "step": 1384000 }, { "epoch": 0.83, - "learning_rate": 5.02294920363306e-05, - "loss": 0.4007, + "learning_rate": 5.022932403908576e-05, + "loss": 0.3044, "step": 1384500 }, { "epoch": 0.83, - "learning_rate": 5.022739207077004e-05, - "loss": 0.4075, + "learning_rate": 5.0227224073525196e-05, + "loss": 0.3054, "step": 1385000 }, { "epoch": 0.83, - "learning_rate": 5.0225292105209474e-05, - "loss": 0.3994, + "learning_rate": 5.022512830789575e-05, + "loss": 0.3058, "step": 1385500 }, { "epoch": 0.83, - "learning_rate": 5.022319213964891e-05, - "loss": 0.4091, + "learning_rate": 5.022302834233518e-05, + "loss": 0.301, "step": 1386000 }, { "epoch": 0.83, - "learning_rate": 5.022109217408835e-05, - "loss": 0.4039, + "learning_rate": 5.022092837677462e-05, + "loss": 0.3075, "step": 1386500 }, { "epoch": 0.83, - "learning_rate": 5.021899220852778e-05, - "loss": 0.4052, + "learning_rate": 5.0218828411214056e-05, + "loss": 0.3, "step": 1387000 }, { "epoch": 0.83, - "learning_rate": 5.0216892242967215e-05, - "loss": 0.4039, + "learning_rate": 5.021672844565349e-05, + "loss": 0.3023, "step": 1387500 }, { "epoch": 0.83, - "learning_rate": 5.021480067726889e-05, - "loss": 0.4095, + "learning_rate": 5.021462848009293e-05, + "loss": 0.306, "step": 1388000 }, { "epoch": 0.83, - "learning_rate": 5.021270071170832e-05, - "loss": 0.3968, + "learning_rate": 5.0212528514532363e-05, + "loss": 0.3022, "step": 1388500 }, { "epoch": 0.83, - "learning_rate": 5.021060074614776e-05, - "loss": 0.4012, + "learning_rate": 5.02104285489718e-05, + "loss": 0.3045, "step": 1389000 }, { "epoch": 0.83, - "learning_rate": 5.0208500780587196e-05, - "loss": 0.3925, + "learning_rate": 5.020833278334235e-05, + "loss": 0.3002, "step": 1389500 }, { "epoch": 0.83, - "learning_rate": 5.020640501495775e-05, - "loss": 0.4009, + "learning_rate": 5.020623281778179e-05, + "loss": 0.2982, "step": 1390000 }, { "epoch": 0.83, - "learning_rate": 5.020430924932831e-05, - "loss": 0.3926, + "learning_rate": 5.0204132852221224e-05, + "loss": 0.301, "step": 1390500 }, { "epoch": 0.83, - "learning_rate": 5.020220928376775e-05, - "loss": 0.4065, + "learning_rate": 5.020203288666066e-05, + "loss": 0.3046, "step": 1391000 }, { "epoch": 0.83, - "learning_rate": 5.0200109318207183e-05, - "loss": 0.4038, + "learning_rate": 5.019994132096233e-05, + "loss": 0.3048, "step": 1391500 }, { "epoch": 0.83, - "learning_rate": 5.019800935264662e-05, - "loss": 0.3961, + "learning_rate": 5.019784135540177e-05, + "loss": 0.3056, "step": 1392000 }, { "epoch": 0.83, - "learning_rate": 5.019590938708605e-05, - "loss": 0.4038, + "learning_rate": 5.0195741389841205e-05, + "loss": 0.2999, "step": 1392500 }, { "epoch": 0.84, - "learning_rate": 5.0193809421525484e-05, - "loss": 0.4106, + "learning_rate": 5.019364142428064e-05, + "loss": 0.3064, "step": 1393000 }, { "epoch": 0.84, - "learning_rate": 5.019170945596492e-05, - "loss": 0.416, + "learning_rate": 5.019154145872008e-05, + "loss": 0.3112, "step": 1393500 }, { "epoch": 0.84, - "learning_rate": 5.018960949040436e-05, - "loss": 0.4141, + "learning_rate": 5.018944569309063e-05, + "loss": 0.3055, "step": 1394000 }, { "epoch": 0.84, - "learning_rate": 5.018750952484379e-05, - "loss": 0.3983, + "learning_rate": 5.0187345727530066e-05, + "loss": 0.2989, "step": 1394500 }, { "epoch": 0.84, - "learning_rate": 5.0185409559283224e-05, - "loss": 0.4127, + "learning_rate": 5.01852457619695e-05, + "loss": 0.3059, "step": 1395000 }, { "epoch": 0.84, - "learning_rate": 5.0183309593722665e-05, - "loss": 0.4036, + "learning_rate": 5.018314579640894e-05, + "loss": 0.3059, "step": 1395500 }, { "epoch": 0.84, - "learning_rate": 5.018121382809322e-05, - "loss": 0.3991, + "learning_rate": 5.018105003077949e-05, + "loss": 0.2952, "step": 1396000 }, { "epoch": 0.84, - "learning_rate": 5.017911386253265e-05, - "loss": 0.401, + "learning_rate": 5.0178950065218926e-05, + "loss": 0.3012, "step": 1396500 }, { "epoch": 0.84, - "learning_rate": 5.0177013896972085e-05, - "loss": 0.4076, + "learning_rate": 5.017685429958949e-05, + "loss": 0.3031, "step": 1397000 }, { "epoch": 0.84, - "learning_rate": 5.0174913931411525e-05, - "loss": 0.4047, + "learning_rate": 5.017475853396004e-05, + "loss": 0.3037, "step": 1397500 }, { "epoch": 0.84, - "learning_rate": 5.017281396585096e-05, - "loss": 0.3983, + "learning_rate": 5.017265856839948e-05, + "loss": 0.2945, "step": 1398000 }, { "epoch": 0.84, - "learning_rate": 5.017071820022151e-05, - "loss": 0.3931, + "learning_rate": 5.0170558602838914e-05, + "loss": 0.2971, "step": 1398500 }, { "epoch": 0.84, - "learning_rate": 5.016861823466095e-05, - "loss": 0.3963, + "learning_rate": 5.016845863727835e-05, + "loss": 0.2977, "step": 1399000 }, { "epoch": 0.84, - "learning_rate": 5.0166518269100386e-05, - "loss": 0.4083, + "learning_rate": 5.016635867171779e-05, + "loss": 0.2943, "step": 1399500 }, { "epoch": 0.84, - "learning_rate": 5.016441830353982e-05, - "loss": 0.4021, + "learning_rate": 5.016426290608834e-05, + "loss": 0.3009, "step": 1400000 }, { "epoch": 0.84, - "eval_loss": 0.3852459192276001, - "eval_runtime": 1130.5133, - "eval_samples_per_second": 465.912, - "eval_steps_per_second": 77.652, + "eval_loss": 0.27523356676101685, + "eval_runtime": 1475.9951, + "eval_samples_per_second": 356.858, + "eval_steps_per_second": 59.476, "step": 1400000 }, { "epoch": 0.84, - "learning_rate": 5.016231833797926e-05, - "loss": 0.4059, + "learning_rate": 5.0162162940527775e-05, + "loss": 0.3036, "step": 1400500 }, { "epoch": 0.84, - "learning_rate": 5.016021837241869e-05, - "loss": 0.41, + "learning_rate": 5.016006297496721e-05, + "loss": 0.3034, "step": 1401000 }, { "epoch": 0.84, - "learning_rate": 5.015811840685813e-05, - "loss": 0.3999, + "learning_rate": 5.015796300940665e-05, + "loss": 0.3032, "step": 1401500 }, { "epoch": 0.84, - "learning_rate": 5.015602264122868e-05, - "loss": 0.3988, + "learning_rate": 5.015586304384608e-05, + "loss": 0.3007, "step": 1402000 }, { "epoch": 0.84, - "learning_rate": 5.015392267566812e-05, - "loss": 0.3889, + "learning_rate": 5.0153763078285515e-05, + "loss": 0.2955, "step": 1402500 }, { "epoch": 0.84, - "learning_rate": 5.0151822710107554e-05, - "loss": 0.4051, + "learning_rate": 5.015166311272495e-05, + "loss": 0.299, "step": 1403000 }, { "epoch": 0.84, - "learning_rate": 5.014972274454699e-05, - "loss": 0.3999, + "learning_rate": 5.014956314716438e-05, + "loss": 0.3004, "step": 1403500 }, { "epoch": 0.84, - "learning_rate": 5.014762277898643e-05, - "loss": 0.4024, + "learning_rate": 5.014746318160382e-05, + "loss": 0.2987, "step": 1404000 }, { "epoch": 0.84, - "learning_rate": 5.014552701335698e-05, - "loss": 0.3961, + "learning_rate": 5.014536741597438e-05, + "loss": 0.3003, "step": 1404500 }, { "epoch": 0.84, - "learning_rate": 5.0143427047796415e-05, - "loss": 0.4119, + "learning_rate": 5.0143271650344936e-05, + "loss": 0.31, "step": 1405000 }, { "epoch": 0.84, - "learning_rate": 5.0141327082235855e-05, - "loss": 0.4088, + "learning_rate": 5.014117168478437e-05, + "loss": 0.3024, "step": 1405500 }, { "epoch": 0.84, - "learning_rate": 5.013922711667529e-05, - "loss": 0.4121, + "learning_rate": 5.0139071719223803e-05, + "loss": 0.3055, "step": 1406000 }, { "epoch": 0.84, - "learning_rate": 5.013712715111472e-05, - "loss": 0.4036, + "learning_rate": 5.0136971753663244e-05, + "loss": 0.3036, "step": 1406500 }, { "epoch": 0.84, - "learning_rate": 5.013502718555416e-05, - "loss": 0.4122, + "learning_rate": 5.013487178810268e-05, + "loss": 0.3069, "step": 1407000 }, { "epoch": 0.84, - "learning_rate": 5.013292721999359e-05, - "loss": 0.412, + "learning_rate": 5.0132771822542104e-05, + "loss": 0.3099, "step": 1407500 }, { "epoch": 0.84, - "learning_rate": 5.013082725443302e-05, - "loss": 0.4032, + "learning_rate": 5.0130671856981544e-05, + "loss": 0.3007, "step": 1408000 }, { "epoch": 0.84, - "learning_rate": 5.01287356887347e-05, - "loss": 0.4128, + "learning_rate": 5.012857189142098e-05, + "loss": 0.3031, "step": 1408500 }, { "epoch": 0.84, - "learning_rate": 5.0126635723174136e-05, - "loss": 0.403, + "learning_rate": 5.012647192586041e-05, + "loss": 0.3013, "step": 1409000 }, { "epoch": 0.85, - "learning_rate": 5.0124535757613576e-05, - "loss": 0.4054, + "learning_rate": 5.012437196029985e-05, + "loss": 0.298, "step": 1409500 }, { "epoch": 0.85, - "learning_rate": 5.012243579205301e-05, - "loss": 0.4034, + "learning_rate": 5.0122271994739285e-05, + "loss": 0.3012, "step": 1410000 }, { "epoch": 0.85, - "learning_rate": 5.012033582649244e-05, - "loss": 0.4079, + "learning_rate": 5.012017202917872e-05, + "loss": 0.3077, "step": 1410500 }, { "epoch": 0.85, - "learning_rate": 5.0118235860931884e-05, - "loss": 0.4091, + "learning_rate": 5.011807206361816e-05, + "loss": 0.3046, "step": 1411000 }, { "epoch": 0.85, - "learning_rate": 5.011613589537132e-05, - "loss": 0.4042, + "learning_rate": 5.011597209805759e-05, + "loss": 0.3038, "step": 1411500 }, { "epoch": 0.85, - "learning_rate": 5.011403592981075e-05, - "loss": 0.4029, + "learning_rate": 5.0113872132497025e-05, + "loss": 0.3036, "step": 1412000 }, { "epoch": 0.85, - "learning_rate": 5.011194016418131e-05, - "loss": 0.401, + "learning_rate": 5.0111772166936465e-05, + "loss": 0.3045, "step": 1412500 }, { "epoch": 0.85, - "learning_rate": 5.0109840198620744e-05, - "loss": 0.3962, + "learning_rate": 5.010967640130702e-05, + "loss": 0.3045, "step": 1413000 }, { "epoch": 0.85, - "learning_rate": 5.010774023306018e-05, - "loss": 0.4038, + "learning_rate": 5.010757643574645e-05, + "loss": 0.3011, "step": 1413500 }, { "epoch": 0.85, - "learning_rate": 5.010564026749962e-05, - "loss": 0.403, + "learning_rate": 5.010547647018589e-05, + "loss": 0.3038, "step": 1414000 }, { "epoch": 0.85, - "learning_rate": 5.010354450187017e-05, - "loss": 0.4061, + "learning_rate": 5.0103380704556446e-05, + "loss": 0.3098, "step": 1414500 }, { "epoch": 0.85, - "learning_rate": 5.0101444536309605e-05, - "loss": 0.3967, + "learning_rate": 5.010128073899588e-05, + "loss": 0.2999, "step": 1415000 }, { "epoch": 0.85, - "learning_rate": 5.009934457074904e-05, - "loss": 0.4102, + "learning_rate": 5.009918077343531e-05, + "loss": 0.3049, "step": 1415500 }, { "epoch": 0.85, - "learning_rate": 5.009724460518848e-05, - "loss": 0.4101, + "learning_rate": 5.0097080807874753e-05, + "loss": 0.2986, "step": 1416000 }, { "epoch": 0.85, - "learning_rate": 5.009514463962791e-05, - "loss": 0.4026, + "learning_rate": 5.009498084231419e-05, + "loss": 0.302, "step": 1416500 }, { "epoch": 0.85, - "learning_rate": 5.009304467406734e-05, - "loss": 0.4097, + "learning_rate": 5.009288087675362e-05, + "loss": 0.3072, "step": 1417000 }, { "epoch": 0.85, - "learning_rate": 5.009094470850678e-05, - "loss": 0.4048, + "learning_rate": 5.009078091119306e-05, + "loss": 0.3013, "step": 1417500 }, { "epoch": 0.85, - "learning_rate": 5.008884894287734e-05, - "loss": 0.4115, + "learning_rate": 5.008868094563249e-05, + "loss": 0.3026, "step": 1418000 }, { "epoch": 0.85, - "learning_rate": 5.008674897731677e-05, - "loss": 0.397, + "learning_rate": 5.008658098007192e-05, + "loss": 0.2991, "step": 1418500 }, { "epoch": 0.85, - "learning_rate": 5.0084649011756206e-05, - "loss": 0.4046, + "learning_rate": 5.008448521444248e-05, + "loss": 0.3057, "step": 1419000 }, { "epoch": 0.85, - "learning_rate": 5.008254904619564e-05, - "loss": 0.3951, + "learning_rate": 5.008238524888192e-05, + "loss": 0.3054, "step": 1419500 }, { "epoch": 0.85, - "learning_rate": 5.008044908063507e-05, - "loss": 0.3988, + "learning_rate": 5.0080285283321355e-05, + "loss": 0.3013, "step": 1420000 }, { "epoch": 0.85, - "learning_rate": 5.0078349115074513e-05, - "loss": 0.4105, + "learning_rate": 5.007818531776079e-05, + "loss": 0.299, "step": 1420500 }, { "epoch": 0.85, - "learning_rate": 5.007624914951395e-05, - "loss": 0.414, + "learning_rate": 5.007608535220022e-05, + "loss": 0.3045, "step": 1421000 }, { "epoch": 0.85, - "learning_rate": 5.007415338388451e-05, - "loss": 0.4008, + "learning_rate": 5.007398958657078e-05, + "loss": 0.3032, "step": 1421500 }, { "epoch": 0.85, - "learning_rate": 5.0072053418323934e-05, - "loss": 0.4069, + "learning_rate": 5.0071889621010216e-05, + "loss": 0.3066, "step": 1422000 }, { "epoch": 0.85, - "learning_rate": 5.0069953452763374e-05, - "loss": 0.4062, + "learning_rate": 5.006978965544965e-05, + "loss": 0.2975, "step": 1422500 }, { "epoch": 0.85, - "learning_rate": 5.006785348720281e-05, - "loss": 0.3949, + "learning_rate": 5.006769388982021e-05, + "loss": 0.3058, "step": 1423000 }, { "epoch": 0.85, - "learning_rate": 5.006575352164224e-05, - "loss": 0.4017, + "learning_rate": 5.006559392425964e-05, + "loss": 0.3032, "step": 1423500 }, { "epoch": 0.85, - "learning_rate": 5.00636577560128e-05, - "loss": 0.4014, + "learning_rate": 5.0063493958699076e-05, + "loss": 0.3023, "step": 1424000 }, { "epoch": 0.85, - "learning_rate": 5.0061557790452235e-05, - "loss": 0.402, + "learning_rate": 5.0061393993138517e-05, + "loss": 0.3036, "step": 1424500 }, { "epoch": 0.85, - "learning_rate": 5.005945782489167e-05, - "loss": 0.4058, + "learning_rate": 5.005929402757794e-05, + "loss": 0.3081, "step": 1425000 }, { "epoch": 0.85, - "learning_rate": 5.00573578593311e-05, - "loss": 0.3918, + "learning_rate": 5.005719406201738e-05, + "loss": 0.2976, "step": 1425500 }, { "epoch": 0.85, - "learning_rate": 5.005525789377054e-05, - "loss": 0.4159, + "learning_rate": 5.005509409645682e-05, + "loss": 0.3035, "step": 1426000 }, { "epoch": 0.86, - "learning_rate": 5.0053162128141096e-05, - "loss": 0.3947, + "learning_rate": 5.005299413089625e-05, + "loss": 0.3037, "step": 1426500 }, { "epoch": 0.86, - "learning_rate": 5.005106216258053e-05, - "loss": 0.4117, + "learning_rate": 5.0050894165335684e-05, + "loss": 0.3042, "step": 1427000 }, { "epoch": 0.86, - "learning_rate": 5.004896219701997e-05, - "loss": 0.3985, + "learning_rate": 5.0048798399706244e-05, + "loss": 0.3017, "step": 1427500 }, { "epoch": 0.86, - "learning_rate": 5.00468622314594e-05, - "loss": 0.41, + "learning_rate": 5.004669843414568e-05, + "loss": 0.3074, "step": 1428000 }, { "epoch": 0.86, - "learning_rate": 5.0044762265898836e-05, - "loss": 0.3975, + "learning_rate": 5.004459846858511e-05, + "loss": 0.3005, "step": 1428500 }, { "epoch": 0.86, - "learning_rate": 5.0042662300338277e-05, - "loss": 0.4008, + "learning_rate": 5.004249850302455e-05, + "loss": 0.3021, "step": 1429000 }, { "epoch": 0.86, - "learning_rate": 5.004056233477771e-05, - "loss": 0.4091, + "learning_rate": 5.004040273739511e-05, + "loss": 0.3073, "step": 1429500 }, { "epoch": 0.86, - "learning_rate": 5.0038466569148264e-05, - "loss": 0.3979, + "learning_rate": 5.003830277183454e-05, + "loss": 0.3014, "step": 1430000 }, { "epoch": 0.86, - "learning_rate": 5.00363666035877e-05, - "loss": 0.3948, + "learning_rate": 5.003620280627397e-05, + "loss": 0.2955, "step": 1430500 }, { "epoch": 0.86, - "learning_rate": 5.003426663802714e-05, - "loss": 0.4046, + "learning_rate": 5.003410284071341e-05, + "loss": 0.3037, "step": 1431000 }, { "epoch": 0.86, - "learning_rate": 5.003216667246657e-05, - "loss": 0.3988, + "learning_rate": 5.0032002875152845e-05, + "loss": 0.3043, "step": 1431500 }, { "epoch": 0.86, - "learning_rate": 5.0030066706906004e-05, - "loss": 0.3981, + "learning_rate": 5.0029907109523406e-05, + "loss": 0.3015, "step": 1432000 }, { "epoch": 0.86, - "learning_rate": 5.0027966741345444e-05, - "loss": 0.4074, + "learning_rate": 5.002780714396283e-05, + "loss": 0.311, "step": 1432500 }, { "epoch": 0.86, - "learning_rate": 5.002586677578488e-05, - "loss": 0.4058, + "learning_rate": 5.002570717840227e-05, + "loss": 0.3029, "step": 1433000 }, { "epoch": 0.86, - "learning_rate": 5.002376681022431e-05, - "loss": 0.402, + "learning_rate": 5.0023607212841706e-05, + "loss": 0.3036, "step": 1433500 }, { "epoch": 0.86, - "learning_rate": 5.0021671044594865e-05, - "loss": 0.3948, + "learning_rate": 5.0021511447212267e-05, + "loss": 0.2964, "step": 1434000 }, { "epoch": 0.86, - "learning_rate": 5.0019575278965425e-05, - "loss": 0.4009, + "learning_rate": 5.00194114816517e-05, + "loss": 0.2977, "step": 1434500 }, { "epoch": 0.86, - "learning_rate": 5.001747531340486e-05, - "loss": 0.3991, + "learning_rate": 5.0017311516091133e-05, + "loss": 0.2998, "step": 1435000 }, { "epoch": 0.86, - "learning_rate": 5.001537534784429e-05, - "loss": 0.4049, + "learning_rate": 5.001521155053057e-05, + "loss": 0.3067, "step": 1435500 }, { "epoch": 0.86, - "learning_rate": 5.001327538228373e-05, - "loss": 0.4014, + "learning_rate": 5.001311578490113e-05, + "loss": 0.3025, "step": 1436000 }, { "epoch": 0.86, - "learning_rate": 5.0011175416723166e-05, - "loss": 0.4044, + "learning_rate": 5.001101581934057e-05, + "loss": 0.3046, "step": 1436500 }, { "epoch": 0.86, - "learning_rate": 5.000907965109372e-05, - "loss": 0.4175, + "learning_rate": 5.0008915853779994e-05, + "loss": 0.3071, "step": 1437000 }, { "epoch": 0.86, - "learning_rate": 5.000697968553315e-05, - "loss": 0.403, + "learning_rate": 5.000681588821943e-05, + "loss": 0.2998, "step": 1437500 }, { "epoch": 0.86, - "learning_rate": 5.000487971997259e-05, - "loss": 0.4026, + "learning_rate": 5.000471592265887e-05, + "loss": 0.2985, "step": 1438000 }, { "epoch": 0.86, - "learning_rate": 5.0002779754412027e-05, - "loss": 0.4022, + "learning_rate": 5.00026159570983e-05, + "loss": 0.3029, "step": 1438500 }, { "epoch": 0.86, - "learning_rate": 5.000067978885146e-05, - "loss": 0.4008, + "learning_rate": 5.0000515991537735e-05, + "loss": 0.3, "step": 1439000 }, { "epoch": 0.86, - "learning_rate": 4.9998584023222014e-05, - "loss": 0.3995, + "learning_rate": 4.999842022590829e-05, + "loss": 0.3051, "step": 1439500 }, { "epoch": 0.86, - "learning_rate": 4.9996484057661454e-05, - "loss": 0.3989, + "learning_rate": 4.999632026034773e-05, + "loss": 0.3009, "step": 1440000 }, { "epoch": 0.86, - "learning_rate": 4.999438409210089e-05, - "loss": 0.4022, + "learning_rate": 4.999422029478716e-05, + "loss": 0.307, "step": 1440500 }, { "epoch": 0.86, - "learning_rate": 4.999228412654032e-05, - "loss": 0.3912, + "learning_rate": 4.999212452915772e-05, + "loss": 0.2938, "step": 1441000 }, { "epoch": 0.86, - "learning_rate": 4.999018416097976e-05, - "loss": 0.4076, + "learning_rate": 4.999002456359716e-05, + "loss": 0.3019, "step": 1441500 }, { "epoch": 0.86, - "learning_rate": 4.9988088395350315e-05, - "loss": 0.4023, + "learning_rate": 4.998792459803659e-05, + "loss": 0.3044, "step": 1442000 }, { "epoch": 0.86, - "learning_rate": 4.998598842978975e-05, - "loss": 0.4136, + "learning_rate": 4.998582463247602e-05, + "loss": 0.3072, "step": 1442500 }, { "epoch": 0.87, - "learning_rate": 4.998388846422919e-05, - "loss": 0.4029, + "learning_rate": 4.998372466691546e-05, + "loss": 0.3023, "step": 1443000 }, { "epoch": 0.87, - "learning_rate": 4.998178849866862e-05, - "loss": 0.4037, + "learning_rate": 4.9981624701354897e-05, + "loss": 0.3017, "step": 1443500 }, { "epoch": 0.87, - "learning_rate": 4.9979688533108055e-05, - "loss": 0.3998, + "learning_rate": 4.997952473579433e-05, + "loss": 0.3041, "step": 1444000 }, { "epoch": 0.87, - "learning_rate": 4.997759276747861e-05, - "loss": 0.4144, + "learning_rate": 4.9977428970164884e-05, + "loss": 0.3117, "step": 1444500 }, { "epoch": 0.87, - "learning_rate": 4.997549280191805e-05, - "loss": 0.3977, + "learning_rate": 4.9975329004604324e-05, + "loss": 0.2993, "step": 1445000 }, { "epoch": 0.87, - "learning_rate": 4.997339283635748e-05, - "loss": 0.4052, + "learning_rate": 4.997322903904376e-05, + "loss": 0.3017, "step": 1445500 }, { "epoch": 0.87, - "learning_rate": 4.9971292870796916e-05, - "loss": 0.4064, + "learning_rate": 4.997112907348319e-05, + "loss": 0.302, "step": 1446000 }, { "epoch": 0.87, - "learning_rate": 4.996919710516747e-05, - "loss": 0.401, + "learning_rate": 4.996902910792263e-05, + "loss": 0.3038, "step": 1446500 }, { "epoch": 0.87, - "learning_rate": 4.996709713960691e-05, - "loss": 0.3947, + "learning_rate": 4.9966929142362064e-05, + "loss": 0.2988, "step": 1447000 }, { "epoch": 0.87, - "learning_rate": 4.996499717404634e-05, - "loss": 0.4047, + "learning_rate": 4.99648291768015e-05, + "loss": 0.3063, "step": 1447500 }, { "epoch": 0.87, - "learning_rate": 4.996289720848578e-05, - "loss": 0.4045, + "learning_rate": 4.996272921124094e-05, + "loss": 0.3035, "step": 1448000 }, { "epoch": 0.87, - "learning_rate": 4.996079724292522e-05, - "loss": 0.432, + "learning_rate": 4.996062924568037e-05, + "loss": 0.3099, "step": 1448500 }, { "epoch": 0.87, - "learning_rate": 4.995869727736465e-05, - "loss": 0.3994, + "learning_rate": 4.9958529280119805e-05, + "loss": 0.3012, "step": 1449000 }, { "epoch": 0.87, - "learning_rate": 4.9956597311804084e-05, - "loss": 0.4011, + "learning_rate": 4.995642931455924e-05, + "loss": 0.3, "step": 1449500 }, { "epoch": 0.87, - "learning_rate": 4.9954497346243524e-05, - "loss": 0.3977, + "learning_rate": 4.995432934899867e-05, + "loss": 0.3, "step": 1450000 }, { "epoch": 0.87, - "learning_rate": 4.995240158061408e-05, - "loss": 0.3981, + "learning_rate": 4.995222938343811e-05, + "loss": 0.2999, "step": 1450500 }, { "epoch": 0.87, - "learning_rate": 4.995030161505351e-05, - "loss": 0.4046, + "learning_rate": 4.995013361780867e-05, + "loss": 0.3069, "step": 1451000 }, { "epoch": 0.87, - "learning_rate": 4.9948205849424065e-05, - "loss": 0.4028, + "learning_rate": 4.9948037852179226e-05, + "loss": 0.3033, "step": 1451500 }, { "epoch": 0.87, - "learning_rate": 4.9946105883863505e-05, - "loss": 0.3943, + "learning_rate": 4.994593788661866e-05, + "loss": 0.2922, "step": 1452000 }, { "epoch": 0.87, - "learning_rate": 4.994400591830294e-05, - "loss": 0.4071, + "learning_rate": 4.994383792105809e-05, + "loss": 0.3014, "step": 1452500 }, { "epoch": 0.87, - "learning_rate": 4.994190595274237e-05, - "loss": 0.398, + "learning_rate": 4.994173795549753e-05, + "loss": 0.2987, "step": 1453000 }, { "epoch": 0.87, - "learning_rate": 4.993980598718181e-05, - "loss": 0.4154, + "learning_rate": 4.993964218986809e-05, + "loss": 0.3102, "step": 1453500 }, { "epoch": 0.87, - "learning_rate": 4.9937710221552366e-05, - "loss": 0.4076, + "learning_rate": 4.993754222430752e-05, + "loss": 0.3072, "step": 1454000 }, { "epoch": 0.87, - "learning_rate": 4.99356102559918e-05, - "loss": 0.3971, + "learning_rate": 4.9935442258746954e-05, + "loss": 0.2953, "step": 1454500 }, { "epoch": 0.87, - "learning_rate": 4.993351029043123e-05, - "loss": 0.4051, + "learning_rate": 4.9933342293186394e-05, + "loss": 0.3036, "step": 1455000 }, { "epoch": 0.87, - "learning_rate": 4.993141032487067e-05, - "loss": 0.418, + "learning_rate": 4.993124232762583e-05, + "loss": 0.3079, "step": 1455500 }, { "epoch": 0.87, - "learning_rate": 4.9929310359310106e-05, - "loss": 0.4116, + "learning_rate": 4.992914236206526e-05, + "loss": 0.3103, "step": 1456000 }, { "epoch": 0.87, - "learning_rate": 4.992721039374954e-05, - "loss": 0.3935, + "learning_rate": 4.99270423965047e-05, + "loss": 0.297, "step": 1456500 }, { "epoch": 0.87, - "learning_rate": 4.99251146281201e-05, - "loss": 0.4023, + "learning_rate": 4.992494243094413e-05, + "loss": 0.3062, "step": 1457000 }, { "epoch": 0.87, - "learning_rate": 4.9923014662559533e-05, - "loss": 0.4001, + "learning_rate": 4.992284246538357e-05, + "loss": 0.2995, "step": 1457500 }, { "epoch": 0.87, - "learning_rate": 4.992091469699897e-05, - "loss": 0.3998, + "learning_rate": 4.9920742499823e-05, + "loss": 0.2985, "step": 1458000 }, { "epoch": 0.87, - "learning_rate": 4.991881473143841e-05, - "loss": 0.4061, + "learning_rate": 4.9918642534262435e-05, + "loss": 0.3071, "step": 1458500 }, { "epoch": 0.87, - "learning_rate": 4.991671476587784e-05, - "loss": 0.3977, + "learning_rate": 4.991654676863299e-05, + "loss": 0.3013, "step": 1459000 }, { "epoch": 0.88, - "learning_rate": 4.9914619000248394e-05, - "loss": 0.3896, + "learning_rate": 4.991444680307243e-05, + "loss": 0.2939, "step": 1459500 }, { "epoch": 0.88, - "learning_rate": 4.991251903468783e-05, - "loss": 0.4001, + "learning_rate": 4.991234683751186e-05, + "loss": 0.3017, "step": 1460000 }, { "epoch": 0.88, - "learning_rate": 4.991041906912727e-05, - "loss": 0.4089, + "learning_rate": 4.9910246871951296e-05, + "loss": 0.3048, "step": 1460500 }, { "epoch": 0.88, - "learning_rate": 4.99083191035667e-05, - "loss": 0.4105, + "learning_rate": 4.9908146906390736e-05, + "loss": 0.3051, "step": 1461000 }, { "epoch": 0.88, - "learning_rate": 4.9906223337937255e-05, - "loss": 0.4008, + "learning_rate": 4.990604694083017e-05, + "loss": 0.2996, "step": 1461500 }, { "epoch": 0.88, - "learning_rate": 4.990412337237669e-05, - "loss": 0.4138, + "learning_rate": 4.990395117520072e-05, + "loss": 0.3063, "step": 1462000 }, { "epoch": 0.88, - "learning_rate": 4.990202340681613e-05, - "loss": 0.4091, + "learning_rate": 4.9901851209640156e-05, + "loss": 0.3084, "step": 1462500 }, { "epoch": 0.88, - "learning_rate": 4.989992344125556e-05, - "loss": 0.4117, + "learning_rate": 4.98997512440796e-05, + "loss": 0.2981, "step": 1463000 }, { "epoch": 0.88, - "learning_rate": 4.9897823475694996e-05, - "loss": 0.4029, + "learning_rate": 4.989765127851903e-05, + "loss": 0.3074, "step": 1463500 }, { "epoch": 0.88, - "learning_rate": 4.9895723510134436e-05, - "loss": 0.4021, + "learning_rate": 4.9895551312958464e-05, + "loss": 0.296, "step": 1464000 }, { "epoch": 0.88, - "learning_rate": 4.989362354457387e-05, - "loss": 0.4018, + "learning_rate": 4.9893455547329024e-05, + "loss": 0.3052, "step": 1464500 }, { "epoch": 0.88, - "learning_rate": 4.989152357901331e-05, - "loss": 0.4049, + "learning_rate": 4.9891359781699584e-05, + "loss": 0.3043, "step": 1465000 }, { "epoch": 0.88, - "learning_rate": 4.988942781338386e-05, - "loss": 0.3994, + "learning_rate": 4.988926401607014e-05, + "loss": 0.3008, "step": 1465500 }, { "epoch": 0.88, - "learning_rate": 4.988733204775442e-05, - "loss": 0.4045, + "learning_rate": 4.988716405050957e-05, + "loss": 0.3047, "step": 1466000 }, { "epoch": 0.88, - "learning_rate": 4.988523208219385e-05, - "loss": 0.3953, + "learning_rate": 4.9885064084949005e-05, + "loss": 0.2979, "step": 1466500 }, { "epoch": 0.88, - "learning_rate": 4.9883132116633284e-05, - "loss": 0.3998, + "learning_rate": 4.9882964119388445e-05, + "loss": 0.3006, "step": 1467000 }, { "epoch": 0.88, - "learning_rate": 4.9881032151072724e-05, - "loss": 0.3889, + "learning_rate": 4.988086415382788e-05, + "loss": 0.2926, "step": 1467500 }, { "epoch": 0.88, - "learning_rate": 4.987893218551216e-05, - "loss": 0.4007, + "learning_rate": 4.987876418826731e-05, + "loss": 0.3028, "step": 1468000 }, { "epoch": 0.88, - "learning_rate": 4.987683221995159e-05, - "loss": 0.399, + "learning_rate": 4.987666422270675e-05, + "loss": 0.3054, "step": 1468500 }, { "epoch": 0.88, - "learning_rate": 4.987473225439103e-05, - "loss": 0.4028, + "learning_rate": 4.987456425714618e-05, + "loss": 0.3064, "step": 1469000 }, { "epoch": 0.88, - "learning_rate": 4.9872632288830464e-05, - "loss": 0.4031, + "learning_rate": 4.987246429158561e-05, + "loss": 0.3043, "step": 1469500 }, { "epoch": 0.88, - "learning_rate": 4.987053652320102e-05, - "loss": 0.3927, + "learning_rate": 4.987036432602505e-05, + "loss": 0.3, "step": 1470000 }, { "epoch": 0.88, - "learning_rate": 4.986843655764046e-05, - "loss": 0.3958, + "learning_rate": 4.986827276032673e-05, + "loss": 0.3031, "step": 1470500 }, { "epoch": 0.88, - "learning_rate": 4.986633659207989e-05, - "loss": 0.4044, + "learning_rate": 4.9866172794766166e-05, + "loss": 0.303, "step": 1471000 }, { "epoch": 0.88, - "learning_rate": 4.9864236626519325e-05, - "loss": 0.3999, + "learning_rate": 4.98640728292056e-05, + "loss": 0.3001, "step": 1471500 }, { "epoch": 0.88, - "learning_rate": 4.986214086088988e-05, - "loss": 0.4079, + "learning_rate": 4.986197286364504e-05, + "loss": 0.3063, "step": 1472000 }, { "epoch": 0.88, - "learning_rate": 4.986004089532932e-05, - "loss": 0.3988, + "learning_rate": 4.9859872898084474e-05, + "loss": 0.299, "step": 1472500 }, { "epoch": 0.88, - "learning_rate": 4.985794092976875e-05, - "loss": 0.3995, + "learning_rate": 4.985777293252391e-05, + "loss": 0.2965, "step": 1473000 }, { "epoch": 0.88, - "learning_rate": 4.9855840964208186e-05, - "loss": 0.3984, + "learning_rate": 4.985567296696334e-05, + "loss": 0.295, "step": 1473500 }, { "epoch": 0.88, - "learning_rate": 4.9853740998647626e-05, - "loss": 0.4, + "learning_rate": 4.9853573001402774e-05, + "loss": 0.3027, "step": 1474000 }, { "epoch": 0.88, - "learning_rate": 4.985164523301818e-05, - "loss": 0.4049, + "learning_rate": 4.9851477235773334e-05, + "loss": 0.3008, "step": 1474500 }, { "epoch": 0.88, - "learning_rate": 4.984954526745761e-05, - "loss": 0.3961, + "learning_rate": 4.984937727021277e-05, + "loss": 0.3042, "step": 1475000 }, { "epoch": 0.88, - "learning_rate": 4.984744950182817e-05, - "loss": 0.4049, + "learning_rate": 4.984727730465221e-05, + "loss": 0.3044, "step": 1475500 }, { "epoch": 0.88, - "learning_rate": 4.98453495362676e-05, - "loss": 0.4066, + "learning_rate": 4.9845177339091635e-05, + "loss": 0.3049, "step": 1476000 }, { "epoch": 0.89, - "learning_rate": 4.984324957070704e-05, - "loss": 0.403, + "learning_rate": 4.984307737353107e-05, + "loss": 0.3021, "step": 1476500 }, { "epoch": 0.89, - "learning_rate": 4.9841149605146474e-05, - "loss": 0.4142, + "learning_rate": 4.984097740797051e-05, + "loss": 0.3056, "step": 1477000 }, { "epoch": 0.89, - "learning_rate": 4.9839049639585914e-05, - "loss": 0.4006, + "learning_rate": 4.983888164234107e-05, + "loss": 0.3031, "step": 1477500 }, { "epoch": 0.89, - "learning_rate": 4.983694967402535e-05, - "loss": 0.4, + "learning_rate": 4.98367816767805e-05, + "loss": 0.302, "step": 1478000 }, { "epoch": 0.89, - "learning_rate": 4.983484970846478e-05, - "loss": 0.4009, + "learning_rate": 4.9834681711219936e-05, + "loss": 0.3021, "step": 1478500 }, { "epoch": 0.89, - "learning_rate": 4.983274974290422e-05, - "loss": 0.3986, + "learning_rate": 4.983258174565937e-05, + "loss": 0.2994, "step": 1479000 }, { "epoch": 0.89, - "learning_rate": 4.9830653977274775e-05, - "loss": 0.3993, + "learning_rate": 4.98304817800988e-05, + "loss": 0.3064, "step": 1479500 }, { "epoch": 0.89, - "learning_rate": 4.982855401171421e-05, - "loss": 0.4014, + "learning_rate": 4.982838181453824e-05, + "loss": 0.3018, "step": 1480000 }, { "epoch": 0.89, - "learning_rate": 4.982645404615364e-05, - "loss": 0.4023, + "learning_rate": 4.9826281848977676e-05, + "loss": 0.2981, "step": 1480500 }, { "epoch": 0.89, - "learning_rate": 4.982435408059308e-05, - "loss": 0.4029, + "learning_rate": 4.982418188341711e-05, + "loss": 0.2993, "step": 1481000 }, { "epoch": 0.89, - "learning_rate": 4.9822254115032515e-05, - "loss": 0.408, + "learning_rate": 4.982208191785655e-05, + "loss": 0.2998, "step": 1481500 }, { "epoch": 0.89, - "learning_rate": 4.982015834940307e-05, - "loss": 0.4104, + "learning_rate": 4.9819986152227104e-05, + "loss": 0.3003, "step": 1482000 }, { "epoch": 0.89, - "learning_rate": 4.98180583838425e-05, - "loss": 0.4011, + "learning_rate": 4.981788618666654e-05, + "loss": 0.3035, "step": 1482500 }, { "epoch": 0.89, - "learning_rate": 4.981595841828194e-05, - "loss": 0.3961, + "learning_rate": 4.981578622110597e-05, + "loss": 0.2985, "step": 1483000 }, { "epoch": 0.89, - "learning_rate": 4.9813858452721376e-05, - "loss": 0.3999, + "learning_rate": 4.981368625554541e-05, + "loss": 0.2966, "step": 1483500 }, { "epoch": 0.89, - "learning_rate": 4.981176268709193e-05, - "loss": 0.4042, + "learning_rate": 4.9811586289984844e-05, + "loss": 0.3019, "step": 1484000 }, { "epoch": 0.89, - "learning_rate": 4.980966272153137e-05, - "loss": 0.4149, + "learning_rate": 4.980948632442428e-05, + "loss": 0.3093, "step": 1484500 }, { "epoch": 0.89, - "learning_rate": 4.9807562755970803e-05, - "loss": 0.4107, + "learning_rate": 4.980739055879483e-05, + "loss": 0.3055, "step": 1485000 }, { "epoch": 0.89, - "learning_rate": 4.980546279041024e-05, - "loss": 0.3992, + "learning_rate": 4.980529059323427e-05, + "loss": 0.3001, "step": 1485500 }, { "epoch": 0.89, - "learning_rate": 4.980336282484968e-05, - "loss": 0.4042, + "learning_rate": 4.9803190627673705e-05, + "loss": 0.2994, "step": 1486000 }, { "epoch": 0.89, - "learning_rate": 4.980126285928911e-05, - "loss": 0.4078, + "learning_rate": 4.980109066211314e-05, + "loss": 0.3049, "step": 1486500 }, { "epoch": 0.89, - "learning_rate": 4.979916289372854e-05, - "loss": 0.4061, + "learning_rate": 4.97989948964837e-05, + "loss": 0.3004, "step": 1487000 }, { "epoch": 0.89, - "learning_rate": 4.979706292816798e-05, - "loss": 0.3908, + "learning_rate": 4.979689493092313e-05, + "loss": 0.2984, "step": 1487500 }, { "epoch": 0.89, - "learning_rate": 4.979496296260741e-05, - "loss": 0.4056, + "learning_rate": 4.9794794965362566e-05, + "loss": 0.3039, "step": 1488000 }, { "epoch": 0.89, - "learning_rate": 4.9792862997046844e-05, - "loss": 0.4031, + "learning_rate": 4.979269919973312e-05, + "loss": 0.2971, "step": 1488500 }, { "epoch": 0.89, - "learning_rate": 4.9790763031486285e-05, - "loss": 0.3924, + "learning_rate": 4.979059923417256e-05, + "loss": 0.3023, "step": 1489000 }, { "epoch": 0.89, - "learning_rate": 4.978866306592572e-05, - "loss": 0.3941, + "learning_rate": 4.978849926861199e-05, + "loss": 0.2982, "step": 1489500 }, { "epoch": 0.89, - "learning_rate": 4.978656730029627e-05, - "loss": 0.3941, + "learning_rate": 4.9786399303051426e-05, + "loss": 0.3038, "step": 1490000 }, { "epoch": 0.89, - "learning_rate": 4.9784467334735705e-05, - "loss": 0.4008, + "learning_rate": 4.978430353742198e-05, + "loss": 0.3039, "step": 1490500 }, { "epoch": 0.89, - "learning_rate": 4.9782367369175145e-05, - "loss": 0.4003, + "learning_rate": 4.978220357186142e-05, + "loss": 0.2985, "step": 1491000 }, { "epoch": 0.89, - "learning_rate": 4.978026740361458e-05, - "loss": 0.3981, + "learning_rate": 4.9780103606300854e-05, + "loss": 0.2964, "step": 1491500 }, { "epoch": 0.89, - "learning_rate": 4.977817163798513e-05, - "loss": 0.3932, + "learning_rate": 4.977800364074029e-05, + "loss": 0.2978, "step": 1492000 }, { "epoch": 0.89, - "learning_rate": 4.977607167242457e-05, - "loss": 0.3994, + "learning_rate": 4.977590367517973e-05, + "loss": 0.301, "step": 1492500 }, { "epoch": 0.9, - "learning_rate": 4.9773971706864006e-05, - "loss": 0.3996, + "learning_rate": 4.977380370961916e-05, + "loss": 0.2979, "step": 1493000 }, { "epoch": 0.9, - "learning_rate": 4.977187174130344e-05, - "loss": 0.3983, + "learning_rate": 4.9771703744058594e-05, + "loss": 0.3028, "step": 1493500 }, { "epoch": 0.9, - "learning_rate": 4.976977597567399e-05, - "loss": 0.406, + "learning_rate": 4.9769607978429155e-05, + "loss": 0.3014, "step": 1494000 }, { "epoch": 0.9, - "learning_rate": 4.9767680210044553e-05, - "loss": 0.4002, + "learning_rate": 4.976750801286859e-05, + "loss": 0.3035, "step": 1494500 }, { "epoch": 0.9, - "learning_rate": 4.9765580244483994e-05, - "loss": 0.4054, + "learning_rate": 4.976540804730802e-05, + "loss": 0.2989, "step": 1495000 }, { "epoch": 0.9, - "learning_rate": 4.976348027892343e-05, - "loss": 0.3898, + "learning_rate": 4.976330808174746e-05, + "loss": 0.2937, "step": 1495500 }, { "epoch": 0.9, - "learning_rate": 4.976138031336286e-05, - "loss": 0.3999, + "learning_rate": 4.9761208116186895e-05, + "loss": 0.3013, "step": 1496000 }, { "epoch": 0.9, - "learning_rate": 4.9759280347802294e-05, - "loss": 0.4097, + "learning_rate": 4.975910815062633e-05, + "loss": 0.3042, "step": 1496500 }, { "epoch": 0.9, - "learning_rate": 4.975718038224173e-05, - "loss": 0.4069, + "learning_rate": 4.975700818506577e-05, + "loss": 0.3034, "step": 1497000 }, { "epoch": 0.9, - "learning_rate": 4.975508041668116e-05, - "loss": 0.4037, + "learning_rate": 4.97549082195052e-05, + "loss": 0.3052, "step": 1497500 }, { "epoch": 0.9, - "learning_rate": 4.97529804511206e-05, - "loss": 0.4046, + "learning_rate": 4.975280825394463e-05, + "loss": 0.3041, "step": 1498000 }, { "epoch": 0.9, - "learning_rate": 4.975088468549116e-05, - "loss": 0.3988, + "learning_rate": 4.975070828838407e-05, + "loss": 0.3017, "step": 1498500 }, { "epoch": 0.9, - "learning_rate": 4.974878471993059e-05, - "loss": 0.4027, + "learning_rate": 4.97486083228235e-05, + "loss": 0.3055, "step": 1499000 }, { "epoch": 0.9, - "learning_rate": 4.974668475437003e-05, - "loss": 0.407, + "learning_rate": 4.9746508357262936e-05, + "loss": 0.301, "step": 1499500 }, { "epoch": 0.9, - "learning_rate": 4.974458478880946e-05, - "loss": 0.4007, + "learning_rate": 4.9744412591633497e-05, + "loss": 0.2999, "step": 1500000 }, { "epoch": 0.9, - "eval_loss": 0.3794560134410858, - "eval_runtime": 1129.597, - "eval_samples_per_second": 466.29, - "eval_steps_per_second": 77.715, + "eval_loss": 0.27191513776779175, + "eval_runtime": 1482.9586, + "eval_samples_per_second": 355.182, + "eval_steps_per_second": 59.197, "step": 1500000 }, { "epoch": 0.9, - "learning_rate": 4.974248902318002e-05, - "loss": 0.4074, + "learning_rate": 4.974231262607293e-05, + "loss": 0.2994, "step": 1500500 }, { "epoch": 0.9, - "learning_rate": 4.9740389057619456e-05, - "loss": 0.4007, + "learning_rate": 4.9740212660512363e-05, + "loss": 0.3043, "step": 1501000 }, { "epoch": 0.9, - "learning_rate": 4.973828909205889e-05, - "loss": 0.396, + "learning_rate": 4.9738112694951804e-05, + "loss": 0.2953, "step": 1501500 }, { "epoch": 0.9, - "learning_rate": 4.973618912649832e-05, - "loss": 0.3875, + "learning_rate": 4.9736016929322364e-05, + "loss": 0.2937, "step": 1502000 }, { "epoch": 0.9, - "learning_rate": 4.973409336086888e-05, - "loss": 0.403, + "learning_rate": 4.97339169637618e-05, + "loss": 0.3004, "step": 1502500 }, { "epoch": 0.9, - "learning_rate": 4.9731993395308317e-05, - "loss": 0.4012, + "learning_rate": 4.9731816998201224e-05, + "loss": 0.3003, "step": 1503000 }, { "epoch": 0.9, - "learning_rate": 4.972989342974775e-05, - "loss": 0.4021, + "learning_rate": 4.9729717032640664e-05, + "loss": 0.3061, "step": 1503500 }, { "epoch": 0.9, - "learning_rate": 4.9727793464187183e-05, - "loss": 0.3887, + "learning_rate": 4.97276170670801e-05, + "loss": 0.2934, "step": 1504000 }, { "epoch": 0.9, - "learning_rate": 4.972569349862662e-05, - "loss": 0.4006, + "learning_rate": 4.972551710151953e-05, + "loss": 0.294, "step": 1504500 }, { "epoch": 0.9, - "learning_rate": 4.972359353306606e-05, - "loss": 0.4025, + "learning_rate": 4.972341713595897e-05, + "loss": 0.304, "step": 1505000 }, { "epoch": 0.9, - "learning_rate": 4.972149356750549e-05, - "loss": 0.4123, + "learning_rate": 4.9721317170398405e-05, + "loss": 0.3031, "step": 1505500 }, { "epoch": 0.9, - "learning_rate": 4.9719393601944924e-05, - "loss": 0.3975, + "learning_rate": 4.971921720483784e-05, + "loss": 0.2981, "step": 1506000 }, { "epoch": 0.9, - "learning_rate": 4.9717302036246605e-05, - "loss": 0.4035, + "learning_rate": 4.971711723927728e-05, + "loss": 0.3003, "step": 1506500 }, { "epoch": 0.9, - "learning_rate": 4.9715202070686045e-05, - "loss": 0.4016, + "learning_rate": 4.971502147364783e-05, + "loss": 0.3007, "step": 1507000 }, { "epoch": 0.9, - "learning_rate": 4.971310210512548e-05, - "loss": 0.4217, + "learning_rate": 4.9712921508087266e-05, + "loss": 0.3108, "step": 1507500 }, { "epoch": 0.9, - "learning_rate": 4.971100213956491e-05, - "loss": 0.3977, + "learning_rate": 4.97108215425267e-05, + "loss": 0.3025, "step": 1508000 }, { "epoch": 0.9, - "learning_rate": 4.9708902174004345e-05, - "loss": 0.4075, + "learning_rate": 4.970872157696614e-05, + "loss": 0.304, "step": 1508500 }, { "epoch": 0.9, - "learning_rate": 4.970680220844378e-05, - "loss": 0.4105, + "learning_rate": 4.970662581133669e-05, + "loss": 0.3011, "step": 1509000 }, { "epoch": 0.91, - "learning_rate": 4.970470224288321e-05, - "loss": 0.4038, + "learning_rate": 4.9704525845776126e-05, + "loss": 0.3056, "step": 1509500 }, { "epoch": 0.91, - "learning_rate": 4.970260227732265e-05, - "loss": 0.4064, + "learning_rate": 4.970242588021557e-05, + "loss": 0.3041, "step": 1510000 }, { "epoch": 0.91, - "learning_rate": 4.970050651169321e-05, - "loss": 0.396, + "learning_rate": 4.9700325914655e-05, + "loss": 0.2992, "step": 1510500 }, { "epoch": 0.91, - "learning_rate": 4.969840654613264e-05, - "loss": 0.3902, + "learning_rate": 4.9698225949094434e-05, + "loss": 0.2953, "step": 1511000 }, { "epoch": 0.91, - "learning_rate": 4.969630658057207e-05, - "loss": 0.3979, + "learning_rate": 4.969613018346499e-05, + "loss": 0.3017, "step": 1511500 }, { "epoch": 0.91, - "learning_rate": 4.969420661501151e-05, - "loss": 0.4011, + "learning_rate": 4.969403021790443e-05, + "loss": 0.3007, "step": 1512000 }, { "epoch": 0.91, - "learning_rate": 4.969211084938207e-05, - "loss": 0.3905, + "learning_rate": 4.969193025234386e-05, + "loss": 0.2922, "step": 1512500 }, { "epoch": 0.91, - "learning_rate": 4.969001508375263e-05, - "loss": 0.4015, + "learning_rate": 4.9689830286783294e-05, + "loss": 0.3074, "step": 1513000 }, { "epoch": 0.91, - "learning_rate": 4.968791511819206e-05, - "loss": 0.3921, + "learning_rate": 4.9687730321222735e-05, + "loss": 0.2972, "step": 1513500 }, { "epoch": 0.91, - "learning_rate": 4.96858151526315e-05, - "loss": 0.3909, + "learning_rate": 4.968563455559329e-05, + "loss": 0.2963, "step": 1514000 }, { "epoch": 0.91, - "learning_rate": 4.9683715187070934e-05, - "loss": 0.3968, + "learning_rate": 4.968353878996385e-05, + "loss": 0.2946, "step": 1514500 }, { "epoch": 0.91, - "learning_rate": 4.968161522151037e-05, - "loss": 0.3973, + "learning_rate": 4.9681438824403275e-05, + "loss": 0.2989, "step": 1515000 }, { "epoch": 0.91, - "learning_rate": 4.967951945588092e-05, - "loss": 0.4059, + "learning_rate": 4.9679338858842715e-05, + "loss": 0.3001, "step": 1515500 }, { "epoch": 0.91, - "learning_rate": 4.967741949032036e-05, - "loss": 0.3962, + "learning_rate": 4.967723889328215e-05, + "loss": 0.2959, "step": 1516000 }, { "epoch": 0.91, - "learning_rate": 4.9675319524759795e-05, - "loss": 0.3878, + "learning_rate": 4.967513892772158e-05, + "loss": 0.2973, "step": 1516500 }, { "epoch": 0.91, - "learning_rate": 4.967321955919923e-05, - "loss": 0.3941, + "learning_rate": 4.9673043162092136e-05, + "loss": 0.297, "step": 1517000 }, { "epoch": 0.91, - "learning_rate": 4.967111959363867e-05, - "loss": 0.4089, + "learning_rate": 4.9670943196531576e-05, + "loss": 0.3087, "step": 1517500 }, { "epoch": 0.91, - "learning_rate": 4.9669019628078095e-05, - "loss": 0.3994, + "learning_rate": 4.966884323097101e-05, + "loss": 0.2972, "step": 1518000 }, { "epoch": 0.91, - "learning_rate": 4.966691966251753e-05, - "loss": 0.3919, + "learning_rate": 4.966674326541044e-05, + "loss": 0.2957, "step": 1518500 }, { "epoch": 0.91, - "learning_rate": 4.966481969695697e-05, - "loss": 0.4007, + "learning_rate": 4.966464329984988e-05, + "loss": 0.3022, "step": 1519000 }, { "epoch": 0.91, - "learning_rate": 4.966272393132753e-05, - "loss": 0.3994, + "learning_rate": 4.966254753422044e-05, + "loss": 0.2954, "step": 1519500 }, { "epoch": 0.91, - "learning_rate": 4.966062396576696e-05, - "loss": 0.3998, + "learning_rate": 4.966044756865987e-05, + "loss": 0.3013, "step": 1520000 }, { "epoch": 0.91, - "learning_rate": 4.9658528200137516e-05, - "loss": 0.3936, + "learning_rate": 4.9658347603099304e-05, + "loss": 0.3001, "step": 1520500 }, { "epoch": 0.91, - "learning_rate": 4.9656428234576957e-05, - "loss": 0.3968, + "learning_rate": 4.9656251837469864e-05, + "loss": 0.3034, "step": 1521000 }, { "epoch": 0.91, - "learning_rate": 4.965432826901639e-05, - "loss": 0.3943, + "learning_rate": 4.9654156071840425e-05, + "loss": 0.2999, "step": 1521500 }, { "epoch": 0.91, - "learning_rate": 4.9652228303455823e-05, - "loss": 0.3983, + "learning_rate": 4.965205610627986e-05, + "loss": 0.3043, "step": 1522000 }, { "epoch": 0.91, - "learning_rate": 4.9650128337895264e-05, - "loss": 0.3962, + "learning_rate": 4.964995614071929e-05, + "loss": 0.3046, "step": 1522500 }, { "epoch": 0.91, - "learning_rate": 4.964802837233469e-05, - "loss": 0.4097, + "learning_rate": 4.964785617515873e-05, + "loss": 0.2998, "step": 1523000 }, { "epoch": 0.91, - "learning_rate": 4.9645928406774124e-05, - "loss": 0.4114, + "learning_rate": 4.9645756209598165e-05, + "loss": 0.3031, "step": 1523500 }, { "epoch": 0.91, - "learning_rate": 4.9643828441213564e-05, - "loss": 0.4113, + "learning_rate": 4.96436562440376e-05, + "loss": 0.3015, "step": 1524000 }, { "epoch": 0.91, - "learning_rate": 4.964173687551524e-05, - "loss": 0.4016, + "learning_rate": 4.964155627847703e-05, + "loss": 0.2973, "step": 1524500 }, { "epoch": 0.91, - "learning_rate": 4.963963690995468e-05, - "loss": 0.3945, + "learning_rate": 4.9639456312916466e-05, + "loss": 0.2998, "step": 1525000 }, { "epoch": 0.91, - "learning_rate": 4.963753694439411e-05, - "loss": 0.3973, + "learning_rate": 4.96373563473559e-05, + "loss": 0.2945, "step": 1525500 }, { "epoch": 0.91, - "learning_rate": 4.9635436978833545e-05, - "loss": 0.4043, + "learning_rate": 4.963526058172646e-05, + "loss": 0.3053, "step": 1526000 }, { "epoch": 0.92, - "learning_rate": 4.9633337013272985e-05, - "loss": 0.3927, + "learning_rate": 4.963316061616589e-05, + "loss": 0.2998, "step": 1526500 }, { "epoch": 0.92, - "learning_rate": 4.963123704771242e-05, - "loss": 0.3974, + "learning_rate": 4.9631060650605326e-05, + "loss": 0.3016, "step": 1527000 }, { "epoch": 0.92, - "learning_rate": 4.962913708215185e-05, - "loss": 0.4042, + "learning_rate": 4.962896068504476e-05, + "loss": 0.3006, "step": 1527500 }, { "epoch": 0.92, - "learning_rate": 4.9627037116591285e-05, - "loss": 0.3977, + "learning_rate": 4.96268607194842e-05, + "loss": 0.2979, "step": 1528000 }, { "epoch": 0.92, - "learning_rate": 4.9624941350961846e-05, - "loss": 0.4043, + "learning_rate": 4.962476495385476e-05, + "loss": 0.3087, "step": 1528500 }, { "epoch": 0.92, - "learning_rate": 4.962284138540128e-05, - "loss": 0.4104, + "learning_rate": 4.962266498829419e-05, + "loss": 0.3059, "step": 1529000 }, { "epoch": 0.92, - "learning_rate": 4.962074141984072e-05, - "loss": 0.408, + "learning_rate": 4.962056502273363e-05, + "loss": 0.3014, "step": 1529500 }, { "epoch": 0.92, - "learning_rate": 4.9618641454280146e-05, - "loss": 0.4034, + "learning_rate": 4.961846505717306e-05, + "loss": 0.3004, "step": 1530000 }, { "epoch": 0.92, - "learning_rate": 4.9616545688650707e-05, - "loss": 0.3955, + "learning_rate": 4.9616365091612494e-05, + "loss": 0.2988, "step": 1530500 }, { "epoch": 0.92, - "learning_rate": 4.961444572309014e-05, - "loss": 0.3994, + "learning_rate": 4.9614265126051934e-05, + "loss": 0.3017, "step": 1531000 }, { "epoch": 0.92, - "learning_rate": 4.961234575752958e-05, - "loss": 0.3949, + "learning_rate": 4.961216516049137e-05, + "loss": 0.2965, "step": 1531500 }, { "epoch": 0.92, - "learning_rate": 4.9610245791969014e-05, - "loss": 0.4051, + "learning_rate": 4.96100651949308e-05, + "loss": 0.2993, "step": 1532000 }, { "epoch": 0.92, - "learning_rate": 4.960814582640844e-05, - "loss": 0.3973, + "learning_rate": 4.960796522937024e-05, + "loss": 0.2949, "step": 1532500 }, { "epoch": 0.92, - "learning_rate": 4.960604586084788e-05, - "loss": 0.3957, + "learning_rate": 4.9605865263809675e-05, + "loss": 0.2942, "step": 1533000 }, { "epoch": 0.92, - "learning_rate": 4.9603945895287314e-05, - "loss": 0.3979, + "learning_rate": 4.960376529824911e-05, + "loss": 0.2951, "step": 1533500 }, { "epoch": 0.92, - "learning_rate": 4.960184592972675e-05, - "loss": 0.4057, + "learning_rate": 4.960166533268855e-05, + "loss": 0.3009, "step": 1534000 }, { "epoch": 0.92, - "learning_rate": 4.959975016409731e-05, - "loss": 0.3929, + "learning_rate": 4.9599565367127975e-05, + "loss": 0.2999, "step": 1534500 }, { "epoch": 0.92, - "learning_rate": 4.959765439846787e-05, - "loss": 0.3925, + "learning_rate": 4.959746540156741e-05, + "loss": 0.298, "step": 1535000 }, { "epoch": 0.92, - "learning_rate": 4.95955544329073e-05, - "loss": 0.4034, + "learning_rate": 4.959536543600685e-05, + "loss": 0.304, "step": 1535500 }, { "epoch": 0.92, - "learning_rate": 4.9593454467346735e-05, - "loss": 0.4077, + "learning_rate": 4.959326547044628e-05, + "loss": 0.3067, "step": 1536000 }, { "epoch": 0.92, - "learning_rate": 4.9591354501786175e-05, - "loss": 0.3934, + "learning_rate": 4.959116970481684e-05, + "loss": 0.2972, "step": 1536500 }, { "epoch": 0.92, - "learning_rate": 4.95892545362256e-05, - "loss": 0.3951, + "learning_rate": 4.958906973925627e-05, + "loss": 0.2991, "step": 1537000 }, { "epoch": 0.92, - "learning_rate": 4.9587154570665036e-05, - "loss": 0.4154, + "learning_rate": 4.958696977369571e-05, + "loss": 0.303, "step": 1537500 }, { "epoch": 0.92, - "learning_rate": 4.9585054605104476e-05, - "loss": 0.3942, + "learning_rate": 4.958486980813514e-05, + "loss": 0.2975, "step": 1538000 }, { "epoch": 0.92, - "learning_rate": 4.958295463954391e-05, - "loss": 0.4112, + "learning_rate": 4.958276984257458e-05, + "loss": 0.3035, "step": 1538500 }, { "epoch": 0.92, - "learning_rate": 4.958086307384559e-05, - "loss": 0.4001, + "learning_rate": 4.958067407694514e-05, + "loss": 0.2969, "step": 1539000 }, { "epoch": 0.92, - "learning_rate": 4.957876310828502e-05, - "loss": 0.3966, + "learning_rate": 4.957857411138457e-05, + "loss": 0.2942, "step": 1539500 }, { "epoch": 0.92, - "learning_rate": 4.957666314272446e-05, - "loss": 0.4014, + "learning_rate": 4.9576474145824004e-05, + "loss": 0.3011, "step": 1540000 }, { "epoch": 0.92, - "learning_rate": 4.95745631771639e-05, - "loss": 0.3941, + "learning_rate": 4.9574374180263444e-05, + "loss": 0.2956, "step": 1540500 }, { "epoch": 0.92, - "learning_rate": 4.957246321160333e-05, - "loss": 0.3895, + "learning_rate": 4.957227421470288e-05, + "loss": 0.2902, "step": 1541000 }, { "epoch": 0.92, - "learning_rate": 4.9570363246042764e-05, - "loss": 0.3986, + "learning_rate": 4.957017424914231e-05, + "loss": 0.302, "step": 1541500 }, { "epoch": 0.92, - "learning_rate": 4.95682632804822e-05, - "loss": 0.3963, + "learning_rate": 4.9568078483512865e-05, + "loss": 0.2991, "step": 1542000 }, { "epoch": 0.92, - "learning_rate": 4.956616751485276e-05, - "loss": 0.405, + "learning_rate": 4.9565978517952305e-05, + "loss": 0.3002, "step": 1542500 }, { "epoch": 0.93, - "learning_rate": 4.956406754929219e-05, - "loss": 0.4128, + "learning_rate": 4.956387855239174e-05, + "loss": 0.306, "step": 1543000 }, { "epoch": 0.93, - "learning_rate": 4.956196758373163e-05, - "loss": 0.4049, + "learning_rate": 4.956177858683117e-05, + "loss": 0.3029, "step": 1543500 }, { "epoch": 0.93, - "learning_rate": 4.955986761817106e-05, - "loss": 0.3983, + "learning_rate": 4.9559682821201725e-05, + "loss": 0.2939, "step": 1544000 }, { "epoch": 0.93, - "learning_rate": 4.955776765261049e-05, - "loss": 0.407, + "learning_rate": 4.9557582855641166e-05, + "loss": 0.3036, "step": 1544500 }, { "epoch": 0.93, - "learning_rate": 4.955567188698105e-05, - "loss": 0.3942, + "learning_rate": 4.95554828900806e-05, + "loss": 0.2922, "step": 1545000 }, { "epoch": 0.93, - "learning_rate": 4.955357192142049e-05, - "loss": 0.391, + "learning_rate": 4.955338292452003e-05, + "loss": 0.3015, "step": 1545500 }, { "epoch": 0.93, - "learning_rate": 4.9551471955859925e-05, - "loss": 0.3967, + "learning_rate": 4.955128715889059e-05, + "loss": 0.3016, "step": 1546000 }, { "epoch": 0.93, - "learning_rate": 4.954937199029935e-05, - "loss": 0.399, + "learning_rate": 4.9549187193330026e-05, + "loss": 0.2986, "step": 1546500 }, { "epoch": 0.93, - "learning_rate": 4.954727202473879e-05, - "loss": 0.4027, + "learning_rate": 4.954708722776946e-05, + "loss": 0.3051, "step": 1547000 }, { "epoch": 0.93, - "learning_rate": 4.954517625910935e-05, - "loss": 0.4001, + "learning_rate": 4.95449872622089e-05, + "loss": 0.3021, "step": 1547500 }, { "epoch": 0.93, - "learning_rate": 4.9543076293548786e-05, - "loss": 0.3956, + "learning_rate": 4.9542887296648334e-05, + "loss": 0.2959, "step": 1548000 }, { "epoch": 0.93, - "learning_rate": 4.9540976327988226e-05, - "loss": 0.3947, + "learning_rate": 4.954078733108777e-05, + "loss": 0.3007, "step": 1548500 }, { "epoch": 0.93, - "learning_rate": 4.953887636242765e-05, - "loss": 0.3933, + "learning_rate": 4.953868736552721e-05, + "loss": 0.3014, "step": 1549000 }, { "epoch": 0.93, - "learning_rate": 4.9536776396867087e-05, - "loss": 0.4064, + "learning_rate": 4.953658739996664e-05, + "loss": 0.3044, "step": 1549500 }, { "epoch": 0.93, - "learning_rate": 4.953467643130653e-05, - "loss": 0.4016, + "learning_rate": 4.9534487434406074e-05, + "loss": 0.3001, "step": 1550000 }, { "epoch": 0.93, - "learning_rate": 4.953257646574596e-05, - "loss": 0.3944, + "learning_rate": 4.9532387468845514e-05, + "loss": 0.2939, "step": 1550500 }, { "epoch": 0.93, - "learning_rate": 4.9530476500185394e-05, - "loss": 0.3909, + "learning_rate": 4.953028750328495e-05, + "loss": 0.2957, "step": 1551000 }, { "epoch": 0.93, - "learning_rate": 4.952838073455595e-05, - "loss": 0.3999, + "learning_rate": 4.952818753772438e-05, + "loss": 0.2982, "step": 1551500 }, { "epoch": 0.93, - "learning_rate": 4.952628076899539e-05, - "loss": 0.404, + "learning_rate": 4.9526087572163815e-05, + "loss": 0.3029, "step": 1552000 }, { "epoch": 0.93, - "learning_rate": 4.952418080343482e-05, - "loss": 0.3932, + "learning_rate": 4.9523991806534375e-05, + "loss": 0.2956, "step": 1552500 }, { "epoch": 0.93, - "learning_rate": 4.9522080837874254e-05, - "loss": 0.3956, + "learning_rate": 4.952189184097381e-05, + "loss": 0.2975, "step": 1553000 }, { "epoch": 0.93, - "learning_rate": 4.9519985072244815e-05, - "loss": 0.3976, + "learning_rate": 4.951979187541324e-05, + "loss": 0.2989, "step": 1553500 }, { "epoch": 0.93, - "learning_rate": 4.951788510668425e-05, - "loss": 0.392, + "learning_rate": 4.951769190985268e-05, + "loss": 0.2931, "step": 1554000 }, { "epoch": 0.93, - "learning_rate": 4.951578514112368e-05, - "loss": 0.3935, + "learning_rate": 4.951559194429211e-05, + "loss": 0.2971, "step": 1554500 }, { "epoch": 0.93, - "learning_rate": 4.951368937549424e-05, - "loss": 0.3906, + "learning_rate": 4.951349197873155e-05, + "loss": 0.2966, "step": 1555000 }, { "epoch": 0.93, - "learning_rate": 4.9511593609864796e-05, - "loss": 0.3897, + "learning_rate": 4.951139621310211e-05, + "loss": 0.2973, "step": 1555500 }, { "epoch": 0.93, - "learning_rate": 4.9509493644304236e-05, - "loss": 0.4028, + "learning_rate": 4.950929624754154e-05, + "loss": 0.3028, "step": 1556000 }, { "epoch": 0.93, - "learning_rate": 4.950739367874367e-05, - "loss": 0.3969, + "learning_rate": 4.9507196281980976e-05, + "loss": 0.2985, "step": 1556500 }, { "epoch": 0.93, - "learning_rate": 4.95052937131831e-05, - "loss": 0.4038, + "learning_rate": 4.950509631642041e-05, + "loss": 0.301, "step": 1557000 }, { "epoch": 0.93, - "learning_rate": 4.950319374762254e-05, - "loss": 0.3962, + "learning_rate": 4.9503004750722084e-05, + "loss": 0.3007, "step": 1557500 }, { "epoch": 0.93, - "learning_rate": 4.9501093782061977e-05, - "loss": 0.4027, + "learning_rate": 4.9500908985092644e-05, + "loss": 0.2974, "step": 1558000 }, { "epoch": 0.93, - "learning_rate": 4.94989938165014e-05, - "loss": 0.4158, + "learning_rate": 4.94988132194632e-05, + "loss": 0.31, "step": 1558500 }, { "epoch": 0.93, - "learning_rate": 4.9496893850940843e-05, - "loss": 0.4029, + "learning_rate": 4.949671325390264e-05, + "loss": 0.3036, "step": 1559000 }, { "epoch": 0.93, - "learning_rate": 4.949479388538028e-05, - "loss": 0.386, + "learning_rate": 4.949461328834207e-05, + "loss": 0.2915, "step": 1559500 }, { "epoch": 0.94, - "learning_rate": 4.949269391981971e-05, - "loss": 0.3911, + "learning_rate": 4.949251332278151e-05, + "loss": 0.29, "step": 1560000 }, { "epoch": 0.94, - "learning_rate": 4.949059395425915e-05, - "loss": 0.4012, + "learning_rate": 4.9490413357220945e-05, + "loss": 0.3002, "step": 1560500 }, { "epoch": 0.94, - "learning_rate": 4.9488493988698584e-05, - "loss": 0.4041, + "learning_rate": 4.948831339166037e-05, + "loss": 0.3036, "step": 1561000 }, { "epoch": 0.94, - "learning_rate": 4.948639822306914e-05, - "loss": 0.3997, + "learning_rate": 4.948621342609981e-05, + "loss": 0.3002, "step": 1561500 }, { "epoch": 0.94, - "learning_rate": 4.948429825750858e-05, - "loss": 0.3929, + "learning_rate": 4.9484113460539245e-05, + "loss": 0.2982, "step": 1562000 }, { "epoch": 0.94, - "learning_rate": 4.948220249187914e-05, - "loss": 0.4075, + "learning_rate": 4.948201349497868e-05, + "loss": 0.3015, "step": 1562500 }, { "epoch": 0.94, - "learning_rate": 4.948010252631857e-05, - "loss": 0.3843, + "learning_rate": 4.947991772934923e-05, + "loss": 0.2916, "step": 1563000 }, { "epoch": 0.94, - "learning_rate": 4.9478002560758e-05, - "loss": 0.4202, + "learning_rate": 4.947781776378867e-05, + "loss": 0.3059, "step": 1563500 }, { "epoch": 0.94, - "learning_rate": 4.947590259519744e-05, - "loss": 0.4033, + "learning_rate": 4.9475717798228106e-05, + "loss": 0.2971, "step": 1564000 }, { "epoch": 0.94, - "learning_rate": 4.947380262963687e-05, - "loss": 0.3998, + "learning_rate": 4.947361783266754e-05, + "loss": 0.2987, "step": 1564500 }, { "epoch": 0.94, - "learning_rate": 4.9471702664076305e-05, - "loss": 0.3954, + "learning_rate": 4.947151786710698e-05, + "loss": 0.2963, "step": 1565000 }, { "epoch": 0.94, - "learning_rate": 4.9469602698515746e-05, - "loss": 0.3968, + "learning_rate": 4.946941790154641e-05, + "loss": 0.2953, "step": 1565500 }, { "epoch": 0.94, - "learning_rate": 4.94675069328863e-05, - "loss": 0.4035, + "learning_rate": 4.946731793598585e-05, + "loss": 0.2982, "step": 1566000 }, { "epoch": 0.94, - "learning_rate": 4.946540696732573e-05, - "loss": 0.3872, + "learning_rate": 4.946521797042529e-05, + "loss": 0.2944, "step": 1566500 }, { "epoch": 0.94, - "learning_rate": 4.9463307001765166e-05, - "loss": 0.3996, + "learning_rate": 4.946311800486472e-05, + "loss": 0.2985, "step": 1567000 }, { "epoch": 0.94, - "learning_rate": 4.9461207036204606e-05, - "loss": 0.4081, + "learning_rate": 4.9461018039304154e-05, + "loss": 0.3026, "step": 1567500 }, { "epoch": 0.94, - "learning_rate": 4.945910707064404e-05, - "loss": 0.3933, + "learning_rate": 4.9458922273674714e-05, + "loss": 0.2942, "step": 1568000 }, { "epoch": 0.94, - "learning_rate": 4.9457011305014593e-05, - "loss": 0.3907, + "learning_rate": 4.945682230811415e-05, + "loss": 0.2947, "step": 1568500 }, { "epoch": 0.94, - "learning_rate": 4.9454911339454034e-05, - "loss": 0.4086, + "learning_rate": 4.945472234255358e-05, + "loss": 0.3051, "step": 1569000 }, { "epoch": 0.94, - "learning_rate": 4.945281137389347e-05, - "loss": 0.4096, + "learning_rate": 4.945262237699302e-05, + "loss": 0.3024, "step": 1569500 }, { "epoch": 0.94, - "learning_rate": 4.94507114083329e-05, - "loss": 0.4034, + "learning_rate": 4.9450522411432455e-05, + "loss": 0.2948, "step": 1570000 }, { "epoch": 0.94, - "learning_rate": 4.944861144277234e-05, - "loss": 0.3939, + "learning_rate": 4.944842244587189e-05, + "loss": 0.2982, "step": 1570500 }, { "epoch": 0.94, - "learning_rate": 4.9446511477211774e-05, - "loss": 0.3917, + "learning_rate": 4.944632248031132e-05, + "loss": 0.2954, "step": 1571000 }, { "epoch": 0.94, - "learning_rate": 4.944441151165121e-05, - "loss": 0.3904, + "learning_rate": 4.9444222514750755e-05, + "loss": 0.2972, "step": 1571500 }, { "epoch": 0.94, - "learning_rate": 4.944231574602176e-05, - "loss": 0.4004, + "learning_rate": 4.9442130949052436e-05, + "loss": 0.3044, "step": 1572000 }, { "epoch": 0.94, - "learning_rate": 4.94402157804612e-05, - "loss": 0.3958, + "learning_rate": 4.944003098349187e-05, + "loss": 0.3003, "step": 1572500 }, { "epoch": 0.94, - "learning_rate": 4.9438115814900635e-05, - "loss": 0.4039, + "learning_rate": 4.94379310179313e-05, + "loss": 0.3011, "step": 1573000 }, { "epoch": 0.94, - "learning_rate": 4.943601584934007e-05, - "loss": 0.3906, + "learning_rate": 4.943583105237074e-05, + "loss": 0.3004, "step": 1573500 }, { "epoch": 0.94, - "learning_rate": 4.943392008371062e-05, - "loss": 0.3972, + "learning_rate": 4.9433731086810176e-05, + "loss": 0.2951, "step": 1574000 }, { "epoch": 0.94, - "learning_rate": 4.943182011815006e-05, - "loss": 0.4021, + "learning_rate": 4.943163112124961e-05, + "loss": 0.2963, "step": 1574500 }, { "epoch": 0.94, - "learning_rate": 4.9429720152589496e-05, - "loss": 0.3954, + "learning_rate": 4.942953115568905e-05, + "loss": 0.2996, "step": 1575000 }, { "epoch": 0.94, - "learning_rate": 4.942762018702893e-05, - "loss": 0.4037, + "learning_rate": 4.942743119012848e-05, + "loss": 0.3071, "step": 1575500 }, { "epoch": 0.94, - "learning_rate": 4.942552022146837e-05, - "loss": 0.3884, + "learning_rate": 4.942533122456792e-05, + "loss": 0.2966, "step": 1576000 }, { "epoch": 0.95, - "learning_rate": 4.94234202559078e-05, - "loss": 0.3984, + "learning_rate": 4.942323125900735e-05, + "loss": 0.299, "step": 1576500 }, { "epoch": 0.95, - "learning_rate": 4.9421320290347236e-05, - "loss": 0.3922, + "learning_rate": 4.9421131293446784e-05, + "loss": 0.2978, "step": 1577000 }, { "epoch": 0.95, - "learning_rate": 4.94192245247178e-05, - "loss": 0.3999, + "learning_rate": 4.9419031327886224e-05, + "loss": 0.2973, "step": 1577500 }, { "epoch": 0.95, - "learning_rate": 4.941712455915723e-05, - "loss": 0.3917, + "learning_rate": 4.941693136232566e-05, + "loss": 0.2979, "step": 1578000 }, { "epoch": 0.95, - "learning_rate": 4.9415024593596664e-05, - "loss": 0.4033, + "learning_rate": 4.941483139676509e-05, + "loss": 0.3091, "step": 1578500 }, { "epoch": 0.95, - "learning_rate": 4.9412924628036104e-05, - "loss": 0.3927, + "learning_rate": 4.9412735631135644e-05, + "loss": 0.2997, "step": 1579000 }, { "epoch": 0.95, - "learning_rate": 4.941082466247554e-05, - "loss": 0.4096, + "learning_rate": 4.9410635665575085e-05, + "loss": 0.3027, "step": 1579500 }, { "epoch": 0.95, - "learning_rate": 4.940872469691497e-05, - "loss": 0.3933, + "learning_rate": 4.940853570001452e-05, + "loss": 0.2982, "step": 1580000 }, { "epoch": 0.95, - "learning_rate": 4.9406624731354404e-05, - "loss": 0.3939, + "learning_rate": 4.940643573445395e-05, + "loss": 0.2944, "step": 1580500 }, { "epoch": 0.95, - "learning_rate": 4.940452476579384e-05, - "loss": 0.4043, + "learning_rate": 4.940433576889339e-05, + "loss": 0.3047, "step": 1581000 }, { "epoch": 0.95, - "learning_rate": 4.94024290001644e-05, - "loss": 0.3971, + "learning_rate": 4.9402240003263945e-05, + "loss": 0.2988, "step": 1581500 }, { "epoch": 0.95, - "learning_rate": 4.940032903460383e-05, - "loss": 0.4013, + "learning_rate": 4.940014003770338e-05, + "loss": 0.2963, "step": 1582000 }, { "epoch": 0.95, - "learning_rate": 4.939822906904327e-05, - "loss": 0.4005, + "learning_rate": 4.939804007214281e-05, + "loss": 0.3026, "step": 1582500 }, { "epoch": 0.95, - "learning_rate": 4.93961291034827e-05, - "loss": 0.3918, + "learning_rate": 4.939594010658225e-05, + "loss": 0.2969, "step": 1583000 }, { "epoch": 0.95, - "learning_rate": 4.939403333785326e-05, - "loss": 0.4037, + "learning_rate": 4.9393840141021686e-05, + "loss": 0.2982, "step": 1583500 }, { "epoch": 0.95, - "learning_rate": 4.939193337229269e-05, - "loss": 0.4008, + "learning_rate": 4.939174017546112e-05, + "loss": 0.2943, "step": 1584000 }, { "epoch": 0.95, - "learning_rate": 4.938983340673213e-05, - "loss": 0.4018, + "learning_rate": 4.938964440983168e-05, + "loss": 0.2994, "step": 1584500 }, { "epoch": 0.95, - "learning_rate": 4.9387737641102686e-05, - "loss": 0.4125, + "learning_rate": 4.938754444427111e-05, + "loss": 0.3038, "step": 1585000 }, { "epoch": 0.95, - "learning_rate": 4.938563767554212e-05, - "loss": 0.393, + "learning_rate": 4.938544447871055e-05, + "loss": 0.2947, "step": 1585500 }, { "epoch": 0.95, - "learning_rate": 4.938353770998156e-05, - "loss": 0.3916, + "learning_rate": 4.938334451314999e-05, + "loss": 0.3003, "step": 1586000 }, { "epoch": 0.95, - "learning_rate": 4.938143774442099e-05, - "loss": 0.3972, + "learning_rate": 4.938124874752054e-05, + "loss": 0.2979, "step": 1586500 }, { "epoch": 0.95, - "learning_rate": 4.937933777886043e-05, - "loss": 0.4013, + "learning_rate": 4.9379148781959974e-05, + "loss": 0.2988, "step": 1587000 }, { "epoch": 0.95, - "learning_rate": 4.937723781329987e-05, - "loss": 0.4045, + "learning_rate": 4.937704881639941e-05, + "loss": 0.3005, "step": 1587500 }, { "epoch": 0.95, - "learning_rate": 4.9375137847739294e-05, - "loss": 0.3985, + "learning_rate": 4.937494885083885e-05, + "loss": 0.3005, "step": 1588000 }, { "epoch": 0.95, - "learning_rate": 4.937303788217873e-05, - "loss": 0.4055, + "learning_rate": 4.93728530852094e-05, + "loss": 0.3002, "step": 1588500 }, { "epoch": 0.95, - "learning_rate": 4.937094211654929e-05, - "loss": 0.3989, + "learning_rate": 4.9370753119648835e-05, + "loss": 0.3002, "step": 1589000 }, { "epoch": 0.95, - "learning_rate": 4.936884215098873e-05, - "loss": 0.3932, + "learning_rate": 4.936865315408827e-05, + "loss": 0.2996, "step": 1589500 }, { "epoch": 0.95, - "learning_rate": 4.936674638535928e-05, - "loss": 0.3932, + "learning_rate": 4.936655318852771e-05, + "loss": 0.2956, "step": 1590000 }, { "epoch": 0.95, - "learning_rate": 4.9364646419798715e-05, - "loss": 0.3953, + "learning_rate": 4.936445742289826e-05, + "loss": 0.299, "step": 1590500 }, { "epoch": 0.95, - "learning_rate": 4.936254645423815e-05, - "loss": 0.3987, + "learning_rate": 4.9362357457337695e-05, + "loss": 0.295, "step": 1591000 }, { "epoch": 0.95, - "learning_rate": 4.936044648867759e-05, - "loss": 0.3867, + "learning_rate": 4.9360257491777136e-05, + "loss": 0.2938, "step": 1591500 }, { "epoch": 0.95, - "learning_rate": 4.935834652311702e-05, - "loss": 0.3898, + "learning_rate": 4.935815752621657e-05, + "loss": 0.2974, "step": 1592000 }, { "epoch": 0.95, - "learning_rate": 4.9356246557556455e-05, - "loss": 0.4007, + "learning_rate": 4.935606176058712e-05, + "loss": 0.3032, "step": 1592500 }, { "epoch": 0.96, - "learning_rate": 4.935414659199589e-05, - "loss": 0.4088, + "learning_rate": 4.9353961795026556e-05, + "loss": 0.2985, "step": 1593000 }, { "epoch": 0.96, - "learning_rate": 4.935204662643532e-05, - "loss": 0.3968, + "learning_rate": 4.9351861829465996e-05, + "loss": 0.2978, "step": 1593500 }, { "epoch": 0.96, - "learning_rate": 4.934995086080588e-05, - "loss": 0.4114, + "learning_rate": 4.934976186390543e-05, + "loss": 0.3021, "step": 1594000 }, { "epoch": 0.96, - "learning_rate": 4.934785089524532e-05, - "loss": 0.4046, + "learning_rate": 4.934766189834486e-05, + "loss": 0.2982, "step": 1594500 }, { "epoch": 0.96, - "learning_rate": 4.9345755129615876e-05, - "loss": 0.4072, + "learning_rate": 4.934556613271542e-05, + "loss": 0.3052, "step": 1595000 }, { "epoch": 0.96, - "learning_rate": 4.934365516405531e-05, - "loss": 0.4097, + "learning_rate": 4.934346616715486e-05, + "loss": 0.301, "step": 1595500 }, { "epoch": 0.96, - "learning_rate": 4.934155519849474e-05, - "loss": 0.3958, + "learning_rate": 4.934136620159429e-05, + "loss": 0.2959, "step": 1596000 }, { "epoch": 0.96, - "learning_rate": 4.9339455232934184e-05, - "loss": 0.3986, + "learning_rate": 4.9339266236033724e-05, + "loss": 0.2963, "step": 1596500 }, { "epoch": 0.96, - "learning_rate": 4.933735946730474e-05, - "loss": 0.3877, + "learning_rate": 4.9337166270473164e-05, + "loss": 0.2971, "step": 1597000 }, { "epoch": 0.96, - "learning_rate": 4.933525950174417e-05, - "loss": 0.3983, + "learning_rate": 4.93350663049126e-05, + "loss": 0.3002, "step": 1597500 }, { "epoch": 0.96, - "learning_rate": 4.9333159536183604e-05, - "loss": 0.3961, + "learning_rate": 4.933296633935203e-05, + "loss": 0.2981, "step": 1598000 }, { "epoch": 0.96, - "learning_rate": 4.9331059570623044e-05, - "loss": 0.4036, + "learning_rate": 4.933086637379147e-05, + "loss": 0.2938, "step": 1598500 }, { "epoch": 0.96, - "learning_rate": 4.93289638049936e-05, - "loss": 0.3901, + "learning_rate": 4.9328766408230905e-05, + "loss": 0.2943, "step": 1599000 }, { "epoch": 0.96, - "learning_rate": 4.932686383943303e-05, - "loss": 0.4081, + "learning_rate": 4.932666644267034e-05, + "loss": 0.3056, "step": 1599500 }, { "epoch": 0.96, - "learning_rate": 4.932476387387247e-05, - "loss": 0.4044, + "learning_rate": 4.93245706770409e-05, + "loss": 0.3022, "step": 1600000 }, { "epoch": 0.96, - "eval_loss": 0.37684500217437744, - "eval_runtime": 1131.2002, - "eval_samples_per_second": 465.629, - "eval_steps_per_second": 77.605, + "eval_loss": 0.2697148025035858, + "eval_runtime": 1482.5603, + "eval_samples_per_second": 355.277, + "eval_steps_per_second": 59.213, "step": 1600000 }, { "epoch": 0.96, - "learning_rate": 4.9322663908311905e-05, - "loss": 0.3948, + "learning_rate": 4.932247071148033e-05, + "loss": 0.2978, "step": 1600500 }, { "epoch": 0.96, - "learning_rate": 4.932056394275134e-05, - "loss": 0.3825, + "learning_rate": 4.9320370745919766e-05, + "loss": 0.2972, "step": 1601000 }, { "epoch": 0.96, - "learning_rate": 4.931846397719078e-05, - "loss": 0.3892, + "learning_rate": 4.9318270780359206e-05, + "loss": 0.2977, "step": 1601500 }, { "epoch": 0.96, - "learning_rate": 4.931636821156133e-05, - "loss": 0.3969, + "learning_rate": 4.931617081479864e-05, + "loss": 0.2996, "step": 1602000 }, { "epoch": 0.96, - "learning_rate": 4.9314268246000766e-05, - "loss": 0.3916, + "learning_rate": 4.931407504916919e-05, + "loss": 0.2948, "step": 1602500 }, { "epoch": 0.96, - "learning_rate": 4.93121682804402e-05, - "loss": 0.3976, + "learning_rate": 4.9311975083608626e-05, + "loss": 0.3011, "step": 1603000 }, { "epoch": 0.96, - "learning_rate": 4.931006831487964e-05, - "loss": 0.3865, + "learning_rate": 4.930987511804807e-05, + "loss": 0.2919, "step": 1603500 }, { "epoch": 0.96, - "learning_rate": 4.930796834931907e-05, - "loss": 0.3893, + "learning_rate": 4.93077751524875e-05, + "loss": 0.2936, "step": 1604000 }, { "epoch": 0.96, - "learning_rate": 4.9305872583689626e-05, - "loss": 0.3903, + "learning_rate": 4.9305679386858054e-05, + "loss": 0.3047, "step": 1604500 }, { "epoch": 0.96, - "learning_rate": 4.930377261812906e-05, - "loss": 0.4042, + "learning_rate": 4.930357942129749e-05, + "loss": 0.3004, "step": 1605000 }, { "epoch": 0.96, - "learning_rate": 4.93016726525685e-05, - "loss": 0.3973, + "learning_rate": 4.930147945573693e-05, + "loss": 0.2986, "step": 1605500 }, { "epoch": 0.96, - "learning_rate": 4.9299572687007934e-05, - "loss": 0.3906, + "learning_rate": 4.929937949017636e-05, + "loss": 0.2959, "step": 1606000 }, { "epoch": 0.96, - "learning_rate": 4.929747272144737e-05, - "loss": 0.3997, + "learning_rate": 4.92972795246158e-05, + "loss": 0.3009, "step": 1606500 }, { "epoch": 0.96, - "learning_rate": 4.929537695581793e-05, - "loss": 0.3955, + "learning_rate": 4.9295183758986355e-05, + "loss": 0.2992, "step": 1607000 }, { "epoch": 0.96, - "learning_rate": 4.929327699025736e-05, - "loss": 0.3981, + "learning_rate": 4.929308379342579e-05, + "loss": 0.2957, "step": 1607500 }, { "epoch": 0.96, - "learning_rate": 4.9291177024696794e-05, - "loss": 0.4023, + "learning_rate": 4.929098382786522e-05, + "loss": 0.3111, "step": 1608000 }, { "epoch": 0.96, - "learning_rate": 4.9289077059136235e-05, - "loss": 0.401, + "learning_rate": 4.928888386230466e-05, + "loss": 0.3089, "step": 1608500 }, { "epoch": 0.96, - "learning_rate": 4.928697709357567e-05, - "loss": 0.4034, + "learning_rate": 4.9286783896744095e-05, + "loss": 0.3041, "step": 1609000 }, { "epoch": 0.96, - "learning_rate": 4.9284877128015095e-05, - "loss": 0.3957, + "learning_rate": 4.928468393118353e-05, + "loss": 0.2996, "step": 1609500 }, { "epoch": 0.97, - "learning_rate": 4.9282777162454535e-05, - "loss": 0.3997, + "learning_rate": 4.928258396562296e-05, + "loss": 0.2961, "step": 1610000 }, { "epoch": 0.97, - "learning_rate": 4.928067719689397e-05, - "loss": 0.3997, + "learning_rate": 4.928048819999352e-05, + "loss": 0.3085, "step": 1610500 }, { "epoch": 0.97, - "learning_rate": 4.927858143126453e-05, - "loss": 0.3998, + "learning_rate": 4.9278388234432956e-05, + "loss": 0.3009, "step": 1611000 }, { "epoch": 0.97, - "learning_rate": 4.9276481465703955e-05, - "loss": 0.4005, + "learning_rate": 4.927628826887239e-05, + "loss": 0.303, "step": 1611500 }, { "epoch": 0.97, - "learning_rate": 4.9274385700074516e-05, - "loss": 0.3922, + "learning_rate": 4.927418830331183e-05, + "loss": 0.298, "step": 1612000 }, { "epoch": 0.97, - "learning_rate": 4.9272285734513956e-05, - "loss": 0.4072, + "learning_rate": 4.9272088337751256e-05, + "loss": 0.3013, "step": 1612500 }, { "epoch": 0.97, - "learning_rate": 4.927018576895339e-05, - "loss": 0.4047, + "learning_rate": 4.926999257212182e-05, + "loss": 0.3055, "step": 1613000 }, { "epoch": 0.97, - "learning_rate": 4.926808580339282e-05, - "loss": 0.3941, + "learning_rate": 4.926789680649237e-05, + "loss": 0.2985, "step": 1613500 }, { "epoch": 0.97, - "learning_rate": 4.9265985837832256e-05, - "loss": 0.3874, + "learning_rate": 4.926579684093181e-05, + "loss": 0.2947, "step": 1614000 }, { "epoch": 0.97, - "learning_rate": 4.926389007220282e-05, - "loss": 0.386, + "learning_rate": 4.9263696875371244e-05, + "loss": 0.2913, "step": 1614500 }, { "epoch": 0.97, - "learning_rate": 4.926179010664225e-05, - "loss": 0.4041, + "learning_rate": 4.926159690981068e-05, + "loss": 0.3012, "step": 1615000 }, { "epoch": 0.97, - "learning_rate": 4.925969014108169e-05, - "loss": 0.4068, + "learning_rate": 4.925949694425012e-05, + "loss": 0.3001, "step": 1615500 }, { "epoch": 0.97, - "learning_rate": 4.9257590175521124e-05, - "loss": 0.4056, + "learning_rate": 4.925739697868955e-05, + "loss": 0.3081, "step": 1616000 }, { "epoch": 0.97, - "learning_rate": 4.925549440989168e-05, - "loss": 0.4118, + "learning_rate": 4.9255297013128985e-05, + "loss": 0.302, "step": 1616500 }, { "epoch": 0.97, - "learning_rate": 4.925339444433111e-05, - "loss": 0.4037, + "learning_rate": 4.925319704756842e-05, + "loss": 0.3045, "step": 1617000 }, { "epoch": 0.97, - "learning_rate": 4.925129447877055e-05, - "loss": 0.4001, + "learning_rate": 4.925109708200785e-05, + "loss": 0.2932, "step": 1617500 }, { "epoch": 0.97, - "learning_rate": 4.9249194513209985e-05, - "loss": 0.3976, + "learning_rate": 4.924900131637841e-05, + "loss": 0.2986, "step": 1618000 }, { "epoch": 0.97, - "learning_rate": 4.924709874758054e-05, - "loss": 0.3889, + "learning_rate": 4.9246901350817845e-05, + "loss": 0.2967, "step": 1618500 }, { "epoch": 0.97, - "learning_rate": 4.924499878201997e-05, - "loss": 0.3877, + "learning_rate": 4.9244801385257286e-05, + "loss": 0.2948, "step": 1619000 }, { "epoch": 0.97, - "learning_rate": 4.924289881645941e-05, - "loss": 0.4045, + "learning_rate": 4.924270141969671e-05, + "loss": 0.3024, "step": 1619500 }, { "epoch": 0.97, - "learning_rate": 4.9240798850898845e-05, - "loss": 0.4037, + "learning_rate": 4.924060565406727e-05, + "loss": 0.2957, "step": 1620000 }, { "epoch": 0.97, - "learning_rate": 4.92387030852694e-05, - "loss": 0.3958, + "learning_rate": 4.923850568850671e-05, + "loss": 0.2995, "step": 1620500 }, { "epoch": 0.97, - "learning_rate": 4.923660311970884e-05, - "loss": 0.4147, + "learning_rate": 4.9236405722946146e-05, + "loss": 0.3053, "step": 1621000 }, { "epoch": 0.97, - "learning_rate": 4.923450315414827e-05, - "loss": 0.3963, + "learning_rate": 4.923430575738558e-05, + "loss": 0.2956, "step": 1621500 }, { "epoch": 0.97, - "learning_rate": 4.9232403188587706e-05, - "loss": 0.3923, + "learning_rate": 4.923220579182501e-05, + "loss": 0.2968, "step": 1622000 }, { "epoch": 0.97, - "learning_rate": 4.9230303223027146e-05, - "loss": 0.387, + "learning_rate": 4.923010582626445e-05, + "loss": 0.2901, "step": 1622500 }, { "epoch": 0.97, - "learning_rate": 4.922820325746658e-05, - "loss": 0.3952, + "learning_rate": 4.922800586070388e-05, + "loss": 0.294, "step": 1623000 }, { "epoch": 0.97, - "learning_rate": 4.9226103291906006e-05, - "loss": 0.3988, + "learning_rate": 4.922590589514332e-05, + "loss": 0.3018, "step": 1623500 }, { "epoch": 0.97, - "learning_rate": 4.922400332634545e-05, - "loss": 0.4009, + "learning_rate": 4.922381012951388e-05, + "loss": 0.3015, "step": 1624000 }, { "epoch": 0.97, - "learning_rate": 4.922190336078488e-05, - "loss": 0.3952, + "learning_rate": 4.922171016395331e-05, + "loss": 0.2926, "step": 1624500 }, { "epoch": 0.97, - "learning_rate": 4.9219803395224314e-05, - "loss": 0.3908, + "learning_rate": 4.921961019839274e-05, + "loss": 0.2998, "step": 1625000 }, { "epoch": 0.97, - "learning_rate": 4.9217703429663754e-05, - "loss": 0.3975, + "learning_rate": 4.921751023283218e-05, + "loss": 0.2963, "step": 1625500 }, { "epoch": 0.97, - "learning_rate": 4.921560346410319e-05, - "loss": 0.3918, + "learning_rate": 4.9215410267271615e-05, + "loss": 0.2957, "step": 1626000 }, { "epoch": 0.98, - "learning_rate": 4.921350769847374e-05, - "loss": 0.3953, + "learning_rate": 4.921331450164217e-05, + "loss": 0.2967, "step": 1626500 }, { "epoch": 0.98, - "learning_rate": 4.921140773291318e-05, - "loss": 0.3985, + "learning_rate": 4.921121453608161e-05, + "loss": 0.3032, "step": 1627000 }, { "epoch": 0.98, - "learning_rate": 4.9209307767352615e-05, - "loss": 0.389, + "learning_rate": 4.920911457052104e-05, + "loss": 0.2946, "step": 1627500 }, { "epoch": 0.98, - "learning_rate": 4.920720780179205e-05, - "loss": 0.3956, + "learning_rate": 4.9207014604960475e-05, + "loss": 0.2957, "step": 1628000 }, { "epoch": 0.98, - "learning_rate": 4.92051120361626e-05, - "loss": 0.3997, + "learning_rate": 4.9204914639399915e-05, + "loss": 0.2957, "step": 1628500 }, { "epoch": 0.98, - "learning_rate": 4.920301207060204e-05, - "loss": 0.3925, + "learning_rate": 4.920281887377047e-05, + "loss": 0.2932, "step": 1629000 }, { "epoch": 0.98, - "learning_rate": 4.9200912105041475e-05, - "loss": 0.3974, + "learning_rate": 4.92007189082099e-05, + "loss": 0.3002, "step": 1629500 }, { "epoch": 0.98, - "learning_rate": 4.919881213948091e-05, - "loss": 0.3942, + "learning_rate": 4.9198618942649336e-05, + "loss": 0.2938, "step": 1630000 }, { "epoch": 0.98, - "learning_rate": 4.919672057378259e-05, - "loss": 0.4006, + "learning_rate": 4.9196518977088776e-05, + "loss": 0.2999, "step": 1630500 }, { "epoch": 0.98, - "learning_rate": 4.919462060822202e-05, - "loss": 0.3958, + "learning_rate": 4.9194423211459337e-05, + "loss": 0.3004, "step": 1631000 }, { "epoch": 0.98, - "learning_rate": 4.919252064266146e-05, - "loss": 0.3854, + "learning_rate": 4.919232324589876e-05, + "loss": 0.2958, "step": 1631500 }, { "epoch": 0.98, - "learning_rate": 4.9190420677100896e-05, - "loss": 0.3993, + "learning_rate": 4.91902232803382e-05, + "loss": 0.3017, "step": 1632000 }, { "epoch": 0.98, - "learning_rate": 4.918832071154033e-05, - "loss": 0.4024, + "learning_rate": 4.918812331477764e-05, + "loss": 0.3011, "step": 1632500 }, { "epoch": 0.98, - "learning_rate": 4.918622074597976e-05, - "loss": 0.3893, + "learning_rate": 4.918602334921707e-05, + "loss": 0.2989, "step": 1633000 }, { "epoch": 0.98, - "learning_rate": 4.9184124980350324e-05, - "loss": 0.3971, + "learning_rate": 4.918392758358763e-05, + "loss": 0.3007, "step": 1633500 }, { "epoch": 0.98, - "learning_rate": 4.918202501478976e-05, - "loss": 0.3952, + "learning_rate": 4.9181827618027064e-05, + "loss": 0.299, "step": 1634000 }, { "epoch": 0.98, - "learning_rate": 4.91799250492292e-05, - "loss": 0.4021, + "learning_rate": 4.91797276524665e-05, + "loss": 0.3009, "step": 1634500 }, { "epoch": 0.98, - "learning_rate": 4.917782508366863e-05, - "loss": 0.3965, + "learning_rate": 4.917762768690593e-05, + "loss": 0.2998, "step": 1635000 }, { "epoch": 0.98, - "learning_rate": 4.917572511810806e-05, - "loss": 0.4006, + "learning_rate": 4.917552772134537e-05, + "loss": 0.3031, "step": 1635500 }, { "epoch": 0.98, - "learning_rate": 4.91736251525475e-05, - "loss": 0.3991, + "learning_rate": 4.9173427755784805e-05, + "loss": 0.3001, "step": 1636000 }, { "epoch": 0.98, - "learning_rate": 4.917152518698693e-05, - "loss": 0.3962, + "learning_rate": 4.917133199015536e-05, + "loss": 0.2963, "step": 1636500 }, { "epoch": 0.98, - "learning_rate": 4.9169425221426365e-05, - "loss": 0.3996, + "learning_rate": 4.916923202459479e-05, + "loss": 0.3005, "step": 1637000 }, { "epoch": 0.98, - "learning_rate": 4.9167329455796925e-05, - "loss": 0.3964, + "learning_rate": 4.916713205903423e-05, + "loss": 0.3029, "step": 1637500 }, { "epoch": 0.98, - "learning_rate": 4.916522949023636e-05, - "loss": 0.3975, + "learning_rate": 4.916503629340479e-05, + "loss": 0.3016, "step": 1638000 }, { "epoch": 0.98, - "learning_rate": 4.916312952467579e-05, - "loss": 0.3948, + "learning_rate": 4.916293632784422e-05, + "loss": 0.293, "step": 1638500 }, { "epoch": 0.98, - "learning_rate": 4.916103375904635e-05, - "loss": 0.4023, + "learning_rate": 4.916083636228365e-05, + "loss": 0.3065, "step": 1639000 }, { "epoch": 0.98, - "learning_rate": 4.9158933793485786e-05, - "loss": 0.3981, + "learning_rate": 4.915873639672309e-05, + "loss": 0.297, "step": 1639500 }, { "epoch": 0.98, - "learning_rate": 4.9156838027856346e-05, - "loss": 0.399, + "learning_rate": 4.9156636431162526e-05, + "loss": 0.2976, "step": 1640000 }, { "epoch": 0.98, - "learning_rate": 4.915473806229578e-05, - "loss": 0.3992, + "learning_rate": 4.915453646560196e-05, + "loss": 0.3002, "step": 1640500 }, { "epoch": 0.98, - "learning_rate": 4.915263809673521e-05, - "loss": 0.39, + "learning_rate": 4.91524365000414e-05, + "loss": 0.2949, "step": 1641000 }, { "epoch": 0.98, - "learning_rate": 4.915053813117465e-05, - "loss": 0.401, + "learning_rate": 4.9150336534480833e-05, + "loss": 0.3025, "step": 1641500 }, { "epoch": 0.98, - "learning_rate": 4.914843816561409e-05, - "loss": 0.3941, + "learning_rate": 4.914823656892027e-05, + "loss": 0.2973, "step": 1642000 }, { "epoch": 0.98, - "learning_rate": 4.914633820005351e-05, - "loss": 0.4024, + "learning_rate": 4.914613660335971e-05, + "loss": 0.3023, "step": 1642500 }, { "epoch": 0.99, - "learning_rate": 4.9144238234492954e-05, - "loss": 0.3974, + "learning_rate": 4.914404083773026e-05, + "loss": 0.3012, "step": 1643000 }, { "epoch": 0.99, - "learning_rate": 4.914213826893239e-05, - "loss": 0.4107, + "learning_rate": 4.9141940872169694e-05, + "loss": 0.3056, "step": 1643500 }, { "epoch": 0.99, - "learning_rate": 4.914003830337182e-05, - "loss": 0.3924, + "learning_rate": 4.9139840906609134e-05, + "loss": 0.2954, "step": 1644000 }, { "epoch": 0.99, - "learning_rate": 4.913793833781126e-05, - "loss": 0.3987, + "learning_rate": 4.913774094104857e-05, + "loss": 0.3032, "step": 1644500 }, { "epoch": 0.99, - "learning_rate": 4.9135838372250694e-05, - "loss": 0.4004, + "learning_rate": 4.9135640975488e-05, + "loss": 0.295, "step": 1645000 }, { "epoch": 0.99, - "learning_rate": 4.913373840669013e-05, - "loss": 0.3931, + "learning_rate": 4.9133545209858555e-05, + "loss": 0.296, "step": 1645500 }, { "epoch": 0.99, - "learning_rate": 4.913164264106068e-05, - "loss": 0.3895, + "learning_rate": 4.9131445244297995e-05, + "loss": 0.2927, "step": 1646000 }, { "epoch": 0.99, - "learning_rate": 4.912954687543124e-05, - "loss": 0.3896, + "learning_rate": 4.912934527873743e-05, + "loss": 0.2982, "step": 1646500 }, { "epoch": 0.99, - "learning_rate": 4.912744690987068e-05, - "loss": 0.4015, + "learning_rate": 4.912724531317686e-05, + "loss": 0.3019, "step": 1647000 }, { "epoch": 0.99, - "learning_rate": 4.912534694431011e-05, - "loss": 0.4104, + "learning_rate": 4.91251453476163e-05, + "loss": 0.3025, "step": 1647500 }, { "epoch": 0.99, - "learning_rate": 4.912324697874955e-05, - "loss": 0.3931, + "learning_rate": 4.9123045382055736e-05, + "loss": 0.2915, "step": 1648000 }, { "epoch": 0.99, - "learning_rate": 4.912114701318898e-05, - "loss": 0.3975, + "learning_rate": 4.912094961642629e-05, + "loss": 0.3009, "step": 1648500 }, { "epoch": 0.99, - "learning_rate": 4.9119047047628416e-05, - "loss": 0.3963, + "learning_rate": 4.911884965086572e-05, + "loss": 0.2985, "step": 1649000 }, { "epoch": 0.99, - "learning_rate": 4.9116947082067856e-05, - "loss": 0.3865, + "learning_rate": 4.911674968530516e-05, + "loss": 0.2944, "step": 1649500 }, { "epoch": 0.99, - "learning_rate": 4.911484711650729e-05, - "loss": 0.3903, + "learning_rate": 4.9114649719744596e-05, + "loss": 0.2906, "step": 1650000 }, { "epoch": 0.99, - "learning_rate": 4.911274715094672e-05, - "loss": 0.3948, + "learning_rate": 4.911254975418403e-05, + "loss": 0.2943, "step": 1650500 }, { "epoch": 0.99, - "learning_rate": 4.911064718538616e-05, - "loss": 0.3986, + "learning_rate": 4.911044978862347e-05, + "loss": 0.2981, "step": 1651000 }, { "epoch": 0.99, - "learning_rate": 4.9108547219825597e-05, - "loss": 0.4044, + "learning_rate": 4.9108354022994024e-05, + "loss": 0.3027, "step": 1651500 }, { "epoch": 0.99, - "learning_rate": 4.910644725426503e-05, - "loss": 0.3924, + "learning_rate": 4.910625405743346e-05, + "loss": 0.2953, "step": 1652000 }, { "epoch": 0.99, - "learning_rate": 4.9104351488635584e-05, - "loss": 0.3915, + "learning_rate": 4.91041540918729e-05, + "loss": 0.2906, "step": 1652500 }, { "epoch": 0.99, - "learning_rate": 4.9102251523075024e-05, - "loss": 0.3964, + "learning_rate": 4.910205412631233e-05, + "loss": 0.303, "step": 1653000 }, { "epoch": 0.99, - "learning_rate": 4.910015575744558e-05, - "loss": 0.3976, + "learning_rate": 4.9099958360682884e-05, + "loss": 0.3028, "step": 1653500 }, { "epoch": 0.99, - "learning_rate": 4.909805579188501e-05, - "loss": 0.392, + "learning_rate": 4.909785839512232e-05, + "loss": 0.2968, "step": 1654000 }, { "epoch": 0.99, - "learning_rate": 4.9095955826324444e-05, - "loss": 0.3941, + "learning_rate": 4.909575842956176e-05, + "loss": 0.298, "step": 1654500 }, { "epoch": 0.99, - "learning_rate": 4.9093855860763885e-05, - "loss": 0.3861, + "learning_rate": 4.909365846400119e-05, + "loss": 0.2917, "step": 1655000 }, { "epoch": 0.99, - "learning_rate": 4.909175589520332e-05, - "loss": 0.3955, + "learning_rate": 4.9091558498440625e-05, + "loss": 0.296, "step": 1655500 }, { "epoch": 0.99, - "learning_rate": 4.908965592964275e-05, - "loss": 0.395, + "learning_rate": 4.908945853288006e-05, + "loss": 0.2998, "step": 1656000 }, { "epoch": 0.99, - "learning_rate": 4.908755596408219e-05, - "loss": 0.3958, + "learning_rate": 4.908735856731949e-05, + "loss": 0.3012, "step": 1656500 }, { "epoch": 0.99, - "learning_rate": 4.9085460198452745e-05, - "loss": 0.3962, + "learning_rate": 4.9085258601758925e-05, + "loss": 0.2969, "step": 1657000 }, { "epoch": 0.99, - "learning_rate": 4.908336023289218e-05, - "loss": 0.3939, + "learning_rate": 4.9083158636198366e-05, + "loss": 0.2931, "step": 1657500 }, { "epoch": 0.99, - "learning_rate": 4.908126026733162e-05, - "loss": 0.3936, + "learning_rate": 4.9081062870568926e-05, + "loss": 0.2988, "step": 1658000 }, { "epoch": 0.99, - "learning_rate": 4.907916030177105e-05, - "loss": 0.3839, + "learning_rate": 4.907896290500835e-05, + "loss": 0.2922, "step": 1658500 }, { "epoch": 0.99, - "learning_rate": 4.9077060336210486e-05, - "loss": 0.39, + "learning_rate": 4.907686293944779e-05, + "loss": 0.2938, "step": 1659000 }, { "epoch": 0.99, - "learning_rate": 4.907496457058104e-05, - "loss": 0.3937, + "learning_rate": 4.907476717381835e-05, + "loss": 0.2949, "step": 1659500 }, { "epoch": 1.0, - "learning_rate": 4.907286460502048e-05, - "loss": 0.381, + "learning_rate": 4.907266720825779e-05, + "loss": 0.2919, "step": 1660000 }, { "epoch": 1.0, - "learning_rate": 4.907076463945991e-05, - "loss": 0.3848, + "learning_rate": 4.907056724269722e-05, + "loss": 0.2964, "step": 1660500 }, { "epoch": 1.0, - "learning_rate": 4.9068664673899347e-05, - "loss": 0.3931, + "learning_rate": 4.9068467277136654e-05, + "loss": 0.2956, "step": 1661000 }, { "epoch": 1.0, - "learning_rate": 4.906656470833879e-05, - "loss": 0.3917, + "learning_rate": 4.906636731157609e-05, + "loss": 0.2944, "step": 1661500 }, { "epoch": 1.0, - "learning_rate": 4.906446474277822e-05, - "loss": 0.3957, + "learning_rate": 4.906427154594665e-05, + "loss": 0.2978, "step": 1662000 }, { "epoch": 1.0, - "learning_rate": 4.906236477721765e-05, - "loss": 0.3948, + "learning_rate": 4.906217158038608e-05, + "loss": 0.2967, "step": 1662500 }, { "epoch": 1.0, - "learning_rate": 4.906026901158821e-05, - "loss": 0.4079, + "learning_rate": 4.9060071614825514e-05, + "loss": 0.3023, "step": 1663000 }, { "epoch": 1.0, - "learning_rate": 4.905816904602765e-05, - "loss": 0.3979, + "learning_rate": 4.905797164926495e-05, + "loss": 0.2982, "step": 1663500 }, { "epoch": 1.0, - "learning_rate": 4.905606908046708e-05, - "loss": 0.39, + "learning_rate": 4.905587168370438e-05, + "loss": 0.2964, "step": 1664000 }, { "epoch": 1.0, - "learning_rate": 4.9053969114906514e-05, - "loss": 0.3934, + "learning_rate": 4.905377171814382e-05, + "loss": 0.2917, "step": 1664500 }, { "epoch": 1.0, - "learning_rate": 4.905186914934595e-05, - "loss": 0.4007, + "learning_rate": 4.9051671752583255e-05, + "loss": 0.2998, "step": 1665000 }, { "epoch": 1.0, - "learning_rate": 4.904976918378538e-05, - "loss": 0.3971, + "learning_rate": 4.904957178702269e-05, + "loss": 0.3, "step": 1665500 }, { "epoch": 1.0, - "learning_rate": 4.904766921822482e-05, - "loss": 0.3962, + "learning_rate": 4.904747182146213e-05, + "loss": 0.2938, "step": 1666000 }, { "epoch": 1.0, - "learning_rate": 4.9045569252664255e-05, - "loss": 0.3955, + "learning_rate": 4.904538025576381e-05, + "loss": 0.2945, "step": 1666500 }, { "epoch": 1.0, - "learning_rate": 4.9043473487034815e-05, - "loss": 0.3834, + "learning_rate": 4.904328029020324e-05, + "loss": 0.2929, "step": 1667000 }, { "epoch": 1.0, - "learning_rate": 4.904137352147424e-05, - "loss": 0.3998, + "learning_rate": 4.9041180324642676e-05, + "loss": 0.2956, "step": 1667500 }, { "epoch": 1.0, - "learning_rate": 4.90392777558448e-05, - "loss": 0.3958, + "learning_rate": 4.903908035908211e-05, + "loss": 0.2947, "step": 1668000 }, { "epoch": 1.0, - "learning_rate": 4.903717779028424e-05, - "loss": 0.3835, + "learning_rate": 4.903698039352154e-05, + "loss": 0.2913, "step": 1668500 }, { "epoch": 1.0, - "learning_rate": 4.9035077824723676e-05, - "loss": 0.3999, + "learning_rate": 4.9034880427960976e-05, + "loss": 0.3005, "step": 1669000 }, { "epoch": 1.0, - "learning_rate": 4.90329778591631e-05, - "loss": 0.3916, + "learning_rate": 4.903278046240042e-05, + "loss": 0.2915, "step": 1669500 }, { "epoch": 1.0, - "learning_rate": 4.903087789360254e-05, - "loss": 0.3804, + "learning_rate": 4.903068049683985e-05, + "loss": 0.2947, "step": 1670000 }, { "epoch": 1.0, - "learning_rate": 4.9028777928041977e-05, - "loss": 0.3816, + "learning_rate": 4.9028584731210404e-05, + "loss": 0.2988, "step": 1670500 }, { "epoch": 1.0, - "learning_rate": 4.902667796248141e-05, - "loss": 0.3923, + "learning_rate": 4.902648476564984e-05, + "loss": 0.2954, "step": 1671000 }, { "epoch": 1.0, - "learning_rate": 4.902457799692085e-05, - "loss": 0.384, + "learning_rate": 4.90243890000204e-05, + "loss": 0.2867, "step": 1671500 }, { "epoch": 1.0, - "learning_rate": 4.9022482231291404e-05, - "loss": 0.3817, + "learning_rate": 4.902228903445984e-05, + "loss": 0.2885, "step": 1672000 }, { "epoch": 1.0, - "learning_rate": 4.902038226573084e-05, - "loss": 0.384, + "learning_rate": 4.9020189068899264e-05, + "loss": 0.2915, "step": 1672500 }, { "epoch": 1.0, - "learning_rate": 4.901828230017028e-05, - "loss": 0.3898, + "learning_rate": 4.9018093303269825e-05, + "loss": 0.296, "step": 1673000 }, { "epoch": 1.0, - "learning_rate": 4.901618653454084e-05, - "loss": 0.3948, + "learning_rate": 4.9015993337709265e-05, + "loss": 0.2975, "step": 1673500 }, { "epoch": 1.0, - "learning_rate": 4.901408656898027e-05, - "loss": 0.3915, + "learning_rate": 4.90138933721487e-05, + "loss": 0.2937, "step": 1674000 }, { "epoch": 1.0, - "learning_rate": 4.90119866034197e-05, - "loss": 0.3865, + "learning_rate": 4.901179340658813e-05, + "loss": 0.2974, "step": 1674500 }, { "epoch": 1.0, - "learning_rate": 4.900989083779026e-05, - "loss": 0.3964, + "learning_rate": 4.9009693441027565e-05, + "loss": 0.2887, "step": 1675000 }, { "epoch": 1.0, - "learning_rate": 4.90077908722297e-05, - "loss": 0.3939, + "learning_rate": 4.9007593475467e-05, + "loss": 0.2961, "step": 1675500 }, { "epoch": 1.0, - "learning_rate": 4.900569090666913e-05, - "loss": 0.3875, + "learning_rate": 4.900549350990643e-05, + "loss": 0.2905, "step": 1676000 }, { "epoch": 1.01, - "learning_rate": 4.9003590941108566e-05, - "loss": 0.3883, + "learning_rate": 4.900339354434587e-05, + "loss": 0.2968, "step": 1676500 }, { "epoch": 1.01, - "learning_rate": 4.9001490975548e-05, - "loss": 0.3887, + "learning_rate": 4.9001293578785306e-05, + "loss": 0.294, "step": 1677000 }, { "epoch": 1.01, - "learning_rate": 4.899939100998743e-05, - "loss": 0.3924, + "learning_rate": 4.899919361322474e-05, + "loss": 0.2988, "step": 1677500 }, { "epoch": 1.01, - "learning_rate": 4.8997291044426866e-05, - "loss": 0.3882, + "learning_rate": 4.899709364766418e-05, + "loss": 0.2972, "step": 1678000 }, { "epoch": 1.01, - "learning_rate": 4.8995191078866306e-05, - "loss": 0.3929, + "learning_rate": 4.899499368210361e-05, + "loss": 0.2991, "step": 1678500 }, { "epoch": 1.01, - "learning_rate": 4.899309531323686e-05, - "loss": 0.3843, + "learning_rate": 4.899289371654305e-05, + "loss": 0.2902, "step": 1679000 }, { "epoch": 1.01, - "learning_rate": 4.899099534767629e-05, - "loss": 0.3836, + "learning_rate": 4.899079375098249e-05, + "loss": 0.2917, "step": 1679500 }, { "epoch": 1.01, - "learning_rate": 4.8988895382115733e-05, - "loss": 0.3873, + "learning_rate": 4.898869798535304e-05, + "loss": 0.2915, "step": 1680000 }, { "epoch": 1.01, - "learning_rate": 4.898679541655517e-05, - "loss": 0.3921, + "learning_rate": 4.8986598019792474e-05, + "loss": 0.297, "step": 1680500 }, { "epoch": 1.01, - "learning_rate": 4.89846954509946e-05, - "loss": 0.3881, + "learning_rate": 4.8984498054231914e-05, + "loss": 0.3001, "step": 1681000 }, { "epoch": 1.01, - "learning_rate": 4.898259548543404e-05, - "loss": 0.3794, + "learning_rate": 4.898239808867135e-05, + "loss": 0.2879, "step": 1681500 }, { "epoch": 1.01, - "learning_rate": 4.8980495519873474e-05, - "loss": 0.3859, + "learning_rate": 4.89803023230419e-05, + "loss": 0.2876, "step": 1682000 }, { "epoch": 1.01, - "learning_rate": 4.897839975424403e-05, - "loss": 0.3885, + "learning_rate": 4.8978202357481335e-05, + "loss": 0.2916, "step": 1682500 }, { "epoch": 1.01, - "learning_rate": 4.897629978868346e-05, - "loss": 0.3768, + "learning_rate": 4.8976102391920775e-05, + "loss": 0.2905, "step": 1683000 }, { "epoch": 1.01, - "learning_rate": 4.89741998231229e-05, - "loss": 0.3897, + "learning_rate": 4.897400242636021e-05, + "loss": 0.2972, "step": 1683500 }, { "epoch": 1.01, - "learning_rate": 4.8972099857562335e-05, - "loss": 0.3781, + "learning_rate": 4.897190246079964e-05, + "loss": 0.2881, "step": 1684000 }, { "epoch": 1.01, - "learning_rate": 4.896999989200177e-05, - "loss": 0.3733, + "learning_rate": 4.8969806695170195e-05, + "loss": 0.2873, "step": 1684500 }, { "epoch": 1.01, - "learning_rate": 4.896789992644121e-05, - "loss": 0.3935, + "learning_rate": 4.8967710929540756e-05, + "loss": 0.2965, "step": 1685000 }, { "epoch": 1.01, - "learning_rate": 4.896579996088064e-05, - "loss": 0.3858, + "learning_rate": 4.896561096398019e-05, + "loss": 0.2947, "step": 1685500 }, { "epoch": 1.01, - "learning_rate": 4.8963704195251195e-05, - "loss": 0.3901, + "learning_rate": 4.896351099841962e-05, + "loss": 0.2911, "step": 1686000 }, { "epoch": 1.01, - "learning_rate": 4.8961604229690636e-05, - "loss": 0.3872, + "learning_rate": 4.896141103285906e-05, + "loss": 0.294, "step": 1686500 }, { "epoch": 1.01, - "learning_rate": 4.895950426413007e-05, - "loss": 0.3861, + "learning_rate": 4.8959311067298496e-05, + "loss": 0.2914, "step": 1687000 }, { "epoch": 1.01, - "learning_rate": 4.89574042985695e-05, - "loss": 0.3813, + "learning_rate": 4.895721110173793e-05, + "loss": 0.2881, "step": 1687500 }, { "epoch": 1.01, - "learning_rate": 4.895530433300894e-05, - "loss": 0.3831, + "learning_rate": 4.895511113617737e-05, + "loss": 0.2885, "step": 1688000 }, { "epoch": 1.01, - "learning_rate": 4.8953208567379496e-05, - "loss": 0.3923, + "learning_rate": 4.8953015370547924e-05, + "loss": 0.2952, "step": 1688500 }, { "epoch": 1.01, - "learning_rate": 4.895110860181893e-05, - "loss": 0.3831, + "learning_rate": 4.895091540498736e-05, + "loss": 0.2916, "step": 1689000 }, { "epoch": 1.01, - "learning_rate": 4.894900863625836e-05, - "loss": 0.4015, + "learning_rate": 4.894881543942679e-05, + "loss": 0.2976, "step": 1689500 }, { "epoch": 1.01, - "learning_rate": 4.8946908670697804e-05, - "loss": 0.3896, + "learning_rate": 4.8946719673797344e-05, + "loss": 0.2952, "step": 1690000 }, { "epoch": 1.01, - "learning_rate": 4.894480870513724e-05, - "loss": 0.3847, + "learning_rate": 4.8944619708236784e-05, + "loss": 0.2926, "step": 1690500 }, { "epoch": 1.01, - "learning_rate": 4.894270873957667e-05, - "loss": 0.3929, + "learning_rate": 4.894251974267622e-05, + "loss": 0.2988, "step": 1691000 }, { "epoch": 1.01, - "learning_rate": 4.894060877401611e-05, - "loss": 0.3883, + "learning_rate": 4.894041977711565e-05, + "loss": 0.2884, "step": 1691500 }, { "epoch": 1.01, - "learning_rate": 4.8938513008386664e-05, - "loss": 0.3757, + "learning_rate": 4.893831981155509e-05, + "loss": 0.2865, "step": 1692000 }, { "epoch": 1.01, - "learning_rate": 4.89364130428261e-05, - "loss": 0.4014, + "learning_rate": 4.8936219845994525e-05, + "loss": 0.3013, "step": 1692500 }, { "epoch": 1.02, - "learning_rate": 4.893431307726553e-05, - "loss": 0.3943, + "learning_rate": 4.893411988043396e-05, + "loss": 0.2949, "step": 1693000 }, { "epoch": 1.02, - "learning_rate": 4.893221311170497e-05, - "loss": 0.3872, + "learning_rate": 4.89320199148734e-05, + "loss": 0.2962, "step": 1693500 }, { "epoch": 1.02, - "learning_rate": 4.8930113146144405e-05, - "loss": 0.3849, + "learning_rate": 4.892991994931283e-05, + "loss": 0.2936, "step": 1694000 }, { "epoch": 1.02, - "learning_rate": 4.892801318058384e-05, - "loss": 0.3798, + "learning_rate": 4.8927819983752266e-05, + "loss": 0.2868, "step": 1694500 }, { "epoch": 1.02, - "learning_rate": 4.89259174149544e-05, - "loss": 0.3853, + "learning_rate": 4.89257200181917e-05, + "loss": 0.2968, "step": 1695000 }, { "epoch": 1.02, - "learning_rate": 4.892381744939383e-05, - "loss": 0.3878, + "learning_rate": 4.892362005263113e-05, + "loss": 0.2944, "step": 1695500 }, { "epoch": 1.02, - "learning_rate": 4.8921717483833266e-05, - "loss": 0.3919, + "learning_rate": 4.892152428700169e-05, + "loss": 0.2918, "step": 1696000 }, { "epoch": 1.02, - "learning_rate": 4.89196175182727e-05, - "loss": 0.3887, + "learning_rate": 4.891942432144113e-05, + "loss": 0.2929, "step": 1696500 }, { "epoch": 1.02, - "learning_rate": 4.891751755271213e-05, - "loss": 0.3884, + "learning_rate": 4.8917324355880567e-05, + "loss": 0.2926, "step": 1697000 }, { "epoch": 1.02, - "learning_rate": 4.8915417587151566e-05, - "loss": 0.3883, + "learning_rate": 4.891522439031999e-05, + "loss": 0.2933, "step": 1697500 }, { "epoch": 1.02, - "learning_rate": 4.8913321821522126e-05, - "loss": 0.3882, + "learning_rate": 4.8913124424759433e-05, + "loss": 0.2908, "step": 1698000 }, { "epoch": 1.02, - "learning_rate": 4.8911221855961567e-05, - "loss": 0.3787, + "learning_rate": 4.891102445919887e-05, + "loss": 0.2895, "step": 1698500 }, { "epoch": 1.02, - "learning_rate": 4.890912189040099e-05, - "loss": 0.3788, + "learning_rate": 4.890892869356943e-05, + "loss": 0.2952, "step": 1699000 }, { "epoch": 1.02, - "learning_rate": 4.890702192484043e-05, - "loss": 0.3907, + "learning_rate": 4.8906828728008854e-05, + "loss": 0.2948, "step": 1699500 }, { "epoch": 1.02, - "learning_rate": 4.890492195927987e-05, - "loss": 0.3863, + "learning_rate": 4.8904728762448294e-05, + "loss": 0.291, "step": 1700000 }, { "epoch": 1.02, - "eval_loss": 0.37541428208351135, - "eval_runtime": 1122.3076, - "eval_samples_per_second": 469.319, - "eval_steps_per_second": 78.22, + "eval_loss": 0.2680596709251404, + "eval_runtime": 1472.3776, + "eval_samples_per_second": 357.734, + "eval_steps_per_second": 59.623, "step": 1700000 }, { "epoch": 1.02, - "learning_rate": 4.89028219937193e-05, - "loss": 0.3904, + "learning_rate": 4.890262879688773e-05, + "loss": 0.2962, "step": 1700500 }, { "epoch": 1.02, - "learning_rate": 4.8900722028158734e-05, - "loss": 0.3878, + "learning_rate": 4.890053303125829e-05, + "loss": 0.2911, "step": 1701000 }, { "epoch": 1.02, - "learning_rate": 4.8898622062598174e-05, - "loss": 0.3831, + "learning_rate": 4.889843306569772e-05, + "loss": 0.2921, "step": 1701500 }, { "epoch": 1.02, - "learning_rate": 4.889652629696873e-05, - "loss": 0.3891, + "learning_rate": 4.8896333100137155e-05, + "loss": 0.2926, "step": 1702000 }, { "epoch": 1.02, - "learning_rate": 4.889442633140816e-05, - "loss": 0.3819, + "learning_rate": 4.889423313457659e-05, + "loss": 0.2897, "step": 1702500 }, { "epoch": 1.02, - "learning_rate": 4.88923263658476e-05, - "loss": 0.3874, + "learning_rate": 4.889213316901603e-05, + "loss": 0.2941, "step": 1703000 }, { "epoch": 1.02, - "learning_rate": 4.8890226400287035e-05, - "loss": 0.3772, + "learning_rate": 4.889003740338659e-05, + "loss": 0.2917, "step": 1703500 }, { "epoch": 1.02, - "learning_rate": 4.888813063465759e-05, - "loss": 0.385, + "learning_rate": 4.888794163775714e-05, + "loss": 0.3002, "step": 1704000 }, { "epoch": 1.02, - "learning_rate": 4.888603066909702e-05, - "loss": 0.3935, + "learning_rate": 4.8885841672196576e-05, + "loss": 0.2994, "step": 1704500 }, { "epoch": 1.02, - "learning_rate": 4.888393070353646e-05, - "loss": 0.3841, + "learning_rate": 4.888374170663601e-05, + "loss": 0.2854, "step": 1705000 }, { "epoch": 1.02, - "learning_rate": 4.8881830737975896e-05, - "loss": 0.3909, + "learning_rate": 4.888164174107545e-05, + "loss": 0.2867, "step": 1705500 }, { "epoch": 1.02, - "learning_rate": 4.887973077241533e-05, - "loss": 0.386, + "learning_rate": 4.887954177551488e-05, + "loss": 0.294, "step": 1706000 }, { "epoch": 1.02, - "learning_rate": 4.887763080685477e-05, - "loss": 0.3844, + "learning_rate": 4.8877441809954317e-05, + "loss": 0.2942, "step": 1706500 }, { "epoch": 1.02, - "learning_rate": 4.88755308412942e-05, - "loss": 0.3867, + "learning_rate": 4.887534184439375e-05, + "loss": 0.2905, "step": 1707000 }, { "epoch": 1.02, - "learning_rate": 4.8873435075664756e-05, - "loss": 0.3864, + "learning_rate": 4.887324607876431e-05, + "loss": 0.3044, "step": 1707500 }, { "epoch": 1.02, - "learning_rate": 4.887133511010419e-05, - "loss": 0.3833, + "learning_rate": 4.8871146113203744e-05, + "loss": 0.2933, "step": 1708000 }, { "epoch": 1.02, - "learning_rate": 4.886923514454363e-05, - "loss": 0.3811, + "learning_rate": 4.886904614764318e-05, + "loss": 0.2881, "step": 1708500 }, { "epoch": 1.02, - "learning_rate": 4.8867135178983063e-05, - "loss": 0.3793, + "learning_rate": 4.886694618208261e-05, + "loss": 0.286, "step": 1709000 }, { "epoch": 1.02, - "learning_rate": 4.88650352134225e-05, - "loss": 0.3873, + "learning_rate": 4.8864846216522044e-05, + "loss": 0.2946, "step": 1709500 }, { "epoch": 1.03, - "learning_rate": 4.886293524786194e-05, - "loss": 0.3955, + "learning_rate": 4.8862746250961484e-05, + "loss": 0.2991, "step": 1710000 }, { "epoch": 1.03, - "learning_rate": 4.886083528230137e-05, - "loss": 0.3893, + "learning_rate": 4.886064628540092e-05, + "loss": 0.2971, "step": 1710500 }, { "epoch": 1.03, - "learning_rate": 4.8858735316740804e-05, - "loss": 0.3769, + "learning_rate": 4.885854631984035e-05, + "loss": 0.2879, "step": 1711000 }, { "epoch": 1.03, - "learning_rate": 4.885663535118024e-05, - "loss": 0.3864, + "learning_rate": 4.885644635427979e-05, + "loss": 0.2939, "step": 1711500 }, { "epoch": 1.03, - "learning_rate": 4.88545395855508e-05, - "loss": 0.3862, + "learning_rate": 4.8854346388719225e-05, + "loss": 0.2958, "step": 1712000 }, { "epoch": 1.03, - "learning_rate": 4.885244381992135e-05, - "loss": 0.3922, + "learning_rate": 4.885225062308978e-05, + "loss": 0.2923, "step": 1712500 }, { "epoch": 1.03, - "learning_rate": 4.8850343854360785e-05, - "loss": 0.379, + "learning_rate": 4.885015065752921e-05, + "loss": 0.2843, "step": 1713000 }, { "epoch": 1.03, - "learning_rate": 4.8848243888800225e-05, - "loss": 0.3893, + "learning_rate": 4.884805069196865e-05, + "loss": 0.2968, "step": 1713500 }, { "epoch": 1.03, - "learning_rate": 4.884614392323966e-05, - "loss": 0.3862, + "learning_rate": 4.8845950726408086e-05, + "loss": 0.2897, "step": 1714000 }, { "epoch": 1.03, - "learning_rate": 4.884404395767909e-05, - "loss": 0.3768, + "learning_rate": 4.884385076084752e-05, + "loss": 0.2886, "step": 1714500 }, { "epoch": 1.03, - "learning_rate": 4.8841948192049646e-05, - "loss": 0.3982, + "learning_rate": 4.884175079528696e-05, + "loss": 0.2948, "step": 1715000 }, { "epoch": 1.03, - "learning_rate": 4.8839848226489086e-05, - "loss": 0.3833, + "learning_rate": 4.883965082972639e-05, + "loss": 0.2937, "step": 1715500 }, { "epoch": 1.03, - "learning_rate": 4.883774826092852e-05, - "loss": 0.3927, + "learning_rate": 4.8837550864165826e-05, + "loss": 0.2944, "step": 1716000 }, { "epoch": 1.03, - "learning_rate": 4.883564829536795e-05, - "loss": 0.3921, + "learning_rate": 4.883545509853638e-05, + "loss": 0.2958, "step": 1716500 }, { "epoch": 1.03, - "learning_rate": 4.883354832980739e-05, - "loss": 0.3847, + "learning_rate": 4.883335513297582e-05, + "loss": 0.2861, "step": 1717000 }, { "epoch": 1.03, - "learning_rate": 4.8831448364246827e-05, - "loss": 0.3878, + "learning_rate": 4.8831259367346374e-05, + "loss": 0.2906, "step": 1717500 }, { "epoch": 1.03, - "learning_rate": 4.882934839868626e-05, - "loss": 0.3921, + "learning_rate": 4.882915940178581e-05, + "loss": 0.2949, "step": 1718000 }, { "epoch": 1.03, - "learning_rate": 4.88272484331257e-05, - "loss": 0.3954, + "learning_rate": 4.882705943622525e-05, + "loss": 0.2948, "step": 1718500 }, { "epoch": 1.03, - "learning_rate": 4.8825152667496254e-05, - "loss": 0.3725, + "learning_rate": 4.88249636705958e-05, + "loss": 0.2892, "step": 1719000 }, { "epoch": 1.03, - "learning_rate": 4.882305270193569e-05, - "loss": 0.3896, + "learning_rate": 4.8822863705035235e-05, + "loss": 0.2954, "step": 1719500 }, { "epoch": 1.03, - "learning_rate": 4.882095693630624e-05, - "loss": 0.3899, + "learning_rate": 4.882076373947467e-05, + "loss": 0.2963, "step": 1720000 }, { "epoch": 1.03, - "learning_rate": 4.8818861170676794e-05, - "loss": 0.3885, + "learning_rate": 4.881866377391411e-05, + "loss": 0.2949, "step": 1720500 }, { "epoch": 1.03, - "learning_rate": 4.8816761205116235e-05, - "loss": 0.3774, + "learning_rate": 4.881656380835354e-05, + "loss": 0.2891, "step": 1721000 }, { "epoch": 1.03, - "learning_rate": 4.881466123955567e-05, - "loss": 0.3846, + "learning_rate": 4.8814463842792975e-05, + "loss": 0.2951, "step": 1721500 }, { "epoch": 1.03, - "learning_rate": 4.88125612739951e-05, - "loss": 0.3727, + "learning_rate": 4.8812363877232415e-05, + "loss": 0.2863, "step": 1722000 }, { "epoch": 1.03, - "learning_rate": 4.881046130843454e-05, - "loss": 0.3976, + "learning_rate": 4.881026391167185e-05, + "loss": 0.2938, "step": 1722500 }, { "epoch": 1.03, - "learning_rate": 4.8808361342873975e-05, - "loss": 0.3755, + "learning_rate": 4.88081681460424e-05, + "loss": 0.2885, "step": 1723000 }, { "epoch": 1.03, - "learning_rate": 4.880626137731341e-05, - "loss": 0.3887, + "learning_rate": 4.8806068180481836e-05, + "loss": 0.2917, "step": 1723500 }, { "epoch": 1.03, - "learning_rate": 4.880416141175285e-05, - "loss": 0.3837, + "learning_rate": 4.8803968214921276e-05, + "loss": 0.2912, "step": 1724000 }, { "epoch": 1.03, - "learning_rate": 4.880206144619228e-05, - "loss": 0.3811, + "learning_rate": 4.880187244929183e-05, + "loss": 0.2881, "step": 1724500 }, { "epoch": 1.03, - "learning_rate": 4.8799965680562836e-05, - "loss": 0.3925, + "learning_rate": 4.879977248373126e-05, + "loss": 0.2989, "step": 1725000 }, { "epoch": 1.03, - "learning_rate": 4.8797865715002276e-05, - "loss": 0.3876, + "learning_rate": 4.8797672518170703e-05, + "loss": 0.2927, "step": 1725500 }, { "epoch": 1.03, - "learning_rate": 4.879576574944171e-05, - "loss": 0.3947, + "learning_rate": 4.879557255261014e-05, + "loss": 0.2915, "step": 1726000 }, { "epoch": 1.04, - "learning_rate": 4.879366578388114e-05, - "loss": 0.3963, + "learning_rate": 4.879347258704957e-05, + "loss": 0.2928, "step": 1726500 }, { "epoch": 1.04, - "learning_rate": 4.879156581832058e-05, - "loss": 0.3801, + "learning_rate": 4.879137262148901e-05, + "loss": 0.2934, "step": 1727000 }, { "epoch": 1.04, - "learning_rate": 4.878946585276002e-05, - "loss": 0.3892, + "learning_rate": 4.8789272655928444e-05, + "loss": 0.291, "step": 1727500 }, { "epoch": 1.04, - "learning_rate": 4.878736588719945e-05, - "loss": 0.3846, + "learning_rate": 4.8787176890299e-05, + "loss": 0.2886, "step": 1728000 }, { "epoch": 1.04, - "learning_rate": 4.8785270121570004e-05, - "loss": 0.392, + "learning_rate": 4.878507692473843e-05, + "loss": 0.2956, "step": 1728500 }, { "epoch": 1.04, - "learning_rate": 4.8783170156009444e-05, - "loss": 0.3906, + "learning_rate": 4.878297695917787e-05, + "loss": 0.2973, "step": 1729000 }, { "epoch": 1.04, - "learning_rate": 4.878107019044888e-05, - "loss": 0.3756, + "learning_rate": 4.8780876993617305e-05, + "loss": 0.2876, "step": 1729500 }, { "epoch": 1.04, - "learning_rate": 4.877897022488831e-05, - "loss": 0.3881, + "learning_rate": 4.877877702805674e-05, + "loss": 0.2976, "step": 1730000 }, { "epoch": 1.04, - "learning_rate": 4.877687025932775e-05, - "loss": 0.3846, + "learning_rate": 4.877667706249618e-05, + "loss": 0.3002, "step": 1730500 }, { "epoch": 1.04, - "learning_rate": 4.877477029376718e-05, - "loss": 0.3888, + "learning_rate": 4.877457709693561e-05, + "loss": 0.2952, "step": 1731000 }, { "epoch": 1.04, - "learning_rate": 4.877267032820661e-05, - "loss": 0.3821, + "learning_rate": 4.8772477131375045e-05, + "loss": 0.2929, "step": 1731500 }, { "epoch": 1.04, - "learning_rate": 4.877057456257717e-05, - "loss": 0.399, + "learning_rate": 4.8770381365745606e-05, + "loss": 0.3017, "step": 1732000 }, { "epoch": 1.04, - "learning_rate": 4.876847459701661e-05, - "loss": 0.3959, + "learning_rate": 4.876828560011616e-05, + "loss": 0.2963, "step": 1732500 }, { "epoch": 1.04, - "learning_rate": 4.876637463145604e-05, - "loss": 0.3828, + "learning_rate": 4.876618563455559e-05, + "loss": 0.288, "step": 1733000 }, { "epoch": 1.04, - "learning_rate": 4.876427466589548e-05, - "loss": 0.3772, + "learning_rate": 4.8764085668995026e-05, + "loss": 0.2884, "step": 1733500 }, { "epoch": 1.04, - "learning_rate": 4.876217890026604e-05, - "loss": 0.3855, + "learning_rate": 4.8761985703434466e-05, + "loss": 0.2878, "step": 1734000 }, { "epoch": 1.04, - "learning_rate": 4.876007893470547e-05, - "loss": 0.3872, + "learning_rate": 4.87598857378739e-05, + "loss": 0.2892, "step": 1734500 }, { "epoch": 1.04, - "learning_rate": 4.8757978969144906e-05, - "loss": 0.3823, + "learning_rate": 4.875778577231333e-05, + "loss": 0.2899, "step": 1735000 }, { "epoch": 1.04, - "learning_rate": 4.875587900358434e-05, - "loss": 0.3869, + "learning_rate": 4.875569000668389e-05, + "loss": 0.2919, "step": 1735500 }, { "epoch": 1.04, - "learning_rate": 4.87537832379549e-05, - "loss": 0.391, + "learning_rate": 4.875359004112333e-05, + "loss": 0.2903, "step": 1736000 }, { "epoch": 1.04, - "learning_rate": 4.8751683272394333e-05, - "loss": 0.3868, + "learning_rate": 4.875149007556276e-05, + "loss": 0.2921, "step": 1736500 }, { "epoch": 1.04, - "learning_rate": 4.874958330683377e-05, - "loss": 0.3862, + "learning_rate": 4.8749390110002194e-05, + "loss": 0.2943, "step": 1737000 }, { "epoch": 1.04, - "learning_rate": 4.874748334127321e-05, - "loss": 0.383, + "learning_rate": 4.8747290144441634e-05, + "loss": 0.2981, "step": 1737500 }, { "epoch": 1.04, - "learning_rate": 4.8745383375712634e-05, - "loss": 0.3899, + "learning_rate": 4.874519017888107e-05, + "loss": 0.2886, "step": 1738000 }, { "epoch": 1.04, - "learning_rate": 4.874328341015207e-05, - "loss": 0.3857, + "learning_rate": 4.87430902133205e-05, + "loss": 0.2884, "step": 1738500 }, { "epoch": 1.04, - "learning_rate": 4.874118344459151e-05, - "loss": 0.3899, + "learning_rate": 4.8740990247759935e-05, + "loss": 0.2957, "step": 1739000 }, { "epoch": 1.04, - "learning_rate": 4.873908347903094e-05, - "loss": 0.3954, + "learning_rate": 4.8738894482130495e-05, + "loss": 0.2977, "step": 1739500 }, { "epoch": 1.04, - "learning_rate": 4.87369877134015e-05, - "loss": 0.3873, + "learning_rate": 4.873679451656993e-05, + "loss": 0.2954, "step": 1740000 }, { "epoch": 1.04, - "learning_rate": 4.8734887747840935e-05, - "loss": 0.3944, + "learning_rate": 4.873469455100937e-05, + "loss": 0.2957, "step": 1740500 }, { "epoch": 1.04, - "learning_rate": 4.873278778228037e-05, - "loss": 0.393, + "learning_rate": 4.8732594585448795e-05, + "loss": 0.2913, "step": 1741000 }, { "epoch": 1.04, - "learning_rate": 4.873069201665093e-05, - "loss": 0.3877, + "learning_rate": 4.873049461988823e-05, + "loss": 0.2916, "step": 1741500 }, { "epoch": 1.04, - "learning_rate": 4.872859625102148e-05, - "loss": 0.3905, + "learning_rate": 4.872839465432767e-05, + "loss": 0.2899, "step": 1742000 }, { "epoch": 1.04, - "learning_rate": 4.8726496285460916e-05, - "loss": 0.395, + "learning_rate": 4.872629888869823e-05, + "loss": 0.2983, "step": 1742500 }, { "epoch": 1.04, - "learning_rate": 4.8724396319900356e-05, - "loss": 0.3855, + "learning_rate": 4.872419892313766e-05, + "loss": 0.2913, "step": 1743000 }, { "epoch": 1.05, - "learning_rate": 4.872229635433979e-05, - "loss": 0.3909, + "learning_rate": 4.872209895757709e-05, + "loss": 0.2936, "step": 1743500 }, { "epoch": 1.05, - "learning_rate": 4.872019638877922e-05, - "loss": 0.3915, + "learning_rate": 4.871999899201653e-05, + "loss": 0.2871, "step": 1744000 }, { "epoch": 1.05, - "learning_rate": 4.871809642321866e-05, - "loss": 0.3826, + "learning_rate": 4.871789902645596e-05, + "loss": 0.2949, "step": 1744500 }, { "epoch": 1.05, - "learning_rate": 4.871599645765809e-05, - "loss": 0.3906, + "learning_rate": 4.87157990608954e-05, + "loss": 0.2942, "step": 1745000 }, { "epoch": 1.05, - "learning_rate": 4.871389649209753e-05, - "loss": 0.3871, + "learning_rate": 4.871369909533484e-05, + "loss": 0.2929, "step": 1745500 }, { "epoch": 1.05, - "learning_rate": 4.871179652653696e-05, - "loss": 0.385, + "learning_rate": 4.871159912977427e-05, + "loss": 0.2897, "step": 1746000 }, { "epoch": 1.05, - "learning_rate": 4.87096965609764e-05, - "loss": 0.3826, + "learning_rate": 4.8709499164213704e-05, + "loss": 0.2879, "step": 1746500 }, { "epoch": 1.05, - "learning_rate": 4.870760079534696e-05, - "loss": 0.3924, + "learning_rate": 4.8707403398584264e-05, + "loss": 0.2925, "step": 1747000 }, { "epoch": 1.05, - "learning_rate": 4.870550082978639e-05, - "loss": 0.401, + "learning_rate": 4.87053034330237e-05, + "loss": 0.3006, "step": 1747500 }, { "epoch": 1.05, - "learning_rate": 4.8703400864225824e-05, - "loss": 0.3897, + "learning_rate": 4.870320346746313e-05, + "loss": 0.2998, "step": 1748000 }, { "epoch": 1.05, - "learning_rate": 4.8701305098596384e-05, - "loss": 0.3883, + "learning_rate": 4.870110350190257e-05, + "loss": 0.2948, "step": 1748500 }, { "epoch": 1.05, - "learning_rate": 4.869920513303582e-05, - "loss": 0.3856, + "learning_rate": 4.8699007736273125e-05, + "loss": 0.2967, "step": 1749000 }, { "epoch": 1.05, - "learning_rate": 4.869710516747526e-05, - "loss": 0.3912, + "learning_rate": 4.869690777071256e-05, + "loss": 0.2893, "step": 1749500 }, { "epoch": 1.05, - "learning_rate": 4.8695005201914685e-05, - "loss": 0.3805, + "learning_rate": 4.869480780515199e-05, + "loss": 0.2912, "step": 1750000 }, { "epoch": 1.05, - "learning_rate": 4.869290523635412e-05, - "loss": 0.3994, + "learning_rate": 4.869270783959143e-05, + "loss": 0.3013, "step": 1750500 }, { "epoch": 1.05, - "learning_rate": 4.869080527079356e-05, - "loss": 0.3863, + "learning_rate": 4.8690607874030866e-05, + "loss": 0.297, "step": 1751000 }, { "epoch": 1.05, - "learning_rate": 4.868870530523299e-05, - "loss": 0.3833, + "learning_rate": 4.868851210840142e-05, + "loss": 0.2927, "step": 1751500 }, { "epoch": 1.05, - "learning_rate": 4.8686605339672425e-05, - "loss": 0.3917, + "learning_rate": 4.868641214284085e-05, + "loss": 0.2994, "step": 1752000 }, { "epoch": 1.05, - "learning_rate": 4.8684505374111866e-05, - "loss": 0.3894, + "learning_rate": 4.868431217728029e-05, + "loss": 0.2884, "step": 1752500 }, { "epoch": 1.05, - "learning_rate": 4.86824054085513e-05, - "loss": 0.3819, + "learning_rate": 4.8682216411650846e-05, + "loss": 0.2922, "step": 1753000 }, { "epoch": 1.05, - "learning_rate": 4.868030964292185e-05, - "loss": 0.3797, + "learning_rate": 4.868011644609028e-05, + "loss": 0.2902, "step": 1753500 }, { "epoch": 1.05, - "learning_rate": 4.867820967736129e-05, - "loss": 0.3961, + "learning_rate": 4.867801648052972e-05, + "loss": 0.2869, "step": 1754000 }, { "epoch": 1.05, - "learning_rate": 4.8676109711800726e-05, - "loss": 0.3866, + "learning_rate": 4.8675916514969154e-05, + "loss": 0.294, "step": 1754500 }, { "epoch": 1.05, - "learning_rate": 4.867400974624016e-05, - "loss": 0.3865, + "learning_rate": 4.867381654940859e-05, + "loss": 0.2945, "step": 1755000 }, { "epoch": 1.05, - "learning_rate": 4.86719097806796e-05, - "loss": 0.3816, + "learning_rate": 4.867171658384803e-05, + "loss": 0.2914, "step": 1755500 }, { "epoch": 1.05, - "learning_rate": 4.8669809815119034e-05, - "loss": 0.376, + "learning_rate": 4.866961661828746e-05, + "loss": 0.2873, "step": 1756000 }, { "epoch": 1.05, - "learning_rate": 4.866770984955847e-05, - "loss": 0.3793, + "learning_rate": 4.8667516652726894e-05, + "loss": 0.2918, "step": 1756500 }, { "epoch": 1.05, - "learning_rate": 4.866560988399791e-05, - "loss": 0.3793, + "learning_rate": 4.866542088709745e-05, + "loss": 0.2904, "step": 1757000 }, { "epoch": 1.05, - "learning_rate": 4.866351411836846e-05, - "loss": 0.3906, + "learning_rate": 4.866332092153689e-05, + "loss": 0.2918, "step": 1757500 }, { "epoch": 1.05, - "learning_rate": 4.8661414152807894e-05, - "loss": 0.3885, + "learning_rate": 4.866122095597632e-05, + "loss": 0.2918, "step": 1758000 }, { "epoch": 1.05, - "learning_rate": 4.865931418724733e-05, - "loss": 0.3825, + "learning_rate": 4.8659120990415755e-05, + "loss": 0.2861, "step": 1758500 }, { "epoch": 1.05, - "learning_rate": 4.865721842161788e-05, - "loss": 0.3931, + "learning_rate": 4.8657021024855195e-05, + "loss": 0.2943, "step": 1759000 }, { "epoch": 1.05, - "learning_rate": 4.865511845605732e-05, - "loss": 0.3815, + "learning_rate": 4.865492105929463e-05, + "loss": 0.2942, "step": 1759500 }, { "epoch": 1.06, - "learning_rate": 4.8653018490496755e-05, - "loss": 0.3938, + "learning_rate": 4.865282109373406e-05, + "loss": 0.2948, "step": 1760000 }, { "epoch": 1.06, - "learning_rate": 4.865091852493619e-05, - "loss": 0.3911, + "learning_rate": 4.86507211281735e-05, + "loss": 0.295, "step": 1760500 }, { "epoch": 1.06, - "learning_rate": 4.864882275930675e-05, - "loss": 0.3852, + "learning_rate": 4.8648625362544056e-05, + "loss": 0.2944, "step": 1761000 }, { "epoch": 1.06, - "learning_rate": 4.864672699367731e-05, - "loss": 0.3847, + "learning_rate": 4.864652959691461e-05, + "loss": 0.2904, "step": 1761500 }, { "epoch": 1.06, - "learning_rate": 4.8644627028116736e-05, - "loss": 0.3929, + "learning_rate": 4.864442963135404e-05, + "loss": 0.2954, "step": 1762000 }, { "epoch": 1.06, - "learning_rate": 4.864252706255617e-05, - "loss": 0.3846, + "learning_rate": 4.864232966579348e-05, + "loss": 0.2929, "step": 1762500 }, { "epoch": 1.06, - "learning_rate": 4.864043129692673e-05, - "loss": 0.3799, + "learning_rate": 4.864022970023292e-05, + "loss": 0.2925, "step": 1763000 }, { "epoch": 1.06, - "learning_rate": 4.863833133136617e-05, - "loss": 0.3945, + "learning_rate": 4.863812973467235e-05, + "loss": 0.3009, "step": 1763500 }, { "epoch": 1.06, - "learning_rate": 4.8636231365805597e-05, - "loss": 0.3853, + "learning_rate": 4.863602976911179e-05, + "loss": 0.2959, "step": 1764000 }, { "epoch": 1.06, - "learning_rate": 4.863413140024503e-05, - "loss": 0.3918, + "learning_rate": 4.8633929803551224e-05, + "loss": 0.2972, "step": 1764500 }, { "epoch": 1.06, - "learning_rate": 4.863203143468447e-05, - "loss": 0.391, + "learning_rate": 4.863182983799066e-05, + "loss": 0.2951, "step": 1765000 }, { "epoch": 1.06, - "learning_rate": 4.8629931469123904e-05, - "loss": 0.3959, + "learning_rate": 4.862972987243009e-05, + "loss": 0.3022, "step": 1765500 }, { "epoch": 1.06, - "learning_rate": 4.862783150356334e-05, - "loss": 0.3926, + "learning_rate": 4.862763410680065e-05, + "loss": 0.2943, "step": 1766000 }, { "epoch": 1.06, - "learning_rate": 4.862573153800278e-05, - "loss": 0.3802, + "learning_rate": 4.8625534141240085e-05, + "loss": 0.2935, "step": 1766500 }, { "epoch": 1.06, - "learning_rate": 4.862363157244221e-05, - "loss": 0.3799, + "learning_rate": 4.862343837561064e-05, + "loss": 0.2901, "step": 1767000 }, { "epoch": 1.06, - "learning_rate": 4.8621531606881644e-05, - "loss": 0.3911, + "learning_rate": 4.862133841005007e-05, + "loss": 0.2905, "step": 1767500 }, { "epoch": 1.06, - "learning_rate": 4.8619431641321085e-05, - "loss": 0.3793, + "learning_rate": 4.861923844448951e-05, + "loss": 0.2942, "step": 1768000 }, { "epoch": 1.06, - "learning_rate": 4.861733167576052e-05, - "loss": 0.3913, + "learning_rate": 4.8617138478928945e-05, + "loss": 0.2962, "step": 1768500 }, { "epoch": 1.06, - "learning_rate": 4.861523171019995e-05, - "loss": 0.385, + "learning_rate": 4.861503851336838e-05, + "loss": 0.289, "step": 1769000 }, { "epoch": 1.06, - "learning_rate": 4.8613131744639385e-05, - "loss": 0.391, + "learning_rate": 4.861293854780782e-05, + "loss": 0.2954, "step": 1769500 }, { "epoch": 1.06, - "learning_rate": 4.8611035979009945e-05, - "loss": 0.3795, + "learning_rate": 4.861083858224725e-05, + "loss": 0.2922, "step": 1770000 }, { "epoch": 1.06, - "learning_rate": 4.860893601344938e-05, - "loss": 0.3885, + "learning_rate": 4.8608738616686686e-05, + "loss": 0.2962, "step": 1770500 }, { "epoch": 1.06, - "learning_rate": 4.860683604788882e-05, - "loss": 0.3787, + "learning_rate": 4.860663865112612e-05, + "loss": 0.2899, "step": 1771000 }, { "epoch": 1.06, - "learning_rate": 4.860473608232825e-05, - "loss": 0.3874, + "learning_rate": 4.86045470854278e-05, + "loss": 0.2898, "step": 1771500 }, { "epoch": 1.06, - "learning_rate": 4.860263611676768e-05, - "loss": 0.3908, + "learning_rate": 4.860244711986723e-05, + "loss": 0.3012, "step": 1772000 }, { "epoch": 1.06, - "learning_rate": 4.860053615120712e-05, - "loss": 0.3824, + "learning_rate": 4.860034715430667e-05, + "loss": 0.2882, "step": 1772500 }, { "epoch": 1.06, - "learning_rate": 4.859843618564655e-05, - "loss": 0.3894, + "learning_rate": 4.859824718874611e-05, + "loss": 0.2922, "step": 1773000 }, { "epoch": 1.06, - "learning_rate": 4.8596336220085986e-05, - "loss": 0.3823, + "learning_rate": 4.859614722318554e-05, + "loss": 0.2914, "step": 1773500 }, { "epoch": 1.06, - "learning_rate": 4.859424045445655e-05, - "loss": 0.3939, + "learning_rate": 4.8594047257624974e-05, + "loss": 0.2972, "step": 1774000 }, { "epoch": 1.06, - "learning_rate": 4.85921446888271e-05, - "loss": 0.3903, + "learning_rate": 4.859195149199553e-05, + "loss": 0.2929, "step": 1774500 }, { "epoch": 1.06, - "learning_rate": 4.859004472326654e-05, - "loss": 0.3919, + "learning_rate": 4.858985152643497e-05, + "loss": 0.2929, "step": 1775000 }, { "epoch": 1.06, - "learning_rate": 4.8587944757705974e-05, - "loss": 0.3901, + "learning_rate": 4.85877515608744e-05, + "loss": 0.2955, "step": 1775500 }, { "epoch": 1.06, - "learning_rate": 4.858584479214541e-05, - "loss": 0.3819, + "learning_rate": 4.8585651595313835e-05, + "loss": 0.2907, "step": 1776000 }, { "epoch": 1.07, - "learning_rate": 4.858374482658485e-05, - "loss": 0.3833, + "learning_rate": 4.8583551629753275e-05, + "loss": 0.2912, "step": 1776500 }, { "epoch": 1.07, - "learning_rate": 4.8581644861024274e-05, - "loss": 0.4008, + "learning_rate": 4.858145166419271e-05, + "loss": 0.2998, "step": 1777000 }, { "epoch": 1.07, - "learning_rate": 4.8579549095394835e-05, - "loss": 0.3826, + "learning_rate": 4.857935169863214e-05, + "loss": 0.2914, "step": 1777500 }, { "epoch": 1.07, - "learning_rate": 4.8577449129834275e-05, - "loss": 0.3932, + "learning_rate": 4.8577251733071575e-05, + "loss": 0.2944, "step": 1778000 }, { "epoch": 1.07, - "learning_rate": 4.857534916427371e-05, - "loss": 0.3849, + "learning_rate": 4.857515176751101e-05, + "loss": 0.2868, "step": 1778500 }, { "epoch": 1.07, - "learning_rate": 4.8573249198713135e-05, - "loss": 0.3826, + "learning_rate": 4.857305180195045e-05, + "loss": 0.2983, "step": 1779000 }, { "epoch": 1.07, - "learning_rate": 4.8571149233152575e-05, - "loss": 0.3908, + "learning_rate": 4.857095603632101e-05, + "loss": 0.2902, "step": 1779500 }, { "epoch": 1.07, - "learning_rate": 4.8569053467523136e-05, - "loss": 0.3839, + "learning_rate": 4.8568856070760436e-05, + "loss": 0.2977, "step": 1780000 }, { "epoch": 1.07, - "learning_rate": 4.856695350196257e-05, - "loss": 0.3907, + "learning_rate": 4.8566760305130996e-05, + "loss": 0.2906, "step": 1780500 }, { "epoch": 1.07, - "learning_rate": 4.8564853536402e-05, - "loss": 0.3857, + "learning_rate": 4.856466033957043e-05, + "loss": 0.2946, "step": 1781000 }, { "epoch": 1.07, - "learning_rate": 4.8562753570841436e-05, - "loss": 0.384, + "learning_rate": 4.856256037400987e-05, + "loss": 0.2864, "step": 1781500 }, { "epoch": 1.07, - "learning_rate": 4.856065360528087e-05, - "loss": 0.3784, + "learning_rate": 4.85604604084493e-05, + "loss": 0.2961, "step": 1782000 }, { "epoch": 1.07, - "learning_rate": 4.85585536397203e-05, - "loss": 0.3901, + "learning_rate": 4.855836044288873e-05, + "loss": 0.2993, "step": 1782500 }, { "epoch": 1.07, - "learning_rate": 4.855645787409086e-05, - "loss": 0.3793, + "learning_rate": 4.855626047732817e-05, + "loss": 0.2912, "step": 1783000 }, { "epoch": 1.07, - "learning_rate": 4.8554357908530303e-05, - "loss": 0.3789, + "learning_rate": 4.8554160511767604e-05, + "loss": 0.2932, "step": 1783500 }, { "epoch": 1.07, - "learning_rate": 4.855225794296973e-05, - "loss": 0.384, + "learning_rate": 4.8552060546207044e-05, + "loss": 0.2932, "step": 1784000 }, { "epoch": 1.07, - "learning_rate": 4.855015797740917e-05, - "loss": 0.3803, + "learning_rate": 4.854996058064648e-05, + "loss": 0.2901, "step": 1784500 }, { "epoch": 1.07, - "learning_rate": 4.8548058011848604e-05, - "loss": 0.3828, + "learning_rate": 4.854786061508591e-05, + "loss": 0.2931, "step": 1785000 }, { "epoch": 1.07, - "learning_rate": 4.8545966446150284e-05, - "loss": 0.4057, + "learning_rate": 4.854576064952535e-05, + "loss": 0.304, "step": 1785500 }, { "epoch": 1.07, - "learning_rate": 4.854387068052084e-05, - "loss": 0.4014, + "learning_rate": 4.8543664883895905e-05, + "loss": 0.2942, "step": 1786000 }, { "epoch": 1.07, - "learning_rate": 4.854177071496027e-05, - "loss": 0.383, + "learning_rate": 4.854156491833534e-05, + "loss": 0.2911, "step": 1786500 }, { "epoch": 1.07, - "learning_rate": 4.8539670749399705e-05, - "loss": 0.3803, + "learning_rate": 4.853946495277477e-05, + "loss": 0.2896, "step": 1787000 }, { "epoch": 1.07, - "learning_rate": 4.8537570783839145e-05, - "loss": 0.379, + "learning_rate": 4.853736498721421e-05, + "loss": 0.2907, "step": 1787500 }, { "epoch": 1.07, - "learning_rate": 4.853547081827858e-05, - "loss": 0.3838, + "learning_rate": 4.8535269221584765e-05, + "loss": 0.2904, "step": 1788000 }, { "epoch": 1.07, - "learning_rate": 4.853337085271801e-05, - "loss": 0.382, + "learning_rate": 4.85331692560242e-05, + "loss": 0.2919, "step": 1788500 }, { "epoch": 1.07, - "learning_rate": 4.853127088715745e-05, - "loss": 0.3894, + "learning_rate": 4.853106929046363e-05, + "loss": 0.2901, "step": 1789000 }, { "epoch": 1.07, - "learning_rate": 4.8529170921596886e-05, - "loss": 0.3936, + "learning_rate": 4.8528973524834186e-05, + "loss": 0.2938, "step": 1789500 }, { "epoch": 1.07, - "learning_rate": 4.852707095603632e-05, - "loss": 0.3914, + "learning_rate": 4.8526873559273626e-05, + "loss": 0.289, "step": 1790000 }, { "epoch": 1.07, - "learning_rate": 4.852497099047576e-05, - "loss": 0.3938, + "learning_rate": 4.852477359371306e-05, + "loss": 0.2957, "step": 1790500 }, { "epoch": 1.07, - "learning_rate": 4.8522871024915186e-05, - "loss": 0.3932, + "learning_rate": 4.85226736281525e-05, + "loss": 0.2988, "step": 1791000 }, { "epoch": 1.07, - "learning_rate": 4.8520771059354626e-05, - "loss": 0.3812, + "learning_rate": 4.852057786252306e-05, + "loss": 0.2868, "step": 1791500 }, { "epoch": 1.07, - "learning_rate": 4.851867109379406e-05, - "loss": 0.3874, + "learning_rate": 4.851847789696249e-05, + "loss": 0.2947, "step": 1792000 }, { "epoch": 1.07, - "learning_rate": 4.851657532816462e-05, - "loss": 0.3901, + "learning_rate": 4.851637793140192e-05, + "loss": 0.289, "step": 1792500 }, { "epoch": 1.07, - "learning_rate": 4.8514475362604054e-05, - "loss": 0.3874, + "learning_rate": 4.851427796584136e-05, + "loss": 0.2933, "step": 1793000 }, { "epoch": 1.08, - "learning_rate": 4.851237539704349e-05, - "loss": 0.3868, + "learning_rate": 4.8512178000280794e-05, + "loss": 0.2872, "step": 1793500 }, { "epoch": 1.08, - "learning_rate": 4.851027543148292e-05, - "loss": 0.3923, + "learning_rate": 4.851007803472023e-05, + "loss": 0.2964, "step": 1794000 }, { "epoch": 1.08, - "learning_rate": 4.8508175465922354e-05, - "loss": 0.3934, + "learning_rate": 4.850797806915967e-05, + "loss": 0.2929, "step": 1794500 }, { "epoch": 1.08, - "learning_rate": 4.8506075500361794e-05, - "loss": 0.3794, + "learning_rate": 4.85058781035991e-05, + "loss": 0.2898, "step": 1795000 }, { "epoch": 1.08, - "learning_rate": 4.850397553480123e-05, - "loss": 0.3842, + "learning_rate": 4.8503778138038535e-05, + "loss": 0.2871, "step": 1795500 }, { "epoch": 1.08, - "learning_rate": 4.850187556924066e-05, - "loss": 0.3918, + "learning_rate": 4.8501678172477975e-05, + "loss": 0.2933, "step": 1796000 }, { "epoch": 1.08, - "learning_rate": 4.8499779803611215e-05, - "loss": 0.3919, + "learning_rate": 4.849957820691741e-05, + "loss": 0.2925, "step": 1796500 }, { "epoch": 1.08, - "learning_rate": 4.8497679838050655e-05, - "loss": 0.3865, + "learning_rate": 4.849747824135684e-05, + "loss": 0.2971, "step": 1797000 }, { "epoch": 1.08, - "learning_rate": 4.8495584072421215e-05, - "loss": 0.3809, + "learning_rate": 4.8495382475727395e-05, + "loss": 0.2926, "step": 1797500 }, { "epoch": 1.08, - "learning_rate": 4.849348410686065e-05, - "loss": 0.3839, + "learning_rate": 4.8493282510166836e-05, + "loss": 0.2901, "step": 1798000 }, { "epoch": 1.08, - "learning_rate": 4.849138414130008e-05, - "loss": 0.378, + "learning_rate": 4.849118674453739e-05, + "loss": 0.2884, "step": 1798500 }, { "epoch": 1.08, - "learning_rate": 4.8489284175739516e-05, - "loss": 0.3871, + "learning_rate": 4.848908677897682e-05, + "loss": 0.297, "step": 1799000 }, { "epoch": 1.08, - "learning_rate": 4.8487188410110076e-05, - "loss": 0.3898, + "learning_rate": 4.848698681341626e-05, + "loss": 0.294, "step": 1799500 }, { "epoch": 1.08, - "learning_rate": 4.848508844454951e-05, - "loss": 0.3869, + "learning_rate": 4.8484886847855696e-05, + "loss": 0.2912, "step": 1800000 }, { "epoch": 1.08, - "eval_loss": 0.3730049133300781, - "eval_runtime": 1122.2369, - "eval_samples_per_second": 469.348, - "eval_steps_per_second": 78.225, + "eval_loss": 0.2663831114768982, + "eval_runtime": 1462.6548, + "eval_samples_per_second": 360.112, + "eval_steps_per_second": 60.019, "step": 1800000 }, { "epoch": 1.08, - "learning_rate": 4.848298847898894e-05, - "loss": 0.3832, + "learning_rate": 4.848278688229513e-05, + "loss": 0.2876, "step": 1800500 }, { "epoch": 1.08, - "learning_rate": 4.8480888513428376e-05, - "loss": 0.3851, + "learning_rate": 4.848068691673457e-05, + "loss": 0.2896, "step": 1801000 }, { "epoch": 1.08, - "learning_rate": 4.847878854786781e-05, - "loss": 0.3911, + "learning_rate": 4.8478586951174004e-05, + "loss": 0.2962, "step": 1801500 }, { "epoch": 1.08, - "learning_rate": 4.847668858230725e-05, - "loss": 0.3917, + "learning_rate": 4.847648698561343e-05, + "loss": 0.2966, "step": 1802000 }, { "epoch": 1.08, - "learning_rate": 4.847459281667781e-05, - "loss": 0.3871, + "learning_rate": 4.847439121998399e-05, + "loss": 0.2965, "step": 1802500 }, { "epoch": 1.08, - "learning_rate": 4.847249285111724e-05, - "loss": 0.3916, + "learning_rate": 4.847229125442343e-05, + "loss": 0.295, "step": 1803000 }, { "epoch": 1.08, - "learning_rate": 4.847039288555667e-05, - "loss": 0.3815, + "learning_rate": 4.8470191288862864e-05, + "loss": 0.2863, "step": 1803500 }, { "epoch": 1.08, - "learning_rate": 4.846829711992723e-05, - "loss": 0.3927, + "learning_rate": 4.84680913233023e-05, + "loss": 0.296, "step": 1804000 }, { "epoch": 1.08, - "learning_rate": 4.846620135429779e-05, - "loss": 0.3978, + "learning_rate": 4.846599135774173e-05, + "loss": 0.2989, "step": 1804500 }, { "epoch": 1.08, - "learning_rate": 4.8464101388737225e-05, - "loss": 0.3767, + "learning_rate": 4.8463891392181165e-05, + "loss": 0.2899, "step": 1805000 }, { "epoch": 1.08, - "learning_rate": 4.846200142317666e-05, - "loss": 0.38, + "learning_rate": 4.84617914266206e-05, + "loss": 0.2842, "step": 1805500 }, { "epoch": 1.08, - "learning_rate": 4.84599014576161e-05, - "loss": 0.3917, + "learning_rate": 4.845969146106004e-05, + "loss": 0.2935, "step": 1806000 }, { "epoch": 1.08, - "learning_rate": 4.845780149205553e-05, - "loss": 0.391, + "learning_rate": 4.84575956954306e-05, + "loss": 0.2924, "step": 1806500 }, { "epoch": 1.08, - "learning_rate": 4.8455701526494965e-05, - "loss": 0.3864, + "learning_rate": 4.8455495729870025e-05, + "loss": 0.2933, "step": 1807000 }, { "epoch": 1.08, - "learning_rate": 4.8453601560934406e-05, - "loss": 0.3811, + "learning_rate": 4.8453395764309466e-05, + "loss": 0.2915, "step": 1807500 }, { "epoch": 1.08, - "learning_rate": 4.845150159537383e-05, - "loss": 0.3821, + "learning_rate": 4.84512957987489e-05, + "loss": 0.2895, "step": 1808000 }, { "epoch": 1.08, - "learning_rate": 4.8449401629813266e-05, - "loss": 0.3937, + "learning_rate": 4.844919583318833e-05, + "loss": 0.3027, "step": 1808500 }, { "epoch": 1.08, - "learning_rate": 4.8447301664252706e-05, - "loss": 0.3879, + "learning_rate": 4.844709586762777e-05, + "loss": 0.293, "step": 1809000 }, { "epoch": 1.08, - "learning_rate": 4.844520169869214e-05, - "loss": 0.3912, + "learning_rate": 4.8445000101998326e-05, + "loss": 0.2968, "step": 1809500 }, { "epoch": 1.09, - "learning_rate": 4.844310173313157e-05, - "loss": 0.3915, + "learning_rate": 4.844290013643776e-05, + "loss": 0.2948, "step": 1810000 }, { "epoch": 1.09, - "learning_rate": 4.844100176757101e-05, - "loss": 0.3805, + "learning_rate": 4.844080017087719e-05, + "loss": 0.2878, "step": 1810500 }, { "epoch": 1.09, - "learning_rate": 4.8438901802010447e-05, - "loss": 0.3835, + "learning_rate": 4.8438700205316633e-05, + "loss": 0.2899, "step": 1811000 }, { "epoch": 1.09, - "learning_rate": 4.8436806036381e-05, - "loss": 0.3797, + "learning_rate": 4.843660023975607e-05, + "loss": 0.2888, "step": 1811500 }, { "epoch": 1.09, - "learning_rate": 4.843470607082044e-05, - "loss": 0.3893, + "learning_rate": 4.84345002741955e-05, + "loss": 0.2958, "step": 1812000 }, { "epoch": 1.09, - "learning_rate": 4.8432606105259874e-05, - "loss": 0.3827, + "learning_rate": 4.843240030863494e-05, + "loss": 0.2845, "step": 1812500 }, { "epoch": 1.09, - "learning_rate": 4.843050613969931e-05, - "loss": 0.3751, + "learning_rate": 4.8430304543005494e-05, + "loss": 0.2833, "step": 1813000 }, { "epoch": 1.09, - "learning_rate": 4.842840617413875e-05, - "loss": 0.3842, + "learning_rate": 4.842820457744493e-05, + "loss": 0.2898, "step": 1813500 }, { "epoch": 1.09, - "learning_rate": 4.842630620857818e-05, - "loss": 0.3922, + "learning_rate": 4.842610461188436e-05, + "loss": 0.2924, "step": 1814000 }, { "epoch": 1.09, - "learning_rate": 4.8424206243017614e-05, - "loss": 0.3774, + "learning_rate": 4.84240046463238e-05, + "loss": 0.2845, "step": 1814500 }, { "epoch": 1.09, - "learning_rate": 4.8422106277457055e-05, - "loss": 0.3884, + "learning_rate": 4.8421904680763235e-05, + "loss": 0.2982, "step": 1815000 }, { "epoch": 1.09, - "learning_rate": 4.842001051182761e-05, - "loss": 0.3936, + "learning_rate": 4.841980471520267e-05, + "loss": 0.2967, "step": 1815500 }, { "epoch": 1.09, - "learning_rate": 4.841791054626704e-05, - "loss": 0.388, + "learning_rate": 4.841770474964211e-05, + "loss": 0.2914, "step": 1816000 }, { "epoch": 1.09, - "learning_rate": 4.8415810580706475e-05, - "loss": 0.3834, + "learning_rate": 4.841560478408154e-05, + "loss": 0.2921, "step": 1816500 }, { "epoch": 1.09, - "learning_rate": 4.841371481507703e-05, - "loss": 0.381, + "learning_rate": 4.8413504818520975e-05, + "loss": 0.2926, "step": 1817000 }, { "epoch": 1.09, - "learning_rate": 4.841161484951647e-05, - "loss": 0.3799, + "learning_rate": 4.841140485296041e-05, + "loss": 0.2849, "step": 1817500 }, { "epoch": 1.09, - "learning_rate": 4.84095148839559e-05, - "loss": 0.3851, + "learning_rate": 4.840930908733097e-05, + "loss": 0.2957, "step": 1818000 }, { "epoch": 1.09, - "learning_rate": 4.8407414918395336e-05, - "loss": 0.385, + "learning_rate": 4.84072091217704e-05, + "loss": 0.2906, "step": 1818500 }, { "epoch": 1.09, - "learning_rate": 4.8405314952834776e-05, - "loss": 0.3944, + "learning_rate": 4.840510915620984e-05, + "loss": 0.2978, "step": 1819000 }, { "epoch": 1.09, - "learning_rate": 4.840321498727421e-05, - "loss": 0.3894, + "learning_rate": 4.840300919064927e-05, + "loss": 0.2884, "step": 1819500 }, { "epoch": 1.09, - "learning_rate": 4.840111502171364e-05, - "loss": 0.3807, + "learning_rate": 4.84009092250887e-05, + "loss": 0.2934, "step": 1820000 }, { "epoch": 1.09, - "learning_rate": 4.8399015056153076e-05, - "loss": 0.3926, + "learning_rate": 4.8398817659390384e-05, + "loss": 0.2962, "step": 1820500 }, { "epoch": 1.09, - "learning_rate": 4.839691509059251e-05, - "loss": 0.3945, + "learning_rate": 4.839671769382982e-05, + "loss": 0.2936, "step": 1821000 }, { "epoch": 1.09, - "learning_rate": 4.839481512503195e-05, - "loss": 0.3791, + "learning_rate": 4.839461772826926e-05, + "loss": 0.2869, "step": 1821500 }, { "epoch": 1.09, - "learning_rate": 4.8392715159471384e-05, - "loss": 0.3839, + "learning_rate": 4.839251776270869e-05, + "loss": 0.295, "step": 1822000 }, { "epoch": 1.09, - "learning_rate": 4.8390619393841944e-05, - "loss": 0.3792, + "learning_rate": 4.8390417797148124e-05, + "loss": 0.2895, "step": 1822500 }, { "epoch": 1.09, - "learning_rate": 4.838851942828137e-05, - "loss": 0.3857, + "learning_rate": 4.8388322031518685e-05, + "loss": 0.2953, "step": 1823000 }, { "epoch": 1.09, - "learning_rate": 4.838641946272081e-05, - "loss": 0.3857, + "learning_rate": 4.838622206595812e-05, + "loss": 0.2942, "step": 1823500 }, { "epoch": 1.09, - "learning_rate": 4.8384319497160244e-05, - "loss": 0.3929, + "learning_rate": 4.838412210039755e-05, + "loss": 0.297, "step": 1824000 }, { "epoch": 1.09, - "learning_rate": 4.838221953159968e-05, - "loss": 0.3916, + "learning_rate": 4.838202213483699e-05, + "loss": 0.2921, "step": 1824500 }, { "epoch": 1.09, - "learning_rate": 4.838012796590136e-05, - "loss": 0.3908, + "learning_rate": 4.8379922169276425e-05, + "loss": 0.2962, "step": 1825000 }, { "epoch": 1.09, - "learning_rate": 4.837802800034079e-05, - "loss": 0.3962, + "learning_rate": 4.837782220371586e-05, + "loss": 0.2963, "step": 1825500 }, { "epoch": 1.09, - "learning_rate": 4.837592803478023e-05, - "loss": 0.3875, + "learning_rate": 4.83757222381553e-05, + "loss": 0.2963, "step": 1826000 }, { "epoch": 1.1, - "learning_rate": 4.8373828069219665e-05, - "loss": 0.4018, + "learning_rate": 4.8373622272594726e-05, + "loss": 0.2994, "step": 1826500 }, { "epoch": 1.1, - "learning_rate": 4.83717281036591e-05, - "loss": 0.387, + "learning_rate": 4.8371526506965286e-05, + "loss": 0.292, "step": 1827000 }, { "epoch": 1.1, - "learning_rate": 4.836962813809853e-05, - "loss": 0.3838, + "learning_rate": 4.836942654140472e-05, + "loss": 0.2941, "step": 1827500 }, { "epoch": 1.1, - "learning_rate": 4.8367528172537966e-05, - "loss": 0.3786, + "learning_rate": 4.836733077577527e-05, + "loss": 0.2847, "step": 1828000 }, { "epoch": 1.1, - "learning_rate": 4.8365428206977406e-05, - "loss": 0.3841, + "learning_rate": 4.836523081021471e-05, + "loss": 0.2932, "step": 1828500 }, { "epoch": 1.1, - "learning_rate": 4.836332824141684e-05, - "loss": 0.3852, + "learning_rate": 4.8363130844654147e-05, + "loss": 0.2896, "step": 1829000 }, { "epoch": 1.1, - "learning_rate": 4.836122827585627e-05, - "loss": 0.3851, + "learning_rate": 4.836103087909358e-05, + "loss": 0.2946, "step": 1829500 }, { "epoch": 1.1, - "learning_rate": 4.835912831029571e-05, - "loss": 0.3858, + "learning_rate": 4.835893091353302e-05, + "loss": 0.2908, "step": 1830000 }, { "epoch": 1.1, - "learning_rate": 4.835703254466627e-05, - "loss": 0.3832, + "learning_rate": 4.8356830947972454e-05, + "loss": 0.2899, "step": 1830500 }, { "epoch": 1.1, - "learning_rate": 4.83549325791057e-05, - "loss": 0.3832, + "learning_rate": 4.8354730982411894e-05, + "loss": 0.2913, "step": 1831000 }, { "epoch": 1.1, - "learning_rate": 4.8352832613545134e-05, - "loss": 0.3763, + "learning_rate": 4.835263101685132e-05, + "loss": 0.2872, "step": 1831500 }, { "epoch": 1.1, - "learning_rate": 4.8350732647984574e-05, - "loss": 0.3858, + "learning_rate": 4.8350531051290754e-05, + "loss": 0.2876, "step": 1832000 }, { "epoch": 1.1, - "learning_rate": 4.834863268242401e-05, - "loss": 0.3895, + "learning_rate": 4.8348435285661314e-05, + "loss": 0.2886, "step": 1832500 }, { "epoch": 1.1, - "learning_rate": 4.834653271686344e-05, - "loss": 0.3862, + "learning_rate": 4.8346335320100755e-05, + "loss": 0.295, "step": 1833000 }, { "epoch": 1.1, - "learning_rate": 4.834443275130288e-05, - "loss": 0.3824, + "learning_rate": 4.834423535454019e-05, + "loss": 0.2938, "step": 1833500 }, { "epoch": 1.1, - "learning_rate": 4.8342332785742315e-05, - "loss": 0.3773, + "learning_rate": 4.8342135388979615e-05, + "loss": 0.2917, "step": 1834000 }, { "epoch": 1.1, - "learning_rate": 4.8340241220043995e-05, - "loss": 0.3891, + "learning_rate": 4.8340035423419055e-05, + "loss": 0.2874, "step": 1834500 }, { "epoch": 1.1, - "learning_rate": 4.833814125448342e-05, - "loss": 0.3788, + "learning_rate": 4.8337939657789615e-05, + "loss": 0.2908, "step": 1835000 }, { "epoch": 1.1, - "learning_rate": 4.833604128892286e-05, - "loss": 0.3841, + "learning_rate": 4.833583969222905e-05, + "loss": 0.2879, "step": 1835500 }, { "epoch": 1.1, - "learning_rate": 4.8333941323362295e-05, - "loss": 0.3895, + "learning_rate": 4.8333739726668476e-05, + "loss": 0.2939, "step": 1836000 }, { "epoch": 1.1, - "learning_rate": 4.833184135780173e-05, - "loss": 0.3887, + "learning_rate": 4.8331639761107916e-05, + "loss": 0.2894, "step": 1836500 }, { "epoch": 1.1, - "learning_rate": 4.832974139224117e-05, - "loss": 0.3787, + "learning_rate": 4.832953979554735e-05, + "loss": 0.2821, "step": 1837000 }, { "epoch": 1.1, - "learning_rate": 4.832764562661172e-05, - "loss": 0.3877, + "learning_rate": 4.832744402991791e-05, + "loss": 0.2976, "step": 1837500 }, { "epoch": 1.1, - "learning_rate": 4.8325545661051156e-05, - "loss": 0.3863, + "learning_rate": 4.832534406435735e-05, + "loss": 0.2933, "step": 1838000 }, { "epoch": 1.1, - "learning_rate": 4.832344569549059e-05, - "loss": 0.3794, + "learning_rate": 4.8323244098796777e-05, + "loss": 0.29, "step": 1838500 }, { "epoch": 1.1, - "learning_rate": 4.832134572993003e-05, - "loss": 0.3775, + "learning_rate": 4.832114413323621e-05, + "loss": 0.2868, "step": 1839000 }, { "epoch": 1.1, - "learning_rate": 4.831924576436946e-05, - "loss": 0.3843, + "learning_rate": 4.831904416767565e-05, + "loss": 0.2958, "step": 1839500 }, { "epoch": 1.1, - "learning_rate": 4.83171457988089e-05, - "loss": 0.3916, + "learning_rate": 4.831694840204621e-05, + "loss": 0.2947, "step": 1840000 }, { "epoch": 1.1, - "learning_rate": 4.831505003317945e-05, - "loss": 0.3876, + "learning_rate": 4.8314848436485644e-05, + "loss": 0.2907, "step": 1840500 }, { "epoch": 1.1, - "learning_rate": 4.831295006761889e-05, - "loss": 0.3765, + "learning_rate": 4.831274847092507e-05, + "loss": 0.2897, "step": 1841000 }, { "epoch": 1.1, - "learning_rate": 4.8310850102058324e-05, - "loss": 0.3802, + "learning_rate": 4.831064850536451e-05, + "loss": 0.287, "step": 1841500 }, { "epoch": 1.1, - "learning_rate": 4.830875013649776e-05, - "loss": 0.3904, + "learning_rate": 4.8308556939666185e-05, + "loss": 0.2939, "step": 1842000 }, { "epoch": 1.1, - "learning_rate": 4.83066501709372e-05, - "loss": 0.3806, + "learning_rate": 4.8306456974105625e-05, + "loss": 0.2918, "step": 1842500 }, { "epoch": 1.1, - "learning_rate": 4.830455020537663e-05, - "loss": 0.3879, + "learning_rate": 4.830435700854506e-05, + "loss": 0.2906, "step": 1843000 }, { "epoch": 1.11, - "learning_rate": 4.8302454439747185e-05, - "loss": 0.3845, + "learning_rate": 4.83022570429845e-05, + "loss": 0.2886, "step": 1843500 }, { "epoch": 1.11, - "learning_rate": 4.8300354474186625e-05, - "loss": 0.3817, + "learning_rate": 4.830015707742393e-05, + "loss": 0.2856, "step": 1844000 }, { "epoch": 1.11, - "learning_rate": 4.829825450862606e-05, - "loss": 0.3811, + "learning_rate": 4.8298057111863366e-05, + "loss": 0.2908, "step": 1844500 }, { "epoch": 1.11, - "learning_rate": 4.829615454306549e-05, - "loss": 0.3963, + "learning_rate": 4.829596134623392e-05, + "loss": 0.296, "step": 1845000 }, { "epoch": 1.11, - "learning_rate": 4.829405457750493e-05, - "loss": 0.38, + "learning_rate": 4.829386138067336e-05, + "loss": 0.2926, "step": 1845500 }, { "epoch": 1.11, - "learning_rate": 4.8291954611944366e-05, - "loss": 0.3761, + "learning_rate": 4.829176561504391e-05, + "loss": 0.2876, "step": 1846000 }, { "epoch": 1.11, - "learning_rate": 4.828985884631492e-05, - "loss": 0.3914, + "learning_rate": 4.8289665649483346e-05, + "loss": 0.2938, "step": 1846500 }, { "epoch": 1.11, - "learning_rate": 4.828775888075435e-05, - "loss": 0.3886, + "learning_rate": 4.828756568392278e-05, + "loss": 0.2951, "step": 1847000 }, { "epoch": 1.11, - "learning_rate": 4.828565891519379e-05, - "loss": 0.3822, + "learning_rate": 4.828546571836222e-05, + "loss": 0.2888, "step": 1847500 }, { "epoch": 1.11, - "learning_rate": 4.8283558949633226e-05, - "loss": 0.3911, + "learning_rate": 4.8283365752801653e-05, + "loss": 0.2955, "step": 1848000 }, { "epoch": 1.11, - "learning_rate": 4.828145898407266e-05, - "loss": 0.3821, + "learning_rate": 4.828126578724109e-05, + "loss": 0.2967, "step": 1848500 }, { "epoch": 1.11, - "learning_rate": 4.82793590185121e-05, - "loss": 0.3819, + "learning_rate": 4.827916582168053e-05, + "loss": 0.2928, "step": 1849000 }, { "epoch": 1.11, - "learning_rate": 4.8277263252882654e-05, - "loss": 0.3834, + "learning_rate": 4.827706585611996e-05, + "loss": 0.2899, "step": 1849500 }, { "epoch": 1.11, - "learning_rate": 4.827516328732209e-05, - "loss": 0.3818, + "learning_rate": 4.8274965890559394e-05, + "loss": 0.2882, "step": 1850000 }, { "epoch": 1.11, - "learning_rate": 4.827306332176152e-05, - "loss": 0.3919, + "learning_rate": 4.827286592499883e-05, + "loss": 0.2918, "step": 1850500 }, { "epoch": 1.11, - "learning_rate": 4.827096335620096e-05, - "loss": 0.3839, + "learning_rate": 4.827076595943826e-05, + "loss": 0.2891, "step": 1851000 }, { "epoch": 1.11, - "learning_rate": 4.8268867590571514e-05, - "loss": 0.3853, + "learning_rate": 4.82686659938777e-05, + "loss": 0.2936, "step": 1851500 }, { "epoch": 1.11, - "learning_rate": 4.826676762501095e-05, - "loss": 0.3892, + "learning_rate": 4.826657022824826e-05, + "loss": 0.2904, "step": 1852000 }, { "epoch": 1.11, - "learning_rate": 4.826466765945039e-05, - "loss": 0.3789, + "learning_rate": 4.8264470262687695e-05, + "loss": 0.2887, "step": 1852500 }, { "epoch": 1.11, - "learning_rate": 4.826256769388982e-05, - "loss": 0.3776, + "learning_rate": 4.826237029712712e-05, + "loss": 0.2902, "step": 1853000 }, { "epoch": 1.11, - "learning_rate": 4.8260467728329255e-05, - "loss": 0.3923, + "learning_rate": 4.82602787314288e-05, + "loss": 0.298, "step": 1853500 }, { "epoch": 1.11, - "learning_rate": 4.825837196269981e-05, - "loss": 0.3838, + "learning_rate": 4.8258178765868236e-05, + "loss": 0.2901, "step": 1854000 }, { "epoch": 1.11, - "learning_rate": 4.825627199713925e-05, - "loss": 0.3759, + "learning_rate": 4.8256078800307676e-05, + "loss": 0.2887, "step": 1854500 }, { "epoch": 1.11, - "learning_rate": 4.82541762315098e-05, - "loss": 0.3762, + "learning_rate": 4.825397883474711e-05, + "loss": 0.2859, "step": 1855000 }, { "epoch": 1.11, - "learning_rate": 4.8252076265949236e-05, - "loss": 0.3844, + "learning_rate": 4.825187886918654e-05, + "loss": 0.2927, "step": 1855500 }, { "epoch": 1.11, - "learning_rate": 4.824997630038867e-05, - "loss": 0.394, + "learning_rate": 4.824977890362598e-05, + "loss": 0.2903, "step": 1856000 }, { "epoch": 1.11, - "learning_rate": 4.824787633482811e-05, - "loss": 0.3876, + "learning_rate": 4.8247678938065417e-05, + "loss": 0.291, "step": 1856500 }, { "epoch": 1.11, - "learning_rate": 4.824577636926754e-05, - "loss": 0.3875, + "learning_rate": 4.824557897250485e-05, + "loss": 0.2937, "step": 1857000 }, { "epoch": 1.11, - "learning_rate": 4.824367640370698e-05, - "loss": 0.391, + "learning_rate": 4.8243479006944283e-05, + "loss": 0.2912, "step": 1857500 }, { "epoch": 1.11, - "learning_rate": 4.8241576438146417e-05, - "loss": 0.3835, + "learning_rate": 4.8241383241314844e-05, + "loss": 0.2912, "step": 1858000 }, { "epoch": 1.11, - "learning_rate": 4.823947647258585e-05, - "loss": 0.3869, + "learning_rate": 4.823928327575428e-05, + "loss": 0.2948, "step": 1858500 }, { "epoch": 1.11, - "learning_rate": 4.8237380706956404e-05, - "loss": 0.3798, + "learning_rate": 4.823718331019372e-05, + "loss": 0.2895, "step": 1859000 }, { "epoch": 1.11, - "learning_rate": 4.8235280741395844e-05, - "loss": 0.4017, + "learning_rate": 4.823508334463315e-05, + "loss": 0.2951, "step": 1859500 }, { "epoch": 1.12, - "learning_rate": 4.823318077583528e-05, - "loss": 0.3846, + "learning_rate": 4.823298337907258e-05, + "loss": 0.2877, "step": 1860000 }, { "epoch": 1.12, - "learning_rate": 4.823108081027471e-05, - "loss": 0.396, + "learning_rate": 4.823088341351202e-05, + "loss": 0.3011, "step": 1860500 }, { "epoch": 1.12, - "learning_rate": 4.822898084471415e-05, - "loss": 0.3842, + "learning_rate": 4.822878344795145e-05, + "loss": 0.294, "step": 1861000 }, { "epoch": 1.12, - "learning_rate": 4.822688087915358e-05, - "loss": 0.382, + "learning_rate": 4.8226683482390885e-05, + "loss": 0.2903, "step": 1861500 }, { "epoch": 1.12, - "learning_rate": 4.822478511352414e-05, - "loss": 0.3866, + "learning_rate": 4.8224583516830325e-05, + "loss": 0.2853, "step": 1862000 }, { "epoch": 1.12, - "learning_rate": 4.822268514796357e-05, - "loss": 0.3776, + "learning_rate": 4.822248355126976e-05, + "loss": 0.2848, "step": 1862500 }, { "epoch": 1.12, - "learning_rate": 4.822058518240301e-05, - "loss": 0.3908, + "learning_rate": 4.822039198557144e-05, + "loss": 0.3018, "step": 1863000 }, { "epoch": 1.12, - "learning_rate": 4.8218485216842445e-05, - "loss": 0.3894, + "learning_rate": 4.821829202001087e-05, + "loss": 0.2903, "step": 1863500 }, { "epoch": 1.12, - "learning_rate": 4.821638525128188e-05, - "loss": 0.3822, + "learning_rate": 4.8216192054450306e-05, + "loss": 0.2939, "step": 1864000 }, { "epoch": 1.12, - "learning_rate": 4.821428948565244e-05, - "loss": 0.388, + "learning_rate": 4.8214092088889746e-05, + "loss": 0.2956, "step": 1864500 }, { "epoch": 1.12, - "learning_rate": 4.821218952009187e-05, - "loss": 0.3909, + "learning_rate": 4.821199212332917e-05, + "loss": 0.2987, "step": 1865000 }, { "epoch": 1.12, - "learning_rate": 4.8210089554531306e-05, - "loss": 0.381, + "learning_rate": 4.820989215776861e-05, + "loss": 0.2906, "step": 1865500 }, { "epoch": 1.12, - "learning_rate": 4.8207989588970746e-05, - "loss": 0.3861, + "learning_rate": 4.8207792192208046e-05, + "loss": 0.2951, "step": 1866000 }, { "epoch": 1.12, - "learning_rate": 4.820588962341017e-05, - "loss": 0.3803, + "learning_rate": 4.820569222664748e-05, + "loss": 0.2859, "step": 1866500 }, { "epoch": 1.12, - "learning_rate": 4.8203789657849606e-05, - "loss": 0.383, + "learning_rate": 4.8203596461018034e-05, + "loss": 0.2912, "step": 1867000 }, { "epoch": 1.12, - "learning_rate": 4.8201689692289047e-05, - "loss": 0.3778, + "learning_rate": 4.8201496495457474e-05, + "loss": 0.291, "step": 1867500 }, { "epoch": 1.12, - "learning_rate": 4.819959392665961e-05, - "loss": 0.381, + "learning_rate": 4.819939652989691e-05, + "loss": 0.2904, "step": 1868000 }, { "epoch": 1.12, - "learning_rate": 4.819749396109904e-05, - "loss": 0.3945, + "learning_rate": 4.819730496419859e-05, + "loss": 0.296, "step": 1868500 }, { "epoch": 1.12, - "learning_rate": 4.819539399553847e-05, - "loss": 0.3919, + "learning_rate": 4.819520499863802e-05, + "loss": 0.2926, "step": 1869000 }, { "epoch": 1.12, - "learning_rate": 4.819329822990903e-05, - "loss": 0.3882, + "learning_rate": 4.8193105033077455e-05, + "loss": 0.2901, "step": 1869500 }, { "epoch": 1.12, - "learning_rate": 4.819119826434847e-05, - "loss": 0.3737, + "learning_rate": 4.8191005067516895e-05, + "loss": 0.2865, "step": 1870000 }, { "epoch": 1.12, - "learning_rate": 4.81890982987879e-05, - "loss": 0.3878, + "learning_rate": 4.818890510195633e-05, + "loss": 0.2957, "step": 1870500 }, { "epoch": 1.12, - "learning_rate": 4.8186998333227335e-05, - "loss": 0.3902, + "learning_rate": 4.818680513639576e-05, + "loss": 0.2971, "step": 1871000 }, { "epoch": 1.12, - "learning_rate": 4.818489836766677e-05, - "loss": 0.3831, + "learning_rate": 4.81847051708352e-05, + "loss": 0.297, "step": 1871500 }, { "epoch": 1.12, - "learning_rate": 4.81827984021062e-05, - "loss": 0.3848, + "learning_rate": 4.818260520527463e-05, + "loss": 0.2948, "step": 1872000 }, { "epoch": 1.12, - "learning_rate": 4.818069843654564e-05, - "loss": 0.3829, + "learning_rate": 4.818050523971407e-05, + "loss": 0.2886, "step": 1872500 }, { "epoch": 1.12, - "learning_rate": 4.8178598470985075e-05, - "loss": 0.3838, + "learning_rate": 4.81784052741535e-05, + "loss": 0.2878, "step": 1873000 }, { "epoch": 1.12, - "learning_rate": 4.817649850542451e-05, - "loss": 0.3757, + "learning_rate": 4.8176305308592936e-05, + "loss": 0.2874, "step": 1873500 }, { "epoch": 1.12, - "learning_rate": 4.817440273979506e-05, - "loss": 0.3717, + "learning_rate": 4.8174205343032376e-05, + "loss": 0.2889, "step": 1874000 }, { "epoch": 1.12, - "learning_rate": 4.81723027742345e-05, - "loss": 0.3832, + "learning_rate": 4.817210957740293e-05, + "loss": 0.2848, "step": 1874500 }, { "epoch": 1.12, - "learning_rate": 4.817020700860506e-05, - "loss": 0.3815, + "learning_rate": 4.817001381177349e-05, + "loss": 0.2919, "step": 1875000 }, { "epoch": 1.12, - "learning_rate": 4.8168107043044496e-05, - "loss": 0.3904, + "learning_rate": 4.8167913846212923e-05, + "loss": 0.2922, "step": 1875500 }, { "epoch": 1.12, - "learning_rate": 4.816600707748392e-05, - "loss": 0.3786, + "learning_rate": 4.816581388065236e-05, + "loss": 0.2906, "step": 1876000 }, { "epoch": 1.13, - "learning_rate": 4.816390711192336e-05, - "loss": 0.3805, + "learning_rate": 4.816371391509179e-05, + "loss": 0.2887, "step": 1876500 }, { "epoch": 1.13, - "learning_rate": 4.81618071463628e-05, - "loss": 0.3818, + "learning_rate": 4.8161613949531224e-05, + "loss": 0.292, "step": 1877000 }, { "epoch": 1.13, - "learning_rate": 4.815970718080223e-05, - "loss": 0.3885, + "learning_rate": 4.815951398397066e-05, + "loss": 0.2857, "step": 1877500 }, { "epoch": 1.13, - "learning_rate": 4.815760721524167e-05, - "loss": 0.3829, + "learning_rate": 4.81574140184101e-05, + "loss": 0.2939, "step": 1878000 }, { "epoch": 1.13, - "learning_rate": 4.8155507249681104e-05, - "loss": 0.3866, + "learning_rate": 4.815531405284953e-05, + "loss": 0.2915, "step": 1878500 }, { "epoch": 1.13, - "learning_rate": 4.815340728412054e-05, - "loss": 0.383, + "learning_rate": 4.8153214087288964e-05, + "loss": 0.2877, "step": 1879000 }, { "epoch": 1.13, - "learning_rate": 4.815130731855998e-05, - "loss": 0.3857, + "learning_rate": 4.8151118321659525e-05, + "loss": 0.2913, "step": 1879500 }, { "epoch": 1.13, - "learning_rate": 4.814920735299941e-05, - "loss": 0.3828, + "learning_rate": 4.814901835609896e-05, + "loss": 0.2908, "step": 1880000 }, { "epoch": 1.13, - "learning_rate": 4.8147107387438844e-05, - "loss": 0.3854, + "learning_rate": 4.814691839053839e-05, + "loss": 0.2893, "step": 1880500 }, { "epoch": 1.13, - "learning_rate": 4.8145011621809405e-05, - "loss": 0.3746, + "learning_rate": 4.814481842497783e-05, + "loss": 0.289, "step": 1881000 }, { "epoch": 1.13, - "learning_rate": 4.814291585617996e-05, - "loss": 0.3822, + "learning_rate": 4.8142722659348386e-05, + "loss": 0.2941, "step": 1881500 }, { "epoch": 1.13, - "learning_rate": 4.814081589061939e-05, - "loss": 0.3891, + "learning_rate": 4.8140626893718946e-05, + "loss": 0.2914, "step": 1882000 }, { "epoch": 1.13, - "learning_rate": 4.8138715925058825e-05, - "loss": 0.396, + "learning_rate": 4.813852692815838e-05, + "loss": 0.2972, "step": 1882500 }, { "epoch": 1.13, - "learning_rate": 4.8136615959498265e-05, - "loss": 0.3855, + "learning_rate": 4.813642696259781e-05, + "loss": 0.2925, "step": 1883000 }, { "epoch": 1.13, - "learning_rate": 4.813452019386882e-05, - "loss": 0.3837, + "learning_rate": 4.813432699703725e-05, + "loss": 0.2901, "step": 1883500 }, { "epoch": 1.13, - "learning_rate": 4.813242022830825e-05, - "loss": 0.388, + "learning_rate": 4.813222703147668e-05, + "loss": 0.2917, "step": 1884000 }, { "epoch": 1.13, - "learning_rate": 4.8130320262747686e-05, - "loss": 0.3806, + "learning_rate": 4.813013126584724e-05, + "loss": 0.2884, "step": 1884500 }, { "epoch": 1.13, - "learning_rate": 4.8128220297187126e-05, - "loss": 0.3927, + "learning_rate": 4.8128031300286674e-05, + "loss": 0.2908, "step": 1885000 }, { "epoch": 1.13, - "learning_rate": 4.812612033162656e-05, - "loss": 0.3793, + "learning_rate": 4.8125931334726114e-05, + "loss": 0.2857, "step": 1885500 }, { "epoch": 1.13, - "learning_rate": 4.812402036606599e-05, - "loss": 0.3837, + "learning_rate": 4.812383136916554e-05, + "loss": 0.292, "step": 1886000 }, { "epoch": 1.13, - "learning_rate": 4.812192040050543e-05, - "loss": 0.382, + "learning_rate": 4.812173140360498e-05, + "loss": 0.2937, "step": 1886500 }, { "epoch": 1.13, - "learning_rate": 4.811982463487599e-05, - "loss": 0.3724, + "learning_rate": 4.8119631438044414e-05, + "loss": 0.2846, "step": 1887000 }, { "epoch": 1.13, - "learning_rate": 4.811772466931542e-05, - "loss": 0.384, + "learning_rate": 4.811753147248385e-05, + "loss": 0.2917, "step": 1887500 }, { "epoch": 1.13, - "learning_rate": 4.811562470375486e-05, - "loss": 0.3807, + "learning_rate": 4.811543150692329e-05, + "loss": 0.2872, "step": 1888000 }, { "epoch": 1.13, - "learning_rate": 4.8113524738194294e-05, - "loss": 0.3765, + "learning_rate": 4.811333154136272e-05, + "loss": 0.2871, "step": 1888500 }, { "epoch": 1.13, - "learning_rate": 4.811142477263373e-05, - "loss": 0.4014, + "learning_rate": 4.8111235775733275e-05, + "loss": 0.3003, "step": 1889000 }, { "epoch": 1.13, - "learning_rate": 4.810932480707317e-05, - "loss": 0.3786, + "learning_rate": 4.810913581017271e-05, + "loss": 0.2832, "step": 1889500 }, { "epoch": 1.13, - "learning_rate": 4.81072248415126e-05, - "loss": 0.3835, + "learning_rate": 4.810703584461215e-05, + "loss": 0.2912, "step": 1890000 }, { "epoch": 1.13, - "learning_rate": 4.8105124875952035e-05, - "loss": 0.3775, + "learning_rate": 4.810493587905158e-05, + "loss": 0.2878, "step": 1890500 }, { "epoch": 1.13, - "learning_rate": 4.810302911032259e-05, - "loss": 0.3855, + "learning_rate": 4.8102840113422136e-05, + "loss": 0.293, "step": 1891000 }, { "epoch": 1.13, - "learning_rate": 4.810092914476203e-05, - "loss": 0.3814, + "learning_rate": 4.810074014786157e-05, + "loss": 0.2896, "step": 1891500 }, { "epoch": 1.13, - "learning_rate": 4.809882917920146e-05, - "loss": 0.3844, + "learning_rate": 4.809864018230101e-05, + "loss": 0.2859, "step": 1892000 }, { "epoch": 1.13, - "learning_rate": 4.8096729213640895e-05, - "loss": 0.3847, + "learning_rate": 4.809654021674044e-05, + "loss": 0.2912, "step": 1892500 }, { "epoch": 1.13, - "learning_rate": 4.8094629248080336e-05, - "loss": 0.3794, + "learning_rate": 4.8094444451111e-05, + "loss": 0.2978, "step": 1893000 }, { "epoch": 1.14, - "learning_rate": 4.809252928251976e-05, - "loss": 0.3843, + "learning_rate": 4.8092344485550437e-05, + "loss": 0.2934, "step": 1893500 }, { "epoch": 1.14, - "learning_rate": 4.809043351689032e-05, - "loss": 0.3811, + "learning_rate": 4.809024451998987e-05, + "loss": 0.2919, "step": 1894000 }, { "epoch": 1.14, - "learning_rate": 4.8088333551329756e-05, - "loss": 0.3879, + "learning_rate": 4.808814875436043e-05, + "loss": 0.291, "step": 1894500 }, { "epoch": 1.14, - "learning_rate": 4.8086233585769196e-05, - "loss": 0.3712, + "learning_rate": 4.8086048788799864e-05, + "loss": 0.2879, "step": 1895000 }, { "epoch": 1.14, - "learning_rate": 4.808413362020863e-05, - "loss": 0.3842, + "learning_rate": 4.8083948823239304e-05, + "loss": 0.2979, "step": 1895500 }, { "epoch": 1.14, - "learning_rate": 4.808203365464806e-05, - "loss": 0.3808, + "learning_rate": 4.808184885767873e-05, + "loss": 0.2911, "step": 1896000 }, { "epoch": 1.14, - "learning_rate": 4.8079937889018624e-05, - "loss": 0.3806, + "learning_rate": 4.8079748892118164e-05, + "loss": 0.2892, "step": 1896500 }, { "epoch": 1.14, - "learning_rate": 4.807783792345806e-05, - "loss": 0.382, + "learning_rate": 4.8077648926557604e-05, + "loss": 0.2882, "step": 1897000 }, { "epoch": 1.14, - "learning_rate": 4.807573795789749e-05, - "loss": 0.3868, + "learning_rate": 4.807554896099704e-05, + "loss": 0.2929, "step": 1897500 }, { "epoch": 1.14, - "learning_rate": 4.8073637992336924e-05, - "loss": 0.3776, + "learning_rate": 4.807344899543647e-05, + "loss": 0.2846, "step": 1898000 }, { "epoch": 1.14, - "learning_rate": 4.807153802677636e-05, - "loss": 0.3823, + "learning_rate": 4.807134902987591e-05, + "loss": 0.2893, "step": 1898500 }, { "epoch": 1.14, - "learning_rate": 4.806944646107804e-05, - "loss": 0.3941, + "learning_rate": 4.8069257464177585e-05, + "loss": 0.2964, "step": 1899000 }, { "epoch": 1.14, - "learning_rate": 4.806734649551747e-05, - "loss": 0.3788, + "learning_rate": 4.8067157498617025e-05, + "loss": 0.2886, "step": 1899500 }, { "epoch": 1.14, - "learning_rate": 4.8065246529956905e-05, - "loss": 0.3895, + "learning_rate": 4.806505753305646e-05, + "loss": 0.2952, "step": 1900000 }, { "epoch": 1.14, - "eval_loss": 0.36924830079078674, - "eval_runtime": 1118.768, - "eval_samples_per_second": 470.804, - "eval_steps_per_second": 78.468, + "eval_loss": 0.26594987511634827, + "eval_runtime": 1483.6613, + "eval_samples_per_second": 355.014, + "eval_steps_per_second": 59.169, "step": 1900000 }, { "epoch": 1.14, - "learning_rate": 4.8063146564396345e-05, - "loss": 0.3742, + "learning_rate": 4.806295756749589e-05, + "loss": 0.2848, "step": 1900500 }, { "epoch": 1.14, - "learning_rate": 4.806104659883578e-05, - "loss": 0.3917, + "learning_rate": 4.8060857601935326e-05, + "loss": 0.2939, "step": 1901000 }, { "epoch": 1.14, - "learning_rate": 4.805895083320633e-05, - "loss": 0.3923, + "learning_rate": 4.805875763637476e-05, + "loss": 0.2955, "step": 1901500 }, { "epoch": 1.14, - "learning_rate": 4.805685086764577e-05, - "loss": 0.3892, + "learning_rate": 4.80566576708142e-05, + "loss": 0.2916, "step": 1902000 }, { "epoch": 1.14, - "learning_rate": 4.8054750902085206e-05, - "loss": 0.3879, + "learning_rate": 4.805456190518476e-05, + "loss": 0.2908, "step": 1902500 }, { "epoch": 1.14, - "learning_rate": 4.805265093652464e-05, - "loss": 0.3825, + "learning_rate": 4.8052461939624187e-05, + "loss": 0.2886, "step": 1903000 }, { "epoch": 1.14, - "learning_rate": 4.805055097096408e-05, - "loss": 0.379, + "learning_rate": 4.805036197406362e-05, + "loss": 0.2832, "step": 1903500 }, { "epoch": 1.14, - "learning_rate": 4.804845100540351e-05, - "loss": 0.3899, + "learning_rate": 4.804826200850306e-05, + "loss": 0.2933, "step": 1904000 }, { "epoch": 1.14, - "learning_rate": 4.8046351039842946e-05, - "loss": 0.3952, + "learning_rate": 4.8046162042942494e-05, + "loss": 0.2939, "step": 1904500 }, { "epoch": 1.14, - "learning_rate": 4.804425107428239e-05, - "loss": 0.3812, + "learning_rate": 4.804406207738193e-05, + "loss": 0.2937, "step": 1905000 }, { "epoch": 1.14, - "learning_rate": 4.804215110872181e-05, - "loss": 0.3904, + "learning_rate": 4.804196211182137e-05, + "loss": 0.2932, "step": 1905500 }, { "epoch": 1.14, - "learning_rate": 4.8040055343092374e-05, - "loss": 0.3722, + "learning_rate": 4.80398621462608e-05, + "loss": 0.282, "step": 1906000 }, { "epoch": 1.14, - "learning_rate": 4.803795537753181e-05, - "loss": 0.3799, + "learning_rate": 4.8037766380631354e-05, + "loss": 0.2887, "step": 1906500 }, { "epoch": 1.14, - "learning_rate": 4.803585541197125e-05, - "loss": 0.3802, + "learning_rate": 4.803566641507079e-05, + "loss": 0.2924, "step": 1907000 }, { "epoch": 1.14, - "learning_rate": 4.803375544641068e-05, - "loss": 0.3858, + "learning_rate": 4.803356644951023e-05, + "loss": 0.2897, "step": 1907500 }, { "epoch": 1.14, - "learning_rate": 4.8031659680781234e-05, - "loss": 0.3761, + "learning_rate": 4.803146648394966e-05, + "loss": 0.285, "step": 1908000 }, { "epoch": 1.14, - "learning_rate": 4.802955971522067e-05, - "loss": 0.3877, + "learning_rate": 4.80293665183891e-05, + "loss": 0.2909, "step": 1908500 }, { "epoch": 1.14, - "learning_rate": 4.802745974966011e-05, - "loss": 0.391, + "learning_rate": 4.8027270752759655e-05, + "loss": 0.2946, "step": 1909000 }, { "epoch": 1.14, - "learning_rate": 4.802535978409954e-05, - "loss": 0.3751, + "learning_rate": 4.802517078719909e-05, + "loss": 0.2875, "step": 1909500 }, { "epoch": 1.15, - "learning_rate": 4.8023259818538975e-05, - "loss": 0.3732, + "learning_rate": 4.802307082163852e-05, + "loss": 0.2864, "step": 1910000 }, { "epoch": 1.15, - "learning_rate": 4.802115985297841e-05, - "loss": 0.3749, + "learning_rate": 4.802097085607796e-05, + "loss": 0.2903, "step": 1910500 }, { "epoch": 1.15, - "learning_rate": 4.801905988741784e-05, - "loss": 0.3757, + "learning_rate": 4.8018875090448516e-05, + "loss": 0.2899, "step": 1911000 }, { "epoch": 1.15, - "learning_rate": 4.801695992185728e-05, - "loss": 0.3848, + "learning_rate": 4.801677512488795e-05, + "loss": 0.2847, "step": 1911500 }, { "epoch": 1.15, - "learning_rate": 4.801486415622784e-05, - "loss": 0.3764, + "learning_rate": 4.801467515932738e-05, + "loss": 0.2845, "step": 1912000 }, { "epoch": 1.15, - "learning_rate": 4.801276419066727e-05, - "loss": 0.3785, + "learning_rate": 4.801257519376682e-05, + "loss": 0.2868, "step": 1912500 }, { "epoch": 1.15, - "learning_rate": 4.801066842503783e-05, - "loss": 0.3768, + "learning_rate": 4.801047522820626e-05, + "loss": 0.2921, "step": 1913000 }, { "epoch": 1.15, - "learning_rate": 4.800856845947726e-05, - "loss": 0.3748, + "learning_rate": 4.800837946257681e-05, + "loss": 0.2813, "step": 1913500 }, { "epoch": 1.15, - "learning_rate": 4.80064684939167e-05, - "loss": 0.3885, + "learning_rate": 4.8006279497016244e-05, + "loss": 0.2881, "step": 1914000 }, { "epoch": 1.15, - "learning_rate": 4.800436852835614e-05, - "loss": 0.3809, + "learning_rate": 4.8004179531455684e-05, + "loss": 0.2876, "step": 1914500 }, { "epoch": 1.15, - "learning_rate": 4.8002268562795563e-05, - "loss": 0.3818, + "learning_rate": 4.800207956589512e-05, + "loss": 0.2882, "step": 1915000 }, { "epoch": 1.15, - "learning_rate": 4.8000172797166124e-05, - "loss": 0.3903, + "learning_rate": 4.799998380026567e-05, + "loss": 0.293, "step": 1915500 }, { "epoch": 1.15, - "learning_rate": 4.7998072831605564e-05, - "loss": 0.3884, + "learning_rate": 4.799788383470511e-05, + "loss": 0.2901, "step": 1916000 }, { "epoch": 1.15, - "learning_rate": 4.7995972866045e-05, - "loss": 0.3864, + "learning_rate": 4.7995783869144545e-05, + "loss": 0.295, "step": 1916500 }, { "epoch": 1.15, - "learning_rate": 4.799387290048444e-05, - "loss": 0.3831, + "learning_rate": 4.799368390358398e-05, + "loss": 0.2903, "step": 1917000 }, { "epoch": 1.15, - "learning_rate": 4.7991772934923864e-05, - "loss": 0.3859, + "learning_rate": 4.799158813795453e-05, + "loss": 0.294, "step": 1917500 }, { "epoch": 1.15, - "learning_rate": 4.7989677169294425e-05, - "loss": 0.3852, + "learning_rate": 4.798948817239397e-05, + "loss": 0.2882, "step": 1918000 }, { "epoch": 1.15, - "learning_rate": 4.798757720373386e-05, - "loss": 0.3775, + "learning_rate": 4.7987388206833406e-05, + "loss": 0.2909, "step": 1918500 }, { "epoch": 1.15, - "learning_rate": 4.79854772381733e-05, - "loss": 0.3841, + "learning_rate": 4.798528824127284e-05, + "loss": 0.2927, "step": 1919000 }, { "epoch": 1.15, - "learning_rate": 4.7983377272612725e-05, - "loss": 0.3954, + "learning_rate": 4.798318827571228e-05, + "loss": 0.2935, "step": 1919500 }, { "epoch": 1.15, - "learning_rate": 4.798127730705216e-05, - "loss": 0.3829, + "learning_rate": 4.798108831015171e-05, + "loss": 0.2842, "step": 1920000 }, { "epoch": 1.15, - "learning_rate": 4.797918154142272e-05, - "loss": 0.3759, + "learning_rate": 4.7978992544522266e-05, + "loss": 0.2879, "step": 1920500 }, { "epoch": 1.15, - "learning_rate": 4.797708157586216e-05, - "loss": 0.3911, + "learning_rate": 4.7976896778892827e-05, + "loss": 0.297, "step": 1921000 }, { "epoch": 1.15, - "learning_rate": 4.797498161030159e-05, - "loss": 0.3777, + "learning_rate": 4.797479681333227e-05, + "loss": 0.2915, "step": 1921500 }, { "epoch": 1.15, - "learning_rate": 4.797288164474102e-05, - "loss": 0.3838, + "learning_rate": 4.7972696847771694e-05, + "loss": 0.2868, "step": 1922000 }, { "epoch": 1.15, - "learning_rate": 4.7970785879111586e-05, - "loss": 0.3774, + "learning_rate": 4.797059688221113e-05, + "loss": 0.2894, "step": 1922500 }, { "epoch": 1.15, - "learning_rate": 4.796868591355102e-05, - "loss": 0.3844, + "learning_rate": 4.796849691665057e-05, + "loss": 0.2898, "step": 1923000 }, { "epoch": 1.15, - "learning_rate": 4.796658594799045e-05, - "loss": 0.3995, + "learning_rate": 4.796639695109e-05, + "loss": 0.2954, "step": 1923500 }, { "epoch": 1.15, - "learning_rate": 4.7964485982429894e-05, - "loss": 0.3827, + "learning_rate": 4.7964296985529434e-05, + "loss": 0.2914, "step": 1924000 }, { "epoch": 1.15, - "learning_rate": 4.796238601686932e-05, - "loss": 0.3918, + "learning_rate": 4.7962197019968874e-05, + "loss": 0.2888, "step": 1924500 }, { "epoch": 1.15, - "learning_rate": 4.7960286051308754e-05, - "loss": 0.3963, + "learning_rate": 4.796010125433943e-05, + "loss": 0.2993, "step": 1925000 }, { "epoch": 1.15, - "learning_rate": 4.7958190285679314e-05, - "loss": 0.3845, + "learning_rate": 4.795800128877886e-05, + "loss": 0.2964, "step": 1925500 }, { "epoch": 1.15, - "learning_rate": 4.7956090320118754e-05, - "loss": 0.3806, + "learning_rate": 4.7955901323218295e-05, + "loss": 0.288, "step": 1926000 }, { "epoch": 1.16, - "learning_rate": 4.795399035455819e-05, - "loss": 0.3864, + "learning_rate": 4.7953801357657735e-05, + "loss": 0.2901, "step": 1926500 }, { "epoch": 1.16, - "learning_rate": 4.7951890388997614e-05, - "loss": 0.3938, + "learning_rate": 4.795170139209717e-05, + "loss": 0.2949, "step": 1927000 }, { "epoch": 1.16, - "learning_rate": 4.7949790423437055e-05, - "loss": 0.3857, + "learning_rate": 4.794960562646772e-05, + "loss": 0.2873, "step": 1927500 }, { "epoch": 1.16, - "learning_rate": 4.794769045787649e-05, - "loss": 0.3889, + "learning_rate": 4.794750566090716e-05, + "loss": 0.2868, "step": 1928000 }, { "epoch": 1.16, - "learning_rate": 4.794559049231592e-05, - "loss": 0.3838, + "learning_rate": 4.7945405695346596e-05, + "loss": 0.2978, "step": 1928500 }, { "epoch": 1.16, - "learning_rate": 4.794349052675536e-05, - "loss": 0.3804, + "learning_rate": 4.794330572978603e-05, + "loss": 0.2914, "step": 1929000 }, { "epoch": 1.16, - "learning_rate": 4.7941394761125915e-05, - "loss": 0.3869, + "learning_rate": 4.794120576422547e-05, + "loss": 0.2915, "step": 1929500 }, { "epoch": 1.16, - "learning_rate": 4.7939298995496476e-05, - "loss": 0.3834, + "learning_rate": 4.79391057986649e-05, + "loss": 0.2893, "step": 1930000 }, { "epoch": 1.16, - "learning_rate": 4.793719902993591e-05, - "loss": 0.3811, + "learning_rate": 4.7937010033035457e-05, + "loss": 0.2881, "step": 1930500 }, { "epoch": 1.16, - "learning_rate": 4.793509906437535e-05, - "loss": 0.3825, + "learning_rate": 4.793491006747489e-05, + "loss": 0.2995, "step": 1931000 }, { "epoch": 1.16, - "learning_rate": 4.7932999098814776e-05, - "loss": 0.384, + "learning_rate": 4.793281010191433e-05, + "loss": 0.2902, "step": 1931500 }, { "epoch": 1.16, - "learning_rate": 4.793089913325421e-05, - "loss": 0.3805, + "learning_rate": 4.7930710136353764e-05, + "loss": 0.2875, "step": 1932000 }, { "epoch": 1.16, - "learning_rate": 4.792879916769365e-05, - "loss": 0.3806, + "learning_rate": 4.79286101707932e-05, + "loss": 0.2948, "step": 1932500 }, { "epoch": 1.16, - "learning_rate": 4.792670340206421e-05, - "loss": 0.3967, + "learning_rate": 4.792651020523264e-05, + "loss": 0.2933, "step": 1933000 }, { "epoch": 1.16, - "learning_rate": 4.7924603436503644e-05, - "loss": 0.3778, + "learning_rate": 4.792441023967207e-05, + "loss": 0.287, "step": 1933500 }, { "epoch": 1.16, - "learning_rate": 4.792250347094307e-05, - "loss": 0.3866, + "learning_rate": 4.7922318673973745e-05, + "loss": 0.2907, "step": 1934000 }, { "epoch": 1.16, - "learning_rate": 4.792040350538251e-05, - "loss": 0.3874, + "learning_rate": 4.792021870841318e-05, + "loss": 0.2916, "step": 1934500 }, { "epoch": 1.16, - "learning_rate": 4.7918303539821944e-05, - "loss": 0.38, + "learning_rate": 4.791812294278374e-05, + "loss": 0.2932, "step": 1935000 }, { "epoch": 1.16, - "learning_rate": 4.791620357426138e-05, - "loss": 0.388, + "learning_rate": 4.791602297722318e-05, + "loss": 0.2888, "step": 1935500 }, { "epoch": 1.16, - "learning_rate": 4.791410360870082e-05, - "loss": 0.3874, + "learning_rate": 4.791392301166261e-05, + "loss": 0.293, "step": 1936000 }, { "epoch": 1.16, - "learning_rate": 4.791200364314025e-05, - "loss": 0.3827, + "learning_rate": 4.791182304610204e-05, + "loss": 0.2938, "step": 1936500 }, { "epoch": 1.16, - "learning_rate": 4.7909903677579685e-05, - "loss": 0.3782, + "learning_rate": 4.790972308054148e-05, + "loss": 0.2851, "step": 1937000 }, { "epoch": 1.16, - "learning_rate": 4.7907807911950245e-05, - "loss": 0.3859, + "learning_rate": 4.790762311498091e-05, + "loss": 0.2944, "step": 1937500 }, { "epoch": 1.16, - "learning_rate": 4.790570794638968e-05, - "loss": 0.382, + "learning_rate": 4.7905523149420346e-05, + "loss": 0.2939, "step": 1938000 }, { "epoch": 1.16, - "learning_rate": 4.790360798082911e-05, - "loss": 0.3858, + "learning_rate": 4.7903423183859786e-05, + "loss": 0.2913, "step": 1938500 }, { "epoch": 1.16, - "learning_rate": 4.790150801526855e-05, - "loss": 0.3842, + "learning_rate": 4.790132321829922e-05, + "loss": 0.2898, "step": 1939000 }, { "epoch": 1.16, - "learning_rate": 4.7899412249639106e-05, - "loss": 0.3806, + "learning_rate": 4.789922325273865e-05, + "loss": 0.2872, "step": 1939500 }, { "epoch": 1.16, - "learning_rate": 4.7897316484009666e-05, - "loss": 0.3809, + "learning_rate": 4.789712328717809e-05, + "loss": 0.29, "step": 1940000 }, { "epoch": 1.16, - "learning_rate": 4.78952165184491e-05, - "loss": 0.3978, + "learning_rate": 4.789502332161753e-05, + "loss": 0.2954, "step": 1940500 }, { "epoch": 1.16, - "learning_rate": 4.789312075281965e-05, - "loss": 0.3888, + "learning_rate": 4.789292335605696e-05, + "loss": 0.2929, "step": 1941000 }, { "epoch": 1.16, - "learning_rate": 4.7891020787259087e-05, - "loss": 0.3844, + "learning_rate": 4.7890831790358634e-05, + "loss": 0.2874, "step": 1941500 }, { "epoch": 1.16, - "learning_rate": 4.788892082169853e-05, - "loss": 0.3857, + "learning_rate": 4.7888731824798074e-05, + "loss": 0.2902, "step": 1942000 }, { "epoch": 1.16, - "learning_rate": 4.788682085613796e-05, - "loss": 0.3876, + "learning_rate": 4.788663185923751e-05, + "loss": 0.2912, "step": 1942500 }, { "epoch": 1.16, - "learning_rate": 4.7884720890577394e-05, - "loss": 0.372, + "learning_rate": 4.788453189367694e-05, + "loss": 0.2841, "step": 1943000 }, { "epoch": 1.17, - "learning_rate": 4.788262092501683e-05, - "loss": 0.3844, + "learning_rate": 4.788243192811638e-05, + "loss": 0.2897, "step": 1943500 }, { "epoch": 1.17, - "learning_rate": 4.788052095945626e-05, - "loss": 0.3816, + "learning_rate": 4.7880331962555815e-05, + "loss": 0.2909, "step": 1944000 }, { "epoch": 1.17, - "learning_rate": 4.78784209938957e-05, - "loss": 0.3845, + "learning_rate": 4.787823199699525e-05, + "loss": 0.2887, "step": 1944500 }, { "epoch": 1.17, - "learning_rate": 4.7876321028335134e-05, - "loss": 0.3802, + "learning_rate": 4.787613203143469e-05, + "loss": 0.2839, "step": 1945000 }, { "epoch": 1.17, - "learning_rate": 4.7874225262705695e-05, - "loss": 0.3851, + "learning_rate": 4.787403206587412e-05, + "loss": 0.2914, "step": 1945500 }, { "epoch": 1.17, - "learning_rate": 4.787212529714512e-05, - "loss": 0.3804, + "learning_rate": 4.7871936300244675e-05, + "loss": 0.2869, "step": 1946000 }, { "epoch": 1.17, - "learning_rate": 4.787002533158456e-05, - "loss": 0.3918, + "learning_rate": 4.786983633468411e-05, + "loss": 0.3009, "step": 1946500 }, { "epoch": 1.17, - "learning_rate": 4.786792956595512e-05, - "loss": 0.3813, + "learning_rate": 4.786773636912355e-05, + "loss": 0.2904, "step": 1947000 }, { "epoch": 1.17, - "learning_rate": 4.7865829600394555e-05, - "loss": 0.3795, + "learning_rate": 4.786563640356298e-05, + "loss": 0.2857, "step": 1947500 }, { "epoch": 1.17, - "learning_rate": 4.786372963483399e-05, - "loss": 0.3822, + "learning_rate": 4.7863536438002416e-05, + "loss": 0.2883, "step": 1948000 }, { "epoch": 1.17, - "learning_rate": 4.786162966927342e-05, - "loss": 0.3961, + "learning_rate": 4.786144067237297e-05, + "loss": 0.2964, "step": 1948500 }, { "epoch": 1.17, - "learning_rate": 4.785953390364398e-05, - "loss": 0.3847, + "learning_rate": 4.785934070681241e-05, + "loss": 0.2915, "step": 1949000 }, { "epoch": 1.17, - "learning_rate": 4.7857433938083416e-05, - "loss": 0.3928, + "learning_rate": 4.785724074125184e-05, + "loss": 0.2928, "step": 1949500 }, { "epoch": 1.17, - "learning_rate": 4.785533397252285e-05, - "loss": 0.3765, + "learning_rate": 4.785514077569128e-05, + "loss": 0.2848, "step": 1950000 }, { "epoch": 1.17, - "learning_rate": 4.785323400696228e-05, - "loss": 0.3932, + "learning_rate": 4.785304081013072e-05, + "loss": 0.2933, "step": 1950500 }, { "epoch": 1.17, - "learning_rate": 4.7851134041401717e-05, - "loss": 0.3844, + "learning_rate": 4.785094504450127e-05, + "loss": 0.2879, "step": 1951000 }, { "epoch": 1.17, - "learning_rate": 4.784903407584116e-05, - "loss": 0.3831, + "learning_rate": 4.7848845078940704e-05, + "loss": 0.2892, "step": 1951500 }, { "epoch": 1.17, - "learning_rate": 4.784693411028059e-05, - "loss": 0.3851, + "learning_rate": 4.7846745113380144e-05, + "loss": 0.2859, "step": 1952000 }, { "epoch": 1.17, - "learning_rate": 4.7844834144720024e-05, - "loss": 0.3806, + "learning_rate": 4.784464514781958e-05, + "loss": 0.2904, "step": 1952500 }, { "epoch": 1.17, - "learning_rate": 4.7842734179159464e-05, - "loss": 0.3881, + "learning_rate": 4.784254938219013e-05, + "loss": 0.2933, "step": 1953000 }, { "epoch": 1.17, - "learning_rate": 4.78406342135989e-05, - "loss": 0.3781, + "learning_rate": 4.7840449416629565e-05, + "loss": 0.2869, "step": 1953500 }, { "epoch": 1.17, - "learning_rate": 4.783853424803833e-05, - "loss": 0.3839, + "learning_rate": 4.783835365100012e-05, + "loss": 0.2998, "step": 1954000 }, { "epoch": 1.17, - "learning_rate": 4.783643428247777e-05, - "loss": 0.383, + "learning_rate": 4.783625368543956e-05, + "loss": 0.2903, "step": 1954500 }, { "epoch": 1.17, - "learning_rate": 4.7834338516848325e-05, - "loss": 0.3829, + "learning_rate": 4.783415371987899e-05, + "loss": 0.288, "step": 1955000 }, { "epoch": 1.17, - "learning_rate": 4.783223855128776e-05, - "loss": 0.3821, + "learning_rate": 4.7832053754318426e-05, + "loss": 0.2878, "step": 1955500 }, { "epoch": 1.17, - "learning_rate": 4.783014278565831e-05, - "loss": 0.383, + "learning_rate": 4.7829953788757866e-05, + "loss": 0.2873, "step": 1956000 }, { "epoch": 1.17, - "learning_rate": 4.7828042820097745e-05, - "loss": 0.3812, + "learning_rate": 4.78278538231973e-05, + "loss": 0.2895, "step": 1956500 }, { "epoch": 1.17, - "learning_rate": 4.7825942854537185e-05, - "loss": 0.3856, + "learning_rate": 4.782575385763673e-05, + "loss": 0.2873, "step": 1957000 }, { "epoch": 1.17, - "learning_rate": 4.782384288897662e-05, - "loss": 0.3901, + "learning_rate": 4.782365389207617e-05, + "loss": 0.2911, "step": 1957500 }, { "epoch": 1.17, - "learning_rate": 4.782174292341605e-05, - "loss": 0.3861, + "learning_rate": 4.7821558126446726e-05, + "loss": 0.2911, "step": 1958000 }, { "epoch": 1.17, - "learning_rate": 4.781964295785549e-05, - "loss": 0.3864, + "learning_rate": 4.781945816088616e-05, + "loss": 0.291, "step": 1958500 }, { "epoch": 1.17, - "learning_rate": 4.7817542992294926e-05, - "loss": 0.3855, + "learning_rate": 4.78173581953256e-05, + "loss": 0.2869, "step": 1959000 }, { "epoch": 1.17, - "learning_rate": 4.781544722666548e-05, - "loss": 0.3791, + "learning_rate": 4.7815258229765034e-05, + "loss": 0.2819, "step": 1959500 }, { "epoch": 1.18, - "learning_rate": 4.781334726110492e-05, - "loss": 0.389, + "learning_rate": 4.781315826420447e-05, + "loss": 0.2919, "step": 1960000 }, { "epoch": 1.18, - "learning_rate": 4.781124729554435e-05, - "loss": 0.3903, + "learning_rate": 4.781106249857502e-05, + "loss": 0.2926, "step": 1960500 }, { "epoch": 1.18, - "learning_rate": 4.780914732998379e-05, - "loss": 0.3869, + "learning_rate": 4.780896253301446e-05, + "loss": 0.2943, "step": 1961000 }, { "epoch": 1.18, - "learning_rate": 4.780704736442323e-05, - "loss": 0.3897, + "learning_rate": 4.7806862567453894e-05, + "loss": 0.2923, "step": 1961500 }, { "epoch": 1.18, - "learning_rate": 4.780494739886266e-05, - "loss": 0.3886, + "learning_rate": 4.780476260189333e-05, + "loss": 0.2833, "step": 1962000 }, { "epoch": 1.18, - "learning_rate": 4.7802847433302094e-05, - "loss": 0.3893, + "learning_rate": 4.780266683626388e-05, + "loss": 0.2924, "step": 1962500 }, { "epoch": 1.18, - "learning_rate": 4.780075166767265e-05, - "loss": 0.396, + "learning_rate": 4.780056687070332e-05, + "loss": 0.2937, "step": 1963000 }, { "epoch": 1.18, - "learning_rate": 4.779865170211209e-05, - "loss": 0.3808, + "learning_rate": 4.7798475305005e-05, + "loss": 0.2916, "step": 1963500 }, { "epoch": 1.18, - "learning_rate": 4.779655173655152e-05, - "loss": 0.3953, + "learning_rate": 4.7796375339444436e-05, + "loss": 0.2946, "step": 1964000 }, { "epoch": 1.18, - "learning_rate": 4.7794451770990955e-05, - "loss": 0.3825, + "learning_rate": 4.779427537388387e-05, + "loss": 0.2905, "step": 1964500 }, { "epoch": 1.18, - "learning_rate": 4.7792351805430395e-05, - "loss": 0.3894, + "learning_rate": 4.77921754083233e-05, + "loss": 0.2922, "step": 1965000 }, { "epoch": 1.18, - "learning_rate": 4.779025603980095e-05, - "loss": 0.3956, + "learning_rate": 4.7790075442762736e-05, + "loss": 0.3025, "step": 1965500 }, { "epoch": 1.18, - "learning_rate": 4.778815607424038e-05, - "loss": 0.3729, + "learning_rate": 4.778797547720217e-05, + "loss": 0.2844, "step": 1966000 }, { "epoch": 1.18, - "learning_rate": 4.7786056108679815e-05, - "loss": 0.3888, + "learning_rate": 4.778587551164161e-05, + "loss": 0.29, "step": 1966500 }, { "epoch": 1.18, - "learning_rate": 4.7783956143119256e-05, - "loss": 0.383, + "learning_rate": 4.778377554608104e-05, + "loss": 0.2884, "step": 1967000 }, { "epoch": 1.18, - "learning_rate": 4.778185617755869e-05, - "loss": 0.3859, + "learning_rate": 4.7781675580520477e-05, + "loss": 0.2874, "step": 1967500 }, { "epoch": 1.18, - "learning_rate": 4.777975621199812e-05, - "loss": 0.393, + "learning_rate": 4.777957561495992e-05, + "loss": 0.2931, "step": 1968000 }, { "epoch": 1.18, - "learning_rate": 4.777766044636868e-05, - "loss": 0.3928, + "learning_rate": 4.777747564939935e-05, + "loss": 0.2915, "step": 1968500 }, { "epoch": 1.18, - "learning_rate": 4.7775560480808116e-05, - "loss": 0.3824, + "learning_rate": 4.7775375683838784e-05, + "loss": 0.2935, "step": 1969000 }, { "epoch": 1.18, - "learning_rate": 4.777346051524755e-05, - "loss": 0.3878, + "learning_rate": 4.7773275718278224e-05, + "loss": 0.2952, "step": 1969500 }, { "epoch": 1.18, - "learning_rate": 4.777136054968699e-05, - "loss": 0.3797, + "learning_rate": 4.777117995264878e-05, + "loss": 0.2925, "step": 1970000 }, { "epoch": 1.18, - "learning_rate": 4.776926058412642e-05, - "loss": 0.3845, + "learning_rate": 4.776907998708821e-05, + "loss": 0.292, "step": 1970500 }, { "epoch": 1.18, - "learning_rate": 4.776716061856585e-05, - "loss": 0.3777, + "learning_rate": 4.7766980021527644e-05, + "loss": 0.2931, "step": 1971000 }, { "epoch": 1.18, - "learning_rate": 4.776506065300529e-05, - "loss": 0.3801, + "learning_rate": 4.7764880055967085e-05, + "loss": 0.293, "step": 1971500 }, { "epoch": 1.18, - "learning_rate": 4.776296488737585e-05, - "loss": 0.3868, + "learning_rate": 4.776278429033764e-05, + "loss": 0.2887, "step": 1972000 }, { "epoch": 1.18, - "learning_rate": 4.7760864921815284e-05, - "loss": 0.3875, + "learning_rate": 4.776068432477707e-05, + "loss": 0.2844, "step": 1972500 }, { "epoch": 1.18, - "learning_rate": 4.775876495625471e-05, - "loss": 0.3846, + "learning_rate": 4.7758588559147625e-05, + "loss": 0.2961, "step": 1973000 }, { "epoch": 1.18, - "learning_rate": 4.775666499069415e-05, - "loss": 0.3823, + "learning_rate": 4.7756488593587066e-05, + "loss": 0.2865, "step": 1973500 }, { "epoch": 1.18, - "learning_rate": 4.7754565025133585e-05, - "loss": 0.381, + "learning_rate": 4.77543886280265e-05, + "loss": 0.2859, "step": 1974000 }, { "epoch": 1.18, - "learning_rate": 4.7752473459435265e-05, - "loss": 0.3853, + "learning_rate": 4.775228866246593e-05, + "loss": 0.289, "step": 1974500 }, { "epoch": 1.18, - "learning_rate": 4.77503734938747e-05, - "loss": 0.3674, + "learning_rate": 4.775018869690537e-05, + "loss": 0.2809, "step": 1975000 }, { "epoch": 1.18, - "learning_rate": 4.774827352831414e-05, - "loss": 0.3857, + "learning_rate": 4.7748088731344806e-05, + "loss": 0.2916, "step": 1975500 }, { "epoch": 1.18, - "learning_rate": 4.774617356275357e-05, - "loss": 0.3834, + "learning_rate": 4.774599296571536e-05, + "loss": 0.2862, "step": 1976000 }, { "epoch": 1.18, - "learning_rate": 4.7744073597193006e-05, - "loss": 0.3871, + "learning_rate": 4.774389300015479e-05, + "loss": 0.2963, "step": 1976500 }, { "epoch": 1.19, - "learning_rate": 4.7741973631632446e-05, - "loss": 0.3764, + "learning_rate": 4.7741797234525353e-05, + "loss": 0.2855, "step": 1977000 }, { "epoch": 1.19, - "learning_rate": 4.773987366607187e-05, - "loss": 0.383, + "learning_rate": 4.773969726896479e-05, + "loss": 0.2866, "step": 1977500 }, { "epoch": 1.19, - "learning_rate": 4.7737773700511306e-05, - "loss": 0.3762, + "learning_rate": 4.773759730340422e-05, + "loss": 0.2881, "step": 1978000 }, { "epoch": 1.19, - "learning_rate": 4.7735673734950746e-05, - "loss": 0.3885, + "learning_rate": 4.773549733784366e-05, + "loss": 0.2927, "step": 1978500 }, { "epoch": 1.19, - "learning_rate": 4.773357376939018e-05, - "loss": 0.3815, + "learning_rate": 4.7733397372283094e-05, + "loss": 0.2899, "step": 1979000 }, { "epoch": 1.19, - "learning_rate": 4.773147380382961e-05, - "loss": 0.3829, + "learning_rate": 4.773129740672253e-05, + "loss": 0.2893, "step": 1979500 }, { "epoch": 1.19, - "learning_rate": 4.772937383826905e-05, - "loss": 0.3937, + "learning_rate": 4.772919744116197e-05, + "loss": 0.2968, "step": 1980000 }, { "epoch": 1.19, - "learning_rate": 4.772727807263961e-05, - "loss": 0.3793, + "learning_rate": 4.77270974756014e-05, + "loss": 0.2933, "step": 1980500 }, { "epoch": 1.19, - "learning_rate": 4.772517810707904e-05, - "loss": 0.3852, + "learning_rate": 4.7724997510040835e-05, + "loss": 0.2895, "step": 1981000 }, { "epoch": 1.19, - "learning_rate": 4.7723078141518474e-05, - "loss": 0.3826, + "learning_rate": 4.7722897544480275e-05, + "loss": 0.2892, "step": 1981500 }, { "epoch": 1.19, - "learning_rate": 4.772098237588904e-05, - "loss": 0.3974, + "learning_rate": 4.772079757891971e-05, + "loss": 0.295, "step": 1982000 }, { "epoch": 1.19, - "learning_rate": 4.771888241032847e-05, - "loss": 0.3876, + "learning_rate": 4.7718697613359135e-05, + "loss": 0.2958, "step": 1982500 }, { "epoch": 1.19, - "learning_rate": 4.77167824447679e-05, - "loss": 0.382, + "learning_rate": 4.7716597647798575e-05, + "loss": 0.2907, "step": 1983000 }, { "epoch": 1.19, - "learning_rate": 4.771468247920734e-05, - "loss": 0.387, + "learning_rate": 4.7714501882169136e-05, + "loss": 0.2881, "step": 1983500 }, { "epoch": 1.19, - "learning_rate": 4.7712582513646775e-05, - "loss": 0.3876, + "learning_rate": 4.771240191660857e-05, + "loss": 0.2877, "step": 1984000 }, { "epoch": 1.19, - "learning_rate": 4.771048254808621e-05, - "loss": 0.3867, + "learning_rate": 4.7710301951047996e-05, + "loss": 0.2954, "step": 1984500 }, { "epoch": 1.19, - "learning_rate": 4.770838258252565e-05, - "loss": 0.3843, + "learning_rate": 4.7708201985487436e-05, + "loss": 0.2914, "step": 1985000 }, { "epoch": 1.19, - "learning_rate": 4.770628261696508e-05, - "loss": 0.3883, + "learning_rate": 4.7706106219857996e-05, + "loss": 0.2862, "step": 1985500 }, { "epoch": 1.19, - "learning_rate": 4.7704186851335636e-05, - "loss": 0.3821, + "learning_rate": 4.770400625429743e-05, + "loss": 0.2925, "step": 1986000 }, { "epoch": 1.19, - "learning_rate": 4.770208688577507e-05, - "loss": 0.387, + "learning_rate": 4.770190628873687e-05, + "loss": 0.2883, "step": 1986500 }, { "epoch": 1.19, - "learning_rate": 4.769998692021451e-05, - "loss": 0.371, + "learning_rate": 4.76998063231763e-05, + "loss": 0.2803, "step": 1987000 }, { "epoch": 1.19, - "learning_rate": 4.769788695465394e-05, - "loss": 0.3829, + "learning_rate": 4.769770635761573e-05, + "loss": 0.2854, "step": 1987500 }, { "epoch": 1.19, - "learning_rate": 4.7695791189024496e-05, - "loss": 0.3859, + "learning_rate": 4.769561059198629e-05, + "loss": 0.2887, "step": 1988000 }, { "epoch": 1.19, - "learning_rate": 4.7693691223463937e-05, - "loss": 0.3903, + "learning_rate": 4.769351062642573e-05, + "loss": 0.2936, "step": 1988500 }, { "epoch": 1.19, - "learning_rate": 4.769159125790337e-05, - "loss": 0.3897, + "learning_rate": 4.7691410660865164e-05, + "loss": 0.2926, "step": 1989000 }, { "epoch": 1.19, - "learning_rate": 4.7689491292342803e-05, - "loss": 0.3785, + "learning_rate": 4.768931069530459e-05, + "loss": 0.2889, "step": 1989500 }, { "epoch": 1.19, - "learning_rate": 4.768739552671336e-05, - "loss": 0.3821, + "learning_rate": 4.768721072974403e-05, + "loss": 0.2886, "step": 1990000 }, { "epoch": 1.19, - "learning_rate": 4.76852955611528e-05, - "loss": 0.3855, + "learning_rate": 4.7685110764183465e-05, + "loss": 0.2883, "step": 1990500 }, { "epoch": 1.19, - "learning_rate": 4.768319559559223e-05, - "loss": 0.3799, + "learning_rate": 4.7683014998554025e-05, + "loss": 0.2866, "step": 1991000 }, { "epoch": 1.19, - "learning_rate": 4.7681095630031664e-05, - "loss": 0.387, + "learning_rate": 4.768091503299346e-05, + "loss": 0.2932, "step": 1991500 }, { "epoch": 1.19, - "learning_rate": 4.767899986440222e-05, - "loss": 0.379, + "learning_rate": 4.767881506743289e-05, + "loss": 0.2905, "step": 1992000 }, { "epoch": 1.19, - "learning_rate": 4.767690409877278e-05, - "loss": 0.3794, + "learning_rate": 4.7676715101872325e-05, + "loss": 0.2874, "step": 1992500 }, { "epoch": 1.19, - "learning_rate": 4.767480413321222e-05, - "loss": 0.3823, + "learning_rate": 4.767461513631176e-05, + "loss": 0.2861, "step": 1993000 }, { "epoch": 1.2, - "learning_rate": 4.767270416765165e-05, - "loss": 0.3913, + "learning_rate": 4.7672519370682326e-05, + "loss": 0.2925, "step": 1993500 }, { "epoch": 1.2, - "learning_rate": 4.7670604202091085e-05, - "loss": 0.3726, + "learning_rate": 4.767042360505288e-05, + "loss": 0.2865, "step": 1994000 }, { "epoch": 1.2, - "learning_rate": 4.766850423653052e-05, - "loss": 0.3849, + "learning_rate": 4.766832363949231e-05, + "loss": 0.2851, "step": 1994500 }, { "epoch": 1.2, - "learning_rate": 4.766640427096995e-05, - "loss": 0.3878, + "learning_rate": 4.7666223673931746e-05, + "loss": 0.2911, "step": 1995000 }, { "epoch": 1.2, - "learning_rate": 4.766430430540939e-05, - "loss": 0.3811, + "learning_rate": 4.766412370837119e-05, + "loss": 0.2849, "step": 1995500 }, { "epoch": 1.2, - "learning_rate": 4.7662204339848826e-05, - "loss": 0.3835, + "learning_rate": 4.766202794274174e-05, + "loss": 0.287, "step": 1996000 }, { "epoch": 1.2, - "learning_rate": 4.766010857421938e-05, - "loss": 0.3889, + "learning_rate": 4.7659927977181174e-05, + "loss": 0.2934, "step": 1996500 }, { "epoch": 1.2, - "learning_rate": 4.765800860865881e-05, - "loss": 0.3848, + "learning_rate": 4.765782801162061e-05, + "loss": 0.2885, "step": 1997000 }, { "epoch": 1.2, - "learning_rate": 4.765590864309825e-05, - "loss": 0.3764, + "learning_rate": 4.765572804606005e-05, + "loss": 0.2812, "step": 1997500 }, { "epoch": 1.2, - "learning_rate": 4.7653808677537687e-05, - "loss": 0.392, + "learning_rate": 4.765362808049948e-05, + "loss": 0.291, "step": 1998000 }, { "epoch": 1.2, - "learning_rate": 4.765171291190825e-05, - "loss": 0.3937, + "learning_rate": 4.7651528114938914e-05, + "loss": 0.2923, "step": 1998500 }, { "epoch": 1.2, - "learning_rate": 4.7649612946347674e-05, - "loss": 0.3831, + "learning_rate": 4.764942814937835e-05, + "loss": 0.2855, "step": 1999000 }, { "epoch": 1.2, - "learning_rate": 4.7647512980787114e-05, - "loss": 0.3791, + "learning_rate": 4.764732818381778e-05, + "loss": 0.2872, "step": 1999500 }, { "epoch": 1.2, - "learning_rate": 4.764541301522655e-05, - "loss": 0.3742, + "learning_rate": 4.764522821825722e-05, + "loss": 0.2839, "step": 2000000 }, { "epoch": 1.2, - "eval_loss": 0.36836302280426025, - "eval_runtime": 1117.5583, - "eval_samples_per_second": 471.313, - "eval_steps_per_second": 78.552, + "eval_loss": 0.2663571536540985, + "eval_runtime": 1466.8387, + "eval_samples_per_second": 359.085, + "eval_steps_per_second": 59.848, "step": 2000000 }, { "epoch": 1.2, - "learning_rate": 4.764331724959711e-05, - "loss": 0.3824, + "learning_rate": 4.764313245262778e-05, + "loss": 0.2949, "step": 2000500 }, { "epoch": 1.2, - "learning_rate": 4.764121728403654e-05, - "loss": 0.3833, + "learning_rate": 4.7641032487067215e-05, + "loss": 0.2929, "step": 2001000 }, { "epoch": 1.2, - "learning_rate": 4.7639117318475975e-05, - "loss": 0.3765, + "learning_rate": 4.763893252150664e-05, + "loss": 0.2848, "step": 2001500 }, { "epoch": 1.2, - "learning_rate": 4.763701735291541e-05, - "loss": 0.3751, + "learning_rate": 4.763683255594608e-05, + "loss": 0.288, "step": 2002000 }, { "epoch": 1.2, - "learning_rate": 4.763491738735485e-05, - "loss": 0.3793, + "learning_rate": 4.7634732590385516e-05, + "loss": 0.2866, "step": 2002500 }, { "epoch": 1.2, - "learning_rate": 4.763282162172541e-05, - "loss": 0.3828, + "learning_rate": 4.7632636824756076e-05, + "loss": 0.2901, "step": 2003000 }, { "epoch": 1.2, - "learning_rate": 4.763072165616484e-05, - "loss": 0.3819, + "learning_rate": 4.763053685919551e-05, + "loss": 0.2909, "step": 2003500 }, { "epoch": 1.2, - "learning_rate": 4.762862169060427e-05, - "loss": 0.3824, + "learning_rate": 4.762844109356606e-05, + "loss": 0.2872, "step": 2004000 }, { "epoch": 1.2, - "learning_rate": 4.762652172504371e-05, - "loss": 0.3872, + "learning_rate": 4.76263411280055e-05, + "loss": 0.2933, "step": 2004500 }, { "epoch": 1.2, - "learning_rate": 4.762442595941427e-05, - "loss": 0.3886, + "learning_rate": 4.762424116244494e-05, + "loss": 0.2938, "step": 2005000 }, { "epoch": 1.2, - "learning_rate": 4.76223259938537e-05, - "loss": 0.3804, + "learning_rate": 4.762214119688437e-05, + "loss": 0.2884, "step": 2005500 }, { "epoch": 1.2, - "learning_rate": 4.762022602829313e-05, - "loss": 0.3851, + "learning_rate": 4.762004543125493e-05, + "loss": 0.2893, "step": 2006000 }, { "epoch": 1.2, - "learning_rate": 4.761813026266369e-05, - "loss": 0.3854, + "learning_rate": 4.7617945465694364e-05, + "loss": 0.2924, "step": 2006500 }, { "epoch": 1.2, - "learning_rate": 4.761603029710313e-05, - "loss": 0.3822, + "learning_rate": 4.76158455001338e-05, + "loss": 0.2868, "step": 2007000 }, { "epoch": 1.2, - "learning_rate": 4.7613930331542564e-05, - "loss": 0.3736, + "learning_rate": 4.761374553457324e-05, + "loss": 0.2847, "step": 2007500 }, { "epoch": 1.2, - "learning_rate": 4.7611830365982e-05, - "loss": 0.3848, + "learning_rate": 4.761164556901267e-05, + "loss": 0.2918, "step": 2008000 }, { "epoch": 1.2, - "learning_rate": 4.760973040042143e-05, - "loss": 0.3863, + "learning_rate": 4.76095456034521e-05, + "loss": 0.2901, "step": 2008500 }, { "epoch": 1.2, - "learning_rate": 4.7607630434860864e-05, - "loss": 0.3813, + "learning_rate": 4.760744983782266e-05, + "loss": 0.2897, "step": 2009000 }, { "epoch": 1.2, - "learning_rate": 4.7605530469300304e-05, - "loss": 0.377, + "learning_rate": 4.76053498722621e-05, + "loss": 0.2879, "step": 2009500 }, { "epoch": 1.21, - "learning_rate": 4.760343050373974e-05, - "loss": 0.3831, + "learning_rate": 4.760324990670153e-05, + "loss": 0.2895, "step": 2010000 }, { "epoch": 1.21, - "learning_rate": 4.760133053817917e-05, - "loss": 0.3836, + "learning_rate": 4.7601149941140965e-05, + "loss": 0.2908, "step": 2010500 }, { "epoch": 1.21, - "learning_rate": 4.759923057261861e-05, - "loss": 0.3782, + "learning_rate": 4.75990499755804e-05, + "loss": 0.2882, "step": 2011000 }, { "epoch": 1.21, - "learning_rate": 4.7597130607058045e-05, - "loss": 0.3734, + "learning_rate": 4.759695001001983e-05, + "loss": 0.2843, "step": 2011500 }, { "epoch": 1.21, - "learning_rate": 4.759503064149748e-05, - "loss": 0.3896, + "learning_rate": 4.7594850044459266e-05, + "loss": 0.2917, "step": 2012000 }, { "epoch": 1.21, - "learning_rate": 4.759293067593692e-05, - "loss": 0.3785, + "learning_rate": 4.7592750078898706e-05, + "loss": 0.2897, "step": 2012500 }, { "epoch": 1.21, - "learning_rate": 4.759083911023859e-05, - "loss": 0.3845, + "learning_rate": 4.759065011333814e-05, + "loss": 0.2903, "step": 2013000 }, { "epoch": 1.21, - "learning_rate": 4.7588739144678026e-05, - "loss": 0.3829, + "learning_rate": 4.758855014777757e-05, + "loss": 0.2857, "step": 2013500 }, { "epoch": 1.21, - "learning_rate": 4.758663917911746e-05, - "loss": 0.3828, + "learning_rate": 4.758645438214813e-05, + "loss": 0.2941, "step": 2014000 }, { "epoch": 1.21, - "learning_rate": 4.758453921355689e-05, - "loss": 0.3889, + "learning_rate": 4.758435441658757e-05, + "loss": 0.2988, "step": 2014500 }, { "epoch": 1.21, - "learning_rate": 4.758243924799633e-05, - "loss": 0.3894, + "learning_rate": 4.7582254451027e-05, + "loss": 0.2929, "step": 2015000 }, { "epoch": 1.21, - "learning_rate": 4.7580339282435766e-05, - "loss": 0.3861, + "learning_rate": 4.758015448546644e-05, + "loss": 0.2876, "step": 2015500 }, { "epoch": 1.21, - "learning_rate": 4.757824351680632e-05, - "loss": 0.3844, + "learning_rate": 4.7578054519905874e-05, + "loss": 0.2863, "step": 2016000 }, { "epoch": 1.21, - "learning_rate": 4.757614355124576e-05, - "loss": 0.3892, + "learning_rate": 4.757595875427643e-05, + "loss": 0.2929, "step": 2016500 }, { "epoch": 1.21, - "learning_rate": 4.7574043585685193e-05, - "loss": 0.3861, + "learning_rate": 4.757385878871586e-05, + "loss": 0.2873, "step": 2017000 }, { "epoch": 1.21, - "learning_rate": 4.757194362012463e-05, - "loss": 0.3928, + "learning_rate": 4.75717588231553e-05, + "loss": 0.2966, "step": 2017500 }, { "epoch": 1.21, - "learning_rate": 4.756984785449518e-05, - "loss": 0.3736, + "learning_rate": 4.7569658857594735e-05, + "loss": 0.2847, "step": 2018000 }, { "epoch": 1.21, - "learning_rate": 4.756774788893462e-05, - "loss": 0.3869, + "learning_rate": 4.756755889203417e-05, + "loss": 0.2926, "step": 2018500 }, { "epoch": 1.21, - "learning_rate": 4.7565647923374054e-05, - "loss": 0.3886, + "learning_rate": 4.756546312640472e-05, + "loss": 0.2893, "step": 2019000 }, { "epoch": 1.21, - "learning_rate": 4.7563552157744615e-05, - "loss": 0.3861, + "learning_rate": 4.756336316084416e-05, + "loss": 0.2936, "step": 2019500 }, { "epoch": 1.21, - "learning_rate": 4.756145219218405e-05, - "loss": 0.3835, + "learning_rate": 4.7561263195283595e-05, + "loss": 0.2902, "step": 2020000 }, { "epoch": 1.21, - "learning_rate": 4.755935222662348e-05, - "loss": 0.3864, + "learning_rate": 4.755916322972303e-05, + "loss": 0.2885, "step": 2020500 }, { "epoch": 1.21, - "learning_rate": 4.7557252261062915e-05, - "loss": 0.3801, + "learning_rate": 4.755706746409359e-05, + "loss": 0.2917, "step": 2021000 }, { "epoch": 1.21, - "learning_rate": 4.755515229550235e-05, - "loss": 0.3816, + "learning_rate": 4.755496749853302e-05, + "loss": 0.292, "step": 2021500 }, { "epoch": 1.21, - "learning_rate": 4.755305232994179e-05, - "loss": 0.3797, + "learning_rate": 4.7552867532972456e-05, + "loss": 0.2853, "step": 2022000 }, { "epoch": 1.21, - "learning_rate": 4.755095236438122e-05, - "loss": 0.3841, + "learning_rate": 4.7550771767343016e-05, + "loss": 0.292, "step": 2022500 }, { "epoch": 1.21, - "learning_rate": 4.7548852398820656e-05, - "loss": 0.3815, + "learning_rate": 4.754867180178245e-05, + "loss": 0.2933, "step": 2023000 }, { "epoch": 1.21, - "learning_rate": 4.7546756633191216e-05, - "loss": 0.3877, + "learning_rate": 4.754657183622188e-05, + "loss": 0.2953, "step": 2023500 }, { "epoch": 1.21, - "learning_rate": 4.754465666763065e-05, - "loss": 0.3938, + "learning_rate": 4.754447187066132e-05, + "loss": 0.2967, "step": 2024000 }, { "epoch": 1.21, - "learning_rate": 4.754255670207008e-05, - "loss": 0.3915, + "learning_rate": 4.754237190510076e-05, + "loss": 0.2924, "step": 2024500 }, { "epoch": 1.21, - "learning_rate": 4.754046093644064e-05, - "loss": 0.3897, + "learning_rate": 4.754027193954019e-05, + "loss": 0.2944, "step": 2025000 }, { "epoch": 1.21, - "learning_rate": 4.753836097088008e-05, - "loss": 0.3855, + "learning_rate": 4.7538171973979624e-05, + "loss": 0.2894, "step": 2025500 }, { "epoch": 1.21, - "learning_rate": 4.753626100531951e-05, - "loss": 0.378, + "learning_rate": 4.7536072008419064e-05, + "loss": 0.2885, "step": 2026000 }, { "epoch": 1.21, - "learning_rate": 4.7534161039758944e-05, - "loss": 0.3861, + "learning_rate": 4.75339720428585e-05, + "loss": 0.2927, "step": 2026500 }, { "epoch": 1.22, - "learning_rate": 4.7532061074198384e-05, - "loss": 0.3895, + "learning_rate": 4.753187207729793e-05, + "loss": 0.2981, "step": 2027000 }, { "epoch": 1.22, - "learning_rate": 4.752996110863782e-05, - "loss": 0.3771, + "learning_rate": 4.752977211173737e-05, + "loss": 0.285, "step": 2027500 }, { "epoch": 1.22, - "learning_rate": 4.752786114307725e-05, - "loss": 0.3743, + "learning_rate": 4.7527672146176805e-05, + "loss": 0.2874, "step": 2028000 }, { "epoch": 1.22, - "learning_rate": 4.752576117751669e-05, - "loss": 0.3753, + "learning_rate": 4.752557218061623e-05, + "loss": 0.2878, "step": 2028500 }, { "epoch": 1.22, - "learning_rate": 4.7523661211956124e-05, - "loss": 0.3781, + "learning_rate": 4.752347221505567e-05, + "loss": 0.2867, "step": 2029000 }, { "epoch": 1.22, - "learning_rate": 4.752156124639556e-05, - "loss": 0.381, + "learning_rate": 4.7521372249495105e-05, + "loss": 0.2823, "step": 2029500 }, { "epoch": 1.22, - "learning_rate": 4.7519461280835e-05, - "loss": 0.3912, + "learning_rate": 4.7519276483865666e-05, + "loss": 0.2955, "step": 2030000 }, { "epoch": 1.22, - "learning_rate": 4.751736551520555e-05, - "loss": 0.3873, + "learning_rate": 4.75171765183051e-05, + "loss": 0.2963, "step": 2030500 }, { "epoch": 1.22, - "learning_rate": 4.7515265549644985e-05, - "loss": 0.396, + "learning_rate": 4.751507655274453e-05, + "loss": 0.2935, "step": 2031000 }, { "epoch": 1.22, - "learning_rate": 4.751316558408442e-05, - "loss": 0.39, + "learning_rate": 4.751298078711509e-05, + "loss": 0.2988, "step": 2031500 }, { "epoch": 1.22, - "learning_rate": 4.751106561852386e-05, - "loss": 0.3745, + "learning_rate": 4.7510880821554526e-05, + "loss": 0.2833, "step": 2032000 }, { "epoch": 1.22, - "learning_rate": 4.750896565296329e-05, - "loss": 0.3771, + "learning_rate": 4.7508780855993966e-05, + "loss": 0.2845, "step": 2032500 }, { "epoch": 1.22, - "learning_rate": 4.7506865687402726e-05, - "loss": 0.3815, + "learning_rate": 4.750668089043339e-05, + "loss": 0.2861, "step": 2033000 }, { "epoch": 1.22, - "learning_rate": 4.750476572184216e-05, - "loss": 0.3797, + "learning_rate": 4.750458092487283e-05, + "loss": 0.283, "step": 2033500 }, { "epoch": 1.22, - "learning_rate": 4.750266575628159e-05, - "loss": 0.3771, + "learning_rate": 4.750248095931227e-05, + "loss": 0.2891, "step": 2034000 }, { "epoch": 1.22, - "learning_rate": 4.750056999065215e-05, - "loss": 0.3864, + "learning_rate": 4.750038519368283e-05, + "loss": 0.29, "step": 2034500 }, { "epoch": 1.22, - "learning_rate": 4.749847002509159e-05, - "loss": 0.3828, + "learning_rate": 4.749828522812226e-05, + "loss": 0.2865, "step": 2035000 }, { "epoch": 1.22, - "learning_rate": 4.749637425946215e-05, - "loss": 0.3857, + "learning_rate": 4.749618526256169e-05, + "loss": 0.2877, "step": 2035500 }, { "epoch": 1.22, - "learning_rate": 4.749427429390158e-05, - "loss": 0.3892, + "learning_rate": 4.749408529700113e-05, + "loss": 0.289, "step": 2036000 }, { "epoch": 1.22, - "learning_rate": 4.7492174328341014e-05, - "loss": 0.3812, + "learning_rate": 4.749198533144056e-05, + "loss": 0.2925, "step": 2036500 }, { "epoch": 1.22, - "learning_rate": 4.7490074362780454e-05, - "loss": 0.3858, + "learning_rate": 4.7489885365879995e-05, + "loss": 0.2957, "step": 2037000 }, { "epoch": 1.22, - "learning_rate": 4.748797859715101e-05, - "loss": 0.3798, + "learning_rate": 4.7487785400319435e-05, + "loss": 0.2851, "step": 2037500 }, { "epoch": 1.22, - "learning_rate": 4.748587863159044e-05, - "loss": 0.3865, + "learning_rate": 4.748568543475887e-05, + "loss": 0.2887, "step": 2038000 }, { "epoch": 1.22, - "learning_rate": 4.7483778666029874e-05, - "loss": 0.3879, + "learning_rate": 4.74835854691983e-05, + "loss": 0.2883, "step": 2038500 }, { "epoch": 1.22, - "learning_rate": 4.7481678700469315e-05, - "loss": 0.3907, + "learning_rate": 4.748149390349998e-05, + "loss": 0.2902, "step": 2039000 }, { "epoch": 1.22, - "learning_rate": 4.747958293483987e-05, - "loss": 0.3838, + "learning_rate": 4.747939393793942e-05, + "loss": 0.2929, "step": 2039500 }, { "epoch": 1.22, - "learning_rate": 4.74774829692793e-05, - "loss": 0.3731, + "learning_rate": 4.7477293972378856e-05, + "loss": 0.2865, "step": 2040000 }, { "epoch": 1.22, - "learning_rate": 4.747538300371874e-05, - "loss": 0.3857, + "learning_rate": 4.747519400681828e-05, + "loss": 0.2915, "step": 2040500 }, { "epoch": 1.22, - "learning_rate": 4.7473283038158175e-05, - "loss": 0.379, + "learning_rate": 4.747309404125772e-05, + "loss": 0.2858, "step": 2041000 }, { "epoch": 1.22, - "learning_rate": 4.747118307259761e-05, - "loss": 0.3827, + "learning_rate": 4.7470994075697156e-05, + "loss": 0.2921, "step": 2041500 }, { "epoch": 1.22, - "learning_rate": 4.746908310703705e-05, - "loss": 0.3782, + "learning_rate": 4.746889411013659e-05, + "loss": 0.2885, "step": 2042000 }, { "epoch": 1.22, - "learning_rate": 4.7466983141476476e-05, - "loss": 0.3816, + "learning_rate": 4.746679414457603e-05, + "loss": 0.2908, "step": 2042500 }, { "epoch": 1.22, - "learning_rate": 4.746488317591591e-05, - "loss": 0.3947, + "learning_rate": 4.746469417901546e-05, + "loss": 0.291, "step": 2043000 }, { "epoch": 1.23, - "learning_rate": 4.746278321035535e-05, - "loss": 0.3789, + "learning_rate": 4.746259841338602e-05, + "loss": 0.2931, "step": 2043500 }, { "epoch": 1.23, - "learning_rate": 4.746068324479478e-05, - "loss": 0.3891, + "learning_rate": 4.746049844782545e-05, + "loss": 0.2967, "step": 2044000 }, { "epoch": 1.23, - "learning_rate": 4.7458583279234216e-05, - "loss": 0.3894, + "learning_rate": 4.745839848226489e-05, + "loss": 0.2864, "step": 2044500 }, { "epoch": 1.23, - "learning_rate": 4.745648331367366e-05, - "loss": 0.3804, + "learning_rate": 4.7456298516704324e-05, + "loss": 0.2896, "step": 2045000 }, { "epoch": 1.23, - "learning_rate": 4.745438754804421e-05, - "loss": 0.3752, + "learning_rate": 4.745420275107488e-05, + "loss": 0.2899, "step": 2045500 }, { "epoch": 1.23, - "learning_rate": 4.7452287582483644e-05, - "loss": 0.3869, + "learning_rate": 4.745210278551432e-05, + "loss": 0.2894, "step": 2046000 }, { "epoch": 1.23, - "learning_rate": 4.745018761692308e-05, - "loss": 0.3875, + "learning_rate": 4.745000281995375e-05, + "loss": 0.2905, "step": 2046500 }, { "epoch": 1.23, - "learning_rate": 4.744808765136252e-05, - "loss": 0.3801, + "learning_rate": 4.7447902854393185e-05, + "loss": 0.2849, "step": 2047000 }, { "epoch": 1.23, - "learning_rate": 4.744599188573307e-05, - "loss": 0.3842, + "learning_rate": 4.7445802888832625e-05, + "loss": 0.2878, "step": 2047500 }, { "epoch": 1.23, - "learning_rate": 4.7443891920172504e-05, - "loss": 0.3797, + "learning_rate": 4.744370292327206e-05, + "loss": 0.2885, "step": 2048000 }, { "epoch": 1.23, - "learning_rate": 4.7441791954611945e-05, - "loss": 0.3749, + "learning_rate": 4.744160295771149e-05, + "loss": 0.2827, "step": 2048500 }, { "epoch": 1.23, - "learning_rate": 4.743969198905138e-05, - "loss": 0.3782, + "learning_rate": 4.7439507192082046e-05, + "loss": 0.2891, "step": 2049000 }, { "epoch": 1.23, - "learning_rate": 4.743759622342194e-05, - "loss": 0.3748, + "learning_rate": 4.7437407226521486e-05, + "loss": 0.2826, "step": 2049500 }, { "epoch": 1.23, - "learning_rate": 4.7435496257861365e-05, - "loss": 0.3911, + "learning_rate": 4.743531146089204e-05, + "loss": 0.287, "step": 2050000 }, { "epoch": 1.23, - "learning_rate": 4.7433396292300805e-05, - "loss": 0.3959, + "learning_rate": 4.743321149533147e-05, + "loss": 0.296, "step": 2050500 }, { "epoch": 1.23, - "learning_rate": 4.7431300526671366e-05, - "loss": 0.3914, + "learning_rate": 4.7431111529770906e-05, + "loss": 0.291, "step": 2051000 }, { "epoch": 1.23, - "learning_rate": 4.74292005611108e-05, - "loss": 0.3817, + "learning_rate": 4.7429011564210346e-05, + "loss": 0.2902, "step": 2051500 }, { "epoch": 1.23, - "learning_rate": 4.742710059555023e-05, - "loss": 0.387, + "learning_rate": 4.742691159864978e-05, + "loss": 0.2896, "step": 2052000 }, { "epoch": 1.23, - "learning_rate": 4.7425000629989666e-05, - "loss": 0.3771, + "learning_rate": 4.742481163308922e-05, + "loss": 0.2848, "step": 2052500 }, { "epoch": 1.23, - "learning_rate": 4.74229006644291e-05, - "loss": 0.3796, + "learning_rate": 4.7422711667528654e-05, + "loss": 0.2904, "step": 2053000 }, { "epoch": 1.23, - "learning_rate": 4.742080069886853e-05, - "loss": 0.3805, + "learning_rate": 4.742061170196809e-05, + "loss": 0.2874, "step": 2053500 }, { "epoch": 1.23, - "learning_rate": 4.74187049332391e-05, - "loss": 0.3957, + "learning_rate": 4.741851593633864e-05, + "loss": 0.2971, "step": 2054000 }, { "epoch": 1.23, - "learning_rate": 4.741660496767853e-05, - "loss": 0.3787, + "learning_rate": 4.7416420170709194e-05, + "loss": 0.2912, "step": 2054500 }, { "epoch": 1.23, - "learning_rate": 4.741450500211796e-05, - "loss": 0.3796, + "learning_rate": 4.7414320205148634e-05, + "loss": 0.2942, "step": 2055000 }, { "epoch": 1.23, - "learning_rate": 4.74124050365574e-05, - "loss": 0.3905, + "learning_rate": 4.741222023958807e-05, + "loss": 0.2954, "step": 2055500 }, { "epoch": 1.23, - "learning_rate": 4.741030927092796e-05, - "loss": 0.3822, + "learning_rate": 4.74101202740275e-05, + "loss": 0.2937, "step": 2056000 }, { "epoch": 1.23, - "learning_rate": 4.7408209305367394e-05, - "loss": 0.3777, + "learning_rate": 4.740802030846694e-05, + "loss": 0.2837, "step": 2056500 }, { "epoch": 1.23, - "learning_rate": 4.740610933980682e-05, - "loss": 0.389, + "learning_rate": 4.7405920342906375e-05, + "loss": 0.2897, "step": 2057000 }, { "epoch": 1.23, - "learning_rate": 4.740400937424626e-05, - "loss": 0.3783, + "learning_rate": 4.740382037734581e-05, + "loss": 0.2859, "step": 2057500 }, { "epoch": 1.23, - "learning_rate": 4.7401909408685695e-05, - "loss": 0.3916, + "learning_rate": 4.740172461171636e-05, + "loss": 0.29, "step": 2058000 }, { "epoch": 1.23, - "learning_rate": 4.739980944312513e-05, - "loss": 0.3771, + "learning_rate": 4.73996246461558e-05, + "loss": 0.2898, "step": 2058500 }, { "epoch": 1.23, - "learning_rate": 4.739771367749569e-05, - "loss": 0.3803, + "learning_rate": 4.739752888052636e-05, + "loss": 0.2893, "step": 2059000 }, { "epoch": 1.23, - "learning_rate": 4.739561371193512e-05, - "loss": 0.3826, + "learning_rate": 4.739542891496579e-05, + "loss": 0.2889, "step": 2059500 }, { "epoch": 1.24, - "learning_rate": 4.7393513746374555e-05, - "loss": 0.3927, + "learning_rate": 4.739332894940523e-05, + "loss": 0.2964, "step": 2060000 }, { "epoch": 1.24, - "learning_rate": 4.7391413780813996e-05, - "loss": 0.3802, + "learning_rate": 4.739122898384466e-05, + "loss": 0.2908, "step": 2060500 }, { "epoch": 1.24, - "learning_rate": 4.738931381525343e-05, - "loss": 0.3782, + "learning_rate": 4.7389129018284097e-05, + "loss": 0.2828, "step": 2061000 }, { "epoch": 1.24, - "learning_rate": 4.738721804962399e-05, - "loss": 0.3888, + "learning_rate": 4.738702905272354e-05, + "loss": 0.286, "step": 2061500 }, { "epoch": 1.24, - "learning_rate": 4.7385118084063416e-05, - "loss": 0.3744, + "learning_rate": 4.738492908716297e-05, + "loss": 0.286, "step": 2062000 }, { "epoch": 1.24, - "learning_rate": 4.7383018118502856e-05, - "loss": 0.3852, + "learning_rate": 4.7382829121602404e-05, + "loss": 0.2839, "step": 2062500 }, { "epoch": 1.24, - "learning_rate": 4.738091815294229e-05, - "loss": 0.3824, + "learning_rate": 4.738073335597296e-05, + "loss": 0.2901, "step": 2063000 }, { "epoch": 1.24, - "learning_rate": 4.737881818738172e-05, - "loss": 0.3891, + "learning_rate": 4.73786333904124e-05, + "loss": 0.2936, "step": 2063500 }, { "epoch": 1.24, - "learning_rate": 4.7376718221821164e-05, - "loss": 0.3875, + "learning_rate": 4.737653342485183e-05, + "loss": 0.2871, "step": 2064000 }, { "epoch": 1.24, - "learning_rate": 4.73746182562606e-05, - "loss": 0.3731, + "learning_rate": 4.7374433459291264e-05, + "loss": 0.291, "step": 2064500 }, { "epoch": 1.24, - "learning_rate": 4.737252249063115e-05, - "loss": 0.3935, + "learning_rate": 4.7372333493730705e-05, + "loss": 0.2962, "step": 2065000 }, { "epoch": 1.24, - "learning_rate": 4.7370422525070584e-05, - "loss": 0.3867, + "learning_rate": 4.737023352817014e-05, + "loss": 0.2929, "step": 2065500 }, { "epoch": 1.24, - "learning_rate": 4.7368322559510024e-05, - "loss": 0.3908, + "learning_rate": 4.736813356260957e-05, + "loss": 0.2923, "step": 2066000 }, { "epoch": 1.24, - "learning_rate": 4.736622259394946e-05, - "loss": 0.3786, + "learning_rate": 4.736603359704901e-05, + "loss": 0.2874, "step": 2066500 }, { "epoch": 1.24, - "learning_rate": 4.736412262838889e-05, - "loss": 0.3792, + "learning_rate": 4.736393363148844e-05, + "loss": 0.2895, "step": 2067000 }, { "epoch": 1.24, - "learning_rate": 4.736202686275945e-05, - "loss": 0.384, + "learning_rate": 4.736183366592788e-05, + "loss": 0.2912, "step": 2067500 }, { "epoch": 1.24, - "learning_rate": 4.7359926897198885e-05, - "loss": 0.3803, + "learning_rate": 4.735973370036731e-05, + "loss": 0.2893, "step": 2068000 }, { "epoch": 1.24, - "learning_rate": 4.735782693163832e-05, - "loss": 0.384, + "learning_rate": 4.7357633734806746e-05, + "loss": 0.2886, "step": 2068500 }, { "epoch": 1.24, - "learning_rate": 4.735572696607776e-05, - "loss": 0.3814, + "learning_rate": 4.7355537969177306e-05, + "loss": 0.2868, "step": 2069000 }, { "epoch": 1.24, - "learning_rate": 4.735362700051719e-05, - "loss": 0.3759, + "learning_rate": 4.735343800361674e-05, + "loss": 0.2899, "step": 2069500 }, { "epoch": 1.24, - "learning_rate": 4.7351527034956626e-05, - "loss": 0.393, + "learning_rate": 4.735133803805617e-05, + "loss": 0.2926, "step": 2070000 }, { "epoch": 1.24, - "learning_rate": 4.7349427069396066e-05, - "loss": 0.3773, + "learning_rate": 4.7349238072495606e-05, + "loss": 0.2847, "step": 2070500 }, { "epoch": 1.24, - "learning_rate": 4.734733130376662e-05, - "loss": 0.3841, + "learning_rate": 4.734713810693505e-05, + "loss": 0.2948, "step": 2071000 }, { "epoch": 1.24, - "learning_rate": 4.734523133820605e-05, - "loss": 0.3815, + "learning_rate": 4.734504234130561e-05, + "loss": 0.2889, "step": 2071500 }, { "epoch": 1.24, - "learning_rate": 4.7343131372645486e-05, - "loss": 0.382, + "learning_rate": 4.7342942375745034e-05, + "loss": 0.2858, "step": 2072000 }, { "epoch": 1.24, - "learning_rate": 4.7341031407084927e-05, - "loss": 0.3921, + "learning_rate": 4.734084241018447e-05, + "loss": 0.2901, "step": 2072500 }, { "epoch": 1.24, - "learning_rate": 4.733893564145548e-05, - "loss": 0.3723, + "learning_rate": 4.733874244462391e-05, + "loss": 0.2873, "step": 2073000 }, { "epoch": 1.24, - "learning_rate": 4.7336835675894914e-05, - "loss": 0.3785, + "learning_rate": 4.733664247906334e-05, + "loss": 0.2866, "step": 2073500 }, { "epoch": 1.24, - "learning_rate": 4.733473571033435e-05, - "loss": 0.3876, + "learning_rate": 4.73345467134339e-05, + "loss": 0.2844, "step": 2074000 }, { "epoch": 1.24, - "learning_rate": 4.733263574477379e-05, - "loss": 0.3742, + "learning_rate": 4.7332446747873335e-05, + "loss": 0.2854, "step": 2074500 }, { "epoch": 1.24, - "learning_rate": 4.733053577921322e-05, - "loss": 0.3832, + "learning_rate": 4.7330350982243895e-05, + "loss": 0.2919, "step": 2075000 }, { "epoch": 1.24, - "learning_rate": 4.7328435813652654e-05, - "loss": 0.3768, + "learning_rate": 4.732825101668333e-05, + "loss": 0.2856, "step": 2075500 }, { "epoch": 1.24, - "learning_rate": 4.7326340048023215e-05, - "loss": 0.3881, + "learning_rate": 4.732615105112276e-05, + "loss": 0.2967, "step": 2076000 }, { "epoch": 1.24, - "learning_rate": 4.732424008246265e-05, - "loss": 0.3813, + "learning_rate": 4.73240510855622e-05, + "loss": 0.2859, "step": 2076500 }, { "epoch": 1.25, - "learning_rate": 4.732214011690208e-05, - "loss": 0.3796, + "learning_rate": 4.732195112000163e-05, + "loss": 0.287, "step": 2077000 }, { "epoch": 1.25, - "learning_rate": 4.732004015134152e-05, - "loss": 0.3862, + "learning_rate": 4.731985115444106e-05, + "loss": 0.2951, "step": 2077500 }, { "epoch": 1.25, - "learning_rate": 4.7317940185780955e-05, - "loss": 0.3836, + "learning_rate": 4.73177511888805e-05, + "loss": 0.2888, "step": 2078000 }, { "epoch": 1.25, - "learning_rate": 4.731584022022039e-05, - "loss": 0.3797, + "learning_rate": 4.7315651223319936e-05, + "loss": 0.29, "step": 2078500 }, { "epoch": 1.25, - "learning_rate": 4.731374025465982e-05, - "loss": 0.3862, + "learning_rate": 4.731355125775937e-05, + "loss": 0.2904, "step": 2079000 }, { "epoch": 1.25, - "learning_rate": 4.731164448903038e-05, - "loss": 0.3836, + "learning_rate": 4.731145549212992e-05, + "loss": 0.293, "step": 2079500 }, { "epoch": 1.25, - "learning_rate": 4.7309548723400936e-05, - "loss": 0.3851, + "learning_rate": 4.730935552656936e-05, + "loss": 0.2866, "step": 2080000 }, { "epoch": 1.25, - "learning_rate": 4.730744875784037e-05, - "loss": 0.3761, + "learning_rate": 4.73072555610088e-05, + "loss": 0.2847, "step": 2080500 }, { "epoch": 1.25, - "learning_rate": 4.73053487922798e-05, - "loss": 0.3813, + "learning_rate": 4.730515979537936e-05, + "loss": 0.2854, "step": 2081000 }, { "epoch": 1.25, - "learning_rate": 4.730324882671924e-05, - "loss": 0.3886, + "learning_rate": 4.730305982981879e-05, + "loss": 0.2918, "step": 2081500 }, { "epoch": 1.25, - "learning_rate": 4.730114886115868e-05, - "loss": 0.3732, + "learning_rate": 4.7300959864258224e-05, + "loss": 0.2852, "step": 2082000 }, { "epoch": 1.25, - "learning_rate": 4.729904889559811e-05, - "loss": 0.3794, + "learning_rate": 4.729885989869766e-05, + "loss": 0.2858, "step": 2082500 }, { "epoch": 1.25, - "learning_rate": 4.729694893003755e-05, - "loss": 0.3728, + "learning_rate": 4.72967599331371e-05, + "loss": 0.2881, "step": 2083000 }, { "epoch": 1.25, - "learning_rate": 4.7294848964476984e-05, - "loss": 0.3782, + "learning_rate": 4.729465996757653e-05, + "loss": 0.2913, "step": 2083500 }, { "epoch": 1.25, - "learning_rate": 4.729274899891642e-05, - "loss": 0.369, + "learning_rate": 4.7292560002015965e-05, + "loss": 0.2846, "step": 2084000 }, { "epoch": 1.25, - "learning_rate": 4.729064903335585e-05, - "loss": 0.3723, + "learning_rate": 4.729046423638652e-05, + "loss": 0.2848, "step": 2084500 }, { "epoch": 1.25, - "learning_rate": 4.7288549067795284e-05, - "loss": 0.3769, + "learning_rate": 4.728836427082596e-05, + "loss": 0.2851, "step": 2085000 }, { "epoch": 1.25, - "learning_rate": 4.7286449102234724e-05, - "loss": 0.3806, + "learning_rate": 4.728626430526539e-05, + "loss": 0.2829, "step": 2085500 }, { "epoch": 1.25, - "learning_rate": 4.72843575365364e-05, - "loss": 0.3853, + "learning_rate": 4.7284164339704825e-05, + "loss": 0.2904, "step": 2086000 }, { "epoch": 1.25, - "learning_rate": 4.728225757097584e-05, - "loss": 0.3811, + "learning_rate": 4.7282064374144266e-05, + "loss": 0.2836, "step": 2086500 }, { "epoch": 1.25, - "learning_rate": 4.728015760541527e-05, - "loss": 0.3834, + "learning_rate": 4.72799644085837e-05, + "loss": 0.2887, "step": 2087000 }, { "epoch": 1.25, - "learning_rate": 4.7278057639854705e-05, - "loss": 0.3783, + "learning_rate": 4.727786444302313e-05, + "loss": 0.2851, "step": 2087500 }, { "epoch": 1.25, - "learning_rate": 4.7275957674294145e-05, - "loss": 0.3758, + "learning_rate": 4.7275768677393686e-05, + "loss": 0.285, "step": 2088000 }, { "epoch": 1.25, - "learning_rate": 4.72738619086647e-05, - "loss": 0.3773, + "learning_rate": 4.7273668711833126e-05, + "loss": 0.2879, "step": 2088500 }, { "epoch": 1.25, - "learning_rate": 4.727176194310413e-05, - "loss": 0.3883, + "learning_rate": 4.727156874627256e-05, + "loss": 0.2941, "step": 2089000 }, { "epoch": 1.25, - "learning_rate": 4.7269661977543566e-05, - "loss": 0.3796, + "learning_rate": 4.726947298064311e-05, + "loss": 0.2894, "step": 2089500 }, { "epoch": 1.25, - "learning_rate": 4.7267562011983006e-05, - "loss": 0.3931, + "learning_rate": 4.7267373015082554e-05, + "loss": 0.289, "step": 2090000 }, { "epoch": 1.25, - "learning_rate": 4.726546204642244e-05, - "loss": 0.3785, + "learning_rate": 4.726527304952199e-05, + "loss": 0.2852, "step": 2090500 }, { "epoch": 1.25, - "learning_rate": 4.726336208086187e-05, - "loss": 0.3818, + "learning_rate": 4.726317308396142e-05, + "loss": 0.287, "step": 2091000 }, { "epoch": 1.25, - "learning_rate": 4.7261266315232433e-05, - "loss": 0.3911, + "learning_rate": 4.726107311840086e-05, + "loss": 0.2898, "step": 2091500 }, { "epoch": 1.25, - "learning_rate": 4.725916634967187e-05, - "loss": 0.3776, + "learning_rate": 4.7258973152840294e-05, + "loss": 0.2872, "step": 2092000 }, { "epoch": 1.25, - "learning_rate": 4.72570663841113e-05, - "loss": 0.3892, + "learning_rate": 4.725687318727973e-05, + "loss": 0.2911, "step": 2092500 }, { "epoch": 1.25, - "learning_rate": 4.725496641855074e-05, - "loss": 0.3819, + "learning_rate": 4.725477322171917e-05, + "loss": 0.2889, "step": 2093000 }, { "epoch": 1.26, - "learning_rate": 4.7252870652921294e-05, - "loss": 0.3859, + "learning_rate": 4.72526732561586e-05, + "loss": 0.2893, "step": 2093500 }, { "epoch": 1.26, - "learning_rate": 4.725077068736073e-05, - "loss": 0.3729, + "learning_rate": 4.725057329059803e-05, + "loss": 0.2866, "step": 2094000 }, { "epoch": 1.26, - "learning_rate": 4.724867072180016e-05, - "loss": 0.372, + "learning_rate": 4.724847332503747e-05, + "loss": 0.284, "step": 2094500 }, { "epoch": 1.26, - "learning_rate": 4.72465707562396e-05, - "loss": 0.3773, + "learning_rate": 4.72463733594769e-05, + "loss": 0.2836, "step": 2095000 }, { "epoch": 1.26, - "learning_rate": 4.7244470790679035e-05, - "loss": 0.3883, + "learning_rate": 4.724427759384746e-05, + "loss": 0.2921, "step": 2095500 }, { "epoch": 1.26, - "learning_rate": 4.724237502504959e-05, - "loss": 0.3827, + "learning_rate": 4.7242177628286895e-05, + "loss": 0.2875, "step": 2096000 }, { "epoch": 1.26, - "learning_rate": 4.724027925942014e-05, - "loss": 0.3825, + "learning_rate": 4.724007766272633e-05, + "loss": 0.289, "step": 2096500 }, { "epoch": 1.26, - "learning_rate": 4.72381834937907e-05, - "loss": 0.3795, + "learning_rate": 4.723797769716576e-05, + "loss": 0.2909, "step": 2097000 }, { "epoch": 1.26, - "learning_rate": 4.7236083528230136e-05, - "loss": 0.3823, + "learning_rate": 4.7235877731605196e-05, + "loss": 0.2933, "step": 2097500 }, { "epoch": 1.26, - "learning_rate": 4.723398356266957e-05, - "loss": 0.379, + "learning_rate": 4.7233777766044636e-05, + "loss": 0.2882, "step": 2098000 }, { "epoch": 1.26, - "learning_rate": 4.7231883597109e-05, - "loss": 0.3815, + "learning_rate": 4.723167780048407e-05, + "loss": 0.2866, "step": 2098500 }, { "epoch": 1.26, - "learning_rate": 4.722978363154844e-05, - "loss": 0.3753, + "learning_rate": 4.722957783492351e-05, + "loss": 0.2836, "step": 2099000 }, { "epoch": 1.26, - "learning_rate": 4.7227683665987876e-05, - "loss": 0.3782, + "learning_rate": 4.722747786936294e-05, + "loss": 0.2872, "step": 2099500 }, { "epoch": 1.26, - "learning_rate": 4.722558370042731e-05, - "loss": 0.3766, + "learning_rate": 4.72253821037335e-05, + "loss": 0.2839, "step": 2100000 }, { "epoch": 1.26, - "eval_loss": 0.3669387102127075, - "eval_runtime": 1119.9259, - "eval_samples_per_second": 470.317, - "eval_steps_per_second": 78.386, + "eval_loss": 0.262479692697525, + "eval_runtime": 1461.3082, + "eval_samples_per_second": 360.444, + "eval_steps_per_second": 60.074, "step": 2100000 }, { "epoch": 1.26, - "learning_rate": 4.722348373486675e-05, - "loss": 0.3878, + "learning_rate": 4.722328213817293e-05, + "loss": 0.2908, "step": 2100500 }, { "epoch": 1.26, - "learning_rate": 4.7221383769306184e-05, - "loss": 0.3764, + "learning_rate": 4.722118217261237e-05, + "loss": 0.2864, "step": 2101000 }, { "epoch": 1.26, - "learning_rate": 4.721928380374562e-05, - "loss": 0.3789, + "learning_rate": 4.7219082207051804e-05, + "loss": 0.2912, "step": 2101500 }, { "epoch": 1.26, - "learning_rate": 4.721718383818506e-05, - "loss": 0.3707, + "learning_rate": 4.721698224149124e-05, + "loss": 0.2847, "step": 2102000 }, { "epoch": 1.26, - "learning_rate": 4.721508387262449e-05, - "loss": 0.3784, + "learning_rate": 4.721488647586179e-05, + "loss": 0.2832, "step": 2102500 }, { "epoch": 1.26, - "learning_rate": 4.721298390706392e-05, - "loss": 0.3842, + "learning_rate": 4.721278651030123e-05, + "loss": 0.2911, "step": 2103000 }, { "epoch": 1.26, - "learning_rate": 4.721088394150336e-05, - "loss": 0.3787, + "learning_rate": 4.7210686544740665e-05, + "loss": 0.2834, "step": 2103500 }, { "epoch": 1.26, - "learning_rate": 4.720878397594279e-05, - "loss": 0.3857, + "learning_rate": 4.72085865791801e-05, + "loss": 0.2924, "step": 2104000 }, { "epoch": 1.26, - "learning_rate": 4.720668821031335e-05, - "loss": 0.3738, + "learning_rate": 4.720648661361954e-05, + "loss": 0.285, "step": 2104500 }, { "epoch": 1.26, - "learning_rate": 4.7204588244752785e-05, - "loss": 0.3862, + "learning_rate": 4.720439084799009e-05, + "loss": 0.2949, "step": 2105000 }, { "epoch": 1.26, - "learning_rate": 4.720248827919222e-05, - "loss": 0.3874, + "learning_rate": 4.720229508236065e-05, + "loss": 0.2913, "step": 2105500 }, { "epoch": 1.26, - "learning_rate": 4.720038831363165e-05, - "loss": 0.37, + "learning_rate": 4.7200199316731206e-05, + "loss": 0.2911, "step": 2106000 }, { "epoch": 1.26, - "learning_rate": 4.719828834807109e-05, - "loss": 0.3783, + "learning_rate": 4.719809935117064e-05, + "loss": 0.2898, "step": 2106500 }, { "epoch": 1.26, - "learning_rate": 4.7196188382510526e-05, - "loss": 0.3715, + "learning_rate": 4.719599938561008e-05, + "loss": 0.2804, "step": 2107000 }, { "epoch": 1.26, - "learning_rate": 4.7194092616881086e-05, - "loss": 0.382, + "learning_rate": 4.719389942004951e-05, + "loss": 0.2855, "step": 2107500 }, { "epoch": 1.26, - "learning_rate": 4.719199265132051e-05, - "loss": 0.3834, + "learning_rate": 4.719180365442007e-05, + "loss": 0.2901, "step": 2108000 }, { "epoch": 1.26, - "learning_rate": 4.718989268575995e-05, - "loss": 0.3789, + "learning_rate": 4.71897036888595e-05, + "loss": 0.2806, "step": 2108500 }, { "epoch": 1.26, - "learning_rate": 4.7187792720199386e-05, - "loss": 0.3859, + "learning_rate": 4.718760372329894e-05, + "loss": 0.2884, "step": 2109000 }, { "epoch": 1.26, - "learning_rate": 4.718569275463882e-05, - "loss": 0.375, + "learning_rate": 4.7185503757738374e-05, + "loss": 0.2821, "step": 2109500 }, { "epoch": 1.27, - "learning_rate": 4.718359278907826e-05, - "loss": 0.3805, + "learning_rate": 4.718340379217781e-05, + "loss": 0.2918, "step": 2110000 }, { "epoch": 1.27, - "learning_rate": 4.7181492823517693e-05, - "loss": 0.3826, + "learning_rate": 4.718130382661725e-05, + "loss": 0.2849, "step": 2110500 }, { "epoch": 1.27, - "learning_rate": 4.717939705788825e-05, - "loss": 0.3797, + "learning_rate": 4.7179203861056674e-05, + "loss": 0.2892, "step": 2111000 }, { "epoch": 1.27, - "learning_rate": 4.717729709232768e-05, - "loss": 0.3766, + "learning_rate": 4.7177108095427235e-05, + "loss": 0.2847, "step": 2111500 }, { "epoch": 1.27, - "learning_rate": 4.717519712676712e-05, - "loss": 0.3786, + "learning_rate": 4.7175008129866675e-05, + "loss": 0.2837, "step": 2112000 }, { "epoch": 1.27, - "learning_rate": 4.7173097161206554e-05, - "loss": 0.3778, + "learning_rate": 4.717290816430611e-05, + "loss": 0.2866, "step": 2112500 }, { "epoch": 1.27, - "learning_rate": 4.7170997195645994e-05, - "loss": 0.3892, + "learning_rate": 4.717080819874554e-05, + "loss": 0.2896, "step": 2113000 }, { "epoch": 1.27, - "learning_rate": 4.716889723008543e-05, - "loss": 0.3796, + "learning_rate": 4.7168708233184975e-05, + "loss": 0.2888, "step": 2113500 }, { "epoch": 1.27, - "learning_rate": 4.716679726452486e-05, - "loss": 0.3807, + "learning_rate": 4.716660826762441e-05, + "loss": 0.2881, "step": 2114000 }, { "epoch": 1.27, - "learning_rate": 4.71646972989643e-05, - "loss": 0.3772, + "learning_rate": 4.716450830206384e-05, + "loss": 0.2846, "step": 2114500 }, { "epoch": 1.27, - "learning_rate": 4.7162601533334855e-05, - "loss": 0.3813, + "learning_rate": 4.716240833650328e-05, + "loss": 0.2886, "step": 2115000 }, { "epoch": 1.27, - "learning_rate": 4.716050156777429e-05, - "loss": 0.3803, + "learning_rate": 4.7160312570873836e-05, + "loss": 0.2848, "step": 2115500 }, { "epoch": 1.27, - "learning_rate": 4.715840160221372e-05, - "loss": 0.3749, + "learning_rate": 4.715821260531327e-05, + "loss": 0.2855, "step": 2116000 }, { "epoch": 1.27, - "learning_rate": 4.715630163665316e-05, - "loss": 0.3999, + "learning_rate": 4.71561126397527e-05, + "loss": 0.2971, "step": 2116500 }, { "epoch": 1.27, - "learning_rate": 4.7154210070954836e-05, - "loss": 0.3802, + "learning_rate": 4.715401267419214e-05, + "loss": 0.294, "step": 2117000 }, { "epoch": 1.27, - "learning_rate": 4.715211010539427e-05, - "loss": 0.371, + "learning_rate": 4.7151912708631576e-05, + "loss": 0.288, "step": 2117500 }, { "epoch": 1.27, - "learning_rate": 4.71500101398337e-05, - "loss": 0.3874, + "learning_rate": 4.714981274307101e-05, + "loss": 0.2898, "step": 2118000 }, { "epoch": 1.27, - "learning_rate": 4.7147910174273136e-05, - "loss": 0.3781, + "learning_rate": 4.714771697744157e-05, + "loss": 0.2862, "step": 2118500 }, { "epoch": 1.27, - "learning_rate": 4.7145814408643703e-05, - "loss": 0.3797, + "learning_rate": 4.7145617011881004e-05, + "loss": 0.2874, "step": 2119000 }, { "epoch": 1.27, - "learning_rate": 4.714371444308314e-05, - "loss": 0.3935, + "learning_rate": 4.714351704632044e-05, + "loss": 0.2944, "step": 2119500 }, { "epoch": 1.27, - "learning_rate": 4.7141614477522564e-05, - "loss": 0.3798, + "learning_rate": 4.714141708075988e-05, + "loss": 0.2836, "step": 2120000 }, { "epoch": 1.27, - "learning_rate": 4.7139514511962004e-05, - "loss": 0.3903, + "learning_rate": 4.713931711519931e-05, + "loss": 0.2885, "step": 2120500 }, { "epoch": 1.27, - "learning_rate": 4.713741454640144e-05, - "loss": 0.3859, + "learning_rate": 4.7137217149638744e-05, + "loss": 0.2917, "step": 2121000 }, { "epoch": 1.27, - "learning_rate": 4.713531458084087e-05, - "loss": 0.3794, + "learning_rate": 4.7135117184078185e-05, + "loss": 0.289, "step": 2121500 }, { "epoch": 1.27, - "learning_rate": 4.713321461528031e-05, - "loss": 0.3681, + "learning_rate": 4.713301721851762e-05, + "loss": 0.2902, "step": 2122000 }, { "epoch": 1.27, - "learning_rate": 4.7131114649719744e-05, - "loss": 0.375, + "learning_rate": 4.713092145288817e-05, + "loss": 0.2909, "step": 2122500 }, { "epoch": 1.27, - "learning_rate": 4.712901468415918e-05, - "loss": 0.3804, + "learning_rate": 4.7128821487327605e-05, + "loss": 0.2906, "step": 2123000 }, { "epoch": 1.27, - "learning_rate": 4.712691471859862e-05, - "loss": 0.3835, + "learning_rate": 4.7126721521767045e-05, + "loss": 0.2889, "step": 2123500 }, { "epoch": 1.27, - "learning_rate": 4.712481895296917e-05, - "loss": 0.3855, + "learning_rate": 4.71246257561376e-05, + "loss": 0.2861, "step": 2124000 }, { "epoch": 1.27, - "learning_rate": 4.7122718987408605e-05, - "loss": 0.3779, + "learning_rate": 4.712252579057703e-05, + "loss": 0.2828, "step": 2124500 }, { "epoch": 1.27, - "learning_rate": 4.712061902184804e-05, - "loss": 0.3864, + "learning_rate": 4.7120425825016466e-05, + "loss": 0.2916, "step": 2125000 }, { "epoch": 1.27, - "learning_rate": 4.711852325621859e-05, - "loss": 0.3799, + "learning_rate": 4.7118325859455906e-05, + "loss": 0.2867, "step": 2125500 }, { "epoch": 1.27, - "learning_rate": 4.711642329065803e-05, - "loss": 0.3747, + "learning_rate": 4.711622589389534e-05, + "loss": 0.2899, "step": 2126000 }, { "epoch": 1.27, - "learning_rate": 4.7114323325097466e-05, - "loss": 0.3841, + "learning_rate": 4.711412592833477e-05, + "loss": 0.2875, "step": 2126500 }, { "epoch": 1.28, - "learning_rate": 4.7112223359536906e-05, - "loss": 0.3809, + "learning_rate": 4.711202596277421e-05, + "loss": 0.2869, "step": 2127000 }, { "epoch": 1.28, - "learning_rate": 4.711012339397634e-05, - "loss": 0.3729, + "learning_rate": 4.710992599721365e-05, + "loss": 0.2861, "step": 2127500 }, { "epoch": 1.28, - "learning_rate": 4.710802342841577e-05, - "loss": 0.378, + "learning_rate": 4.710782603165308e-05, + "loss": 0.2868, "step": 2128000 }, { "epoch": 1.28, - "learning_rate": 4.710592346285521e-05, - "loss": 0.3852, + "learning_rate": 4.710573026602364e-05, + "loss": 0.2866, "step": 2128500 }, { "epoch": 1.28, - "learning_rate": 4.710382349729465e-05, - "loss": 0.3752, + "learning_rate": 4.7103630300463074e-05, + "loss": 0.289, "step": 2129000 }, { "epoch": 1.28, - "learning_rate": 4.710172353173408e-05, - "loss": 0.3793, + "learning_rate": 4.710153033490251e-05, + "loss": 0.2875, "step": 2129500 }, { "epoch": 1.28, - "learning_rate": 4.7099623566173514e-05, - "loss": 0.3783, + "learning_rate": 4.709943456927306e-05, + "loss": 0.2911, "step": 2130000 }, { "epoch": 1.28, - "learning_rate": 4.709752360061295e-05, - "loss": 0.3729, + "learning_rate": 4.70973346037125e-05, + "loss": 0.2801, "step": 2130500 }, { "epoch": 1.28, - "learning_rate": 4.709542363505238e-05, - "loss": 0.3748, + "learning_rate": 4.7095234638151935e-05, + "loss": 0.2883, "step": 2131000 }, { "epoch": 1.28, - "learning_rate": 4.709332786942294e-05, - "loss": 0.386, + "learning_rate": 4.709313467259137e-05, + "loss": 0.2891, "step": 2131500 }, { "epoch": 1.28, - "learning_rate": 4.7091232103793494e-05, - "loss": 0.3868, + "learning_rate": 4.709103470703081e-05, + "loss": 0.2955, "step": 2132000 }, { "epoch": 1.28, - "learning_rate": 4.7089132138232935e-05, - "loss": 0.3748, + "learning_rate": 4.708893474147024e-05, + "loss": 0.2832, "step": 2132500 }, { "epoch": 1.28, - "learning_rate": 4.708703217267237e-05, - "loss": 0.3776, + "learning_rate": 4.708683477590967e-05, + "loss": 0.2883, "step": 2133000 }, { "epoch": 1.28, - "learning_rate": 4.70849322071118e-05, - "loss": 0.3861, + "learning_rate": 4.708473481034911e-05, + "loss": 0.2943, "step": 2133500 }, { "epoch": 1.28, - "learning_rate": 4.708283224155124e-05, - "loss": 0.3793, + "learning_rate": 4.708263904471967e-05, + "loss": 0.2813, "step": 2134000 }, { "epoch": 1.28, - "learning_rate": 4.7080732275990675e-05, - "loss": 0.3796, + "learning_rate": 4.70805390791591e-05, + "loss": 0.2854, "step": 2134500 }, { "epoch": 1.28, - "learning_rate": 4.707863231043011e-05, - "loss": 0.3863, + "learning_rate": 4.7078439113598536e-05, + "loss": 0.2904, "step": 2135000 }, { "epoch": 1.28, - "learning_rate": 4.707653234486954e-05, - "loss": 0.3846, + "learning_rate": 4.7076343347969096e-05, + "loss": 0.2897, "step": 2135500 }, { "epoch": 1.28, - "learning_rate": 4.70744365792401e-05, - "loss": 0.3676, + "learning_rate": 4.707424338240853e-05, + "loss": 0.2773, "step": 2136000 }, { "epoch": 1.28, - "learning_rate": 4.7072336613679536e-05, - "loss": 0.3771, + "learning_rate": 4.707214341684796e-05, + "loss": 0.2891, "step": 2136500 }, { "epoch": 1.28, - "learning_rate": 4.707023664811897e-05, - "loss": 0.369, + "learning_rate": 4.7070043451287403e-05, + "loss": 0.2836, "step": 2137000 }, { "epoch": 1.28, - "learning_rate": 4.70681366825584e-05, - "loss": 0.3875, + "learning_rate": 4.706794348572684e-05, + "loss": 0.2904, "step": 2137500 }, { "epoch": 1.28, - "learning_rate": 4.7066045116860083e-05, - "loss": 0.3833, + "learning_rate": 4.706584772009739e-05, + "loss": 0.2911, "step": 2138000 }, { "epoch": 1.28, - "learning_rate": 4.706394515129952e-05, - "loss": 0.3877, + "learning_rate": 4.7063751954467944e-05, + "loss": 0.2893, "step": 2138500 }, { "epoch": 1.28, - "learning_rate": 4.706184518573895e-05, - "loss": 0.3782, + "learning_rate": 4.706165198890738e-05, + "loss": 0.2797, "step": 2139000 }, { "epoch": 1.28, - "learning_rate": 4.705974522017839e-05, - "loss": 0.382, + "learning_rate": 4.705955202334682e-05, + "loss": 0.2877, "step": 2139500 }, { "epoch": 1.28, - "learning_rate": 4.7057645254617824e-05, - "loss": 0.3716, + "learning_rate": 4.705745205778625e-05, + "loss": 0.2791, "step": 2140000 }, { "epoch": 1.28, - "learning_rate": 4.705554528905726e-05, - "loss": 0.3772, + "learning_rate": 4.7055352092225685e-05, + "loss": 0.287, "step": 2140500 }, { "epoch": 1.28, - "learning_rate": 4.705344952342782e-05, - "loss": 0.3846, + "learning_rate": 4.7053256326596245e-05, + "loss": 0.2973, "step": 2141000 }, { "epoch": 1.28, - "learning_rate": 4.705134955786725e-05, - "loss": 0.3779, + "learning_rate": 4.705115636103568e-05, + "loss": 0.2873, "step": 2141500 }, { "epoch": 1.28, - "learning_rate": 4.7049249592306685e-05, - "loss": 0.3788, + "learning_rate": 4.704905639547511e-05, + "loss": 0.2845, "step": 2142000 }, { "epoch": 1.28, - "learning_rate": 4.7047149626746125e-05, - "loss": 0.3758, + "learning_rate": 4.704695642991455e-05, + "loss": 0.2879, "step": 2142500 }, { "epoch": 1.28, - "learning_rate": 4.704504966118556e-05, - "loss": 0.3768, + "learning_rate": 4.7044856464353986e-05, + "loss": 0.2862, "step": 2143000 }, { "epoch": 1.29, - "learning_rate": 4.704294969562499e-05, - "loss": 0.3784, + "learning_rate": 4.704275649879342e-05, + "loss": 0.2849, "step": 2143500 }, { "epoch": 1.29, - "learning_rate": 4.7040853929995546e-05, - "loss": 0.3857, + "learning_rate": 4.704065653323286e-05, + "loss": 0.2904, "step": 2144000 }, { "epoch": 1.29, - "learning_rate": 4.7038753964434986e-05, - "loss": 0.3689, + "learning_rate": 4.703855656767229e-05, + "loss": 0.2819, "step": 2144500 }, { "epoch": 1.29, - "learning_rate": 4.703665399887442e-05, - "loss": 0.3777, + "learning_rate": 4.7036465001973967e-05, + "loss": 0.2878, "step": 2145000 }, { "epoch": 1.29, - "learning_rate": 4.703455403331385e-05, - "loss": 0.3832, + "learning_rate": 4.70343650364134e-05, + "loss": 0.289, "step": 2145500 }, { "epoch": 1.29, - "learning_rate": 4.703245406775329e-05, - "loss": 0.3733, + "learning_rate": 4.7032265070852833e-05, + "loss": 0.2783, "step": 2146000 }, { "epoch": 1.29, - "learning_rate": 4.703035410219272e-05, - "loss": 0.3885, + "learning_rate": 4.7030165105292274e-05, + "loss": 0.2877, "step": 2146500 }, { "epoch": 1.29, - "learning_rate": 4.702825413663215e-05, - "loss": 0.3874, + "learning_rate": 4.702806513973171e-05, + "loss": 0.2893, "step": 2147000 }, { "epoch": 1.29, - "learning_rate": 4.7026158371002713e-05, - "loss": 0.3848, + "learning_rate": 4.702596517417114e-05, + "loss": 0.287, "step": 2147500 }, { "epoch": 1.29, - "learning_rate": 4.7024058405442154e-05, - "loss": 0.3892, + "learning_rate": 4.702386520861058e-05, + "loss": 0.2878, "step": 2148000 }, { "epoch": 1.29, - "learning_rate": 4.702195843988159e-05, - "loss": 0.3824, + "learning_rate": 4.7021765243050014e-05, + "loss": 0.2893, "step": 2148500 }, { "epoch": 1.29, - "learning_rate": 4.701985847432102e-05, - "loss": 0.3761, + "learning_rate": 4.701966527748945e-05, + "loss": 0.2839, "step": 2149000 }, { "epoch": 1.29, - "learning_rate": 4.7017758508760454e-05, - "loss": 0.3807, + "learning_rate": 4.701756531192889e-05, + "loss": 0.2882, "step": 2149500 }, { "epoch": 1.29, - "learning_rate": 4.7015662743131014e-05, - "loss": 0.3928, + "learning_rate": 4.7015465346368315e-05, + "loss": 0.2944, "step": 2150000 }, { "epoch": 1.29, - "learning_rate": 4.701356277757045e-05, - "loss": 0.3877, + "learning_rate": 4.7013365380807755e-05, + "loss": 0.2943, "step": 2150500 }, { "epoch": 1.29, - "learning_rate": 4.701146281200989e-05, - "loss": 0.3797, + "learning_rate": 4.701126541524719e-05, + "loss": 0.2859, "step": 2151000 }, { "epoch": 1.29, - "learning_rate": 4.7009362846449315e-05, - "loss": 0.3697, + "learning_rate": 4.700916964961775e-05, + "loss": 0.2849, "step": 2151500 }, { "epoch": 1.29, - "learning_rate": 4.700726288088875e-05, - "loss": 0.3684, + "learning_rate": 4.7007069684057175e-05, + "loss": 0.2791, "step": 2152000 }, { "epoch": 1.29, - "learning_rate": 4.700516291532819e-05, - "loss": 0.3808, + "learning_rate": 4.7004969718496616e-05, + "loss": 0.2904, "step": 2152500 }, { "epoch": 1.29, - "learning_rate": 4.700306714969875e-05, - "loss": 0.379, + "learning_rate": 4.700286975293605e-05, + "loss": 0.2898, "step": 2153000 }, { "epoch": 1.29, - "learning_rate": 4.700096718413818e-05, - "loss": 0.3768, + "learning_rate": 4.700077398730661e-05, + "loss": 0.288, "step": 2153500 }, { "epoch": 1.29, - "learning_rate": 4.699886721857761e-05, - "loss": 0.3709, + "learning_rate": 4.699867402174604e-05, + "loss": 0.2838, "step": 2154000 }, { "epoch": 1.29, - "learning_rate": 4.699676725301705e-05, - "loss": 0.383, + "learning_rate": 4.6996574056185476e-05, + "loss": 0.292, "step": 2154500 }, { "epoch": 1.29, - "learning_rate": 4.699467148738761e-05, - "loss": 0.3848, + "learning_rate": 4.699447409062491e-05, + "loss": 0.2885, "step": 2155000 }, { "epoch": 1.29, - "learning_rate": 4.699257152182704e-05, - "loss": 0.3798, + "learning_rate": 4.699237412506434e-05, + "loss": 0.291, "step": 2155500 }, { "epoch": 1.29, - "learning_rate": 4.6990471556266476e-05, - "loss": 0.3843, + "learning_rate": 4.6990274159503784e-05, + "loss": 0.2857, "step": 2156000 }, { "epoch": 1.29, - "learning_rate": 4.698837159070591e-05, - "loss": 0.383, + "learning_rate": 4.698817419394322e-05, + "loss": 0.2889, "step": 2156500 }, { "epoch": 1.29, - "learning_rate": 4.698627162514534e-05, - "loss": 0.3824, + "learning_rate": 4.698607422838265e-05, + "loss": 0.2889, "step": 2157000 }, { "epoch": 1.29, - "learning_rate": 4.6984171659584784e-05, - "loss": 0.3839, + "learning_rate": 4.698397426282209e-05, + "loss": 0.2904, "step": 2157500 }, { "epoch": 1.29, - "learning_rate": 4.698207169402422e-05, - "loss": 0.3808, + "learning_rate": 4.6981874297261524e-05, + "loss": 0.2878, "step": 2158000 }, { "epoch": 1.29, - "learning_rate": 4.697997172846365e-05, - "loss": 0.3814, + "learning_rate": 4.697977853163208e-05, + "loss": 0.2938, "step": 2158500 }, { "epoch": 1.29, - "learning_rate": 4.6977875962834204e-05, - "loss": 0.3779, + "learning_rate": 4.697768276600264e-05, + "loss": 0.2882, "step": 2159000 }, { "epoch": 1.29, - "learning_rate": 4.6975775997273644e-05, - "loss": 0.3884, + "learning_rate": 4.697558280044207e-05, + "loss": 0.2907, "step": 2159500 }, { "epoch": 1.3, - "learning_rate": 4.697367603171308e-05, - "loss": 0.3729, + "learning_rate": 4.697348703481263e-05, + "loss": 0.2798, "step": 2160000 }, { "epoch": 1.3, - "learning_rate": 4.697157606615251e-05, - "loss": 0.3818, + "learning_rate": 4.6971387069252065e-05, + "loss": 0.29, "step": 2160500 }, { "epoch": 1.3, - "learning_rate": 4.6969480300523065e-05, - "loss": 0.3765, + "learning_rate": 4.69692871036915e-05, + "loss": 0.2849, "step": 2161000 }, { "epoch": 1.3, - "learning_rate": 4.6967380334962505e-05, - "loss": 0.3795, + "learning_rate": 4.696718713813093e-05, + "loss": 0.2869, "step": 2161500 }, { "epoch": 1.3, - "learning_rate": 4.696528036940194e-05, - "loss": 0.3892, + "learning_rate": 4.6965087172570366e-05, + "loss": 0.2899, "step": 2162000 }, { "epoch": 1.3, - "learning_rate": 4.696318040384137e-05, - "loss": 0.3809, + "learning_rate": 4.69629872070098e-05, + "loss": 0.2891, "step": 2162500 }, { "epoch": 1.3, - "learning_rate": 4.696108463821193e-05, - "loss": 0.3755, + "learning_rate": 4.696088724144924e-05, + "loss": 0.2866, "step": 2163000 }, { "epoch": 1.3, - "learning_rate": 4.6958984672651366e-05, - "loss": 0.3707, + "learning_rate": 4.695878727588867e-05, + "loss": 0.2857, "step": 2163500 }, { "epoch": 1.3, - "learning_rate": 4.69568847070908e-05, - "loss": 0.3751, + "learning_rate": 4.6956687310328106e-05, + "loss": 0.2832, "step": 2164000 }, { "epoch": 1.3, - "learning_rate": 4.695478474153024e-05, - "loss": 0.3788, + "learning_rate": 4.6954587344767547e-05, + "loss": 0.2829, "step": 2164500 }, { "epoch": 1.3, - "learning_rate": 4.695268477596967e-05, - "loss": 0.3816, + "learning_rate": 4.695248737920698e-05, + "loss": 0.2863, "step": 2165000 }, { "epoch": 1.3, - "learning_rate": 4.6950584810409106e-05, - "loss": 0.378, + "learning_rate": 4.695038741364642e-05, + "loss": 0.2805, "step": 2165500 }, { "epoch": 1.3, - "learning_rate": 4.6948484844848547e-05, - "loss": 0.3879, + "learning_rate": 4.6948287448085854e-05, + "loss": 0.2908, "step": 2166000 }, { "epoch": 1.3, - "learning_rate": 4.69463890792191e-05, - "loss": 0.3883, + "learning_rate": 4.694618748252529e-05, + "loss": 0.2913, "step": 2166500 }, { "epoch": 1.3, - "learning_rate": 4.6944289113658534e-05, - "loss": 0.3852, + "learning_rate": 4.694408751696472e-05, + "loss": 0.2914, "step": 2167000 }, { "epoch": 1.3, - "learning_rate": 4.694218914809797e-05, - "loss": 0.3973, + "learning_rate": 4.6941987551404154e-05, + "loss": 0.2919, "step": 2167500 }, { "epoch": 1.3, - "learning_rate": 4.694008918253741e-05, - "loss": 0.3753, + "learning_rate": 4.6939891785774714e-05, + "loss": 0.2861, "step": 2168000 }, { "epoch": 1.3, - "learning_rate": 4.693798921697684e-05, - "loss": 0.3898, + "learning_rate": 4.693779182021415e-05, + "loss": 0.2881, "step": 2168500 }, { "epoch": 1.3, - "learning_rate": 4.6935893451347394e-05, - "loss": 0.3795, + "learning_rate": 4.693569185465359e-05, + "loss": 0.2876, "step": 2169000 }, { "epoch": 1.3, - "learning_rate": 4.693379348578683e-05, - "loss": 0.3796, + "learning_rate": 4.6933591889093015e-05, + "loss": 0.2931, "step": 2169500 }, { "epoch": 1.3, - "learning_rate": 4.693169352022627e-05, - "loss": 0.3707, + "learning_rate": 4.6931496123463575e-05, + "loss": 0.2832, "step": 2170000 }, { "epoch": 1.3, - "learning_rate": 4.69295935546657e-05, - "loss": 0.3712, + "learning_rate": 4.692939615790301e-05, + "loss": 0.2791, "step": 2170500 }, { "epoch": 1.3, - "learning_rate": 4.6927493589105135e-05, - "loss": 0.3812, + "learning_rate": 4.692730039227357e-05, + "loss": 0.2899, "step": 2171000 }, { "epoch": 1.3, - "learning_rate": 4.6925397823475695e-05, - "loss": 0.3732, + "learning_rate": 4.6925200426713e-05, + "loss": 0.2843, "step": 2171500 }, { "epoch": 1.3, - "learning_rate": 4.692329785791513e-05, - "loss": 0.3796, + "learning_rate": 4.6923100461152436e-05, + "loss": 0.2871, "step": 2172000 }, { "epoch": 1.3, - "learning_rate": 4.692119789235456e-05, - "loss": 0.3803, + "learning_rate": 4.6921000495591876e-05, + "loss": 0.2867, "step": 2172500 }, { "epoch": 1.3, - "learning_rate": 4.6919097926794e-05, - "loss": 0.3831, + "learning_rate": 4.691890053003131e-05, + "loss": 0.2929, "step": 2173000 }, { "epoch": 1.3, - "learning_rate": 4.6916997961233436e-05, - "loss": 0.3789, + "learning_rate": 4.691680476440186e-05, + "loss": 0.2908, "step": 2173500 }, { "epoch": 1.3, - "learning_rate": 4.691490219560399e-05, - "loss": 0.3801, + "learning_rate": 4.6914704798841297e-05, + "loss": 0.2841, "step": 2174000 }, { "epoch": 1.3, - "learning_rate": 4.691280223004342e-05, - "loss": 0.3825, + "learning_rate": 4.691260483328074e-05, + "loss": 0.285, "step": 2174500 }, { "epoch": 1.3, - "learning_rate": 4.691070226448286e-05, - "loss": 0.3809, + "learning_rate": 4.691050486772017e-05, + "loss": 0.2882, "step": 2175000 }, { "epoch": 1.3, - "learning_rate": 4.69086022989223e-05, - "loss": 0.3675, + "learning_rate": 4.6908404902159604e-05, + "loss": 0.2805, "step": 2175500 }, { "epoch": 1.3, - "learning_rate": 4.690650653329285e-05, - "loss": 0.3812, + "learning_rate": 4.690630913653016e-05, + "loss": 0.2871, "step": 2176000 }, { "epoch": 1.3, - "learning_rate": 4.6904406567732284e-05, - "loss": 0.3734, + "learning_rate": 4.69042091709696e-05, + "loss": 0.2842, "step": 2176500 }, { "epoch": 1.31, - "learning_rate": 4.6902306602171724e-05, - "loss": 0.381, + "learning_rate": 4.690210920540903e-05, + "loss": 0.2854, "step": 2177000 }, { "epoch": 1.31, - "learning_rate": 4.690020663661116e-05, - "loss": 0.3741, + "learning_rate": 4.6900009239848464e-05, + "loss": 0.285, "step": 2177500 }, { "epoch": 1.31, - "learning_rate": 4.689810667105059e-05, - "loss": 0.3789, + "learning_rate": 4.6897909274287905e-05, + "loss": 0.2884, "step": 2178000 }, { "epoch": 1.31, - "learning_rate": 4.689600670549003e-05, - "loss": 0.3891, + "learning_rate": 4.689580930872734e-05, + "loss": 0.2939, "step": 2178500 }, { "epoch": 1.31, - "learning_rate": 4.6893906739929465e-05, - "loss": 0.3817, + "learning_rate": 4.689370934316677e-05, + "loss": 0.2871, "step": 2179000 }, { "epoch": 1.31, - "learning_rate": 4.6891806774368905e-05, - "loss": 0.3807, + "learning_rate": 4.6891609377606205e-05, + "loss": 0.2868, "step": 2179500 }, { "epoch": 1.31, - "learning_rate": 4.688971100873946e-05, - "loss": 0.3801, + "learning_rate": 4.688950941204564e-05, + "loss": 0.2894, "step": 2180000 }, { "epoch": 1.31, - "learning_rate": 4.688761104317889e-05, - "loss": 0.3801, + "learning_rate": 4.68874136464162e-05, + "loss": 0.2856, "step": 2180500 }, { "epoch": 1.31, - "learning_rate": 4.6885511077618325e-05, - "loss": 0.3916, + "learning_rate": 4.688531368085564e-05, + "loss": 0.2914, "step": 2181000 }, { "epoch": 1.31, - "learning_rate": 4.6883411112057766e-05, - "loss": 0.3772, + "learning_rate": 4.6883213715295066e-05, + "loss": 0.2878, "step": 2181500 }, { "epoch": 1.31, - "learning_rate": 4.688131534642832e-05, - "loss": 0.3811, + "learning_rate": 4.68811137497345e-05, + "loss": 0.2818, "step": 2182000 }, { "epoch": 1.31, - "learning_rate": 4.687921958079887e-05, - "loss": 0.3748, + "learning_rate": 4.687901798410506e-05, + "loss": 0.2889, "step": 2182500 }, { "epoch": 1.31, - "learning_rate": 4.6877119615238306e-05, - "loss": 0.3752, + "learning_rate": 4.68769180185445e-05, + "loss": 0.28, "step": 2183000 }, { "epoch": 1.31, - "learning_rate": 4.687501964967774e-05, - "loss": 0.3754, + "learning_rate": 4.687481805298393e-05, + "loss": 0.284, "step": 2183500 }, { "epoch": 1.31, - "learning_rate": 4.687291968411718e-05, - "loss": 0.3752, + "learning_rate": 4.687271808742336e-05, + "loss": 0.2839, "step": 2184000 }, { "epoch": 1.31, - "learning_rate": 4.687082391848774e-05, - "loss": 0.3901, + "learning_rate": 4.68706181218628e-05, + "loss": 0.2885, "step": 2184500 }, { "epoch": 1.31, - "learning_rate": 4.686872395292717e-05, - "loss": 0.3671, + "learning_rate": 4.686852235623336e-05, + "loss": 0.284, "step": 2185000 }, { "epoch": 1.31, - "learning_rate": 4.686662398736661e-05, - "loss": 0.384, + "learning_rate": 4.6866422390672794e-05, + "loss": 0.2915, "step": 2185500 }, { "epoch": 1.31, - "learning_rate": 4.686452402180604e-05, - "loss": 0.3839, + "learning_rate": 4.686432662504335e-05, + "loss": 0.2922, "step": 2186000 }, { "epoch": 1.31, - "learning_rate": 4.6862424056245474e-05, - "loss": 0.3777, + "learning_rate": 4.686222665948279e-05, + "loss": 0.2864, "step": 2186500 }, { "epoch": 1.31, - "learning_rate": 4.6860324090684914e-05, - "loss": 0.3756, + "learning_rate": 4.686012669392222e-05, + "loss": 0.2813, "step": 2187000 }, { "epoch": 1.31, - "learning_rate": 4.685822412512435e-05, - "loss": 0.3753, + "learning_rate": 4.6858026728361655e-05, + "loss": 0.2876, "step": 2187500 }, { "epoch": 1.31, - "learning_rate": 4.685612415956378e-05, - "loss": 0.3721, + "learning_rate": 4.6855926762801095e-05, + "loss": 0.289, "step": 2188000 }, { "epoch": 1.31, - "learning_rate": 4.6854028393934335e-05, - "loss": 0.3831, + "learning_rate": 4.685382679724052e-05, + "loss": 0.2907, "step": 2188500 }, { "epoch": 1.31, - "learning_rate": 4.6851928428373775e-05, - "loss": 0.383, + "learning_rate": 4.6851726831679955e-05, + "loss": 0.2884, "step": 2189000 }, { "epoch": 1.31, - "learning_rate": 4.684982846281321e-05, - "loss": 0.3859, + "learning_rate": 4.6849626866119395e-05, + "loss": 0.2888, "step": 2189500 }, { "epoch": 1.31, - "learning_rate": 4.684773269718376e-05, - "loss": 0.3742, + "learning_rate": 4.684752690055883e-05, + "loss": 0.2793, "step": 2190000 }, { "epoch": 1.31, - "learning_rate": 4.6845632731623195e-05, - "loss": 0.3763, + "learning_rate": 4.684542693499826e-05, + "loss": 0.2879, "step": 2190500 }, { "epoch": 1.31, - "learning_rate": 4.6843532766062636e-05, - "loss": 0.3756, + "learning_rate": 4.6843331169368816e-05, + "loss": 0.2867, "step": 2191000 }, { "epoch": 1.31, - "learning_rate": 4.684143280050207e-05, - "loss": 0.3764, + "learning_rate": 4.6841231203808256e-05, + "loss": 0.2901, "step": 2191500 }, { "epoch": 1.31, - "learning_rate": 4.683933283494151e-05, - "loss": 0.3742, + "learning_rate": 4.6839135438178816e-05, + "loss": 0.2861, "step": 2192000 }, { "epoch": 1.31, - "learning_rate": 4.683723286938094e-05, - "loss": 0.3771, + "learning_rate": 4.683703547261825e-05, + "loss": 0.2883, "step": 2192500 }, { "epoch": 1.31, - "learning_rate": 4.6835137103751496e-05, - "loss": 0.3931, + "learning_rate": 4.6834935507057683e-05, + "loss": 0.2894, "step": 2193000 }, { "epoch": 1.32, - "learning_rate": 4.683303713819093e-05, - "loss": 0.3817, + "learning_rate": 4.683283554149712e-05, + "loss": 0.283, "step": 2193500 }, { "epoch": 1.32, - "learning_rate": 4.683093717263037e-05, - "loss": 0.3724, + "learning_rate": 4.683073557593655e-05, + "loss": 0.2807, "step": 2194000 }, { "epoch": 1.32, - "learning_rate": 4.6828837207069804e-05, - "loss": 0.3782, + "learning_rate": 4.682863561037599e-05, + "loss": 0.2842, "step": 2194500 }, { "epoch": 1.32, - "learning_rate": 4.682673724150924e-05, - "loss": 0.3781, + "learning_rate": 4.6826535644815424e-05, + "loss": 0.2891, "step": 2195000 }, { "epoch": 1.32, - "learning_rate": 4.682463727594868e-05, - "loss": 0.3729, + "learning_rate": 4.6824439879185984e-05, + "loss": 0.2808, "step": 2195500 }, { "epoch": 1.32, - "learning_rate": 4.682253731038811e-05, - "loss": 0.3793, + "learning_rate": 4.682233991362541e-05, + "loss": 0.2885, "step": 2196000 }, { "epoch": 1.32, - "learning_rate": 4.6820437344827544e-05, - "loss": 0.3698, + "learning_rate": 4.682023994806485e-05, + "loss": 0.2823, "step": 2196500 }, { "epoch": 1.32, - "learning_rate": 4.68183415791981e-05, - "loss": 0.3768, + "learning_rate": 4.6818139982504285e-05, + "loss": 0.2843, "step": 2197000 }, { "epoch": 1.32, - "learning_rate": 4.681624581356866e-05, - "loss": 0.3816, + "learning_rate": 4.681604001694372e-05, + "loss": 0.2857, "step": 2197500 }, { "epoch": 1.32, - "learning_rate": 4.681414584800809e-05, - "loss": 0.3796, + "learning_rate": 4.681394005138316e-05, + "loss": 0.2894, "step": 2198000 }, { "epoch": 1.32, - "learning_rate": 4.6812045882447525e-05, - "loss": 0.3825, + "learning_rate": 4.681184008582259e-05, + "loss": 0.2869, "step": 2198500 }, { "epoch": 1.32, - "learning_rate": 4.6809945916886965e-05, - "loss": 0.3808, + "learning_rate": 4.6809740120262025e-05, + "loss": 0.2891, "step": 2199000 }, { "epoch": 1.32, - "learning_rate": 4.68078459513264e-05, - "loss": 0.3802, + "learning_rate": 4.680764435463258e-05, + "loss": 0.2893, "step": 2199500 }, { "epoch": 1.32, - "learning_rate": 4.680574598576583e-05, - "loss": 0.3751, + "learning_rate": 4.680554438907202e-05, + "loss": 0.2824, "step": 2200000 }, { "epoch": 1.32, - "eval_loss": 0.3648931682109833, - "eval_runtime": 1120.548, - "eval_samples_per_second": 470.056, - "eval_steps_per_second": 78.343, + "eval_loss": 0.2632770538330078, + "eval_runtime": 1460.696, + "eval_samples_per_second": 360.595, + "eval_steps_per_second": 60.099, "step": 2200000 }, { "epoch": 1.32, - "learning_rate": 4.680364602020527e-05, - "loss": 0.3758, + "learning_rate": 4.680344442351145e-05, + "loss": 0.2824, "step": 2200500 }, { "epoch": 1.32, - "learning_rate": 4.6801546054644706e-05, - "loss": 0.3872, + "learning_rate": 4.6801344457950886e-05, + "loss": 0.2899, "step": 2201000 }, { "epoch": 1.32, - "learning_rate": 4.679945028901526e-05, - "loss": 0.387, + "learning_rate": 4.6799244492390326e-05, + "loss": 0.2864, "step": 2201500 }, { "epoch": 1.32, - "learning_rate": 4.679735032345469e-05, - "loss": 0.3761, + "learning_rate": 4.679714452682976e-05, + "loss": 0.2858, "step": 2202000 }, { "epoch": 1.32, - "learning_rate": 4.679525035789413e-05, - "loss": 0.3872, + "learning_rate": 4.679504876120031e-05, + "loss": 0.2952, "step": 2202500 }, { "epoch": 1.32, - "learning_rate": 4.6793150392333567e-05, - "loss": 0.3766, + "learning_rate": 4.6792948795639754e-05, + "loss": 0.2869, "step": 2203000 }, { "epoch": 1.32, - "learning_rate": 4.6791050426773e-05, - "loss": 0.3739, + "learning_rate": 4.679084883007919e-05, + "loss": 0.2858, "step": 2203500 }, { "epoch": 1.32, - "learning_rate": 4.678895046121244e-05, - "loss": 0.3754, + "learning_rate": 4.678874886451862e-05, + "loss": 0.2866, "step": 2204000 }, { "epoch": 1.32, - "learning_rate": 4.678685049565187e-05, - "loss": 0.3678, + "learning_rate": 4.678664889895806e-05, + "loss": 0.2821, "step": 2204500 }, { "epoch": 1.32, - "learning_rate": 4.67847505300913e-05, - "loss": 0.3782, + "learning_rate": 4.6784548933397494e-05, + "loss": 0.2868, "step": 2205000 }, { "epoch": 1.32, - "learning_rate": 4.678265476446186e-05, - "loss": 0.377, + "learning_rate": 4.678245316776805e-05, + "loss": 0.2869, "step": 2205500 }, { "epoch": 1.32, - "learning_rate": 4.67805547989013e-05, - "loss": 0.3778, + "learning_rate": 4.678035320220748e-05, + "loss": 0.2836, "step": 2206000 }, { "epoch": 1.32, - "learning_rate": 4.6778454833340734e-05, - "loss": 0.3664, + "learning_rate": 4.677825323664692e-05, + "loss": 0.2822, "step": 2206500 }, { "epoch": 1.32, - "learning_rate": 4.677635486778017e-05, - "loss": 0.3808, + "learning_rate": 4.6776153271086355e-05, + "loss": 0.2875, "step": 2207000 }, { "epoch": 1.32, - "learning_rate": 4.67742549022196e-05, - "loss": 0.3755, + "learning_rate": 4.677405330552579e-05, + "loss": 0.2837, "step": 2207500 }, { "epoch": 1.32, - "learning_rate": 4.677215913659016e-05, - "loss": 0.3799, + "learning_rate": 4.677195753989634e-05, + "loss": 0.2886, "step": 2208000 }, { "epoch": 1.32, - "learning_rate": 4.6770059171029595e-05, - "loss": 0.3788, + "learning_rate": 4.676985757433578e-05, + "loss": 0.2872, "step": 2208500 }, { "epoch": 1.32, - "learning_rate": 4.6767959205469035e-05, - "loss": 0.384, + "learning_rate": 4.6767757608775216e-05, + "loss": 0.288, "step": 2209000 }, { "epoch": 1.32, - "learning_rate": 4.676585923990846e-05, - "loss": 0.377, + "learning_rate": 4.676565764321465e-05, + "loss": 0.2841, "step": 2209500 }, { "epoch": 1.32, - "learning_rate": 4.676376767421014e-05, - "loss": 0.3808, + "learning_rate": 4.676356187758521e-05, + "loss": 0.2839, "step": 2210000 }, { "epoch": 1.33, - "learning_rate": 4.6761667708649576e-05, - "loss": 0.3833, + "learning_rate": 4.676146191202464e-05, + "loss": 0.2901, "step": 2210500 }, { "epoch": 1.33, - "learning_rate": 4.675956774308901e-05, - "loss": 0.3785, + "learning_rate": 4.6759361946464076e-05, + "loss": 0.2872, "step": 2211000 }, { "epoch": 1.33, - "learning_rate": 4.675746777752845e-05, - "loss": 0.381, + "learning_rate": 4.675726618083463e-05, + "loss": 0.2876, "step": 2211500 }, { "epoch": 1.33, - "learning_rate": 4.675536781196788e-05, - "loss": 0.3797, + "learning_rate": 4.675516621527407e-05, + "loss": 0.2839, "step": 2212000 }, { "epoch": 1.33, - "learning_rate": 4.675327204633844e-05, - "loss": 0.3777, + "learning_rate": 4.6753066249713504e-05, + "loss": 0.2809, "step": 2212500 }, { "epoch": 1.33, - "learning_rate": 4.675117208077788e-05, - "loss": 0.3831, + "learning_rate": 4.675096628415294e-05, + "loss": 0.2882, "step": 2213000 }, { "epoch": 1.33, - "learning_rate": 4.674907211521731e-05, - "loss": 0.3761, + "learning_rate": 4.674887051852349e-05, + "loss": 0.2893, "step": 2213500 }, { "epoch": 1.33, - "learning_rate": 4.6746972149656744e-05, - "loss": 0.3738, + "learning_rate": 4.674677055296293e-05, + "loss": 0.284, "step": 2214000 }, { "epoch": 1.33, - "learning_rate": 4.67448763840273e-05, - "loss": 0.3687, + "learning_rate": 4.6744670587402364e-05, + "loss": 0.2827, "step": 2214500 }, { "epoch": 1.33, - "learning_rate": 4.674277641846674e-05, - "loss": 0.3718, + "learning_rate": 4.67425706218418e-05, + "loss": 0.2789, "step": 2215000 }, { "epoch": 1.33, - "learning_rate": 4.674067645290617e-05, - "loss": 0.3862, + "learning_rate": 4.674047485621236e-05, + "loss": 0.2828, "step": 2215500 }, { "epoch": 1.33, - "learning_rate": 4.6738576487345605e-05, - "loss": 0.391, + "learning_rate": 4.673837489065179e-05, + "loss": 0.295, "step": 2216000 }, { "epoch": 1.33, - "learning_rate": 4.6736476521785045e-05, - "loss": 0.3755, + "learning_rate": 4.6736274925091225e-05, + "loss": 0.286, "step": 2216500 }, { "epoch": 1.33, - "learning_rate": 4.67343807561556e-05, - "loss": 0.3759, + "learning_rate": 4.6734174959530665e-05, + "loss": 0.2811, "step": 2217000 }, { "epoch": 1.33, - "learning_rate": 4.673228079059503e-05, - "loss": 0.3793, + "learning_rate": 4.67320749939701e-05, + "loss": 0.2857, "step": 2217500 }, { "epoch": 1.33, - "learning_rate": 4.6730180825034465e-05, - "loss": 0.3818, + "learning_rate": 4.672997502840953e-05, + "loss": 0.2894, "step": 2218000 }, { "epoch": 1.33, - "learning_rate": 4.6728080859473906e-05, - "loss": 0.3808, + "learning_rate": 4.672787506284897e-05, + "loss": 0.2875, "step": 2218500 }, { "epoch": 1.33, - "learning_rate": 4.672598089391334e-05, - "loss": 0.3915, + "learning_rate": 4.6725775097288406e-05, + "loss": 0.3011, "step": 2219000 }, { "epoch": 1.33, - "learning_rate": 4.672388092835277e-05, - "loss": 0.3809, + "learning_rate": 4.672367513172784e-05, + "loss": 0.2843, "step": 2219500 }, { "epoch": 1.33, - "learning_rate": 4.672178096279221e-05, - "loss": 0.3831, + "learning_rate": 4.672157516616728e-05, + "loss": 0.2854, "step": 2220000 }, { "epoch": 1.33, - "learning_rate": 4.6719680997231646e-05, - "loss": 0.3816, + "learning_rate": 4.671947940053783e-05, + "loss": 0.2866, "step": 2220500 }, { "epoch": 1.33, - "learning_rate": 4.67175852316022e-05, - "loss": 0.3744, + "learning_rate": 4.671737943497727e-05, + "loss": 0.2823, "step": 2221000 }, { "epoch": 1.33, - "learning_rate": 4.6715489465972753e-05, - "loss": 0.3774, + "learning_rate": 4.67152794694167e-05, + "loss": 0.2864, "step": 2221500 }, { "epoch": 1.33, - "learning_rate": 4.6713389500412194e-05, - "loss": 0.371, + "learning_rate": 4.671317950385614e-05, + "loss": 0.2804, "step": 2222000 }, { "epoch": 1.33, - "learning_rate": 4.671128953485163e-05, - "loss": 0.3921, + "learning_rate": 4.6711083738226694e-05, + "loss": 0.2936, "step": 2222500 }, { "epoch": 1.33, - "learning_rate": 4.670918956929106e-05, - "loss": 0.3849, + "learning_rate": 4.670898797259725e-05, + "loss": 0.294, "step": 2223000 }, { "epoch": 1.33, - "learning_rate": 4.67070896037305e-05, - "loss": 0.3767, + "learning_rate": 4.670688800703668e-05, + "loss": 0.2821, "step": 2223500 }, { "epoch": 1.33, - "learning_rate": 4.6704989638169934e-05, - "loss": 0.3715, + "learning_rate": 4.670478804147612e-05, + "loss": 0.2894, "step": 2224000 }, { "epoch": 1.33, - "learning_rate": 4.670288967260937e-05, - "loss": 0.3747, + "learning_rate": 4.6702688075915555e-05, + "loss": 0.2815, "step": 2224500 }, { "epoch": 1.33, - "learning_rate": 4.670078970704881e-05, - "loss": 0.3861, + "learning_rate": 4.670059231028611e-05, + "loss": 0.2942, "step": 2225000 }, { "epoch": 1.33, - "learning_rate": 4.669869814135048e-05, - "loss": 0.3703, + "learning_rate": 4.669849234472554e-05, + "loss": 0.2829, "step": 2225500 }, { "epoch": 1.33, - "learning_rate": 4.6696598175789915e-05, - "loss": 0.3815, + "learning_rate": 4.669639237916498e-05, + "loss": 0.2918, "step": 2226000 }, { "epoch": 1.33, - "learning_rate": 4.669449821022935e-05, - "loss": 0.3728, + "learning_rate": 4.6694292413604415e-05, + "loss": 0.2866, "step": 2226500 }, { "epoch": 1.34, - "learning_rate": 4.669239824466879e-05, - "loss": 0.3827, + "learning_rate": 4.669219244804385e-05, + "loss": 0.2866, "step": 2227000 }, { "epoch": 1.34, - "learning_rate": 4.669029827910822e-05, - "loss": 0.3726, + "learning_rate": 4.669009248248329e-05, + "loss": 0.2854, "step": 2227500 }, { "epoch": 1.34, - "learning_rate": 4.6688198313547656e-05, - "loss": 0.3756, + "learning_rate": 4.668799251692272e-05, + "loss": 0.2875, "step": 2228000 }, { "epoch": 1.34, - "learning_rate": 4.6686098347987096e-05, - "loss": 0.3748, + "learning_rate": 4.6685892551362156e-05, + "loss": 0.2906, "step": 2228500 }, { "epoch": 1.34, - "learning_rate": 4.668399838242653e-05, - "loss": 0.3756, + "learning_rate": 4.6683792585801596e-05, + "loss": 0.2832, "step": 2229000 }, { "epoch": 1.34, - "learning_rate": 4.668190261679708e-05, - "loss": 0.3789, + "learning_rate": 4.668169682017215e-05, + "loss": 0.2883, "step": 2229500 }, { "epoch": 1.34, - "learning_rate": 4.6679806851167637e-05, - "loss": 0.3813, + "learning_rate": 4.667959685461158e-05, + "loss": 0.2916, "step": 2230000 }, { "epoch": 1.34, - "learning_rate": 4.667770688560707e-05, - "loss": 0.3898, + "learning_rate": 4.6677496889051024e-05, + "loss": 0.2894, "step": 2230500 }, { "epoch": 1.34, - "learning_rate": 4.667560692004651e-05, - "loss": 0.3782, + "learning_rate": 4.667539692349046e-05, + "loss": 0.291, "step": 2231000 }, { "epoch": 1.34, - "learning_rate": 4.6673506954485944e-05, - "loss": 0.3724, + "learning_rate": 4.667330115786101e-05, + "loss": 0.2923, "step": 2231500 }, { "epoch": 1.34, - "learning_rate": 4.6671411188856504e-05, - "loss": 0.3784, + "learning_rate": 4.6671201192300444e-05, + "loss": 0.2926, "step": 2232000 }, { "epoch": 1.34, - "learning_rate": 4.666931542322706e-05, - "loss": 0.3722, + "learning_rate": 4.6669101226739884e-05, + "loss": 0.2838, "step": 2232500 }, { "epoch": 1.34, - "learning_rate": 4.66672154576665e-05, - "loss": 0.376, + "learning_rate": 4.666700126117932e-05, + "loss": 0.2804, "step": 2233000 }, { "epoch": 1.34, - "learning_rate": 4.666511549210593e-05, - "loss": 0.3786, + "learning_rate": 4.666490129561875e-05, + "loss": 0.2866, "step": 2233500 }, { "epoch": 1.34, - "learning_rate": 4.6663015526545365e-05, - "loss": 0.3831, + "learning_rate": 4.666280133005819e-05, + "loss": 0.2848, "step": 2234000 }, { "epoch": 1.34, - "learning_rate": 4.6660915560984805e-05, - "loss": 0.3788, + "learning_rate": 4.666070136449762e-05, + "loss": 0.2854, "step": 2234500 }, { "epoch": 1.34, - "learning_rate": 4.665881559542423e-05, - "loss": 0.3804, + "learning_rate": 4.665860139893705e-05, + "loss": 0.2884, "step": 2235000 }, { "epoch": 1.34, - "learning_rate": 4.6656715629863665e-05, - "loss": 0.3775, + "learning_rate": 4.665650143337649e-05, + "loss": 0.2891, "step": 2235500 }, { "epoch": 1.34, - "learning_rate": 4.6654615664303105e-05, - "loss": 0.3751, + "learning_rate": 4.665440566774705e-05, + "loss": 0.2853, "step": 2236000 }, { "epoch": 1.34, - "learning_rate": 4.665251569874254e-05, - "loss": 0.3867, + "learning_rate": 4.6652305702186486e-05, + "loss": 0.2897, "step": 2236500 }, { "epoch": 1.34, - "learning_rate": 4.665041573318197e-05, - "loss": 0.3825, + "learning_rate": 4.665020993655704e-05, + "loss": 0.2897, "step": 2237000 }, { "epoch": 1.34, - "learning_rate": 4.664831576762141e-05, - "loss": 0.3839, + "learning_rate": 4.664810997099648e-05, + "loss": 0.2887, "step": 2237500 }, { "epoch": 1.34, - "learning_rate": 4.6646215802060846e-05, - "loss": 0.3806, + "learning_rate": 4.664601000543591e-05, + "loss": 0.2897, "step": 2238000 }, { "epoch": 1.34, - "learning_rate": 4.664411583650028e-05, - "loss": 0.3682, + "learning_rate": 4.6643910039875346e-05, + "loss": 0.2804, "step": 2238500 }, { "epoch": 1.34, - "learning_rate": 4.664201587093972e-05, - "loss": 0.3741, + "learning_rate": 4.66418142742459e-05, + "loss": 0.2865, "step": 2239000 }, { "epoch": 1.34, - "learning_rate": 4.663991590537915e-05, - "loss": 0.3744, + "learning_rate": 4.663971430868534e-05, + "loss": 0.2848, "step": 2239500 }, { "epoch": 1.34, - "learning_rate": 4.663781593981859e-05, - "loss": 0.3862, + "learning_rate": 4.6637614343124774e-05, + "loss": 0.2876, "step": 2240000 }, { "epoch": 1.34, - "learning_rate": 4.663572017418914e-05, - "loss": 0.3882, + "learning_rate": 4.663551437756421e-05, + "loss": 0.2885, "step": 2240500 }, { "epoch": 1.34, - "learning_rate": 4.663362020862858e-05, - "loss": 0.388, + "learning_rate": 4.663341441200365e-05, + "loss": 0.2889, "step": 2241000 }, { "epoch": 1.34, - "learning_rate": 4.6631520243068014e-05, - "loss": 0.3769, + "learning_rate": 4.663131444644308e-05, + "loss": 0.2816, "step": 2241500 }, { "epoch": 1.34, - "learning_rate": 4.662942027750745e-05, - "loss": 0.3822, + "learning_rate": 4.662921448088251e-05, + "loss": 0.2867, "step": 2242000 }, { "epoch": 1.34, - "learning_rate": 4.662732451187801e-05, - "loss": 0.3854, + "learning_rate": 4.662711451532195e-05, + "loss": 0.2921, "step": 2242500 }, { "epoch": 1.34, - "learning_rate": 4.662522454631744e-05, - "loss": 0.3679, + "learning_rate": 4.662501454976138e-05, + "loss": 0.2783, "step": 2243000 }, { "epoch": 1.35, - "learning_rate": 4.6623128780687995e-05, - "loss": 0.3783, + "learning_rate": 4.662291878413194e-05, + "loss": 0.2893, "step": 2243500 }, { "epoch": 1.35, - "learning_rate": 4.662102881512743e-05, - "loss": 0.3845, + "learning_rate": 4.6620818818571375e-05, + "loss": 0.284, "step": 2244000 }, { "epoch": 1.35, - "learning_rate": 4.661892884956687e-05, - "loss": 0.3828, + "learning_rate": 4.661871885301081e-05, + "loss": 0.2879, "step": 2244500 }, { "epoch": 1.35, - "learning_rate": 4.66168288840063e-05, - "loss": 0.3789, + "learning_rate": 4.661661888745024e-05, + "loss": 0.2902, "step": 2245000 }, { "epoch": 1.35, - "learning_rate": 4.6614728918445735e-05, - "loss": 0.378, + "learning_rate": 4.661451892188968e-05, + "loss": 0.2885, "step": 2245500 }, { "epoch": 1.35, - "learning_rate": 4.6612628952885176e-05, - "loss": 0.3779, + "learning_rate": 4.6612418956329116e-05, + "loss": 0.2843, "step": 2246000 }, { "epoch": 1.35, - "learning_rate": 4.661053318725573e-05, - "loss": 0.382, + "learning_rate": 4.661032319069967e-05, + "loss": 0.2895, "step": 2246500 }, { "epoch": 1.35, - "learning_rate": 4.660843322169516e-05, - "loss": 0.3779, + "learning_rate": 4.66082232251391e-05, + "loss": 0.2866, "step": 2247000 }, { "epoch": 1.35, - "learning_rate": 4.6606333256134596e-05, - "loss": 0.3803, + "learning_rate": 4.660612325957854e-05, + "loss": 0.2873, "step": 2247500 }, { "epoch": 1.35, - "learning_rate": 4.6604233290574036e-05, - "loss": 0.3792, + "learning_rate": 4.6604023294017976e-05, + "loss": 0.2885, "step": 2248000 }, { "epoch": 1.35, - "learning_rate": 4.660213332501347e-05, - "loss": 0.3802, + "learning_rate": 4.660192332845741e-05, + "loss": 0.287, "step": 2248500 }, { "epoch": 1.35, - "learning_rate": 4.66000333594529e-05, - "loss": 0.3897, + "learning_rate": 4.659982336289685e-05, + "loss": 0.287, "step": 2249000 }, { "epoch": 1.35, - "learning_rate": 4.6597933393892343e-05, - "loss": 0.3827, + "learning_rate": 4.6597727597267404e-05, + "loss": 0.2821, "step": 2249500 }, { "epoch": 1.35, - "learning_rate": 4.659583342833177e-05, - "loss": 0.3787, + "learning_rate": 4.659562763170684e-05, + "loss": 0.289, "step": 2250000 }, { "epoch": 1.35, - "learning_rate": 4.659373346277121e-05, - "loss": 0.387, + "learning_rate": 4.65935318660774e-05, + "loss": 0.2876, "step": 2250500 }, { "epoch": 1.35, - "learning_rate": 4.6591641897072884e-05, - "loss": 0.3896, + "learning_rate": 4.659143190051683e-05, + "loss": 0.2949, "step": 2251000 }, { "epoch": 1.35, - "learning_rate": 4.6589541931512324e-05, - "loss": 0.3779, + "learning_rate": 4.6589331934956264e-05, + "loss": 0.286, "step": 2251500 }, { "epoch": 1.35, - "learning_rate": 4.658744196595176e-05, - "loss": 0.365, + "learning_rate": 4.65872319693957e-05, + "loss": 0.2762, "step": 2252000 }, { "epoch": 1.35, - "learning_rate": 4.658534200039119e-05, - "loss": 0.3776, + "learning_rate": 4.658513200383514e-05, + "loss": 0.2817, "step": 2252500 }, { "epoch": 1.35, - "learning_rate": 4.658324203483063e-05, - "loss": 0.386, + "learning_rate": 4.658303203827457e-05, + "loss": 0.2899, "step": 2253000 }, { "epoch": 1.35, - "learning_rate": 4.6581142069270065e-05, - "loss": 0.381, + "learning_rate": 4.6580932072714005e-05, + "loss": 0.2844, "step": 2253500 }, { "epoch": 1.35, - "learning_rate": 4.65790421037095e-05, - "loss": 0.387, + "learning_rate": 4.6578832107153445e-05, + "loss": 0.2844, "step": 2254000 }, { "epoch": 1.35, - "learning_rate": 4.657694213814893e-05, - "loss": 0.3701, + "learning_rate": 4.6576736341524e-05, + "loss": 0.2827, "step": 2254500 }, { "epoch": 1.35, - "learning_rate": 4.6574842172588365e-05, - "loss": 0.3805, + "learning_rate": 4.657463637596343e-05, + "loss": 0.2857, "step": 2255000 }, { "epoch": 1.35, - "learning_rate": 4.65727422070278e-05, - "loss": 0.3731, + "learning_rate": 4.6572536410402866e-05, + "loss": 0.2854, "step": 2255500 }, { "epoch": 1.35, - "learning_rate": 4.657065064132948e-05, - "loss": 0.3831, + "learning_rate": 4.6570436444842306e-05, + "loss": 0.2837, "step": 2256000 }, { "epoch": 1.35, - "learning_rate": 4.656855067576892e-05, - "loss": 0.3772, + "learning_rate": 4.656833647928174e-05, + "loss": 0.2869, "step": 2256500 }, { "epoch": 1.35, - "learning_rate": 4.656645071020835e-05, - "loss": 0.3832, + "learning_rate": 4.656623651372117e-05, + "loss": 0.2895, "step": 2257000 }, { "epoch": 1.35, - "learning_rate": 4.6564350744647786e-05, - "loss": 0.3765, + "learning_rate": 4.6564140748091726e-05, + "loss": 0.2836, "step": 2257500 }, { "epoch": 1.35, - "learning_rate": 4.6562250779087227e-05, - "loss": 0.3736, + "learning_rate": 4.6562040782531167e-05, + "loss": 0.2859, "step": 2258000 }, { "epoch": 1.35, - "learning_rate": 4.656015081352666e-05, - "loss": 0.3868, + "learning_rate": 4.65599408169706e-05, + "loss": 0.2922, "step": 2258500 }, { "epoch": 1.35, - "learning_rate": 4.6558050847966094e-05, - "loss": 0.3737, + "learning_rate": 4.6557840851410033e-05, + "loss": 0.2836, "step": 2259000 }, { "epoch": 1.35, - "learning_rate": 4.655595088240553e-05, - "loss": 0.3817, + "learning_rate": 4.6555745085780594e-05, + "loss": 0.2854, "step": 2259500 }, { "epoch": 1.35, - "learning_rate": 4.655385091684496e-05, - "loss": 0.3733, + "learning_rate": 4.655364512022003e-05, + "loss": 0.2872, "step": 2260000 }, { "epoch": 1.36, - "learning_rate": 4.655175515121552e-05, - "loss": 0.3868, + "learning_rate": 4.655154515465946e-05, + "loss": 0.2932, "step": 2260500 }, { "epoch": 1.36, - "learning_rate": 4.6549655185654954e-05, - "loss": 0.3679, + "learning_rate": 4.65494451890989e-05, + "loss": 0.284, "step": 2261000 }, { "epoch": 1.36, - "learning_rate": 4.6547555220094394e-05, - "loss": 0.3763, + "learning_rate": 4.6547349423469455e-05, + "loss": 0.2908, "step": 2261500 }, { "epoch": 1.36, - "learning_rate": 4.654545525453382e-05, - "loss": 0.3706, + "learning_rate": 4.654524945790889e-05, + "loss": 0.2812, "step": 2262000 }, { "epoch": 1.36, - "learning_rate": 4.654335948890438e-05, - "loss": 0.3744, + "learning_rate": 4.654314949234832e-05, + "loss": 0.2848, "step": 2262500 }, { "epoch": 1.36, - "learning_rate": 4.654125952334382e-05, - "loss": 0.3709, + "learning_rate": 4.654104952678776e-05, + "loss": 0.2851, "step": 2263000 }, { "epoch": 1.36, - "learning_rate": 4.6539159557783255e-05, - "loss": 0.3789, + "learning_rate": 4.6538949561227195e-05, + "loss": 0.2903, "step": 2263500 }, { "epoch": 1.36, - "learning_rate": 4.653705959222269e-05, - "loss": 0.3855, + "learning_rate": 4.653684959566663e-05, + "loss": 0.2924, "step": 2264000 }, { "epoch": 1.36, - "learning_rate": 4.653495962666212e-05, - "loss": 0.3757, + "learning_rate": 4.653475383003718e-05, + "loss": 0.2843, "step": 2264500 }, { "epoch": 1.36, - "learning_rate": 4.6532859661101556e-05, - "loss": 0.3817, + "learning_rate": 4.653265386447662e-05, + "loss": 0.2838, "step": 2265000 }, { "epoch": 1.36, - "learning_rate": 4.653075969554099e-05, - "loss": 0.3774, + "learning_rate": 4.6530553898916056e-05, + "loss": 0.2883, "step": 2265500 }, { "epoch": 1.36, - "learning_rate": 4.652865972998043e-05, - "loss": 0.3726, + "learning_rate": 4.652845393335549e-05, + "loss": 0.2883, "step": 2266000 }, { "epoch": 1.36, - "learning_rate": 4.652656396435098e-05, - "loss": 0.3642, + "learning_rate": 4.652635396779493e-05, + "loss": 0.2817, "step": 2266500 }, { "epoch": 1.36, - "learning_rate": 4.652446819872154e-05, - "loss": 0.3721, + "learning_rate": 4.652425820216548e-05, + "loss": 0.279, "step": 2267000 }, { "epoch": 1.36, - "learning_rate": 4.652236823316098e-05, - "loss": 0.3866, + "learning_rate": 4.6522162436536044e-05, + "loss": 0.2911, "step": 2267500 }, { "epoch": 1.36, - "learning_rate": 4.652026826760041e-05, - "loss": 0.372, + "learning_rate": 4.652006247097547e-05, + "loss": 0.2831, "step": 2268000 }, { "epoch": 1.36, - "learning_rate": 4.651816830203985e-05, - "loss": 0.38, + "learning_rate": 4.651796250541491e-05, + "loss": 0.2865, "step": 2268500 }, { "epoch": 1.36, - "learning_rate": 4.651606833647928e-05, - "loss": 0.3748, + "learning_rate": 4.6515862539854344e-05, + "loss": 0.2847, "step": 2269000 }, { "epoch": 1.36, - "learning_rate": 4.651396837091872e-05, - "loss": 0.3869, + "learning_rate": 4.651376257429378e-05, + "loss": 0.2933, "step": 2269500 }, { "epoch": 1.36, - "learning_rate": 4.651186840535815e-05, - "loss": 0.3762, + "learning_rate": 4.651166260873322e-05, + "loss": 0.2885, "step": 2270000 }, { "epoch": 1.36, - "learning_rate": 4.6509768439797584e-05, - "loss": 0.375, + "learning_rate": 4.650956264317265e-05, + "loss": 0.2876, "step": 2270500 }, { "epoch": 1.36, - "learning_rate": 4.6507672674168145e-05, - "loss": 0.3801, + "learning_rate": 4.6507462677612085e-05, + "loss": 0.2855, "step": 2271000 }, { "epoch": 1.36, - "learning_rate": 4.65055769085387e-05, - "loss": 0.3807, + "learning_rate": 4.6505362712051525e-05, + "loss": 0.2848, "step": 2271500 }, { "epoch": 1.36, - "learning_rate": 4.650347694297814e-05, - "loss": 0.3742, + "learning_rate": 4.650326694642208e-05, + "loss": 0.2815, "step": 2272000 }, { "epoch": 1.36, - "learning_rate": 4.650137697741757e-05, - "loss": 0.3795, + "learning_rate": 4.650116698086151e-05, + "loss": 0.2879, "step": 2272500 }, { "epoch": 1.36, - "learning_rate": 4.6499277011857005e-05, - "loss": 0.3908, + "learning_rate": 4.6499067015300945e-05, + "loss": 0.2923, "step": 2273000 }, { "epoch": 1.36, - "learning_rate": 4.6497177046296446e-05, - "loss": 0.3818, + "learning_rate": 4.6496967049740385e-05, + "loss": 0.2917, "step": 2273500 }, { "epoch": 1.36, - "learning_rate": 4.6495081280667e-05, - "loss": 0.3811, + "learning_rate": 4.649486708417982e-05, + "loss": 0.285, "step": 2274000 }, { "epoch": 1.36, - "learning_rate": 4.649298131510643e-05, - "loss": 0.3758, + "learning_rate": 4.649277131855037e-05, + "loss": 0.2872, "step": 2274500 }, { "epoch": 1.36, - "learning_rate": 4.6490881349545866e-05, - "loss": 0.3782, + "learning_rate": 4.649067135298981e-05, + "loss": 0.2832, "step": 2275000 }, { "epoch": 1.36, - "learning_rate": 4.6488781383985306e-05, - "loss": 0.3732, + "learning_rate": 4.6488571387429246e-05, + "loss": 0.2862, "step": 2275500 }, { "epoch": 1.36, - "learning_rate": 4.648668141842473e-05, - "loss": 0.3847, + "learning_rate": 4.648647142186868e-05, + "loss": 0.2897, "step": 2276000 }, { "epoch": 1.36, - "learning_rate": 4.648458145286417e-05, - "loss": 0.3838, + "learning_rate": 4.648437145630812e-05, + "loss": 0.2849, "step": 2276500 }, { "epoch": 1.37, - "learning_rate": 4.6482485687234734e-05, - "loss": 0.3803, + "learning_rate": 4.648227149074755e-05, + "loss": 0.2854, "step": 2277000 }, { "epoch": 1.37, - "learning_rate": 4.648038572167417e-05, - "loss": 0.3694, + "learning_rate": 4.648017152518699e-05, + "loss": 0.2765, "step": 2277500 }, { "epoch": 1.37, - "learning_rate": 4.64782857561136e-05, - "loss": 0.3763, + "learning_rate": 4.647807575955754e-05, + "loss": 0.2852, "step": 2278000 }, { "epoch": 1.37, - "learning_rate": 4.6476185790553034e-05, - "loss": 0.3793, + "learning_rate": 4.647597579399698e-05, + "loss": 0.2821, "step": 2278500 }, { "epoch": 1.37, - "learning_rate": 4.647408582499247e-05, - "loss": 0.3796, + "learning_rate": 4.6473875828436414e-05, + "loss": 0.291, "step": 2279000 }, { "epoch": 1.37, - "learning_rate": 4.64719858594319e-05, - "loss": 0.376, + "learning_rate": 4.647177586287585e-05, + "loss": 0.2835, "step": 2279500 }, { "epoch": 1.37, - "learning_rate": 4.646988589387134e-05, - "loss": 0.3848, + "learning_rate": 4.64696800972464e-05, + "loss": 0.2875, "step": 2280000 }, { "epoch": 1.37, - "learning_rate": 4.6467785928310774e-05, - "loss": 0.3791, + "learning_rate": 4.646758013168584e-05, + "loss": 0.2871, "step": 2280500 }, { "epoch": 1.37, - "learning_rate": 4.646569016268133e-05, - "loss": 0.3723, + "learning_rate": 4.6465480166125275e-05, + "loss": 0.289, "step": 2281000 }, { "epoch": 1.37, - "learning_rate": 4.646359439705189e-05, - "loss": 0.3806, + "learning_rate": 4.646338020056471e-05, + "loss": 0.2845, "step": 2281500 }, { "epoch": 1.37, - "learning_rate": 4.646149443149132e-05, - "loss": 0.3771, + "learning_rate": 4.646128023500415e-05, + "loss": 0.2844, "step": 2282000 }, { "epoch": 1.37, - "learning_rate": 4.645939446593076e-05, - "loss": 0.3771, + "learning_rate": 4.645918026944358e-05, + "loss": 0.2818, "step": 2282500 }, { "epoch": 1.37, - "learning_rate": 4.6457294500370196e-05, - "loss": 0.3744, + "learning_rate": 4.6457080303883015e-05, + "loss": 0.2834, "step": 2283000 }, { "epoch": 1.37, - "learning_rate": 4.645519873474075e-05, - "loss": 0.3771, + "learning_rate": 4.645498033832245e-05, + "loss": 0.2851, "step": 2283500 }, { "epoch": 1.37, - "learning_rate": 4.645309876918019e-05, - "loss": 0.3792, + "learning_rate": 4.645288037276188e-05, + "loss": 0.2825, "step": 2284000 }, { "epoch": 1.37, - "learning_rate": 4.645099880361962e-05, - "loss": 0.3856, + "learning_rate": 4.645078460713244e-05, + "loss": 0.2887, "step": 2284500 }, { "epoch": 1.37, - "learning_rate": 4.6448898838059056e-05, - "loss": 0.3796, + "learning_rate": 4.644868464157188e-05, + "loss": 0.2847, "step": 2285000 }, { "epoch": 1.37, - "learning_rate": 4.644679887249849e-05, - "loss": 0.366, + "learning_rate": 4.644658467601131e-05, + "loss": 0.2803, "step": 2285500 }, { "epoch": 1.37, - "learning_rate": 4.644469890693792e-05, - "loss": 0.3806, + "learning_rate": 4.644448471045074e-05, + "loss": 0.2863, "step": 2286000 }, { "epoch": 1.37, - "learning_rate": 4.644259894137736e-05, - "loss": 0.3825, + "learning_rate": 4.644238474489018e-05, + "loss": 0.2861, "step": 2286500 }, { "epoch": 1.37, - "learning_rate": 4.644050317574792e-05, - "loss": 0.3826, + "learning_rate": 4.6440288979260744e-05, + "loss": 0.2878, "step": 2287000 }, { "epoch": 1.37, - "learning_rate": 4.643840741011847e-05, - "loss": 0.3727, + "learning_rate": 4.643818901370018e-05, + "loss": 0.2807, "step": 2287500 }, { "epoch": 1.37, - "learning_rate": 4.643630744455791e-05, - "loss": 0.3722, + "learning_rate": 4.6436089048139604e-05, + "loss": 0.2832, "step": 2288000 }, { "epoch": 1.37, - "learning_rate": 4.6434207478997344e-05, - "loss": 0.3869, + "learning_rate": 4.6433989082579044e-05, + "loss": 0.2938, "step": 2288500 }, { "epoch": 1.37, - "learning_rate": 4.643210751343678e-05, - "loss": 0.3746, + "learning_rate": 4.643188911701848e-05, + "loss": 0.2837, "step": 2289000 }, { "epoch": 1.37, - "learning_rate": 4.643000754787622e-05, - "loss": 0.3728, + "learning_rate": 4.642979335138904e-05, + "loss": 0.2829, "step": 2289500 }, { "epoch": 1.37, - "learning_rate": 4.642790758231565e-05, - "loss": 0.3672, + "learning_rate": 4.642769338582848e-05, + "loss": 0.2808, "step": 2290000 }, { "epoch": 1.37, - "learning_rate": 4.6425807616755085e-05, - "loss": 0.3789, + "learning_rate": 4.6425593420267905e-05, + "loss": 0.2889, "step": 2290500 }, { "epoch": 1.37, - "learning_rate": 4.642370765119452e-05, - "loss": 0.3827, + "learning_rate": 4.642349345470734e-05, + "loss": 0.2851, "step": 2291000 }, { "epoch": 1.37, - "learning_rate": 4.642160768563395e-05, - "loss": 0.3773, + "learning_rate": 4.64213976890779e-05, + "loss": 0.2883, "step": 2291500 }, { "epoch": 1.37, - "learning_rate": 4.641950772007339e-05, - "loss": 0.3734, + "learning_rate": 4.641929772351734e-05, + "loss": 0.2879, "step": 2292000 }, { "epoch": 1.37, - "learning_rate": 4.6417407754512826e-05, - "loss": 0.3707, + "learning_rate": 4.6417197757956765e-05, + "loss": 0.2811, "step": 2292500 }, { "epoch": 1.37, - "learning_rate": 4.641530778895226e-05, - "loss": 0.3742, + "learning_rate": 4.6415101992327326e-05, + "loss": 0.2818, "step": 2293000 }, { "epoch": 1.38, - "learning_rate": 4.641321202332281e-05, - "loss": 0.3851, + "learning_rate": 4.641300202676676e-05, + "loss": 0.2918, "step": 2293500 }, { "epoch": 1.38, - "learning_rate": 4.641111625769337e-05, - "loss": 0.3646, + "learning_rate": 4.64109020612062e-05, + "loss": 0.2771, "step": 2294000 }, { "epoch": 1.38, - "learning_rate": 4.640901629213281e-05, - "loss": 0.3824, + "learning_rate": 4.640880209564563e-05, + "loss": 0.2822, "step": 2294500 }, { "epoch": 1.38, - "learning_rate": 4.6406916326572247e-05, - "loss": 0.3807, + "learning_rate": 4.640670213008506e-05, + "loss": 0.2883, "step": 2295000 }, { "epoch": 1.38, - "learning_rate": 4.640481636101167e-05, - "loss": 0.3773, + "learning_rate": 4.64046021645245e-05, + "loss": 0.2827, "step": 2295500 }, { "epoch": 1.38, - "learning_rate": 4.6402716395451114e-05, - "loss": 0.3777, + "learning_rate": 4.640250219896393e-05, + "loss": 0.2857, "step": 2296000 }, { "epoch": 1.38, - "learning_rate": 4.640061642989055e-05, - "loss": 0.3756, + "learning_rate": 4.6400402233403374e-05, + "loss": 0.2881, "step": 2296500 }, { "epoch": 1.38, - "learning_rate": 4.639851646432998e-05, - "loss": 0.3754, + "learning_rate": 4.639831066770505e-05, + "loss": 0.2809, "step": 2297000 }, { "epoch": 1.38, - "learning_rate": 4.639642489863166e-05, - "loss": 0.3858, + "learning_rate": 4.639621070214449e-05, + "loss": 0.29, "step": 2297500 }, { "epoch": 1.38, - "learning_rate": 4.63943249330711e-05, - "loss": 0.3843, + "learning_rate": 4.639411073658392e-05, + "loss": 0.2888, "step": 2298000 }, { "epoch": 1.38, - "learning_rate": 4.6392224967510535e-05, - "loss": 0.3801, + "learning_rate": 4.6392010771023354e-05, + "loss": 0.2847, "step": 2298500 }, { "epoch": 1.38, - "learning_rate": 4.639012500194997e-05, - "loss": 0.3806, + "learning_rate": 4.6389910805462795e-05, + "loss": 0.286, "step": 2299000 }, { "epoch": 1.38, - "learning_rate": 4.638802503638941e-05, - "loss": 0.3761, + "learning_rate": 4.638781083990223e-05, + "loss": 0.2851, "step": 2299500 }, { "epoch": 1.38, - "learning_rate": 4.6385925070828835e-05, - "loss": 0.3765, + "learning_rate": 4.638571507427278e-05, + "loss": 0.2843, "step": 2300000 }, { "epoch": 1.38, - "eval_loss": 0.3626013994216919, - "eval_runtime": 1121.9655, - "eval_samples_per_second": 469.462, - "eval_steps_per_second": 78.244, + "eval_loss": 0.2611614763736725, + "eval_runtime": 1456.651, + "eval_samples_per_second": 361.597, + "eval_steps_per_second": 60.266, "step": 2300000 }, { "epoch": 1.38, - "learning_rate": 4.638382510526827e-05, - "loss": 0.3778, + "learning_rate": 4.6383615108712215e-05, + "loss": 0.2845, "step": 2300500 }, { "epoch": 1.38, - "learning_rate": 4.638172513970771e-05, - "loss": 0.3866, + "learning_rate": 4.6381515143151655e-05, + "loss": 0.2904, "step": 2301000 }, { "epoch": 1.38, - "learning_rate": 4.637962517414714e-05, - "loss": 0.3719, + "learning_rate": 4.637941517759109e-05, + "loss": 0.2838, "step": 2301500 }, { "epoch": 1.38, - "learning_rate": 4.6377525208586576e-05, - "loss": 0.3781, + "learning_rate": 4.637731521203052e-05, + "loss": 0.2844, "step": 2302000 }, { "epoch": 1.38, - "learning_rate": 4.6375425243026016e-05, - "loss": 0.3785, + "learning_rate": 4.6375215246469956e-05, + "loss": 0.2844, "step": 2302500 }, { "epoch": 1.38, - "learning_rate": 4.637332527746545e-05, - "loss": 0.3824, + "learning_rate": 4.637311528090939e-05, + "loss": 0.2884, "step": 2303000 }, { "epoch": 1.38, - "learning_rate": 4.637122531190488e-05, - "loss": 0.3773, + "learning_rate": 4.637101531534883e-05, + "loss": 0.2901, "step": 2303500 }, { "epoch": 1.38, - "learning_rate": 4.6369129546275436e-05, - "loss": 0.3772, + "learning_rate": 4.636891534978826e-05, + "loss": 0.2839, "step": 2304000 }, { "epoch": 1.38, - "learning_rate": 4.6367033780646e-05, - "loss": 0.3715, + "learning_rate": 4.6366819584158817e-05, + "loss": 0.2859, "step": 2304500 }, { "epoch": 1.38, - "learning_rate": 4.636493381508543e-05, - "loss": 0.3798, + "learning_rate": 4.636471961859825e-05, + "loss": 0.2822, "step": 2305000 }, { "epoch": 1.38, - "learning_rate": 4.6362833849524864e-05, - "loss": 0.3759, + "learning_rate": 4.636261965303769e-05, + "loss": 0.2847, "step": 2305500 }, { "epoch": 1.38, - "learning_rate": 4.6360733883964304e-05, - "loss": 0.3819, + "learning_rate": 4.6360519687477124e-05, + "loss": 0.2823, "step": 2306000 }, { "epoch": 1.38, - "learning_rate": 4.635863391840374e-05, - "loss": 0.3759, + "learning_rate": 4.635841972191656e-05, + "loss": 0.2829, "step": 2306500 }, { "epoch": 1.38, - "learning_rate": 4.635653395284317e-05, - "loss": 0.3714, + "learning_rate": 4.635632395628711e-05, + "loss": 0.2815, "step": 2307000 }, { "epoch": 1.38, - "learning_rate": 4.635443398728261e-05, - "loss": 0.3808, + "learning_rate": 4.635422819065767e-05, + "loss": 0.2904, "step": 2307500 }, { "epoch": 1.38, - "learning_rate": 4.6352334021722044e-05, - "loss": 0.368, + "learning_rate": 4.635212822509711e-05, + "loss": 0.2864, "step": 2308000 }, { "epoch": 1.38, - "learning_rate": 4.63502382560926e-05, - "loss": 0.3759, + "learning_rate": 4.6350028259536545e-05, + "loss": 0.2873, "step": 2308500 }, { "epoch": 1.38, - "learning_rate": 4.634813829053203e-05, - "loss": 0.373, + "learning_rate": 4.634792829397598e-05, + "loss": 0.2875, "step": 2309000 }, { "epoch": 1.38, - "learning_rate": 4.634603832497147e-05, - "loss": 0.3701, + "learning_rate": 4.634582832841541e-05, + "loss": 0.2874, "step": 2309500 }, { "epoch": 1.38, - "learning_rate": 4.6343938359410905e-05, - "loss": 0.3761, + "learning_rate": 4.6343728362854845e-05, + "loss": 0.281, "step": 2310000 }, { "epoch": 1.39, - "learning_rate": 4.634184259378146e-05, - "loss": 0.3793, + "learning_rate": 4.6341628397294285e-05, + "loss": 0.2879, "step": 2310500 }, { "epoch": 1.39, - "learning_rate": 4.633974682815202e-05, - "loss": 0.3739, + "learning_rate": 4.633952843173372e-05, + "loss": 0.2767, "step": 2311000 }, { "epoch": 1.39, - "learning_rate": 4.633764686259145e-05, - "loss": 0.3809, + "learning_rate": 4.633743266610427e-05, + "loss": 0.2891, "step": 2311500 }, { "epoch": 1.39, - "learning_rate": 4.6335546897030886e-05, - "loss": 0.3795, + "learning_rate": 4.633533690047483e-05, + "loss": 0.2848, "step": 2312000 }, { "epoch": 1.39, - "learning_rate": 4.633344693147032e-05, - "loss": 0.3809, + "learning_rate": 4.6333236934914266e-05, + "loss": 0.2898, "step": 2312500 }, { "epoch": 1.39, - "learning_rate": 4.633134696590976e-05, - "loss": 0.381, + "learning_rate": 4.6331136969353706e-05, + "loss": 0.2843, "step": 2313000 }, { "epoch": 1.39, - "learning_rate": 4.632924700034919e-05, - "loss": 0.3692, + "learning_rate": 4.632903700379314e-05, + "loss": 0.2812, "step": 2313500 }, { "epoch": 1.39, - "learning_rate": 4.632714703478863e-05, - "loss": 0.3791, + "learning_rate": 4.6326937038232567e-05, + "loss": 0.2906, "step": 2314000 }, { "epoch": 1.39, - "learning_rate": 4.632504706922807e-05, - "loss": 0.3735, + "learning_rate": 4.632483707267201e-05, + "loss": 0.2873, "step": 2314500 }, { "epoch": 1.39, - "learning_rate": 4.63229471036675e-05, - "loss": 0.3838, + "learning_rate": 4.632273710711144e-05, + "loss": 0.2927, "step": 2315000 }, { "epoch": 1.39, - "learning_rate": 4.6320847138106934e-05, - "loss": 0.3797, + "learning_rate": 4.6320637141550874e-05, + "loss": 0.2861, "step": 2315500 }, { "epoch": 1.39, - "learning_rate": 4.6318747172546374e-05, - "loss": 0.3814, + "learning_rate": 4.6318541375921434e-05, + "loss": 0.2853, "step": 2316000 }, { "epoch": 1.39, - "learning_rate": 4.631665140691693e-05, - "loss": 0.3833, + "learning_rate": 4.631644141036087e-05, + "loss": 0.2916, "step": 2316500 }, { "epoch": 1.39, - "learning_rate": 4.631455564128748e-05, - "loss": 0.376, + "learning_rate": 4.63143414448003e-05, + "loss": 0.2894, "step": 2317000 }, { "epoch": 1.39, - "learning_rate": 4.6312455675726915e-05, - "loss": 0.3752, + "learning_rate": 4.631224147923974e-05, + "loss": 0.2811, "step": 2317500 }, { "epoch": 1.39, - "learning_rate": 4.631035571016635e-05, - "loss": 0.3762, + "learning_rate": 4.6310141513679175e-05, + "loss": 0.2854, "step": 2318000 }, { "epoch": 1.39, - "learning_rate": 4.630825574460579e-05, - "loss": 0.377, + "learning_rate": 4.6308049947980855e-05, + "loss": 0.2836, "step": 2318500 }, { "epoch": 1.39, - "learning_rate": 4.630615577904522e-05, - "loss": 0.3795, + "learning_rate": 4.630594998242029e-05, + "loss": 0.2859, "step": 2319000 }, { "epoch": 1.39, - "learning_rate": 4.6304055813484655e-05, - "loss": 0.3706, + "learning_rate": 4.630385001685972e-05, + "loss": 0.283, "step": 2319500 }, { "epoch": 1.39, - "learning_rate": 4.6301960047855216e-05, - "loss": 0.3789, + "learning_rate": 4.630175005129916e-05, + "loss": 0.2866, "step": 2320000 }, { "epoch": 1.39, - "learning_rate": 4.629986008229465e-05, - "loss": 0.3819, + "learning_rate": 4.6299650085738596e-05, + "loss": 0.2918, "step": 2320500 }, { "epoch": 1.39, - "learning_rate": 4.629776011673408e-05, - "loss": 0.3767, + "learning_rate": 4.629755012017803e-05, + "loss": 0.287, "step": 2321000 }, { "epoch": 1.39, - "learning_rate": 4.629566015117352e-05, - "loss": 0.381, + "learning_rate": 4.629545015461746e-05, + "loss": 0.2896, "step": 2321500 }, { "epoch": 1.39, - "learning_rate": 4.6293560185612956e-05, - "loss": 0.3687, + "learning_rate": 4.6293350189056896e-05, + "loss": 0.2815, "step": 2322000 }, { "epoch": 1.39, - "learning_rate": 4.629146022005239e-05, - "loss": 0.3649, + "learning_rate": 4.629125022349633e-05, + "loss": 0.2782, "step": 2322500 }, { "epoch": 1.39, - "learning_rate": 4.628936025449183e-05, - "loss": 0.3782, + "learning_rate": 4.628915025793577e-05, + "loss": 0.2884, "step": 2323000 }, { "epoch": 1.39, - "learning_rate": 4.628726028893126e-05, - "loss": 0.37, + "learning_rate": 4.62870502923752e-05, + "loss": 0.2874, "step": 2323500 }, { "epoch": 1.39, - "learning_rate": 4.62851603233707e-05, - "loss": 0.3851, + "learning_rate": 4.628495032681464e-05, + "loss": 0.2874, "step": 2324000 }, { "epoch": 1.39, - "learning_rate": 4.628306035781013e-05, - "loss": 0.3795, + "learning_rate": 4.62828545611852e-05, + "loss": 0.2824, "step": 2324500 }, { "epoch": 1.39, - "learning_rate": 4.628096459218069e-05, - "loss": 0.3756, + "learning_rate": 4.628075459562463e-05, + "loss": 0.2884, "step": 2325000 }, { "epoch": 1.39, - "learning_rate": 4.6278864626620124e-05, - "loss": 0.3737, + "learning_rate": 4.6278654630064064e-05, + "loss": 0.2822, "step": 2325500 }, { "epoch": 1.39, - "learning_rate": 4.627676466105956e-05, - "loss": 0.3736, + "learning_rate": 4.6276554664503504e-05, + "loss": 0.2829, "step": 2326000 }, { "epoch": 1.39, - "learning_rate": 4.6274664695499e-05, - "loss": 0.3805, + "learning_rate": 4.627445889887406e-05, + "loss": 0.2918, "step": 2326500 }, { "epoch": 1.4, - "learning_rate": 4.6272564729938424e-05, - "loss": 0.3666, + "learning_rate": 4.627235893331349e-05, + "loss": 0.2791, "step": 2327000 }, { "epoch": 1.4, - "learning_rate": 4.627046476437786e-05, - "loss": 0.3725, + "learning_rate": 4.627026316768405e-05, + "loss": 0.2839, "step": 2327500 }, { "epoch": 1.4, - "learning_rate": 4.62683647988173e-05, - "loss": 0.3868, + "learning_rate": 4.6268163202123485e-05, + "loss": 0.2877, "step": 2328000 }, { "epoch": 1.4, - "learning_rate": 4.626626483325673e-05, - "loss": 0.3879, + "learning_rate": 4.626606323656292e-05, + "loss": 0.2947, "step": 2328500 }, { "epoch": 1.4, - "learning_rate": 4.626416906762729e-05, - "loss": 0.3778, + "learning_rate": 4.626396327100235e-05, + "loss": 0.288, "step": 2329000 }, { "epoch": 1.4, - "learning_rate": 4.6262069102066725e-05, - "loss": 0.3798, + "learning_rate": 4.6261863305441785e-05, + "loss": 0.2848, "step": 2329500 }, { "epoch": 1.4, - "learning_rate": 4.6259973336437286e-05, - "loss": 0.3773, + "learning_rate": 4.6259763339881226e-05, + "loss": 0.2896, "step": 2330000 }, { "epoch": 1.4, - "learning_rate": 4.625787337087672e-05, - "loss": 0.3769, + "learning_rate": 4.625766337432066e-05, + "loss": 0.285, "step": 2330500 }, { "epoch": 1.4, - "learning_rate": 4.625577340531615e-05, - "loss": 0.3772, + "learning_rate": 4.625556340876009e-05, + "loss": 0.2822, "step": 2331000 }, { "epoch": 1.4, - "learning_rate": 4.625367343975559e-05, - "loss": 0.3806, + "learning_rate": 4.625346764313065e-05, + "loss": 0.2934, "step": 2331500 }, { "epoch": 1.4, - "learning_rate": 4.6251577674126146e-05, - "loss": 0.3756, + "learning_rate": 4.6251367677570086e-05, + "loss": 0.2837, "step": 2332000 }, { "epoch": 1.4, - "learning_rate": 4.624947770856558e-05, - "loss": 0.3784, + "learning_rate": 4.624926771200952e-05, + "loss": 0.2882, "step": 2332500 }, { "epoch": 1.4, - "learning_rate": 4.6247377743005013e-05, - "loss": 0.3854, + "learning_rate": 4.624716774644896e-05, + "loss": 0.2858, "step": 2333000 }, { "epoch": 1.4, - "learning_rate": 4.6245277777444454e-05, - "loss": 0.3778, + "learning_rate": 4.6245067780888394e-05, + "loss": 0.2895, "step": 2333500 }, { "epoch": 1.4, - "learning_rate": 4.624317781188388e-05, - "loss": 0.384, + "learning_rate": 4.624297201525895e-05, + "loss": 0.2882, "step": 2334000 }, { "epoch": 1.4, - "learning_rate": 4.6241077846323314e-05, - "loss": 0.3766, + "learning_rate": 4.624087624962951e-05, + "loss": 0.2873, "step": 2334500 }, { "epoch": 1.4, - "learning_rate": 4.623898208069388e-05, - "loss": 0.3799, + "learning_rate": 4.623877628406894e-05, + "loss": 0.2885, "step": 2335000 }, { "epoch": 1.4, - "learning_rate": 4.6236882115133314e-05, - "loss": 0.3668, + "learning_rate": 4.6236676318508374e-05, + "loss": 0.2782, "step": 2335500 }, { "epoch": 1.4, - "learning_rate": 4.623478214957275e-05, - "loss": 0.3845, + "learning_rate": 4.623457635294781e-05, + "loss": 0.2883, "step": 2336000 }, { "epoch": 1.4, - "learning_rate": 4.623268218401218e-05, - "loss": 0.3697, + "learning_rate": 4.623247638738724e-05, + "loss": 0.2806, "step": 2336500 }, { "epoch": 1.4, - "learning_rate": 4.6230582218451615e-05, - "loss": 0.3693, + "learning_rate": 4.623037642182668e-05, + "loss": 0.2793, "step": 2337000 }, { "epoch": 1.4, - "learning_rate": 4.622848225289105e-05, - "loss": 0.3766, + "learning_rate": 4.622828065619724e-05, + "loss": 0.2873, "step": 2337500 }, { "epoch": 1.4, - "learning_rate": 4.622638228733049e-05, - "loss": 0.3715, + "learning_rate": 4.622618069063667e-05, + "loss": 0.2833, "step": 2338000 }, { "epoch": 1.4, - "learning_rate": 4.622428232176992e-05, - "loss": 0.381, + "learning_rate": 4.622408072507611e-05, + "loss": 0.2928, "step": 2338500 }, { "epoch": 1.4, - "learning_rate": 4.6222182356209355e-05, - "loss": 0.3769, + "learning_rate": 4.622198075951554e-05, + "loss": 0.2862, "step": 2339000 }, { "epoch": 1.4, - "learning_rate": 4.622008659057991e-05, - "loss": 0.3851, + "learning_rate": 4.6219880793954976e-05, + "loss": 0.2921, "step": 2339500 }, { "epoch": 1.4, - "learning_rate": 4.621798662501935e-05, - "loss": 0.3799, + "learning_rate": 4.6217785028325536e-05, + "loss": 0.2856, "step": 2340000 }, { "epoch": 1.4, - "learning_rate": 4.621588665945878e-05, - "loss": 0.3822, + "learning_rate": 4.621568506276497e-05, + "loss": 0.2841, "step": 2340500 }, { "epoch": 1.4, - "learning_rate": 4.6213786693898216e-05, - "loss": 0.3767, + "learning_rate": 4.62135850972044e-05, + "loss": 0.2869, "step": 2341000 }, { "epoch": 1.4, - "learning_rate": 4.6211686728337656e-05, - "loss": 0.3789, + "learning_rate": 4.6211485131643837e-05, + "loss": 0.2857, "step": 2341500 }, { "epoch": 1.4, - "learning_rate": 4.620958676277709e-05, - "loss": 0.3796, + "learning_rate": 4.620938516608328e-05, + "loss": 0.2881, "step": 2342000 }, { "epoch": 1.4, - "learning_rate": 4.620748679721652e-05, - "loss": 0.3741, + "learning_rate": 4.620728520052271e-05, + "loss": 0.2892, "step": 2342500 }, { "epoch": 1.4, - "learning_rate": 4.6205386831655963e-05, - "loss": 0.3828, + "learning_rate": 4.6205185234962144e-05, + "loss": 0.2912, "step": 2343000 }, { "epoch": 1.41, - "learning_rate": 4.620329106602652e-05, - "loss": 0.3774, + "learning_rate": 4.62030894693327e-05, + "loss": 0.2876, "step": 2343500 }, { "epoch": 1.41, - "learning_rate": 4.620119110046595e-05, - "loss": 0.3855, + "learning_rate": 4.620098950377214e-05, + "loss": 0.2948, "step": 2344000 }, { "epoch": 1.41, - "learning_rate": 4.619909113490539e-05, - "loss": 0.3811, + "learning_rate": 4.619888953821157e-05, + "loss": 0.2884, "step": 2344500 }, { "epoch": 1.41, - "learning_rate": 4.6196991169344824e-05, - "loss": 0.3728, + "learning_rate": 4.6196789572651004e-05, + "loss": 0.2801, "step": 2345000 }, { "epoch": 1.41, - "learning_rate": 4.619489540371538e-05, - "loss": 0.3779, + "learning_rate": 4.6194689607090445e-05, + "loss": 0.2788, "step": 2345500 }, { "epoch": 1.41, - "learning_rate": 4.619279543815481e-05, - "loss": 0.3727, + "learning_rate": 4.619258964152988e-05, + "loss": 0.2799, "step": 2346000 }, { "epoch": 1.41, - "learning_rate": 4.6190699672525365e-05, - "loss": 0.3852, + "learning_rate": 4.619048967596931e-05, + "loss": 0.2914, "step": 2346500 }, { "epoch": 1.41, - "learning_rate": 4.6188599706964805e-05, - "loss": 0.3761, + "learning_rate": 4.618838971040875e-05, + "loss": 0.2855, "step": 2347000 }, { "epoch": 1.41, - "learning_rate": 4.618649974140424e-05, - "loss": 0.3745, + "learning_rate": 4.6186293944779305e-05, + "loss": 0.2855, "step": 2347500 }, { "epoch": 1.41, - "learning_rate": 4.618439977584367e-05, - "loss": 0.3683, + "learning_rate": 4.618419397921874e-05, + "loss": 0.285, "step": 2348000 }, { "epoch": 1.41, - "learning_rate": 4.618229981028311e-05, - "loss": 0.3811, + "learning_rate": 4.618209821358929e-05, + "loss": 0.2859, "step": 2348500 }, { "epoch": 1.41, - "learning_rate": 4.6180199844722546e-05, - "loss": 0.3735, + "learning_rate": 4.617999824802873e-05, + "loss": 0.2816, "step": 2349000 }, { "epoch": 1.41, - "learning_rate": 4.61781040790931e-05, - "loss": 0.3803, + "learning_rate": 4.6177898282468166e-05, + "loss": 0.2851, "step": 2349500 }, { "epoch": 1.41, - "learning_rate": 4.617600411353254e-05, - "loss": 0.3794, + "learning_rate": 4.61757983169076e-05, + "loss": 0.2836, "step": 2350000 }, { "epoch": 1.41, - "learning_rate": 4.617390414797197e-05, - "loss": 0.3725, + "learning_rate": 4.617369835134704e-05, + "loss": 0.2831, "step": 2350500 }, { "epoch": 1.41, - "learning_rate": 4.6171804182411406e-05, - "loss": 0.3714, + "learning_rate": 4.617159838578647e-05, + "loss": 0.2869, "step": 2351000 }, { "epoch": 1.41, - "learning_rate": 4.616970421685085e-05, - "loss": 0.3674, + "learning_rate": 4.616950262015703e-05, + "loss": 0.2806, "step": 2351500 }, { "epoch": 1.41, - "learning_rate": 4.616760425129028e-05, - "loss": 0.3729, + "learning_rate": 4.616740265459646e-05, + "loss": 0.286, "step": 2352000 }, { "epoch": 1.41, - "learning_rate": 4.6165504285729714e-05, - "loss": 0.3726, + "learning_rate": 4.61653026890359e-05, + "loss": 0.2877, "step": 2352500 }, { "epoch": 1.41, - "learning_rate": 4.6163404320169154e-05, - "loss": 0.3834, + "learning_rate": 4.6163202723475334e-05, + "loss": 0.2884, "step": 2353000 }, { "epoch": 1.41, - "learning_rate": 4.616130855453971e-05, - "loss": 0.3924, + "learning_rate": 4.616110695784589e-05, + "loss": 0.2923, "step": 2353500 }, { "epoch": 1.41, - "learning_rate": 4.615920858897914e-05, - "loss": 0.3708, + "learning_rate": 4.615900699228533e-05, + "loss": 0.2825, "step": 2354000 }, { "epoch": 1.41, - "learning_rate": 4.6157108623418574e-05, - "loss": 0.3685, + "learning_rate": 4.615690702672476e-05, + "loss": 0.28, "step": 2354500 }, { "epoch": 1.41, - "learning_rate": 4.6155008657858015e-05, - "loss": 0.377, + "learning_rate": 4.6154807061164195e-05, + "loss": 0.2838, "step": 2355000 }, { "epoch": 1.41, - "learning_rate": 4.615291289222857e-05, - "loss": 0.3686, + "learning_rate": 4.6152707095603635e-05, + "loss": 0.2806, "step": 2355500 }, { "epoch": 1.41, - "learning_rate": 4.6150812926668e-05, - "loss": 0.3763, + "learning_rate": 4.615060713004307e-05, + "loss": 0.2841, "step": 2356000 }, { "epoch": 1.41, - "learning_rate": 4.6148712961107435e-05, - "loss": 0.3719, + "learning_rate": 4.61485071644825e-05, + "loss": 0.2827, "step": 2356500 }, { "epoch": 1.41, - "learning_rate": 4.6146612995546875e-05, - "loss": 0.3728, + "learning_rate": 4.614640719892194e-05, + "loss": 0.2934, "step": 2357000 }, { "epoch": 1.41, - "learning_rate": 4.614451722991743e-05, - "loss": 0.3762, + "learning_rate": 4.6144307233361376e-05, + "loss": 0.2816, "step": 2357500 }, { "epoch": 1.41, - "learning_rate": 4.614241726435686e-05, - "loss": 0.3649, + "learning_rate": 4.614221146773193e-05, + "loss": 0.2767, "step": 2358000 }, { "epoch": 1.41, - "learning_rate": 4.61403172987963e-05, - "loss": 0.3684, + "learning_rate": 4.614011150217136e-05, + "loss": 0.2828, "step": 2358500 }, { "epoch": 1.41, - "learning_rate": 4.6138217333235736e-05, - "loss": 0.3685, + "learning_rate": 4.61380115366108e-05, + "loss": 0.2779, "step": 2359000 }, { "epoch": 1.41, - "learning_rate": 4.613612156760629e-05, - "loss": 0.3703, + "learning_rate": 4.6135911571050236e-05, + "loss": 0.2802, "step": 2359500 }, { "epoch": 1.41, - "learning_rate": 4.613402580197685e-05, - "loss": 0.3864, + "learning_rate": 4.613381580542079e-05, + "loss": 0.2935, "step": 2360000 }, { "epoch": 1.42, - "learning_rate": 4.6131925836416277e-05, - "loss": 0.3838, + "learning_rate": 4.613171583986023e-05, + "loss": 0.2857, "step": 2360500 }, { "epoch": 1.42, - "learning_rate": 4.612982587085572e-05, - "loss": 0.3677, + "learning_rate": 4.6129620074230784e-05, + "loss": 0.2837, "step": 2361000 }, { "epoch": 1.42, - "learning_rate": 4.612772590529515e-05, - "loss": 0.3701, + "learning_rate": 4.612752010867022e-05, + "loss": 0.2837, "step": 2361500 }, { "epoch": 1.42, - "learning_rate": 4.6125625939734584e-05, - "loss": 0.3728, + "learning_rate": 4.612542014310965e-05, + "loss": 0.2857, "step": 2362000 }, { "epoch": 1.42, - "learning_rate": 4.6123525974174024e-05, - "loss": 0.3869, + "learning_rate": 4.612332017754909e-05, + "loss": 0.289, "step": 2362500 }, { "epoch": 1.42, - "learning_rate": 4.612142600861346e-05, - "loss": 0.385, + "learning_rate": 4.6121220211988524e-05, + "loss": 0.2887, "step": 2363000 }, { "epoch": 1.42, - "learning_rate": 4.611933024298401e-05, - "loss": 0.3703, + "learning_rate": 4.611912024642796e-05, + "loss": 0.2816, "step": 2363500 }, { "epoch": 1.42, - "learning_rate": 4.611723027742345e-05, - "loss": 0.3759, + "learning_rate": 4.61170202808674e-05, + "loss": 0.2816, "step": 2364000 }, { "epoch": 1.42, - "learning_rate": 4.6115130311862885e-05, - "loss": 0.3699, + "learning_rate": 4.611492451523795e-05, + "loss": 0.2821, "step": 2364500 }, { "epoch": 1.42, - "learning_rate": 4.611303034630232e-05, - "loss": 0.3802, + "learning_rate": 4.6112824549677385e-05, + "loss": 0.2882, "step": 2365000 }, { "epoch": 1.42, - "learning_rate": 4.611093038074176e-05, - "loss": 0.3807, + "learning_rate": 4.611072458411682e-05, + "loss": 0.2842, "step": 2365500 }, { "epoch": 1.42, - "learning_rate": 4.610883041518119e-05, - "loss": 0.3767, + "learning_rate": 4.610862461855626e-05, + "loss": 0.2853, "step": 2366000 }, { "epoch": 1.42, - "learning_rate": 4.6106734649551745e-05, - "loss": 0.3722, + "learning_rate": 4.610652885292681e-05, + "loss": 0.2847, "step": 2366500 }, { "epoch": 1.42, - "learning_rate": 4.610463468399118e-05, - "loss": 0.3811, + "learning_rate": 4.6104428887366246e-05, + "loss": 0.2839, "step": 2367000 }, { "epoch": 1.42, - "learning_rate": 4.610253471843062e-05, - "loss": 0.3752, + "learning_rate": 4.6102328921805686e-05, + "loss": 0.2851, "step": 2367500 }, { "epoch": 1.42, - "learning_rate": 4.610043475287005e-05, - "loss": 0.3788, + "learning_rate": 4.610022895624512e-05, + "loss": 0.2848, "step": 2368000 }, { "epoch": 1.42, - "learning_rate": 4.6098338987240606e-05, - "loss": 0.3752, + "learning_rate": 4.609812899068455e-05, + "loss": 0.2863, "step": 2368500 }, { "epoch": 1.42, - "learning_rate": 4.609623902168004e-05, - "loss": 0.368, + "learning_rate": 4.609602902512399e-05, + "loss": 0.2834, "step": 2369000 }, { "epoch": 1.42, - "learning_rate": 4.609413905611948e-05, - "loss": 0.3734, + "learning_rate": 4.609392905956342e-05, + "loss": 0.2824, "step": 2369500 }, { "epoch": 1.42, - "learning_rate": 4.609203909055891e-05, - "loss": 0.3715, + "learning_rate": 4.609182909400285e-05, + "loss": 0.2858, "step": 2370000 }, { "epoch": 1.42, - "learning_rate": 4.608993912499835e-05, - "loss": 0.3803, + "learning_rate": 4.6089729128442293e-05, + "loss": 0.2808, "step": 2370500 }, { "epoch": 1.42, - "learning_rate": 4.608783915943779e-05, - "loss": 0.3797, + "learning_rate": 4.608762916288173e-05, + "loss": 0.2827, "step": 2371000 }, { "epoch": 1.42, - "learning_rate": 4.608574339380834e-05, - "loss": 0.3817, + "learning_rate": 4.608552919732116e-05, + "loss": 0.2857, "step": 2371500 }, { "epoch": 1.42, - "learning_rate": 4.6083643428247774e-05, - "loss": 0.374, + "learning_rate": 4.60834292317606e-05, + "loss": 0.2837, "step": 2372000 }, { "epoch": 1.42, - "learning_rate": 4.6081543462687214e-05, - "loss": 0.3852, + "learning_rate": 4.6081329266200034e-05, + "loss": 0.2909, "step": 2372500 }, { "epoch": 1.42, - "learning_rate": 4.607944349712665e-05, - "loss": 0.3706, + "learning_rate": 4.607923350057059e-05, + "loss": 0.2893, "step": 2373000 }, { "epoch": 1.42, - "learning_rate": 4.607734353156608e-05, - "loss": 0.3845, + "learning_rate": 4.607713353501002e-05, + "loss": 0.2903, "step": 2373500 }, { "epoch": 1.42, - "learning_rate": 4.6075247765936635e-05, - "loss": 0.3783, + "learning_rate": 4.607503356944946e-05, + "loss": 0.2891, "step": 2374000 }, { "epoch": 1.42, - "learning_rate": 4.6073147800376075e-05, - "loss": 0.3811, + "learning_rate": 4.6072933603888895e-05, + "loss": 0.2876, "step": 2374500 }, { "epoch": 1.42, - "learning_rate": 4.607104783481551e-05, - "loss": 0.3888, + "learning_rate": 4.607083363832833e-05, + "loss": 0.2911, "step": 2375000 }, { "epoch": 1.42, - "learning_rate": 4.606894786925494e-05, - "loss": 0.3749, + "learning_rate": 4.606873787269889e-05, + "loss": 0.2861, "step": 2375500 }, { "epoch": 1.42, - "learning_rate": 4.6066852103625495e-05, - "loss": 0.3798, + "learning_rate": 4.606664210706945e-05, + "loss": 0.2844, "step": 2376000 }, { "epoch": 1.42, - "learning_rate": 4.6064752138064936e-05, - "loss": 0.3672, + "learning_rate": 4.606454214150888e-05, + "loss": 0.2797, "step": 2376500 }, { "epoch": 1.43, - "learning_rate": 4.606265217250437e-05, - "loss": 0.3832, + "learning_rate": 4.606244217594831e-05, + "loss": 0.2898, "step": 2377000 }, { "epoch": 1.43, - "learning_rate": 4.60605522069438e-05, - "loss": 0.3724, + "learning_rate": 4.606034221038775e-05, + "loss": 0.2854, "step": 2377500 }, { "epoch": 1.43, - "learning_rate": 4.605845224138324e-05, - "loss": 0.3619, + "learning_rate": 4.605824224482718e-05, + "loss": 0.2811, "step": 2378000 }, { "epoch": 1.43, - "learning_rate": 4.6056352275822676e-05, - "loss": 0.3763, + "learning_rate": 4.6056142279266616e-05, + "loss": 0.2809, "step": 2378500 }, { "epoch": 1.43, - "learning_rate": 4.605425231026211e-05, - "loss": 0.3775, + "learning_rate": 4.6054042313706057e-05, + "loss": 0.2853, "step": 2379000 }, { "epoch": 1.43, - "learning_rate": 4.605215234470155e-05, - "loss": 0.3822, + "learning_rate": 4.605194654807661e-05, + "loss": 0.2864, "step": 2379500 }, { "epoch": 1.43, - "learning_rate": 4.6050056579072104e-05, - "loss": 0.3796, + "learning_rate": 4.6049846582516044e-05, + "loss": 0.2823, "step": 2380000 }, { "epoch": 1.43, - "learning_rate": 4.604795661351154e-05, - "loss": 0.3755, + "learning_rate": 4.604774661695548e-05, + "loss": 0.2829, "step": 2380500 }, { "epoch": 1.43, - "learning_rate": 4.604586084788209e-05, - "loss": 0.3783, + "learning_rate": 4.604564665139492e-05, + "loss": 0.2875, "step": 2381000 }, { "epoch": 1.43, - "learning_rate": 4.604376088232153e-05, - "loss": 0.3732, + "learning_rate": 4.604355088576547e-05, + "loss": 0.2838, "step": 2381500 }, { "epoch": 1.43, - "learning_rate": 4.6041660916760964e-05, - "loss": 0.3743, + "learning_rate": 4.6041450920204904e-05, + "loss": 0.2765, "step": 2382000 }, { "epoch": 1.43, - "learning_rate": 4.60395609512004e-05, - "loss": 0.3695, + "learning_rate": 4.6039350954644345e-05, + "loss": 0.2828, "step": 2382500 }, { "epoch": 1.43, - "learning_rate": 4.603746098563984e-05, - "loss": 0.3755, + "learning_rate": 4.603725098908378e-05, + "loss": 0.2801, "step": 2383000 }, { "epoch": 1.43, - "learning_rate": 4.603536102007927e-05, - "loss": 0.3702, + "learning_rate": 4.603515102352321e-05, + "loss": 0.2823, "step": 2383500 }, { "epoch": 1.43, - "learning_rate": 4.6033261054518705e-05, - "loss": 0.3757, + "learning_rate": 4.603305105796265e-05, + "loss": 0.283, "step": 2384000 }, { "epoch": 1.43, - "learning_rate": 4.603116528888926e-05, - "loss": 0.3801, + "learning_rate": 4.6030951092402085e-05, + "loss": 0.287, "step": 2384500 }, { "epoch": 1.43, - "learning_rate": 4.60290653233287e-05, - "loss": 0.3708, + "learning_rate": 4.602885112684152e-05, + "loss": 0.2816, "step": 2385000 }, { "epoch": 1.43, - "learning_rate": 4.602696535776813e-05, - "loss": 0.3767, + "learning_rate": 4.602675536121207e-05, + "loss": 0.2832, "step": 2385500 }, { "epoch": 1.43, - "learning_rate": 4.6024865392207566e-05, - "loss": 0.3725, + "learning_rate": 4.602465539565151e-05, + "loss": 0.2833, "step": 2386000 }, { "epoch": 1.43, - "learning_rate": 4.6022765426647006e-05, - "loss": 0.3767, + "learning_rate": 4.6022555430090946e-05, + "loss": 0.282, "step": 2386500 }, { "epoch": 1.43, - "learning_rate": 4.602066546108644e-05, - "loss": 0.3728, + "learning_rate": 4.602045546453038e-05, + "loss": 0.281, "step": 2387000 }, { "epoch": 1.43, - "learning_rate": 4.601856549552587e-05, - "loss": 0.3682, + "learning_rate": 4.601835549896982e-05, + "loss": 0.2844, "step": 2387500 }, { "epoch": 1.43, - "learning_rate": 4.6016465529965306e-05, - "loss": 0.3769, + "learning_rate": 4.601625553340925e-05, + "loss": 0.2844, "step": 2388000 }, { "epoch": 1.43, - "learning_rate": 4.601436976433587e-05, - "loss": 0.3774, + "learning_rate": 4.6014155567848686e-05, + "loss": 0.2823, "step": 2388500 }, { "epoch": 1.43, - "learning_rate": 4.60122697987753e-05, - "loss": 0.3742, + "learning_rate": 4.601205980221924e-05, + "loss": 0.2844, "step": 2389000 }, { "epoch": 1.43, - "learning_rate": 4.6010169833214734e-05, - "loss": 0.3804, + "learning_rate": 4.600995983665868e-05, + "loss": 0.2874, "step": 2389500 }, { "epoch": 1.43, - "learning_rate": 4.600806986765417e-05, - "loss": 0.3785, + "learning_rate": 4.6007859871098114e-05, + "loss": 0.2835, "step": 2390000 }, { "epoch": 1.43, - "learning_rate": 4.60059699020936e-05, - "loss": 0.3702, + "learning_rate": 4.600575990553755e-05, + "loss": 0.2821, "step": 2390500 }, { "epoch": 1.43, - "learning_rate": 4.600386993653304e-05, - "loss": 0.3784, + "learning_rate": 4.600366413990811e-05, + "loss": 0.2867, "step": 2391000 }, { "epoch": 1.43, - "learning_rate": 4.60017741709036e-05, - "loss": 0.3719, + "learning_rate": 4.600156417434754e-05, + "loss": 0.2831, "step": 2391500 }, { "epoch": 1.43, - "learning_rate": 4.599967420534303e-05, - "loss": 0.3958, + "learning_rate": 4.5999464208786974e-05, + "loss": 0.2949, "step": 2392000 }, { "epoch": 1.43, - "learning_rate": 4.599757423978246e-05, - "loss": 0.3765, + "learning_rate": 4.5997364243226415e-05, + "loss": 0.282, "step": 2392500 }, { "epoch": 1.43, - "learning_rate": 4.59954742742219e-05, - "loss": 0.3765, + "learning_rate": 4.599526427766585e-05, + "loss": 0.2828, "step": 2393000 }, { "epoch": 1.44, - "learning_rate": 4.5993374308661335e-05, - "loss": 0.3809, + "learning_rate": 4.59931685120364e-05, + "loss": 0.2863, "step": 2393500 }, { "epoch": 1.44, - "learning_rate": 4.5991274343100775e-05, - "loss": 0.3735, + "learning_rate": 4.5991068546475835e-05, + "loss": 0.2847, "step": 2394000 }, { "epoch": 1.44, - "learning_rate": 4.598917857747133e-05, - "loss": 0.3684, + "learning_rate": 4.5988968580915275e-05, + "loss": 0.2841, "step": 2394500 }, { "epoch": 1.44, - "learning_rate": 4.598707861191076e-05, - "loss": 0.3748, + "learning_rate": 4.598687281528583e-05, + "loss": 0.2807, "step": 2395000 }, { "epoch": 1.44, - "learning_rate": 4.5984978646350196e-05, - "loss": 0.3826, + "learning_rate": 4.598477284972526e-05, + "loss": 0.2868, "step": 2395500 }, { "epoch": 1.44, - "learning_rate": 4.5982878680789636e-05, - "loss": 0.3766, + "learning_rate": 4.5982672884164696e-05, + "loss": 0.2854, "step": 2396000 }, { "epoch": 1.44, - "learning_rate": 4.5980782915160196e-05, - "loss": 0.3801, + "learning_rate": 4.5980572918604136e-05, + "loss": 0.2852, "step": 2396500 }, { "epoch": 1.44, - "learning_rate": 4.597868294959962e-05, - "loss": 0.3804, + "learning_rate": 4.597847295304357e-05, + "loss": 0.2883, "step": 2397000 }, { "epoch": 1.44, - "learning_rate": 4.5976582984039056e-05, - "loss": 0.3714, + "learning_rate": 4.5976372987483e-05, + "loss": 0.2798, "step": 2397500 }, { "epoch": 1.44, - "learning_rate": 4.5974483018478497e-05, - "loss": 0.3776, + "learning_rate": 4.597427302192244e-05, + "loss": 0.2846, "step": 2398000 }, { "epoch": 1.44, - "learning_rate": 4.597238305291793e-05, - "loss": 0.3815, + "learning_rate": 4.597217305636188e-05, + "loss": 0.2831, "step": 2398500 }, { "epoch": 1.44, - "learning_rate": 4.5970283087357363e-05, - "loss": 0.3768, + "learning_rate": 4.597007309080131e-05, + "loss": 0.2828, "step": 2399000 }, { "epoch": 1.44, - "learning_rate": 4.5968183121796804e-05, - "loss": 0.365, + "learning_rate": 4.596797732517187e-05, + "loss": 0.2843, "step": 2399500 }, { "epoch": 1.44, - "learning_rate": 4.596608735616736e-05, - "loss": 0.3779, + "learning_rate": 4.5965877359611304e-05, + "loss": 0.2828, "step": 2400000 }, { "epoch": 1.44, - "eval_loss": 0.3611328899860382, - "eval_runtime": 1123.5144, - "eval_samples_per_second": 468.815, - "eval_steps_per_second": 78.136, + "eval_loss": 0.25953343510627747, + "eval_runtime": 1454.488, + "eval_samples_per_second": 362.134, + "eval_steps_per_second": 60.356, "step": 2400000 }, { "epoch": 1.44, - "learning_rate": 4.596398739060679e-05, - "loss": 0.3885, + "learning_rate": 4.596377739405074e-05, + "loss": 0.2922, "step": 2400500 }, { "epoch": 1.44, - "learning_rate": 4.596188742504623e-05, - "loss": 0.3818, + "learning_rate": 4.596167742849018e-05, + "loss": 0.2815, "step": 2401000 }, { "epoch": 1.44, - "learning_rate": 4.5959787459485664e-05, - "loss": 0.3791, + "learning_rate": 4.5959577462929604e-05, + "loss": 0.2913, "step": 2401500 }, { "epoch": 1.44, - "learning_rate": 4.595769169385622e-05, - "loss": 0.3697, + "learning_rate": 4.595747749736904e-05, + "loss": 0.2798, "step": 2402000 }, { "epoch": 1.44, - "learning_rate": 4.595559172829565e-05, - "loss": 0.3775, + "learning_rate": 4.59553817317396e-05, + "loss": 0.2844, "step": 2402500 }, { "epoch": 1.44, - "learning_rate": 4.595349176273509e-05, - "loss": 0.3852, + "learning_rate": 4.595328176617904e-05, + "loss": 0.2889, "step": 2403000 }, { "epoch": 1.44, - "learning_rate": 4.5951391797174525e-05, - "loss": 0.3859, + "learning_rate": 4.595118180061847e-05, + "loss": 0.2885, "step": 2403500 }, { "epoch": 1.44, - "learning_rate": 4.594929603154508e-05, - "loss": 0.373, + "learning_rate": 4.59490818350579e-05, + "loss": 0.28, "step": 2404000 }, { "epoch": 1.44, - "learning_rate": 4.594719606598451e-05, - "loss": 0.3805, + "learning_rate": 4.594698186949734e-05, + "loss": 0.2818, "step": 2404500 }, { "epoch": 1.44, - "learning_rate": 4.594509610042395e-05, - "loss": 0.3867, + "learning_rate": 4.594489030379902e-05, + "loss": 0.2852, "step": 2405000 }, { "epoch": 1.44, - "learning_rate": 4.5942996134863386e-05, - "loss": 0.3654, + "learning_rate": 4.594279033823845e-05, + "loss": 0.2865, "step": 2405500 }, { "epoch": 1.44, - "learning_rate": 4.594089616930282e-05, - "loss": 0.3867, + "learning_rate": 4.5940690372677886e-05, + "loss": 0.2898, "step": 2406000 }, { "epoch": 1.44, - "learning_rate": 4.593879620374226e-05, - "loss": 0.3748, + "learning_rate": 4.593859460704844e-05, + "loss": 0.2842, "step": 2406500 }, { "epoch": 1.44, - "learning_rate": 4.593669623818169e-05, - "loss": 0.3799, + "learning_rate": 4.593649464148788e-05, + "loss": 0.2899, "step": 2407000 }, { "epoch": 1.44, - "learning_rate": 4.5934596272621127e-05, - "loss": 0.3745, + "learning_rate": 4.5934394675927313e-05, + "loss": 0.2846, "step": 2407500 }, { "epoch": 1.44, - "learning_rate": 4.593250050699169e-05, - "loss": 0.3772, + "learning_rate": 4.593229471036675e-05, + "loss": 0.2849, "step": 2408000 }, { "epoch": 1.44, - "learning_rate": 4.593040054143112e-05, - "loss": 0.3689, + "learning_rate": 4.593019474480619e-05, + "loss": 0.2775, "step": 2408500 }, { "epoch": 1.44, - "learning_rate": 4.5928300575870554e-05, - "loss": 0.3726, + "learning_rate": 4.592809477924562e-05, + "loss": 0.2764, "step": 2409000 }, { "epoch": 1.44, - "learning_rate": 4.5926200610309994e-05, - "loss": 0.372, + "learning_rate": 4.5925994813685054e-05, + "loss": 0.2892, "step": 2409500 }, { "epoch": 1.44, - "learning_rate": 4.592410064474943e-05, - "loss": 0.3816, + "learning_rate": 4.5923894848124494e-05, + "loss": 0.2902, "step": 2410000 }, { "epoch": 1.45, - "learning_rate": 4.592200067918886e-05, - "loss": 0.3767, + "learning_rate": 4.592179488256393e-05, + "loss": 0.2904, "step": 2410500 }, { "epoch": 1.45, - "learning_rate": 4.5919904913559415e-05, - "loss": 0.3733, + "learning_rate": 4.5919694917003354e-05, + "loss": 0.2801, "step": 2411000 }, { "epoch": 1.45, - "learning_rate": 4.5917804947998855e-05, - "loss": 0.3792, + "learning_rate": 4.5917594951442795e-05, + "loss": 0.283, "step": 2411500 }, { "epoch": 1.45, - "learning_rate": 4.591570498243829e-05, - "loss": 0.3719, + "learning_rate": 4.591549498588223e-05, + "loss": 0.2839, "step": 2412000 }, { "epoch": 1.45, - "learning_rate": 4.591360501687772e-05, - "loss": 0.3757, + "learning_rate": 4.591339502032166e-05, + "loss": 0.2875, "step": 2412500 }, { "epoch": 1.45, - "learning_rate": 4.5911509251248275e-05, - "loss": 0.3629, + "learning_rate": 4.591129925469222e-05, + "loss": 0.2838, "step": 2413000 }, { "epoch": 1.45, - "learning_rate": 4.5909409285687715e-05, - "loss": 0.3827, + "learning_rate": 4.5909199289131655e-05, + "loss": 0.2883, "step": 2413500 }, { "epoch": 1.45, - "learning_rate": 4.590730932012715e-05, - "loss": 0.3743, + "learning_rate": 4.590709932357109e-05, + "loss": 0.2816, "step": 2414000 }, { "epoch": 1.45, - "learning_rate": 4.590520935456658e-05, - "loss": 0.3722, + "learning_rate": 4.590499935801053e-05, + "loss": 0.2859, "step": 2414500 }, { "epoch": 1.45, - "learning_rate": 4.590310938900602e-05, - "loss": 0.3715, + "learning_rate": 4.590289939244996e-05, + "loss": 0.2832, "step": 2415000 }, { "epoch": 1.45, - "learning_rate": 4.5901009423445456e-05, - "loss": 0.3833, + "learning_rate": 4.5900799426889396e-05, + "loss": 0.2823, "step": 2415500 }, { "epoch": 1.45, - "learning_rate": 4.589890945788489e-05, - "loss": 0.372, + "learning_rate": 4.589870366125995e-05, + "loss": 0.2868, "step": 2416000 }, { "epoch": 1.45, - "learning_rate": 4.589680949232432e-05, - "loss": 0.378, + "learning_rate": 4.589660369569939e-05, + "loss": 0.2816, "step": 2416500 }, { "epoch": 1.45, - "learning_rate": 4.589471372669488e-05, - "loss": 0.3835, + "learning_rate": 4.589450373013882e-05, + "loss": 0.293, "step": 2417000 }, { "epoch": 1.45, - "learning_rate": 4.589261376113432e-05, - "loss": 0.3761, + "learning_rate": 4.589240376457826e-05, + "loss": 0.2865, "step": 2417500 }, { "epoch": 1.45, - "learning_rate": 4.589051379557376e-05, - "loss": 0.3729, + "learning_rate": 4.58903037990177e-05, + "loss": 0.2871, "step": 2418000 }, { "epoch": 1.45, - "learning_rate": 4.588841383001319e-05, - "loss": 0.3716, + "learning_rate": 4.588820803338825e-05, + "loss": 0.2848, "step": 2418500 }, { "epoch": 1.45, - "learning_rate": 4.588631386445262e-05, - "loss": 0.3785, + "learning_rate": 4.5886108067827684e-05, + "loss": 0.2826, "step": 2419000 }, { "epoch": 1.45, - "learning_rate": 4.588421389889206e-05, - "loss": 0.3836, + "learning_rate": 4.588400810226712e-05, + "loss": 0.2858, "step": 2419500 }, { "epoch": 1.45, - "learning_rate": 4.588211393333149e-05, - "loss": 0.3837, + "learning_rate": 4.588190813670656e-05, + "loss": 0.288, "step": 2420000 }, { "epoch": 1.45, - "learning_rate": 4.5880013967770924e-05, - "loss": 0.3707, + "learning_rate": 4.587980817114599e-05, + "loss": 0.2815, "step": 2420500 }, { "epoch": 1.45, - "learning_rate": 4.5877918202141485e-05, - "loss": 0.3638, + "learning_rate": 4.5877712405516545e-05, + "loss": 0.282, "step": 2421000 }, { "epoch": 1.45, - "learning_rate": 4.587582243651204e-05, - "loss": 0.3669, + "learning_rate": 4.5875612439955985e-05, + "loss": 0.2764, "step": 2421500 }, { "epoch": 1.45, - "learning_rate": 4.587372247095148e-05, - "loss": 0.383, + "learning_rate": 4.587351247439542e-05, + "loss": 0.2842, "step": 2422000 }, { "epoch": 1.45, - "learning_rate": 4.587162250539091e-05, - "loss": 0.3692, + "learning_rate": 4.587141250883485e-05, + "loss": 0.2829, "step": 2422500 }, { "epoch": 1.45, - "learning_rate": 4.5869522539830345e-05, - "loss": 0.3754, + "learning_rate": 4.5869316743205406e-05, + "loss": 0.2769, "step": 2423000 }, { "epoch": 1.45, - "learning_rate": 4.5867422574269786e-05, - "loss": 0.3693, + "learning_rate": 4.5867220977575966e-05, + "loss": 0.2825, "step": 2423500 }, { "epoch": 1.45, - "learning_rate": 4.586532260870921e-05, - "loss": 0.3652, + "learning_rate": 4.5865121012015406e-05, + "loss": 0.2801, "step": 2424000 }, { "epoch": 1.45, - "learning_rate": 4.586322264314865e-05, - "loss": 0.3701, + "learning_rate": 4.586302104645484e-05, + "loss": 0.2799, "step": 2424500 }, { "epoch": 1.45, - "learning_rate": 4.586112687751921e-05, - "loss": 0.3745, + "learning_rate": 4.586092108089427e-05, + "loss": 0.2866, "step": 2425000 }, { "epoch": 1.45, - "learning_rate": 4.5859026911958646e-05, - "loss": 0.3815, + "learning_rate": 4.5858821115333706e-05, + "loss": 0.2829, "step": 2425500 }, { "epoch": 1.45, - "learning_rate": 4.585692694639807e-05, - "loss": 0.3765, + "learning_rate": 4.585672534970427e-05, + "loss": 0.2851, "step": 2426000 }, { "epoch": 1.45, - "learning_rate": 4.585482698083751e-05, - "loss": 0.3766, + "learning_rate": 4.58546253841437e-05, + "loss": 0.2798, "step": 2426500 }, { "epoch": 1.46, - "learning_rate": 4.585272701527695e-05, - "loss": 0.3781, + "learning_rate": 4.585252541858314e-05, + "loss": 0.2853, "step": 2427000 }, { "epoch": 1.46, - "learning_rate": 4.585063124964751e-05, - "loss": 0.3748, + "learning_rate": 4.585042545302257e-05, + "loss": 0.2847, "step": 2427500 }, { "epoch": 1.46, - "learning_rate": 4.584853128408694e-05, - "loss": 0.3913, + "learning_rate": 4.5848325487462e-05, + "loss": 0.2927, "step": 2428000 }, { "epoch": 1.46, - "learning_rate": 4.5846431318526374e-05, - "loss": 0.3714, + "learning_rate": 4.584622552190144e-05, + "loss": 0.2825, "step": 2428500 }, { "epoch": 1.46, - "learning_rate": 4.584433135296581e-05, - "loss": 0.3786, + "learning_rate": 4.5844125556340874e-05, + "loss": 0.2835, "step": 2429000 }, { "epoch": 1.46, - "learning_rate": 4.584223138740524e-05, - "loss": 0.3782, + "learning_rate": 4.584202559078031e-05, + "loss": 0.2863, "step": 2429500 }, { "epoch": 1.46, - "learning_rate": 4.584013142184468e-05, - "loss": 0.3755, + "learning_rate": 4.583992982515086e-05, + "loss": 0.2812, "step": 2430000 }, { "epoch": 1.46, - "learning_rate": 4.5838031456284115e-05, - "loss": 0.3716, + "learning_rate": 4.58378298595903e-05, + "loss": 0.284, "step": 2430500 }, { "epoch": 1.46, - "learning_rate": 4.583593149072355e-05, - "loss": 0.3731, + "learning_rate": 4.5835729894029735e-05, + "loss": 0.2849, "step": 2431000 }, { "epoch": 1.46, - "learning_rate": 4.583383572509411e-05, - "loss": 0.3659, + "learning_rate": 4.583362992846917e-05, + "loss": 0.2826, "step": 2431500 }, { "epoch": 1.46, - "learning_rate": 4.583173575953354e-05, - "loss": 0.3701, + "learning_rate": 4.583153416283973e-05, + "loss": 0.2837, "step": 2432000 }, { "epoch": 1.46, - "learning_rate": 4.5829635793972975e-05, - "loss": 0.3727, + "learning_rate": 4.582943419727916e-05, + "loss": 0.2784, "step": 2432500 }, { "epoch": 1.46, - "learning_rate": 4.5827540028343536e-05, - "loss": 0.3871, + "learning_rate": 4.5827334231718596e-05, + "loss": 0.2904, "step": 2433000 }, { "epoch": 1.46, - "learning_rate": 4.582544006278297e-05, - "loss": 0.3674, + "learning_rate": 4.5825238466089156e-05, + "loss": 0.2806, "step": 2433500 }, { "epoch": 1.46, - "learning_rate": 4.58233400972224e-05, - "loss": 0.3808, + "learning_rate": 4.5823138500528596e-05, + "loss": 0.279, "step": 2434000 }, { "epoch": 1.46, - "learning_rate": 4.5821240131661836e-05, - "loss": 0.374, + "learning_rate": 4.582103853496803e-05, + "loss": 0.2834, "step": 2434500 }, { "epoch": 1.46, - "learning_rate": 4.5819140166101276e-05, - "loss": 0.376, + "learning_rate": 4.5818938569407457e-05, + "loss": 0.2829, "step": 2435000 }, { "epoch": 1.46, - "learning_rate": 4.581704020054071e-05, - "loss": 0.3691, + "learning_rate": 4.58168386038469e-05, + "loss": 0.2803, "step": 2435500 }, { "epoch": 1.46, - "learning_rate": 4.581494023498014e-05, - "loss": 0.3723, + "learning_rate": 4.581473863828633e-05, + "loss": 0.2809, "step": 2436000 }, { "epoch": 1.46, - "learning_rate": 4.58128444693507e-05, - "loss": 0.3722, + "learning_rate": 4.5812638672725764e-05, + "loss": 0.2891, "step": 2436500 }, { "epoch": 1.46, - "learning_rate": 4.581074450379014e-05, - "loss": 0.3839, + "learning_rate": 4.5810538707165204e-05, + "loss": 0.2895, "step": 2437000 }, { "epoch": 1.46, - "learning_rate": 4.580864453822957e-05, - "loss": 0.3722, + "learning_rate": 4.580844294153576e-05, + "loss": 0.2821, "step": 2437500 }, { "epoch": 1.46, - "learning_rate": 4.5806544572669004e-05, - "loss": 0.3801, + "learning_rate": 4.580634297597519e-05, + "loss": 0.2811, "step": 2438000 }, { "epoch": 1.46, - "learning_rate": 4.5804444607108444e-05, - "loss": 0.3708, + "learning_rate": 4.5804243010414624e-05, + "loss": 0.2772, "step": 2438500 }, { "epoch": 1.46, - "learning_rate": 4.580234464154788e-05, - "loss": 0.3756, + "learning_rate": 4.5802143044854065e-05, + "loss": 0.285, "step": 2439000 }, { "epoch": 1.46, - "learning_rate": 4.580024467598731e-05, - "loss": 0.3863, + "learning_rate": 4.58000430792935e-05, + "loss": 0.2851, "step": 2439500 }, { "epoch": 1.46, - "learning_rate": 4.579814891035787e-05, - "loss": 0.3638, + "learning_rate": 4.579794731366405e-05, + "loss": 0.2779, "step": 2440000 }, { "epoch": 1.46, - "learning_rate": 4.5796048944797305e-05, - "loss": 0.3745, + "learning_rate": 4.579584734810349e-05, + "loss": 0.289, "step": 2440500 }, { "epoch": 1.46, - "learning_rate": 4.579395317916786e-05, - "loss": 0.3825, + "learning_rate": 4.579375158247405e-05, + "loss": 0.2884, "step": 2441000 }, { "epoch": 1.46, - "learning_rate": 4.579185321360729e-05, - "loss": 0.3678, + "learning_rate": 4.5791651616913486e-05, + "loss": 0.2833, "step": 2441500 }, { "epoch": 1.46, - "learning_rate": 4.578975324804673e-05, - "loss": 0.3824, + "learning_rate": 4.578955165135291e-05, + "loss": 0.2818, "step": 2442000 }, { "epoch": 1.46, - "learning_rate": 4.5787653282486166e-05, - "loss": 0.3624, + "learning_rate": 4.578745168579235e-05, + "loss": 0.2785, "step": 2442500 }, { "epoch": 1.46, - "learning_rate": 4.57855533169256e-05, - "loss": 0.3696, + "learning_rate": 4.578535592016291e-05, + "loss": 0.2832, "step": 2443000 }, { "epoch": 1.46, - "learning_rate": 4.578345335136504e-05, - "loss": 0.3783, + "learning_rate": 4.5783255954602346e-05, + "loss": 0.2808, "step": 2443500 }, { "epoch": 1.47, - "learning_rate": 4.578135338580447e-05, - "loss": 0.3729, + "learning_rate": 4.578115598904178e-05, + "loss": 0.2777, "step": 2444000 }, { "epoch": 1.47, - "learning_rate": 4.5779253420243906e-05, - "loss": 0.3705, + "learning_rate": 4.577905602348121e-05, + "loss": 0.2805, "step": 2444500 }, { "epoch": 1.47, - "learning_rate": 4.5777153454683347e-05, - "loss": 0.3801, + "learning_rate": 4.577695605792065e-05, + "loss": 0.286, "step": 2445000 }, { "epoch": 1.47, - "learning_rate": 4.577505348912278e-05, - "loss": 0.3789, + "learning_rate": 4.577485609236008e-05, + "loss": 0.2903, "step": 2445500 }, { "epoch": 1.47, - "learning_rate": 4.577295352356221e-05, - "loss": 0.3779, + "learning_rate": 4.577275612679952e-05, + "loss": 0.2854, "step": 2446000 }, { "epoch": 1.47, - "learning_rate": 4.577085355800165e-05, - "loss": 0.375, + "learning_rate": 4.5770656161238954e-05, + "loss": 0.2834, "step": 2446500 }, { "epoch": 1.47, - "learning_rate": 4.576875779237221e-05, - "loss": 0.3703, + "learning_rate": 4.576856039560951e-05, + "loss": 0.2814, "step": 2447000 }, { "epoch": 1.47, - "learning_rate": 4.576666202674276e-05, - "loss": 0.3788, + "learning_rate": 4.576646043004895e-05, + "loss": 0.2853, "step": 2447500 }, { "epoch": 1.47, - "learning_rate": 4.5764562061182194e-05, - "loss": 0.3805, + "learning_rate": 4.576436046448838e-05, + "loss": 0.2834, "step": 2448000 }, { "epoch": 1.47, - "learning_rate": 4.5762462095621635e-05, - "loss": 0.3698, + "learning_rate": 4.5762260498927815e-05, + "loss": 0.2816, "step": 2448500 }, { "epoch": 1.47, - "learning_rate": 4.576036213006107e-05, - "loss": 0.3817, + "learning_rate": 4.5760160533367255e-05, + "loss": 0.2821, "step": 2449000 }, { "epoch": 1.47, - "learning_rate": 4.57582621645005e-05, - "loss": 0.3744, + "learning_rate": 4.575806476773781e-05, + "loss": 0.2827, "step": 2449500 }, { "epoch": 1.47, - "learning_rate": 4.575616219893994e-05, - "loss": 0.3743, + "learning_rate": 4.575596480217724e-05, + "loss": 0.2868, "step": 2450000 }, { "epoch": 1.47, - "learning_rate": 4.5754062233379375e-05, - "loss": 0.3727, + "learning_rate": 4.5753864836616675e-05, + "loss": 0.2805, "step": 2450500 }, { "epoch": 1.47, - "learning_rate": 4.57519622678188e-05, - "loss": 0.3732, + "learning_rate": 4.5751764871056116e-05, + "loss": 0.2806, "step": 2451000 }, { "epoch": 1.47, - "learning_rate": 4.574986650218936e-05, - "loss": 0.378, + "learning_rate": 4.574966910542667e-05, + "loss": 0.2866, "step": 2451500 }, { "epoch": 1.47, - "learning_rate": 4.574777493649104e-05, - "loss": 0.3808, + "learning_rate": 4.57475691398661e-05, + "loss": 0.2918, "step": 2452000 }, { "epoch": 1.47, - "learning_rate": 4.5745674970930476e-05, - "loss": 0.3746, + "learning_rate": 4.5745469174305536e-05, + "loss": 0.2836, "step": 2452500 }, { "epoch": 1.47, - "learning_rate": 4.574357500536991e-05, - "loss": 0.3781, + "learning_rate": 4.5743369208744976e-05, + "loss": 0.2846, "step": 2453000 }, { "epoch": 1.47, - "learning_rate": 4.574147503980934e-05, - "loss": 0.3776, + "learning_rate": 4.574126924318441e-05, + "loss": 0.2804, "step": 2453500 }, { "epoch": 1.47, - "learning_rate": 4.573937507424878e-05, - "loss": 0.3795, + "learning_rate": 4.573916927762384e-05, + "loss": 0.2867, "step": 2454000 }, { "epoch": 1.47, - "learning_rate": 4.573727510868822e-05, - "loss": 0.3825, + "learning_rate": 4.5737073511994404e-05, + "loss": 0.2857, "step": 2454500 }, { "epoch": 1.47, - "learning_rate": 4.573517514312765e-05, - "loss": 0.3811, + "learning_rate": 4.573497354643384e-05, + "loss": 0.282, "step": 2455000 }, { "epoch": 1.47, - "learning_rate": 4.573307517756709e-05, - "loss": 0.3819, + "learning_rate": 4.573287358087327e-05, + "loss": 0.2884, "step": 2455500 }, { "epoch": 1.47, - "learning_rate": 4.5730975212006524e-05, - "loss": 0.3831, + "learning_rate": 4.573077361531271e-05, + "loss": 0.2871, "step": 2456000 }, { "epoch": 1.47, - "learning_rate": 4.572887524644596e-05, - "loss": 0.3795, + "learning_rate": 4.5728677849683264e-05, + "loss": 0.2794, "step": 2456500 }, { "epoch": 1.47, - "learning_rate": 4.57267752808854e-05, - "loss": 0.3689, + "learning_rate": 4.57265778841227e-05, + "loss": 0.2807, "step": 2457000 }, { "epoch": 1.47, - "learning_rate": 4.572467951525595e-05, - "loss": 0.3724, + "learning_rate": 4.572447791856213e-05, + "loss": 0.2849, "step": 2457500 }, { "epoch": 1.47, - "learning_rate": 4.5722579549695385e-05, - "loss": 0.3648, + "learning_rate": 4.572237795300157e-05, + "loss": 0.2805, "step": 2458000 }, { "epoch": 1.47, - "learning_rate": 4.572047958413482e-05, - "loss": 0.3762, + "learning_rate": 4.5720277987441005e-05, + "loss": 0.2881, "step": 2458500 }, { "epoch": 1.47, - "learning_rate": 4.571837961857426e-05, - "loss": 0.3793, + "learning_rate": 4.571817802188044e-05, + "loss": 0.2846, "step": 2459000 }, { "epoch": 1.47, - "learning_rate": 4.571627965301369e-05, - "loss": 0.3811, + "learning_rate": 4.571608225625099e-05, + "loss": 0.2854, "step": 2459500 }, { "epoch": 1.47, - "learning_rate": 4.5714179687453125e-05, - "loss": 0.3821, + "learning_rate": 4.571398229069043e-05, + "loss": 0.2847, "step": 2460000 }, { "epoch": 1.48, - "learning_rate": 4.571207972189256e-05, - "loss": 0.3771, + "learning_rate": 4.5711882325129866e-05, + "loss": 0.2867, "step": 2460500 }, { "epoch": 1.48, - "learning_rate": 4.570998395626312e-05, - "loss": 0.3827, + "learning_rate": 4.57097823595693e-05, + "loss": 0.2873, "step": 2461000 }, { "epoch": 1.48, - "learning_rate": 4.570788399070255e-05, - "loss": 0.3853, + "learning_rate": 4.570768659393986e-05, + "loss": 0.286, "step": 2461500 }, { "epoch": 1.48, - "learning_rate": 4.570578402514199e-05, - "loss": 0.376, + "learning_rate": 4.570558662837929e-05, + "loss": 0.2843, "step": 2462000 }, { "epoch": 1.48, - "learning_rate": 4.570368405958142e-05, - "loss": 0.3655, + "learning_rate": 4.5703486662818726e-05, + "loss": 0.2757, "step": 2462500 }, { "epoch": 1.48, - "learning_rate": 4.570158409402085e-05, - "loss": 0.3741, + "learning_rate": 4.570139089718929e-05, + "loss": 0.2852, "step": 2463000 }, { "epoch": 1.48, - "learning_rate": 4.569948832839141e-05, - "loss": 0.3754, + "learning_rate": 4.569929093162872e-05, + "loss": 0.2835, "step": 2463500 }, { "epoch": 1.48, - "learning_rate": 4.5697388362830853e-05, - "loss": 0.3761, + "learning_rate": 4.5697190966068154e-05, + "loss": 0.2897, "step": 2464000 }, { "epoch": 1.48, - "learning_rate": 4.569528839727029e-05, - "loss": 0.3776, + "learning_rate": 4.569509100050759e-05, + "loss": 0.2866, "step": 2464500 }, { "epoch": 1.48, - "learning_rate": 4.5693188431709714e-05, - "loss": 0.3612, + "learning_rate": 4.569299103494703e-05, + "loss": 0.2799, "step": 2465000 }, { "epoch": 1.48, - "learning_rate": 4.5691088466149154e-05, - "loss": 0.371, + "learning_rate": 4.569089106938646e-05, + "loss": 0.2839, "step": 2465500 }, { "epoch": 1.48, - "learning_rate": 4.568898850058859e-05, - "loss": 0.3716, + "learning_rate": 4.5688791103825894e-05, + "loss": 0.2831, "step": 2466000 }, { "epoch": 1.48, - "learning_rate": 4.568688853502802e-05, - "loss": 0.3732, + "learning_rate": 4.5686691138265335e-05, + "loss": 0.277, "step": 2466500 }, { "epoch": 1.48, - "learning_rate": 4.568478856946746e-05, - "loss": 0.379, + "learning_rate": 4.568459957256701e-05, + "loss": 0.2856, "step": 2467000 }, { "epoch": 1.48, - "learning_rate": 4.5682692803838015e-05, - "loss": 0.3749, + "learning_rate": 4.568249960700645e-05, + "loss": 0.2817, "step": 2467500 }, { "epoch": 1.48, - "learning_rate": 4.568059283827745e-05, - "loss": 0.3764, + "learning_rate": 4.5680399641445875e-05, + "loss": 0.2848, "step": 2468000 }, { "epoch": 1.48, - "learning_rate": 4.567849707264801e-05, - "loss": 0.3827, + "learning_rate": 4.5678299675885315e-05, + "loss": 0.2857, "step": 2468500 }, { "epoch": 1.48, - "learning_rate": 4.567639710708745e-05, - "loss": 0.3728, + "learning_rate": 4.567619971032475e-05, + "loss": 0.2831, "step": 2469000 }, { "epoch": 1.48, - "learning_rate": 4.567429714152688e-05, - "loss": 0.3799, + "learning_rate": 4.567410394469531e-05, + "loss": 0.2853, "step": 2469500 }, { "epoch": 1.48, - "learning_rate": 4.5672201375897436e-05, - "loss": 0.3853, + "learning_rate": 4.567200397913474e-05, + "loss": 0.2858, "step": 2470000 }, { "epoch": 1.48, - "learning_rate": 4.567010141033687e-05, - "loss": 0.3929, + "learning_rate": 4.5669904013574176e-05, + "loss": 0.292, "step": 2470500 }, { "epoch": 1.48, - "learning_rate": 4.566800144477631e-05, - "loss": 0.3737, + "learning_rate": 4.566780404801361e-05, + "loss": 0.2835, "step": 2471000 }, { "epoch": 1.48, - "learning_rate": 4.566590147921574e-05, - "loss": 0.3766, + "learning_rate": 4.566570408245304e-05, + "loss": 0.2795, "step": 2471500 }, { "epoch": 1.48, - "learning_rate": 4.5663801513655176e-05, - "loss": 0.3791, + "learning_rate": 4.566360411689248e-05, + "loss": 0.2847, "step": 2472000 }, { "epoch": 1.48, - "learning_rate": 4.566170154809461e-05, - "loss": 0.3747, + "learning_rate": 4.566150415133192e-05, + "loss": 0.2856, "step": 2472500 }, { "epoch": 1.48, - "learning_rate": 4.565960158253404e-05, - "loss": 0.3875, + "learning_rate": 4.565940418577135e-05, + "loss": 0.2866, "step": 2473000 }, { "epoch": 1.48, - "learning_rate": 4.5657505816904604e-05, - "loss": 0.3848, + "learning_rate": 4.5657308420141904e-05, + "loss": 0.2905, "step": 2473500 }, { "epoch": 1.48, - "learning_rate": 4.565540585134404e-05, - "loss": 0.3748, + "learning_rate": 4.5655212654512464e-05, + "loss": 0.2831, "step": 2474000 }, { "epoch": 1.48, - "learning_rate": 4.565330588578347e-05, - "loss": 0.3876, + "learning_rate": 4.5653112688951904e-05, + "loss": 0.2891, "step": 2474500 }, { "epoch": 1.48, - "learning_rate": 4.5651205920222904e-05, - "loss": 0.361, + "learning_rate": 4.565101272339134e-05, + "loss": 0.2785, "step": 2475000 }, { "epoch": 1.48, - "learning_rate": 4.5649105954662344e-05, - "loss": 0.3833, + "learning_rate": 4.564891275783077e-05, + "loss": 0.2834, "step": 2475500 }, { "epoch": 1.48, - "learning_rate": 4.564700598910178e-05, - "loss": 0.3717, + "learning_rate": 4.5646812792270205e-05, + "loss": 0.2867, "step": 2476000 }, { "epoch": 1.48, - "learning_rate": 4.564490602354121e-05, - "loss": 0.3714, + "learning_rate": 4.564471282670964e-05, + "loss": 0.2829, "step": 2476500 }, { "epoch": 1.49, - "learning_rate": 4.564280605798065e-05, - "loss": 0.3602, + "learning_rate": 4.56426170610802e-05, + "loss": 0.2774, "step": 2477000 }, { "epoch": 1.49, - "learning_rate": 4.5640706092420085e-05, - "loss": 0.3673, + "learning_rate": 4.564051709551963e-05, + "loss": 0.284, "step": 2477500 }, { "epoch": 1.49, - "learning_rate": 4.563860612685952e-05, - "loss": 0.3694, + "learning_rate": 4.563842132989019e-05, + "loss": 0.2803, "step": 2478000 }, { "epoch": 1.49, - "learning_rate": 4.563650616129896e-05, - "loss": 0.373, + "learning_rate": 4.5636321364329626e-05, + "loss": 0.2852, "step": 2478500 }, { "epoch": 1.49, - "learning_rate": 4.563440619573839e-05, - "loss": 0.3738, + "learning_rate": 4.563422139876906e-05, + "loss": 0.279, "step": 2479000 }, { "epoch": 1.49, - "learning_rate": 4.5632310430108945e-05, - "loss": 0.3735, + "learning_rate": 4.56321214332085e-05, + "loss": 0.2849, "step": 2479500 }, { "epoch": 1.49, - "learning_rate": 4.563021046454838e-05, - "loss": 0.3781, + "learning_rate": 4.5630021467647926e-05, + "loss": 0.2847, "step": 2480000 }, { "epoch": 1.49, - "learning_rate": 4.562811049898782e-05, - "loss": 0.3787, + "learning_rate": 4.5627925702018487e-05, + "loss": 0.285, "step": 2480500 }, { "epoch": 1.49, - "learning_rate": 4.562601053342725e-05, - "loss": 0.3784, + "learning_rate": 4.562582573645792e-05, + "loss": 0.2862, "step": 2481000 }, { "epoch": 1.49, - "learning_rate": 4.5623910567866686e-05, - "loss": 0.3751, + "learning_rate": 4.562372577089736e-05, + "loss": 0.2826, "step": 2481500 }, { "epoch": 1.49, - "learning_rate": 4.5621810602306126e-05, - "loss": 0.3609, + "learning_rate": 4.5621625805336794e-05, + "loss": 0.2759, "step": 2482000 }, { "epoch": 1.49, - "learning_rate": 4.561971063674555e-05, - "loss": 0.3699, + "learning_rate": 4.561953003970735e-05, + "loss": 0.2824, "step": 2482500 }, { "epoch": 1.49, - "learning_rate": 4.561761487111611e-05, - "loss": 0.376, + "learning_rate": 4.561743007414679e-05, + "loss": 0.282, "step": 2483000 }, { "epoch": 1.49, - "learning_rate": 4.561551490555555e-05, - "loss": 0.3711, + "learning_rate": 4.561533010858622e-05, + "loss": 0.2796, "step": 2483500 }, { "epoch": 1.49, - "learning_rate": 4.561341493999499e-05, - "loss": 0.3786, + "learning_rate": 4.5613230143025654e-05, + "loss": 0.2882, "step": 2484000 }, { "epoch": 1.49, - "learning_rate": 4.561131497443442e-05, - "loss": 0.3674, + "learning_rate": 4.5611130177465095e-05, + "loss": 0.2784, "step": 2484500 }, { "epoch": 1.49, - "learning_rate": 4.5609215008873854e-05, - "loss": 0.3757, + "learning_rate": 4.560903021190452e-05, + "loss": 0.285, "step": 2485000 }, { "epoch": 1.49, - "learning_rate": 4.560711504331329e-05, - "loss": 0.3707, + "learning_rate": 4.5606930246343955e-05, + "loss": 0.283, "step": 2485500 }, { "epoch": 1.49, - "learning_rate": 4.560501927768385e-05, - "loss": 0.3698, + "learning_rate": 4.5604830280783395e-05, + "loss": 0.2833, "step": 2486000 }, { "epoch": 1.49, - "learning_rate": 4.560291931212328e-05, - "loss": 0.3767, + "learning_rate": 4.560273031522283e-05, + "loss": 0.2858, "step": 2486500 }, { "epoch": 1.49, - "learning_rate": 4.560081934656272e-05, - "loss": 0.3774, + "learning_rate": 4.560063034966226e-05, + "loss": 0.287, "step": 2487000 }, { "epoch": 1.49, - "learning_rate": 4.559871938100215e-05, - "loss": 0.3761, + "learning_rate": 4.55985303841017e-05, + "loss": 0.2856, "step": 2487500 }, { "epoch": 1.49, - "learning_rate": 4.559662361537271e-05, - "loss": 0.3703, + "learning_rate": 4.5596430418541136e-05, + "loss": 0.2834, "step": 2488000 }, { "epoch": 1.49, - "learning_rate": 4.559452784974326e-05, - "loss": 0.3709, + "learning_rate": 4.559433045298057e-05, + "loss": 0.2846, "step": 2488500 }, { "epoch": 1.49, - "learning_rate": 4.5592427884182696e-05, - "loss": 0.3854, + "learning_rate": 4.559223048742001e-05, + "loss": 0.2863, "step": 2489000 }, { "epoch": 1.49, - "learning_rate": 4.5590327918622136e-05, - "loss": 0.3776, + "learning_rate": 4.559013052185944e-05, + "loss": 0.2884, "step": 2489500 }, { "epoch": 1.49, - "learning_rate": 4.558822795306157e-05, - "loss": 0.3741, + "learning_rate": 4.5588030556298876e-05, + "loss": 0.2796, "step": 2490000 }, { "epoch": 1.49, - "learning_rate": 4.5586127987501e-05, - "loss": 0.3803, + "learning_rate": 4.558593059073831e-05, + "loss": 0.2848, "step": 2490500 }, { "epoch": 1.49, - "learning_rate": 4.558402802194044e-05, - "loss": 0.3597, + "learning_rate": 4.558383062517774e-05, + "loss": 0.2774, "step": 2491000 }, { "epoch": 1.49, - "learning_rate": 4.5581928056379876e-05, - "loss": 0.3805, + "learning_rate": 4.5581734859548304e-05, + "loss": 0.2857, "step": 2491500 }, { "epoch": 1.49, - "learning_rate": 4.557982809081931e-05, - "loss": 0.3714, + "learning_rate": 4.5579634893987744e-05, + "loss": 0.2827, "step": 2492000 }, { "epoch": 1.49, - "learning_rate": 4.557772812525874e-05, - "loss": 0.3802, + "learning_rate": 4.557753492842717e-05, + "loss": 0.2894, "step": 2492500 }, { "epoch": 1.49, - "learning_rate": 4.557562815969818e-05, - "loss": 0.3693, + "learning_rate": 4.5575434962866604e-05, + "loss": 0.284, "step": 2493000 }, { "epoch": 1.49, - "learning_rate": 4.557352819413762e-05, - "loss": 0.3766, + "learning_rate": 4.5573339197237164e-05, + "loss": 0.2861, "step": 2493500 }, { "epoch": 1.5, - "learning_rate": 4.557142822857705e-05, - "loss": 0.3819, + "learning_rate": 4.5571239231676605e-05, + "loss": 0.285, "step": 2494000 }, { "epoch": 1.5, - "learning_rate": 4.5569332462947604e-05, - "loss": 0.3753, + "learning_rate": 4.556913926611604e-05, + "loss": 0.2833, "step": 2494500 }, { "epoch": 1.5, - "learning_rate": 4.556723249738704e-05, - "loss": 0.3702, + "learning_rate": 4.5567039300555465e-05, + "loss": 0.2791, "step": 2495000 }, { "epoch": 1.5, - "learning_rate": 4.556513253182648e-05, - "loss": 0.3665, + "learning_rate": 4.5564939334994905e-05, + "loss": 0.2768, "step": 2495500 }, { "epoch": 1.5, - "learning_rate": 4.556303256626591e-05, - "loss": 0.3727, + "learning_rate": 4.5562843569365465e-05, + "loss": 0.2814, "step": 2496000 }, { "epoch": 1.5, - "learning_rate": 4.556093680063647e-05, - "loss": 0.3781, + "learning_rate": 4.55607436038049e-05, + "loss": 0.2862, "step": 2496500 }, { "epoch": 1.5, - "learning_rate": 4.5558841035007025e-05, - "loss": 0.3808, + "learning_rate": 4.555864363824433e-05, + "loss": 0.2858, "step": 2497000 }, { "epoch": 1.5, - "learning_rate": 4.555674106944646e-05, - "loss": 0.3794, + "learning_rate": 4.5556543672683766e-05, + "loss": 0.2858, "step": 2497500 }, { "epoch": 1.5, - "learning_rate": 4.55546411038859e-05, - "loss": 0.374, + "learning_rate": 4.55544437071232e-05, + "loss": 0.2856, "step": 2498000 }, { "epoch": 1.5, - "learning_rate": 4.555254113832533e-05, - "loss": 0.374, + "learning_rate": 4.555234374156264e-05, + "loss": 0.2813, "step": 2498500 }, { "epoch": 1.5, - "learning_rate": 4.5550441172764766e-05, - "loss": 0.3619, + "learning_rate": 4.55502479759332e-05, + "loss": 0.2793, "step": 2499000 }, { "epoch": 1.5, - "learning_rate": 4.55483412072042e-05, - "loss": 0.3731, + "learning_rate": 4.554814801037263e-05, + "loss": 0.285, "step": 2499500 }, { "epoch": 1.5, - "learning_rate": 4.554624124164363e-05, - "loss": 0.3674, + "learning_rate": 4.554604804481206e-05, + "loss": 0.2819, "step": 2500000 }, { "epoch": 1.5, - "eval_loss": 0.3600391149520874, - "eval_runtime": 1121.959, - "eval_samples_per_second": 469.465, - "eval_steps_per_second": 78.244, + "eval_loss": 0.25834259390830994, + "eval_runtime": 1455.0809, + "eval_samples_per_second": 361.987, + "eval_steps_per_second": 60.331, "step": 2500000 }, { "epoch": 1.5, - "learning_rate": 4.554414547601419e-05, - "loss": 0.3643, + "learning_rate": 4.55439480792515e-05, + "loss": 0.2824, "step": 2500500 }, { "epoch": 1.5, - "learning_rate": 4.554204551045363e-05, - "loss": 0.3667, + "learning_rate": 4.554185231362206e-05, + "loss": 0.2816, "step": 2501000 }, { "epoch": 1.5, - "learning_rate": 4.553994554489306e-05, - "loss": 0.3691, + "learning_rate": 4.5539752348061494e-05, + "loss": 0.2804, "step": 2501500 }, { "epoch": 1.5, - "learning_rate": 4.553784557933249e-05, - "loss": 0.3739, + "learning_rate": 4.553765238250093e-05, + "loss": 0.2838, "step": 2502000 }, { "epoch": 1.5, - "learning_rate": 4.5535749813703054e-05, - "loss": 0.3795, + "learning_rate": 4.553555241694036e-05, + "loss": 0.2893, "step": 2502500 }, { "epoch": 1.5, - "learning_rate": 4.5533649848142494e-05, - "loss": 0.3683, + "learning_rate": 4.553345665131092e-05, + "loss": 0.282, "step": 2503000 }, { "epoch": 1.5, - "learning_rate": 4.553154988258193e-05, - "loss": 0.3815, + "learning_rate": 4.5531356685750355e-05, + "loss": 0.2839, "step": 2503500 }, { "epoch": 1.5, - "learning_rate": 4.5529449917021354e-05, - "loss": 0.3759, + "learning_rate": 4.552925672018979e-05, + "loss": 0.2764, "step": 2504000 }, { "epoch": 1.5, - "learning_rate": 4.5527349951460794e-05, - "loss": 0.3702, + "learning_rate": 4.552715675462922e-05, + "loss": 0.2795, "step": 2504500 }, { "epoch": 1.5, - "learning_rate": 4.5525258385762475e-05, - "loss": 0.3709, + "learning_rate": 4.5525056789068655e-05, + "loss": 0.2839, "step": 2505000 }, { "epoch": 1.5, - "learning_rate": 4.552315842020191e-05, - "loss": 0.37, + "learning_rate": 4.5522965223370335e-05, + "loss": 0.2822, "step": 2505500 }, { "epoch": 1.5, - "learning_rate": 4.552105845464134e-05, - "loss": 0.3645, + "learning_rate": 4.552086525780977e-05, + "loss": 0.2784, "step": 2506000 }, { "epoch": 1.5, - "learning_rate": 4.551895848908078e-05, - "loss": 0.3758, + "learning_rate": 4.551876529224921e-05, + "loss": 0.2866, "step": 2506500 }, { "epoch": 1.5, - "learning_rate": 4.5516858523520215e-05, - "loss": 0.3763, + "learning_rate": 4.551666952661976e-05, + "loss": 0.2866, "step": 2507000 }, { "epoch": 1.5, - "learning_rate": 4.551475855795965e-05, - "loss": 0.3764, + "learning_rate": 4.5514569561059196e-05, + "loss": 0.2854, "step": 2507500 }, { "epoch": 1.5, - "learning_rate": 4.55126627923302e-05, - "loss": 0.3706, + "learning_rate": 4.551246959549863e-05, + "loss": 0.2844, "step": 2508000 }, { "epoch": 1.5, - "learning_rate": 4.551056282676964e-05, - "loss": 0.3665, + "learning_rate": 4.551036962993807e-05, + "loss": 0.2799, "step": 2508500 }, { "epoch": 1.5, - "learning_rate": 4.5508462861209076e-05, - "loss": 0.3815, + "learning_rate": 4.55082696643775e-05, + "loss": 0.2861, "step": 2509000 }, { "epoch": 1.5, - "learning_rate": 4.550636289564851e-05, - "loss": 0.3735, + "learning_rate": 4.550616969881694e-05, + "loss": 0.2864, "step": 2509500 }, { "epoch": 1.5, - "learning_rate": 4.550426293008795e-05, - "loss": 0.3733, + "learning_rate": 4.550406973325638e-05, + "loss": 0.2832, "step": 2510000 }, { "epoch": 1.51, - "learning_rate": 4.550216296452738e-05, - "loss": 0.3787, + "learning_rate": 4.550196976769581e-05, + "loss": 0.2822, "step": 2510500 }, { "epoch": 1.51, - "learning_rate": 4.550006299896681e-05, - "loss": 0.3795, + "learning_rate": 4.5499869802135244e-05, + "loss": 0.2877, "step": 2511000 }, { "epoch": 1.51, - "learning_rate": 4.549796303340625e-05, - "loss": 0.366, + "learning_rate": 4.5497769836574684e-05, + "loss": 0.2845, "step": 2511500 }, { "epoch": 1.51, - "learning_rate": 4.549586726777681e-05, - "loss": 0.3767, + "learning_rate": 4.549566987101411e-05, + "loss": 0.2859, "step": 2512000 }, { "epoch": 1.51, - "learning_rate": 4.5493767302216244e-05, - "loss": 0.3706, + "learning_rate": 4.549356990545355e-05, + "loss": 0.2827, "step": 2512500 }, { "epoch": 1.51, - "learning_rate": 4.5491667336655684e-05, - "loss": 0.3821, + "learning_rate": 4.549147413982411e-05, + "loss": 0.2883, "step": 2513000 }, { "epoch": 1.51, - "learning_rate": 4.548956737109511e-05, - "loss": 0.3747, + "learning_rate": 4.5489374174263545e-05, + "loss": 0.2761, "step": 2513500 }, { "epoch": 1.51, - "learning_rate": 4.5487467405534544e-05, - "loss": 0.3774, + "learning_rate": 4.548727420870297e-05, + "loss": 0.2802, "step": 2514000 }, { "epoch": 1.51, - "learning_rate": 4.5485371639905105e-05, - "loss": 0.3765, + "learning_rate": 4.548517424314241e-05, + "loss": 0.2886, "step": 2514500 }, { "epoch": 1.51, - "learning_rate": 4.5483271674344545e-05, - "loss": 0.3715, + "learning_rate": 4.548307847751297e-05, + "loss": 0.2815, "step": 2515000 }, { "epoch": 1.51, - "learning_rate": 4.548117170878398e-05, - "loss": 0.3746, + "learning_rate": 4.5480978511952406e-05, + "loss": 0.2783, "step": 2515500 }, { "epoch": 1.51, - "learning_rate": 4.5479071743223405e-05, - "loss": 0.3773, + "learning_rate": 4.547887854639184e-05, + "loss": 0.2826, "step": 2516000 }, { "epoch": 1.51, - "learning_rate": 4.5476971777662845e-05, - "loss": 0.3678, + "learning_rate": 4.547677858083127e-05, + "loss": 0.2795, "step": 2516500 }, { "epoch": 1.51, - "learning_rate": 4.547487181210228e-05, - "loss": 0.3549, + "learning_rate": 4.5474678615270706e-05, + "loss": 0.2779, "step": 2517000 }, { "epoch": 1.51, - "learning_rate": 4.547277604647284e-05, - "loss": 0.3744, + "learning_rate": 4.5472582849641266e-05, + "loss": 0.2889, "step": 2517500 }, { "epoch": 1.51, - "learning_rate": 4.547067608091227e-05, - "loss": 0.3642, + "learning_rate": 4.54704828840807e-05, + "loss": 0.2783, "step": 2518000 }, { "epoch": 1.51, - "learning_rate": 4.5468576115351706e-05, - "loss": 0.3758, + "learning_rate": 4.546838291852014e-05, + "loss": 0.2809, "step": 2518500 }, { "epoch": 1.51, - "learning_rate": 4.546647614979114e-05, - "loss": 0.3757, + "learning_rate": 4.546628295295957e-05, + "loss": 0.2916, "step": 2519000 }, { "epoch": 1.51, - "learning_rate": 4.546437618423058e-05, - "loss": 0.37, + "learning_rate": 4.546418718733013e-05, + "loss": 0.2833, "step": 2519500 }, { "epoch": 1.51, - "learning_rate": 4.546227621867001e-05, - "loss": 0.3679, + "learning_rate": 4.546208722176957e-05, + "loss": 0.2811, "step": 2520000 }, { "epoch": 1.51, - "learning_rate": 4.546017625310945e-05, - "loss": 0.3671, + "learning_rate": 4.5459987256209e-05, + "loss": 0.2788, "step": 2520500 }, { "epoch": 1.51, - "learning_rate": 4.545807628754889e-05, - "loss": 0.3763, + "learning_rate": 4.5457891490579554e-05, + "loss": 0.2837, "step": 2521000 }, { "epoch": 1.51, - "learning_rate": 4.545598052191944e-05, - "loss": 0.363, + "learning_rate": 4.545579152501899e-05, + "loss": 0.2814, "step": 2521500 }, { "epoch": 1.51, - "learning_rate": 4.5453880556358874e-05, - "loss": 0.3704, + "learning_rate": 4.545369155945843e-05, + "loss": 0.2819, "step": 2522000 }, { "epoch": 1.51, - "learning_rate": 4.5451784790729434e-05, - "loss": 0.3776, + "learning_rate": 4.545159159389786e-05, + "loss": 0.2854, "step": 2522500 }, { "epoch": 1.51, - "learning_rate": 4.544968482516886e-05, - "loss": 0.3699, + "learning_rate": 4.5449491628337295e-05, + "loss": 0.2875, "step": 2523000 }, { "epoch": 1.51, - "learning_rate": 4.54475848596083e-05, - "loss": 0.3811, + "learning_rate": 4.544739586270785e-05, + "loss": 0.2902, "step": 2523500 }, { "epoch": 1.51, - "learning_rate": 4.5445484894047735e-05, - "loss": 0.368, + "learning_rate": 4.544529589714729e-05, + "loss": 0.2849, "step": 2524000 }, { "epoch": 1.51, - "learning_rate": 4.544338492848717e-05, - "loss": 0.3778, + "learning_rate": 4.544319593158672e-05, + "loss": 0.282, "step": 2524500 }, { "epoch": 1.51, - "learning_rate": 4.544128916285773e-05, - "loss": 0.3833, + "learning_rate": 4.5441095966026156e-05, + "loss": 0.28, "step": 2525000 }, { "epoch": 1.51, - "learning_rate": 4.543918919729716e-05, - "loss": 0.3784, + "learning_rate": 4.5438996000465596e-05, + "loss": 0.2792, "step": 2525500 }, { "epoch": 1.51, - "learning_rate": 4.5437089231736595e-05, - "loss": 0.3842, + "learning_rate": 4.543689603490502e-05, + "loss": 0.2837, "step": 2526000 }, { "epoch": 1.51, - "learning_rate": 4.5434989266176036e-05, - "loss": 0.3759, + "learning_rate": 4.543479606934446e-05, + "loss": 0.2832, "step": 2526500 }, { "epoch": 1.52, - "learning_rate": 4.543288930061547e-05, - "loss": 0.3698, + "learning_rate": 4.5432696103783896e-05, + "loss": 0.2818, "step": 2527000 }, { "epoch": 1.52, - "learning_rate": 4.54307893350549e-05, - "loss": 0.3766, + "learning_rate": 4.543059613822333e-05, + "loss": 0.2817, "step": 2527500 }, { "epoch": 1.52, - "learning_rate": 4.542868936949434e-05, - "loss": 0.3812, + "learning_rate": 4.542850037259389e-05, + "loss": 0.2833, "step": 2528000 }, { "epoch": 1.52, - "learning_rate": 4.5426589403933776e-05, - "loss": 0.3808, + "learning_rate": 4.5426400407033324e-05, + "loss": 0.2839, "step": 2528500 }, { "epoch": 1.52, - "learning_rate": 4.542449783823546e-05, - "loss": 0.3665, + "learning_rate": 4.5424304641403884e-05, + "loss": 0.2792, "step": 2529000 }, { "epoch": 1.52, - "learning_rate": 4.542239787267489e-05, - "loss": 0.3698, + "learning_rate": 4.542220467584332e-05, + "loss": 0.2787, "step": 2529500 }, { "epoch": 1.52, - "learning_rate": 4.542029790711432e-05, - "loss": 0.3711, + "learning_rate": 4.542010471028275e-05, + "loss": 0.2809, "step": 2530000 }, { "epoch": 1.52, - "learning_rate": 4.541819794155376e-05, - "loss": 0.3618, + "learning_rate": 4.541800474472219e-05, + "loss": 0.2808, "step": 2530500 }, { "epoch": 1.52, - "learning_rate": 4.541609797599319e-05, - "loss": 0.3741, + "learning_rate": 4.541590477916162e-05, + "loss": 0.2854, "step": 2531000 }, { "epoch": 1.52, - "learning_rate": 4.5413998010432624e-05, - "loss": 0.3826, + "learning_rate": 4.541380481360105e-05, + "loss": 0.2863, "step": 2531500 }, { "epoch": 1.52, - "learning_rate": 4.5411898044872064e-05, - "loss": 0.3701, + "learning_rate": 4.541170484804049e-05, + "loss": 0.2796, "step": 2532000 }, { "epoch": 1.52, - "learning_rate": 4.54097980793115e-05, - "loss": 0.3664, + "learning_rate": 4.5409604882479925e-05, + "loss": 0.281, "step": 2532500 }, { "epoch": 1.52, - "learning_rate": 4.540770231368205e-05, - "loss": 0.382, + "learning_rate": 4.5407509116850485e-05, + "loss": 0.2841, "step": 2533000 }, { "epoch": 1.52, - "learning_rate": 4.540560654805261e-05, - "loss": 0.3785, + "learning_rate": 4.540540915128992e-05, + "loss": 0.283, "step": 2533500 }, { "epoch": 1.52, - "learning_rate": 4.540350658249205e-05, - "loss": 0.3724, + "learning_rate": 4.540330918572935e-05, + "loss": 0.2792, "step": 2534000 }, { "epoch": 1.52, - "learning_rate": 4.5401406616931485e-05, - "loss": 0.3772, + "learning_rate": 4.5401209220168786e-05, + "loss": 0.2834, "step": 2534500 }, { "epoch": 1.52, - "learning_rate": 4.539930665137091e-05, - "loss": 0.377, + "learning_rate": 4.5399109254608226e-05, + "loss": 0.2824, "step": 2535000 }, { "epoch": 1.52, - "learning_rate": 4.539720668581035e-05, - "loss": 0.3769, + "learning_rate": 4.539701348897878e-05, + "loss": 0.2873, "step": 2535500 }, { "epoch": 1.52, - "learning_rate": 4.5395106720249786e-05, - "loss": 0.3763, + "learning_rate": 4.539491352341821e-05, + "loss": 0.2801, "step": 2536000 }, { "epoch": 1.52, - "learning_rate": 4.539300675468922e-05, - "loss": 0.3794, + "learning_rate": 4.5392813557857646e-05, + "loss": 0.2824, "step": 2536500 }, { "epoch": 1.52, - "learning_rate": 4.539090678912866e-05, - "loss": 0.3704, + "learning_rate": 4.5390713592297087e-05, + "loss": 0.2801, "step": 2537000 }, { "epoch": 1.52, - "learning_rate": 4.538881102349921e-05, - "loss": 0.3662, + "learning_rate": 4.538861782666765e-05, + "loss": 0.2799, "step": 2537500 }, { "epoch": 1.52, - "learning_rate": 4.5386711057938646e-05, - "loss": 0.3793, + "learning_rate": 4.5386517861107074e-05, + "loss": 0.2916, "step": 2538000 }, { "epoch": 1.52, - "learning_rate": 4.538461109237808e-05, - "loss": 0.372, + "learning_rate": 4.538441789554651e-05, + "loss": 0.2841, "step": 2538500 }, { "epoch": 1.52, - "learning_rate": 4.538251112681752e-05, - "loss": 0.3726, + "learning_rate": 4.538231792998595e-05, + "loss": 0.2859, "step": 2539000 }, { "epoch": 1.52, - "learning_rate": 4.538041536118808e-05, - "loss": 0.3704, + "learning_rate": 4.538022216435651e-05, + "loss": 0.2803, "step": 2539500 }, { "epoch": 1.52, - "learning_rate": 4.537831539562751e-05, - "loss": 0.3718, + "learning_rate": 4.537812219879594e-05, + "loss": 0.2832, "step": 2540000 }, { "epoch": 1.52, - "learning_rate": 4.537621543006695e-05, - "loss": 0.3861, + "learning_rate": 4.5376022233235375e-05, + "loss": 0.2852, "step": 2540500 }, { "epoch": 1.52, - "learning_rate": 4.537411546450638e-05, - "loss": 0.3703, + "learning_rate": 4.537392226767481e-05, + "loss": 0.2808, "step": 2541000 }, { "epoch": 1.52, - "learning_rate": 4.537201969887694e-05, - "loss": 0.3755, + "learning_rate": 4.537182230211424e-05, + "loss": 0.2844, "step": 2541500 }, { "epoch": 1.52, - "learning_rate": 4.536991973331637e-05, - "loss": 0.371, + "learning_rate": 4.53697265364848e-05, + "loss": 0.2819, "step": 2542000 }, { "epoch": 1.52, - "learning_rate": 4.536781976775581e-05, - "loss": 0.3727, + "learning_rate": 4.536762657092424e-05, + "loss": 0.2808, "step": 2542500 }, { "epoch": 1.52, - "learning_rate": 4.536571980219524e-05, - "loss": 0.3805, + "learning_rate": 4.536552660536367e-05, + "loss": 0.2913, "step": 2543000 }, { "epoch": 1.52, - "learning_rate": 4.5363619836634675e-05, - "loss": 0.373, + "learning_rate": 4.53634266398031e-05, + "loss": 0.2793, "step": 2543500 }, { "epoch": 1.53, - "learning_rate": 4.5361519871074115e-05, - "loss": 0.3788, + "learning_rate": 4.536133087417366e-05, + "loss": 0.2878, "step": 2544000 }, { "epoch": 1.53, - "learning_rate": 4.535941990551355e-05, - "loss": 0.3735, + "learning_rate": 4.53592309086131e-05, + "loss": 0.2845, "step": 2544500 }, { "epoch": 1.53, - "learning_rate": 4.53573241398841e-05, - "loss": 0.3726, + "learning_rate": 4.535713094305253e-05, + "loss": 0.2821, "step": 2545000 }, { "epoch": 1.53, - "learning_rate": 4.5355224174323536e-05, - "loss": 0.3653, + "learning_rate": 4.535503097749196e-05, + "loss": 0.2812, "step": 2545500 }, { "epoch": 1.53, - "learning_rate": 4.5353124208762976e-05, - "loss": 0.3771, + "learning_rate": 4.535293521186252e-05, + "loss": 0.2862, "step": 2546000 }, { "epoch": 1.53, - "learning_rate": 4.535102424320241e-05, - "loss": 0.3706, + "learning_rate": 4.5350835246301964e-05, + "loss": 0.2767, "step": 2546500 }, { "epoch": 1.53, - "learning_rate": 4.534892427764184e-05, - "loss": 0.3754, + "learning_rate": 4.53487352807414e-05, + "loss": 0.2867, "step": 2547000 }, { "epoch": 1.53, - "learning_rate": 4.534682431208128e-05, - "loss": 0.3706, + "learning_rate": 4.534663531518083e-05, + "loss": 0.2842, "step": 2547500 }, { "epoch": 1.53, - "learning_rate": 4.534472434652072e-05, - "loss": 0.372, + "learning_rate": 4.5344535349620264e-05, + "loss": 0.2861, "step": 2548000 }, { "epoch": 1.53, - "learning_rate": 4.534262438096015e-05, - "loss": 0.3669, + "learning_rate": 4.5342439583990824e-05, + "loss": 0.283, "step": 2548500 }, { "epoch": 1.53, - "learning_rate": 4.534052861533071e-05, - "loss": 0.3773, + "learning_rate": 4.534033961843026e-05, + "loss": 0.2832, "step": 2549000 }, { "epoch": 1.53, - "learning_rate": 4.5338428649770144e-05, - "loss": 0.3874, + "learning_rate": 4.53382396528697e-05, + "loss": 0.2953, "step": 2549500 }, { "epoch": 1.53, - "learning_rate": 4.533632868420958e-05, - "loss": 0.3683, + "learning_rate": 4.5336139687309125e-05, + "loss": 0.28, "step": 2550000 }, { "epoch": 1.53, - "learning_rate": 4.533422871864902e-05, - "loss": 0.3696, + "learning_rate": 4.5334043921679685e-05, + "loss": 0.284, "step": 2550500 }, { "epoch": 1.53, - "learning_rate": 4.533213295301957e-05, - "loss": 0.3731, + "learning_rate": 4.533194815605024e-05, + "loss": 0.2846, "step": 2551000 }, { "epoch": 1.53, - "learning_rate": 4.5330032987459005e-05, - "loss": 0.3778, + "learning_rate": 4.532984819048967e-05, + "loss": 0.2892, "step": 2551500 }, { "epoch": 1.53, - "learning_rate": 4.532793302189844e-05, - "loss": 0.3779, + "learning_rate": 4.532774822492911e-05, + "loss": 0.2874, "step": 2552000 }, { "epoch": 1.53, - "learning_rate": 4.532583305633788e-05, - "loss": 0.3786, + "learning_rate": 4.5325648259368546e-05, + "loss": 0.2847, "step": 2552500 }, { "epoch": 1.53, - "learning_rate": 4.532373309077731e-05, - "loss": 0.3779, + "learning_rate": 4.532354829380798e-05, + "loss": 0.2877, "step": 2553000 }, { "epoch": 1.53, - "learning_rate": 4.5321633125216745e-05, - "loss": 0.3785, + "learning_rate": 4.532145252817854e-05, + "loss": 0.2825, "step": 2553500 }, { "epoch": 1.53, - "learning_rate": 4.5319533159656185e-05, - "loss": 0.3659, + "learning_rate": 4.531935256261797e-05, + "loss": 0.2771, "step": 2554000 }, { "epoch": 1.53, - "learning_rate": 4.531743319409562e-05, - "loss": 0.3693, + "learning_rate": 4.5317252597057406e-05, + "loss": 0.2841, "step": 2554500 }, { "epoch": 1.53, - "learning_rate": 4.531533742846617e-05, - "loss": 0.376, + "learning_rate": 4.531515263149685e-05, + "loss": 0.2859, "step": 2555000 }, { "epoch": 1.53, - "learning_rate": 4.5313237462905606e-05, - "loss": 0.3785, + "learning_rate": 4.531305266593628e-05, + "loss": 0.2873, "step": 2555500 }, { "epoch": 1.53, - "learning_rate": 4.5311137497345046e-05, - "loss": 0.378, + "learning_rate": 4.5310952700375714e-05, + "loss": 0.2851, "step": 2556000 }, { "epoch": 1.53, - "learning_rate": 4.53090417317156e-05, - "loss": 0.3657, + "learning_rate": 4.5308852734815154e-05, + "loss": 0.2793, "step": 2556500 }, { "epoch": 1.53, - "learning_rate": 4.530694176615503e-05, - "loss": 0.3711, + "learning_rate": 4.530675276925458e-05, + "loss": 0.2806, "step": 2557000 }, { "epoch": 1.53, - "learning_rate": 4.5304841800594473e-05, - "loss": 0.3709, + "learning_rate": 4.5304652803694014e-05, + "loss": 0.281, "step": 2557500 }, { "epoch": 1.53, - "learning_rate": 4.530274183503391e-05, - "loss": 0.367, + "learning_rate": 4.5302557038064574e-05, + "loss": 0.2827, "step": 2558000 }, { "epoch": 1.53, - "learning_rate": 4.530064606940446e-05, - "loss": 0.3737, + "learning_rate": 4.5300457072504015e-05, + "loss": 0.2797, "step": 2558500 }, { "epoch": 1.53, - "learning_rate": 4.5298546103843894e-05, - "loss": 0.3693, + "learning_rate": 4.529835710694345e-05, + "loss": 0.2804, "step": 2559000 }, { "epoch": 1.53, - "learning_rate": 4.5296446138283334e-05, - "loss": 0.373, + "learning_rate": 4.5296257141382875e-05, + "loss": 0.288, "step": 2559500 }, { "epoch": 1.53, - "learning_rate": 4.529434617272277e-05, - "loss": 0.3679, + "learning_rate": 4.5294161375753435e-05, + "loss": 0.2805, "step": 2560000 }, { "epoch": 1.54, - "learning_rate": 4.52922462071622e-05, - "loss": 0.3762, + "learning_rate": 4.5292061410192875e-05, + "loss": 0.2851, "step": 2560500 }, { "epoch": 1.54, - "learning_rate": 4.529014624160164e-05, - "loss": 0.3646, + "learning_rate": 4.528996144463231e-05, + "loss": 0.2781, "step": 2561000 }, { "epoch": 1.54, - "learning_rate": 4.5288046276041075e-05, - "loss": 0.3617, + "learning_rate": 4.528786147907174e-05, + "loss": 0.2751, "step": 2561500 }, { "epoch": 1.54, - "learning_rate": 4.52859463104805e-05, - "loss": 0.379, + "learning_rate": 4.52857657134423e-05, + "loss": 0.2913, "step": 2562000 }, { "epoch": 1.54, - "learning_rate": 4.528385054485106e-05, - "loss": 0.3637, + "learning_rate": 4.5283665747881736e-05, + "loss": 0.2792, "step": 2562500 }, { "epoch": 1.54, - "learning_rate": 4.52817505792905e-05, - "loss": 0.3762, + "learning_rate": 4.528156578232117e-05, + "loss": 0.2877, "step": 2563000 }, { "epoch": 1.54, - "learning_rate": 4.5279650613729936e-05, - "loss": 0.3734, + "learning_rate": 4.527946581676061e-05, + "loss": 0.2831, "step": 2563500 }, { "epoch": 1.54, - "learning_rate": 4.527755064816937e-05, - "loss": 0.3762, + "learning_rate": 4.527736585120004e-05, + "loss": 0.2833, "step": 2564000 }, { "epoch": 1.54, - "learning_rate": 4.52754506826088e-05, - "loss": 0.3785, + "learning_rate": 4.52752700855706e-05, + "loss": 0.2877, "step": 2564500 }, { "epoch": 1.54, - "learning_rate": 4.527335491697936e-05, - "loss": 0.3754, + "learning_rate": 4.527317012001003e-05, + "loss": 0.2819, "step": 2565000 }, { "epoch": 1.54, - "learning_rate": 4.5271254951418796e-05, - "loss": 0.3698, + "learning_rate": 4.5271074354380584e-05, + "loss": 0.2841, "step": 2565500 }, { "epoch": 1.54, - "learning_rate": 4.5269154985858236e-05, - "loss": 0.3735, + "learning_rate": 4.5268974388820024e-05, + "loss": 0.2861, "step": 2566000 }, { "epoch": 1.54, - "learning_rate": 4.526705502029766e-05, - "loss": 0.3741, + "learning_rate": 4.526687442325946e-05, + "loss": 0.2811, "step": 2566500 }, { "epoch": 1.54, - "learning_rate": 4.52649550547371e-05, - "loss": 0.3681, + "learning_rate": 4.526477445769889e-05, + "loss": 0.2844, "step": 2567000 }, { "epoch": 1.54, - "learning_rate": 4.526285508917654e-05, - "loss": 0.366, + "learning_rate": 4.526267449213833e-05, + "loss": 0.2829, "step": 2567500 }, { "epoch": 1.54, - "learning_rate": 4.526075512361597e-05, - "loss": 0.3773, + "learning_rate": 4.5260578726508885e-05, + "loss": 0.2843, "step": 2568000 }, { "epoch": 1.54, - "learning_rate": 4.525865935798653e-05, - "loss": 0.3814, + "learning_rate": 4.525847876094832e-05, + "loss": 0.2853, "step": 2568500 }, { "epoch": 1.54, - "learning_rate": 4.525655939242596e-05, - "loss": 0.3722, + "learning_rate": 4.525637879538776e-05, + "loss": 0.2842, "step": 2569000 }, { "epoch": 1.54, - "learning_rate": 4.525446362679652e-05, - "loss": 0.3724, + "learning_rate": 4.525427882982719e-05, + "loss": 0.2821, "step": 2569500 }, { "epoch": 1.54, - "learning_rate": 4.525236366123596e-05, - "loss": 0.3718, + "learning_rate": 4.5252178864266625e-05, + "loss": 0.2807, "step": 2570000 }, { "epoch": 1.54, - "learning_rate": 4.525026369567539e-05, - "loss": 0.3691, + "learning_rate": 4.5250078898706066e-05, + "loss": 0.2811, "step": 2570500 }, { "epoch": 1.54, - "learning_rate": 4.524816373011483e-05, - "loss": 0.3738, + "learning_rate": 4.52479789331455e-05, + "loss": 0.2804, "step": 2571000 }, { "epoch": 1.54, - "learning_rate": 4.524606376455426e-05, - "loss": 0.3722, + "learning_rate": 4.5245878967584926e-05, + "loss": 0.2816, "step": 2571500 }, { "epoch": 1.54, - "learning_rate": 4.524396379899369e-05, - "loss": 0.372, + "learning_rate": 4.5243783201955486e-05, + "loss": 0.2822, "step": 2572000 }, { "epoch": 1.54, - "learning_rate": 4.524186383343313e-05, - "loss": 0.3842, + "learning_rate": 4.5241683236394926e-05, + "loss": 0.2829, "step": 2572500 }, { "epoch": 1.54, - "learning_rate": 4.5239763867872565e-05, - "loss": 0.3781, + "learning_rate": 4.523958327083436e-05, + "loss": 0.2862, "step": 2573000 }, { "epoch": 1.54, - "learning_rate": 4.5237663902312e-05, - "loss": 0.383, + "learning_rate": 4.523748330527379e-05, + "loss": 0.2899, "step": 2573500 }, { "epoch": 1.54, - "learning_rate": 4.523556393675144e-05, - "loss": 0.3726, + "learning_rate": 4.523538753964435e-05, + "loss": 0.2894, "step": 2574000 }, { "epoch": 1.54, - "learning_rate": 4.523346397119087e-05, - "loss": 0.3618, + "learning_rate": 4.523328757408379e-05, + "loss": 0.2815, "step": 2574500 }, { "epoch": 1.54, - "learning_rate": 4.5231364005630306e-05, - "loss": 0.3746, + "learning_rate": 4.523118760852322e-05, + "loss": 0.2808, "step": 2575000 }, { "epoch": 1.54, - "learning_rate": 4.522926824000086e-05, - "loss": 0.3742, + "learning_rate": 4.5229091842893774e-05, + "loss": 0.2777, "step": 2575500 }, { "epoch": 1.54, - "learning_rate": 4.52271682744403e-05, - "loss": 0.3783, + "learning_rate": 4.5226991877333214e-05, + "loss": 0.2803, "step": 2576000 }, { "epoch": 1.54, - "learning_rate": 4.522506830887973e-05, - "loss": 0.3655, + "learning_rate": 4.522489191177265e-05, + "loss": 0.2729, "step": 2576500 }, { "epoch": 1.55, - "learning_rate": 4.522296834331917e-05, - "loss": 0.3725, + "learning_rate": 4.522279194621208e-05, + "loss": 0.2839, "step": 2577000 }, { "epoch": 1.55, - "learning_rate": 4.522087257768972e-05, - "loss": 0.3825, + "learning_rate": 4.522069198065152e-05, + "loss": 0.2824, "step": 2577500 }, { "epoch": 1.55, - "learning_rate": 4.521877261212916e-05, - "loss": 0.3673, + "learning_rate": 4.5218592015090955e-05, + "loss": 0.2779, "step": 2578000 }, { "epoch": 1.55, - "learning_rate": 4.5216672646568594e-05, - "loss": 0.3633, + "learning_rate": 4.521649204953038e-05, + "loss": 0.2794, "step": 2578500 }, { "epoch": 1.55, - "learning_rate": 4.5214572681008034e-05, - "loss": 0.3647, + "learning_rate": 4.521439208396982e-05, + "loss": 0.2786, "step": 2579000 }, { "epoch": 1.55, - "learning_rate": 4.521247691537859e-05, - "loss": 0.3736, + "learning_rate": 4.5212292118409255e-05, + "loss": 0.2845, "step": 2579500 }, { "epoch": 1.55, - "learning_rate": 4.521037694981802e-05, - "loss": 0.3809, + "learning_rate": 4.521019215284869e-05, + "loss": 0.2877, "step": 2580000 }, { "epoch": 1.55, - "learning_rate": 4.5208276984257455e-05, - "loss": 0.3733, + "learning_rate": 4.520809218728813e-05, + "loss": 0.2876, "step": 2580500 }, { "epoch": 1.55, - "learning_rate": 4.5206177018696895e-05, - "loss": 0.3759, + "learning_rate": 4.520599222172756e-05, + "loss": 0.2812, "step": 2581000 }, { "epoch": 1.55, - "learning_rate": 4.520407705313633e-05, - "loss": 0.3785, + "learning_rate": 4.5203896456098116e-05, + "loss": 0.2804, "step": 2581500 }, { "epoch": 1.55, - "learning_rate": 4.520198128750688e-05, - "loss": 0.3736, + "learning_rate": 4.520179649053755e-05, + "loss": 0.2818, "step": 2582000 }, { "epoch": 1.55, - "learning_rate": 4.5199881321946316e-05, - "loss": 0.3662, + "learning_rate": 4.519969652497699e-05, + "loss": 0.2755, "step": 2582500 }, { "epoch": 1.55, - "learning_rate": 4.5197781356385756e-05, - "loss": 0.3661, + "learning_rate": 4.519759655941642e-05, + "loss": 0.2856, "step": 2583000 }, { "epoch": 1.55, - "learning_rate": 4.519568139082519e-05, - "loss": 0.3713, + "learning_rate": 4.519550079378698e-05, + "loss": 0.2825, "step": 2583500 }, { "epoch": 1.55, - "learning_rate": 4.519358142526462e-05, - "loss": 0.3779, + "learning_rate": 4.519340082822642e-05, + "loss": 0.2837, "step": 2584000 }, { "epoch": 1.55, - "learning_rate": 4.519148145970406e-05, - "loss": 0.3782, + "learning_rate": 4.519130086266585e-05, + "loss": 0.2862, "step": 2584500 }, { "epoch": 1.55, - "learning_rate": 4.5189385694074617e-05, - "loss": 0.3655, + "learning_rate": 4.5189200897105284e-05, + "loss": 0.2814, "step": 2585000 }, { "epoch": 1.55, - "learning_rate": 4.518728572851405e-05, - "loss": 0.3711, + "learning_rate": 4.5187100931544724e-05, + "loss": 0.2862, "step": 2585500 }, { "epoch": 1.55, - "learning_rate": 4.518518576295349e-05, - "loss": 0.3708, + "learning_rate": 4.518500516591528e-05, + "loss": 0.2818, "step": 2586000 }, { "epoch": 1.55, - "learning_rate": 4.5183085797392924e-05, - "loss": 0.3737, + "learning_rate": 4.518290520035471e-05, + "loss": 0.2834, "step": 2586500 }, { "epoch": 1.55, - "learning_rate": 4.518098583183236e-05, - "loss": 0.3679, + "learning_rate": 4.5180805234794145e-05, + "loss": 0.2815, "step": 2587000 }, { "epoch": 1.55, - "learning_rate": 4.51788858662718e-05, - "loss": 0.3671, + "learning_rate": 4.5178705269233585e-05, + "loss": 0.2779, "step": 2587500 }, { "epoch": 1.55, - "learning_rate": 4.517678590071123e-05, - "loss": 0.3685, + "learning_rate": 4.517660950360414e-05, + "loss": 0.2843, "step": 2588000 }, { "epoch": 1.55, - "learning_rate": 4.5174685935150664e-05, - "loss": 0.3798, + "learning_rate": 4.517450953804357e-05, + "loss": 0.2852, "step": 2588500 }, { "epoch": 1.55, - "learning_rate": 4.517259016952122e-05, - "loss": 0.3678, + "learning_rate": 4.517240957248301e-05, + "loss": 0.2798, "step": 2589000 }, { "epoch": 1.55, - "learning_rate": 4.517049020396066e-05, - "loss": 0.3667, + "learning_rate": 4.5170309606922446e-05, + "loss": 0.2775, "step": 2589500 }, { "epoch": 1.55, - "learning_rate": 4.516839023840009e-05, - "loss": 0.3631, + "learning_rate": 4.5168213841293006e-05, + "loss": 0.2822, "step": 2590000 }, { "epoch": 1.55, - "learning_rate": 4.5166290272839525e-05, - "loss": 0.3564, + "learning_rate": 4.516611387573243e-05, + "loss": 0.2728, "step": 2590500 }, { "epoch": 1.55, - "learning_rate": 4.516419450721008e-05, - "loss": 0.3664, + "learning_rate": 4.516401811010299e-05, + "loss": 0.2775, "step": 2591000 }, { "epoch": 1.55, - "learning_rate": 4.516209454164952e-05, - "loss": 0.373, + "learning_rate": 4.516191814454243e-05, + "loss": 0.2808, "step": 2591500 }, { "epoch": 1.55, - "learning_rate": 4.515999457608895e-05, - "loss": 0.3756, + "learning_rate": 4.515981817898187e-05, + "loss": 0.282, "step": 2592000 }, { "epoch": 1.55, - "learning_rate": 4.5157894610528386e-05, - "loss": 0.3842, + "learning_rate": 4.51577182134213e-05, + "loss": 0.2872, "step": 2592500 }, { "epoch": 1.55, - "learning_rate": 4.5155798844898946e-05, - "loss": 0.375, + "learning_rate": 4.5155618247860734e-05, + "loss": 0.2821, "step": 2593000 }, { "epoch": 1.55, - "learning_rate": 4.51537030792695e-05, - "loss": 0.373, + "learning_rate": 4.515351828230017e-05, + "loss": 0.2843, "step": 2593500 }, { "epoch": 1.56, - "learning_rate": 4.515160311370893e-05, - "loss": 0.3679, + "learning_rate": 4.51514183167396e-05, + "loss": 0.2846, "step": 2594000 }, { "epoch": 1.56, - "learning_rate": 4.5149503148148367e-05, - "loss": 0.3787, + "learning_rate": 4.514931835117904e-05, + "loss": 0.2881, "step": 2594500 }, { "epoch": 1.56, - "learning_rate": 4.514740318258781e-05, - "loss": 0.3755, + "learning_rate": 4.514722678548072e-05, + "loss": 0.2817, "step": 2595000 }, { "epoch": 1.56, - "learning_rate": 4.514530321702724e-05, - "loss": 0.3633, + "learning_rate": 4.5145126819920155e-05, + "loss": 0.2739, "step": 2595500 }, { "epoch": 1.56, - "learning_rate": 4.5143203251466674e-05, - "loss": 0.3653, + "learning_rate": 4.514302685435959e-05, + "loss": 0.279, "step": 2596000 }, { "epoch": 1.56, - "learning_rate": 4.5141103285906114e-05, - "loss": 0.3724, + "learning_rate": 4.514092688879903e-05, + "loss": 0.2853, "step": 2596500 }, { "epoch": 1.56, - "learning_rate": 4.513900332034555e-05, - "loss": 0.3764, + "learning_rate": 4.513882692323846e-05, + "loss": 0.2845, "step": 2597000 }, { "epoch": 1.56, - "learning_rate": 4.513691175464722e-05, - "loss": 0.377, + "learning_rate": 4.513672695767789e-05, + "loss": 0.2819, "step": 2597500 }, { "epoch": 1.56, - "learning_rate": 4.5134811789086655e-05, - "loss": 0.3775, + "learning_rate": 4.513462699211733e-05, + "loss": 0.2827, "step": 2598000 }, { "epoch": 1.56, - "learning_rate": 4.5132711823526095e-05, - "loss": 0.3699, + "learning_rate": 4.513252702655676e-05, + "loss": 0.2797, "step": 2598500 }, { "epoch": 1.56, - "learning_rate": 4.513061185796553e-05, - "loss": 0.3646, + "learning_rate": 4.5130427060996196e-05, + "loss": 0.2782, "step": 2599000 }, { "epoch": 1.56, - "learning_rate": 4.512851189240496e-05, - "loss": 0.3714, + "learning_rate": 4.5128327095435636e-05, + "loss": 0.279, "step": 2599500 }, { "epoch": 1.56, - "learning_rate": 4.5126416126775515e-05, - "loss": 0.3848, + "learning_rate": 4.512623132980619e-05, + "loss": 0.2919, "step": 2600000 }, { "epoch": 1.56, - "eval_loss": 0.35718128085136414, - "eval_runtime": 1116.8014, - "eval_samples_per_second": 471.633, - "eval_steps_per_second": 78.606, + "eval_loss": 0.2566536068916321, + "eval_runtime": 1454.846, + "eval_samples_per_second": 362.045, + "eval_steps_per_second": 60.341, "step": 2600000 }, { "epoch": 1.56, - "learning_rate": 4.5124316161214956e-05, - "loss": 0.3717, + "learning_rate": 4.512413136424562e-05, + "loss": 0.2818, "step": 2600500 }, { "epoch": 1.56, - "learning_rate": 4.512221619565439e-05, - "loss": 0.369, + "learning_rate": 4.5122031398685056e-05, + "loss": 0.2757, "step": 2601000 }, { "epoch": 1.56, - "learning_rate": 4.512011623009382e-05, - "loss": 0.3685, + "learning_rate": 4.51199314331245e-05, + "loss": 0.2833, "step": 2601500 }, { "epoch": 1.56, - "learning_rate": 4.511802046446438e-05, - "loss": 0.3805, + "learning_rate": 4.511783146756393e-05, + "loss": 0.2843, "step": 2602000 }, { "epoch": 1.56, - "learning_rate": 4.5115920498903816e-05, - "loss": 0.3729, + "learning_rate": 4.5115735701934484e-05, + "loss": 0.2802, "step": 2602500 }, { "epoch": 1.56, - "learning_rate": 4.511382053334325e-05, - "loss": 0.3644, + "learning_rate": 4.5113639936305044e-05, + "loss": 0.2799, "step": 2603000 }, { "epoch": 1.56, - "learning_rate": 4.511172056778268e-05, - "loss": 0.3706, + "learning_rate": 4.51115441706756e-05, + "loss": 0.2837, "step": 2603500 }, { "epoch": 1.56, - "learning_rate": 4.5109620602222123e-05, - "loss": 0.3752, + "learning_rate": 4.510944420511504e-05, + "loss": 0.2857, "step": 2604000 }, { "epoch": 1.56, - "learning_rate": 4.510752063666156e-05, - "loss": 0.3626, + "learning_rate": 4.510734423955447e-05, + "loss": 0.2738, "step": 2604500 }, { "epoch": 1.56, - "learning_rate": 4.510542067110099e-05, - "loss": 0.3771, + "learning_rate": 4.5105244273993905e-05, + "loss": 0.2834, "step": 2605000 }, { "epoch": 1.56, - "learning_rate": 4.510332070554043e-05, - "loss": 0.3787, + "learning_rate": 4.5103144308433345e-05, + "loss": 0.2826, "step": 2605500 }, { "epoch": 1.56, - "learning_rate": 4.5101224939910984e-05, - "loss": 0.3776, + "learning_rate": 4.510104434287278e-05, + "loss": 0.2805, "step": 2606000 }, { "epoch": 1.56, - "learning_rate": 4.509912497435042e-05, - "loss": 0.3756, + "learning_rate": 4.509894437731221e-05, + "loss": 0.283, "step": 2606500 }, { "epoch": 1.56, - "learning_rate": 4.509702500878986e-05, - "loss": 0.3746, + "learning_rate": 4.5096844411751645e-05, + "loss": 0.2861, "step": 2607000 }, { "epoch": 1.56, - "learning_rate": 4.509492924316041e-05, - "loss": 0.3719, + "learning_rate": 4.509474444619108e-05, + "loss": 0.2827, "step": 2607500 }, { "epoch": 1.56, - "learning_rate": 4.509283347753097e-05, - "loss": 0.3645, + "learning_rate": 4.509264448063051e-05, + "loss": 0.2751, "step": 2608000 }, { "epoch": 1.56, - "learning_rate": 4.5090733511970405e-05, - "loss": 0.3676, + "learning_rate": 4.509054451506995e-05, + "loss": 0.2777, "step": 2608500 }, { "epoch": 1.56, - "learning_rate": 4.508863354640984e-05, - "loss": 0.3677, + "learning_rate": 4.5088444549509386e-05, + "loss": 0.2762, "step": 2609000 }, { "epoch": 1.56, - "learning_rate": 4.508653358084927e-05, - "loss": 0.3742, + "learning_rate": 4.508634458394882e-05, + "loss": 0.2847, "step": 2609500 }, { "epoch": 1.56, - "learning_rate": 4.5084433615288706e-05, - "loss": 0.3797, + "learning_rate": 4.50842530182505e-05, + "loss": 0.2885, "step": 2610000 }, { "epoch": 1.57, - "learning_rate": 4.508233364972814e-05, - "loss": 0.3596, + "learning_rate": 4.508215305268994e-05, + "loss": 0.2763, "step": 2610500 }, { "epoch": 1.57, - "learning_rate": 4.508023368416758e-05, - "loss": 0.368, + "learning_rate": 4.5080053087129374e-05, + "loss": 0.2848, "step": 2611000 }, { "epoch": 1.57, - "learning_rate": 4.507813371860701e-05, - "loss": 0.3715, + "learning_rate": 4.507795312156881e-05, + "loss": 0.2858, "step": 2611500 }, { "epoch": 1.57, - "learning_rate": 4.5076037952977566e-05, - "loss": 0.3785, + "learning_rate": 4.507585315600824e-05, + "loss": 0.2845, "step": 2612000 }, { "epoch": 1.57, - "learning_rate": 4.5073937987417007e-05, - "loss": 0.3624, + "learning_rate": 4.5073753190447674e-05, + "loss": 0.281, "step": 2612500 }, { "epoch": 1.57, - "learning_rate": 4.507183802185644e-05, - "loss": 0.3774, + "learning_rate": 4.507165322488711e-05, + "loss": 0.2792, "step": 2613000 }, { "epoch": 1.57, - "learning_rate": 4.5069738056295873e-05, - "loss": 0.3774, + "learning_rate": 4.506955325932655e-05, + "loss": 0.2839, "step": 2613500 }, { "epoch": 1.57, - "learning_rate": 4.5067638090735314e-05, - "loss": 0.3681, + "learning_rate": 4.506745749369711e-05, + "loss": 0.2879, "step": 2614000 }, { "epoch": 1.57, - "learning_rate": 4.506553812517475e-05, - "loss": 0.3744, + "learning_rate": 4.5065357528136535e-05, + "loss": 0.2818, "step": 2614500 }, { "epoch": 1.57, - "learning_rate": 4.506343815961418e-05, - "loss": 0.3684, + "learning_rate": 4.506325756257597e-05, + "loss": 0.2789, "step": 2615000 }, { "epoch": 1.57, - "learning_rate": 4.506133819405362e-05, - "loss": 0.3779, + "learning_rate": 4.506115759701541e-05, + "loss": 0.2857, "step": 2615500 }, { "epoch": 1.57, - "learning_rate": 4.5059242428424174e-05, - "loss": 0.3851, + "learning_rate": 4.505906183138597e-05, + "loss": 0.2842, "step": 2616000 }, { "epoch": 1.57, - "learning_rate": 4.505714246286361e-05, - "loss": 0.3659, + "learning_rate": 4.5056961865825395e-05, + "loss": 0.2778, "step": 2616500 }, { "epoch": 1.57, - "learning_rate": 4.505504249730304e-05, - "loss": 0.3737, + "learning_rate": 4.5054861900264836e-05, + "loss": 0.2824, "step": 2617000 }, { "epoch": 1.57, - "learning_rate": 4.5052946731673595e-05, - "loss": 0.3691, + "learning_rate": 4.505276193470427e-05, + "loss": 0.2853, "step": 2617500 }, { "epoch": 1.57, - "learning_rate": 4.5050846766113035e-05, - "loss": 0.3698, + "learning_rate": 4.50506619691437e-05, + "loss": 0.2819, "step": 2618000 }, { "epoch": 1.57, - "learning_rate": 4.504874680055247e-05, - "loss": 0.3657, + "learning_rate": 4.504856620351426e-05, + "loss": 0.2784, "step": 2618500 }, { "epoch": 1.57, - "learning_rate": 4.50466468349919e-05, - "loss": 0.3759, + "learning_rate": 4.5046466237953696e-05, + "loss": 0.2816, "step": 2619000 }, { "epoch": 1.57, - "learning_rate": 4.504454686943134e-05, - "loss": 0.3757, + "learning_rate": 4.504437047232426e-05, + "loss": 0.2865, "step": 2619500 }, { "epoch": 1.57, - "learning_rate": 4.5042446903870776e-05, - "loss": 0.3745, + "learning_rate": 4.504227050676369e-05, + "loss": 0.2799, "step": 2620000 }, { "epoch": 1.57, - "learning_rate": 4.504035113824133e-05, - "loss": 0.3862, + "learning_rate": 4.5040170541203124e-05, + "loss": 0.2899, "step": 2620500 }, { "epoch": 1.57, - "learning_rate": 4.503825117268077e-05, - "loss": 0.3798, + "learning_rate": 4.5038070575642564e-05, + "loss": 0.29, "step": 2621000 }, { "epoch": 1.57, - "learning_rate": 4.50361512071202e-05, - "loss": 0.3732, + "learning_rate": 4.503597061008199e-05, + "loss": 0.2822, "step": 2621500 }, { "epoch": 1.57, - "learning_rate": 4.5034051241559637e-05, - "loss": 0.3854, + "learning_rate": 4.5033870644521424e-05, + "loss": 0.2847, "step": 2622000 }, { "epoch": 1.57, - "learning_rate": 4.503195127599908e-05, - "loss": 0.3747, + "learning_rate": 4.5031774878891984e-05, + "loss": 0.2803, "step": 2622500 }, { "epoch": 1.57, - "learning_rate": 4.502985131043851e-05, - "loss": 0.3661, + "learning_rate": 4.5029674913331425e-05, + "loss": 0.2813, "step": 2623000 }, { "epoch": 1.57, - "learning_rate": 4.5027755544809064e-05, - "loss": 0.381, + "learning_rate": 4.502757494777086e-05, + "loss": 0.2921, "step": 2623500 }, { "epoch": 1.57, - "learning_rate": 4.50256555792485e-05, - "loss": 0.3767, + "learning_rate": 4.502547498221029e-05, + "loss": 0.2786, "step": 2624000 }, { "epoch": 1.57, - "learning_rate": 4.502355561368794e-05, - "loss": 0.3772, + "learning_rate": 4.5023375016649725e-05, + "loss": 0.2859, "step": 2624500 }, { "epoch": 1.57, - "learning_rate": 4.502145564812737e-05, - "loss": 0.3886, + "learning_rate": 4.502127505108916e-05, + "loss": 0.2906, "step": 2625000 }, { "epoch": 1.57, - "learning_rate": 4.5019355682566804e-05, - "loss": 0.3701, + "learning_rate": 4.50191750855286e-05, + "loss": 0.2841, "step": 2625500 }, { "epoch": 1.57, - "learning_rate": 4.5017255717006245e-05, - "loss": 0.3709, + "learning_rate": 4.501707511996803e-05, + "loss": 0.283, "step": 2626000 }, { "epoch": 1.57, - "learning_rate": 4.501515575144568e-05, - "loss": 0.3678, + "learning_rate": 4.5014975154407466e-05, + "loss": 0.2784, "step": 2626500 }, { "epoch": 1.57, - "learning_rate": 4.5013055785885105e-05, - "loss": 0.3787, + "learning_rate": 4.501287938877802e-05, + "loss": 0.287, "step": 2627000 }, { "epoch": 1.58, - "learning_rate": 4.5010960020255665e-05, - "loss": 0.3733, + "learning_rate": 4.501077942321746e-05, + "loss": 0.2844, "step": 2627500 }, { "epoch": 1.58, - "learning_rate": 4.5008864254626225e-05, - "loss": 0.3738, + "learning_rate": 4.500867945765689e-05, + "loss": 0.2833, "step": 2628000 }, { "epoch": 1.58, - "learning_rate": 4.500676428906566e-05, - "loss": 0.3736, + "learning_rate": 4.5006583692027446e-05, + "loss": 0.2871, "step": 2628500 }, { "epoch": 1.58, - "learning_rate": 4.500466432350509e-05, - "loss": 0.3765, + "learning_rate": 4.500448372646688e-05, + "loss": 0.2809, "step": 2629000 }, { "epoch": 1.58, - "learning_rate": 4.500256435794453e-05, - "loss": 0.3822, + "learning_rate": 4.500238376090632e-05, + "loss": 0.2823, "step": 2629500 }, { "epoch": 1.58, - "learning_rate": 4.5000464392383966e-05, - "loss": 0.3657, + "learning_rate": 4.5000283795345754e-05, + "loss": 0.2788, "step": 2630000 }, { "epoch": 1.58, - "learning_rate": 4.49983644268234e-05, - "loss": 0.3721, + "learning_rate": 4.499818382978519e-05, + "loss": 0.2816, "step": 2630500 }, { "epoch": 1.58, - "learning_rate": 4.499626446126284e-05, - "loss": 0.3862, + "learning_rate": 4.499608386422463e-05, + "loss": 0.2857, "step": 2631000 }, { "epoch": 1.58, - "learning_rate": 4.499416449570227e-05, - "loss": 0.374, + "learning_rate": 4.499398389866406e-05, + "loss": 0.281, "step": 2631500 }, { "epoch": 1.58, - "learning_rate": 4.49920645301417e-05, - "loss": 0.3676, + "learning_rate": 4.4991883933103494e-05, + "loss": 0.2769, "step": 2632000 }, { "epoch": 1.58, - "learning_rate": 4.498996876451226e-05, - "loss": 0.3767, + "learning_rate": 4.4989783967542934e-05, + "loss": 0.2802, "step": 2632500 }, { "epoch": 1.58, - "learning_rate": 4.49878687989517e-05, - "loss": 0.3711, + "learning_rate": 4.498768820191349e-05, + "loss": 0.2815, "step": 2633000 }, { "epoch": 1.58, - "learning_rate": 4.4985768833391134e-05, - "loss": 0.3697, + "learning_rate": 4.498558823635292e-05, + "loss": 0.2838, "step": 2633500 }, { "epoch": 1.58, - "learning_rate": 4.498366886783056e-05, - "loss": 0.3681, + "learning_rate": 4.498348827079236e-05, + "loss": 0.2838, "step": 2634000 }, { "epoch": 1.58, - "learning_rate": 4.498157310220112e-05, - "loss": 0.3652, + "learning_rate": 4.4981388305231795e-05, + "loss": 0.2785, "step": 2634500 }, { "epoch": 1.58, - "learning_rate": 4.497947313664056e-05, - "loss": 0.3725, + "learning_rate": 4.497929253960235e-05, + "loss": 0.2816, "step": 2635000 }, { "epoch": 1.58, - "learning_rate": 4.4977373171079995e-05, - "loss": 0.3635, + "learning_rate": 4.497719257404178e-05, + "loss": 0.275, "step": 2635500 }, { "epoch": 1.58, - "learning_rate": 4.497527320551943e-05, - "loss": 0.3584, + "learning_rate": 4.497509260848122e-05, + "loss": 0.2752, "step": 2636000 }, { "epoch": 1.58, - "learning_rate": 4.497317743988999e-05, - "loss": 0.3741, + "learning_rate": 4.4972992642920656e-05, + "loss": 0.281, "step": 2636500 }, { "epoch": 1.58, - "learning_rate": 4.497107747432942e-05, - "loss": 0.3718, + "learning_rate": 4.497089687729121e-05, + "loss": 0.2849, "step": 2637000 }, { "epoch": 1.58, - "learning_rate": 4.4968981708699976e-05, - "loss": 0.3731, + "learning_rate": 4.496879691173064e-05, + "loss": 0.2876, "step": 2637500 }, { "epoch": 1.58, - "learning_rate": 4.496688174313941e-05, - "loss": 0.3666, + "learning_rate": 4.496669694617008e-05, + "loss": 0.2769, "step": 2638000 }, { "epoch": 1.58, - "learning_rate": 4.496478177757885e-05, - "loss": 0.3734, + "learning_rate": 4.496459698060952e-05, + "loss": 0.2823, "step": 2638500 }, { "epoch": 1.58, - "learning_rate": 4.496268181201828e-05, - "loss": 0.3663, + "learning_rate": 4.496249701504895e-05, + "loss": 0.2796, "step": 2639000 }, { "epoch": 1.58, - "learning_rate": 4.4960581846457716e-05, - "loss": 0.379, + "learning_rate": 4.496039704948839e-05, + "loss": 0.2819, "step": 2639500 }, { "epoch": 1.58, - "learning_rate": 4.4958481880897156e-05, - "loss": 0.3676, + "learning_rate": 4.4958297083927824e-05, + "loss": 0.2766, "step": 2640000 }, { "epoch": 1.58, - "learning_rate": 4.495638611526771e-05, - "loss": 0.3651, + "learning_rate": 4.495619711836726e-05, + "loss": 0.2762, "step": 2640500 }, { "epoch": 1.58, - "learning_rate": 4.4954286149707143e-05, - "loss": 0.378, + "learning_rate": 4.495410135273782e-05, + "loss": 0.28, "step": 2641000 }, { "epoch": 1.58, - "learning_rate": 4.495218618414658e-05, - "loss": 0.3784, + "learning_rate": 4.495200138717725e-05, + "loss": 0.2815, "step": 2641500 }, { "epoch": 1.58, - "learning_rate": 4.495008621858602e-05, - "loss": 0.3616, + "learning_rate": 4.4949901421616685e-05, + "loss": 0.2771, "step": 2642000 }, { "epoch": 1.58, - "learning_rate": 4.494798625302545e-05, - "loss": 0.3761, + "learning_rate": 4.4947801456056125e-05, + "loss": 0.287, "step": 2642500 }, { "epoch": 1.58, - "learning_rate": 4.494588628746489e-05, - "loss": 0.3734, + "learning_rate": 4.494570569042668e-05, + "loss": 0.2828, "step": 2643000 }, { "epoch": 1.58, - "learning_rate": 4.4943786321904324e-05, - "loss": 0.3794, + "learning_rate": 4.494360572486611e-05, + "loss": 0.2893, "step": 2643500 }, { "epoch": 1.59, - "learning_rate": 4.494168635634375e-05, - "loss": 0.3691, + "learning_rate": 4.4941505759305545e-05, + "loss": 0.2774, "step": 2644000 }, { "epoch": 1.59, - "learning_rate": 4.493959059071431e-05, - "loss": 0.365, + "learning_rate": 4.4939405793744986e-05, + "loss": 0.2769, "step": 2644500 }, { "epoch": 1.59, - "learning_rate": 4.493749062515375e-05, - "loss": 0.3648, + "learning_rate": 4.493730582818442e-05, + "loss": 0.2754, "step": 2645000 }, { "epoch": 1.59, - "learning_rate": 4.4935390659593185e-05, - "loss": 0.3688, + "learning_rate": 4.493521006255497e-05, + "loss": 0.2783, "step": 2645500 }, { "epoch": 1.59, - "learning_rate": 4.493329069403261e-05, - "loss": 0.3668, + "learning_rate": 4.4933110096994406e-05, + "loss": 0.2806, "step": 2646000 }, { "epoch": 1.59, - "learning_rate": 4.493119072847205e-05, - "loss": 0.3834, + "learning_rate": 4.4931010131433846e-05, + "loss": 0.2879, "step": 2646500 }, { "epoch": 1.59, - "learning_rate": 4.4929090762911485e-05, - "loss": 0.3795, + "learning_rate": 4.492891016587328e-05, + "loss": 0.2866, "step": 2647000 }, { "epoch": 1.59, - "learning_rate": 4.492699079735092e-05, - "loss": 0.3704, + "learning_rate": 4.492681440024383e-05, + "loss": 0.2735, "step": 2647500 }, { "epoch": 1.59, - "learning_rate": 4.492489503172148e-05, - "loss": 0.3769, + "learning_rate": 4.4924714434683274e-05, + "loss": 0.284, "step": 2648000 }, { "epoch": 1.59, - "learning_rate": 4.492279506616091e-05, - "loss": 0.3696, + "learning_rate": 4.492261866905383e-05, + "loss": 0.2761, "step": 2648500 }, { "epoch": 1.59, - "learning_rate": 4.4920695100600346e-05, - "loss": 0.3739, + "learning_rate": 4.492051870349326e-05, + "loss": 0.2821, "step": 2649000 }, { "epoch": 1.59, - "learning_rate": 4.4918595135039786e-05, - "loss": 0.3643, + "learning_rate": 4.4918418737932694e-05, + "loss": 0.2799, "step": 2649500 }, { "epoch": 1.59, - "learning_rate": 4.491649936941035e-05, - "loss": 0.3742, + "learning_rate": 4.4916318772372134e-05, + "loss": 0.2838, "step": 2650000 }, { "epoch": 1.59, - "learning_rate": 4.491439940384978e-05, - "loss": 0.363, + "learning_rate": 4.491421880681157e-05, + "loss": 0.2741, "step": 2650500 }, { "epoch": 1.59, - "learning_rate": 4.491229943828921e-05, - "loss": 0.3689, + "learning_rate": 4.491212304118212e-05, + "loss": 0.28, "step": 2651000 }, { "epoch": 1.59, - "learning_rate": 4.491019947272865e-05, - "loss": 0.37, + "learning_rate": 4.4910023075621555e-05, + "loss": 0.2777, "step": 2651500 }, { "epoch": 1.59, - "learning_rate": 4.490809950716808e-05, - "loss": 0.3678, + "learning_rate": 4.4907923110060995e-05, + "loss": 0.28, "step": 2652000 }, { "epoch": 1.59, - "learning_rate": 4.490600374153864e-05, - "loss": 0.3769, + "learning_rate": 4.490582314450043e-05, + "loss": 0.2861, "step": 2652500 }, { "epoch": 1.59, - "learning_rate": 4.4903903775978074e-05, - "loss": 0.3735, + "learning_rate": 4.490372317893986e-05, + "loss": 0.2814, "step": 2653000 }, { "epoch": 1.59, - "learning_rate": 4.490180381041751e-05, - "loss": 0.378, + "learning_rate": 4.49016232133793e-05, + "loss": 0.2834, "step": 2653500 }, { "epoch": 1.59, - "learning_rate": 4.489970384485694e-05, - "loss": 0.3739, + "learning_rate": 4.4899523247818736e-05, + "loss": 0.2841, "step": 2654000 }, { "epoch": 1.59, - "learning_rate": 4.4897603879296375e-05, - "loss": 0.3722, + "learning_rate": 4.489742748218929e-05, + "loss": 0.289, "step": 2654500 }, { "epoch": 1.59, - "learning_rate": 4.4895503913735815e-05, - "loss": 0.3767, + "learning_rate": 4.489532751662873e-05, + "loss": 0.283, "step": 2655000 }, { "epoch": 1.59, - "learning_rate": 4.489340394817525e-05, - "loss": 0.3751, + "learning_rate": 4.489322755106816e-05, + "loss": 0.2783, "step": 2655500 }, { "epoch": 1.59, - "learning_rate": 4.489130398261468e-05, - "loss": 0.3754, + "learning_rate": 4.4891127585507596e-05, + "loss": 0.2827, "step": 2656000 }, { "epoch": 1.59, - "learning_rate": 4.488920821698524e-05, - "loss": 0.3741, + "learning_rate": 4.488903181987815e-05, + "loss": 0.2794, "step": 2656500 }, { "epoch": 1.59, - "learning_rate": 4.4887108251424676e-05, - "loss": 0.3696, + "learning_rate": 4.488693185431759e-05, + "loss": 0.2789, "step": 2657000 }, { "epoch": 1.59, - "learning_rate": 4.4885012485795236e-05, - "loss": 0.366, + "learning_rate": 4.4884831888757024e-05, + "loss": 0.2789, "step": 2657500 }, { "epoch": 1.59, - "learning_rate": 4.488291252023466e-05, - "loss": 0.3694, + "learning_rate": 4.488273192319646e-05, + "loss": 0.2796, "step": 2658000 }, { "epoch": 1.59, - "learning_rate": 4.48808125546741e-05, - "loss": 0.3714, + "learning_rate": 4.48806319576359e-05, + "loss": 0.284, "step": 2658500 }, { "epoch": 1.59, - "learning_rate": 4.4878712589113536e-05, - "loss": 0.3701, + "learning_rate": 4.487853199207533e-05, + "loss": 0.2816, "step": 2659000 }, { "epoch": 1.59, - "learning_rate": 4.487661262355297e-05, - "loss": 0.3675, + "learning_rate": 4.4876432026514764e-05, + "loss": 0.2812, "step": 2659500 }, { "epoch": 1.59, - "learning_rate": 4.487451265799241e-05, - "loss": 0.37, + "learning_rate": 4.4874332060954204e-05, + "loss": 0.2778, "step": 2660000 }, { "epoch": 1.6, - "learning_rate": 4.4872416892362964e-05, - "loss": 0.3822, + "learning_rate": 4.487223629532476e-05, + "loss": 0.2851, "step": 2660500 }, { "epoch": 1.6, - "learning_rate": 4.48703169268024e-05, - "loss": 0.3715, + "learning_rate": 4.487013632976419e-05, + "loss": 0.2798, "step": 2661000 }, { "epoch": 1.6, - "learning_rate": 4.486821696124183e-05, - "loss": 0.3689, + "learning_rate": 4.486803636420363e-05, + "loss": 0.2797, "step": 2661500 }, { "epoch": 1.6, - "learning_rate": 4.486611699568127e-05, - "loss": 0.3706, + "learning_rate": 4.4865936398643065e-05, + "loss": 0.2801, "step": 2662000 }, { "epoch": 1.6, - "learning_rate": 4.4864017030120704e-05, - "loss": 0.3836, + "learning_rate": 4.486383643308249e-05, + "loss": 0.2837, "step": 2662500 }, { "epoch": 1.6, - "learning_rate": 4.486192126449126e-05, - "loss": 0.3769, + "learning_rate": 4.486173646752193e-05, + "loss": 0.2776, "step": 2663000 }, { "epoch": 1.6, - "learning_rate": 4.485982549886182e-05, - "loss": 0.3646, + "learning_rate": 4.485964070189249e-05, + "loss": 0.2807, "step": 2663500 }, { "epoch": 1.6, - "learning_rate": 4.485772553330126e-05, - "loss": 0.3772, + "learning_rate": 4.4857540736331926e-05, + "loss": 0.2858, "step": 2664000 }, { "epoch": 1.6, - "learning_rate": 4.485562556774069e-05, - "loss": 0.3682, + "learning_rate": 4.485544077077136e-05, + "loss": 0.28, "step": 2664500 }, { "epoch": 1.6, - "learning_rate": 4.485352560218012e-05, - "loss": 0.3737, + "learning_rate": 4.485334500514191e-05, + "loss": 0.2843, "step": 2665000 }, { "epoch": 1.6, - "learning_rate": 4.485142563661956e-05, - "loss": 0.3622, + "learning_rate": 4.485124503958135e-05, + "loss": 0.2775, "step": 2665500 }, { "epoch": 1.6, - "learning_rate": 4.484932567105899e-05, - "loss": 0.3702, + "learning_rate": 4.4849145074020787e-05, + "loss": 0.2815, "step": 2666000 }, { "epoch": 1.6, - "learning_rate": 4.4847225705498426e-05, - "loss": 0.3708, + "learning_rate": 4.484704510846022e-05, + "loss": 0.2803, "step": 2666500 }, { "epoch": 1.6, - "learning_rate": 4.4845125739937866e-05, - "loss": 0.37, + "learning_rate": 4.484494514289966e-05, + "loss": 0.2819, "step": 2667000 }, { "epoch": 1.6, - "learning_rate": 4.48430257743773e-05, - "loss": 0.3758, + "learning_rate": 4.484284517733909e-05, + "loss": 0.2831, "step": 2667500 }, { "epoch": 1.6, - "learning_rate": 4.484092580881673e-05, - "loss": 0.3725, + "learning_rate": 4.484074521177853e-05, + "loss": 0.28, "step": 2668000 }, { "epoch": 1.6, - "learning_rate": 4.483882584325617e-05, - "loss": 0.3633, + "learning_rate": 4.483864524621796e-05, + "loss": 0.2778, "step": 2668500 }, { "epoch": 1.6, - "learning_rate": 4.4836725877695607e-05, - "loss": 0.3697, + "learning_rate": 4.4836545280657394e-05, + "loss": 0.2833, "step": 2669000 }, { "epoch": 1.6, - "learning_rate": 4.483463011206616e-05, - "loss": 0.3713, + "learning_rate": 4.4834449515027954e-05, + "loss": 0.2825, "step": 2669500 }, { "epoch": 1.6, - "learning_rate": 4.4832530146505594e-05, - "loss": 0.3662, + "learning_rate": 4.483234954946739e-05, + "loss": 0.2806, "step": 2670000 }, { "epoch": 1.6, - "learning_rate": 4.4830430180945034e-05, - "loss": 0.367, + "learning_rate": 4.483024958390682e-05, + "loss": 0.275, "step": 2670500 }, { "epoch": 1.6, - "learning_rate": 4.482833021538447e-05, - "loss": 0.3706, + "learning_rate": 4.4828149618346255e-05, + "loss": 0.2844, "step": 2671000 }, { "epoch": 1.6, - "learning_rate": 4.48262302498239e-05, - "loss": 0.3765, + "learning_rate": 4.4826053852716815e-05, + "loss": 0.2881, "step": 2671500 }, { "epoch": 1.6, - "learning_rate": 4.482413028426334e-05, - "loss": 0.3695, + "learning_rate": 4.4823953887156255e-05, + "loss": 0.277, "step": 2672000 }, { "epoch": 1.6, - "learning_rate": 4.4822030318702774e-05, - "loss": 0.3682, + "learning_rate": 4.482185392159568e-05, + "loss": 0.2823, "step": 2672500 }, { "epoch": 1.6, - "learning_rate": 4.481993455307333e-05, - "loss": 0.3755, + "learning_rate": 4.4819753956035116e-05, + "loss": 0.2789, "step": 2673000 }, { "epoch": 1.6, - "learning_rate": 4.481783458751277e-05, - "loss": 0.3664, + "learning_rate": 4.4817658190405676e-05, + "loss": 0.282, "step": 2673500 }, { "epoch": 1.6, - "learning_rate": 4.48157346219522e-05, - "loss": 0.3811, + "learning_rate": 4.4815558224845116e-05, + "loss": 0.2866, "step": 2674000 }, { "epoch": 1.6, - "learning_rate": 4.4813634656391635e-05, - "loss": 0.3797, + "learning_rate": 4.481345825928454e-05, + "loss": 0.2857, "step": 2674500 }, { "epoch": 1.6, - "learning_rate": 4.4811534690831075e-05, - "loss": 0.3632, + "learning_rate": 4.481135829372398e-05, + "loss": 0.2773, "step": 2675000 }, { "epoch": 1.6, - "learning_rate": 4.48094347252705e-05, - "loss": 0.3733, + "learning_rate": 4.4809258328163417e-05, + "loss": 0.2822, "step": 2675500 }, { "epoch": 1.6, - "learning_rate": 4.480733895964106e-05, - "loss": 0.3697, + "learning_rate": 4.480716256253398e-05, + "loss": 0.2883, "step": 2676000 }, { "epoch": 1.6, - "learning_rate": 4.4805238994080496e-05, - "loss": 0.361, + "learning_rate": 4.480506259697341e-05, + "loss": 0.2763, "step": 2676500 }, { "epoch": 1.6, - "learning_rate": 4.4803139028519936e-05, - "loss": 0.3725, + "learning_rate": 4.4802962631412844e-05, + "loss": 0.2789, "step": 2677000 }, { "epoch": 1.61, - "learning_rate": 4.480103906295937e-05, - "loss": 0.3776, + "learning_rate": 4.4800866865783404e-05, + "loss": 0.2846, "step": 2677500 }, { "epoch": 1.61, - "learning_rate": 4.4798939097398796e-05, - "loss": 0.3731, + "learning_rate": 4.479876690022284e-05, + "loss": 0.2809, "step": 2678000 }, { "epoch": 1.61, - "learning_rate": 4.479684333176936e-05, - "loss": 0.3742, + "learning_rate": 4.479666693466227e-05, + "loss": 0.2837, "step": 2678500 }, { "epoch": 1.61, - "learning_rate": 4.47947433662088e-05, - "loss": 0.3668, + "learning_rate": 4.479456696910171e-05, + "loss": 0.2787, "step": 2679000 }, { "epoch": 1.61, - "learning_rate": 4.479264340064823e-05, - "loss": 0.3706, + "learning_rate": 4.479246700354114e-05, + "loss": 0.2794, "step": 2679500 }, { "epoch": 1.61, - "learning_rate": 4.4790543435087664e-05, - "loss": 0.3696, + "learning_rate": 4.479036703798057e-05, + "loss": 0.2781, "step": 2680000 }, { "epoch": 1.61, - "learning_rate": 4.47884434695271e-05, - "loss": 0.378, + "learning_rate": 4.478826707242001e-05, + "loss": 0.2827, "step": 2680500 }, { "epoch": 1.61, - "learning_rate": 4.478634350396653e-05, - "loss": 0.3715, + "learning_rate": 4.4786167106859445e-05, + "loss": 0.2812, "step": 2681000 }, { "epoch": 1.61, - "learning_rate": 4.478424353840597e-05, - "loss": 0.3672, + "learning_rate": 4.478406714129888e-05, + "loss": 0.2795, "step": 2681500 }, { "epoch": 1.61, - "learning_rate": 4.4782143572845404e-05, - "loss": 0.3686, + "learning_rate": 4.478196717573832e-05, + "loss": 0.2772, "step": 2682000 }, { "epoch": 1.61, - "learning_rate": 4.478004780721596e-05, - "loss": 0.3621, + "learning_rate": 4.477986721017775e-05, + "loss": 0.2776, "step": 2682500 }, { "epoch": 1.61, - "learning_rate": 4.477794784165539e-05, - "loss": 0.3772, + "learning_rate": 4.4777767244617186e-05, + "loss": 0.2845, "step": 2683000 }, { "epoch": 1.61, - "learning_rate": 4.477585207602595e-05, - "loss": 0.3788, + "learning_rate": 4.4775671478987746e-05, + "loss": 0.2793, "step": 2683500 }, { "epoch": 1.61, - "learning_rate": 4.477375211046539e-05, - "loss": 0.3654, + "learning_rate": 4.477357151342718e-05, + "loss": 0.2808, "step": 2684000 }, { "epoch": 1.61, - "learning_rate": 4.4771652144904825e-05, - "loss": 0.3669, + "learning_rate": 4.477147154786661e-05, + "loss": 0.2815, "step": 2684500 }, { "epoch": 1.61, - "learning_rate": 4.476955217934425e-05, - "loss": 0.3656, + "learning_rate": 4.476937158230605e-05, + "loss": 0.2739, "step": 2685000 }, { "epoch": 1.61, - "learning_rate": 4.476745221378369e-05, - "loss": 0.3701, + "learning_rate": 4.476727161674549e-05, + "loss": 0.2773, "step": 2685500 }, { "epoch": 1.61, - "learning_rate": 4.4765352248223126e-05, - "loss": 0.3771, + "learning_rate": 4.476518005104717e-05, + "loss": 0.2849, "step": 2686000 }, { "epoch": 1.61, - "learning_rate": 4.4763256482593686e-05, - "loss": 0.376, + "learning_rate": 4.4763080085486594e-05, + "loss": 0.2795, "step": 2686500 }, { "epoch": 1.61, - "learning_rate": 4.476115651703312e-05, - "loss": 0.3697, + "learning_rate": 4.476098011992603e-05, + "loss": 0.2808, "step": 2687000 }, { "epoch": 1.61, - "learning_rate": 4.475905655147255e-05, - "loss": 0.3577, + "learning_rate": 4.475888015436547e-05, + "loss": 0.272, "step": 2687500 }, { "epoch": 1.61, - "learning_rate": 4.4756956585911987e-05, - "loss": 0.3753, + "learning_rate": 4.47567801888049e-05, + "loss": 0.2848, "step": 2688000 }, { "epoch": 1.61, - "learning_rate": 4.475485662035143e-05, - "loss": 0.378, + "learning_rate": 4.4754680223244335e-05, + "loss": 0.2835, "step": 2688500 }, { "epoch": 1.61, - "learning_rate": 4.475275665479086e-05, - "loss": 0.3731, + "learning_rate": 4.4752580257683775e-05, + "loss": 0.2842, "step": 2689000 }, { "epoch": 1.61, - "learning_rate": 4.4750656689230294e-05, - "loss": 0.3741, + "learning_rate": 4.475048029212321e-05, + "loss": 0.2841, "step": 2689500 }, { "epoch": 1.61, - "learning_rate": 4.4748556723669734e-05, - "loss": 0.372, + "learning_rate": 4.474838452649376e-05, + "loss": 0.2817, "step": 2690000 }, { "epoch": 1.61, - "learning_rate": 4.474646095804029e-05, - "loss": 0.3593, + "learning_rate": 4.47462845609332e-05, + "loss": 0.2738, "step": 2690500 }, { "epoch": 1.61, - "learning_rate": 4.474436099247972e-05, - "loss": 0.3754, + "learning_rate": 4.4744184595372635e-05, + "loss": 0.2826, "step": 2691000 }, { "epoch": 1.61, - "learning_rate": 4.4742261026919154e-05, - "loss": 0.3713, + "learning_rate": 4.474208462981207e-05, + "loss": 0.2798, "step": 2691500 }, { "epoch": 1.61, - "learning_rate": 4.474016526128971e-05, - "loss": 0.3711, + "learning_rate": 4.473998466425151e-05, + "loss": 0.2869, "step": 2692000 }, { "epoch": 1.61, - "learning_rate": 4.473806529572915e-05, - "loss": 0.3699, + "learning_rate": 4.473788469869094e-05, + "loss": 0.2848, "step": 2692500 }, { "epoch": 1.61, - "learning_rate": 4.473596533016858e-05, - "loss": 0.3659, + "learning_rate": 4.4735784733130376e-05, + "loss": 0.2795, "step": 2693000 }, { "epoch": 1.61, - "learning_rate": 4.4733865364608015e-05, - "loss": 0.3722, + "learning_rate": 4.473368896750093e-05, + "loss": 0.2799, "step": 2693500 }, { "epoch": 1.62, - "learning_rate": 4.4731765399047455e-05, - "loss": 0.3709, + "learning_rate": 4.473158900194037e-05, + "loss": 0.2794, "step": 2694000 }, { "epoch": 1.62, - "learning_rate": 4.472966543348689e-05, - "loss": 0.379, + "learning_rate": 4.47294890363798e-05, + "loss": 0.2847, "step": 2694500 }, { "epoch": 1.62, - "learning_rate": 4.472756966785744e-05, - "loss": 0.3768, + "learning_rate": 4.472738907081924e-05, + "loss": 0.2831, "step": 2695000 }, { "epoch": 1.62, - "learning_rate": 4.472546970229688e-05, - "loss": 0.3681, + "learning_rate": 4.472529330518979e-05, + "loss": 0.2798, "step": 2695500 }, { "epoch": 1.62, - "learning_rate": 4.4723369736736316e-05, - "loss": 0.373, + "learning_rate": 4.472319333962923e-05, + "loss": 0.2859, "step": 2696000 }, { "epoch": 1.62, - "learning_rate": 4.4721273971106877e-05, - "loss": 0.3703, + "learning_rate": 4.4721093374068664e-05, + "loss": 0.2812, "step": 2696500 }, { "epoch": 1.62, - "learning_rate": 4.47191740055463e-05, - "loss": 0.3619, + "learning_rate": 4.47189934085081e-05, + "loss": 0.274, "step": 2697000 }, { "epoch": 1.62, - "learning_rate": 4.4717074039985743e-05, - "loss": 0.373, + "learning_rate": 4.471689764287866e-05, + "loss": 0.2826, "step": 2697500 }, { "epoch": 1.62, - "learning_rate": 4.471497407442518e-05, - "loss": 0.371, + "learning_rate": 4.471479767731809e-05, + "loss": 0.2809, "step": 2698000 }, { "epoch": 1.62, - "learning_rate": 4.471287410886461e-05, - "loss": 0.376, + "learning_rate": 4.4712697711757525e-05, + "loss": 0.2798, "step": 2698500 }, { "epoch": 1.62, - "learning_rate": 4.471077414330405e-05, - "loss": 0.3642, + "learning_rate": 4.4710597746196965e-05, + "loss": 0.2786, "step": 2699000 }, { "epoch": 1.62, - "learning_rate": 4.4708674177743484e-05, - "loss": 0.3803, + "learning_rate": 4.470850198056752e-05, + "loss": 0.2875, "step": 2699500 }, { "epoch": 1.62, - "learning_rate": 4.470657421218292e-05, - "loss": 0.3724, + "learning_rate": 4.470640201500695e-05, + "loss": 0.283, "step": 2700000 }, { "epoch": 1.62, - "eval_loss": 0.3561702072620392, - "eval_runtime": 1123.298, - "eval_samples_per_second": 468.905, - "eval_steps_per_second": 78.151, + "eval_loss": 0.256526380777359, + "eval_runtime": 1451.6039, + "eval_samples_per_second": 362.854, + "eval_steps_per_second": 60.476, "step": 2700000 }, { "epoch": 1.62, - "learning_rate": 4.470447424662236e-05, - "loss": 0.3809, + "learning_rate": 4.4704302049446386e-05, + "loss": 0.2824, "step": 2700500 }, { "epoch": 1.62, - "learning_rate": 4.470237428106179e-05, - "loss": 0.3651, + "learning_rate": 4.4702202083885826e-05, + "loss": 0.2799, "step": 2701000 }, { "epoch": 1.62, - "learning_rate": 4.4700274315501225e-05, - "loss": 0.3678, + "learning_rate": 4.470010211832526e-05, + "loss": 0.2754, "step": 2701500 }, { "epoch": 1.62, - "learning_rate": 4.469817854987178e-05, - "loss": 0.3708, + "learning_rate": 4.469800635269581e-05, + "loss": 0.2795, "step": 2702000 }, { "epoch": 1.62, - "learning_rate": 4.469607858431122e-05, - "loss": 0.3684, + "learning_rate": 4.4695906387135246e-05, + "loss": 0.284, "step": 2702500 }, { "epoch": 1.62, - "learning_rate": 4.469397861875065e-05, - "loss": 0.3687, + "learning_rate": 4.4693806421574686e-05, + "loss": 0.2861, "step": 2703000 }, { "epoch": 1.62, - "learning_rate": 4.469187865319009e-05, - "loss": 0.3726, + "learning_rate": 4.469170645601412e-05, + "loss": 0.2786, "step": 2703500 }, { "epoch": 1.62, - "learning_rate": 4.4689778687629526e-05, - "loss": 0.3746, + "learning_rate": 4.4689606490453553e-05, + "loss": 0.2771, "step": 2704000 }, { "epoch": 1.62, - "learning_rate": 4.468767872206896e-05, - "loss": 0.3568, + "learning_rate": 4.4687506524892994e-05, + "loss": 0.2736, "step": 2704500 }, { "epoch": 1.62, - "learning_rate": 4.468557875650839e-05, - "loss": 0.3676, + "learning_rate": 4.468541075926355e-05, + "loss": 0.2764, "step": 2705000 }, { "epoch": 1.62, - "learning_rate": 4.4683478790947826e-05, - "loss": 0.3725, + "learning_rate": 4.468331079370298e-05, + "loss": 0.2805, "step": 2705500 }, { "epoch": 1.62, - "learning_rate": 4.4681383025318386e-05, - "loss": 0.3569, + "learning_rate": 4.468121082814242e-05, + "loss": 0.2772, "step": 2706000 }, { "epoch": 1.62, - "learning_rate": 4.467928725968894e-05, - "loss": 0.3732, + "learning_rate": 4.4679110862581854e-05, + "loss": 0.2841, "step": 2706500 }, { "epoch": 1.62, - "learning_rate": 4.4677187294128373e-05, - "loss": 0.3734, + "learning_rate": 4.467701089702129e-05, + "loss": 0.2868, "step": 2707000 }, { "epoch": 1.62, - "learning_rate": 4.4675087328567814e-05, - "loss": 0.3751, + "learning_rate": 4.467491093146073e-05, + "loss": 0.2871, "step": 2707500 }, { "epoch": 1.62, - "learning_rate": 4.467298736300725e-05, - "loss": 0.376, + "learning_rate": 4.467281516583128e-05, + "loss": 0.2875, "step": 2708000 }, { "epoch": 1.62, - "learning_rate": 4.467088739744668e-05, - "loss": 0.3615, + "learning_rate": 4.4670715200270715e-05, + "loss": 0.2704, "step": 2708500 }, { "epoch": 1.62, - "learning_rate": 4.466878743188612e-05, - "loss": 0.3702, + "learning_rate": 4.466861523471015e-05, + "loss": 0.2794, "step": 2709000 }, { "epoch": 1.62, - "learning_rate": 4.466668746632555e-05, - "loss": 0.3705, + "learning_rate": 4.466651526914959e-05, + "loss": 0.2812, "step": 2709500 }, { "epoch": 1.62, - "learning_rate": 4.466458750076499e-05, - "loss": 0.3673, + "learning_rate": 4.466441950352014e-05, + "loss": 0.2796, "step": 2710000 }, { "epoch": 1.63, - "learning_rate": 4.466249173513555e-05, - "loss": 0.3648, + "learning_rate": 4.4662319537959576e-05, + "loss": 0.2798, "step": 2710500 }, { "epoch": 1.63, - "learning_rate": 4.466039176957498e-05, - "loss": 0.3733, + "learning_rate": 4.466021957239901e-05, + "loss": 0.2783, "step": 2711000 }, { "epoch": 1.63, - "learning_rate": 4.4658291804014415e-05, - "loss": 0.3647, + "learning_rate": 4.465811960683845e-05, + "loss": 0.2778, "step": 2711500 }, { "epoch": 1.63, - "learning_rate": 4.465619183845385e-05, - "loss": 0.3714, + "learning_rate": 4.465601964127788e-05, + "loss": 0.2819, "step": 2712000 }, { "epoch": 1.63, - "learning_rate": 4.465409607282441e-05, - "loss": 0.3705, + "learning_rate": 4.4653923875648437e-05, + "loss": 0.2797, "step": 2712500 }, { "epoch": 1.63, - "learning_rate": 4.465199610726384e-05, - "loss": 0.3655, + "learning_rate": 4.465182391008788e-05, + "loss": 0.2787, "step": 2713000 }, { "epoch": 1.63, - "learning_rate": 4.4649896141703276e-05, - "loss": 0.3647, + "learning_rate": 4.464972394452731e-05, + "loss": 0.2802, "step": 2713500 }, { "epoch": 1.63, - "learning_rate": 4.4647796176142716e-05, - "loss": 0.3684, + "learning_rate": 4.4647623978966744e-05, + "loss": 0.282, "step": 2714000 }, { "epoch": 1.63, - "learning_rate": 4.464569621058214e-05, - "loss": 0.3791, + "learning_rate": 4.46455282133373e-05, + "loss": 0.2801, "step": 2714500 }, { "epoch": 1.63, - "learning_rate": 4.46436004449527e-05, - "loss": 0.3639, + "learning_rate": 4.464342824777674e-05, + "loss": 0.275, "step": 2715000 }, { "epoch": 1.63, - "learning_rate": 4.4641500479392136e-05, - "loss": 0.3664, + "learning_rate": 4.464132828221617e-05, + "loss": 0.2815, "step": 2715500 }, { "epoch": 1.63, - "learning_rate": 4.463940051383158e-05, - "loss": 0.3688, + "learning_rate": 4.4639228316655604e-05, + "loss": 0.2852, "step": 2716000 }, { "epoch": 1.63, - "learning_rate": 4.463730054827101e-05, - "loss": 0.3841, + "learning_rate": 4.463713255102616e-05, + "loss": 0.2869, "step": 2716500 }, { "epoch": 1.63, - "learning_rate": 4.4635200582710444e-05, - "loss": 0.3721, + "learning_rate": 4.46350325854656e-05, + "loss": 0.2859, "step": 2717000 }, { "epoch": 1.63, - "learning_rate": 4.463310061714988e-05, - "loss": 0.3589, + "learning_rate": 4.463293681983615e-05, + "loss": 0.2783, "step": 2717500 }, { "epoch": 1.63, - "learning_rate": 4.463100065158931e-05, - "loss": 0.3752, + "learning_rate": 4.4630836854275585e-05, + "loss": 0.288, "step": 2718000 }, { "epoch": 1.63, - "learning_rate": 4.462890488595987e-05, - "loss": 0.3729, + "learning_rate": 4.4628736888715026e-05, + "loss": 0.282, "step": 2718500 }, { "epoch": 1.63, - "learning_rate": 4.4626804920399304e-05, - "loss": 0.369, + "learning_rate": 4.462663692315446e-05, + "loss": 0.286, "step": 2719000 }, { "epoch": 1.63, - "learning_rate": 4.462470495483874e-05, - "loss": 0.3649, + "learning_rate": 4.462453695759389e-05, + "loss": 0.2753, "step": 2719500 }, { "epoch": 1.63, - "learning_rate": 4.462260498927817e-05, - "loss": 0.3756, + "learning_rate": 4.462243699203333e-05, + "loss": 0.2816, "step": 2720000 }, { "epoch": 1.63, - "learning_rate": 4.462050922364873e-05, - "loss": 0.3859, + "learning_rate": 4.4620341226403886e-05, + "loss": 0.2807, "step": 2720500 }, { "epoch": 1.63, - "learning_rate": 4.4618413458019285e-05, - "loss": 0.366, + "learning_rate": 4.461824126084332e-05, + "loss": 0.2771, "step": 2721000 }, { "epoch": 1.63, - "learning_rate": 4.4616313492458725e-05, - "loss": 0.3605, + "learning_rate": 4.461614129528275e-05, + "loss": 0.2742, "step": 2721500 }, { "epoch": 1.63, - "learning_rate": 4.461421352689816e-05, - "loss": 0.3698, + "learning_rate": 4.4614041329722193e-05, + "loss": 0.2821, "step": 2722000 }, { "epoch": 1.63, - "learning_rate": 4.461211356133759e-05, - "loss": 0.3672, + "learning_rate": 4.461194136416163e-05, + "loss": 0.2818, "step": 2722500 }, { "epoch": 1.63, - "learning_rate": 4.461001359577703e-05, - "loss": 0.3542, + "learning_rate": 4.460984139860106e-05, + "loss": 0.2748, "step": 2723000 }, { "epoch": 1.63, - "learning_rate": 4.4607913630216466e-05, - "loss": 0.3676, + "learning_rate": 4.46077414330405e-05, + "loss": 0.2762, "step": 2723500 }, { "epoch": 1.63, - "learning_rate": 4.46058136646559e-05, - "loss": 0.3639, + "learning_rate": 4.4605641467479934e-05, + "loss": 0.2761, "step": 2724000 }, { "epoch": 1.63, - "learning_rate": 4.460371369909533e-05, - "loss": 0.3621, + "learning_rate": 4.460354570185049e-05, + "loss": 0.2792, "step": 2724500 }, { "epoch": 1.63, - "learning_rate": 4.460161793346589e-05, - "loss": 0.3666, + "learning_rate": 4.460144573628992e-05, + "loss": 0.2841, "step": 2725000 }, { "epoch": 1.63, - "learning_rate": 4.459951796790533e-05, - "loss": 0.3746, + "learning_rate": 4.459934577072936e-05, + "loss": 0.2839, "step": 2725500 }, { "epoch": 1.63, - "learning_rate": 4.459741800234477e-05, - "loss": 0.3764, + "learning_rate": 4.4597245805168795e-05, + "loss": 0.2827, "step": 2726000 }, { "epoch": 1.63, - "learning_rate": 4.4595318036784194e-05, - "loss": 0.3685, + "learning_rate": 4.4595145839608235e-05, + "loss": 0.2786, "step": 2726500 }, { "epoch": 1.63, - "learning_rate": 4.459321807122363e-05, - "loss": 0.3716, + "learning_rate": 4.459305007397879e-05, + "loss": 0.2869, "step": 2727000 }, { "epoch": 1.64, - "learning_rate": 4.459111810566307e-05, - "loss": 0.3599, + "learning_rate": 4.459095010841822e-05, + "loss": 0.2733, "step": 2727500 }, { "epoch": 1.64, - "learning_rate": 4.45890181401025e-05, - "loss": 0.3782, + "learning_rate": 4.4588854342788776e-05, + "loss": 0.2829, "step": 2728000 }, { "epoch": 1.64, - "learning_rate": 4.4586918174541934e-05, - "loss": 0.3764, + "learning_rate": 4.458675437722821e-05, + "loss": 0.2831, "step": 2728500 }, { "epoch": 1.64, - "learning_rate": 4.458482240891249e-05, - "loss": 0.3667, + "learning_rate": 4.458465441166765e-05, + "loss": 0.2757, "step": 2729000 }, { "epoch": 1.64, - "learning_rate": 4.458272244335193e-05, - "loss": 0.3736, + "learning_rate": 4.458255444610708e-05, + "loss": 0.2812, "step": 2729500 }, { "epoch": 1.64, - "learning_rate": 4.458062667772249e-05, - "loss": 0.3799, + "learning_rate": 4.4580454480546516e-05, + "loss": 0.2837, "step": 2730000 }, { "epoch": 1.64, - "learning_rate": 4.457852671216192e-05, - "loss": 0.3764, + "learning_rate": 4.4578354514985956e-05, + "loss": 0.2822, "step": 2730500 }, { "epoch": 1.64, - "learning_rate": 4.4576426746601355e-05, - "loss": 0.3659, + "learning_rate": 4.457625454942539e-05, + "loss": 0.2762, "step": 2731000 }, { "epoch": 1.64, - "learning_rate": 4.457432678104079e-05, - "loss": 0.3694, + "learning_rate": 4.457415458386482e-05, + "loss": 0.2826, "step": 2731500 }, { "epoch": 1.64, - "learning_rate": 4.457222681548022e-05, - "loss": 0.359, + "learning_rate": 4.4572054618304264e-05, + "loss": 0.2792, "step": 2732000 }, { "epoch": 1.64, - "learning_rate": 4.457012684991966e-05, - "loss": 0.3733, + "learning_rate": 4.456996305260594e-05, + "loss": 0.2799, "step": 2732500 }, { "epoch": 1.64, - "learning_rate": 4.4568026884359096e-05, - "loss": 0.3737, + "learning_rate": 4.456786308704537e-05, + "loss": 0.281, "step": 2733000 }, { "epoch": 1.64, - "learning_rate": 4.456592691879853e-05, - "loss": 0.3625, + "learning_rate": 4.4565763121484804e-05, + "loss": 0.2732, "step": 2733500 }, { "epoch": 1.64, - "learning_rate": 4.456382695323797e-05, - "loss": 0.375, + "learning_rate": 4.4563663155924244e-05, + "loss": 0.2802, "step": 2734000 }, { "epoch": 1.64, - "learning_rate": 4.456173118760852e-05, - "loss": 0.3647, + "learning_rate": 4.456156319036368e-05, + "loss": 0.2802, "step": 2734500 }, { "epoch": 1.64, - "learning_rate": 4.455963122204796e-05, - "loss": 0.3656, + "learning_rate": 4.455946742473423e-05, + "loss": 0.2843, "step": 2735000 }, { "epoch": 1.64, - "learning_rate": 4.455753125648739e-05, - "loss": 0.3762, + "learning_rate": 4.4557367459173665e-05, + "loss": 0.2826, "step": 2735500 }, { "epoch": 1.64, - "learning_rate": 4.455543129092683e-05, - "loss": 0.3746, + "learning_rate": 4.4555267493613105e-05, + "loss": 0.2855, "step": 2736000 }, { "epoch": 1.64, - "learning_rate": 4.4553331325366264e-05, - "loss": 0.3731, + "learning_rate": 4.455316752805254e-05, + "loss": 0.2861, "step": 2736500 }, { "epoch": 1.64, - "learning_rate": 4.455123555973682e-05, - "loss": 0.3629, + "learning_rate": 4.455106756249197e-05, + "loss": 0.2782, "step": 2737000 }, { "epoch": 1.64, - "learning_rate": 4.454913559417625e-05, - "loss": 0.3687, + "learning_rate": 4.4548971796862526e-05, + "loss": 0.2839, "step": 2737500 }, { "epoch": 1.64, - "learning_rate": 4.454703562861569e-05, - "loss": 0.3585, + "learning_rate": 4.4546871831301966e-05, + "loss": 0.2778, "step": 2738000 }, { "epoch": 1.64, - "learning_rate": 4.4544935663055125e-05, - "loss": 0.3781, + "learning_rate": 4.45447718657414e-05, + "loss": 0.2818, "step": 2738500 }, { "epoch": 1.64, - "learning_rate": 4.454283569749456e-05, - "loss": 0.368, + "learning_rate": 4.454267190018084e-05, + "loss": 0.2782, "step": 2739000 }, { "epoch": 1.64, - "learning_rate": 4.4540735731934e-05, - "loss": 0.3724, + "learning_rate": 4.454057193462027e-05, + "loss": 0.2853, "step": 2739500 }, { "epoch": 1.64, - "learning_rate": 4.453863576637343e-05, - "loss": 0.3601, + "learning_rate": 4.4538471969059706e-05, + "loss": 0.2768, "step": 2740000 }, { "epoch": 1.64, - "learning_rate": 4.4536535800812865e-05, - "loss": 0.3627, + "learning_rate": 4.453637200349915e-05, + "loss": 0.2799, "step": 2740500 }, { "epoch": 1.64, - "learning_rate": 4.4534440035183426e-05, - "loss": 0.3678, + "learning_rate": 4.453427203793858e-05, + "loss": 0.2854, "step": 2741000 }, { "epoch": 1.64, - "learning_rate": 4.453234006962286e-05, - "loss": 0.3722, + "learning_rate": 4.4532172072378014e-05, + "loss": 0.2767, "step": 2741500 }, { "epoch": 1.64, - "learning_rate": 4.453024430399341e-05, - "loss": 0.3675, + "learning_rate": 4.453007210681745e-05, + "loss": 0.2795, "step": 2742000 }, { "epoch": 1.64, - "learning_rate": 4.4528144338432846e-05, - "loss": 0.3695, + "learning_rate": 4.452797634118801e-05, + "loss": 0.2804, "step": 2742500 }, { "epoch": 1.64, - "learning_rate": 4.4526044372872286e-05, - "loss": 0.3803, + "learning_rate": 4.452587637562744e-05, + "loss": 0.2898, "step": 2743000 }, { "epoch": 1.64, - "learning_rate": 4.452394440731172e-05, - "loss": 0.3764, + "learning_rate": 4.4523776410066874e-05, + "loss": 0.2859, "step": 2743500 }, { "epoch": 1.65, - "learning_rate": 4.452184444175115e-05, - "loss": 0.365, + "learning_rate": 4.4521676444506315e-05, + "loss": 0.2771, "step": 2744000 }, { "epoch": 1.65, - "learning_rate": 4.4519744476190593e-05, - "loss": 0.3757, + "learning_rate": 4.451958067887687e-05, + "loss": 0.2857, "step": 2744500 }, { "epoch": 1.65, - "learning_rate": 4.451764451063003e-05, - "loss": 0.3602, + "learning_rate": 4.45174807133163e-05, + "loss": 0.2753, "step": 2745000 }, { "epoch": 1.65, - "learning_rate": 4.451554874500058e-05, - "loss": 0.3636, + "learning_rate": 4.4515380747755735e-05, + "loss": 0.2741, "step": 2745500 }, { "epoch": 1.65, - "learning_rate": 4.4513448779440014e-05, - "loss": 0.3731, + "learning_rate": 4.4513284982126295e-05, + "loss": 0.2789, "step": 2746000 }, { "epoch": 1.65, - "learning_rate": 4.4511348813879454e-05, - "loss": 0.3749, + "learning_rate": 4.451118501656573e-05, + "loss": 0.2886, "step": 2746500 }, { "epoch": 1.65, - "learning_rate": 4.450924884831889e-05, - "loss": 0.367, + "learning_rate": 4.450908505100516e-05, + "loss": 0.2838, "step": 2747000 }, { "epoch": 1.65, - "learning_rate": 4.450714888275832e-05, - "loss": 0.3648, + "learning_rate": 4.45069850854446e-05, + "loss": 0.2789, "step": 2747500 }, { "epoch": 1.65, - "learning_rate": 4.450505311712888e-05, - "loss": 0.3719, + "learning_rate": 4.4504885119884036e-05, + "loss": 0.2788, "step": 2748000 }, { "epoch": 1.65, - "learning_rate": 4.4502953151568315e-05, - "loss": 0.3637, + "learning_rate": 4.450278515432347e-05, + "loss": 0.2783, "step": 2748500 }, { "epoch": 1.65, - "learning_rate": 4.450085318600775e-05, - "loss": 0.3752, + "learning_rate": 4.450068518876291e-05, + "loss": 0.2854, "step": 2749000 }, { "epoch": 1.65, - "learning_rate": 4.449875322044719e-05, - "loss": 0.3852, + "learning_rate": 4.4498585223202336e-05, + "loss": 0.2852, "step": 2749500 }, { "epoch": 1.65, - "learning_rate": 4.449665745481774e-05, - "loss": 0.3793, + "learning_rate": 4.44964894575729e-05, + "loss": 0.2812, "step": 2750000 }, { "epoch": 1.65, - "learning_rate": 4.4494557489257176e-05, - "loss": 0.3628, + "learning_rate": 4.449438949201233e-05, + "loss": 0.2719, "step": 2750500 }, { "epoch": 1.65, - "learning_rate": 4.449246172362773e-05, - "loss": 0.3705, + "learning_rate": 4.449228952645177e-05, + "loss": 0.2801, "step": 2751000 }, { "epoch": 1.65, - "learning_rate": 4.449036175806716e-05, - "loss": 0.3813, + "learning_rate": 4.44901895608912e-05, + "loss": 0.2843, "step": 2751500 }, { "epoch": 1.65, - "learning_rate": 4.44882617925066e-05, - "loss": 0.3727, + "learning_rate": 4.448809379526176e-05, + "loss": 0.284, "step": 2752000 }, { "epoch": 1.65, - "learning_rate": 4.4486161826946036e-05, - "loss": 0.3716, + "learning_rate": 4.448599802963231e-05, + "loss": 0.2829, "step": 2752500 }, { "epoch": 1.65, - "learning_rate": 4.448406186138547e-05, - "loss": 0.3764, + "learning_rate": 4.448389806407175e-05, + "loss": 0.2786, "step": 2753000 }, { "epoch": 1.65, - "learning_rate": 4.448196189582491e-05, - "loss": 0.3746, + "learning_rate": 4.4481798098511185e-05, + "loss": 0.2806, "step": 2753500 }, { "epoch": 1.65, - "learning_rate": 4.4479861930264343e-05, - "loss": 0.3761, + "learning_rate": 4.447969813295062e-05, + "loss": 0.2826, "step": 2754000 }, { "epoch": 1.65, - "learning_rate": 4.447776196470378e-05, - "loss": 0.3724, + "learning_rate": 4.447759816739006e-05, + "loss": 0.2853, "step": 2754500 }, { "epoch": 1.65, - "learning_rate": 4.447566199914322e-05, - "loss": 0.3714, + "learning_rate": 4.447549820182949e-05, + "loss": 0.2767, "step": 2755000 }, { "epoch": 1.65, - "learning_rate": 4.4473562033582644e-05, - "loss": 0.3753, + "learning_rate": 4.4473398236268925e-05, + "loss": 0.2812, "step": 2755500 }, { "epoch": 1.65, - "learning_rate": 4.4471462068022084e-05, - "loss": 0.3661, + "learning_rate": 4.4471298270708366e-05, + "loss": 0.2804, "step": 2756000 }, { "epoch": 1.65, - "learning_rate": 4.446936210246152e-05, - "loss": 0.3702, + "learning_rate": 4.446920250507892e-05, + "loss": 0.2843, "step": 2756500 }, { "epoch": 1.65, - "learning_rate": 4.446726633683208e-05, - "loss": 0.3725, + "learning_rate": 4.446710253951835e-05, + "loss": 0.2752, "step": 2757000 }, { "epoch": 1.65, - "learning_rate": 4.446516637127151e-05, - "loss": 0.3683, + "learning_rate": 4.4465002573957786e-05, + "loss": 0.2751, "step": 2757500 }, { "epoch": 1.65, - "learning_rate": 4.4463070605642065e-05, - "loss": 0.3627, + "learning_rate": 4.4462902608397226e-05, + "loss": 0.2762, "step": 2758000 }, { "epoch": 1.65, - "learning_rate": 4.4460970640081505e-05, - "loss": 0.3651, + "learning_rate": 4.446080264283666e-05, + "loss": 0.2781, "step": 2758500 }, { "epoch": 1.65, - "learning_rate": 4.445887067452094e-05, - "loss": 0.3668, + "learning_rate": 4.4458706877207213e-05, + "loss": 0.2764, "step": 2759000 }, { "epoch": 1.65, - "learning_rate": 4.445677070896037e-05, - "loss": 0.3661, + "learning_rate": 4.445660691164665e-05, + "loss": 0.2742, "step": 2759500 }, { "epoch": 1.65, - "learning_rate": 4.4454674943330926e-05, - "loss": 0.3693, + "learning_rate": 4.445450694608609e-05, + "loss": 0.2768, "step": 2760000 }, { "epoch": 1.66, - "learning_rate": 4.4452574977770366e-05, - "loss": 0.3631, + "learning_rate": 4.445240698052552e-05, + "loss": 0.2761, "step": 2760500 }, { "epoch": 1.66, - "learning_rate": 4.44504750122098e-05, - "loss": 0.3701, + "learning_rate": 4.4450311214896074e-05, + "loss": 0.2808, "step": 2761000 }, { "epoch": 1.66, - "learning_rate": 4.444837504664924e-05, - "loss": 0.3746, + "learning_rate": 4.4448211249335514e-05, + "loss": 0.2819, "step": 2761500 }, { "epoch": 1.66, - "learning_rate": 4.444627508108867e-05, - "loss": 0.3756, + "learning_rate": 4.444611128377495e-05, + "loss": 0.2855, "step": 2762000 }, { "epoch": 1.66, - "learning_rate": 4.4444175115528106e-05, - "loss": 0.3703, + "learning_rate": 4.444401131821438e-05, + "loss": 0.2778, "step": 2762500 }, { "epoch": 1.66, - "learning_rate": 4.444207514996754e-05, - "loss": 0.3685, + "learning_rate": 4.4441915552584935e-05, + "loss": 0.2774, "step": 2763000 }, { "epoch": 1.66, - "learning_rate": 4.4439975184406973e-05, - "loss": 0.3674, + "learning_rate": 4.4439815587024375e-05, + "loss": 0.2753, "step": 2763500 }, { "epoch": 1.66, - "learning_rate": 4.4437879418777534e-05, - "loss": 0.3674, + "learning_rate": 4.443771562146381e-05, + "loss": 0.2769, "step": 2764000 }, { "epoch": 1.66, - "learning_rate": 4.443577945321697e-05, - "loss": 0.3721, + "learning_rate": 4.443561565590324e-05, + "loss": 0.2863, "step": 2764500 }, { "epoch": 1.66, - "learning_rate": 4.443368368758752e-05, - "loss": 0.361, + "learning_rate": 4.4433519890273796e-05, + "loss": 0.2837, "step": 2765000 }, { "epoch": 1.66, - "learning_rate": 4.443158372202696e-05, - "loss": 0.371, + "learning_rate": 4.4431419924713236e-05, + "loss": 0.2869, "step": 2765500 }, { "epoch": 1.66, - "learning_rate": 4.4429483756466394e-05, - "loss": 0.367, + "learning_rate": 4.442931995915267e-05, + "loss": 0.2786, "step": 2766000 }, { "epoch": 1.66, - "learning_rate": 4.442738379090583e-05, - "loss": 0.38, + "learning_rate": 4.44272199935921e-05, + "loss": 0.2863, "step": 2766500 }, { "epoch": 1.66, - "learning_rate": 4.442528382534527e-05, - "loss": 0.3637, + "learning_rate": 4.442512422796266e-05, + "loss": 0.2766, "step": 2767000 }, { "epoch": 1.66, - "learning_rate": 4.4423183859784695e-05, - "loss": 0.372, + "learning_rate": 4.4423024262402097e-05, + "loss": 0.2817, "step": 2767500 }, { "epoch": 1.66, - "learning_rate": 4.4421083894224135e-05, - "loss": 0.3771, + "learning_rate": 4.442092429684153e-05, + "loss": 0.2796, "step": 2768000 }, { "epoch": 1.66, - "learning_rate": 4.441898392866357e-05, - "loss": 0.3688, + "learning_rate": 4.441882433128097e-05, + "loss": 0.282, "step": 2768500 }, { "epoch": 1.66, - "learning_rate": 4.441688816303413e-05, - "loss": 0.3575, + "learning_rate": 4.4416728565651524e-05, + "loss": 0.2735, "step": 2769000 }, { "epoch": 1.66, - "learning_rate": 4.441478819747356e-05, - "loss": 0.3719, + "learning_rate": 4.441462860009096e-05, + "loss": 0.2812, "step": 2769500 }, { "epoch": 1.66, - "learning_rate": 4.4412692431844116e-05, - "loss": 0.3881, + "learning_rate": 4.441252863453039e-05, + "loss": 0.2851, "step": 2770000 }, { "epoch": 1.66, - "learning_rate": 4.4410592466283556e-05, - "loss": 0.3631, + "learning_rate": 4.441042866896983e-05, + "loss": 0.2757, "step": 2770500 }, { "epoch": 1.66, - "learning_rate": 4.440849250072299e-05, - "loss": 0.3766, + "learning_rate": 4.4408328703409264e-05, + "loss": 0.2835, "step": 2771000 }, { "epoch": 1.66, - "learning_rate": 4.440639253516242e-05, - "loss": 0.3662, + "learning_rate": 4.440623293777982e-05, + "loss": 0.2787, "step": 2771500 }, { "epoch": 1.66, - "learning_rate": 4.440429256960186e-05, - "loss": 0.3819, + "learning_rate": 4.440413297221925e-05, + "loss": 0.2847, "step": 2772000 }, { "epoch": 1.66, - "learning_rate": 4.440219260404129e-05, - "loss": 0.3683, + "learning_rate": 4.440203720658981e-05, + "loss": 0.2779, "step": 2772500 }, { "epoch": 1.66, - "learning_rate": 4.4400092638480723e-05, - "loss": 0.365, + "learning_rate": 4.4399937241029245e-05, + "loss": 0.2795, "step": 2773000 }, { "epoch": 1.66, - "learning_rate": 4.4397992672920164e-05, - "loss": 0.3757, + "learning_rate": 4.439783727546868e-05, + "loss": 0.2803, "step": 2773500 }, { "epoch": 1.66, - "learning_rate": 4.4395896907290724e-05, - "loss": 0.3674, + "learning_rate": 4.439573730990812e-05, + "loss": 0.28, "step": 2774000 }, { "epoch": 1.66, - "learning_rate": 4.439379694173015e-05, - "loss": 0.3792, + "learning_rate": 4.439363734434755e-05, + "loss": 0.2823, "step": 2774500 }, { "epoch": 1.66, - "learning_rate": 4.439170117610071e-05, - "loss": 0.3653, + "learning_rate": 4.4391537378786986e-05, + "loss": 0.2811, "step": 2775000 }, { "epoch": 1.66, - "learning_rate": 4.438960121054015e-05, - "loss": 0.3663, + "learning_rate": 4.4389437413226426e-05, + "loss": 0.2779, "step": 2775500 }, { "epoch": 1.66, - "learning_rate": 4.4387501244979585e-05, - "loss": 0.3785, + "learning_rate": 4.438733744766586e-05, + "loss": 0.2815, "step": 2776000 }, { "epoch": 1.66, - "learning_rate": 4.438540127941902e-05, - "loss": 0.3665, + "learning_rate": 4.438523748210529e-05, + "loss": 0.282, "step": 2776500 }, { "epoch": 1.66, - "learning_rate": 4.438330131385845e-05, - "loss": 0.3649, + "learning_rate": 4.438314171647585e-05, + "loss": 0.2789, "step": 2777000 }, { "epoch": 1.67, - "learning_rate": 4.4381201348297885e-05, - "loss": 0.3634, + "learning_rate": 4.438104175091529e-05, + "loss": 0.2787, "step": 2777500 }, { "epoch": 1.67, - "learning_rate": 4.437910138273732e-05, - "loss": 0.3592, + "learning_rate": 4.437894178535472e-05, + "loss": 0.277, "step": 2778000 }, { "epoch": 1.67, - "learning_rate": 4.437700141717676e-05, - "loss": 0.3755, + "learning_rate": 4.4376841819794154e-05, + "loss": 0.2903, "step": 2778500 }, { "epoch": 1.67, - "learning_rate": 4.437490565154732e-05, - "loss": 0.38, + "learning_rate": 4.437474605416471e-05, + "loss": 0.2795, "step": 2779000 }, { "epoch": 1.67, - "learning_rate": 4.4372805685986746e-05, - "loss": 0.3726, + "learning_rate": 4.437264608860415e-05, + "loss": 0.2789, "step": 2779500 }, { "epoch": 1.67, - "learning_rate": 4.437070572042618e-05, - "loss": 0.3688, + "learning_rate": 4.43705503229747e-05, + "loss": 0.2846, "step": 2780000 }, { "epoch": 1.67, - "learning_rate": 4.436860995479674e-05, - "loss": 0.3719, + "learning_rate": 4.4368450357414135e-05, + "loss": 0.2842, "step": 2780500 }, { "epoch": 1.67, - "learning_rate": 4.436650998923618e-05, - "loss": 0.3646, + "learning_rate": 4.4366350391853575e-05, + "loss": 0.2749, "step": 2781000 }, { "epoch": 1.67, - "learning_rate": 4.4364410023675613e-05, - "loss": 0.3622, + "learning_rate": 4.436425042629301e-05, + "loss": 0.2719, "step": 2781500 }, { "epoch": 1.67, - "learning_rate": 4.436231005811505e-05, - "loss": 0.3639, + "learning_rate": 4.436215046073244e-05, + "loss": 0.2757, "step": 2782000 }, { "epoch": 1.67, - "learning_rate": 4.436021009255448e-05, - "loss": 0.363, + "learning_rate": 4.436005049517188e-05, + "loss": 0.28, "step": 2782500 }, { "epoch": 1.67, - "learning_rate": 4.435811432692504e-05, - "loss": 0.3637, + "learning_rate": 4.4357950529611315e-05, + "loss": 0.2728, "step": 2783000 }, { "epoch": 1.67, - "learning_rate": 4.4356014361364474e-05, - "loss": 0.379, + "learning_rate": 4.435585056405075e-05, + "loss": 0.2822, "step": 2783500 }, { "epoch": 1.67, - "learning_rate": 4.435391439580391e-05, - "loss": 0.3725, + "learning_rate": 4.435375899835243e-05, + "loss": 0.2818, "step": 2784000 }, { "epoch": 1.67, - "learning_rate": 4.435181443024334e-05, - "loss": 0.3754, + "learning_rate": 4.4351659032791856e-05, + "loss": 0.2847, "step": 2784500 }, { "epoch": 1.67, - "learning_rate": 4.4349714464682775e-05, - "loss": 0.3723, + "learning_rate": 4.4349559067231296e-05, + "loss": 0.2761, "step": 2785000 }, { "epoch": 1.67, - "learning_rate": 4.4347614499122215e-05, - "loss": 0.3804, + "learning_rate": 4.434745910167073e-05, + "loss": 0.2875, "step": 2785500 }, { "epoch": 1.67, - "learning_rate": 4.434551453356165e-05, - "loss": 0.3644, + "learning_rate": 4.434535913611016e-05, + "loss": 0.2765, "step": 2786000 }, { "epoch": 1.67, - "learning_rate": 4.434341456800108e-05, - "loss": 0.3587, + "learning_rate": 4.4343259170549603e-05, + "loss": 0.2761, "step": 2786500 }, { "epoch": 1.67, - "learning_rate": 4.4341318802371635e-05, - "loss": 0.3614, + "learning_rate": 4.434115920498904e-05, + "loss": 0.277, "step": 2787000 }, { "epoch": 1.67, - "learning_rate": 4.4339223036742196e-05, - "loss": 0.3786, + "learning_rate": 4.433906343935959e-05, + "loss": 0.2865, "step": 2787500 }, { "epoch": 1.67, - "learning_rate": 4.4337123071181636e-05, - "loss": 0.3725, + "learning_rate": 4.433696347379903e-05, + "loss": 0.2783, "step": 2788000 }, { "epoch": 1.67, - "learning_rate": 4.433502730555219e-05, - "loss": 0.365, + "learning_rate": 4.4334863508238464e-05, + "loss": 0.2771, "step": 2788500 }, { "epoch": 1.67, - "learning_rate": 4.433292733999162e-05, - "loss": 0.3564, + "learning_rate": 4.43327635426779e-05, + "loss": 0.2736, "step": 2789000 }, { "epoch": 1.67, - "learning_rate": 4.433082737443106e-05, - "loss": 0.3665, + "learning_rate": 4.433066357711734e-05, + "loss": 0.2789, "step": 2789500 }, { "epoch": 1.67, - "learning_rate": 4.4328727408870497e-05, - "loss": 0.3726, + "learning_rate": 4.432856781148789e-05, + "loss": 0.2802, "step": 2790000 }, { "epoch": 1.67, - "learning_rate": 4.432662744330993e-05, - "loss": 0.3659, + "learning_rate": 4.4326467845927325e-05, + "loss": 0.2804, "step": 2790500 }, { "epoch": 1.67, - "learning_rate": 4.432452747774937e-05, - "loss": 0.3692, + "learning_rate": 4.432436788036676e-05, + "loss": 0.2812, "step": 2791000 }, { "epoch": 1.67, - "learning_rate": 4.43224275121888e-05, - "loss": 0.3682, + "learning_rate": 4.43222679148062e-05, + "loss": 0.2754, "step": 2791500 }, { "epoch": 1.67, - "learning_rate": 4.432032754662823e-05, - "loss": 0.3664, + "learning_rate": 4.432016794924563e-05, + "loss": 0.2816, "step": 2792000 }, { "epoch": 1.67, - "learning_rate": 4.431822758106767e-05, - "loss": 0.3706, + "learning_rate": 4.4318067983685066e-05, + "loss": 0.2821, "step": 2792500 }, { "epoch": 1.67, - "learning_rate": 4.4316127615507104e-05, - "loss": 0.3632, + "learning_rate": 4.4315968018124506e-05, + "loss": 0.2753, "step": 2793000 }, { "epoch": 1.67, - "learning_rate": 4.4314031849877664e-05, - "loss": 0.3718, + "learning_rate": 4.431386805256394e-05, + "loss": 0.2833, "step": 2793500 }, { "epoch": 1.68, - "learning_rate": 4.431193188431709e-05, - "loss": 0.3651, + "learning_rate": 4.431176808700337e-05, + "loss": 0.2812, "step": 2794000 }, { "epoch": 1.68, - "learning_rate": 4.430983191875653e-05, - "loss": 0.374, + "learning_rate": 4.4309672321373926e-05, + "loss": 0.2848, "step": 2794500 }, { "epoch": 1.68, - "learning_rate": 4.4307731953195965e-05, - "loss": 0.3714, + "learning_rate": 4.4307576555744487e-05, + "loss": 0.2793, "step": 2795000 }, { "epoch": 1.68, - "learning_rate": 4.43056319876354e-05, - "loss": 0.3653, + "learning_rate": 4.430547659018392e-05, + "loss": 0.2814, "step": 2795500 }, { "epoch": 1.68, - "learning_rate": 4.430353202207484e-05, - "loss": 0.3648, + "learning_rate": 4.4303376624623354e-05, + "loss": 0.2824, "step": 2796000 }, { "epoch": 1.68, - "learning_rate": 4.430143625644539e-05, - "loss": 0.3703, + "learning_rate": 4.4301276659062794e-05, + "loss": 0.2798, "step": 2796500 }, { "epoch": 1.68, - "learning_rate": 4.4299336290884826e-05, - "loss": 0.3704, + "learning_rate": 4.429917669350223e-05, + "loss": 0.2767, "step": 2797000 }, { "epoch": 1.68, - "learning_rate": 4.4297236325324266e-05, - "loss": 0.3738, + "learning_rate": 4.429707672794166e-05, + "loss": 0.28, "step": 2797500 }, { "epoch": 1.68, - "learning_rate": 4.42951363597637e-05, - "loss": 0.3644, + "learning_rate": 4.42949767623811e-05, + "loss": 0.2779, "step": 2798000 }, { "epoch": 1.68, - "learning_rate": 4.429303639420313e-05, - "loss": 0.3663, + "learning_rate": 4.4292876796820534e-05, + "loss": 0.2769, "step": 2798500 }, { "epoch": 1.68, - "learning_rate": 4.4290940628573686e-05, - "loss": 0.3741, + "learning_rate": 4.429077683125997e-05, + "loss": 0.2803, "step": 2799000 }, { "epoch": 1.68, - "learning_rate": 4.4288840663013126e-05, - "loss": 0.3648, + "learning_rate": 4.428868526556164e-05, + "loss": 0.2792, "step": 2799500 }, { "epoch": 1.68, - "learning_rate": 4.428674069745256e-05, - "loss": 0.3699, + "learning_rate": 4.4286585300001075e-05, + "loss": 0.279, "step": 2800000 }, { "epoch": 1.68, - "eval_loss": 0.3532743453979492, - "eval_runtime": 1119.9621, - "eval_samples_per_second": 470.302, - "eval_steps_per_second": 78.384, + "eval_loss": 0.2554187774658203, + "eval_runtime": 1450.9376, + "eval_samples_per_second": 363.02, + "eval_steps_per_second": 60.504, "step": 2800000 }, { "epoch": 1.68, - "learning_rate": 4.4284640731891993e-05, - "loss": 0.3731, + "learning_rate": 4.4284485334440515e-05, + "loss": 0.283, "step": 2800500 }, { "epoch": 1.68, - "learning_rate": 4.428254496626255e-05, - "loss": 0.3727, + "learning_rate": 4.428238536887995e-05, + "loss": 0.2802, "step": 2801000 }, { "epoch": 1.68, - "learning_rate": 4.428044500070199e-05, - "loss": 0.3835, + "learning_rate": 4.428028540331938e-05, + "loss": 0.2917, "step": 2801500 }, { "epoch": 1.68, - "learning_rate": 4.427834503514142e-05, - "loss": 0.3702, + "learning_rate": 4.427818543775882e-05, + "loss": 0.2772, "step": 2802000 }, { "epoch": 1.68, - "learning_rate": 4.4276245069580854e-05, - "loss": 0.3781, + "learning_rate": 4.4276085472198256e-05, + "loss": 0.2806, "step": 2802500 }, { "epoch": 1.68, - "learning_rate": 4.4274145104020294e-05, - "loss": 0.3682, + "learning_rate": 4.427398550663769e-05, + "loss": 0.2764, "step": 2803000 }, { "epoch": 1.68, - "learning_rate": 4.427204513845973e-05, - "loss": 0.3676, + "learning_rate": 4.427188974100825e-05, + "loss": 0.2809, "step": 2803500 }, { "epoch": 1.68, - "learning_rate": 4.426994937283028e-05, - "loss": 0.3705, + "learning_rate": 4.426978977544768e-05, + "loss": 0.2768, "step": 2804000 }, { "epoch": 1.68, - "learning_rate": 4.426784940726972e-05, - "loss": 0.3662, + "learning_rate": 4.4267689809887117e-05, + "loss": 0.2745, "step": 2804500 }, { "epoch": 1.68, - "learning_rate": 4.4265749441709155e-05, - "loss": 0.3656, + "learning_rate": 4.426558984432656e-05, + "loss": 0.2766, "step": 2805000 }, { "epoch": 1.68, - "learning_rate": 4.426364947614859e-05, - "loss": 0.3702, + "learning_rate": 4.426349407869711e-05, + "loss": 0.2839, "step": 2805500 }, { "epoch": 1.68, - "learning_rate": 4.426154951058803e-05, - "loss": 0.363, + "learning_rate": 4.4261394113136544e-05, + "loss": 0.2775, "step": 2806000 }, { "epoch": 1.68, - "learning_rate": 4.425944954502746e-05, - "loss": 0.3663, + "learning_rate": 4.425929414757598e-05, + "loss": 0.2715, "step": 2806500 }, { "epoch": 1.68, - "learning_rate": 4.4257349579466896e-05, - "loss": 0.3677, + "learning_rate": 4.425719418201542e-05, + "loss": 0.2786, "step": 2807000 }, { "epoch": 1.68, - "learning_rate": 4.4255249613906336e-05, - "loss": 0.3667, + "learning_rate": 4.425509841638597e-05, + "loss": 0.2741, "step": 2807500 }, { "epoch": 1.68, - "learning_rate": 4.425314964834577e-05, - "loss": 0.37, + "learning_rate": 4.4252998450825405e-05, + "loss": 0.2827, "step": 2808000 }, { "epoch": 1.68, - "learning_rate": 4.425105388271632e-05, - "loss": 0.3723, + "learning_rate": 4.4250906885127085e-05, + "loss": 0.2805, "step": 2808500 }, { "epoch": 1.68, - "learning_rate": 4.4248953917155756e-05, - "loss": 0.3785, + "learning_rate": 4.424880691956652e-05, + "loss": 0.2823, "step": 2809000 }, { "epoch": 1.68, - "learning_rate": 4.42468539515952e-05, - "loss": 0.3756, + "learning_rate": 4.424670695400596e-05, + "loss": 0.2834, "step": 2809500 }, { "epoch": 1.68, - "learning_rate": 4.424475398603463e-05, - "loss": 0.3645, + "learning_rate": 4.424460698844539e-05, + "loss": 0.2774, "step": 2810000 }, { "epoch": 1.69, - "learning_rate": 4.4242658220405184e-05, - "loss": 0.3697, + "learning_rate": 4.424250702288482e-05, + "loss": 0.2803, "step": 2810500 }, { "epoch": 1.69, - "learning_rate": 4.424055825484462e-05, - "loss": 0.3684, + "learning_rate": 4.424040705732426e-05, + "loss": 0.2803, "step": 2811000 }, { "epoch": 1.69, - "learning_rate": 4.423845828928406e-05, - "loss": 0.3633, + "learning_rate": 4.423830709176369e-05, + "loss": 0.2773, "step": 2811500 }, { "epoch": 1.69, - "learning_rate": 4.423636252365461e-05, - "loss": 0.3754, + "learning_rate": 4.4236207126203126e-05, + "loss": 0.2911, "step": 2812000 }, { "epoch": 1.69, - "learning_rate": 4.4234262558094044e-05, - "loss": 0.3648, + "learning_rate": 4.4234107160642566e-05, + "loss": 0.2739, "step": 2812500 }, { "epoch": 1.69, - "learning_rate": 4.4232162592533485e-05, - "loss": 0.3685, + "learning_rate": 4.4232007195082e-05, + "loss": 0.2773, "step": 2813000 }, { "epoch": 1.69, - "learning_rate": 4.423006262697292e-05, - "loss": 0.3608, + "learning_rate": 4.422990722952143e-05, + "loss": 0.2764, "step": 2813500 }, { "epoch": 1.69, - "learning_rate": 4.422796266141235e-05, - "loss": 0.356, + "learning_rate": 4.4227807263960873e-05, + "loss": 0.2735, "step": 2814000 }, { "epoch": 1.69, - "learning_rate": 4.422586269585179e-05, - "loss": 0.3652, + "learning_rate": 4.422570729840031e-05, + "loss": 0.2757, "step": 2814500 }, { "epoch": 1.69, - "learning_rate": 4.4223766930222345e-05, - "loss": 0.3659, + "learning_rate": 4.422360733283974e-05, + "loss": 0.2816, "step": 2815000 }, { "epoch": 1.69, - "learning_rate": 4.422166696466178e-05, - "loss": 0.3679, + "learning_rate": 4.422150736727918e-05, + "loss": 0.2784, "step": 2815500 }, { "epoch": 1.69, - "learning_rate": 4.421956699910121e-05, - "loss": 0.3662, + "learning_rate": 4.421940740171861e-05, + "loss": 0.2763, "step": 2816000 }, { "epoch": 1.69, - "learning_rate": 4.421746703354065e-05, - "loss": 0.3676, + "learning_rate": 4.421731163608917e-05, + "loss": 0.2804, "step": 2816500 }, { "epoch": 1.69, - "learning_rate": 4.4215367067980086e-05, - "loss": 0.3706, + "learning_rate": 4.421521167052861e-05, + "loss": 0.2796, "step": 2817000 }, { "epoch": 1.69, - "learning_rate": 4.421326710241952e-05, - "loss": 0.3592, + "learning_rate": 4.421311590489916e-05, + "loss": 0.2749, "step": 2817500 }, { "epoch": 1.69, - "learning_rate": 4.421116713685896e-05, - "loss": 0.3625, + "learning_rate": 4.4211015939338595e-05, + "loss": 0.2774, "step": 2818000 }, { "epoch": 1.69, - "learning_rate": 4.4209067171298386e-05, - "loss": 0.3711, + "learning_rate": 4.420891597377803e-05, + "loss": 0.2802, "step": 2818500 }, { "epoch": 1.69, - "learning_rate": 4.420697140566895e-05, - "loss": 0.3676, + "learning_rate": 4.420681600821747e-05, + "loss": 0.2786, "step": 2819000 }, { "epoch": 1.69, - "learning_rate": 4.420487144010838e-05, - "loss": 0.3778, + "learning_rate": 4.42047160426569e-05, + "loss": 0.2817, "step": 2819500 }, { "epoch": 1.69, - "learning_rate": 4.420277567447894e-05, - "loss": 0.3729, + "learning_rate": 4.4202616077096335e-05, + "loss": 0.2807, "step": 2820000 }, { "epoch": 1.69, - "learning_rate": 4.4200679908849494e-05, - "loss": 0.3621, + "learning_rate": 4.4200516111535776e-05, + "loss": 0.2785, "step": 2820500 }, { "epoch": 1.69, - "learning_rate": 4.419857994328893e-05, - "loss": 0.3734, + "learning_rate": 4.41984161459752e-05, + "loss": 0.2805, "step": 2821000 }, { "epoch": 1.69, - "learning_rate": 4.419647997772836e-05, - "loss": 0.3606, + "learning_rate": 4.4196316180414636e-05, + "loss": 0.2751, "step": 2821500 }, { "epoch": 1.69, - "learning_rate": 4.41943800121678e-05, - "loss": 0.3646, + "learning_rate": 4.4194220414785196e-05, + "loss": 0.2757, "step": 2822000 }, { "epoch": 1.69, - "learning_rate": 4.4192280046607235e-05, - "loss": 0.3721, + "learning_rate": 4.4192120449224636e-05, + "loss": 0.2799, "step": 2822500 }, { "epoch": 1.69, - "learning_rate": 4.419018008104667e-05, - "loss": 0.3636, + "learning_rate": 4.419002048366406e-05, + "loss": 0.273, "step": 2823000 }, { "epoch": 1.69, - "learning_rate": 4.418808011548611e-05, - "loss": 0.3724, + "learning_rate": 4.41879205181035e-05, + "loss": 0.2777, "step": 2823500 }, { "epoch": 1.69, - "learning_rate": 4.418598014992554e-05, - "loss": 0.3642, + "learning_rate": 4.4185824752474064e-05, + "loss": 0.2766, "step": 2824000 }, { "epoch": 1.69, - "learning_rate": 4.4183880184364975e-05, - "loss": 0.3723, + "learning_rate": 4.41837247869135e-05, + "loss": 0.2819, "step": 2824500 }, { "epoch": 1.69, - "learning_rate": 4.4181780218804416e-05, - "loss": 0.3635, + "learning_rate": 4.418162482135293e-05, + "loss": 0.2764, "step": 2825000 }, { "epoch": 1.69, - "learning_rate": 4.417968025324384e-05, - "loss": 0.3719, + "learning_rate": 4.4179524855792364e-05, + "loss": 0.2792, "step": 2825500 }, { "epoch": 1.69, - "learning_rate": 4.41775844876144e-05, - "loss": 0.3689, + "learning_rate": 4.41774248902318e-05, + "loss": 0.2757, "step": 2826000 }, { "epoch": 1.69, - "learning_rate": 4.4175484522053836e-05, - "loss": 0.3762, + "learning_rate": 4.417533332453348e-05, + "loss": 0.2827, "step": 2826500 }, { "epoch": 1.69, - "learning_rate": 4.4173384556493276e-05, - "loss": 0.3827, + "learning_rate": 4.417323755890403e-05, + "loss": 0.2913, "step": 2827000 }, { "epoch": 1.7, - "learning_rate": 4.417128459093271e-05, - "loss": 0.3725, + "learning_rate": 4.4171137593343465e-05, + "loss": 0.2844, "step": 2827500 }, { "epoch": 1.7, - "learning_rate": 4.416918462537214e-05, - "loss": 0.3663, + "learning_rate": 4.41690376277829e-05, + "loss": 0.2821, "step": 2828000 }, { "epoch": 1.7, - "learning_rate": 4.4167088859742704e-05, - "loss": 0.3725, + "learning_rate": 4.416693766222234e-05, + "loss": 0.2833, "step": 2828500 }, { "epoch": 1.7, - "learning_rate": 4.416498889418214e-05, - "loss": 0.3536, + "learning_rate": 4.416483769666177e-05, + "loss": 0.2759, "step": 2829000 }, { "epoch": 1.7, - "learning_rate": 4.416288892862157e-05, - "loss": 0.3755, + "learning_rate": 4.416273773110121e-05, + "loss": 0.2817, "step": 2829500 }, { "epoch": 1.7, - "learning_rate": 4.416078896306101e-05, - "loss": 0.3727, + "learning_rate": 4.4160637765540646e-05, + "loss": 0.2809, "step": 2830000 }, { "epoch": 1.7, - "learning_rate": 4.415868899750044e-05, - "loss": 0.3768, + "learning_rate": 4.415853779998008e-05, + "loss": 0.283, "step": 2830500 }, { "epoch": 1.7, - "learning_rate": 4.415658903193987e-05, - "loss": 0.3672, + "learning_rate": 4.415643783441952e-05, + "loss": 0.2737, "step": 2831000 }, { "epoch": 1.7, - "learning_rate": 4.415448906637931e-05, - "loss": 0.3729, + "learning_rate": 4.415433786885895e-05, + "loss": 0.2773, "step": 2831500 }, { "epoch": 1.7, - "learning_rate": 4.4152389100818745e-05, - "loss": 0.372, + "learning_rate": 4.4152237903298386e-05, + "loss": 0.2825, "step": 2832000 }, { "epoch": 1.7, - "learning_rate": 4.41502933351893e-05, - "loss": 0.3713, + "learning_rate": 4.415014213766894e-05, + "loss": 0.2803, "step": 2832500 }, { "epoch": 1.7, - "learning_rate": 4.414819336962873e-05, - "loss": 0.368, + "learning_rate": 4.414804217210838e-05, + "loss": 0.2794, "step": 2833000 }, { "epoch": 1.7, - "learning_rate": 4.414609340406817e-05, - "loss": 0.3687, + "learning_rate": 4.4145942206547814e-05, + "loss": 0.2811, "step": 2833500 }, { "epoch": 1.7, - "learning_rate": 4.4143993438507605e-05, - "loss": 0.3793, + "learning_rate": 4.414384224098725e-05, + "loss": 0.2856, "step": 2834000 }, { "epoch": 1.7, - "learning_rate": 4.4141893472947046e-05, - "loss": 0.3721, + "learning_rate": 4.414174227542669e-05, + "loss": 0.2805, "step": 2834500 }, { "epoch": 1.7, - "learning_rate": 4.41397977073176e-05, - "loss": 0.367, + "learning_rate": 4.4139642309866114e-05, + "loss": 0.2784, "step": 2835000 }, { "epoch": 1.7, - "learning_rate": 4.413769774175703e-05, - "loss": 0.3648, + "learning_rate": 4.413754234430555e-05, + "loss": 0.2762, "step": 2835500 }, { "epoch": 1.7, - "learning_rate": 4.4135597776196466e-05, - "loss": 0.3621, + "learning_rate": 4.413544237874499e-05, + "loss": 0.2734, "step": 2836000 }, { "epoch": 1.7, - "learning_rate": 4.4133497810635906e-05, - "loss": 0.363, + "learning_rate": 4.413334661311555e-05, + "loss": 0.2807, "step": 2836500 }, { "epoch": 1.7, - "learning_rate": 4.413139784507534e-05, - "loss": 0.3709, + "learning_rate": 4.413124664755498e-05, + "loss": 0.2807, "step": 2837000 }, { "epoch": 1.7, - "learning_rate": 4.412929787951477e-05, - "loss": 0.3624, + "learning_rate": 4.4129146681994415e-05, + "loss": 0.2741, "step": 2837500 }, { "epoch": 1.7, - "learning_rate": 4.4127197913954213e-05, - "loss": 0.3699, + "learning_rate": 4.412704671643385e-05, + "loss": 0.2786, "step": 2838000 }, { "epoch": 1.7, - "learning_rate": 4.412510214832477e-05, - "loss": 0.3725, + "learning_rate": 4.412494675087328e-05, + "loss": 0.2822, "step": 2838500 }, { "epoch": 1.7, - "learning_rate": 4.41230021827642e-05, - "loss": 0.3676, + "learning_rate": 4.412285098524384e-05, + "loss": 0.2817, "step": 2839000 }, { "epoch": 1.7, - "learning_rate": 4.4120902217203634e-05, - "loss": 0.3644, + "learning_rate": 4.412075101968328e-05, + "loss": 0.2734, "step": 2839500 }, { "epoch": 1.7, - "learning_rate": 4.4118802251643074e-05, - "loss": 0.366, + "learning_rate": 4.411865105412271e-05, + "loss": 0.2775, "step": 2840000 }, { "epoch": 1.7, - "learning_rate": 4.411670228608251e-05, - "loss": 0.3754, + "learning_rate": 4.411655528849327e-05, + "loss": 0.2856, "step": 2840500 }, { "epoch": 1.7, - "learning_rate": 4.411460232052194e-05, - "loss": 0.3649, + "learning_rate": 4.41144553229327e-05, + "loss": 0.2747, "step": 2841000 }, { "epoch": 1.7, - "learning_rate": 4.411250235496138e-05, - "loss": 0.3588, + "learning_rate": 4.411235535737214e-05, + "loss": 0.2736, "step": 2841500 }, { "epoch": 1.7, - "learning_rate": 4.4110402389400815e-05, - "loss": 0.3559, + "learning_rate": 4.411025539181158e-05, + "loss": 0.2706, "step": 2842000 }, { "epoch": 1.7, - "learning_rate": 4.410830662377137e-05, - "loss": 0.3623, + "learning_rate": 4.4108155426251003e-05, + "loss": 0.2801, "step": 2842500 }, { "epoch": 1.7, - "learning_rate": 4.410620665821081e-05, - "loss": 0.3787, + "learning_rate": 4.4106055460690444e-05, + "loss": 0.2866, "step": 2843000 }, { "epoch": 1.7, - "learning_rate": 4.410410669265024e-05, - "loss": 0.3612, + "learning_rate": 4.410395549512988e-05, + "loss": 0.2763, "step": 2843500 }, { "epoch": 1.71, - "learning_rate": 4.4102006727089675e-05, - "loss": 0.3692, + "learning_rate": 4.410185552956931e-05, + "loss": 0.2789, "step": 2844000 }, { "epoch": 1.71, - "learning_rate": 4.4099906761529116e-05, - "loss": 0.3628, + "learning_rate": 4.409975976393987e-05, + "loss": 0.2784, "step": 2844500 }, { "epoch": 1.71, - "learning_rate": 4.409781099589967e-05, - "loss": 0.3725, + "learning_rate": 4.4097659798379304e-05, + "loss": 0.2817, "step": 2845000 }, { "epoch": 1.71, - "learning_rate": 4.40957110303391e-05, - "loss": 0.3714, + "learning_rate": 4.409555983281874e-05, + "loss": 0.283, "step": 2845500 }, { "epoch": 1.71, - "learning_rate": 4.4093611064778536e-05, - "loss": 0.3661, + "learning_rate": 4.409345986725818e-05, + "loss": 0.2789, "step": 2846000 }, { "epoch": 1.71, - "learning_rate": 4.4091511099217976e-05, - "loss": 0.3698, + "learning_rate": 4.409136410162874e-05, + "loss": 0.2825, "step": 2846500 }, { "epoch": 1.71, - "learning_rate": 4.408941113365741e-05, - "loss": 0.3768, + "learning_rate": 4.4089264136068165e-05, + "loss": 0.2849, "step": 2847000 }, { "epoch": 1.71, - "learning_rate": 4.4087311168096837e-05, - "loss": 0.3625, + "learning_rate": 4.4087168370438726e-05, + "loss": 0.2799, "step": 2847500 }, { "epoch": 1.71, - "learning_rate": 4.40852154024674e-05, - "loss": 0.3681, + "learning_rate": 4.408506840487816e-05, + "loss": 0.2795, "step": 2848000 }, { "epoch": 1.71, - "learning_rate": 4.408311543690684e-05, - "loss": 0.3742, + "learning_rate": 4.40829684393176e-05, + "loss": 0.2819, "step": 2848500 }, { "epoch": 1.71, - "learning_rate": 4.408101547134627e-05, - "loss": 0.3601, + "learning_rate": 4.408086847375703e-05, + "loss": 0.2812, "step": 2849000 }, { "epoch": 1.71, - "learning_rate": 4.4078915505785704e-05, - "loss": 0.3703, + "learning_rate": 4.407876850819646e-05, + "loss": 0.2764, "step": 2849500 }, { "epoch": 1.71, - "learning_rate": 4.407681554022514e-05, - "loss": 0.3645, + "learning_rate": 4.40766685426359e-05, + "loss": 0.2822, "step": 2850000 }, { "epoch": 1.71, - "learning_rate": 4.40747197745957e-05, - "loss": 0.3676, + "learning_rate": 4.407456857707533e-05, + "loss": 0.2789, "step": 2850500 }, { "epoch": 1.71, - "learning_rate": 4.407261980903513e-05, - "loss": 0.3674, + "learning_rate": 4.4072468611514767e-05, + "loss": 0.277, "step": 2851000 }, { "epoch": 1.71, - "learning_rate": 4.407051984347457e-05, - "loss": 0.3655, + "learning_rate": 4.407037284588533e-05, + "loss": 0.2823, "step": 2851500 }, { "epoch": 1.71, - "learning_rate": 4.4068419877914005e-05, - "loss": 0.3764, + "learning_rate": 4.406827288032476e-05, + "loss": 0.281, "step": 2852000 }, { "epoch": 1.71, - "learning_rate": 4.406631991235343e-05, - "loss": 0.3689, + "learning_rate": 4.4066172914764194e-05, + "loss": 0.2783, "step": 2852500 }, { "epoch": 1.71, - "learning_rate": 4.406421994679287e-05, - "loss": 0.3709, + "learning_rate": 4.4064077149134754e-05, + "loss": 0.2814, "step": 2853000 }, { "epoch": 1.71, - "learning_rate": 4.4062119981232305e-05, - "loss": 0.3771, + "learning_rate": 4.4061977183574194e-05, + "loss": 0.2834, "step": 2853500 }, { "epoch": 1.71, - "learning_rate": 4.406002001567174e-05, - "loss": 0.3745, + "learning_rate": 4.405987721801362e-05, + "loss": 0.2872, "step": 2854000 }, { "epoch": 1.71, - "learning_rate": 4.40579242500423e-05, - "loss": 0.3677, + "learning_rate": 4.4057777252453055e-05, + "loss": 0.2791, "step": 2854500 }, { "epoch": 1.71, - "learning_rate": 4.405582428448173e-05, - "loss": 0.3754, + "learning_rate": 4.4055677286892495e-05, + "loss": 0.2809, "step": 2855000 }, { "epoch": 1.71, - "learning_rate": 4.4053724318921166e-05, - "loss": 0.365, + "learning_rate": 4.405357732133193e-05, + "loss": 0.2722, "step": 2855500 }, { "epoch": 1.71, - "learning_rate": 4.40516243533606e-05, - "loss": 0.3705, + "learning_rate": 4.405148155570249e-05, + "loss": 0.2765, "step": 2856000 }, { "epoch": 1.71, - "learning_rate": 4.404952438780004e-05, - "loss": 0.3798, + "learning_rate": 4.4049381590141915e-05, + "loss": 0.2778, "step": 2856500 }, { "epoch": 1.71, - "learning_rate": 4.404742442223947e-05, - "loss": 0.3661, + "learning_rate": 4.4047281624581355e-05, + "loss": 0.2816, "step": 2857000 }, { "epoch": 1.71, - "learning_rate": 4.404532445667891e-05, - "loss": 0.3644, + "learning_rate": 4.404518165902079e-05, + "loss": 0.2792, "step": 2857500 }, { "epoch": 1.71, - "learning_rate": 4.404322869104947e-05, - "loss": 0.3636, + "learning_rate": 4.404308169346022e-05, + "loss": 0.2777, "step": 2858000 }, { "epoch": 1.71, - "learning_rate": 4.40411287254889e-05, - "loss": 0.3671, + "learning_rate": 4.404098172789966e-05, + "loss": 0.2784, "step": 2858500 }, { "epoch": 1.71, - "learning_rate": 4.4039028759928334e-05, - "loss": 0.371, + "learning_rate": 4.4038881762339096e-05, + "loss": 0.2816, "step": 2859000 }, { "epoch": 1.71, - "learning_rate": 4.4036928794367774e-05, - "loss": 0.372, + "learning_rate": 4.403678179677853e-05, + "loss": 0.2811, "step": 2859500 }, { "epoch": 1.71, - "learning_rate": 4.403482882880721e-05, - "loss": 0.3603, + "learning_rate": 4.403468183121797e-05, + "loss": 0.277, "step": 2860000 }, { "epoch": 1.71, - "learning_rate": 4.403272886324664e-05, - "loss": 0.3714, + "learning_rate": 4.40325818656574e-05, + "loss": 0.2784, "step": 2860500 }, { "epoch": 1.72, - "learning_rate": 4.4030633097617195e-05, - "loss": 0.3725, + "learning_rate": 4.4030490299959084e-05, + "loss": 0.2784, "step": 2861000 }, { "epoch": 1.72, - "learning_rate": 4.4028533132056635e-05, - "loss": 0.3727, + "learning_rate": 4.402839033439851e-05, + "loss": 0.2867, "step": 2861500 }, { "epoch": 1.72, - "learning_rate": 4.402643316649607e-05, - "loss": 0.3676, + "learning_rate": 4.402629036883795e-05, + "loss": 0.2773, "step": 2862000 }, { "epoch": 1.72, - "learning_rate": 4.40243332009355e-05, - "loss": 0.3604, + "learning_rate": 4.4024190403277384e-05, + "loss": 0.2776, "step": 2862500 }, { "epoch": 1.72, - "learning_rate": 4.402223323537494e-05, - "loss": 0.3702, + "learning_rate": 4.402209043771682e-05, + "loss": 0.278, "step": 2863000 }, { "epoch": 1.72, - "learning_rate": 4.4020137469745496e-05, - "loss": 0.3702, + "learning_rate": 4.401999047215626e-05, + "loss": 0.2812, "step": 2863500 }, { "epoch": 1.72, - "learning_rate": 4.401803750418493e-05, - "loss": 0.361, + "learning_rate": 4.401789050659569e-05, + "loss": 0.2788, "step": 2864000 }, { "epoch": 1.72, - "learning_rate": 4.401593753862436e-05, - "loss": 0.3696, + "learning_rate": 4.4015790541035125e-05, + "loss": 0.2737, "step": 2864500 }, { "epoch": 1.72, - "learning_rate": 4.40138375730638e-05, - "loss": 0.3603, + "learning_rate": 4.4013690575474565e-05, + "loss": 0.2728, "step": 2865000 }, { "epoch": 1.72, - "learning_rate": 4.4011737607503236e-05, - "loss": 0.3607, + "learning_rate": 4.401159480984512e-05, + "loss": 0.2841, "step": 2865500 }, { "epoch": 1.72, - "learning_rate": 4.400964184187379e-05, - "loss": 0.3743, + "learning_rate": 4.400949484428455e-05, + "loss": 0.276, "step": 2866000 }, { "epoch": 1.72, - "learning_rate": 4.400754187631323e-05, - "loss": 0.3618, + "learning_rate": 4.4007394878723985e-05, + "loss": 0.2765, "step": 2866500 }, { "epoch": 1.72, - "learning_rate": 4.4005441910752664e-05, - "loss": 0.3601, + "learning_rate": 4.4005294913163426e-05, + "loss": 0.2778, "step": 2867000 }, { "epoch": 1.72, - "learning_rate": 4.40033419451921e-05, - "loss": 0.36, + "learning_rate": 4.400319914753398e-05, + "loss": 0.2779, "step": 2867500 }, { "epoch": 1.72, - "learning_rate": 4.400124197963154e-05, - "loss": 0.3746, + "learning_rate": 4.400109918197341e-05, + "loss": 0.2833, "step": 2868000 }, { "epoch": 1.72, - "learning_rate": 4.399914201407097e-05, - "loss": 0.3686, + "learning_rate": 4.399899921641285e-05, + "loss": 0.2783, "step": 2868500 }, { "epoch": 1.72, - "learning_rate": 4.3997042048510404e-05, - "loss": 0.3748, + "learning_rate": 4.3996899250852286e-05, + "loss": 0.2813, "step": 2869000 }, { "epoch": 1.72, - "learning_rate": 4.399494628288096e-05, - "loss": 0.3656, + "learning_rate": 4.399480348522284e-05, + "loss": 0.2776, "step": 2869500 }, { "epoch": 1.72, - "learning_rate": 4.39928463173204e-05, - "loss": 0.371, + "learning_rate": 4.3992703519662273e-05, + "loss": 0.2735, "step": 2870000 }, { "epoch": 1.72, - "learning_rate": 4.399074635175983e-05, - "loss": 0.3686, + "learning_rate": 4.3990603554101714e-05, + "loss": 0.2785, "step": 2870500 }, { "epoch": 1.72, - "learning_rate": 4.3988646386199265e-05, - "loss": 0.3733, + "learning_rate": 4.398850358854115e-05, + "loss": 0.2798, "step": 2871000 }, { "epoch": 1.72, - "learning_rate": 4.3986546420638705e-05, - "loss": 0.3675, + "learning_rate": 4.39864078229117e-05, + "loss": 0.2809, "step": 2871500 }, { "epoch": 1.72, - "learning_rate": 4.398444645507814e-05, - "loss": 0.3715, + "learning_rate": 4.3984307857351134e-05, + "loss": 0.2805, "step": 2872000 }, { "epoch": 1.72, - "learning_rate": 4.3982346489517565e-05, - "loss": 0.3677, + "learning_rate": 4.3982207891790574e-05, + "loss": 0.28, "step": 2872500 }, { "epoch": 1.72, - "learning_rate": 4.3980246523957006e-05, - "loss": 0.3654, + "learning_rate": 4.398010792623001e-05, + "loss": 0.2801, "step": 2873000 }, { "epoch": 1.72, - "learning_rate": 4.3978150758327566e-05, - "loss": 0.369, + "learning_rate": 4.397800796066944e-05, + "loss": 0.2776, "step": 2873500 }, { "epoch": 1.72, - "learning_rate": 4.3976050792767e-05, - "loss": 0.3755, + "learning_rate": 4.397591219504e-05, + "loss": 0.283, "step": 2874000 }, { "epoch": 1.72, - "learning_rate": 4.397395082720643e-05, - "loss": 0.3679, + "learning_rate": 4.3973812229479435e-05, + "loss": 0.2792, "step": 2874500 }, { "epoch": 1.72, - "learning_rate": 4.3971850861645866e-05, - "loss": 0.3723, + "learning_rate": 4.397171226391887e-05, + "loss": 0.2819, "step": 2875000 }, { "epoch": 1.72, - "learning_rate": 4.396975509601643e-05, - "loss": 0.3697, + "learning_rate": 4.396961229835831e-05, + "loss": 0.2871, "step": 2875500 }, { "epoch": 1.72, - "learning_rate": 4.396765513045586e-05, - "loss": 0.3551, + "learning_rate": 4.396751233279774e-05, + "loss": 0.2767, "step": 2876000 }, { "epoch": 1.72, - "learning_rate": 4.39655551648953e-05, - "loss": 0.3702, + "learning_rate": 4.3965416567168296e-05, + "loss": 0.2782, "step": 2876500 }, { "epoch": 1.72, - "learning_rate": 4.396345519933473e-05, - "loss": 0.3674, + "learning_rate": 4.396331660160773e-05, + "loss": 0.2793, "step": 2877000 }, { "epoch": 1.73, - "learning_rate": 4.396135943370529e-05, - "loss": 0.3554, + "learning_rate": 4.396121663604717e-05, + "loss": 0.2755, "step": 2877500 }, { "epoch": 1.73, - "learning_rate": 4.395925946814472e-05, - "loss": 0.3557, + "learning_rate": 4.39591166704866e-05, + "loss": 0.2743, "step": 2878000 }, { "epoch": 1.73, - "learning_rate": 4.3957163702515274e-05, - "loss": 0.3622, + "learning_rate": 4.3957020904857157e-05, + "loss": 0.2745, "step": 2878500 }, { "epoch": 1.73, - "learning_rate": 4.3955063736954715e-05, - "loss": 0.3663, + "learning_rate": 4.395492093929659e-05, + "loss": 0.2778, "step": 2879000 }, { "epoch": 1.73, - "learning_rate": 4.395296797132527e-05, - "loss": 0.3645, + "learning_rate": 4.395282097373603e-05, + "loss": 0.279, "step": 2879500 }, { "epoch": 1.73, - "learning_rate": 4.395087220569583e-05, - "loss": 0.3716, + "learning_rate": 4.3950721008175464e-05, + "loss": 0.2777, "step": 2880000 }, { "epoch": 1.73, - "learning_rate": 4.394877644006638e-05, - "loss": 0.378, + "learning_rate": 4.394862524254602e-05, + "loss": 0.2776, "step": 2880500 }, { "epoch": 1.73, - "learning_rate": 4.3946676474505816e-05, - "loss": 0.3552, + "learning_rate": 4.394652527698546e-05, + "loss": 0.2768, "step": 2881000 }, { "epoch": 1.73, - "learning_rate": 4.3944576508945256e-05, - "loss": 0.3595, + "learning_rate": 4.394442531142489e-05, + "loss": 0.2724, "step": 2881500 }, { "epoch": 1.73, - "learning_rate": 4.394247654338469e-05, - "loss": 0.3698, + "learning_rate": 4.3942325345864324e-05, + "loss": 0.2804, "step": 2882000 }, { "epoch": 1.73, - "learning_rate": 4.394037657782412e-05, - "loss": 0.363, + "learning_rate": 4.3940225380303765e-05, + "loss": 0.2796, "step": 2882500 }, { "epoch": 1.73, - "learning_rate": 4.393827661226356e-05, - "loss": 0.3728, + "learning_rate": 4.393812961467432e-05, + "loss": 0.2795, "step": 2883000 }, { "epoch": 1.73, - "learning_rate": 4.393617664670299e-05, - "loss": 0.3679, + "learning_rate": 4.393602964911375e-05, + "loss": 0.2863, "step": 2883500 }, { "epoch": 1.73, - "learning_rate": 4.393407668114242e-05, - "loss": 0.3702, + "learning_rate": 4.3933929683553185e-05, + "loss": 0.2785, "step": 2884000 }, { "epoch": 1.73, - "learning_rate": 4.393197671558186e-05, - "loss": 0.3752, + "learning_rate": 4.3931833917923746e-05, + "loss": 0.2808, "step": 2884500 }, { "epoch": 1.73, - "learning_rate": 4.39298767500213e-05, - "loss": 0.3593, + "learning_rate": 4.392973395236318e-05, + "loss": 0.28, "step": 2885000 }, { "epoch": 1.73, - "learning_rate": 4.392777678446073e-05, - "loss": 0.3691, + "learning_rate": 4.392763398680261e-05, + "loss": 0.2801, "step": 2885500 }, { "epoch": 1.73, - "learning_rate": 4.392567681890017e-05, - "loss": 0.3578, + "learning_rate": 4.3925534021242046e-05, + "loss": 0.2749, "step": 2886000 }, { "epoch": 1.73, - "learning_rate": 4.3923576853339604e-05, - "loss": 0.3738, + "learning_rate": 4.3923434055681486e-05, + "loss": 0.2817, "step": 2886500 }, { "epoch": 1.73, - "learning_rate": 4.3921476887779044e-05, - "loss": 0.3719, + "learning_rate": 4.392133409012092e-05, + "loss": 0.2807, "step": 2887000 }, { "epoch": 1.73, - "learning_rate": 4.391937692221848e-05, - "loss": 0.3589, + "learning_rate": 4.391923412456035e-05, + "loss": 0.2755, "step": 2887500 }, { "epoch": 1.73, - "learning_rate": 4.391727695665791e-05, - "loss": 0.3716, + "learning_rate": 4.391713415899979e-05, + "loss": 0.2861, "step": 2888000 }, { "epoch": 1.73, - "learning_rate": 4.391517699109735e-05, - "loss": 0.3672, + "learning_rate": 4.391503419343923e-05, + "loss": 0.2797, "step": 2888500 }, { "epoch": 1.73, - "learning_rate": 4.391307702553678e-05, - "loss": 0.3643, + "learning_rate": 4.391293422787867e-05, + "loss": 0.2722, "step": 2889000 }, { "epoch": 1.73, - "learning_rate": 4.391097705997621e-05, - "loss": 0.367, + "learning_rate": 4.39108342623181e-05, + "loss": 0.2744, "step": 2889500 }, { "epoch": 1.73, - "learning_rate": 4.390887709441565e-05, - "loss": 0.3661, + "learning_rate": 4.3908734296757534e-05, + "loss": 0.2786, "step": 2890000 }, { "epoch": 1.73, - "learning_rate": 4.390678132878621e-05, - "loss": 0.3714, + "learning_rate": 4.390663853112809e-05, + "loss": 0.2829, "step": 2890500 }, { "epoch": 1.73, - "learning_rate": 4.3904681363225646e-05, - "loss": 0.3666, + "learning_rate": 4.390453856556753e-05, + "loss": 0.2774, "step": 2891000 }, { "epoch": 1.73, - "learning_rate": 4.390258139766507e-05, - "loss": 0.3632, + "learning_rate": 4.390243860000696e-05, + "loss": 0.2802, "step": 2891500 }, { "epoch": 1.73, - "learning_rate": 4.390048143210451e-05, - "loss": 0.3731, + "learning_rate": 4.3900338634446395e-05, + "loss": 0.2831, "step": 2892000 }, { "epoch": 1.73, - "learning_rate": 4.389838566647507e-05, - "loss": 0.3788, + "learning_rate": 4.389824286881695e-05, + "loss": 0.2827, "step": 2892500 }, { "epoch": 1.73, - "learning_rate": 4.3896289900845626e-05, - "loss": 0.3768, + "learning_rate": 4.389614290325639e-05, + "loss": 0.2895, "step": 2893000 }, { "epoch": 1.73, - "learning_rate": 4.389418993528506e-05, - "loss": 0.3644, + "learning_rate": 4.389404293769582e-05, + "loss": 0.2747, "step": 2893500 }, { "epoch": 1.74, - "learning_rate": 4.38920899697245e-05, - "loss": 0.3726, + "learning_rate": 4.3891942972135255e-05, + "loss": 0.2826, "step": 2894000 }, { "epoch": 1.74, - "learning_rate": 4.3889990004163934e-05, - "loss": 0.3683, + "learning_rate": 4.3889843006574696e-05, + "loss": 0.2805, "step": 2894500 }, { "epoch": 1.74, - "learning_rate": 4.388789423853449e-05, - "loss": 0.3712, + "learning_rate": 4.388774304101413e-05, + "loss": 0.2771, "step": 2895000 }, { "epoch": 1.74, - "learning_rate": 4.388579427297392e-05, - "loss": 0.3724, + "learning_rate": 4.388564727538468e-05, + "loss": 0.2856, "step": 2895500 }, { "epoch": 1.74, - "learning_rate": 4.388369430741336e-05, - "loss": 0.3657, + "learning_rate": 4.388354730982412e-05, + "loss": 0.2748, "step": 2896000 }, { "epoch": 1.74, - "learning_rate": 4.3881594341852794e-05, - "loss": 0.3659, + "learning_rate": 4.3881447344263556e-05, + "loss": 0.282, "step": 2896500 }, { "epoch": 1.74, - "learning_rate": 4.387949437629223e-05, - "loss": 0.3651, + "learning_rate": 4.387934737870299e-05, + "loss": 0.2776, "step": 2897000 }, { "epoch": 1.74, - "learning_rate": 4.387739441073167e-05, - "loss": 0.371, + "learning_rate": 4.387724741314243e-05, + "loss": 0.2773, "step": 2897500 }, { "epoch": 1.74, - "learning_rate": 4.38752944451711e-05, - "loss": 0.3717, + "learning_rate": 4.387514744758186e-05, + "loss": 0.2802, "step": 2898000 }, { "epoch": 1.74, - "learning_rate": 4.387319447961053e-05, - "loss": 0.3734, + "learning_rate": 4.387305168195242e-05, + "loss": 0.2819, "step": 2898500 }, { "epoch": 1.74, - "learning_rate": 4.387109871398109e-05, - "loss": 0.3612, + "learning_rate": 4.387095171639185e-05, + "loss": 0.2744, "step": 2899000 }, { "epoch": 1.74, - "learning_rate": 4.386899874842053e-05, - "loss": 0.3662, + "learning_rate": 4.386885175083129e-05, + "loss": 0.2874, "step": 2899500 }, { "epoch": 1.74, - "learning_rate": 4.386689878285996e-05, - "loss": 0.367, + "learning_rate": 4.386675178527072e-05, + "loss": 0.2745, "step": 2900000 }, { "epoch": 1.74, - "eval_loss": 0.35273319482803345, - "eval_runtime": 1140.8155, - "eval_samples_per_second": 461.705, - "eval_steps_per_second": 76.951, + "eval_loss": 0.2543603181838989, + "eval_runtime": 1454.9796, + "eval_samples_per_second": 362.012, + "eval_steps_per_second": 60.336, "step": 2900000 }, { "epoch": 1.74, - "learning_rate": 4.3864798817299396e-05, - "loss": 0.365, + "learning_rate": 4.386465181971015e-05, + "loss": 0.2725, "step": 2900500 }, { "epoch": 1.74, - "learning_rate": 4.386269885173883e-05, - "loss": 0.3641, + "learning_rate": 4.386255185414959e-05, + "loss": 0.2748, "step": 2901000 }, { "epoch": 1.74, - "learning_rate": 4.386059888617826e-05, - "loss": 0.3642, + "learning_rate": 4.386045608852015e-05, + "loss": 0.2812, "step": 2901500 }, { "epoch": 1.74, - "learning_rate": 4.385850312054882e-05, - "loss": 0.3628, + "learning_rate": 4.3858356122959585e-05, + "loss": 0.2781, "step": 2902000 }, { "epoch": 1.74, - "learning_rate": 4.385640315498826e-05, - "loss": 0.3685, + "learning_rate": 4.385625615739902e-05, + "loss": 0.2792, "step": 2902500 }, { "epoch": 1.74, - "learning_rate": 4.3854303189427697e-05, - "loss": 0.3718, + "learning_rate": 4.385415619183845e-05, + "loss": 0.2789, "step": 2903000 }, { "epoch": 1.74, - "learning_rate": 4.385220322386712e-05, - "loss": 0.3655, + "learning_rate": 4.3852056226277885e-05, + "loss": 0.2768, "step": 2903500 }, { "epoch": 1.74, - "learning_rate": 4.3850103258306564e-05, - "loss": 0.3644, + "learning_rate": 4.3849956260717326e-05, + "loss": 0.2725, "step": 2904000 }, { "epoch": 1.74, - "learning_rate": 4.3848003292746e-05, - "loss": 0.3744, + "learning_rate": 4.384785629515676e-05, + "loss": 0.2809, "step": 2904500 }, { "epoch": 1.74, - "learning_rate": 4.384590332718543e-05, - "loss": 0.3721, + "learning_rate": 4.384575632959619e-05, + "loss": 0.2769, "step": 2905000 }, { "epoch": 1.74, - "learning_rate": 4.384380336162487e-05, - "loss": 0.3632, + "learning_rate": 4.3843660563966746e-05, + "loss": 0.2759, "step": 2905500 }, { "epoch": 1.74, - "learning_rate": 4.3841707595995424e-05, - "loss": 0.3622, + "learning_rate": 4.3841560598406186e-05, + "loss": 0.2751, "step": 2906000 }, { "epoch": 1.74, - "learning_rate": 4.383960763043486e-05, - "loss": 0.3646, + "learning_rate": 4.383946063284562e-05, + "loss": 0.2762, "step": 2906500 }, { "epoch": 1.74, - "learning_rate": 4.383750766487429e-05, - "loss": 0.3662, + "learning_rate": 4.383736066728505e-05, + "loss": 0.2768, "step": 2907000 }, { "epoch": 1.74, - "learning_rate": 4.383540769931373e-05, - "loss": 0.3616, + "learning_rate": 4.383526490165561e-05, + "loss": 0.2733, "step": 2907500 }, { "epoch": 1.74, - "learning_rate": 4.3833307733753165e-05, - "loss": 0.3608, + "learning_rate": 4.383316493609505e-05, + "loss": 0.2805, "step": 2908000 }, { "epoch": 1.74, - "learning_rate": 4.38312077681926e-05, - "loss": 0.3641, + "learning_rate": 4.383106917046561e-05, + "loss": 0.2817, "step": 2908500 }, { "epoch": 1.74, - "learning_rate": 4.382911200256316e-05, - "loss": 0.3604, + "learning_rate": 4.382896920490504e-05, + "loss": 0.2746, "step": 2909000 }, { "epoch": 1.74, - "learning_rate": 4.382701203700259e-05, - "loss": 0.3666, + "learning_rate": 4.3826869239344474e-05, + "loss": 0.2735, "step": 2909500 }, { "epoch": 1.74, - "learning_rate": 4.3824912071442026e-05, - "loss": 0.3708, + "learning_rate": 4.382476927378391e-05, + "loss": 0.2798, "step": 2910000 }, { "epoch": 1.74, - "learning_rate": 4.382281630581258e-05, - "loss": 0.374, + "learning_rate": 4.382266930822334e-05, + "loss": 0.2832, "step": 2910500 }, { "epoch": 1.75, - "learning_rate": 4.382071634025202e-05, - "loss": 0.3745, + "learning_rate": 4.382056934266278e-05, + "loss": 0.2794, "step": 2911000 }, { "epoch": 1.75, - "learning_rate": 4.381861637469145e-05, - "loss": 0.3596, + "learning_rate": 4.3818469377102215e-05, + "loss": 0.277, "step": 2911500 }, { "epoch": 1.75, - "learning_rate": 4.3816516409130886e-05, - "loss": 0.3801, + "learning_rate": 4.381636941154165e-05, + "loss": 0.2828, "step": 2912000 }, { "epoch": 1.75, - "learning_rate": 4.3814416443570327e-05, - "loss": 0.3706, + "learning_rate": 4.38142736459122e-05, + "loss": 0.2811, "step": 2912500 }, { "epoch": 1.75, - "learning_rate": 4.381232067794088e-05, - "loss": 0.3644, + "learning_rate": 4.381217368035164e-05, + "loss": 0.2753, "step": 2913000 }, { "epoch": 1.75, - "learning_rate": 4.3810220712380314e-05, - "loss": 0.3587, + "learning_rate": 4.3810073714791076e-05, + "loss": 0.2823, "step": 2913500 }, { "epoch": 1.75, - "learning_rate": 4.380812074681975e-05, - "loss": 0.369, + "learning_rate": 4.380797374923051e-05, + "loss": 0.2778, "step": 2914000 }, { "epoch": 1.75, - "learning_rate": 4.380602078125919e-05, - "loss": 0.3665, + "learning_rate": 4.380587378366995e-05, + "loss": 0.2761, "step": 2914500 }, { "epoch": 1.75, - "learning_rate": 4.380392081569862e-05, - "loss": 0.3637, + "learning_rate": 4.38037780180405e-05, + "loss": 0.2776, "step": 2915000 }, { "epoch": 1.75, - "learning_rate": 4.3801820850138054e-05, - "loss": 0.3674, + "learning_rate": 4.3801678052479936e-05, + "loss": 0.2781, "step": 2915500 }, { "epoch": 1.75, - "learning_rate": 4.3799720884577494e-05, - "loss": 0.3735, + "learning_rate": 4.379957808691937e-05, + "loss": 0.2802, "step": 2916000 }, { "epoch": 1.75, - "learning_rate": 4.379762091901693e-05, - "loss": 0.3657, + "learning_rate": 4.379747812135881e-05, + "loss": 0.2746, "step": 2916500 }, { "epoch": 1.75, - "learning_rate": 4.379552095345636e-05, - "loss": 0.3719, + "learning_rate": 4.3795382355729364e-05, + "loss": 0.2784, "step": 2917000 }, { "epoch": 1.75, - "learning_rate": 4.37934209878958e-05, - "loss": 0.3631, + "learning_rate": 4.37932823901688e-05, + "loss": 0.2759, "step": 2917500 }, { "epoch": 1.75, - "learning_rate": 4.3791325222266355e-05, - "loss": 0.367, + "learning_rate": 4.379118662453936e-05, + "loss": 0.2774, "step": 2918000 }, { "epoch": 1.75, - "learning_rate": 4.378922525670579e-05, - "loss": 0.3646, + "learning_rate": 4.37890866589788e-05, + "loss": 0.2781, "step": 2918500 }, { "epoch": 1.75, - "learning_rate": 4.378712529114523e-05, - "loss": 0.369, + "learning_rate": 4.378698669341823e-05, + "loss": 0.2763, "step": 2919000 }, { "epoch": 1.75, - "learning_rate": 4.378502532558466e-05, - "loss": 0.372, + "learning_rate": 4.378488672785766e-05, + "loss": 0.2778, "step": 2919500 }, { "epoch": 1.75, - "learning_rate": 4.3782925360024096e-05, - "loss": 0.3689, + "learning_rate": 4.37827867622971e-05, + "loss": 0.2781, "step": 2920000 }, { "epoch": 1.75, - "learning_rate": 4.378082539446353e-05, - "loss": 0.3661, + "learning_rate": 4.378068679673653e-05, + "loss": 0.2772, "step": 2920500 }, { "epoch": 1.75, - "learning_rate": 4.377872542890296e-05, - "loss": 0.3741, + "learning_rate": 4.3778586831175965e-05, + "loss": 0.2844, "step": 2921000 }, { "epoch": 1.75, - "learning_rate": 4.3776625463342396e-05, - "loss": 0.3636, + "learning_rate": 4.377649106554652e-05, + "loss": 0.2808, "step": 2921500 }, { "epoch": 1.75, - "learning_rate": 4.3774529697712956e-05, - "loss": 0.3703, + "learning_rate": 4.377439109998596e-05, + "loss": 0.2814, "step": 2922000 }, { "epoch": 1.75, - "learning_rate": 4.37724297321524e-05, - "loss": 0.3682, + "learning_rate": 4.377229113442539e-05, + "loss": 0.2821, "step": 2922500 }, { "epoch": 1.75, - "learning_rate": 4.3770329766591823e-05, - "loss": 0.3707, + "learning_rate": 4.3770191168864826e-05, + "loss": 0.2749, "step": 2923000 }, { "epoch": 1.75, - "learning_rate": 4.3768234000962384e-05, - "loss": 0.3646, + "learning_rate": 4.3768091203304266e-05, + "loss": 0.2794, "step": 2923500 }, { "epoch": 1.75, - "learning_rate": 4.376613403540182e-05, - "loss": 0.3694, + "learning_rate": 4.37659912377437e-05, + "loss": 0.2779, "step": 2924000 }, { "epoch": 1.75, - "learning_rate": 4.376403406984126e-05, - "loss": 0.3737, + "learning_rate": 4.376389547211425e-05, + "loss": 0.2806, "step": 2924500 }, { "epoch": 1.75, - "learning_rate": 4.376193410428069e-05, - "loss": 0.3784, + "learning_rate": 4.376179550655369e-05, + "loss": 0.2836, "step": 2925000 }, { "epoch": 1.75, - "learning_rate": 4.3759834138720124e-05, - "loss": 0.3679, + "learning_rate": 4.375969554099313e-05, + "loss": 0.278, "step": 2925500 }, { "epoch": 1.75, - "learning_rate": 4.3757738373090685e-05, - "loss": 0.361, + "learning_rate": 4.375759557543256e-05, + "loss": 0.2736, "step": 2926000 }, { "epoch": 1.75, - "learning_rate": 4.375563840753012e-05, - "loss": 0.3628, + "learning_rate": 4.3755495609872e-05, + "loss": 0.2793, "step": 2926500 }, { "epoch": 1.75, - "learning_rate": 4.375354264190067e-05, - "loss": 0.3748, + "learning_rate": 4.3753395644311434e-05, + "loss": 0.2842, "step": 2927000 }, { "epoch": 1.76, - "learning_rate": 4.3751442676340105e-05, - "loss": 0.3662, + "learning_rate": 4.375129567875087e-05, + "loss": 0.277, "step": 2927500 }, { "epoch": 1.76, - "learning_rate": 4.3749342710779545e-05, - "loss": 0.3634, + "learning_rate": 4.374919571319031e-05, + "loss": 0.2693, "step": 2928000 }, { "epoch": 1.76, - "learning_rate": 4.374724274521898e-05, - "loss": 0.359, + "learning_rate": 4.374709574762974e-05, + "loss": 0.2736, "step": 2928500 }, { "epoch": 1.76, - "learning_rate": 4.374514277965841e-05, - "loss": 0.3696, + "learning_rate": 4.3744999982000295e-05, + "loss": 0.2767, "step": 2929000 }, { "epoch": 1.76, - "learning_rate": 4.374304281409785e-05, - "loss": 0.3772, + "learning_rate": 4.374290001643973e-05, + "loss": 0.2818, "step": 2929500 }, { "epoch": 1.76, - "learning_rate": 4.3740942848537286e-05, - "loss": 0.3656, + "learning_rate": 4.374080005087917e-05, + "loss": 0.2745, "step": 2930000 }, { "epoch": 1.76, - "learning_rate": 4.373884288297671e-05, - "loss": 0.3705, + "learning_rate": 4.37387000853186e-05, + "loss": 0.2854, "step": 2930500 }, { "epoch": 1.76, - "learning_rate": 4.373674291741615e-05, - "loss": 0.3571, + "learning_rate": 4.3736604319689155e-05, + "loss": 0.2792, "step": 2931000 }, { "epoch": 1.76, - "learning_rate": 4.3734642951855586e-05, - "loss": 0.3654, + "learning_rate": 4.373450435412859e-05, + "loss": 0.2744, "step": 2931500 }, { "epoch": 1.76, - "learning_rate": 4.373254298629502e-05, - "loss": 0.366, + "learning_rate": 4.373240438856803e-05, + "loss": 0.2766, "step": 2932000 }, { "epoch": 1.76, - "learning_rate": 4.373044302073446e-05, - "loss": 0.3585, + "learning_rate": 4.373030442300746e-05, + "loss": 0.2722, "step": 2932500 }, { "epoch": 1.76, - "learning_rate": 4.3728347255105014e-05, - "loss": 0.3754, + "learning_rate": 4.3728204457446896e-05, + "loss": 0.2834, "step": 2933000 }, { "epoch": 1.76, - "learning_rate": 4.372624728954445e-05, - "loss": 0.3601, + "learning_rate": 4.3726108691817456e-05, + "loss": 0.2738, "step": 2933500 }, { "epoch": 1.76, - "learning_rate": 4.372414732398389e-05, - "loss": 0.3635, + "learning_rate": 4.372400872625689e-05, + "loss": 0.2801, "step": 2934000 }, { "epoch": 1.76, - "learning_rate": 4.372204735842332e-05, - "loss": 0.3632, + "learning_rate": 4.372190876069632e-05, + "loss": 0.2722, "step": 2934500 }, { "epoch": 1.76, - "learning_rate": 4.3719951592793874e-05, - "loss": 0.3648, + "learning_rate": 4.371980879513576e-05, + "loss": 0.2805, "step": 2935000 }, { "epoch": 1.76, - "learning_rate": 4.371785162723331e-05, - "loss": 0.3714, + "learning_rate": 4.37177088295752e-05, + "loss": 0.282, "step": 2935500 }, { "epoch": 1.76, - "learning_rate": 4.371575166167275e-05, - "loss": 0.3639, + "learning_rate": 4.371561306394575e-05, + "loss": 0.274, "step": 2936000 }, { "epoch": 1.76, - "learning_rate": 4.371365589604331e-05, - "loss": 0.3605, + "learning_rate": 4.3713513098385184e-05, + "loss": 0.2746, "step": 2936500 }, { "epoch": 1.76, - "learning_rate": 4.371156013041386e-05, - "loss": 0.3647, + "learning_rate": 4.3711413132824624e-05, + "loss": 0.276, "step": 2937000 }, { "epoch": 1.76, - "learning_rate": 4.3709460164853296e-05, - "loss": 0.3654, + "learning_rate": 4.370931316726406e-05, + "loss": 0.2777, "step": 2937500 }, { "epoch": 1.76, - "learning_rate": 4.370736019929273e-05, - "loss": 0.3635, + "learning_rate": 4.370721740163461e-05, + "loss": 0.2742, "step": 2938000 }, { "epoch": 1.76, - "learning_rate": 4.370526023373217e-05, - "loss": 0.3679, + "learning_rate": 4.3705117436074045e-05, + "loss": 0.2767, "step": 2938500 }, { "epoch": 1.76, - "learning_rate": 4.37031602681716e-05, - "loss": 0.3562, + "learning_rate": 4.3703021670444605e-05, + "loss": 0.2744, "step": 2939000 }, { "epoch": 1.76, - "learning_rate": 4.370106030261104e-05, - "loss": 0.3705, + "learning_rate": 4.370092170488404e-05, + "loss": 0.2793, "step": 2939500 }, { "epoch": 1.76, - "learning_rate": 4.369896033705047e-05, - "loss": 0.3665, + "learning_rate": 4.369882173932347e-05, + "loss": 0.274, "step": 2940000 }, { "epoch": 1.76, - "learning_rate": 4.36968603714899e-05, - "loss": 0.3665, + "learning_rate": 4.369672177376291e-05, + "loss": 0.2759, "step": 2940500 }, { "epoch": 1.76, - "learning_rate": 4.369476040592934e-05, - "loss": 0.3646, + "learning_rate": 4.3694621808202346e-05, + "loss": 0.2774, "step": 2941000 }, { "epoch": 1.76, - "learning_rate": 4.3692664640299904e-05, - "loss": 0.3646, + "learning_rate": 4.369252184264178e-05, + "loss": 0.2795, "step": 2941500 }, { "epoch": 1.76, - "learning_rate": 4.369056467473933e-05, - "loss": 0.3729, + "learning_rate": 4.369042187708122e-05, + "loss": 0.2859, "step": 2942000 }, { "epoch": 1.76, - "learning_rate": 4.3688464709178764e-05, - "loss": 0.3703, + "learning_rate": 4.368832191152065e-05, + "loss": 0.2783, "step": 2942500 }, { "epoch": 1.76, - "learning_rate": 4.3686364743618204e-05, - "loss": 0.366, + "learning_rate": 4.3686221945960086e-05, + "loss": 0.2771, "step": 2943000 }, { "epoch": 1.76, - "learning_rate": 4.368426477805764e-05, - "loss": 0.3672, + "learning_rate": 4.368412618033064e-05, + "loss": 0.2717, "step": 2943500 }, { "epoch": 1.77, - "learning_rate": 4.368216481249707e-05, - "loss": 0.368, + "learning_rate": 4.368202621477008e-05, + "loss": 0.2797, "step": 2944000 }, { "epoch": 1.77, - "learning_rate": 4.368006484693651e-05, - "loss": 0.3653, + "learning_rate": 4.3679926249209513e-05, + "loss": 0.2812, "step": 2944500 }, { "epoch": 1.77, - "learning_rate": 4.3677964881375945e-05, - "loss": 0.36, + "learning_rate": 4.367783048358007e-05, + "loss": 0.2747, "step": 2945000 }, { "epoch": 1.77, - "learning_rate": 4.36758691157465e-05, - "loss": 0.3674, + "learning_rate": 4.36757305180195e-05, + "loss": 0.2779, "step": 2945500 }, { "epoch": 1.77, - "learning_rate": 4.367376915018594e-05, - "loss": 0.3705, + "learning_rate": 4.367363055245894e-05, + "loss": 0.2751, "step": 2946000 }, { "epoch": 1.77, - "learning_rate": 4.367166918462537e-05, - "loss": 0.3768, + "learning_rate": 4.3671534786829494e-05, + "loss": 0.2814, "step": 2946500 }, { "epoch": 1.77, - "learning_rate": 4.3669569219064805e-05, - "loss": 0.3653, + "learning_rate": 4.366943482126893e-05, + "loss": 0.2809, "step": 2947000 }, { "epoch": 1.77, - "learning_rate": 4.366747345343536e-05, - "loss": 0.3731, + "learning_rate": 4.366733485570837e-05, + "loss": 0.2783, "step": 2947500 }, { "epoch": 1.77, - "learning_rate": 4.36653734878748e-05, - "loss": 0.3654, + "learning_rate": 4.36652348901478e-05, + "loss": 0.2751, "step": 2948000 }, { "epoch": 1.77, - "learning_rate": 4.366327772224536e-05, - "loss": 0.3608, + "learning_rate": 4.3663134924587235e-05, + "loss": 0.2795, "step": 2948500 }, { "epoch": 1.77, - "learning_rate": 4.366117775668479e-05, - "loss": 0.3696, + "learning_rate": 4.3661034959026675e-05, + "loss": 0.2795, "step": 2949000 }, { "epoch": 1.77, - "learning_rate": 4.365907779112422e-05, - "loss": 0.3668, + "learning_rate": 4.365893499346611e-05, + "loss": 0.278, "step": 2949500 }, { "epoch": 1.77, - "learning_rate": 4.365697782556366e-05, - "loss": 0.3683, + "learning_rate": 4.365683502790554e-05, + "loss": 0.2762, "step": 2950000 }, { "epoch": 1.77, - "learning_rate": 4.365487786000309e-05, - "loss": 0.3671, + "learning_rate": 4.365473506234498e-05, + "loss": 0.28, "step": 2950500 }, { "epoch": 1.77, - "learning_rate": 4.3652782094373654e-05, - "loss": 0.3645, + "learning_rate": 4.365263509678441e-05, + "loss": 0.2779, "step": 2951000 }, { "epoch": 1.77, - "learning_rate": 4.365068212881308e-05, - "loss": 0.3703, + "learning_rate": 4.365053513122384e-05, + "loss": 0.2827, "step": 2951500 }, { "epoch": 1.77, - "learning_rate": 4.364858216325252e-05, - "loss": 0.3671, + "learning_rate": 4.364843516566328e-05, + "loss": 0.275, "step": 2952000 }, { "epoch": 1.77, - "learning_rate": 4.3646482197691954e-05, - "loss": 0.3678, + "learning_rate": 4.3646335200102716e-05, + "loss": 0.2779, "step": 2952500 }, { "epoch": 1.77, - "learning_rate": 4.3644382232131394e-05, - "loss": 0.364, + "learning_rate": 4.3644239434473276e-05, + "loss": 0.2752, "step": 2953000 }, { "epoch": 1.77, - "learning_rate": 4.364228226657083e-05, - "loss": 0.3709, + "learning_rate": 4.36421394689127e-05, + "loss": 0.274, "step": 2953500 }, { "epoch": 1.77, - "learning_rate": 4.364018230101026e-05, - "loss": 0.3628, + "learning_rate": 4.364003950335214e-05, + "loss": 0.2747, "step": 2954000 }, { "epoch": 1.77, - "learning_rate": 4.36380823354497e-05, - "loss": 0.3672, + "learning_rate": 4.363793953779158e-05, + "loss": 0.2781, "step": 2954500 }, { "epoch": 1.77, - "learning_rate": 4.3635990769751375e-05, - "loss": 0.3702, + "learning_rate": 4.363584377216214e-05, + "loss": 0.2827, "step": 2955000 }, { "epoch": 1.77, - "learning_rate": 4.3633890804190815e-05, - "loss": 0.3598, + "learning_rate": 4.363374380660158e-05, + "loss": 0.2797, "step": 2955500 }, { "epoch": 1.77, - "learning_rate": 4.363179083863025e-05, - "loss": 0.361, + "learning_rate": 4.3631643841041004e-05, + "loss": 0.2705, "step": 2956000 }, { "epoch": 1.77, - "learning_rate": 4.3629690873069676e-05, - "loss": 0.3735, + "learning_rate": 4.362954387548044e-05, + "loss": 0.2833, "step": 2956500 }, { "epoch": 1.77, - "learning_rate": 4.3627590907509116e-05, - "loss": 0.3653, + "learning_rate": 4.3627448109851e-05, + "loss": 0.2761, "step": 2957000 }, { "epoch": 1.77, - "learning_rate": 4.362549094194855e-05, - "loss": 0.3704, + "learning_rate": 4.362534814429044e-05, + "loss": 0.2825, "step": 2957500 }, { "epoch": 1.77, - "learning_rate": 4.362339097638798e-05, - "loss": 0.3653, + "learning_rate": 4.3623248178729865e-05, + "loss": 0.2783, "step": 2958000 }, { "epoch": 1.77, - "learning_rate": 4.362129101082742e-05, - "loss": 0.3658, + "learning_rate": 4.36211482131693e-05, + "loss": 0.2824, "step": 2958500 }, { "epoch": 1.77, - "learning_rate": 4.3619195245197976e-05, - "loss": 0.3701, + "learning_rate": 4.361905244753986e-05, + "loss": 0.2761, "step": 2959000 }, { "epoch": 1.77, - "learning_rate": 4.361709527963741e-05, - "loss": 0.3635, + "learning_rate": 4.36169524819793e-05, + "loss": 0.2801, "step": 2959500 }, { "epoch": 1.77, - "learning_rate": 4.361499951400797e-05, - "loss": 0.3747, + "learning_rate": 4.361485251641873e-05, + "loss": 0.2785, "step": 2960000 }, { "epoch": 1.77, - "learning_rate": 4.361289954844741e-05, - "loss": 0.3574, + "learning_rate": 4.361275255085816e-05, + "loss": 0.2689, "step": 2960500 }, { "epoch": 1.78, - "learning_rate": 4.3610803782817964e-05, - "loss": 0.3632, + "learning_rate": 4.36106525852976e-05, + "loss": 0.2771, "step": 2961000 }, { "epoch": 1.78, - "learning_rate": 4.36087038172574e-05, - "loss": 0.3727, + "learning_rate": 4.360855261973703e-05, + "loss": 0.2849, "step": 2961500 }, { "epoch": 1.78, - "learning_rate": 4.360660385169683e-05, - "loss": 0.3778, + "learning_rate": 4.360645265417647e-05, + "loss": 0.2806, "step": 2962000 }, { "epoch": 1.78, - "learning_rate": 4.360450388613627e-05, - "loss": 0.3548, + "learning_rate": 4.3604352688615906e-05, + "loss": 0.2757, "step": 2962500 }, { "epoch": 1.78, - "learning_rate": 4.3602403920575705e-05, - "loss": 0.3759, + "learning_rate": 4.360225692298646e-05, + "loss": 0.2798, "step": 2963000 }, { "epoch": 1.78, - "learning_rate": 4.360030395501513e-05, - "loss": 0.3556, + "learning_rate": 4.3600156957425893e-05, + "loss": 0.2675, "step": 2963500 }, { "epoch": 1.78, - "learning_rate": 4.359820398945457e-05, - "loss": 0.3721, + "learning_rate": 4.3598056991865334e-05, + "loss": 0.2819, "step": 2964000 }, { "epoch": 1.78, - "learning_rate": 4.3596104023894005e-05, - "loss": 0.3671, + "learning_rate": 4.359595702630477e-05, + "loss": 0.2769, "step": 2964500 }, { "epoch": 1.78, - "learning_rate": 4.359400405833344e-05, - "loss": 0.3721, + "learning_rate": 4.35938570607442e-05, + "loss": 0.2785, "step": 2965000 }, { "epoch": 1.78, - "learning_rate": 4.3591908292704e-05, - "loss": 0.3629, + "learning_rate": 4.3591761295114754e-05, + "loss": 0.2861, "step": 2965500 }, { "epoch": 1.78, - "learning_rate": 4.358980832714343e-05, - "loss": 0.3656, + "learning_rate": 4.3589661329554194e-05, + "loss": 0.2765, "step": 2966000 }, { "epoch": 1.78, - "learning_rate": 4.3587708361582866e-05, - "loss": 0.3619, + "learning_rate": 4.358756136399363e-05, + "loss": 0.2736, "step": 2966500 }, { "epoch": 1.78, - "learning_rate": 4.3585608396022306e-05, - "loss": 0.3725, + "learning_rate": 4.358546139843306e-05, + "loss": 0.2791, "step": 2967000 }, { "epoch": 1.78, - "learning_rate": 4.358350843046174e-05, - "loss": 0.3696, + "learning_rate": 4.35833614328725e-05, + "loss": 0.2777, "step": 2967500 }, { "epoch": 1.78, - "learning_rate": 4.358140846490117e-05, - "loss": 0.3562, + "learning_rate": 4.3581261467311935e-05, + "loss": 0.2754, "step": 2968000 }, { "epoch": 1.78, - "learning_rate": 4.357930849934061e-05, - "loss": 0.3641, + "learning_rate": 4.357916570168249e-05, + "loss": 0.2834, "step": 2968500 }, { "epoch": 1.78, - "learning_rate": 4.357720853378005e-05, - "loss": 0.3651, + "learning_rate": 4.357706573612193e-05, + "loss": 0.2776, "step": 2969000 }, { "epoch": 1.78, - "learning_rate": 4.35751127681506e-05, - "loss": 0.3553, + "learning_rate": 4.357496577056136e-05, + "loss": 0.278, "step": 2969500 }, { "epoch": 1.78, - "learning_rate": 4.3573012802590034e-05, - "loss": 0.37, + "learning_rate": 4.3572865805000796e-05, + "loss": 0.2805, "step": 2970000 }, { "epoch": 1.78, - "learning_rate": 4.3570912837029474e-05, - "loss": 0.3669, + "learning_rate": 4.357077003937135e-05, + "loss": 0.2729, "step": 2970500 }, { "epoch": 1.78, - "learning_rate": 4.356881707140003e-05, - "loss": 0.3668, + "learning_rate": 4.356867007381079e-05, + "loss": 0.2799, "step": 2971000 }, { "epoch": 1.78, - "learning_rate": 4.356671710583946e-05, - "loss": 0.3647, + "learning_rate": 4.356657010825022e-05, + "loss": 0.2763, "step": 2971500 }, { "epoch": 1.78, - "learning_rate": 4.3564617140278894e-05, - "loss": 0.3633, + "learning_rate": 4.3564470142689656e-05, + "loss": 0.2763, "step": 2972000 }, { "epoch": 1.78, - "learning_rate": 4.3562517174718335e-05, - "loss": 0.362, + "learning_rate": 4.356237437706021e-05, + "loss": 0.2755, "step": 2972500 }, { "epoch": 1.78, - "learning_rate": 4.356041720915777e-05, - "loss": 0.3619, + "learning_rate": 4.356027441149965e-05, + "loss": 0.2811, "step": 2973000 }, { "epoch": 1.78, - "learning_rate": 4.355832144352832e-05, - "loss": 0.3547, + "learning_rate": 4.3558174445939084e-05, + "loss": 0.2718, "step": 2973500 }, { "epoch": 1.78, - "learning_rate": 4.355622147796776e-05, - "loss": 0.3653, + "learning_rate": 4.355607448037852e-05, + "loss": 0.2806, "step": 2974000 }, { "epoch": 1.78, - "learning_rate": 4.3554121512407195e-05, - "loss": 0.3653, + "learning_rate": 4.355397451481796e-05, + "loss": 0.2784, "step": 2974500 }, { "epoch": 1.78, - "learning_rate": 4.355202154684663e-05, - "loss": 0.3703, + "learning_rate": 4.355187874918851e-05, + "loss": 0.279, "step": 2975000 }, { "epoch": 1.78, - "learning_rate": 4.354992158128607e-05, - "loss": 0.3604, + "learning_rate": 4.3549778783627944e-05, + "loss": 0.2787, "step": 2975500 }, { "epoch": 1.78, - "learning_rate": 4.35478216157255e-05, - "loss": 0.3695, + "learning_rate": 4.3547678818067385e-05, + "loss": 0.2815, "step": 2976000 }, { "epoch": 1.78, - "learning_rate": 4.3545721650164936e-05, - "loss": 0.3689, + "learning_rate": 4.354557885250682e-05, + "loss": 0.2817, "step": 2976500 }, { "epoch": 1.78, - "learning_rate": 4.3543621684604376e-05, - "loss": 0.3599, + "learning_rate": 4.354347888694625e-05, + "loss": 0.2721, "step": 2977000 }, { "epoch": 1.79, - "learning_rate": 4.354152591897493e-05, - "loss": 0.3619, + "learning_rate": 4.354137892138569e-05, + "loss": 0.2758, "step": 2977500 }, { "epoch": 1.79, - "learning_rate": 4.3539430153345483e-05, - "loss": 0.3722, + "learning_rate": 4.3539278955825125e-05, + "loss": 0.2775, "step": 2978000 }, { "epoch": 1.79, - "learning_rate": 4.3537334387716044e-05, - "loss": 0.3627, + "learning_rate": 4.353717899026456e-05, + "loss": 0.278, "step": 2978500 }, { "epoch": 1.79, - "learning_rate": 4.353523442215548e-05, - "loss": 0.3668, + "learning_rate": 4.353508322463511e-05, + "loss": 0.2764, "step": 2979000 }, { "epoch": 1.79, - "learning_rate": 4.353313865652603e-05, - "loss": 0.3693, + "learning_rate": 4.353298325907455e-05, + "loss": 0.2755, "step": 2979500 }, { "epoch": 1.79, - "learning_rate": 4.353103869096547e-05, - "loss": 0.3792, + "learning_rate": 4.3530883293513986e-05, + "loss": 0.2851, "step": 2980000 }, { "epoch": 1.79, - "learning_rate": 4.3528938725404904e-05, - "loss": 0.3726, + "learning_rate": 4.352878332795342e-05, + "loss": 0.2784, "step": 2980500 }, { "epoch": 1.79, - "learning_rate": 4.352683875984434e-05, - "loss": 0.367, + "learning_rate": 4.352668756232397e-05, + "loss": 0.2766, "step": 2981000 }, { "epoch": 1.79, - "learning_rate": 4.352473879428378e-05, - "loss": 0.3623, + "learning_rate": 4.352458759676341e-05, + "loss": 0.2766, "step": 2981500 }, { "epoch": 1.79, - "learning_rate": 4.352263882872321e-05, - "loss": 0.3653, + "learning_rate": 4.352249183113397e-05, + "loss": 0.2738, "step": 2982000 }, { "epoch": 1.79, - "learning_rate": 4.352053886316264e-05, - "loss": 0.3633, + "learning_rate": 4.35203918655734e-05, + "loss": 0.2778, "step": 2982500 }, { "epoch": 1.79, - "learning_rate": 4.351843889760208e-05, - "loss": 0.3585, + "learning_rate": 4.351829190001284e-05, + "loss": 0.2743, "step": 2983000 }, { "epoch": 1.79, - "learning_rate": 4.351633893204151e-05, - "loss": 0.3628, + "learning_rate": 4.3516191934452274e-05, + "loss": 0.2745, "step": 2983500 }, { "epoch": 1.79, - "learning_rate": 4.3514238966480945e-05, - "loss": 0.3609, + "learning_rate": 4.351409196889171e-05, + "loss": 0.2731, "step": 2984000 }, { "epoch": 1.79, - "learning_rate": 4.3512139000920386e-05, - "loss": 0.3607, + "learning_rate": 4.351199200333115e-05, + "loss": 0.2746, "step": 2984500 }, { "epoch": 1.79, - "learning_rate": 4.351003903535982e-05, - "loss": 0.3624, + "learning_rate": 4.350989203777058e-05, + "loss": 0.2747, "step": 2985000 }, { "epoch": 1.79, - "learning_rate": 4.350793906979925e-05, - "loss": 0.3564, + "learning_rate": 4.3507792072210015e-05, + "loss": 0.2722, "step": 2985500 }, { "epoch": 1.79, - "learning_rate": 4.3505843304169806e-05, - "loss": 0.3691, + "learning_rate": 4.350569630658057e-05, + "loss": 0.2748, "step": 2986000 }, { "epoch": 1.79, - "learning_rate": 4.3503743338609246e-05, - "loss": 0.3603, + "learning_rate": 4.350359634102001e-05, + "loss": 0.2727, "step": 2986500 }, { "epoch": 1.79, - "learning_rate": 4.350164337304868e-05, - "loss": 0.3701, + "learning_rate": 4.350150057539056e-05, + "loss": 0.2802, "step": 2987000 }, { "epoch": 1.79, - "learning_rate": 4.349954340748811e-05, - "loss": 0.365, + "learning_rate": 4.3499400609829995e-05, + "loss": 0.2711, "step": 2987500 }, { "epoch": 1.79, - "learning_rate": 4.3497443441927554e-05, - "loss": 0.368, + "learning_rate": 4.349730064426943e-05, + "loss": 0.2777, "step": 2988000 }, { "epoch": 1.79, - "learning_rate": 4.349534347636699e-05, - "loss": 0.3646, + "learning_rate": 4.349520067870887e-05, + "loss": 0.2778, "step": 2988500 }, { "epoch": 1.79, - "learning_rate": 4.349324771073754e-05, - "loss": 0.3729, + "learning_rate": 4.34931007131483e-05, + "loss": 0.2805, "step": 2989000 }, { "epoch": 1.79, - "learning_rate": 4.349114774517698e-05, - "loss": 0.3631, + "learning_rate": 4.3491000747587736e-05, + "loss": 0.2722, "step": 2989500 }, { "epoch": 1.79, - "learning_rate": 4.3489047779616414e-05, - "loss": 0.3728, + "learning_rate": 4.3488900782027176e-05, + "loss": 0.283, "step": 2990000 }, { "epoch": 1.79, - "learning_rate": 4.348694781405585e-05, - "loss": 0.3705, + "learning_rate": 4.348680081646661e-05, + "loss": 0.2792, "step": 2990500 }, { "epoch": 1.79, - "learning_rate": 4.348484784849529e-05, - "loss": 0.3657, + "learning_rate": 4.348470085090604e-05, + "loss": 0.2778, "step": 2991000 }, { "epoch": 1.79, - "learning_rate": 4.348274788293472e-05, - "loss": 0.3694, + "learning_rate": 4.3482605085276604e-05, + "loss": 0.2785, "step": 2991500 }, { "epoch": 1.79, - "learning_rate": 4.3480647917374155e-05, - "loss": 0.3658, + "learning_rate": 4.348050511971604e-05, + "loss": 0.277, "step": 2992000 }, { "epoch": 1.79, - "learning_rate": 4.347855215174471e-05, - "loss": 0.3716, + "learning_rate": 4.347840515415547e-05, + "loss": 0.2804, "step": 2992500 }, { "epoch": 1.79, - "learning_rate": 4.347645218618415e-05, - "loss": 0.3776, + "learning_rate": 4.347630518859491e-05, + "loss": 0.2791, "step": 2993000 }, { "epoch": 1.79, - "learning_rate": 4.347435222062358e-05, - "loss": 0.3796, + "learning_rate": 4.3474209422965464e-05, + "loss": 0.2849, "step": 2993500 }, { "epoch": 1.8, - "learning_rate": 4.3472256454994136e-05, - "loss": 0.3714, + "learning_rate": 4.34721094574049e-05, + "loss": 0.2804, "step": 2994000 }, { "epoch": 1.8, - "learning_rate": 4.347015648943357e-05, - "loss": 0.3672, + "learning_rate": 4.347000949184433e-05, + "loss": 0.2797, "step": 2994500 }, { "epoch": 1.8, - "learning_rate": 4.346805652387301e-05, - "loss": 0.3562, + "learning_rate": 4.346790952628377e-05, + "loss": 0.2715, "step": 2995000 }, { "epoch": 1.8, - "learning_rate": 4.346595655831244e-05, - "loss": 0.3638, + "learning_rate": 4.3465813760654325e-05, + "loss": 0.2761, "step": 2995500 }, { "epoch": 1.8, - "learning_rate": 4.3463856592751876e-05, - "loss": 0.3641, + "learning_rate": 4.346371379509376e-05, + "loss": 0.275, "step": 2996000 }, { "epoch": 1.8, - "learning_rate": 4.346175662719132e-05, - "loss": 0.3719, + "learning_rate": 4.346161382953319e-05, + "loss": 0.2802, "step": 2996500 }, { "epoch": 1.8, - "learning_rate": 4.345965666163075e-05, - "loss": 0.3656, + "learning_rate": 4.345951386397263e-05, + "loss": 0.2726, "step": 2997000 }, { "epoch": 1.8, - "learning_rate": 4.3457556696070184e-05, - "loss": 0.3791, + "learning_rate": 4.3457413898412066e-05, + "loss": 0.2819, "step": 2997500 }, { "epoch": 1.8, - "learning_rate": 4.345545673050962e-05, - "loss": 0.3749, + "learning_rate": 4.345531813278262e-05, + "loss": 0.2811, "step": 2998000 }, { "epoch": 1.8, - "learning_rate": 4.345335676494905e-05, - "loss": 0.3644, + "learning_rate": 4.345321816722206e-05, + "loss": 0.2823, "step": 2998500 }, { "epoch": 1.8, - "learning_rate": 4.345125679938849e-05, - "loss": 0.3682, + "learning_rate": 4.345111820166149e-05, + "loss": 0.2784, "step": 2999000 }, { "epoch": 1.8, - "learning_rate": 4.344916103375905e-05, - "loss": 0.3564, + "learning_rate": 4.3449022436032047e-05, + "loss": 0.271, "step": 2999500 }, { "epoch": 1.8, - "learning_rate": 4.344706106819848e-05, - "loss": 0.364, + "learning_rate": 4.344692247047148e-05, + "loss": 0.2729, "step": 3000000 }, { "epoch": 1.8, - "eval_loss": 0.35116276144981384, - "eval_runtime": 1120.4699, - "eval_samples_per_second": 470.088, - "eval_steps_per_second": 78.348, + "eval_loss": 0.2533246576786041, + "eval_runtime": 1463.101, + "eval_samples_per_second": 360.002, + "eval_steps_per_second": 60.001, "step": 3000000 }, { "epoch": 1.8, - "learning_rate": 4.344496110263791e-05, - "loss": 0.37, + "learning_rate": 4.344482250491092e-05, + "loss": 0.2817, "step": 3000500 }, { "epoch": 1.8, - "learning_rate": 4.344286533700847e-05, - "loss": 0.3574, + "learning_rate": 4.3442722539350354e-05, + "loss": 0.276, "step": 3001000 }, { "epoch": 1.8, - "learning_rate": 4.344076537144791e-05, - "loss": 0.3713, + "learning_rate": 4.344062257378979e-05, + "loss": 0.2776, "step": 3001500 }, { "epoch": 1.8, - "learning_rate": 4.3438665405887345e-05, - "loss": 0.3561, + "learning_rate": 4.343852260822923e-05, + "loss": 0.2771, "step": 3002000 }, { "epoch": 1.8, - "learning_rate": 4.343656544032677e-05, - "loss": 0.3707, + "learning_rate": 4.343642264266866e-05, + "loss": 0.2827, "step": 3002500 }, { "epoch": 1.8, - "learning_rate": 4.343446547476621e-05, - "loss": 0.359, + "learning_rate": 4.3434322677108094e-05, + "loss": 0.2736, "step": 3003000 }, { "epoch": 1.8, - "learning_rate": 4.3432365509205646e-05, - "loss": 0.3627, + "learning_rate": 4.343222691147865e-05, + "loss": 0.2757, "step": 3003500 }, { "epoch": 1.8, - "learning_rate": 4.343026554364508e-05, - "loss": 0.364, + "learning_rate": 4.343012694591809e-05, + "loss": 0.2771, "step": 3004000 }, { "epoch": 1.8, - "learning_rate": 4.3428169778015646e-05, - "loss": 0.3545, + "learning_rate": 4.342802698035752e-05, + "loss": 0.2701, "step": 3004500 }, { "epoch": 1.8, - "learning_rate": 4.342606981245507e-05, - "loss": 0.3729, + "learning_rate": 4.3425927014796955e-05, + "loss": 0.2836, "step": 3005000 }, { "epoch": 1.8, - "learning_rate": 4.3423969846894506e-05, - "loss": 0.3588, + "learning_rate": 4.3423827049236395e-05, + "loss": 0.2696, "step": 3005500 }, { "epoch": 1.8, - "learning_rate": 4.3421869881333947e-05, - "loss": 0.3579, + "learning_rate": 4.342173128360695e-05, + "loss": 0.2725, "step": 3006000 }, { "epoch": 1.8, - "learning_rate": 4.341976991577338e-05, - "loss": 0.3685, + "learning_rate": 4.341963131804638e-05, + "loss": 0.278, "step": 3006500 }, { "epoch": 1.8, - "learning_rate": 4.3417669950212813e-05, - "loss": 0.3614, + "learning_rate": 4.341753135248582e-05, + "loss": 0.2792, "step": 3007000 }, { "epoch": 1.8, - "learning_rate": 4.3415569984652254e-05, - "loss": 0.3671, + "learning_rate": 4.3415431386925256e-05, + "loss": 0.2786, "step": 3007500 }, { "epoch": 1.8, - "learning_rate": 4.341347001909169e-05, - "loss": 0.3726, + "learning_rate": 4.341333142136469e-05, + "loss": 0.278, "step": 3008000 }, { "epoch": 1.8, - "learning_rate": 4.341137005353112e-05, - "loss": 0.3607, + "learning_rate": 4.341123565573524e-05, + "loss": 0.2753, "step": 3008500 }, { "epoch": 1.8, - "learning_rate": 4.340927008797056e-05, - "loss": 0.3702, + "learning_rate": 4.340913569017468e-05, + "loss": 0.2804, "step": 3009000 }, { "epoch": 1.8, - "learning_rate": 4.3407170122409994e-05, - "loss": 0.3692, + "learning_rate": 4.340703572461412e-05, + "loss": 0.2801, "step": 3009500 }, { "epoch": 1.8, - "learning_rate": 4.340507015684943e-05, - "loss": 0.3638, + "learning_rate": 4.340493575905355e-05, + "loss": 0.277, "step": 3010000 }, { "epoch": 1.8, - "learning_rate": 4.340297439121998e-05, - "loss": 0.3654, + "learning_rate": 4.3402839993424104e-05, + "loss": 0.28, "step": 3010500 }, { "epoch": 1.81, - "learning_rate": 4.340087442565942e-05, - "loss": 0.366, + "learning_rate": 4.3400740027863544e-05, + "loss": 0.2714, "step": 3011000 }, { "epoch": 1.81, - "learning_rate": 4.3398778660029975e-05, - "loss": 0.3588, + "learning_rate": 4.339864006230298e-05, + "loss": 0.2705, "step": 3011500 }, { "epoch": 1.81, - "learning_rate": 4.339667869446941e-05, - "loss": 0.3627, + "learning_rate": 4.339654009674241e-05, + "loss": 0.275, "step": 3012000 }, { "epoch": 1.81, - "learning_rate": 4.339457872890885e-05, - "loss": 0.3741, + "learning_rate": 4.339444013118185e-05, + "loss": 0.2807, "step": 3012500 }, { "epoch": 1.81, - "learning_rate": 4.339247876334828e-05, - "loss": 0.3652, + "learning_rate": 4.3392344365552405e-05, + "loss": 0.2761, "step": 3013000 }, { "epoch": 1.81, - "learning_rate": 4.3390378797787716e-05, - "loss": 0.3645, + "learning_rate": 4.339024859992296e-05, + "loss": 0.2765, "step": 3013500 }, { "epoch": 1.81, - "learning_rate": 4.338828303215827e-05, - "loss": 0.3618, + "learning_rate": 4.338814863436239e-05, + "loss": 0.2775, "step": 3014000 }, { "epoch": 1.81, - "learning_rate": 4.338618306659771e-05, - "loss": 0.3573, + "learning_rate": 4.338604866880183e-05, + "loss": 0.2823, "step": 3014500 }, { "epoch": 1.81, - "learning_rate": 4.338408310103714e-05, - "loss": 0.3636, + "learning_rate": 4.3383948703241265e-05, + "loss": 0.2818, "step": 3015000 }, { "epoch": 1.81, - "learning_rate": 4.3381983135476577e-05, - "loss": 0.361, + "learning_rate": 4.33818487376807e-05, + "loss": 0.2782, "step": 3015500 }, { "epoch": 1.81, - "learning_rate": 4.337988316991602e-05, - "loss": 0.3621, + "learning_rate": 4.337974877212014e-05, + "loss": 0.2751, "step": 3016000 }, { "epoch": 1.81, - "learning_rate": 4.337778320435545e-05, - "loss": 0.3697, + "learning_rate": 4.337764880655957e-05, + "loss": 0.2789, "step": 3016500 }, { "epoch": 1.81, - "learning_rate": 4.3375687438726004e-05, - "loss": 0.3705, + "learning_rate": 4.3375553040930126e-05, + "loss": 0.2838, "step": 3017000 }, { "epoch": 1.81, - "learning_rate": 4.337358747316544e-05, - "loss": 0.3675, + "learning_rate": 4.337345307536956e-05, + "loss": 0.2779, "step": 3017500 }, { "epoch": 1.81, - "learning_rate": 4.337148750760488e-05, - "loss": 0.3651, + "learning_rate": 4.3371353109809e-05, + "loss": 0.2766, "step": 3018000 }, { "epoch": 1.81, - "learning_rate": 4.336938754204431e-05, - "loss": 0.3582, + "learning_rate": 4.336925314424843e-05, + "loss": 0.2733, "step": 3018500 }, { "epoch": 1.81, - "learning_rate": 4.3367287576483744e-05, - "loss": 0.3654, + "learning_rate": 4.336715317868787e-05, + "loss": 0.2816, "step": 3019000 }, { "epoch": 1.81, - "learning_rate": 4.3365187610923185e-05, - "loss": 0.3651, + "learning_rate": 4.336505321312731e-05, + "loss": 0.2768, "step": 3019500 }, { "epoch": 1.81, - "learning_rate": 4.336308764536261e-05, - "loss": 0.3646, + "learning_rate": 4.336295324756674e-05, + "loss": 0.2755, "step": 3020000 }, { "epoch": 1.81, - "learning_rate": 4.336099187973317e-05, - "loss": 0.3709, + "learning_rate": 4.336085328200618e-05, + "loss": 0.2813, "step": 3020500 }, { "epoch": 1.81, - "learning_rate": 4.335889191417261e-05, - "loss": 0.369, + "learning_rate": 4.3358757516376734e-05, + "loss": 0.2772, "step": 3021000 }, { "epoch": 1.81, - "learning_rate": 4.3356791948612045e-05, - "loss": 0.3687, + "learning_rate": 4.335666175074729e-05, + "loss": 0.28, "step": 3021500 }, { "epoch": 1.81, - "learning_rate": 4.335469198305148e-05, - "loss": 0.369, + "learning_rate": 4.335456178518672e-05, + "loss": 0.2757, "step": 3022000 }, { "epoch": 1.81, - "learning_rate": 4.335259201749091e-05, - "loss": 0.3658, + "learning_rate": 4.3352461819626155e-05, + "loss": 0.2736, "step": 3022500 }, { "epoch": 1.81, - "learning_rate": 4.3350492051930346e-05, - "loss": 0.3638, + "learning_rate": 4.3350361854065595e-05, + "loss": 0.2771, "step": 3023000 }, { "epoch": 1.81, - "learning_rate": 4.334839208636978e-05, - "loss": 0.3672, + "learning_rate": 4.334826188850503e-05, + "loss": 0.2772, "step": 3023500 }, { "epoch": 1.81, - "learning_rate": 4.334629212080922e-05, - "loss": 0.3647, + "learning_rate": 4.334616192294446e-05, + "loss": 0.2749, "step": 3024000 }, { "epoch": 1.81, - "learning_rate": 4.334419635517977e-05, - "loss": 0.3729, + "learning_rate": 4.33440619573839e-05, + "loss": 0.2802, "step": 3024500 }, { "epoch": 1.81, - "learning_rate": 4.3342096389619206e-05, - "loss": 0.3612, + "learning_rate": 4.3341961991823336e-05, + "loss": 0.2797, "step": 3025000 }, { "epoch": 1.81, - "learning_rate": 4.333999642405864e-05, - "loss": 0.3654, + "learning_rate": 4.333986202626276e-05, + "loss": 0.278, "step": 3025500 }, { "epoch": 1.81, - "learning_rate": 4.333789645849808e-05, - "loss": 0.3728, + "learning_rate": 4.333776626063333e-05, + "loss": 0.2804, "step": 3026000 }, { "epoch": 1.81, - "learning_rate": 4.333580069286864e-05, - "loss": 0.3656, + "learning_rate": 4.333566629507276e-05, + "loss": 0.2748, "step": 3026500 }, { "epoch": 1.81, - "learning_rate": 4.3333704927239194e-05, - "loss": 0.3666, + "learning_rate": 4.3333570529443316e-05, + "loss": 0.2803, "step": 3027000 }, { "epoch": 1.82, - "learning_rate": 4.333160496167863e-05, - "loss": 0.3629, + "learning_rate": 4.333147056388275e-05, + "loss": 0.2727, "step": 3027500 }, { "epoch": 1.82, - "learning_rate": 4.332950499611807e-05, - "loss": 0.3668, + "learning_rate": 4.332937059832219e-05, + "loss": 0.2766, "step": 3028000 }, { "epoch": 1.82, - "learning_rate": 4.33274050305575e-05, - "loss": 0.3577, + "learning_rate": 4.3327270632761624e-05, + "loss": 0.2734, "step": 3028500 }, { "epoch": 1.82, - "learning_rate": 4.3325305064996935e-05, - "loss": 0.3645, + "learning_rate": 4.332517486713218e-05, + "loss": 0.2768, "step": 3029000 }, { "epoch": 1.82, - "learning_rate": 4.332320509943637e-05, - "loss": 0.3549, + "learning_rate": 4.332307490157161e-05, + "loss": 0.2762, "step": 3029500 }, { "epoch": 1.82, - "learning_rate": 4.33211051338758e-05, - "loss": 0.3721, + "learning_rate": 4.332097493601105e-05, + "loss": 0.2778, "step": 3030000 }, { "epoch": 1.82, - "learning_rate": 4.3319005168315235e-05, - "loss": 0.3678, + "learning_rate": 4.3318874970450484e-05, + "loss": 0.2804, "step": 3030500 }, { "epoch": 1.82, - "learning_rate": 4.3316909402685795e-05, - "loss": 0.3722, + "learning_rate": 4.331677500488992e-05, + "loss": 0.2799, "step": 3031000 }, { "epoch": 1.82, - "learning_rate": 4.331481363705635e-05, - "loss": 0.3631, + "learning_rate": 4.331467503932936e-05, + "loss": 0.2723, "step": 3031500 }, { "epoch": 1.82, - "learning_rate": 4.331271367149579e-05, - "loss": 0.3632, + "learning_rate": 4.331257507376879e-05, + "loss": 0.2757, "step": 3032000 }, { "epoch": 1.82, - "learning_rate": 4.331061370593522e-05, - "loss": 0.3625, + "learning_rate": 4.3310475108208225e-05, + "loss": 0.2714, "step": 3032500 }, { "epoch": 1.82, - "learning_rate": 4.3308513740374656e-05, - "loss": 0.3617, + "learning_rate": 4.3308379342578785e-05, + "loss": 0.2786, "step": 3033000 }, { "epoch": 1.82, - "learning_rate": 4.3306413774814096e-05, - "loss": 0.3584, + "learning_rate": 4.330627937701822e-05, + "loss": 0.2706, "step": 3033500 }, { "epoch": 1.82, - "learning_rate": 4.330431800918465e-05, - "loss": 0.3618, + "learning_rate": 4.330417941145765e-05, + "loss": 0.2697, "step": 3034000 }, { "epoch": 1.82, - "learning_rate": 4.3302218043624083e-05, - "loss": 0.3632, + "learning_rate": 4.330207944589709e-05, + "loss": 0.2796, "step": 3034500 }, { "epoch": 1.82, - "learning_rate": 4.3300118078063524e-05, - "loss": 0.3663, + "learning_rate": 4.3299983680267646e-05, + "loss": 0.2763, "step": 3035000 }, { "epoch": 1.82, - "learning_rate": 4.329801811250296e-05, - "loss": 0.3667, + "learning_rate": 4.329788371470708e-05, + "loss": 0.2833, "step": 3035500 }, { "epoch": 1.82, - "learning_rate": 4.329591814694239e-05, - "loss": 0.3646, + "learning_rate": 4.329578374914651e-05, + "loss": 0.2784, "step": 3036000 }, { "epoch": 1.82, - "learning_rate": 4.3293818181381824e-05, - "loss": 0.3596, + "learning_rate": 4.329368378358595e-05, + "loss": 0.2746, "step": 3036500 }, { "epoch": 1.82, - "learning_rate": 4.329171821582126e-05, - "loss": 0.3682, + "learning_rate": 4.329158381802539e-05, + "loss": 0.2829, "step": 3037000 }, { "epoch": 1.82, - "learning_rate": 4.328961825026069e-05, - "loss": 0.3617, + "learning_rate": 4.328948805239594e-05, + "loss": 0.2775, "step": 3037500 }, { "epoch": 1.82, - "learning_rate": 4.328752248463125e-05, - "loss": 0.3611, + "learning_rate": 4.3287388086835374e-05, + "loss": 0.2793, "step": 3038000 }, { "epoch": 1.82, - "learning_rate": 4.328542251907069e-05, - "loss": 0.3604, + "learning_rate": 4.3285288121274814e-05, + "loss": 0.2786, "step": 3038500 }, { "epoch": 1.82, - "learning_rate": 4.328332255351012e-05, - "loss": 0.3735, + "learning_rate": 4.328318815571425e-05, + "loss": 0.2763, "step": 3039000 }, { "epoch": 1.82, - "learning_rate": 4.328122258794955e-05, - "loss": 0.3686, + "learning_rate": 4.32810923900848e-05, + "loss": 0.2784, "step": 3039500 }, { "epoch": 1.82, - "learning_rate": 4.327912682232011e-05, - "loss": 0.3661, + "learning_rate": 4.327899242452424e-05, + "loss": 0.2795, "step": 3040000 }, { "epoch": 1.82, - "learning_rate": 4.327702685675955e-05, - "loss": 0.3717, + "learning_rate": 4.3276892458963675e-05, + "loss": 0.274, "step": 3040500 }, { "epoch": 1.82, - "learning_rate": 4.3274926891198986e-05, - "loss": 0.3689, + "learning_rate": 4.327479249340311e-05, + "loss": 0.2765, "step": 3041000 }, { "epoch": 1.82, - "learning_rate": 4.327282692563842e-05, - "loss": 0.3657, + "learning_rate": 4.327269252784255e-05, + "loss": 0.2808, "step": 3041500 }, { "epoch": 1.82, - "learning_rate": 4.327072696007785e-05, - "loss": 0.3566, + "learning_rate": 4.3270600962144215e-05, + "loss": 0.2747, "step": 3042000 }, { "epoch": 1.82, - "learning_rate": 4.326863119444841e-05, - "loss": 0.3692, + "learning_rate": 4.3268500996583655e-05, + "loss": 0.2737, "step": 3042500 }, { "epoch": 1.82, - "learning_rate": 4.3266531228887846e-05, - "loss": 0.3767, + "learning_rate": 4.326640103102309e-05, + "loss": 0.2809, "step": 3043000 }, { "epoch": 1.82, - "learning_rate": 4.326443126332729e-05, - "loss": 0.368, + "learning_rate": 4.326430106546252e-05, + "loss": 0.2769, "step": 3043500 }, { "epoch": 1.83, - "learning_rate": 4.326233129776671e-05, - "loss": 0.3617, + "learning_rate": 4.326220109990196e-05, + "loss": 0.2782, "step": 3044000 }, { "epoch": 1.83, - "learning_rate": 4.3260235532137274e-05, - "loss": 0.365, + "learning_rate": 4.3260101134341396e-05, + "loss": 0.2783, "step": 3044500 }, { "epoch": 1.83, - "learning_rate": 4.325813556657671e-05, - "loss": 0.3707, + "learning_rate": 4.325800116878083e-05, + "loss": 0.278, "step": 3045000 }, { "epoch": 1.83, - "learning_rate": 4.325603560101615e-05, - "loss": 0.3662, + "learning_rate": 4.325590120322027e-05, + "loss": 0.2734, "step": 3045500 }, { "epoch": 1.83, - "learning_rate": 4.3253935635455574e-05, - "loss": 0.3633, + "learning_rate": 4.325380543759082e-05, + "loss": 0.2752, "step": 3046000 }, { "epoch": 1.83, - "learning_rate": 4.325183566989501e-05, - "loss": 0.3681, + "learning_rate": 4.325170547203026e-05, + "loss": 0.2757, "step": 3046500 }, { "epoch": 1.83, - "learning_rate": 4.324973570433445e-05, - "loss": 0.3525, + "learning_rate": 4.32496055064697e-05, + "loss": 0.2686, "step": 3047000 }, { "epoch": 1.83, - "learning_rate": 4.324763573877388e-05, - "loss": 0.3574, + "learning_rate": 4.324750554090913e-05, + "loss": 0.275, "step": 3047500 }, { "epoch": 1.83, - "learning_rate": 4.3245535773213315e-05, - "loss": 0.3609, + "learning_rate": 4.3245405575348564e-05, + "loss": 0.2782, "step": 3048000 }, { "epoch": 1.83, - "learning_rate": 4.3243440007583875e-05, - "loss": 0.367, + "learning_rate": 4.324330980971912e-05, + "loss": 0.2809, "step": 3048500 }, { "epoch": 1.83, - "learning_rate": 4.324134004202331e-05, - "loss": 0.3698, + "learning_rate": 4.324120984415856e-05, + "loss": 0.2794, "step": 3049000 }, { "epoch": 1.83, - "learning_rate": 4.323924007646274e-05, - "loss": 0.3675, + "learning_rate": 4.323910987859799e-05, + "loss": 0.2809, "step": 3049500 }, { "epoch": 1.83, - "learning_rate": 4.323714011090218e-05, - "loss": 0.3651, + "learning_rate": 4.3237009913037425e-05, + "loss": 0.2756, "step": 3050000 }, { "epoch": 1.83, - "learning_rate": 4.323504434527274e-05, - "loss": 0.3621, + "learning_rate": 4.3234909947476865e-05, + "loss": 0.2795, "step": 3050500 }, { "epoch": 1.83, - "learning_rate": 4.323294437971217e-05, - "loss": 0.3752, + "learning_rate": 4.32328099819163e-05, + "loss": 0.2751, "step": 3051000 }, { "epoch": 1.83, - "learning_rate": 4.32308444141516e-05, - "loss": 0.3592, + "learning_rate": 4.323071001635573e-05, + "loss": 0.2768, "step": 3051500 }, { "epoch": 1.83, - "learning_rate": 4.322874444859104e-05, - "loss": 0.362, + "learning_rate": 4.3228610050795165e-05, + "loss": 0.2753, "step": 3052000 }, { "epoch": 1.83, - "learning_rate": 4.32266486829616e-05, - "loss": 0.371, + "learning_rate": 4.32265100852346e-05, + "loss": 0.2796, "step": 3052500 }, { "epoch": 1.83, - "learning_rate": 4.322455291733216e-05, - "loss": 0.364, + "learning_rate": 4.322441431960516e-05, + "loss": 0.2727, "step": 3053000 }, { "epoch": 1.83, - "learning_rate": 4.322245295177159e-05, - "loss": 0.3675, + "learning_rate": 4.322231435404459e-05, + "loss": 0.2776, "step": 3053500 }, { "epoch": 1.83, - "learning_rate": 4.3220352986211024e-05, - "loss": 0.3654, + "learning_rate": 4.322021858841515e-05, + "loss": 0.2805, "step": 3054000 }, { "epoch": 1.83, - "learning_rate": 4.3218253020650464e-05, - "loss": 0.3696, + "learning_rate": 4.3218118622854586e-05, + "loss": 0.281, "step": 3054500 }, { "epoch": 1.83, - "learning_rate": 4.32161530550899e-05, - "loss": 0.3732, + "learning_rate": 4.321601865729402e-05, + "loss": 0.2785, "step": 3055000 }, { "epoch": 1.83, - "learning_rate": 4.321405308952933e-05, - "loss": 0.3682, + "learning_rate": 4.321391869173346e-05, + "loss": 0.2766, "step": 3055500 }, { "epoch": 1.83, - "learning_rate": 4.3211953123968764e-05, - "loss": 0.3695, + "learning_rate": 4.3211818726172894e-05, + "loss": 0.2829, "step": 3056000 }, { "epoch": 1.83, - "learning_rate": 4.3209857358339325e-05, - "loss": 0.3646, + "learning_rate": 4.320972296054345e-05, + "loss": 0.2781, "step": 3056500 }, { "epoch": 1.83, - "learning_rate": 4.320775739277876e-05, - "loss": 0.3692, + "learning_rate": 4.320762299498288e-05, + "loss": 0.2824, "step": 3057000 }, { "epoch": 1.83, - "learning_rate": 4.32056574272182e-05, - "loss": 0.3699, + "learning_rate": 4.320552302942232e-05, + "loss": 0.2769, "step": 3057500 }, { "epoch": 1.83, - "learning_rate": 4.3203557461657625e-05, - "loss": 0.3679, + "learning_rate": 4.3203423063861754e-05, + "loss": 0.2791, "step": 3058000 }, { "epoch": 1.83, - "learning_rate": 4.320145749609706e-05, - "loss": 0.3557, + "learning_rate": 4.320132309830119e-05, + "loss": 0.2719, "step": 3058500 }, { "epoch": 1.83, - "learning_rate": 4.31993575305365e-05, - "loss": 0.3651, + "learning_rate": 4.319922313274062e-05, + "loss": 0.2794, "step": 3059000 }, { "epoch": 1.83, - "learning_rate": 4.319726176490706e-05, - "loss": 0.3683, + "learning_rate": 4.3197123167180055e-05, + "loss": 0.2755, "step": 3059500 }, { "epoch": 1.83, - "learning_rate": 4.319516179934649e-05, - "loss": 0.3693, + "learning_rate": 4.319502320161949e-05, + "loss": 0.2782, "step": 3060000 }, { "epoch": 1.83, - "learning_rate": 4.319306183378592e-05, - "loss": 0.3642, + "learning_rate": 4.319292743599005e-05, + "loss": 0.2761, "step": 3060500 }, { "epoch": 1.84, - "learning_rate": 4.319096186822536e-05, - "loss": 0.3594, + "learning_rate": 4.319082747042949e-05, + "loss": 0.2798, "step": 3061000 }, { "epoch": 1.84, - "learning_rate": 4.318886190266479e-05, - "loss": 0.3574, + "learning_rate": 4.3188727504868915e-05, + "loss": 0.2725, "step": 3061500 }, { "epoch": 1.84, - "learning_rate": 4.3186761937104226e-05, - "loss": 0.3714, + "learning_rate": 4.3186627539308356e-05, + "loss": 0.2777, "step": 3062000 }, { "epoch": 1.84, - "learning_rate": 4.318466197154367e-05, - "loss": 0.3619, + "learning_rate": 4.3184531773678916e-05, + "loss": 0.2715, "step": 3062500 }, { "epoch": 1.84, - "learning_rate": 4.31825620059831e-05, - "loss": 0.381, + "learning_rate": 4.318243180811835e-05, + "loss": 0.2814, "step": 3063000 }, { "epoch": 1.84, - "learning_rate": 4.3180466240353654e-05, - "loss": 0.371, + "learning_rate": 4.318033184255778e-05, + "loss": 0.2843, "step": 3063500 }, { "epoch": 1.84, - "learning_rate": 4.3178366274793094e-05, - "loss": 0.3656, + "learning_rate": 4.3178231876997216e-05, + "loss": 0.2718, "step": 3064000 }, { "epoch": 1.84, - "learning_rate": 4.317626630923253e-05, - "loss": 0.3694, + "learning_rate": 4.317613191143665e-05, + "loss": 0.2771, "step": 3064500 }, { "epoch": 1.84, - "learning_rate": 4.317417054360308e-05, - "loss": 0.3648, + "learning_rate": 4.317403614580721e-05, + "loss": 0.2771, "step": 3065000 }, { "epoch": 1.84, - "learning_rate": 4.3172070578042514e-05, - "loss": 0.3625, + "learning_rate": 4.3171936180246644e-05, + "loss": 0.2739, "step": 3065500 }, { "epoch": 1.84, - "learning_rate": 4.3169970612481955e-05, - "loss": 0.3717, + "learning_rate": 4.31698404146172e-05, + "loss": 0.2847, "step": 3066000 }, { "epoch": 1.84, - "learning_rate": 4.316787064692139e-05, - "loss": 0.3621, + "learning_rate": 4.316774464898776e-05, + "loss": 0.2746, "step": 3066500 }, { "epoch": 1.84, - "learning_rate": 4.316577068136082e-05, - "loss": 0.3668, + "learning_rate": 4.316564468342719e-05, + "loss": 0.2768, "step": 3067000 }, { "epoch": 1.84, - "learning_rate": 4.316367071580026e-05, - "loss": 0.3585, + "learning_rate": 4.3163544717866624e-05, + "loss": 0.2776, "step": 3067500 }, { "epoch": 1.84, - "learning_rate": 4.3161570750239695e-05, - "loss": 0.3652, + "learning_rate": 4.3161444752306065e-05, + "loss": 0.2761, "step": 3068000 }, { "epoch": 1.84, - "learning_rate": 4.315947078467913e-05, - "loss": 0.363, + "learning_rate": 4.31593447867455e-05, + "loss": 0.2772, "step": 3068500 }, { "epoch": 1.84, - "learning_rate": 4.315737501904968e-05, - "loss": 0.3662, + "learning_rate": 4.315724482118493e-05, + "loss": 0.2813, "step": 3069000 }, { "epoch": 1.84, - "learning_rate": 4.315527505348912e-05, - "loss": 0.3648, + "learning_rate": 4.315514485562437e-05, + "loss": 0.2795, "step": 3069500 }, { "epoch": 1.84, - "learning_rate": 4.3153175087928556e-05, - "loss": 0.3615, + "learning_rate": 4.3153044890063805e-05, + "loss": 0.2757, "step": 3070000 }, { "epoch": 1.84, - "learning_rate": 4.3151075122367996e-05, - "loss": 0.3704, + "learning_rate": 4.315094492450324e-05, + "loss": 0.2746, "step": 3070500 }, { "epoch": 1.84, - "learning_rate": 4.314897515680743e-05, - "loss": 0.3584, + "learning_rate": 4.314884495894267e-05, + "loss": 0.2743, "step": 3071000 }, { "epoch": 1.84, - "learning_rate": 4.314687519124686e-05, - "loss": 0.3649, + "learning_rate": 4.3146744993382106e-05, + "loss": 0.2772, "step": 3071500 }, { "epoch": 1.84, - "learning_rate": 4.3144775225686303e-05, - "loss": 0.3794, + "learning_rate": 4.314464502782154e-05, + "loss": 0.2809, "step": 3072000 }, { "epoch": 1.84, - "learning_rate": 4.314267526012574e-05, - "loss": 0.3587, + "learning_rate": 4.314254506226098e-05, + "loss": 0.272, "step": 3072500 }, { "epoch": 1.84, - "learning_rate": 4.3140575294565164e-05, - "loss": 0.3716, + "learning_rate": 4.314044929663154e-05, + "loss": 0.2756, "step": 3073000 }, { "epoch": 1.84, - "learning_rate": 4.3138479528935724e-05, - "loss": 0.3622, + "learning_rate": 4.3138349331070966e-05, + "loss": 0.2785, "step": 3073500 }, { "epoch": 1.84, - "learning_rate": 4.313638376330628e-05, - "loss": 0.3676, + "learning_rate": 4.31362493655104e-05, + "loss": 0.2774, "step": 3074000 }, { "epoch": 1.84, - "learning_rate": 4.313428379774572e-05, - "loss": 0.3733, + "learning_rate": 4.313414939994984e-05, + "loss": 0.278, "step": 3074500 }, { "epoch": 1.84, - "learning_rate": 4.313218383218515e-05, - "loss": 0.3585, + "learning_rate": 4.31320536343204e-05, + "loss": 0.2734, "step": 3075000 }, { "epoch": 1.84, - "learning_rate": 4.3130083866624585e-05, - "loss": 0.3638, + "learning_rate": 4.312995366875983e-05, + "loss": 0.2806, "step": 3075500 }, { "epoch": 1.84, - "learning_rate": 4.3127983901064025e-05, - "loss": 0.3549, + "learning_rate": 4.312785370319927e-05, + "loss": 0.2717, "step": 3076000 }, { "epoch": 1.84, - "learning_rate": 4.312588813543458e-05, - "loss": 0.3635, + "learning_rate": 4.31257537376387e-05, + "loss": 0.2734, "step": 3076500 }, { "epoch": 1.84, - "learning_rate": 4.312378816987401e-05, - "loss": 0.3573, + "learning_rate": 4.312365797200926e-05, + "loss": 0.2732, "step": 3077000 }, { "epoch": 1.85, - "learning_rate": 4.312168820431345e-05, - "loss": 0.3663, + "learning_rate": 4.3121562206379815e-05, + "loss": 0.2779, "step": 3077500 }, { "epoch": 1.85, - "learning_rate": 4.3119588238752886e-05, - "loss": 0.3595, + "learning_rate": 4.311946224081925e-05, + "loss": 0.2736, "step": 3078000 }, { "epoch": 1.85, - "learning_rate": 4.311748827319232e-05, - "loss": 0.3599, + "learning_rate": 4.31173664751898e-05, + "loss": 0.2779, "step": 3078500 }, { "epoch": 1.85, - "learning_rate": 4.311538830763176e-05, - "loss": 0.3741, + "learning_rate": 4.311526650962924e-05, + "loss": 0.2829, "step": 3079000 }, { "epoch": 1.85, - "learning_rate": 4.311328834207119e-05, - "loss": 0.3584, + "learning_rate": 4.3113166544068675e-05, + "loss": 0.276, "step": 3079500 }, { "epoch": 1.85, - "learning_rate": 4.3111188376510626e-05, - "loss": 0.3696, + "learning_rate": 4.311106657850811e-05, + "loss": 0.2808, "step": 3080000 }, { "epoch": 1.85, - "learning_rate": 4.310909261088118e-05, - "loss": 0.3685, + "learning_rate": 4.310896661294755e-05, + "loss": 0.282, "step": 3080500 }, { "epoch": 1.85, - "learning_rate": 4.310699264532062e-05, - "loss": 0.3599, + "learning_rate": 4.310686664738698e-05, + "loss": 0.2749, "step": 3081000 }, { "epoch": 1.85, - "learning_rate": 4.3104892679760053e-05, - "loss": 0.3623, + "learning_rate": 4.3104766681826416e-05, + "loss": 0.2753, "step": 3081500 }, { "epoch": 1.85, - "learning_rate": 4.310279691413061e-05, - "loss": 0.3563, + "learning_rate": 4.3102666716265856e-05, + "loss": 0.273, "step": 3082000 }, { "epoch": 1.85, - "learning_rate": 4.310070114850116e-05, - "loss": 0.3811, + "learning_rate": 4.310056675070529e-05, + "loss": 0.2762, "step": 3082500 }, { "epoch": 1.85, - "learning_rate": 4.30986011829406e-05, - "loss": 0.3751, + "learning_rate": 4.309846678514472e-05, + "loss": 0.2765, "step": 3083000 }, { "epoch": 1.85, - "learning_rate": 4.3096501217380034e-05, - "loss": 0.3695, + "learning_rate": 4.309636681958416e-05, + "loss": 0.2846, "step": 3083500 }, { "epoch": 1.85, - "learning_rate": 4.309440125181947e-05, - "loss": 0.3691, + "learning_rate": 4.309426685402359e-05, + "loss": 0.2735, "step": 3084000 }, { "epoch": 1.85, - "learning_rate": 4.309230128625891e-05, - "loss": 0.3605, + "learning_rate": 4.309217108839415e-05, + "loss": 0.2724, "step": 3084500 }, { "epoch": 1.85, - "learning_rate": 4.309020552062946e-05, - "loss": 0.3765, + "learning_rate": 4.3090071122833584e-05, + "loss": 0.2753, "step": 3085000 }, { "epoch": 1.85, - "learning_rate": 4.3088105555068895e-05, - "loss": 0.3678, + "learning_rate": 4.308797115727302e-05, + "loss": 0.2773, "step": 3085500 }, { "epoch": 1.85, - "learning_rate": 4.308600558950833e-05, - "loss": 0.3685, + "learning_rate": 4.308587119171245e-05, + "loss": 0.2803, "step": 3086000 }, { "epoch": 1.85, - "learning_rate": 4.308390982387888e-05, - "loss": 0.3702, + "learning_rate": 4.308377122615189e-05, + "loss": 0.2694, "step": 3086500 }, { "epoch": 1.85, - "learning_rate": 4.308180985831832e-05, - "loss": 0.3655, + "learning_rate": 4.308167546052245e-05, + "loss": 0.2745, "step": 3087000 }, { "epoch": 1.85, - "learning_rate": 4.3079709892757756e-05, - "loss": 0.3715, + "learning_rate": 4.307957549496188e-05, + "loss": 0.2779, "step": 3087500 }, { "epoch": 1.85, - "learning_rate": 4.307760992719719e-05, - "loss": 0.3687, + "learning_rate": 4.307747972933244e-05, + "loss": 0.2797, "step": 3088000 }, { "epoch": 1.85, - "learning_rate": 4.307550996163663e-05, - "loss": 0.3619, + "learning_rate": 4.307537976377187e-05, + "loss": 0.2758, "step": 3088500 }, { "epoch": 1.85, - "learning_rate": 4.307340999607606e-05, - "loss": 0.3563, + "learning_rate": 4.307327979821131e-05, + "loss": 0.2732, "step": 3089000 }, { "epoch": 1.85, - "learning_rate": 4.3071310030515496e-05, - "loss": 0.3667, + "learning_rate": 4.3071179832650746e-05, + "loss": 0.2781, "step": 3089500 }, { "epoch": 1.85, - "learning_rate": 4.306921006495494e-05, - "loss": 0.3672, + "learning_rate": 4.306907986709018e-05, + "loss": 0.2735, "step": 3090000 }, { "epoch": 1.85, - "learning_rate": 4.306711009939437e-05, - "loss": 0.359, + "learning_rate": 4.306697990152961e-05, + "loss": 0.276, "step": 3090500 }, { "epoch": 1.85, - "learning_rate": 4.3065010133833804e-05, - "loss": 0.3561, + "learning_rate": 4.3064879935969046e-05, + "loss": 0.2735, "step": 3091000 }, { "epoch": 1.85, - "learning_rate": 4.3062910168273244e-05, - "loss": 0.3606, + "learning_rate": 4.3062779970408486e-05, + "loss": 0.2703, "step": 3091500 }, { "epoch": 1.85, - "learning_rate": 4.306081020271267e-05, - "loss": 0.3678, + "learning_rate": 4.306068000484792e-05, + "loss": 0.2777, "step": 3092000 }, { "epoch": 1.85, - "learning_rate": 4.305871023715211e-05, - "loss": 0.3779, + "learning_rate": 4.305858423921847e-05, + "loss": 0.2782, "step": 3092500 }, { "epoch": 1.85, - "learning_rate": 4.305661447152267e-05, - "loss": 0.3684, + "learning_rate": 4.305648427365791e-05, + "loss": 0.2798, "step": 3093000 }, { "epoch": 1.85, - "learning_rate": 4.3054514505962105e-05, - "loss": 0.3628, + "learning_rate": 4.305438430809735e-05, + "loss": 0.2743, "step": 3093500 }, { "epoch": 1.85, - "learning_rate": 4.305241454040154e-05, - "loss": 0.3605, + "learning_rate": 4.305228434253678e-05, + "loss": 0.2759, "step": 3094000 }, { "epoch": 1.86, - "learning_rate": 4.305031457484097e-05, - "loss": 0.3689, + "learning_rate": 4.305018857690734e-05, + "loss": 0.2796, "step": 3094500 }, { "epoch": 1.86, - "learning_rate": 4.3048214609280405e-05, - "loss": 0.3709, + "learning_rate": 4.304808861134677e-05, + "loss": 0.2772, "step": 3095000 }, { "epoch": 1.86, - "learning_rate": 4.304611464371984e-05, - "loss": 0.3575, + "learning_rate": 4.304599284571733e-05, + "loss": 0.2741, "step": 3095500 }, { "epoch": 1.86, - "learning_rate": 4.304401467815928e-05, - "loss": 0.363, + "learning_rate": 4.304389288015677e-05, + "loss": 0.281, "step": 3096000 }, { "epoch": 1.86, - "learning_rate": 4.304191891252984e-05, - "loss": 0.3572, + "learning_rate": 4.30417929145962e-05, + "loss": 0.2743, "step": 3096500 }, { "epoch": 1.86, - "learning_rate": 4.3039818946969266e-05, - "loss": 0.3657, + "learning_rate": 4.3039692949035635e-05, + "loss": 0.2801, "step": 3097000 }, { "epoch": 1.86, - "learning_rate": 4.30377189814087e-05, - "loss": 0.3703, + "learning_rate": 4.3037597183406195e-05, + "loss": 0.276, "step": 3097500 }, { "epoch": 1.86, - "learning_rate": 4.303561901584814e-05, - "loss": 0.3627, + "learning_rate": 4.303549721784563e-05, + "loss": 0.2759, "step": 3098000 }, { "epoch": 1.86, - "learning_rate": 4.303351905028757e-05, - "loss": 0.3634, + "learning_rate": 4.303339725228506e-05, + "loss": 0.2773, "step": 3098500 }, { "epoch": 1.86, - "learning_rate": 4.3031419084727006e-05, - "loss": 0.3626, + "learning_rate": 4.30312972867245e-05, + "loss": 0.2748, "step": 3099000 }, { "epoch": 1.86, - "learning_rate": 4.3029319119166446e-05, - "loss": 0.3567, + "learning_rate": 4.302919732116393e-05, + "loss": 0.2706, "step": 3099500 }, { "epoch": 1.86, - "learning_rate": 4.302721915360588e-05, - "loss": 0.3623, + "learning_rate": 4.302709735560336e-05, + "loss": 0.2716, "step": 3100000 }, { "epoch": 1.86, - "eval_loss": 0.349761039018631, - "eval_runtime": 1142.9955, - "eval_samples_per_second": 460.824, - "eval_steps_per_second": 76.804, + "eval_loss": 0.25133055448532104, + "eval_runtime": 1463.358, + "eval_samples_per_second": 359.939, + "eval_steps_per_second": 59.99, "step": 3100000 }, { "epoch": 1.86, - "learning_rate": 4.3025123387976434e-05, - "loss": 0.3552, + "learning_rate": 4.30249973900428e-05, + "loss": 0.2748, "step": 3100500 }, { "epoch": 1.86, - "learning_rate": 4.3023023422415874e-05, - "loss": 0.3559, + "learning_rate": 4.3022897424482236e-05, + "loss": 0.2667, "step": 3101000 }, { "epoch": 1.86, - "learning_rate": 4.302092765678643e-05, - "loss": 0.3589, + "learning_rate": 4.30208016588528e-05, + "loss": 0.2761, "step": 3101500 }, { "epoch": 1.86, - "learning_rate": 4.301882769122586e-05, - "loss": 0.3675, + "learning_rate": 4.3018701693292223e-05, + "loss": 0.28, "step": 3102000 }, { "epoch": 1.86, - "learning_rate": 4.3016727725665294e-05, - "loss": 0.3617, + "learning_rate": 4.3016601727731664e-05, + "loss": 0.2772, "step": 3102500 }, { "epoch": 1.86, - "learning_rate": 4.3014627760104734e-05, - "loss": 0.3608, + "learning_rate": 4.30145017621711e-05, + "loss": 0.2746, "step": 3103000 }, { "epoch": 1.86, - "learning_rate": 4.301252779454417e-05, - "loss": 0.3632, + "learning_rate": 4.301240179661053e-05, + "loss": 0.2757, "step": 3103500 }, { "epoch": 1.86, - "learning_rate": 4.30104278289836e-05, - "loss": 0.363, + "learning_rate": 4.30103060309811e-05, + "loss": 0.2732, "step": 3104000 }, { "epoch": 1.86, - "learning_rate": 4.300832786342304e-05, - "loss": 0.3634, + "learning_rate": 4.3008206065420524e-05, + "loss": 0.2819, "step": 3104500 }, { "epoch": 1.86, - "learning_rate": 4.3006227897862475e-05, - "loss": 0.3573, + "learning_rate": 4.300610609985996e-05, + "loss": 0.278, "step": 3105000 }, { "epoch": 1.86, - "learning_rate": 4.300413213223303e-05, - "loss": 0.3693, + "learning_rate": 4.30040061342994e-05, + "loss": 0.2804, "step": 3105500 }, { "epoch": 1.86, - "learning_rate": 4.300203216667246e-05, - "loss": 0.3641, + "learning_rate": 4.300191036866996e-05, + "loss": 0.2756, "step": 3106000 }, { "epoch": 1.86, - "learning_rate": 4.29999322011119e-05, - "loss": 0.3681, + "learning_rate": 4.2999810403109385e-05, + "loss": 0.2749, "step": 3106500 }, { "epoch": 1.86, - "learning_rate": 4.2997832235551336e-05, - "loss": 0.3659, + "learning_rate": 4.299771043754882e-05, + "loss": 0.2792, "step": 3107000 }, { "epoch": 1.86, - "learning_rate": 4.299573226999077e-05, - "loss": 0.3571, + "learning_rate": 4.299561047198826e-05, + "loss": 0.276, "step": 3107500 }, { "epoch": 1.86, - "learning_rate": 4.299363650436133e-05, - "loss": 0.3664, + "learning_rate": 4.299351470635882e-05, + "loss": 0.2795, "step": 3108000 }, { "epoch": 1.86, - "learning_rate": 4.299153653880076e-05, - "loss": 0.3693, + "learning_rate": 4.299141474079825e-05, + "loss": 0.2779, "step": 3108500 }, { "epoch": 1.86, - "learning_rate": 4.2989436573240197e-05, - "loss": 0.3684, + "learning_rate": 4.298931477523768e-05, + "loss": 0.2773, "step": 3109000 }, { "epoch": 1.86, - "learning_rate": 4.298733660767964e-05, - "loss": 0.3712, + "learning_rate": 4.298721480967712e-05, + "loss": 0.2805, "step": 3109500 }, { "epoch": 1.86, - "learning_rate": 4.298524084205019e-05, - "loss": 0.3694, + "learning_rate": 4.298511904404768e-05, + "loss": 0.2774, "step": 3110000 }, { "epoch": 1.86, - "learning_rate": 4.2983140876489624e-05, - "loss": 0.365, + "learning_rate": 4.298301907848711e-05, + "loss": 0.2772, "step": 3110500 }, { "epoch": 1.87, - "learning_rate": 4.298104091092906e-05, - "loss": 0.367, + "learning_rate": 4.2980919112926554e-05, + "loss": 0.2802, "step": 3111000 }, { "epoch": 1.87, - "learning_rate": 4.29789409453685e-05, - "loss": 0.3627, + "learning_rate": 4.297881914736598e-05, + "loss": 0.2745, "step": 3111500 }, { "epoch": 1.87, - "learning_rate": 4.297684517973905e-05, - "loss": 0.3503, + "learning_rate": 4.2976719181805414e-05, + "loss": 0.2723, "step": 3112000 }, { "epoch": 1.87, - "learning_rate": 4.2974745214178485e-05, - "loss": 0.3661, + "learning_rate": 4.2974623416175974e-05, + "loss": 0.2775, "step": 3112500 }, { "epoch": 1.87, - "learning_rate": 4.297264524861792e-05, - "loss": 0.3568, + "learning_rate": 4.2972523450615414e-05, + "loss": 0.2685, "step": 3113000 }, { "epoch": 1.87, - "learning_rate": 4.297054528305736e-05, - "loss": 0.3667, + "learning_rate": 4.297042348505485e-05, + "loss": 0.2744, "step": 3113500 }, { "epoch": 1.87, - "learning_rate": 4.296845371735904e-05, - "loss": 0.3558, + "learning_rate": 4.2968323519494274e-05, + "loss": 0.2752, "step": 3114000 }, { "epoch": 1.87, - "learning_rate": 4.296635375179847e-05, - "loss": 0.3668, + "learning_rate": 4.2966227753864835e-05, + "loss": 0.2723, "step": 3114500 }, { "epoch": 1.87, - "learning_rate": 4.2964253786237906e-05, - "loss": 0.366, + "learning_rate": 4.296413198823539e-05, + "loss": 0.2797, "step": 3115000 }, { "epoch": 1.87, - "learning_rate": 4.2962153820677346e-05, - "loss": 0.3623, + "learning_rate": 4.296203202267483e-05, + "loss": 0.2748, "step": 3115500 }, { "epoch": 1.87, - "learning_rate": 4.29600580550479e-05, - "loss": 0.3676, + "learning_rate": 4.295993205711426e-05, + "loss": 0.2781, "step": 3116000 }, { "epoch": 1.87, - "learning_rate": 4.295795808948733e-05, - "loss": 0.3569, + "learning_rate": 4.2957832091553695e-05, + "loss": 0.2696, "step": 3116500 }, { "epoch": 1.87, - "learning_rate": 4.2955858123926766e-05, - "loss": 0.3632, + "learning_rate": 4.2955732125993136e-05, + "loss": 0.274, "step": 3117000 }, { "epoch": 1.87, - "learning_rate": 4.2953758158366207e-05, - "loss": 0.3554, + "learning_rate": 4.295363216043257e-05, + "loss": 0.2776, "step": 3117500 }, { "epoch": 1.87, - "learning_rate": 4.295165819280564e-05, - "loss": 0.3612, + "learning_rate": 4.295153219487201e-05, + "loss": 0.2705, "step": 3118000 }, { "epoch": 1.87, - "learning_rate": 4.294955822724507e-05, - "loss": 0.3687, + "learning_rate": 4.2949432229311436e-05, + "loss": 0.2746, "step": 3118500 }, { "epoch": 1.87, - "learning_rate": 4.294745826168451e-05, - "loss": 0.3688, + "learning_rate": 4.294733226375087e-05, + "loss": 0.28, "step": 3119000 }, { "epoch": 1.87, - "learning_rate": 4.294535829612394e-05, - "loss": 0.3659, + "learning_rate": 4.294523649812143e-05, + "loss": 0.2772, "step": 3119500 }, { "epoch": 1.87, - "learning_rate": 4.29432625304945e-05, - "loss": 0.363, + "learning_rate": 4.294313653256087e-05, + "loss": 0.2773, "step": 3120000 }, { "epoch": 1.87, - "learning_rate": 4.2941162564933934e-05, - "loss": 0.3648, + "learning_rate": 4.2941036567000304e-05, + "loss": 0.2767, "step": 3120500 }, { "epoch": 1.87, - "learning_rate": 4.293906259937337e-05, - "loss": 0.3649, + "learning_rate": 4.293893660143973e-05, + "loss": 0.2748, "step": 3121000 }, { "epoch": 1.87, - "learning_rate": 4.29369626338128e-05, - "loss": 0.3736, + "learning_rate": 4.293684503574141e-05, + "loss": 0.2848, "step": 3121500 }, { "epoch": 1.87, - "learning_rate": 4.293486686818336e-05, - "loss": 0.3695, + "learning_rate": 4.2934745070180844e-05, + "loss": 0.282, "step": 3122000 }, { "epoch": 1.87, - "learning_rate": 4.29327669026228e-05, - "loss": 0.3631, + "learning_rate": 4.2932645104620284e-05, + "loss": 0.2737, "step": 3122500 }, { "epoch": 1.87, - "learning_rate": 4.293066693706223e-05, - "loss": 0.3667, + "learning_rate": 4.293054513905972e-05, + "loss": 0.2757, "step": 3123000 }, { "epoch": 1.87, - "learning_rate": 4.292856697150166e-05, - "loss": 0.3694, + "learning_rate": 4.292844517349916e-05, + "loss": 0.2802, "step": 3123500 }, { "epoch": 1.87, - "learning_rate": 4.29264670059411e-05, - "loss": 0.3731, + "learning_rate": 4.292634520793859e-05, + "loss": 0.2818, "step": 3124000 }, { "epoch": 1.87, - "learning_rate": 4.292437124031166e-05, - "loss": 0.3719, + "learning_rate": 4.2924249442309145e-05, + "loss": 0.2822, "step": 3124500 }, { "epoch": 1.87, - "learning_rate": 4.2922271274751096e-05, - "loss": 0.367, + "learning_rate": 4.292214947674858e-05, + "loss": 0.2835, "step": 3125000 }, { "epoch": 1.87, - "learning_rate": 4.292017130919052e-05, - "loss": 0.373, + "learning_rate": 4.292004951118802e-05, + "loss": 0.2797, "step": 3125500 }, { "epoch": 1.87, - "learning_rate": 4.291807134362996e-05, - "loss": 0.3597, + "learning_rate": 4.291794954562745e-05, + "loss": 0.2722, "step": 3126000 }, { "epoch": 1.87, - "learning_rate": 4.2915971378069396e-05, - "loss": 0.3631, + "learning_rate": 4.2915849580066886e-05, + "loss": 0.2767, "step": 3126500 }, { "epoch": 1.87, - "learning_rate": 4.291387141250883e-05, - "loss": 0.3651, + "learning_rate": 4.2913749614506326e-05, + "loss": 0.2745, "step": 3127000 }, { "epoch": 1.88, - "learning_rate": 4.291177144694827e-05, - "loss": 0.3606, + "learning_rate": 4.291164964894576e-05, + "loss": 0.2749, "step": 3127500 }, { "epoch": 1.88, - "learning_rate": 4.2909671481387703e-05, - "loss": 0.3793, + "learning_rate": 4.2909549683385186e-05, + "loss": 0.2812, "step": 3128000 }, { "epoch": 1.88, - "learning_rate": 4.290757571575826e-05, - "loss": 0.3559, + "learning_rate": 4.2907453917755747e-05, + "loss": 0.2711, "step": 3128500 }, { "epoch": 1.88, - "learning_rate": 4.29054757501977e-05, - "loss": 0.3773, + "learning_rate": 4.290535395219519e-05, + "loss": 0.2814, "step": 3129000 }, { "epoch": 1.88, - "learning_rate": 4.290337578463713e-05, - "loss": 0.3691, + "learning_rate": 4.290325398663462e-05, + "loss": 0.2804, "step": 3129500 }, { "epoch": 1.88, - "learning_rate": 4.2901275819076564e-05, - "loss": 0.3704, + "learning_rate": 4.2901154021074054e-05, + "loss": 0.2799, "step": 3130000 }, { "epoch": 1.88, - "learning_rate": 4.289918005344712e-05, - "loss": 0.3657, + "learning_rate": 4.2899058255444614e-05, + "loss": 0.2782, "step": 3130500 }, { "epoch": 1.88, - "learning_rate": 4.289708008788656e-05, - "loss": 0.3623, + "learning_rate": 4.289695828988405e-05, + "loss": 0.2753, "step": 3131000 }, { "epoch": 1.88, - "learning_rate": 4.289498012232599e-05, - "loss": 0.3633, + "learning_rate": 4.289485832432348e-05, + "loss": 0.2748, "step": 3131500 }, { "epoch": 1.88, - "learning_rate": 4.2892880156765425e-05, - "loss": 0.3637, + "learning_rate": 4.289275835876292e-05, + "loss": 0.2735, "step": 3132000 }, { "epoch": 1.88, - "learning_rate": 4.289078439113598e-05, - "loss": 0.3578, + "learning_rate": 4.2890662593133475e-05, + "loss": 0.2702, "step": 3132500 }, { "epoch": 1.88, - "learning_rate": 4.288868442557542e-05, - "loss": 0.3682, + "learning_rate": 4.288856262757291e-05, + "loss": 0.2793, "step": 3133000 }, { "epoch": 1.88, - "learning_rate": 4.288658446001485e-05, - "loss": 0.3628, + "learning_rate": 4.288646266201234e-05, + "loss": 0.2774, "step": 3133500 }, { "epoch": 1.88, - "learning_rate": 4.2884484494454286e-05, - "loss": 0.3687, + "learning_rate": 4.288436269645178e-05, + "loss": 0.2795, "step": 3134000 }, { "epoch": 1.88, - "learning_rate": 4.2882388728824846e-05, - "loss": 0.3666, + "learning_rate": 4.2882266930822335e-05, + "loss": 0.2776, "step": 3134500 }, { "epoch": 1.88, - "learning_rate": 4.2880292963195406e-05, - "loss": 0.3644, + "learning_rate": 4.288016696526177e-05, + "loss": 0.276, "step": 3135000 }, { "epoch": 1.88, - "learning_rate": 4.287819299763484e-05, - "loss": 0.3679, + "learning_rate": 4.28780669997012e-05, + "loss": 0.279, "step": 3135500 }, { "epoch": 1.88, - "learning_rate": 4.287609303207427e-05, - "loss": 0.3604, + "learning_rate": 4.287596703414064e-05, + "loss": 0.275, "step": 3136000 }, { "epoch": 1.88, - "learning_rate": 4.2873993066513713e-05, - "loss": 0.3736, + "learning_rate": 4.2873867068580076e-05, + "loss": 0.2777, "step": 3136500 }, { "epoch": 1.88, - "learning_rate": 4.287189310095315e-05, - "loss": 0.3549, + "learning_rate": 4.287176710301951e-05, + "loss": 0.2757, "step": 3137000 }, { "epoch": 1.88, - "learning_rate": 4.2869793135392574e-05, - "loss": 0.3671, + "learning_rate": 4.286966713745894e-05, + "loss": 0.2799, "step": 3137500 }, { "epoch": 1.88, - "learning_rate": 4.2867693169832014e-05, - "loss": 0.3621, + "learning_rate": 4.2867567171898376e-05, + "loss": 0.2793, "step": 3138000 }, { "epoch": 1.88, - "learning_rate": 4.286559320427145e-05, - "loss": 0.3526, + "learning_rate": 4.286547140626894e-05, + "loss": 0.2735, "step": 3138500 }, { "epoch": 1.88, - "learning_rate": 4.286349743864201e-05, - "loss": 0.3628, + "learning_rate": 4.286337144070838e-05, + "loss": 0.275, "step": 3139000 }, { "epoch": 1.88, - "learning_rate": 4.286139747308144e-05, - "loss": 0.36, + "learning_rate": 4.286127147514781e-05, + "loss": 0.2715, "step": 3139500 }, { "epoch": 1.88, - "learning_rate": 4.2859297507520875e-05, - "loss": 0.3615, + "learning_rate": 4.285917150958724e-05, + "loss": 0.2774, "step": 3140000 }, { "epoch": 1.88, - "learning_rate": 4.285719754196031e-05, - "loss": 0.3599, + "learning_rate": 4.285707154402668e-05, + "loss": 0.2732, "step": 3140500 }, { "epoch": 1.88, - "learning_rate": 4.285510177633087e-05, - "loss": 0.3595, + "learning_rate": 4.285497157846611e-05, + "loss": 0.2737, "step": 3141000 }, { "epoch": 1.88, - "learning_rate": 4.285300181077031e-05, - "loss": 0.3637, + "learning_rate": 4.2852871612905544e-05, + "loss": 0.2827, "step": 3141500 }, { "epoch": 1.88, - "learning_rate": 4.285090184520974e-05, - "loss": 0.3635, + "learning_rate": 4.2850771647344985e-05, + "loss": 0.2781, "step": 3142000 }, { "epoch": 1.88, - "learning_rate": 4.284880187964917e-05, - "loss": 0.3703, + "learning_rate": 4.284867588171554e-05, + "loss": 0.2743, "step": 3142500 }, { "epoch": 1.88, - "learning_rate": 4.284670611401973e-05, - "loss": 0.3622, + "learning_rate": 4.284657591615497e-05, + "loss": 0.2748, "step": 3143000 }, { "epoch": 1.88, - "learning_rate": 4.284461034839028e-05, - "loss": 0.3657, + "learning_rate": 4.2844475950594405e-05, + "loss": 0.2765, "step": 3143500 }, { "epoch": 1.88, - "learning_rate": 4.284251038282972e-05, - "loss": 0.3524, + "learning_rate": 4.2842375985033845e-05, + "loss": 0.2705, "step": 3144000 }, { "epoch": 1.89, - "learning_rate": 4.2840410417269156e-05, - "loss": 0.3697, + "learning_rate": 4.284027601947328e-05, + "loss": 0.2808, "step": 3144500 }, { "epoch": 1.89, - "learning_rate": 4.283831045170859e-05, - "loss": 0.3654, + "learning_rate": 4.283818025384383e-05, + "loss": 0.2767, "step": 3145000 }, { "epoch": 1.89, - "learning_rate": 4.2836214686079143e-05, - "loss": 0.3804, + "learning_rate": 4.283608028828327e-05, + "loss": 0.28, "step": 3145500 }, { "epoch": 1.89, - "learning_rate": 4.2834114720518584e-05, - "loss": 0.36, + "learning_rate": 4.2833980322722706e-05, + "loss": 0.2765, "step": 3146000 }, { "epoch": 1.89, - "learning_rate": 4.283201475495802e-05, - "loss": 0.357, + "learning_rate": 4.2831884557093266e-05, + "loss": 0.2747, "step": 3146500 }, { "epoch": 1.89, - "learning_rate": 4.282991478939745e-05, - "loss": 0.3631, + "learning_rate": 4.282978459153269e-05, + "loss": 0.2766, "step": 3147000 }, { "epoch": 1.89, - "learning_rate": 4.282781482383689e-05, - "loss": 0.366, + "learning_rate": 4.282768462597213e-05, + "loss": 0.2751, "step": 3147500 }, { "epoch": 1.89, - "learning_rate": 4.2825714858276324e-05, - "loss": 0.359, + "learning_rate": 4.282558466041157e-05, + "loss": 0.2733, "step": 3148000 }, { "epoch": 1.89, - "learning_rate": 4.2823614892715765e-05, - "loss": 0.3665, + "learning_rate": 4.2823484694851e-05, + "loss": 0.2762, "step": 3148500 }, { "epoch": 1.89, - "learning_rate": 4.282151912708632e-05, - "loss": 0.3569, + "learning_rate": 4.282138472929044e-05, + "loss": 0.269, "step": 3149000 }, { "epoch": 1.89, - "learning_rate": 4.281941916152575e-05, - "loss": 0.3589, + "learning_rate": 4.2819288963660994e-05, + "loss": 0.2739, "step": 3149500 }, { "epoch": 1.89, - "learning_rate": 4.2817319195965185e-05, - "loss": 0.3571, + "learning_rate": 4.281718899810043e-05, + "loss": 0.2785, "step": 3150000 }, { "epoch": 1.89, - "learning_rate": 4.2815219230404625e-05, - "loss": 0.3744, + "learning_rate": 4.281508903253986e-05, + "loss": 0.2842, "step": 3150500 }, { "epoch": 1.89, - "learning_rate": 4.281311926484406e-05, - "loss": 0.3582, + "learning_rate": 4.28129890669793e-05, + "loss": 0.2713, "step": 3151000 }, { "epoch": 1.89, - "learning_rate": 4.281102349921461e-05, - "loss": 0.3544, + "learning_rate": 4.2810889101418735e-05, + "loss": 0.2754, "step": 3151500 }, { "epoch": 1.89, - "learning_rate": 4.2808923533654046e-05, - "loss": 0.3615, + "learning_rate": 4.280878913585817e-05, + "loss": 0.2702, "step": 3152000 }, { "epoch": 1.89, - "learning_rate": 4.2806823568093486e-05, - "loss": 0.3718, + "learning_rate": 4.280668917029761e-05, + "loss": 0.2825, "step": 3152500 }, { "epoch": 1.89, - "learning_rate": 4.280472360253292e-05, - "loss": 0.3573, + "learning_rate": 4.280458920473704e-05, + "loss": 0.2753, "step": 3153000 }, { "epoch": 1.89, - "learning_rate": 4.280262363697235e-05, - "loss": 0.3594, + "learning_rate": 4.2802493439107595e-05, + "loss": 0.2755, "step": 3153500 }, { "epoch": 1.89, - "learning_rate": 4.2800527871342906e-05, - "loss": 0.3724, + "learning_rate": 4.2800393473547036e-05, + "loss": 0.2773, "step": 3154000 }, { "epoch": 1.89, - "learning_rate": 4.279842790578235e-05, - "loss": 0.357, + "learning_rate": 4.279829350798647e-05, + "loss": 0.2762, "step": 3154500 }, { "epoch": 1.89, - "learning_rate": 4.279632794022178e-05, - "loss": 0.3666, + "learning_rate": 4.27961935424259e-05, + "loss": 0.2739, "step": 3155000 }, { "epoch": 1.89, - "learning_rate": 4.2794232174592334e-05, - "loss": 0.3732, + "learning_rate": 4.2794097776796456e-05, + "loss": 0.2767, "step": 3155500 }, { "epoch": 1.89, - "learning_rate": 4.2792132209031774e-05, - "loss": 0.37, + "learning_rate": 4.2791997811235896e-05, + "loss": 0.2756, "step": 3156000 }, { "epoch": 1.89, - "learning_rate": 4.279003224347121e-05, - "loss": 0.3816, + "learning_rate": 4.278989784567533e-05, + "loss": 0.2795, "step": 3156500 }, { "epoch": 1.89, - "learning_rate": 4.278793227791064e-05, - "loss": 0.3619, + "learning_rate": 4.278779788011476e-05, + "loss": 0.2755, "step": 3157000 }, { "epoch": 1.89, - "learning_rate": 4.278583231235008e-05, - "loss": 0.3571, + "learning_rate": 4.2785697914554204e-05, + "loss": 0.2685, "step": 3157500 }, { "epoch": 1.89, - "learning_rate": 4.2783732346789515e-05, - "loss": 0.3791, + "learning_rate": 4.278359794899364e-05, + "loss": 0.283, "step": 3158000 }, { "epoch": 1.89, - "learning_rate": 4.278163238122895e-05, - "loss": 0.3732, + "learning_rate": 4.278149798343307e-05, + "loss": 0.2804, "step": 3158500 }, { "epoch": 1.89, - "learning_rate": 4.277953241566838e-05, - "loss": 0.3642, + "learning_rate": 4.277939801787251e-05, + "loss": 0.2747, "step": 3159000 }, { "epoch": 1.89, - "learning_rate": 4.2777432450107815e-05, - "loss": 0.3507, + "learning_rate": 4.2777298052311944e-05, + "loss": 0.272, "step": 3159500 }, { "epoch": 1.89, - "learning_rate": 4.277533248454725e-05, - "loss": 0.363, + "learning_rate": 4.27752022866825e-05, + "loss": 0.2755, "step": 3160000 }, { "epoch": 1.89, - "learning_rate": 4.277323251898669e-05, - "loss": 0.3547, + "learning_rate": 4.277310232112193e-05, + "loss": 0.2726, "step": 3160500 }, { "epoch": 1.9, - "learning_rate": 4.277113255342612e-05, - "loss": 0.3618, + "learning_rate": 4.277100235556137e-05, + "loss": 0.2737, "step": 3161000 }, { "epoch": 1.9, - "learning_rate": 4.2769036787796676e-05, - "loss": 0.3621, + "learning_rate": 4.2768902390000805e-05, + "loss": 0.2691, "step": 3161500 }, { "epoch": 1.9, - "learning_rate": 4.2766941022167236e-05, - "loss": 0.3696, + "learning_rate": 4.276680662437136e-05, + "loss": 0.2722, "step": 3162000 }, { "epoch": 1.9, - "learning_rate": 4.276484525653779e-05, - "loss": 0.3612, + "learning_rate": 4.276471085874191e-05, + "loss": 0.2782, "step": 3162500 }, { "epoch": 1.9, - "learning_rate": 4.276274529097723e-05, - "loss": 0.3656, + "learning_rate": 4.276261089318135e-05, + "loss": 0.2734, "step": 3163000 }, { "epoch": 1.9, - "learning_rate": 4.276064532541666e-05, - "loss": 0.3662, + "learning_rate": 4.2760510927620786e-05, + "loss": 0.2766, "step": 3163500 }, { "epoch": 1.9, - "learning_rate": 4.27585453598561e-05, - "loss": 0.3565, + "learning_rate": 4.275841096206022e-05, + "loss": 0.2727, "step": 3164000 }, { "epoch": 1.9, - "learning_rate": 4.275644959422665e-05, - "loss": 0.3725, + "learning_rate": 4.275631099649966e-05, + "loss": 0.2848, "step": 3164500 }, { "epoch": 1.9, - "learning_rate": 4.275434962866609e-05, - "loss": 0.3589, + "learning_rate": 4.275421103093909e-05, + "loss": 0.2715, "step": 3165000 }, { "epoch": 1.9, - "learning_rate": 4.2752249663105524e-05, - "loss": 0.3685, + "learning_rate": 4.2752111065378526e-05, + "loss": 0.2812, "step": 3165500 }, { "epoch": 1.9, - "learning_rate": 4.275014969754496e-05, - "loss": 0.3689, + "learning_rate": 4.2750011099817967e-05, + "loss": 0.2784, "step": 3166000 }, { "epoch": 1.9, - "learning_rate": 4.27480497319844e-05, - "loss": 0.364, + "learning_rate": 4.27479111342574e-05, + "loss": 0.2777, "step": 3166500 }, { "epoch": 1.9, - "learning_rate": 4.274594976642383e-05, - "loss": 0.3631, + "learning_rate": 4.2745815368627954e-05, + "loss": 0.2715, "step": 3167000 }, { "epoch": 1.9, - "learning_rate": 4.2743849800863265e-05, - "loss": 0.3781, + "learning_rate": 4.274371540306739e-05, + "loss": 0.2793, "step": 3167500 }, { "epoch": 1.9, - "learning_rate": 4.2741749835302705e-05, - "loss": 0.3628, + "learning_rate": 4.274161543750683e-05, + "loss": 0.2768, "step": 3168000 }, { "epoch": 1.9, - "learning_rate": 4.273964986974213e-05, - "loss": 0.3579, + "learning_rate": 4.273951547194626e-05, + "loss": 0.2749, "step": 3168500 }, { "epoch": 1.9, - "learning_rate": 4.273755410411269e-05, - "loss": 0.3672, + "learning_rate": 4.2737419706316814e-05, + "loss": 0.2797, "step": 3169000 }, { "epoch": 1.9, - "learning_rate": 4.273545413855213e-05, - "loss": 0.3632, + "learning_rate": 4.2735319740756255e-05, + "loss": 0.2715, "step": 3169500 }, { "epoch": 1.9, - "learning_rate": 4.2733354172991566e-05, - "loss": 0.369, + "learning_rate": 4.273321977519569e-05, + "loss": 0.2775, "step": 3170000 }, { "epoch": 1.9, - "learning_rate": 4.2731254207431e-05, - "loss": 0.3627, + "learning_rate": 4.273112400956624e-05, + "loss": 0.2759, "step": 3170500 }, { "epoch": 1.9, - "learning_rate": 4.272915424187043e-05, - "loss": 0.3617, + "learning_rate": 4.2729024044005675e-05, + "loss": 0.2713, "step": 3171000 }, { "epoch": 1.9, - "learning_rate": 4.2727054276309866e-05, - "loss": 0.3558, + "learning_rate": 4.2726924078445115e-05, + "loss": 0.2663, "step": 3171500 }, { "epoch": 1.9, - "learning_rate": 4.27249543107493e-05, - "loss": 0.3662, + "learning_rate": 4.272482411288455e-05, + "loss": 0.2803, "step": 3172000 }, { "epoch": 1.9, - "learning_rate": 4.272285434518874e-05, - "loss": 0.3727, + "learning_rate": 4.272272414732398e-05, + "loss": 0.2803, "step": 3172500 }, { "epoch": 1.9, - "learning_rate": 4.272075857955929e-05, - "loss": 0.371, + "learning_rate": 4.272062418176342e-05, + "loss": 0.2755, "step": 3173000 }, { "epoch": 1.9, - "learning_rate": 4.271865861399873e-05, - "loss": 0.365, + "learning_rate": 4.2718528416133976e-05, + "loss": 0.2848, "step": 3173500 }, { "epoch": 1.9, - "learning_rate": 4.271655864843816e-05, - "loss": 0.3645, + "learning_rate": 4.271642845057341e-05, + "loss": 0.2774, "step": 3174000 }, { "epoch": 1.9, - "learning_rate": 4.27144586828776e-05, - "loss": 0.3711, + "learning_rate": 4.271432848501284e-05, + "loss": 0.2775, "step": 3174500 }, { "epoch": 1.9, - "learning_rate": 4.2712358717317034e-05, - "loss": 0.3626, + "learning_rate": 4.271222851945228e-05, + "loss": 0.2713, "step": 3175000 }, { "epoch": 1.9, - "learning_rate": 4.271025875175647e-05, - "loss": 0.3608, + "learning_rate": 4.2710128553891717e-05, + "loss": 0.2753, "step": 3175500 }, { "epoch": 1.9, - "learning_rate": 4.270816298612703e-05, - "loss": 0.3564, + "learning_rate": 4.270802858833116e-05, + "loss": 0.2672, "step": 3176000 }, { "epoch": 1.9, - "learning_rate": 4.270606302056646e-05, - "loss": 0.3674, + "learning_rate": 4.2705928622770584e-05, + "loss": 0.2774, "step": 3176500 }, { "epoch": 1.9, - "learning_rate": 4.2703963055005895e-05, - "loss": 0.3566, + "learning_rate": 4.270382865721002e-05, + "loss": 0.2746, "step": 3177000 }, { "epoch": 1.91, - "learning_rate": 4.2701863089445335e-05, - "loss": 0.3657, + "learning_rate": 4.270173289158058e-05, + "loss": 0.2755, "step": 3177500 }, { "epoch": 1.91, - "learning_rate": 4.269976312388477e-05, - "loss": 0.3615, + "learning_rate": 4.269964132588225e-05, + "loss": 0.2752, "step": 3178000 }, { "epoch": 1.91, - "learning_rate": 4.26976631583242e-05, - "loss": 0.3686, + "learning_rate": 4.2697541360321684e-05, + "loss": 0.2764, "step": 3178500 }, { "epoch": 1.91, - "learning_rate": 4.269556319276364e-05, - "loss": 0.3611, + "learning_rate": 4.2695441394761125e-05, + "loss": 0.2706, "step": 3179000 }, { "epoch": 1.91, - "learning_rate": 4.2693467427134196e-05, - "loss": 0.361, + "learning_rate": 4.269334142920056e-05, + "loss": 0.2703, "step": 3179500 }, { "epoch": 1.91, - "learning_rate": 4.269136746157363e-05, - "loss": 0.3604, + "learning_rate": 4.269124146363999e-05, + "loss": 0.2728, "step": 3180000 }, { "epoch": 1.91, - "learning_rate": 4.268926749601306e-05, - "loss": 0.3733, + "learning_rate": 4.268914149807943e-05, + "loss": 0.2789, "step": 3180500 }, { "epoch": 1.91, - "learning_rate": 4.26871675304525e-05, - "loss": 0.3553, + "learning_rate": 4.2687041532518865e-05, + "loss": 0.2718, "step": 3181000 }, { "epoch": 1.91, - "learning_rate": 4.2685067564891936e-05, - "loss": 0.371, + "learning_rate": 4.26849415669583e-05, + "loss": 0.2758, "step": 3181500 }, { "epoch": 1.91, - "learning_rate": 4.268296759933137e-05, - "loss": 0.3574, + "learning_rate": 4.268284160139774e-05, + "loss": 0.2689, "step": 3182000 }, { "epoch": 1.91, - "learning_rate": 4.268086763377081e-05, - "loss": 0.3553, + "learning_rate": 4.268074163583717e-05, + "loss": 0.2732, "step": 3182500 }, { "epoch": 1.91, - "learning_rate": 4.267876766821024e-05, - "loss": 0.3581, + "learning_rate": 4.267864167027661e-05, + "loss": 0.2763, "step": 3183000 }, { "epoch": 1.91, - "learning_rate": 4.26766719025808e-05, - "loss": 0.3671, + "learning_rate": 4.267654170471604e-05, + "loss": 0.275, "step": 3183500 }, { "epoch": 1.91, - "learning_rate": 4.267457193702023e-05, - "loss": 0.3617, + "learning_rate": 4.267444173915547e-05, + "loss": 0.2715, "step": 3184000 }, { "epoch": 1.91, - "learning_rate": 4.267247197145967e-05, - "loss": 0.3585, + "learning_rate": 4.267234597352603e-05, + "loss": 0.2735, "step": 3184500 }, { "epoch": 1.91, - "learning_rate": 4.2670372005899104e-05, - "loss": 0.3592, + "learning_rate": 4.267025020789659e-05, + "loss": 0.2733, "step": 3185000 }, { "epoch": 1.91, - "learning_rate": 4.266827624026966e-05, - "loss": 0.3654, + "learning_rate": 4.266815444226714e-05, + "loss": 0.2785, "step": 3185500 }, { "epoch": 1.91, - "learning_rate": 4.26661762747091e-05, - "loss": 0.3653, + "learning_rate": 4.266605447670658e-05, + "loss": 0.2696, "step": 3186000 }, { "epoch": 1.91, - "learning_rate": 4.266407630914853e-05, - "loss": 0.3622, + "learning_rate": 4.2663954511146014e-05, + "loss": 0.2737, "step": 3186500 }, { "epoch": 1.91, - "learning_rate": 4.2661976343587965e-05, - "loss": 0.361, + "learning_rate": 4.266185454558545e-05, + "loss": 0.274, "step": 3187000 }, { "epoch": 1.91, - "learning_rate": 4.265988057795852e-05, - "loss": 0.368, + "learning_rate": 4.265975458002489e-05, + "loss": 0.2778, "step": 3187500 }, { "epoch": 1.91, - "learning_rate": 4.265778061239796e-05, - "loss": 0.3622, + "learning_rate": 4.265765881439544e-05, + "loss": 0.275, "step": 3188000 }, { "epoch": 1.91, - "learning_rate": 4.265568484676851e-05, - "loss": 0.3555, + "learning_rate": 4.2655558848834875e-05, + "loss": 0.2719, "step": 3188500 }, { "epoch": 1.91, - "learning_rate": 4.2653584881207946e-05, - "loss": 0.3583, + "learning_rate": 4.2653458883274315e-05, + "loss": 0.2764, "step": 3189000 }, { "epoch": 1.91, - "learning_rate": 4.265148491564738e-05, - "loss": 0.3555, + "learning_rate": 4.265135891771375e-05, + "loss": 0.2737, "step": 3189500 }, { "epoch": 1.91, - "learning_rate": 4.264938495008682e-05, - "loss": 0.3601, + "learning_rate": 4.264925895215318e-05, + "loss": 0.275, "step": 3190000 }, { "epoch": 1.91, - "learning_rate": 4.264728498452625e-05, - "loss": 0.3609, + "learning_rate": 4.264715898659262e-05, + "loss": 0.2726, "step": 3190500 }, { "epoch": 1.91, - "learning_rate": 4.2645185018965686e-05, - "loss": 0.3633, + "learning_rate": 4.2645059021032056e-05, + "loss": 0.2727, "step": 3191000 }, { "epoch": 1.91, - "learning_rate": 4.2643089253336247e-05, - "loss": 0.3673, + "learning_rate": 4.264295905547149e-05, + "loss": 0.2747, "step": 3191500 }, { "epoch": 1.91, - "learning_rate": 4.264098928777568e-05, - "loss": 0.3657, + "learning_rate": 4.264085908991093e-05, + "loss": 0.2793, "step": 3192000 }, { "epoch": 1.91, - "learning_rate": 4.2638889322215113e-05, - "loss": 0.3582, + "learning_rate": 4.263875912435036e-05, + "loss": 0.2723, "step": 3192500 }, { "epoch": 1.91, - "learning_rate": 4.2636789356654554e-05, - "loss": 0.3489, + "learning_rate": 4.2636659158789796e-05, + "loss": 0.2628, "step": 3193000 }, { "epoch": 1.91, - "learning_rate": 4.263468939109399e-05, - "loss": 0.361, + "learning_rate": 4.263456339316035e-05, + "loss": 0.2716, "step": 3193500 }, { "epoch": 1.91, - "learning_rate": 4.263258942553342e-05, - "loss": 0.3672, + "learning_rate": 4.263246342759979e-05, + "loss": 0.2751, "step": 3194000 }, { "epoch": 1.92, - "learning_rate": 4.263048945997286e-05, - "loss": 0.366, + "learning_rate": 4.2630363462039224e-05, + "loss": 0.2769, "step": 3194500 }, { "epoch": 1.92, - "learning_rate": 4.2628389494412294e-05, - "loss": 0.3582, + "learning_rate": 4.262826349647866e-05, + "loss": 0.2699, "step": 3195000 }, { "epoch": 1.92, - "learning_rate": 4.262629372878285e-05, - "loss": 0.3582, + "learning_rate": 4.262616353091809e-05, + "loss": 0.2739, "step": 3195500 }, { "epoch": 1.92, - "learning_rate": 4.262419376322228e-05, - "loss": 0.3659, + "learning_rate": 4.2624063565357524e-05, + "loss": 0.2674, "step": 3196000 }, { "epoch": 1.92, - "learning_rate": 4.2622097997592835e-05, - "loss": 0.3548, + "learning_rate": 4.2621963599796964e-05, + "loss": 0.2749, "step": 3196500 }, { "epoch": 1.92, - "learning_rate": 4.2619998032032275e-05, - "loss": 0.3661, + "learning_rate": 4.26198636342364e-05, + "loss": 0.2829, "step": 3197000 }, { "epoch": 1.92, - "learning_rate": 4.261789806647171e-05, - "loss": 0.3605, + "learning_rate": 4.261777206853808e-05, + "loss": 0.2725, "step": 3197500 }, { "epoch": 1.92, - "learning_rate": 4.261579810091114e-05, - "loss": 0.3614, + "learning_rate": 4.261567210297751e-05, + "loss": 0.2745, "step": 3198000 }, { "epoch": 1.92, - "learning_rate": 4.261369813535058e-05, - "loss": 0.3679, + "learning_rate": 4.2613572137416945e-05, + "loss": 0.2757, "step": 3198500 }, { "epoch": 1.92, - "learning_rate": 4.2611602369721136e-05, - "loss": 0.3602, + "learning_rate": 4.2611472171856385e-05, + "loss": 0.2725, "step": 3199000 }, { "epoch": 1.92, - "learning_rate": 4.260950240416057e-05, - "loss": 0.3669, + "learning_rate": 4.260937220629582e-05, + "loss": 0.2782, "step": 3199500 }, { "epoch": 1.92, - "learning_rate": 4.260740243860001e-05, - "loss": 0.3585, + "learning_rate": 4.260727224073525e-05, + "loss": 0.268, "step": 3200000 }, { "epoch": 1.92, - "eval_loss": 0.3485775291919708, - "eval_runtime": 1121.258, - "eval_samples_per_second": 469.758, - "eval_steps_per_second": 78.293, + "eval_loss": 0.2533232569694519, + "eval_runtime": 1462.1663, + "eval_samples_per_second": 360.233, + "eval_steps_per_second": 60.039, "step": 3200000 }, { "epoch": 1.92, - "learning_rate": 4.260530247303944e-05, - "loss": 0.3697, + "learning_rate": 4.2605172275174686e-05, + "loss": 0.2787, "step": 3200500 }, { "epoch": 1.92, - "learning_rate": 4.2603202507478877e-05, - "loss": 0.3736, + "learning_rate": 4.260307230961412e-05, + "loss": 0.2789, "step": 3201000 }, { "epoch": 1.92, - "learning_rate": 4.260110674184943e-05, - "loss": 0.3561, + "learning_rate": 4.260097654398468e-05, + "loss": 0.2737, "step": 3201500 }, { "epoch": 1.92, - "learning_rate": 4.259900677628887e-05, - "loss": 0.363, + "learning_rate": 4.259887657842411e-05, + "loss": 0.2738, "step": 3202000 }, { "epoch": 1.92, - "learning_rate": 4.2596906810728304e-05, - "loss": 0.367, + "learning_rate": 4.259677661286355e-05, + "loss": 0.2723, "step": 3202500 }, { "epoch": 1.92, - "learning_rate": 4.259480684516774e-05, - "loss": 0.3662, + "learning_rate": 4.259467664730298e-05, + "loss": 0.2778, "step": 3203000 }, { "epoch": 1.92, - "learning_rate": 4.259270687960718e-05, - "loss": 0.3612, + "learning_rate": 4.259258088167354e-05, + "loss": 0.2733, "step": 3203500 }, { "epoch": 1.92, - "learning_rate": 4.259060691404661e-05, - "loss": 0.3582, + "learning_rate": 4.259048091611298e-05, + "loss": 0.2721, "step": 3204000 }, { "epoch": 1.92, - "learning_rate": 4.2588506948486044e-05, - "loss": 0.3671, + "learning_rate": 4.2588385150483534e-05, + "loss": 0.2781, "step": 3204500 }, { "epoch": 1.92, - "learning_rate": 4.25864111828566e-05, - "loss": 0.3619, + "learning_rate": 4.258628518492297e-05, + "loss": 0.2699, "step": 3205000 }, { "epoch": 1.92, - "learning_rate": 4.258431121729604e-05, - "loss": 0.3689, + "learning_rate": 4.25841852193624e-05, + "loss": 0.2793, "step": 3205500 }, { "epoch": 1.92, - "learning_rate": 4.258221125173547e-05, - "loss": 0.3663, + "learning_rate": 4.258208525380184e-05, + "loss": 0.2772, "step": 3206000 }, { "epoch": 1.92, - "learning_rate": 4.2580111286174905e-05, - "loss": 0.37, + "learning_rate": 4.2579985288241275e-05, + "loss": 0.2756, "step": 3206500 }, { "epoch": 1.92, - "learning_rate": 4.2578011320614345e-05, - "loss": 0.3675, + "learning_rate": 4.257788532268071e-05, + "loss": 0.2791, "step": 3207000 }, { "epoch": 1.92, - "learning_rate": 4.257591135505377e-05, - "loss": 0.3585, + "learning_rate": 4.257578535712014e-05, + "loss": 0.2733, "step": 3207500 }, { "epoch": 1.92, - "learning_rate": 4.257381138949321e-05, - "loss": 0.3658, + "learning_rate": 4.2573685391559575e-05, + "loss": 0.2837, "step": 3208000 }, { "epoch": 1.92, - "learning_rate": 4.257171562386377e-05, - "loss": 0.3577, + "learning_rate": 4.257158542599901e-05, + "loss": 0.2729, "step": 3208500 }, { "epoch": 1.92, - "learning_rate": 4.2569615658303206e-05, - "loss": 0.3679, + "learning_rate": 4.256948966036957e-05, + "loss": 0.2787, "step": 3209000 }, { "epoch": 1.92, - "learning_rate": 4.256751569274263e-05, - "loss": 0.3689, + "learning_rate": 4.256738969480901e-05, + "loss": 0.278, "step": 3209500 }, { "epoch": 1.92, - "learning_rate": 4.256541572718207e-05, - "loss": 0.37, + "learning_rate": 4.2565289729248436e-05, + "loss": 0.2776, "step": 3210000 }, { "epoch": 1.92, - "learning_rate": 4.256331996155263e-05, - "loss": 0.3554, + "learning_rate": 4.2563189763687876e-05, + "loss": 0.2766, "step": 3210500 }, { "epoch": 1.93, - "learning_rate": 4.256121999599207e-05, - "loss": 0.3701, + "learning_rate": 4.2561093998058436e-05, + "loss": 0.2777, "step": 3211000 }, { "epoch": 1.93, - "learning_rate": 4.25591200304315e-05, - "loss": 0.3663, + "learning_rate": 4.255899403249787e-05, + "loss": 0.2775, "step": 3211500 }, { "epoch": 1.93, - "learning_rate": 4.2557020064870934e-05, - "loss": 0.3651, + "learning_rate": 4.25568940669373e-05, + "loss": 0.2822, "step": 3212000 }, { "epoch": 1.93, - "learning_rate": 4.255492009931037e-05, - "loss": 0.3621, + "learning_rate": 4.2554794101376737e-05, + "loss": 0.2752, "step": 3212500 }, { "epoch": 1.93, - "learning_rate": 4.25528201337498e-05, - "loss": 0.3663, + "learning_rate": 4.255269413581617e-05, + "loss": 0.2823, "step": 3213000 }, { "epoch": 1.93, - "learning_rate": 4.255072016818924e-05, - "loss": 0.3666, + "learning_rate": 4.255059837018673e-05, + "loss": 0.2731, "step": 3213500 }, { "epoch": 1.93, - "learning_rate": 4.2548620202628674e-05, - "loss": 0.3664, + "learning_rate": 4.2548498404626164e-05, + "loss": 0.2757, "step": 3214000 }, { "epoch": 1.93, - "learning_rate": 4.254652443699923e-05, - "loss": 0.3633, + "learning_rate": 4.25463984390656e-05, + "loss": 0.2758, "step": 3214500 }, { "epoch": 1.93, - "learning_rate": 4.254442447143867e-05, - "loss": 0.3611, + "learning_rate": 4.254429847350503e-05, + "loss": 0.2779, "step": 3215000 }, { "epoch": 1.93, - "learning_rate": 4.25423245058781e-05, - "loss": 0.3535, + "learning_rate": 4.254220270787559e-05, + "loss": 0.2703, "step": 3215500 }, { "epoch": 1.93, - "learning_rate": 4.2540224540317535e-05, - "loss": 0.3584, + "learning_rate": 4.2540102742315025e-05, + "loss": 0.2708, "step": 3216000 }, { "epoch": 1.93, - "learning_rate": 4.2538128774688095e-05, - "loss": 0.3772, + "learning_rate": 4.2538002776754465e-05, + "loss": 0.284, "step": 3216500 }, { "epoch": 1.93, - "learning_rate": 4.253602880912753e-05, - "loss": 0.3561, + "learning_rate": 4.253590281119389e-05, + "loss": 0.2723, "step": 3217000 }, { "epoch": 1.93, - "learning_rate": 4.253392884356696e-05, - "loss": 0.3665, + "learning_rate": 4.253380704556445e-05, + "loss": 0.276, "step": 3217500 }, { "epoch": 1.93, - "learning_rate": 4.2531828878006396e-05, - "loss": 0.3672, + "learning_rate": 4.253170708000389e-05, + "loss": 0.2783, "step": 3218000 }, { "epoch": 1.93, - "learning_rate": 4.2529728912445836e-05, - "loss": 0.3551, + "learning_rate": 4.2529607114443326e-05, + "loss": 0.2675, "step": 3218500 }, { "epoch": 1.93, - "learning_rate": 4.252762894688527e-05, - "loss": 0.3637, + "learning_rate": 4.252750714888276e-05, + "loss": 0.2757, "step": 3219000 }, { "epoch": 1.93, - "learning_rate": 4.252553318125582e-05, - "loss": 0.3562, + "learning_rate": 4.252541138325331e-05, + "loss": 0.2707, "step": 3219500 }, { "epoch": 1.93, - "learning_rate": 4.252343321569526e-05, - "loss": 0.3655, + "learning_rate": 4.252331141769275e-05, + "loss": 0.2744, "step": 3220000 }, { "epoch": 1.93, - "learning_rate": 4.25213332501347e-05, - "loss": 0.3639, + "learning_rate": 4.2521211452132186e-05, + "loss": 0.2742, "step": 3220500 }, { "epoch": 1.93, - "learning_rate": 4.251923328457413e-05, - "loss": 0.3724, + "learning_rate": 4.251911148657162e-05, + "loss": 0.2783, "step": 3221000 }, { "epoch": 1.93, - "learning_rate": 4.251713331901357e-05, - "loss": 0.3728, + "learning_rate": 4.251701152101106e-05, + "loss": 0.2796, "step": 3221500 }, { "epoch": 1.93, - "learning_rate": 4.2515037553384124e-05, - "loss": 0.358, + "learning_rate": 4.251491155545049e-05, + "loss": 0.2672, "step": 3222000 }, { "epoch": 1.93, - "learning_rate": 4.251293758782356e-05, - "loss": 0.3652, + "learning_rate": 4.251281158988992e-05, + "loss": 0.2779, "step": 3222500 }, { "epoch": 1.93, - "learning_rate": 4.251083762226299e-05, - "loss": 0.3603, + "learning_rate": 4.251071162432936e-05, + "loss": 0.2733, "step": 3223000 }, { "epoch": 1.93, - "learning_rate": 4.250873765670243e-05, - "loss": 0.3539, + "learning_rate": 4.250861585869992e-05, + "loss": 0.27, "step": 3223500 }, { "epoch": 1.93, - "learning_rate": 4.2506637691141865e-05, - "loss": 0.3606, + "learning_rate": 4.2506520093070474e-05, + "loss": 0.2664, "step": 3224000 }, { "epoch": 1.93, - "learning_rate": 4.25045377255813e-05, - "loss": 0.3689, + "learning_rate": 4.250442012750991e-05, + "loss": 0.2764, "step": 3224500 }, { "epoch": 1.93, - "learning_rate": 4.250244195995185e-05, - "loss": 0.3694, + "learning_rate": 4.250232016194935e-05, + "loss": 0.277, "step": 3225000 }, { "epoch": 1.93, - "learning_rate": 4.250034199439129e-05, - "loss": 0.3715, + "learning_rate": 4.250022019638878e-05, + "loss": 0.276, "step": 3225500 }, { "epoch": 1.93, - "learning_rate": 4.2498242028830725e-05, - "loss": 0.3645, + "learning_rate": 4.2498120230828215e-05, + "loss": 0.2748, "step": 3226000 }, { "epoch": 1.93, - "learning_rate": 4.249614206327016e-05, - "loss": 0.3526, + "learning_rate": 4.249602026526765e-05, + "loss": 0.2736, "step": 3226500 }, { "epoch": 1.93, - "learning_rate": 4.24940420977096e-05, - "loss": 0.3635, + "learning_rate": 4.249392029970708e-05, + "loss": 0.2762, "step": 3227000 }, { "epoch": 1.94, - "learning_rate": 4.249194213214903e-05, - "loss": 0.363, + "learning_rate": 4.2491820334146515e-05, + "loss": 0.2732, "step": 3227500 }, { "epoch": 1.94, - "learning_rate": 4.2489842166588466e-05, - "loss": 0.3636, + "learning_rate": 4.2489724568517076e-05, + "loss": 0.279, "step": 3228000 }, { "epoch": 1.94, - "learning_rate": 4.2487742201027906e-05, - "loss": 0.3641, + "learning_rate": 4.2487624602956516e-05, + "loss": 0.2763, "step": 3228500 }, { "epoch": 1.94, - "learning_rate": 4.248564643539846e-05, - "loss": 0.3733, + "learning_rate": 4.248552463739594e-05, + "loss": 0.2782, "step": 3229000 }, { "epoch": 1.94, - "learning_rate": 4.248354646983789e-05, - "loss": 0.3679, + "learning_rate": 4.2483424671835376e-05, + "loss": 0.2779, "step": 3229500 }, { "epoch": 1.94, - "learning_rate": 4.2481446504277334e-05, - "loss": 0.3568, + "learning_rate": 4.2481328906205936e-05, + "loss": 0.2739, "step": 3230000 }, { "epoch": 1.94, - "learning_rate": 4.247934653871677e-05, - "loss": 0.3595, + "learning_rate": 4.2479228940645377e-05, + "loss": 0.2702, "step": 3230500 }, { "epoch": 1.94, - "learning_rate": 4.24772465731562e-05, - "loss": 0.3606, + "learning_rate": 4.247712897508481e-05, + "loss": 0.2797, "step": 3231000 }, { "epoch": 1.94, - "learning_rate": 4.247514660759564e-05, - "loss": 0.3586, + "learning_rate": 4.2475029009524244e-05, + "loss": 0.2731, "step": 3231500 }, { "epoch": 1.94, - "learning_rate": 4.247304664203507e-05, - "loss": 0.3602, + "learning_rate": 4.2472933243894804e-05, + "loss": 0.2737, "step": 3232000 }, { "epoch": 1.94, - "learning_rate": 4.24709466764745e-05, - "loss": 0.3668, + "learning_rate": 4.247083327833424e-05, + "loss": 0.2811, "step": 3232500 }, { "epoch": 1.94, - "learning_rate": 4.246885091084506e-05, - "loss": 0.3677, + "learning_rate": 4.246873331277367e-05, + "loss": 0.2705, "step": 3233000 }, { "epoch": 1.94, - "learning_rate": 4.24667509452845e-05, - "loss": 0.3542, + "learning_rate": 4.246663334721311e-05, + "loss": 0.273, "step": 3233500 }, { "epoch": 1.94, - "learning_rate": 4.2464650979723935e-05, - "loss": 0.3657, + "learning_rate": 4.2464537581583665e-05, + "loss": 0.2754, "step": 3234000 }, { "epoch": 1.94, - "learning_rate": 4.246255101416336e-05, - "loss": 0.3612, + "learning_rate": 4.24624376160231e-05, + "loss": 0.2781, "step": 3234500 }, { "epoch": 1.94, - "learning_rate": 4.246045944846504e-05, - "loss": 0.3504, + "learning_rate": 4.246033765046253e-05, + "loss": 0.2682, "step": 3235000 }, { "epoch": 1.94, - "learning_rate": 4.245835948290448e-05, - "loss": 0.3672, + "learning_rate": 4.245823768490197e-05, + "loss": 0.2762, "step": 3235500 }, { "epoch": 1.94, - "learning_rate": 4.2456259517343916e-05, - "loss": 0.3574, + "learning_rate": 4.24561377193414e-05, + "loss": 0.2745, "step": 3236000 }, { "epoch": 1.94, - "learning_rate": 4.245415955178335e-05, - "loss": 0.3571, + "learning_rate": 4.245404195371196e-05, + "loss": 0.2771, "step": 3236500 }, { "epoch": 1.94, - "learning_rate": 4.245205958622279e-05, - "loss": 0.3604, + "learning_rate": 4.245194198815139e-05, + "loss": 0.2737, "step": 3237000 }, { "epoch": 1.94, - "learning_rate": 4.244995962066222e-05, - "loss": 0.3578, + "learning_rate": 4.244984202259083e-05, + "loss": 0.2696, "step": 3237500 }, { "epoch": 1.94, - "learning_rate": 4.2447859655101656e-05, - "loss": 0.3579, + "learning_rate": 4.2447742057030266e-05, + "loss": 0.2753, "step": 3238000 }, { "epoch": 1.94, - "learning_rate": 4.2445759689541097e-05, - "loss": 0.3596, + "learning_rate": 4.244564629140082e-05, + "loss": 0.2718, "step": 3238500 }, { "epoch": 1.94, - "learning_rate": 4.244365972398052e-05, - "loss": 0.3607, + "learning_rate": 4.244354632584026e-05, + "loss": 0.2727, "step": 3239000 }, { "epoch": 1.94, - "learning_rate": 4.244155975841996e-05, - "loss": 0.3619, + "learning_rate": 4.244144636027969e-05, + "loss": 0.2765, "step": 3239500 }, { "epoch": 1.94, - "learning_rate": 4.24394597928594e-05, - "loss": 0.3651, + "learning_rate": 4.243934639471913e-05, + "loss": 0.2786, "step": 3240000 }, { "epoch": 1.94, - "learning_rate": 4.243736402722996e-05, - "loss": 0.3515, + "learning_rate": 4.243725062908968e-05, + "loss": 0.2649, "step": 3240500 }, { "epoch": 1.94, - "learning_rate": 4.243526406166939e-05, - "loss": 0.379, + "learning_rate": 4.243515066352912e-05, + "loss": 0.2786, "step": 3241000 }, { "epoch": 1.94, - "learning_rate": 4.243316409610882e-05, - "loss": 0.3665, + "learning_rate": 4.2433050697968554e-05, + "loss": 0.2788, "step": 3241500 }, { "epoch": 1.94, - "learning_rate": 4.243106413054826e-05, - "loss": 0.3595, + "learning_rate": 4.243095073240799e-05, + "loss": 0.2758, "step": 3242000 }, { "epoch": 1.94, - "learning_rate": 4.242896416498769e-05, - "loss": 0.3716, + "learning_rate": 4.242885076684743e-05, + "loss": 0.2736, "step": 3242500 }, { "epoch": 1.94, - "learning_rate": 4.2426864199427125e-05, - "loss": 0.3652, + "learning_rate": 4.242675500121798e-05, + "loss": 0.2786, "step": 3243000 }, { "epoch": 1.94, - "learning_rate": 4.2424764233866565e-05, - "loss": 0.3568, + "learning_rate": 4.2424655035657415e-05, + "loss": 0.2698, "step": 3243500 }, { "epoch": 1.94, - "learning_rate": 4.2422664268306e-05, - "loss": 0.3638, + "learning_rate": 4.242255507009685e-05, + "loss": 0.2731, "step": 3244000 }, { "epoch": 1.95, - "learning_rate": 4.242057270260768e-05, - "loss": 0.3734, + "learning_rate": 4.242045510453629e-05, + "loss": 0.2823, "step": 3244500 }, { "epoch": 1.95, - "learning_rate": 4.241847273704711e-05, - "loss": 0.3595, + "learning_rate": 4.241835933890684e-05, + "loss": 0.2759, "step": 3245000 }, { "epoch": 1.95, - "learning_rate": 4.2416376971417666e-05, - "loss": 0.3792, + "learning_rate": 4.2416259373346275e-05, + "loss": 0.2783, "step": 3245500 }, { "epoch": 1.95, - "learning_rate": 4.2414277005857106e-05, - "loss": 0.3656, + "learning_rate": 4.2414159407785716e-05, + "loss": 0.2761, "step": 3246000 }, { "epoch": 1.95, - "learning_rate": 4.241218544015878e-05, - "loss": 0.36, + "learning_rate": 4.241205944222515e-05, + "loss": 0.2758, "step": 3246500 }, { "epoch": 1.95, - "learning_rate": 4.241008547459822e-05, - "loss": 0.3595, + "learning_rate": 4.24099636765957e-05, + "loss": 0.2719, "step": 3247000 }, { "epoch": 1.95, - "learning_rate": 4.240798550903765e-05, - "loss": 0.3653, + "learning_rate": 4.2407863711035136e-05, + "loss": 0.2707, "step": 3247500 }, { "epoch": 1.95, - "learning_rate": 4.240588554347709e-05, - "loss": 0.3615, + "learning_rate": 4.2405763745474576e-05, + "loss": 0.2767, "step": 3248000 }, { "epoch": 1.95, - "learning_rate": 4.240378557791652e-05, - "loss": 0.3538, + "learning_rate": 4.240366377991401e-05, + "loss": 0.2678, "step": 3248500 }, { "epoch": 1.95, - "learning_rate": 4.2401685612355954e-05, - "loss": 0.3677, + "learning_rate": 4.240156381435344e-05, + "loss": 0.2758, "step": 3249000 }, { "epoch": 1.95, - "learning_rate": 4.2399585646795394e-05, - "loss": 0.3725, + "learning_rate": 4.2399468048724e-05, + "loss": 0.2752, "step": 3249500 }, { "epoch": 1.95, - "learning_rate": 4.239748568123483e-05, - "loss": 0.3538, + "learning_rate": 4.239736808316344e-05, + "loss": 0.2703, "step": 3250000 }, { "epoch": 1.95, - "learning_rate": 4.239538571567426e-05, - "loss": 0.3745, + "learning_rate": 4.239526811760287e-05, + "loss": 0.2775, "step": 3250500 }, { "epoch": 1.95, - "learning_rate": 4.23932857501137e-05, - "loss": 0.3572, + "learning_rate": 4.2393168152042304e-05, + "loss": 0.2696, "step": 3251000 }, { "epoch": 1.95, - "learning_rate": 4.2391185784553135e-05, - "loss": 0.3628, + "learning_rate": 4.2391068186481744e-05, + "loss": 0.2732, "step": 3251500 }, { "epoch": 1.95, - "learning_rate": 4.238908581899257e-05, - "loss": 0.366, + "learning_rate": 4.23889724208523e-05, + "loss": 0.2803, "step": 3252000 }, { "epoch": 1.95, - "learning_rate": 4.238698585343201e-05, - "loss": 0.3655, + "learning_rate": 4.238687245529173e-05, + "loss": 0.2759, "step": 3252500 }, { "epoch": 1.95, - "learning_rate": 4.238488588787144e-05, - "loss": 0.3631, + "learning_rate": 4.238477248973117e-05, + "loss": 0.2701, "step": 3253000 }, { "epoch": 1.95, - "learning_rate": 4.238278592231087e-05, - "loss": 0.3624, + "learning_rate": 4.2382672524170605e-05, + "loss": 0.2703, "step": 3253500 }, { "epoch": 1.95, - "learning_rate": 4.238068595675031e-05, - "loss": 0.3507, + "learning_rate": 4.238057255861004e-05, + "loss": 0.2723, "step": 3254000 }, { "epoch": 1.95, - "learning_rate": 4.237858599118974e-05, - "loss": 0.3612, + "learning_rate": 4.237847679298059e-05, + "loss": 0.2754, "step": 3254500 }, { "epoch": 1.95, - "learning_rate": 4.2376486025629176e-05, - "loss": 0.3562, + "learning_rate": 4.237637682742003e-05, + "loss": 0.2763, "step": 3255000 }, { "epoch": 1.95, - "learning_rate": 4.2374390259999736e-05, - "loss": 0.3699, + "learning_rate": 4.2374276861859466e-05, + "loss": 0.2743, "step": 3255500 }, { "epoch": 1.95, - "learning_rate": 4.237229029443917e-05, - "loss": 0.3608, + "learning_rate": 4.23721768962989e-05, + "loss": 0.2749, "step": 3256000 }, { "epoch": 1.95, - "learning_rate": 4.23701903288786e-05, - "loss": 0.3601, + "learning_rate": 4.237008113066945e-05, + "loss": 0.278, "step": 3256500 }, { "epoch": 1.95, - "learning_rate": 4.2368090363318036e-05, - "loss": 0.361, + "learning_rate": 4.236798116510889e-05, + "loss": 0.2774, "step": 3257000 }, { "epoch": 1.95, - "learning_rate": 4.2365990397757477e-05, - "loss": 0.3609, + "learning_rate": 4.2365881199548326e-05, + "loss": 0.2795, "step": 3257500 }, { "epoch": 1.95, - "learning_rate": 4.236389043219691e-05, - "loss": 0.3633, + "learning_rate": 4.236378123398776e-05, + "loss": 0.2729, "step": 3258000 }, { "epoch": 1.95, - "learning_rate": 4.2361794666567464e-05, - "loss": 0.3643, + "learning_rate": 4.236168546835832e-05, + "loss": 0.2786, "step": 3258500 }, { "epoch": 1.95, - "learning_rate": 4.2359694701006904e-05, - "loss": 0.3575, + "learning_rate": 4.2359585502797754e-05, + "loss": 0.2752, "step": 3259000 }, { "epoch": 1.95, - "learning_rate": 4.2357598935377464e-05, - "loss": 0.3615, + "learning_rate": 4.235748553723719e-05, + "loss": 0.2688, "step": 3259500 }, { "epoch": 1.95, - "learning_rate": 4.23554989698169e-05, - "loss": 0.3584, + "learning_rate": 4.235538557167663e-05, + "loss": 0.2697, "step": 3260000 }, { "epoch": 1.95, - "learning_rate": 4.2353399004256324e-05, - "loss": 0.3541, + "learning_rate": 4.235328560611606e-05, + "loss": 0.2739, "step": 3260500 }, { "epoch": 1.96, - "learning_rate": 4.2351299038695765e-05, - "loss": 0.3582, + "learning_rate": 4.2351189840486614e-05, + "loss": 0.2721, "step": 3261000 }, { "epoch": 1.96, - "learning_rate": 4.23491990731352e-05, - "loss": 0.363, + "learning_rate": 4.234908987492605e-05, + "loss": 0.2786, "step": 3261500 }, { "epoch": 1.96, - "learning_rate": 4.234709910757463e-05, - "loss": 0.3605, + "learning_rate": 4.234698990936549e-05, + "loss": 0.2695, "step": 3262000 }, { "epoch": 1.96, - "learning_rate": 4.234499914201407e-05, - "loss": 0.3661, + "learning_rate": 4.234489414373604e-05, + "loss": 0.275, "step": 3262500 }, { "epoch": 1.96, - "learning_rate": 4.2342899176453505e-05, - "loss": 0.3453, + "learning_rate": 4.2342794178175475e-05, + "loss": 0.2653, "step": 3263000 }, { "epoch": 1.96, - "learning_rate": 4.234079921089294e-05, - "loss": 0.3768, + "learning_rate": 4.234069421261491e-05, + "loss": 0.2811, "step": 3263500 }, { "epoch": 1.96, - "learning_rate": 4.233869924533238e-05, - "loss": 0.353, + "learning_rate": 4.233859424705435e-05, + "loss": 0.2751, "step": 3264000 }, { "epoch": 1.96, - "learning_rate": 4.233659927977181e-05, - "loss": 0.357, + "learning_rate": 4.233649428149378e-05, + "loss": 0.2699, "step": 3264500 }, { "epoch": 1.96, - "learning_rate": 4.2334503514142366e-05, - "loss": 0.3658, + "learning_rate": 4.2334394315933216e-05, + "loss": 0.2782, "step": 3265000 }, { "epoch": 1.96, - "learning_rate": 4.23324035485818e-05, - "loss": 0.3747, + "learning_rate": 4.2332294350372656e-05, + "loss": 0.2786, "step": 3265500 }, { "epoch": 1.96, - "learning_rate": 4.233030358302124e-05, - "loss": 0.364, + "learning_rate": 4.233019438481209e-05, + "loss": 0.2774, "step": 3266000 }, { "epoch": 1.96, - "learning_rate": 4.232820361746067e-05, - "loss": 0.3613, + "learning_rate": 4.232809861918264e-05, + "loss": 0.2745, "step": 3266500 }, { "epoch": 1.96, - "learning_rate": 4.232610785183123e-05, - "loss": 0.3729, + "learning_rate": 4.232599865362208e-05, + "loss": 0.2748, "step": 3267000 }, { "epoch": 1.96, - "learning_rate": 4.232400788627067e-05, - "loss": 0.3606, + "learning_rate": 4.232390288799264e-05, + "loss": 0.2742, "step": 3267500 }, { "epoch": 1.96, - "learning_rate": 4.23219079207101e-05, - "loss": 0.3624, + "learning_rate": 4.232180292243207e-05, + "loss": 0.2768, "step": 3268000 }, { "epoch": 1.96, - "learning_rate": 4.2319807955149534e-05, - "loss": 0.364, + "learning_rate": 4.2319702956871504e-05, + "loss": 0.2728, "step": 3268500 }, { "epoch": 1.96, - "learning_rate": 4.2317707989588974e-05, - "loss": 0.3573, + "learning_rate": 4.2317602991310944e-05, + "loss": 0.2712, "step": 3269000 }, { "epoch": 1.96, - "learning_rate": 4.231560802402841e-05, - "loss": 0.3696, + "learning_rate": 4.231550302575038e-05, + "loss": 0.2731, "step": 3269500 }, { "epoch": 1.96, - "learning_rate": 4.231350805846784e-05, - "loss": 0.3617, + "learning_rate": 4.231340306018981e-05, + "loss": 0.2738, "step": 3270000 }, { "epoch": 1.96, - "learning_rate": 4.231140809290728e-05, - "loss": 0.3592, + "learning_rate": 4.231130309462925e-05, + "loss": 0.2723, "step": 3270500 }, { "epoch": 1.96, - "learning_rate": 4.2309312327277835e-05, - "loss": 0.3618, + "learning_rate": 4.2309203129068685e-05, + "loss": 0.2695, "step": 3271000 }, { "epoch": 1.96, - "learning_rate": 4.230721236171727e-05, - "loss": 0.353, + "learning_rate": 4.230710316350812e-05, + "loss": 0.2712, "step": 3271500 }, { "epoch": 1.96, - "learning_rate": 4.23051123961567e-05, - "loss": 0.3611, + "learning_rate": 4.230500739787867e-05, + "loss": 0.2739, "step": 3272000 }, { "epoch": 1.96, - "learning_rate": 4.230301243059614e-05, - "loss": 0.3608, + "learning_rate": 4.230290743231811e-05, + "loss": 0.2713, "step": 3272500 }, { "epoch": 1.96, - "learning_rate": 4.230091246503557e-05, - "loss": 0.3597, + "learning_rate": 4.2300807466757545e-05, + "loss": 0.27, "step": 3273000 }, { "epoch": 1.96, - "learning_rate": 4.229881249947501e-05, - "loss": 0.3555, + "learning_rate": 4.2298707501196986e-05, + "loss": 0.275, "step": 3273500 }, { "epoch": 1.96, - "learning_rate": 4.229671253391444e-05, - "loss": 0.361, + "learning_rate": 4.229661593549865e-05, + "loss": 0.2707, "step": 3274000 }, { "epoch": 1.96, - "learning_rate": 4.2294616768285e-05, - "loss": 0.3682, + "learning_rate": 4.229451596993809e-05, + "loss": 0.2729, "step": 3274500 }, { "epoch": 1.96, - "learning_rate": 4.2292516802724436e-05, - "loss": 0.3557, + "learning_rate": 4.2292416004377526e-05, + "loss": 0.2709, "step": 3275000 }, { "epoch": 1.96, - "learning_rate": 4.229041683716387e-05, - "loss": 0.3583, + "learning_rate": 4.229031603881696e-05, + "loss": 0.2758, "step": 3275500 }, { "epoch": 1.96, - "learning_rate": 4.22883168716033e-05, - "loss": 0.3672, + "learning_rate": 4.22882160732564e-05, + "loss": 0.2725, "step": 3276000 }, { "epoch": 1.96, - "learning_rate": 4.2286216906042736e-05, - "loss": 0.359, + "learning_rate": 4.228611610769583e-05, + "loss": 0.2706, "step": 3276500 }, { "epoch": 1.96, - "learning_rate": 4.22841211404133e-05, - "loss": 0.3572, + "learning_rate": 4.228401614213527e-05, + "loss": 0.2714, "step": 3277000 }, { "epoch": 1.96, - "learning_rate": 4.228202117485274e-05, - "loss": 0.3593, + "learning_rate": 4.228192037650582e-05, + "loss": 0.2742, "step": 3277500 }, { "epoch": 1.97, - "learning_rate": 4.2279921209292164e-05, - "loss": 0.3679, + "learning_rate": 4.227982041094526e-05, + "loss": 0.2771, "step": 3278000 }, { "epoch": 1.97, - "learning_rate": 4.22778212437316e-05, - "loss": 0.3659, + "learning_rate": 4.2277720445384694e-05, + "loss": 0.2793, "step": 3278500 }, { "epoch": 1.97, - "learning_rate": 4.227572127817104e-05, - "loss": 0.3583, + "learning_rate": 4.227562047982413e-05, + "loss": 0.2712, "step": 3279000 }, { "epoch": 1.97, - "learning_rate": 4.227362131261047e-05, - "loss": 0.351, + "learning_rate": 4.227352471419469e-05, + "loss": 0.2714, "step": 3279500 }, { "epoch": 1.97, - "learning_rate": 4.227152554698103e-05, - "loss": 0.3718, + "learning_rate": 4.227142474863412e-05, + "loss": 0.2749, "step": 3280000 }, { "epoch": 1.97, - "learning_rate": 4.2269425581420465e-05, - "loss": 0.3554, + "learning_rate": 4.2269324783073555e-05, + "loss": 0.2731, "step": 3280500 }, { "epoch": 1.97, - "learning_rate": 4.22673256158599e-05, - "loss": 0.3614, + "learning_rate": 4.2267224817512995e-05, + "loss": 0.2713, "step": 3281000 }, { "epoch": 1.97, - "learning_rate": 4.226522565029933e-05, - "loss": 0.3609, + "learning_rate": 4.226512485195243e-05, + "loss": 0.2787, "step": 3281500 }, { "epoch": 1.97, - "learning_rate": 4.226312988466989e-05, - "loss": 0.3661, + "learning_rate": 4.226302488639186e-05, + "loss": 0.2766, "step": 3282000 }, { "epoch": 1.97, - "learning_rate": 4.2261034119040446e-05, - "loss": 0.3505, + "learning_rate": 4.22609249208313e-05, + "loss": 0.2751, "step": 3282500 }, { "epoch": 1.97, - "learning_rate": 4.2258934153479886e-05, - "loss": 0.3607, + "learning_rate": 4.2258824955270736e-05, + "loss": 0.2734, "step": 3283000 }, { "epoch": 1.97, - "learning_rate": 4.225683418791932e-05, - "loss": 0.3661, + "learning_rate": 4.225672918964129e-05, + "loss": 0.2792, "step": 3283500 }, { "epoch": 1.97, - "learning_rate": 4.225473422235875e-05, - "loss": 0.3656, + "learning_rate": 4.225462922408072e-05, + "loss": 0.2805, "step": 3284000 }, { "epoch": 1.97, - "learning_rate": 4.225263425679819e-05, - "loss": 0.3582, + "learning_rate": 4.225252925852016e-05, + "loss": 0.2736, "step": 3284500 }, { "epoch": 1.97, - "learning_rate": 4.225053429123762e-05, - "loss": 0.363, + "learning_rate": 4.2250429292959596e-05, + "loss": 0.2769, "step": 3285000 }, { "epoch": 1.97, - "learning_rate": 4.224843432567705e-05, - "loss": 0.3613, + "learning_rate": 4.224832932739903e-05, + "loss": 0.2704, "step": 3285500 }, { "epoch": 1.97, - "learning_rate": 4.224633436011649e-05, - "loss": 0.3649, + "learning_rate": 4.224623356176959e-05, + "loss": 0.2775, "step": 3286000 }, { "epoch": 1.97, - "learning_rate": 4.224423439455593e-05, - "loss": 0.3628, + "learning_rate": 4.2244137796140144e-05, + "loss": 0.2746, "step": 3286500 }, { "epoch": 1.97, - "learning_rate": 4.224213442899536e-05, - "loss": 0.3572, + "learning_rate": 4.224203783057958e-05, + "loss": 0.27, "step": 3287000 }, { "epoch": 1.97, - "learning_rate": 4.22400344634348e-05, - "loss": 0.3549, + "learning_rate": 4.223993786501901e-05, + "loss": 0.2685, "step": 3287500 }, { "epoch": 1.97, - "learning_rate": 4.2237934497874234e-05, - "loss": 0.3693, + "learning_rate": 4.223783789945845e-05, + "loss": 0.2827, "step": 3288000 }, { "epoch": 1.97, - "learning_rate": 4.223583873224479e-05, - "loss": 0.3684, + "learning_rate": 4.2235742133829004e-05, + "loss": 0.2788, "step": 3288500 }, { "epoch": 1.97, - "learning_rate": 4.223373876668423e-05, - "loss": 0.3578, + "learning_rate": 4.223364216826844e-05, + "loss": 0.2759, "step": 3289000 }, { "epoch": 1.97, - "learning_rate": 4.223163880112366e-05, - "loss": 0.3549, + "learning_rate": 4.223154220270787e-05, + "loss": 0.2736, "step": 3289500 }, { "epoch": 1.97, - "learning_rate": 4.2229538835563095e-05, - "loss": 0.3506, + "learning_rate": 4.222944223714731e-05, + "loss": 0.2721, "step": 3290000 }, { "epoch": 1.97, - "learning_rate": 4.2227438870002535e-05, - "loss": 0.3606, + "learning_rate": 4.2227342271586745e-05, + "loss": 0.2779, "step": 3290500 }, { "epoch": 1.97, - "learning_rate": 4.222534310437309e-05, - "loss": 0.3573, + "learning_rate": 4.22252465059573e-05, + "loss": 0.2759, "step": 3291000 }, { "epoch": 1.97, - "learning_rate": 4.222324313881252e-05, - "loss": 0.3749, + "learning_rate": 4.222314654039673e-05, + "loss": 0.2827, "step": 3291500 }, { "epoch": 1.97, - "learning_rate": 4.222114737318308e-05, - "loss": 0.3619, + "learning_rate": 4.222104657483617e-05, + "loss": 0.2746, "step": 3292000 }, { "epoch": 1.97, - "learning_rate": 4.221904740762251e-05, - "loss": 0.3624, + "learning_rate": 4.2218946609275606e-05, + "loss": 0.2764, "step": 3292500 }, { "epoch": 1.97, - "learning_rate": 4.221694744206195e-05, - "loss": 0.3684, + "learning_rate": 4.2216846643715046e-05, + "loss": 0.2787, "step": 3293000 }, { "epoch": 1.97, - "learning_rate": 4.221484747650138e-05, - "loss": 0.3564, + "learning_rate": 4.221474667815448e-05, + "loss": 0.2728, "step": 3293500 }, { "epoch": 1.97, - "learning_rate": 4.2212747510940816e-05, - "loss": 0.3519, + "learning_rate": 4.221265091252503e-05, + "loss": 0.2699, "step": 3294000 }, { "epoch": 1.98, - "learning_rate": 4.2210651745311376e-05, - "loss": 0.3635, + "learning_rate": 4.2210550946964467e-05, + "loss": 0.2738, "step": 3294500 }, { "epoch": 1.98, - "learning_rate": 4.220855177975081e-05, - "loss": 0.3519, + "learning_rate": 4.220845098140391e-05, + "loss": 0.2652, "step": 3295000 }, { "epoch": 1.98, - "learning_rate": 4.220645181419024e-05, - "loss": 0.3731, + "learning_rate": 4.220635101584334e-05, + "loss": 0.2792, "step": 3295500 }, { "epoch": 1.98, - "learning_rate": 4.2204351848629684e-05, - "loss": 0.3678, + "learning_rate": 4.2204251050282774e-05, + "loss": 0.2816, "step": 3296000 }, { "epoch": 1.98, - "learning_rate": 4.2202256083000244e-05, - "loss": 0.3692, + "learning_rate": 4.2202151084722214e-05, + "loss": 0.2797, "step": 3296500 }, { "epoch": 1.98, - "learning_rate": 4.220015611743967e-05, - "loss": 0.3598, + "learning_rate": 4.220005111916165e-05, + "loss": 0.278, "step": 3297000 }, { "epoch": 1.98, - "learning_rate": 4.2198056151879104e-05, - "loss": 0.357, + "learning_rate": 4.219795115360108e-05, + "loss": 0.2714, "step": 3297500 }, { "epoch": 1.98, - "learning_rate": 4.2195956186318544e-05, - "loss": 0.3647, + "learning_rate": 4.2195851188040514e-05, + "loss": 0.2747, "step": 3298000 }, { "epoch": 1.98, - "learning_rate": 4.219385622075798e-05, - "loss": 0.3591, + "learning_rate": 4.2193755422411075e-05, + "loss": 0.271, "step": 3298500 }, { "epoch": 1.98, - "learning_rate": 4.219175625519741e-05, - "loss": 0.3622, + "learning_rate": 4.219165545685051e-05, + "loss": 0.2742, "step": 3299000 }, { "epoch": 1.98, - "learning_rate": 4.2189660489567965e-05, - "loss": 0.3644, + "learning_rate": 4.218955969122106e-05, + "loss": 0.275, "step": 3299500 }, { "epoch": 1.98, - "learning_rate": 4.2187564723938525e-05, - "loss": 0.3805, + "learning_rate": 4.21874597256605e-05, + "loss": 0.2783, "step": 3300000 }, { "epoch": 1.98, - "eval_loss": 0.34579384326934814, - "eval_runtime": 1120.1725, - "eval_samples_per_second": 470.213, - "eval_steps_per_second": 78.369, + "eval_loss": 0.2493673712015152, + "eval_runtime": 1464.6104, + "eval_samples_per_second": 359.631, + "eval_steps_per_second": 59.939, "step": 3300000 }, { "epoch": 1.98, - "learning_rate": 4.2185464758377965e-05, - "loss": 0.3607, + "learning_rate": 4.2185359760099935e-05, + "loss": 0.2738, "step": 3300500 }, { "epoch": 1.98, - "learning_rate": 4.21833647928174e-05, - "loss": 0.3694, + "learning_rate": 4.218325979453937e-05, + "loss": 0.2761, "step": 3301000 }, { "epoch": 1.98, - "learning_rate": 4.218126482725683e-05, - "loss": 0.3614, + "learning_rate": 4.218115982897881e-05, + "loss": 0.271, "step": 3301500 }, { "epoch": 1.98, - "learning_rate": 4.2179164861696266e-05, - "loss": 0.3571, + "learning_rate": 4.217905986341824e-05, + "loss": 0.28, "step": 3302000 }, { "epoch": 1.98, - "learning_rate": 4.21770648961357e-05, - "loss": 0.3564, + "learning_rate": 4.2176959897857676e-05, + "loss": 0.2729, "step": 3302500 }, { "epoch": 1.98, - "learning_rate": 4.217496493057514e-05, - "loss": 0.3531, + "learning_rate": 4.217485993229711e-05, + "loss": 0.2684, "step": 3303000 }, { "epoch": 1.98, - "learning_rate": 4.217286496501457e-05, - "loss": 0.3571, + "learning_rate": 4.217276416666767e-05, + "loss": 0.2681, "step": 3303500 }, { "epoch": 1.98, - "learning_rate": 4.2170764999454006e-05, - "loss": 0.3514, + "learning_rate": 4.21706642011071e-05, + "loss": 0.2692, "step": 3304000 }, { "epoch": 1.98, - "learning_rate": 4.216866503389345e-05, - "loss": 0.3602, + "learning_rate": 4.216856423554654e-05, + "loss": 0.2726, "step": 3304500 }, { "epoch": 1.98, - "learning_rate": 4.216656506833288e-05, - "loss": 0.3608, + "learning_rate": 4.216646426998598e-05, + "loss": 0.2787, "step": 3305000 }, { "epoch": 1.98, - "learning_rate": 4.2164465102772314e-05, - "loss": 0.3678, + "learning_rate": 4.2164364304425404e-05, + "loss": 0.278, "step": 3305500 }, { "epoch": 1.98, - "learning_rate": 4.216236933714287e-05, - "loss": 0.3614, + "learning_rate": 4.2162268538795964e-05, + "loss": 0.2715, "step": 3306000 }, { "epoch": 1.98, - "learning_rate": 4.216026937158231e-05, - "loss": 0.3637, + "learning_rate": 4.21601685732354e-05, + "loss": 0.2741, "step": 3306500 }, { "epoch": 1.98, - "learning_rate": 4.215817360595286e-05, - "loss": 0.3667, + "learning_rate": 4.215806860767484e-05, + "loss": 0.2775, "step": 3307000 }, { "epoch": 1.98, - "learning_rate": 4.2156073640392294e-05, - "loss": 0.3603, + "learning_rate": 4.2155968642114264e-05, + "loss": 0.276, "step": 3307500 }, { "epoch": 1.98, - "learning_rate": 4.215397367483173e-05, - "loss": 0.3588, + "learning_rate": 4.2153872876484825e-05, + "loss": 0.2741, "step": 3308000 }, { "epoch": 1.98, - "learning_rate": 4.215187370927117e-05, - "loss": 0.3624, + "learning_rate": 4.2151772910924265e-05, + "loss": 0.2745, "step": 3308500 }, { "epoch": 1.98, - "learning_rate": 4.21497737437106e-05, - "loss": 0.3627, + "learning_rate": 4.21496729453637e-05, + "loss": 0.2749, "step": 3309000 }, { "epoch": 1.98, - "learning_rate": 4.2147673778150035e-05, - "loss": 0.3651, + "learning_rate": 4.214757297980313e-05, + "loss": 0.2716, "step": 3309500 }, { "epoch": 1.98, - "learning_rate": 4.2145578012520595e-05, - "loss": 0.3631, + "learning_rate": 4.2145477214173685e-05, + "loss": 0.2729, "step": 3310000 }, { "epoch": 1.98, - "learning_rate": 4.214347804696003e-05, - "loss": 0.3606, + "learning_rate": 4.2143377248613126e-05, + "loss": 0.2738, "step": 3310500 }, { "epoch": 1.99, - "learning_rate": 4.214137808139946e-05, - "loss": 0.3634, + "learning_rate": 4.214127728305256e-05, + "loss": 0.2757, "step": 3311000 }, { "epoch": 1.99, - "learning_rate": 4.21392781158389e-05, - "loss": 0.3516, + "learning_rate": 4.213917731749199e-05, + "loss": 0.2717, "step": 3311500 }, { "epoch": 1.99, - "learning_rate": 4.2137178150278336e-05, - "loss": 0.355, + "learning_rate": 4.2137081551862546e-05, + "loss": 0.2725, "step": 3312000 }, { "epoch": 1.99, - "learning_rate": 4.213507818471777e-05, - "loss": 0.3601, + "learning_rate": 4.2134981586301986e-05, + "loss": 0.2723, "step": 3312500 }, { "epoch": 1.99, - "learning_rate": 4.213298241908832e-05, - "loss": 0.3642, + "learning_rate": 4.213288162074142e-05, + "loss": 0.2714, "step": 3313000 }, { "epoch": 1.99, - "learning_rate": 4.213088245352776e-05, - "loss": 0.3616, + "learning_rate": 4.213078165518085e-05, + "loss": 0.2737, "step": 3313500 }, { "epoch": 1.99, - "learning_rate": 4.21287824879672e-05, - "loss": 0.3675, + "learning_rate": 4.2128681689620294e-05, + "loss": 0.2748, "step": 3314000 }, { "epoch": 1.99, - "learning_rate": 4.212668252240663e-05, - "loss": 0.3652, + "learning_rate": 4.212658172405973e-05, + "loss": 0.277, "step": 3314500 }, { "epoch": 1.99, - "learning_rate": 4.212458255684607e-05, - "loss": 0.3533, + "learning_rate": 4.212448175849916e-05, + "loss": 0.2745, "step": 3315000 }, { "epoch": 1.99, - "learning_rate": 4.2122482591285504e-05, - "loss": 0.3666, + "learning_rate": 4.2122381792938594e-05, + "loss": 0.2751, "step": 3315500 }, { "epoch": 1.99, - "learning_rate": 4.212038262572494e-05, - "loss": 0.3579, + "learning_rate": 4.2120286027309154e-05, + "loss": 0.2699, "step": 3316000 }, { "epoch": 1.99, - "learning_rate": 4.211828266016438e-05, - "loss": 0.363, + "learning_rate": 4.211818606174859e-05, + "loss": 0.2762, "step": 3316500 }, { "epoch": 1.99, - "learning_rate": 4.211618689453493e-05, - "loss": 0.3626, + "learning_rate": 4.211608609618802e-05, + "loss": 0.2747, "step": 3317000 }, { "epoch": 1.99, - "learning_rate": 4.2114086928974365e-05, - "loss": 0.3603, + "learning_rate": 4.2113986130627455e-05, + "loss": 0.2751, "step": 3317500 }, { "epoch": 1.99, - "learning_rate": 4.21119869634138e-05, - "loss": 0.3699, + "learning_rate": 4.2111890364998015e-05, + "loss": 0.2768, "step": 3318000 }, { "epoch": 1.99, - "learning_rate": 4.210988699785324e-05, - "loss": 0.3706, + "learning_rate": 4.210979039943745e-05, + "loss": 0.2797, "step": 3318500 }, { "epoch": 1.99, - "learning_rate": 4.210778703229267e-05, - "loss": 0.3706, + "learning_rate": 4.210769043387689e-05, + "loss": 0.2791, "step": 3319000 }, { "epoch": 1.99, - "learning_rate": 4.2105687066732105e-05, - "loss": 0.3628, + "learning_rate": 4.210559886817856e-05, + "loss": 0.281, "step": 3319500 }, { "epoch": 1.99, - "learning_rate": 4.210358710117154e-05, - "loss": 0.3655, + "learning_rate": 4.2103498902617996e-05, + "loss": 0.2768, "step": 3320000 }, { "epoch": 1.99, - "learning_rate": 4.210148713561097e-05, - "loss": 0.3576, + "learning_rate": 4.210139893705743e-05, + "loss": 0.2691, "step": 3320500 }, { "epoch": 1.99, - "learning_rate": 4.209939556991265e-05, - "loss": 0.3685, + "learning_rate": 4.209929897149687e-05, + "loss": 0.2823, "step": 3321000 }, { "epoch": 1.99, - "learning_rate": 4.2097295604352086e-05, - "loss": 0.3583, + "learning_rate": 4.20971990059363e-05, + "loss": 0.272, "step": 3321500 }, { "epoch": 1.99, - "learning_rate": 4.2095195638791526e-05, - "loss": 0.3632, + "learning_rate": 4.2095099040375736e-05, + "loss": 0.2757, "step": 3322000 }, { "epoch": 1.99, - "learning_rate": 4.209309567323096e-05, - "loss": 0.3645, + "learning_rate": 4.209299907481518e-05, + "loss": 0.2728, "step": 3322500 }, { "epoch": 1.99, - "learning_rate": 4.209099570767039e-05, - "loss": 0.3538, + "learning_rate": 4.209089910925461e-05, + "loss": 0.2698, "step": 3323000 }, { "epoch": 1.99, - "learning_rate": 4.2088895742109833e-05, - "loss": 0.3573, + "learning_rate": 4.2088799143694044e-05, + "loss": 0.2738, "step": 3323500 }, { "epoch": 1.99, - "learning_rate": 4.208679577654926e-05, - "loss": 0.3662, + "learning_rate": 4.2086699178133484e-05, + "loss": 0.2751, "step": 3324000 }, { "epoch": 1.99, - "learning_rate": 4.2084695810988694e-05, - "loss": 0.3514, + "learning_rate": 4.208459921257291e-05, + "loss": 0.2707, "step": 3324500 }, { "epoch": 1.99, - "learning_rate": 4.2082600045359254e-05, - "loss": 0.3643, + "learning_rate": 4.2082499247012344e-05, + "loss": 0.2735, "step": 3325000 }, { "epoch": 1.99, - "learning_rate": 4.2080504279729814e-05, - "loss": 0.3586, + "learning_rate": 4.2080403481382904e-05, + "loss": 0.2725, "step": 3325500 }, { "epoch": 1.99, - "learning_rate": 4.207840431416925e-05, - "loss": 0.3654, + "learning_rate": 4.2078303515822345e-05, + "loss": 0.2743, "step": 3326000 }, { "epoch": 1.99, - "learning_rate": 4.20763085485398e-05, - "loss": 0.3687, + "learning_rate": 4.207620355026177e-05, + "loss": 0.2799, "step": 3326500 }, { "epoch": 1.99, - "learning_rate": 4.2074208582979235e-05, - "loss": 0.3581, + "learning_rate": 4.2074103584701205e-05, + "loss": 0.2708, "step": 3327000 }, { "epoch": 1.99, - "learning_rate": 4.2072108617418675e-05, - "loss": 0.3599, + "learning_rate": 4.2072003619140645e-05, + "loss": 0.2707, "step": 3327500 }, { "epoch": 2.0, - "learning_rate": 4.207000865185811e-05, - "loss": 0.3575, + "learning_rate": 4.2069907853511205e-05, + "loss": 0.2721, "step": 3328000 }, { "epoch": 2.0, - "learning_rate": 4.206790868629754e-05, - "loss": 0.3627, + "learning_rate": 4.206780788795064e-05, + "loss": 0.2701, "step": 3328500 }, { "epoch": 2.0, - "learning_rate": 4.206580872073698e-05, - "loss": 0.3653, + "learning_rate": 4.206570792239007e-05, + "loss": 0.2768, "step": 3329000 }, { "epoch": 2.0, - "learning_rate": 4.2063708755176416e-05, - "loss": 0.3603, + "learning_rate": 4.2063607956829506e-05, + "loss": 0.2742, "step": 3329500 }, { "epoch": 2.0, - "learning_rate": 4.206160878961585e-05, - "loss": 0.3689, + "learning_rate": 4.2061512191200066e-05, + "loss": 0.277, "step": 3330000 }, { "epoch": 2.0, - "learning_rate": 4.205950882405529e-05, - "loss": 0.3625, + "learning_rate": 4.20594122256395e-05, + "loss": 0.2707, "step": 3330500 }, { "epoch": 2.0, - "learning_rate": 4.2057408858494716e-05, - "loss": 0.3641, + "learning_rate": 4.205731646001005e-05, + "loss": 0.2747, "step": 3331000 }, { "epoch": 2.0, - "learning_rate": 4.2055313092865276e-05, - "loss": 0.3554, + "learning_rate": 4.205521649444949e-05, + "loss": 0.2737, "step": 3331500 }, { "epoch": 2.0, - "learning_rate": 4.2053213127304717e-05, - "loss": 0.3631, + "learning_rate": 4.205311652888893e-05, + "loss": 0.2698, "step": 3332000 }, { "epoch": 2.0, - "learning_rate": 4.205111316174415e-05, - "loss": 0.354, + "learning_rate": 4.205101656332836e-05, + "loss": 0.2676, "step": 3332500 }, { "epoch": 2.0, - "learning_rate": 4.2049013196183583e-05, - "loss": 0.3631, + "learning_rate": 4.20489165977678e-05, + "loss": 0.2718, "step": 3333000 }, { "epoch": 2.0, - "learning_rate": 4.204691743055414e-05, - "loss": 0.3657, + "learning_rate": 4.2046816632207234e-05, + "loss": 0.2781, "step": 3333500 }, { "epoch": 2.0, - "learning_rate": 4.204481746499358e-05, - "loss": 0.3536, + "learning_rate": 4.204471666664666e-05, + "loss": 0.2721, "step": 3334000 }, { "epoch": 2.0, - "learning_rate": 4.204271749943301e-05, - "loss": 0.3682, + "learning_rate": 4.20426167010861e-05, + "loss": 0.2806, "step": 3334500 }, { "epoch": 2.0, - "learning_rate": 4.2040621733803564e-05, - "loss": 0.3757, + "learning_rate": 4.204052093545666e-05, + "loss": 0.2819, "step": 3335000 }, { "epoch": 2.0, - "learning_rate": 4.2038521768243e-05, - "loss": 0.3511, + "learning_rate": 4.2038420969896095e-05, + "loss": 0.2719, "step": 3335500 }, { "epoch": 2.0, - "learning_rate": 4.203642180268244e-05, - "loss": 0.3625, + "learning_rate": 4.203632100433553e-05, + "loss": 0.2746, "step": 3336000 }, { "epoch": 2.0, - "learning_rate": 4.203432183712187e-05, - "loss": 0.3516, + "learning_rate": 4.203422103877496e-05, + "loss": 0.2676, "step": 3336500 }, { "epoch": 2.0, - "learning_rate": 4.2032221871561305e-05, - "loss": 0.3507, + "learning_rate": 4.203212527314552e-05, + "loss": 0.2674, "step": 3337000 }, { "epoch": 2.0, - "learning_rate": 4.2030121906000745e-05, - "loss": 0.354, + "learning_rate": 4.2030025307584955e-05, + "loss": 0.2729, "step": 3337500 }, { "epoch": 2.0, - "learning_rate": 4.202802194044018e-05, - "loss": 0.3475, + "learning_rate": 4.2027925342024396e-05, + "loss": 0.2663, "step": 3338000 }, { "epoch": 2.0, - "learning_rate": 4.202592617481073e-05, - "loss": 0.3563, + "learning_rate": 4.202582537646382e-05, + "loss": 0.275, "step": 3338500 }, { "epoch": 2.0, - "learning_rate": 4.202382620925017e-05, - "loss": 0.3444, + "learning_rate": 4.202372961083438e-05, + "loss": 0.265, "step": 3339000 }, { "epoch": 2.0, - "learning_rate": 4.2021726243689606e-05, - "loss": 0.354, + "learning_rate": 4.2021629645273816e-05, + "loss": 0.2676, "step": 3339500 }, { "epoch": 2.0, - "learning_rate": 4.201962627812904e-05, - "loss": 0.3498, + "learning_rate": 4.2019529679713256e-05, + "loss": 0.2644, "step": 3340000 }, { "epoch": 2.0, - "learning_rate": 4.201752631256847e-05, - "loss": 0.3572, + "learning_rate": 4.201742971415269e-05, + "loss": 0.2755, "step": 3340500 }, { "epoch": 2.0, - "learning_rate": 4.2015426347007906e-05, - "loss": 0.3506, + "learning_rate": 4.2015329748592116e-05, + "loss": 0.2651, "step": 3341000 }, { "epoch": 2.0, - "learning_rate": 4.201332638144734e-05, - "loss": 0.3627, + "learning_rate": 4.201323398296268e-05, + "loss": 0.271, "step": 3341500 }, { "epoch": 2.0, - "learning_rate": 4.201122641588678e-05, - "loss": 0.3527, + "learning_rate": 4.201113401740212e-05, + "loss": 0.269, "step": 3342000 }, { "epoch": 2.0, - "learning_rate": 4.2009126450326213e-05, - "loss": 0.3512, + "learning_rate": 4.200903405184155e-05, + "loss": 0.2678, "step": 3342500 }, { "epoch": 2.0, - "learning_rate": 4.200702648476565e-05, - "loss": 0.3599, + "learning_rate": 4.2006934086280984e-05, + "loss": 0.2704, "step": 3343000 }, { "epoch": 2.0, - "learning_rate": 4.20049307191362e-05, - "loss": 0.3481, + "learning_rate": 4.2004838320651544e-05, + "loss": 0.2718, "step": 3343500 }, { "epoch": 2.0, - "learning_rate": 4.200283075357564e-05, - "loss": 0.3587, + "learning_rate": 4.200273835509098e-05, + "loss": 0.275, "step": 3344000 }, { "epoch": 2.01, - "learning_rate": 4.2000730788015074e-05, - "loss": 0.3445, + "learning_rate": 4.200063838953041e-05, + "loss": 0.266, "step": 3344500 }, { "epoch": 2.01, - "learning_rate": 4.199863082245451e-05, - "loss": 0.3484, + "learning_rate": 4.199853842396985e-05, + "loss": 0.2641, "step": 3345000 }, { "epoch": 2.01, - "learning_rate": 4.199653505682507e-05, - "loss": 0.3571, + "learning_rate": 4.1996442658340405e-05, + "loss": 0.2705, "step": 3345500 }, { "epoch": 2.01, - "learning_rate": 4.19944350912645e-05, - "loss": 0.3543, + "learning_rate": 4.199434269277984e-05, + "loss": 0.2686, "step": 3346000 }, { "epoch": 2.01, - "learning_rate": 4.1992335125703935e-05, - "loss": 0.3435, + "learning_rate": 4.199224272721927e-05, + "loss": 0.2695, "step": 3346500 }, { "epoch": 2.01, - "learning_rate": 4.1990235160143375e-05, - "loss": 0.3564, + "learning_rate": 4.199014276165871e-05, + "loss": 0.2673, "step": 3347000 }, { "epoch": 2.01, - "learning_rate": 4.1988139394513935e-05, - "loss": 0.361, + "learning_rate": 4.1988046996029266e-05, + "loss": 0.2724, "step": 3347500 }, { "epoch": 2.01, - "learning_rate": 4.198603942895336e-05, - "loss": 0.3691, + "learning_rate": 4.19859470304687e-05, + "loss": 0.2733, "step": 3348000 }, { "epoch": 2.01, - "learning_rate": 4.1983939463392796e-05, - "loss": 0.3596, + "learning_rate": 4.198384706490813e-05, + "loss": 0.27, "step": 3348500 }, { "epoch": 2.01, - "learning_rate": 4.1981839497832236e-05, - "loss": 0.3428, + "learning_rate": 4.198174709934757e-05, + "loss": 0.2619, "step": 3349000 }, { "epoch": 2.01, - "learning_rate": 4.197973953227167e-05, - "loss": 0.3525, + "learning_rate": 4.1979647133787006e-05, + "loss": 0.2723, "step": 3349500 }, { "epoch": 2.01, - "learning_rate": 4.19776395667111e-05, - "loss": 0.3508, + "learning_rate": 4.197754716822644e-05, + "loss": 0.2715, "step": 3350000 }, { "epoch": 2.01, - "learning_rate": 4.1975543801081656e-05, - "loss": 0.3436, + "learning_rate": 4.197544720266587e-05, + "loss": 0.2639, "step": 3350500 }, { "epoch": 2.01, - "learning_rate": 4.1973443835521097e-05, - "loss": 0.3478, + "learning_rate": 4.1973351437036434e-05, + "loss": 0.2628, "step": 3351000 }, { "epoch": 2.01, - "learning_rate": 4.197134386996053e-05, - "loss": 0.3598, + "learning_rate": 4.197125147147587e-05, + "loss": 0.2775, "step": 3351500 }, { "epoch": 2.01, - "learning_rate": 4.1969243904399963e-05, - "loss": 0.3591, + "learning_rate": 4.196915150591531e-05, + "loss": 0.271, "step": 3352000 }, { "epoch": 2.01, - "learning_rate": 4.1967143938839404e-05, - "loss": 0.3491, + "learning_rate": 4.196705154035474e-05, + "loss": 0.2686, "step": 3352500 }, { "epoch": 2.01, - "learning_rate": 4.196504397327884e-05, - "loss": 0.3541, + "learning_rate": 4.196495157479417e-05, + "loss": 0.2702, "step": 3353000 }, { "epoch": 2.01, - "learning_rate": 4.196294820764939e-05, - "loss": 0.3527, + "learning_rate": 4.196285160923361e-05, + "loss": 0.2734, "step": 3353500 }, { "epoch": 2.01, - "learning_rate": 4.196084824208883e-05, - "loss": 0.346, + "learning_rate": 4.196075164367304e-05, + "loss": 0.2667, "step": 3354000 }, { "epoch": 2.01, - "learning_rate": 4.1958748276528264e-05, - "loss": 0.3534, + "learning_rate": 4.1958651678112475e-05, + "loss": 0.2689, "step": 3354500 }, { "epoch": 2.01, - "learning_rate": 4.19566483109677e-05, - "loss": 0.3508, + "learning_rate": 4.1956551712551915e-05, + "loss": 0.268, "step": 3355000 }, { "epoch": 2.01, - "learning_rate": 4.195454834540714e-05, - "loss": 0.3498, + "learning_rate": 4.195446014685359e-05, + "loss": 0.2699, "step": 3355500 }, { "epoch": 2.01, - "learning_rate": 4.195244837984657e-05, - "loss": 0.3593, + "learning_rate": 4.195236018129303e-05, + "loss": 0.2743, "step": 3356000 }, { "epoch": 2.01, - "learning_rate": 4.1950348414286005e-05, - "loss": 0.3484, + "learning_rate": 4.195026021573246e-05, + "loss": 0.2683, "step": 3356500 }, { "epoch": 2.01, - "learning_rate": 4.194825264865656e-05, - "loss": 0.3551, + "learning_rate": 4.1948160250171896e-05, + "loss": 0.2678, "step": 3357000 }, { "epoch": 2.01, - "learning_rate": 4.1946152683096e-05, - "loss": 0.3616, + "learning_rate": 4.194606028461133e-05, + "loss": 0.2726, "step": 3357500 }, { "epoch": 2.01, - "learning_rate": 4.194405271753543e-05, - "loss": 0.3512, + "learning_rate": 4.194396031905076e-05, + "loss": 0.2696, "step": 3358000 }, { "epoch": 2.01, - "learning_rate": 4.1941952751974866e-05, - "loss": 0.3537, + "learning_rate": 4.19418603534902e-05, + "loss": 0.2694, "step": 3358500 }, { "epoch": 2.01, - "learning_rate": 4.193985698634542e-05, - "loss": 0.3553, + "learning_rate": 4.1939760387929636e-05, + "loss": 0.2729, "step": 3359000 }, { "epoch": 2.01, - "learning_rate": 4.193775702078486e-05, - "loss": 0.3467, + "learning_rate": 4.193766042236907e-05, + "loss": 0.2661, "step": 3359500 }, { "epoch": 2.01, - "learning_rate": 4.193565705522429e-05, - "loss": 0.3465, + "learning_rate": 4.193556885667075e-05, + "loss": 0.27, "step": 3360000 }, { "epoch": 2.01, - "learning_rate": 4.1933557089663727e-05, - "loss": 0.3571, + "learning_rate": 4.1933468891110184e-05, + "loss": 0.2706, "step": 3360500 }, { "epoch": 2.02, - "learning_rate": 4.193145712410317e-05, - "loss": 0.349, + "learning_rate": 4.1931368925549624e-05, + "loss": 0.2684, "step": 3361000 }, { "epoch": 2.02, - "learning_rate": 4.19293571585426e-05, - "loss": 0.3504, + "learning_rate": 4.192926895998906e-05, + "loss": 0.2691, "step": 3361500 }, { "epoch": 2.02, - "learning_rate": 4.1927257192982034e-05, - "loss": 0.3422, + "learning_rate": 4.192717319435961e-05, + "loss": 0.2668, "step": 3362000 }, { "epoch": 2.02, - "learning_rate": 4.1925157227421474e-05, - "loss": 0.3553, + "learning_rate": 4.1925073228799044e-05, + "loss": 0.2748, "step": 3362500 }, { "epoch": 2.02, - "learning_rate": 4.19230572618609e-05, - "loss": 0.3512, + "learning_rate": 4.1922973263238485e-05, + "loss": 0.27, "step": 3363000 }, { "epoch": 2.02, - "learning_rate": 4.192095729630034e-05, - "loss": 0.3454, + "learning_rate": 4.192087329767792e-05, + "loss": 0.2663, "step": 3363500 }, { "epoch": 2.02, - "learning_rate": 4.19188615306709e-05, - "loss": 0.3533, + "learning_rate": 4.191877333211736e-05, + "loss": 0.2665, "step": 3364000 }, { "epoch": 2.02, - "learning_rate": 4.1916761565110335e-05, - "loss": 0.359, + "learning_rate": 4.191667336655679e-05, + "loss": 0.2772, "step": 3364500 }, { "epoch": 2.02, - "learning_rate": 4.191466159954977e-05, - "loss": 0.3509, + "learning_rate": 4.191457340099622e-05, + "loss": 0.2688, "step": 3365000 }, { "epoch": 2.02, - "learning_rate": 4.19125616339892e-05, - "loss": 0.3518, + "learning_rate": 4.191247763536678e-05, + "loss": 0.2655, "step": 3365500 }, { "epoch": 2.02, - "learning_rate": 4.191046586835976e-05, - "loss": 0.3476, + "learning_rate": 4.191037766980622e-05, + "loss": 0.2712, "step": 3366000 }, { "epoch": 2.02, - "learning_rate": 4.1908365902799195e-05, - "loss": 0.3508, + "learning_rate": 4.190827770424565e-05, + "loss": 0.2678, "step": 3366500 }, { "epoch": 2.02, - "learning_rate": 4.190626593723863e-05, - "loss": 0.3429, + "learning_rate": 4.1906177738685086e-05, + "loss": 0.2673, "step": 3367000 }, { "epoch": 2.02, - "learning_rate": 4.190416597167806e-05, - "loss": 0.3552, + "learning_rate": 4.190407777312452e-05, + "loss": 0.2676, "step": 3367500 }, { "epoch": 2.02, - "learning_rate": 4.1902066006117496e-05, - "loss": 0.3459, + "learning_rate": 4.190197780756395e-05, + "loss": 0.2685, "step": 3368000 }, { "epoch": 2.02, - "learning_rate": 4.1899970240488056e-05, - "loss": 0.3507, + "learning_rate": 4.1899877842003386e-05, + "loss": 0.2701, "step": 3368500 }, { "epoch": 2.02, - "learning_rate": 4.189787027492749e-05, - "loss": 0.3557, + "learning_rate": 4.189777787644283e-05, + "loss": 0.2721, "step": 3369000 }, { "epoch": 2.02, - "learning_rate": 4.189577030936693e-05, - "loss": 0.3542, + "learning_rate": 4.189568211081338e-05, + "loss": 0.2762, "step": 3369500 }, { "epoch": 2.02, - "learning_rate": 4.1893670343806356e-05, - "loss": 0.3528, + "learning_rate": 4.1893582145252814e-05, + "loss": 0.2726, "step": 3370000 }, { "epoch": 2.02, - "learning_rate": 4.189157457817692e-05, - "loss": 0.3603, + "learning_rate": 4.1891486379623374e-05, + "loss": 0.2683, "step": 3370500 }, { "epoch": 2.02, - "learning_rate": 4.188947461261636e-05, - "loss": 0.3551, + "learning_rate": 4.188939061399393e-05, + "loss": 0.2694, "step": 3371000 }, { "epoch": 2.02, - "learning_rate": 4.188737464705579e-05, - "loss": 0.3511, + "learning_rate": 4.188729064843337e-05, + "loss": 0.2685, "step": 3371500 }, { "epoch": 2.02, - "learning_rate": 4.1885274681495224e-05, - "loss": 0.3597, + "learning_rate": 4.18851906828728e-05, + "loss": 0.2768, "step": 3372000 }, { "epoch": 2.02, - "learning_rate": 4.188317891586578e-05, - "loss": 0.3486, + "learning_rate": 4.1883090717312235e-05, + "loss": 0.2734, "step": 3372500 }, { "epoch": 2.02, - "learning_rate": 4.188107895030522e-05, - "loss": 0.3581, + "learning_rate": 4.1880990751751675e-05, + "loss": 0.2689, "step": 3373000 }, { "epoch": 2.02, - "learning_rate": 4.187897898474465e-05, - "loss": 0.359, + "learning_rate": 4.187889078619111e-05, + "loss": 0.2715, "step": 3373500 }, { "epoch": 2.02, - "learning_rate": 4.1876879019184085e-05, - "loss": 0.3535, + "learning_rate": 4.187679082063054e-05, + "loss": 0.2696, "step": 3374000 }, { "epoch": 2.02, - "learning_rate": 4.1874779053623525e-05, - "loss": 0.3633, + "learning_rate": 4.1874690855069975e-05, + "loss": 0.2735, "step": 3374500 }, { "epoch": 2.02, - "learning_rate": 4.187267908806295e-05, - "loss": 0.3444, + "learning_rate": 4.187259088950941e-05, + "loss": 0.2649, "step": 3375000 }, { "epoch": 2.02, - "learning_rate": 4.187058332243351e-05, - "loss": 0.3419, + "learning_rate": 4.187049512387997e-05, + "loss": 0.2597, "step": 3375500 }, { "epoch": 2.02, - "learning_rate": 4.1868483356872945e-05, - "loss": 0.3621, + "learning_rate": 4.18683951583194e-05, + "loss": 0.2789, "step": 3376000 }, { "epoch": 2.02, - "learning_rate": 4.1866383391312386e-05, - "loss": 0.3562, + "learning_rate": 4.186629519275884e-05, + "loss": 0.2696, "step": 3376500 }, { "epoch": 2.02, - "learning_rate": 4.186428342575181e-05, - "loss": 0.3543, + "learning_rate": 4.186419522719827e-05, + "loss": 0.2713, "step": 3377000 }, { "epoch": 2.02, - "learning_rate": 4.186218766012237e-05, - "loss": 0.363, + "learning_rate": 4.186209526163771e-05, + "loss": 0.2726, "step": 3377500 }, { "epoch": 2.03, - "learning_rate": 4.186008769456181e-05, - "loss": 0.3489, + "learning_rate": 4.185999529607714e-05, + "loss": 0.2696, "step": 3378000 }, { "epoch": 2.03, - "learning_rate": 4.1857987729001246e-05, - "loss": 0.3614, + "learning_rate": 4.185789533051658e-05, + "loss": 0.2699, "step": 3378500 }, { "epoch": 2.03, - "learning_rate": 4.185588776344068e-05, - "loss": 0.3519, + "learning_rate": 4.185579536495602e-05, + "loss": 0.2691, "step": 3379000 }, { "epoch": 2.03, - "learning_rate": 4.185378779788011e-05, - "loss": 0.3567, + "learning_rate": 4.185369959932657e-05, + "loss": 0.2709, "step": 3379500 }, { "epoch": 2.03, - "learning_rate": 4.185168783231955e-05, - "loss": 0.354, + "learning_rate": 4.185160383369713e-05, + "loss": 0.273, "step": 3380000 }, { "epoch": 2.03, - "learning_rate": 4.184958786675898e-05, - "loss": 0.3541, + "learning_rate": 4.1849503868136564e-05, + "loss": 0.2693, "step": 3380500 }, { "epoch": 2.03, - "learning_rate": 4.184748790119842e-05, - "loss": 0.3562, + "learning_rate": 4.1847403902576e-05, + "loss": 0.272, "step": 3381000 }, { "epoch": 2.03, - "learning_rate": 4.1845387935637854e-05, - "loss": 0.3533, + "learning_rate": 4.184530393701543e-05, + "loss": 0.2653, "step": 3381500 }, { "epoch": 2.03, - "learning_rate": 4.184328797007729e-05, - "loss": 0.3498, + "learning_rate": 4.1843203971454865e-05, + "loss": 0.2682, "step": 3382000 }, { "epoch": 2.03, - "learning_rate": 4.184118800451673e-05, - "loss": 0.3471, + "learning_rate": 4.18411040058943e-05, + "loss": 0.2663, "step": 3382500 }, { "epoch": 2.03, - "learning_rate": 4.183908803895616e-05, - "loss": 0.3639, + "learning_rate": 4.183900404033374e-05, + "loss": 0.2719, "step": 3383000 }, { "epoch": 2.03, - "learning_rate": 4.1836992273326715e-05, - "loss": 0.3463, + "learning_rate": 4.183690407477317e-05, + "loss": 0.2713, "step": 3383500 }, { "epoch": 2.03, - "learning_rate": 4.1834896507697275e-05, - "loss": 0.3611, + "learning_rate": 4.1834804109212605e-05, + "loss": 0.2736, "step": 3384000 }, { "epoch": 2.03, - "learning_rate": 4.183280074206783e-05, - "loss": 0.3513, + "learning_rate": 4.1832708343583166e-05, + "loss": 0.2734, "step": 3384500 }, { "epoch": 2.03, - "learning_rate": 4.183070077650727e-05, - "loss": 0.3594, + "learning_rate": 4.18306083780226e-05, + "loss": 0.2744, "step": 3385000 }, { "epoch": 2.03, - "learning_rate": 4.18286008109467e-05, - "loss": 0.3595, + "learning_rate": 4.182850841246203e-05, + "loss": 0.2732, "step": 3385500 }, { "epoch": 2.03, - "learning_rate": 4.1826500845386136e-05, - "loss": 0.345, + "learning_rate": 4.182640844690147e-05, + "loss": 0.2643, "step": 3386000 }, { "epoch": 2.03, - "learning_rate": 4.182440087982557e-05, - "loss": 0.3513, + "learning_rate": 4.1824312681272026e-05, + "loss": 0.2735, "step": 3386500 }, { "epoch": 2.03, - "learning_rate": 4.1822300914265e-05, - "loss": 0.3459, + "learning_rate": 4.182221271571146e-05, + "loss": 0.2683, "step": 3387000 }, { "epoch": 2.03, - "learning_rate": 4.1820200948704436e-05, - "loss": 0.3461, + "learning_rate": 4.182011275015089e-05, + "loss": 0.268, "step": 3387500 }, { "epoch": 2.03, - "learning_rate": 4.1818100983143876e-05, - "loss": 0.36, + "learning_rate": 4.1818012784590334e-05, + "loss": 0.2735, "step": 3388000 }, { "epoch": 2.03, - "learning_rate": 4.181600101758331e-05, - "loss": 0.3505, + "learning_rate": 4.181591281902977e-05, + "loss": 0.2621, "step": 3388500 }, { "epoch": 2.03, - "learning_rate": 4.181390105202274e-05, - "loss": 0.3561, + "learning_rate": 4.18138128534692e-05, + "loss": 0.2669, "step": 3389000 }, { "epoch": 2.03, - "learning_rate": 4.1811801086462183e-05, - "loss": 0.3492, + "learning_rate": 4.1811717087839754e-05, + "loss": 0.2674, "step": 3389500 }, { "epoch": 2.03, - "learning_rate": 4.180970532083274e-05, - "loss": 0.3571, + "learning_rate": 4.1809617122279194e-05, + "loss": 0.272, "step": 3390000 }, { "epoch": 2.03, - "learning_rate": 4.18076095552033e-05, - "loss": 0.3528, + "learning_rate": 4.1807521356649755e-05, + "loss": 0.273, "step": 3390500 }, { "epoch": 2.03, - "learning_rate": 4.180550958964273e-05, - "loss": 0.3514, + "learning_rate": 4.180542139108918e-05, + "loss": 0.2691, "step": 3391000 }, { "epoch": 2.03, - "learning_rate": 4.1803409624082164e-05, - "loss": 0.3484, + "learning_rate": 4.180332142552862e-05, + "loss": 0.2659, "step": 3391500 }, { "epoch": 2.03, - "learning_rate": 4.18013096585216e-05, - "loss": 0.343, + "learning_rate": 4.1801221459968055e-05, + "loss": 0.2675, "step": 3392000 }, { "epoch": 2.03, - "learning_rate": 4.179920969296103e-05, - "loss": 0.3589, + "learning_rate": 4.179912149440749e-05, + "loss": 0.2706, "step": 3392500 }, { "epoch": 2.03, - "learning_rate": 4.179710972740047e-05, - "loss": 0.345, + "learning_rate": 4.179702152884693e-05, + "loss": 0.2663, "step": 3393000 }, { "epoch": 2.03, - "learning_rate": 4.1795009761839905e-05, - "loss": 0.3511, + "learning_rate": 4.179492156328636e-05, + "loss": 0.2695, "step": 3393500 }, { "epoch": 2.03, - "learning_rate": 4.179290979627934e-05, - "loss": 0.3571, + "learning_rate": 4.1792821597725796e-05, + "loss": 0.2714, "step": 3394000 }, { "epoch": 2.04, - "learning_rate": 4.179081403064989e-05, - "loss": 0.353, + "learning_rate": 4.179072583209635e-05, + "loss": 0.2707, "step": 3394500 }, { "epoch": 2.04, - "learning_rate": 4.178871406508933e-05, - "loss": 0.3483, + "learning_rate": 4.178862586653579e-05, + "loss": 0.2746, "step": 3395000 }, { "epoch": 2.04, - "learning_rate": 4.1786614099528766e-05, - "loss": 0.3587, + "learning_rate": 4.178653010090635e-05, + "loss": 0.2694, "step": 3395500 }, { "epoch": 2.04, - "learning_rate": 4.17845141339682e-05, - "loss": 0.3517, + "learning_rate": 4.1784430135345776e-05, + "loss": 0.274, "step": 3396000 }, { "epoch": 2.04, - "learning_rate": 4.178241416840764e-05, - "loss": 0.3511, + "learning_rate": 4.178233016978521e-05, + "loss": 0.2691, "step": 3396500 }, { "epoch": 2.04, - "learning_rate": 4.178031420284707e-05, - "loss": 0.3573, + "learning_rate": 4.178023020422465e-05, + "loss": 0.273, "step": 3397000 }, { "epoch": 2.04, - "learning_rate": 4.1778214237286506e-05, - "loss": 0.3564, + "learning_rate": 4.1778130238664084e-05, + "loss": 0.2708, "step": 3397500 }, { "epoch": 2.04, - "learning_rate": 4.1776114271725947e-05, - "loss": 0.3589, + "learning_rate": 4.177603027310352e-05, + "loss": 0.2722, "step": 3398000 }, { "epoch": 2.04, - "learning_rate": 4.17740185060965e-05, - "loss": 0.3594, + "learning_rate": 4.177393030754296e-05, + "loss": 0.2675, "step": 3398500 }, { "epoch": 2.04, - "learning_rate": 4.1771918540535934e-05, - "loss": 0.3517, + "learning_rate": 4.177183034198239e-05, + "loss": 0.2673, "step": 3399000 }, { "epoch": 2.04, - "learning_rate": 4.1769818574975374e-05, - "loss": 0.3529, + "learning_rate": 4.176973877628407e-05, + "loss": 0.2709, "step": 3399500 }, { "epoch": 2.04, - "learning_rate": 4.176771860941481e-05, - "loss": 0.353, + "learning_rate": 4.1767638810723505e-05, + "loss": 0.2732, "step": 3400000 }, { "epoch": 2.04, - "eval_loss": 0.3463619649410248, - "eval_runtime": 1118.9984, - "eval_samples_per_second": 470.707, - "eval_steps_per_second": 78.451, + "eval_loss": 0.25032973289489746, + "eval_runtime": 1464.6668, + "eval_samples_per_second": 359.618, + "eval_steps_per_second": 59.936, "step": 3400000 }, { "epoch": 2.04, - "learning_rate": 4.176562284378536e-05, - "loss": 0.3509, + "learning_rate": 4.176553884516294e-05, + "loss": 0.2722, "step": 3400500 }, { "epoch": 2.04, - "learning_rate": 4.1763522878224794e-05, - "loss": 0.36, + "learning_rate": 4.176343887960237e-05, + "loss": 0.2716, "step": 3401000 }, { "epoch": 2.04, - "learning_rate": 4.1761422912664235e-05, - "loss": 0.3477, + "learning_rate": 4.176134311397293e-05, + "loss": 0.2698, "step": 3401500 }, { "epoch": 2.04, - "learning_rate": 4.175932294710367e-05, - "loss": 0.352, + "learning_rate": 4.1759243148412365e-05, + "loss": 0.2692, "step": 3402000 }, { "epoch": 2.04, - "learning_rate": 4.175723138140535e-05, - "loss": 0.368, + "learning_rate": 4.1757143182851806e-05, + "loss": 0.2734, "step": 3402500 }, { "epoch": 2.04, - "learning_rate": 4.175513141584478e-05, - "loss": 0.3562, + "learning_rate": 4.175504321729123e-05, + "loss": 0.2757, "step": 3403000 }, { "epoch": 2.04, - "learning_rate": 4.175303145028421e-05, - "loss": 0.3582, + "learning_rate": 4.1752943251730666e-05, + "loss": 0.2724, "step": 3403500 }, { "epoch": 2.04, - "learning_rate": 4.175093148472365e-05, - "loss": 0.358, + "learning_rate": 4.1750843286170106e-05, + "loss": 0.2737, "step": 3404000 }, { "epoch": 2.04, - "learning_rate": 4.174883151916308e-05, - "loss": 0.3594, + "learning_rate": 4.174874332060954e-05, + "loss": 0.2742, "step": 3404500 }, { "epoch": 2.04, - "learning_rate": 4.174673155360252e-05, - "loss": 0.3644, + "learning_rate": 4.17466475549801e-05, + "loss": 0.2751, "step": 3405000 }, { "epoch": 2.04, - "learning_rate": 4.174463578797308e-05, - "loss": 0.3521, + "learning_rate": 4.174454758941953e-05, + "loss": 0.2719, "step": 3405500 }, { "epoch": 2.04, - "learning_rate": 4.174253582241251e-05, - "loss": 0.3513, + "learning_rate": 4.174244762385897e-05, + "loss": 0.2695, "step": 3406000 }, { "epoch": 2.04, - "learning_rate": 4.174043585685194e-05, - "loss": 0.3517, + "learning_rate": 4.17403476582984e-05, + "loss": 0.2694, "step": 3406500 }, { "epoch": 2.04, - "learning_rate": 4.173833589129138e-05, - "loss": 0.3647, + "learning_rate": 4.173824769273784e-05, + "loss": 0.2803, "step": 3407000 }, { "epoch": 2.04, - "learning_rate": 4.173623592573082e-05, - "loss": 0.3679, + "learning_rate": 4.1736147727177274e-05, + "loss": 0.2701, "step": 3407500 }, { "epoch": 2.04, - "learning_rate": 4.173413596017025e-05, - "loss": 0.3557, + "learning_rate": 4.173404776161671e-05, + "loss": 0.2694, "step": 3408000 }, { "epoch": 2.04, - "learning_rate": 4.1732040194540804e-05, - "loss": 0.3507, + "learning_rate": 4.173194779605615e-05, + "loss": 0.2703, "step": 3408500 }, { "epoch": 2.04, - "learning_rate": 4.1729940228980244e-05, - "loss": 0.3527, + "learning_rate": 4.172984783049558e-05, + "loss": 0.2693, "step": 3409000 }, { "epoch": 2.04, - "learning_rate": 4.172784026341968e-05, - "loss": 0.3637, + "learning_rate": 4.1727752064866135e-05, + "loss": 0.281, "step": 3409500 }, { "epoch": 2.04, - "learning_rate": 4.172574029785911e-05, - "loss": 0.3514, + "learning_rate": 4.172565209930557e-05, + "loss": 0.2698, "step": 3410000 }, { "epoch": 2.04, - "learning_rate": 4.172364453222967e-05, - "loss": 0.3566, + "learning_rate": 4.172355213374501e-05, + "loss": 0.2756, "step": 3410500 }, { "epoch": 2.05, - "learning_rate": 4.1721544566669105e-05, - "loss": 0.3512, + "learning_rate": 4.172145216818444e-05, + "loss": 0.2697, "step": 3411000 }, { "epoch": 2.05, - "learning_rate": 4.171944460110854e-05, - "loss": 0.3575, + "learning_rate": 4.1719356402554995e-05, + "loss": 0.2649, "step": 3411500 }, { "epoch": 2.05, - "learning_rate": 4.171734463554798e-05, - "loss": 0.3475, + "learning_rate": 4.171725643699443e-05, + "loss": 0.2672, "step": 3412000 }, { "epoch": 2.05, - "learning_rate": 4.171524466998741e-05, - "loss": 0.3562, + "learning_rate": 4.171515647143387e-05, + "loss": 0.2641, "step": 3412500 }, { "epoch": 2.05, - "learning_rate": 4.1713144704426845e-05, - "loss": 0.3514, + "learning_rate": 4.17130565058733e-05, + "loss": 0.2731, "step": 3413000 }, { "epoch": 2.05, - "learning_rate": 4.1711044738866286e-05, - "loss": 0.3542, + "learning_rate": 4.1710960740243856e-05, + "loss": 0.2723, "step": 3413500 }, { "epoch": 2.05, - "learning_rate": 4.170894477330572e-05, - "loss": 0.3413, + "learning_rate": 4.1708860774683296e-05, + "loss": 0.2655, "step": 3414000 }, { "epoch": 2.05, - "learning_rate": 4.170684900767627e-05, - "loss": 0.3551, + "learning_rate": 4.170676080912273e-05, + "loss": 0.2698, "step": 3414500 }, { "epoch": 2.05, - "learning_rate": 4.1704749042115706e-05, - "loss": 0.3593, + "learning_rate": 4.170466084356216e-05, + "loss": 0.2759, "step": 3415000 }, { "epoch": 2.05, - "learning_rate": 4.1702649076555146e-05, - "loss": 0.351, + "learning_rate": 4.170256507793272e-05, + "loss": 0.2715, "step": 3415500 }, { "epoch": 2.05, - "learning_rate": 4.170054911099458e-05, - "loss": 0.344, + "learning_rate": 4.170046931230328e-05, + "loss": 0.2683, "step": 3416000 }, { "epoch": 2.05, - "learning_rate": 4.169845334536513e-05, - "loss": 0.3531, + "learning_rate": 4.169836934674272e-05, + "loss": 0.2715, "step": 3416500 }, { "epoch": 2.05, - "learning_rate": 4.169635337980457e-05, - "loss": 0.3504, + "learning_rate": 4.169626938118215e-05, + "loss": 0.2665, "step": 3417000 }, { "epoch": 2.05, - "learning_rate": 4.169425341424401e-05, - "loss": 0.3558, + "learning_rate": 4.169416941562158e-05, + "loss": 0.274, "step": 3417500 }, { "epoch": 2.05, - "learning_rate": 4.169215764861456e-05, - "loss": 0.3522, + "learning_rate": 4.169206945006102e-05, + "loss": 0.2708, "step": 3418000 }, { "epoch": 2.05, - "learning_rate": 4.1690057683053994e-05, - "loss": 0.3481, + "learning_rate": 4.168996948450045e-05, + "loss": 0.2702, "step": 3418500 }, { "epoch": 2.05, - "learning_rate": 4.1687957717493434e-05, - "loss": 0.3444, + "learning_rate": 4.1687869518939885e-05, + "loss": 0.2658, "step": 3419000 }, { "epoch": 2.05, - "learning_rate": 4.168585775193287e-05, - "loss": 0.3415, + "learning_rate": 4.1685769553379325e-05, + "loss": 0.2651, "step": 3419500 }, { "epoch": 2.05, - "learning_rate": 4.16837577863723e-05, - "loss": 0.35, + "learning_rate": 4.168367378774988e-05, + "loss": 0.2683, "step": 3420000 }, { "epoch": 2.05, - "learning_rate": 4.168165782081174e-05, - "loss": 0.3493, + "learning_rate": 4.168157382218931e-05, + "loss": 0.2705, "step": 3420500 }, { "epoch": 2.05, - "learning_rate": 4.1679557855251175e-05, - "loss": 0.348, + "learning_rate": 4.167947385662875e-05, + "loss": 0.2651, "step": 3421000 }, { "epoch": 2.05, - "learning_rate": 4.167745788969061e-05, - "loss": 0.3438, + "learning_rate": 4.167737809099931e-05, + "loss": 0.2687, "step": 3421500 }, { "epoch": 2.05, - "learning_rate": 4.167536212406116e-05, - "loss": 0.3485, + "learning_rate": 4.167527812543874e-05, + "loss": 0.2685, "step": 3422000 }, { "epoch": 2.05, - "learning_rate": 4.16732621585006e-05, - "loss": 0.3541, + "learning_rate": 4.167317815987817e-05, + "loss": 0.2719, "step": 3422500 }, { "epoch": 2.05, - "learning_rate": 4.1671162192940036e-05, - "loss": 0.3534, + "learning_rate": 4.167107819431761e-05, + "loss": 0.2681, "step": 3423000 }, { "epoch": 2.05, - "learning_rate": 4.166906642731059e-05, - "loss": 0.3483, + "learning_rate": 4.1668978228757046e-05, + "loss": 0.2677, "step": 3423500 }, { "epoch": 2.05, - "learning_rate": 4.166696646175002e-05, - "loss": 0.3598, + "learning_rate": 4.166687826319648e-05, + "loss": 0.2763, "step": 3424000 }, { "epoch": 2.05, - "learning_rate": 4.166486649618946e-05, - "loss": 0.3549, + "learning_rate": 4.166477829763592e-05, + "loss": 0.2705, "step": 3424500 }, { "epoch": 2.05, - "learning_rate": 4.1662766530628896e-05, - "loss": 0.3711, + "learning_rate": 4.1662678332075354e-05, + "loss": 0.2737, "step": 3425000 }, { "epoch": 2.05, - "learning_rate": 4.166066656506833e-05, - "loss": 0.3656, + "learning_rate": 4.166058256644591e-05, + "loss": 0.279, "step": 3425500 }, { "epoch": 2.05, - "learning_rate": 4.165856659950777e-05, - "loss": 0.3587, + "learning_rate": 4.165848260088534e-05, + "loss": 0.2714, "step": 3426000 }, { "epoch": 2.05, - "learning_rate": 4.1656466633947204e-05, - "loss": 0.356, + "learning_rate": 4.165638263532478e-05, + "loss": 0.2738, "step": 3426500 }, { "epoch": 2.05, - "learning_rate": 4.165436666838664e-05, - "loss": 0.3557, + "learning_rate": 4.1654282669764214e-05, + "loss": 0.2709, "step": 3427000 }, { "epoch": 2.05, - "learning_rate": 4.165227510268831e-05, - "loss": 0.3471, + "learning_rate": 4.165218690413477e-05, + "loss": 0.265, "step": 3427500 }, { "epoch": 2.06, - "learning_rate": 4.165017513712775e-05, - "loss": 0.3626, + "learning_rate": 4.165008693857421e-05, + "loss": 0.2698, "step": 3428000 }, { "epoch": 2.06, - "learning_rate": 4.1648075171567184e-05, - "loss": 0.3562, + "learning_rate": 4.164798697301364e-05, + "loss": 0.2727, "step": 3428500 }, { "epoch": 2.06, - "learning_rate": 4.164597520600662e-05, - "loss": 0.3477, + "learning_rate": 4.1645887007453075e-05, + "loss": 0.2716, "step": 3429000 }, { "epoch": 2.06, - "learning_rate": 4.164387524044606e-05, - "loss": 0.3517, + "learning_rate": 4.164379124182363e-05, + "loss": 0.2701, "step": 3429500 }, { "epoch": 2.06, - "learning_rate": 4.164177947481661e-05, - "loss": 0.3577, + "learning_rate": 4.164169547619419e-05, + "loss": 0.2709, "step": 3430000 }, { "epoch": 2.06, - "learning_rate": 4.1639679509256045e-05, - "loss": 0.3547, + "learning_rate": 4.163959551063363e-05, + "loss": 0.2714, "step": 3430500 }, { "epoch": 2.06, - "learning_rate": 4.163757954369548e-05, - "loss": 0.3466, + "learning_rate": 4.163749554507306e-05, + "loss": 0.2673, "step": 3431000 }, { "epoch": 2.06, - "learning_rate": 4.163547957813492e-05, - "loss": 0.3495, + "learning_rate": 4.163539557951249e-05, + "loss": 0.2678, "step": 3431500 }, { "epoch": 2.06, - "learning_rate": 4.163337961257435e-05, - "loss": 0.3605, + "learning_rate": 4.163329561395193e-05, + "loss": 0.2715, "step": 3432000 }, { "epoch": 2.06, - "learning_rate": 4.1631279647013786e-05, - "loss": 0.362, + "learning_rate": 4.163119564839136e-05, + "loss": 0.271, "step": 3432500 }, { "epoch": 2.06, - "learning_rate": 4.1629183881384346e-05, - "loss": 0.3483, + "learning_rate": 4.1629095682830796e-05, + "loss": 0.2704, "step": 3433000 }, { "epoch": 2.06, - "learning_rate": 4.162708391582378e-05, - "loss": 0.3585, + "learning_rate": 4.162699991720136e-05, + "loss": 0.2703, "step": 3433500 }, { "epoch": 2.06, - "learning_rate": 4.162498395026321e-05, - "loss": 0.3536, + "learning_rate": 4.162489995164079e-05, + "loss": 0.271, "step": 3434000 }, { "epoch": 2.06, - "learning_rate": 4.162288398470265e-05, - "loss": 0.3559, + "learning_rate": 4.1622799986080224e-05, + "loss": 0.2759, "step": 3434500 }, { "epoch": 2.06, - "learning_rate": 4.162078821907321e-05, - "loss": 0.3505, + "learning_rate": 4.1620700020519664e-05, + "loss": 0.2675, "step": 3435000 }, { "epoch": 2.06, - "learning_rate": 4.161868825351264e-05, - "loss": 0.3514, + "learning_rate": 4.16186000549591e-05, + "loss": 0.2666, "step": 3435500 }, { "epoch": 2.06, - "learning_rate": 4.1616588287952074e-05, - "loss": 0.3573, + "learning_rate": 4.161650008939853e-05, + "loss": 0.269, "step": 3436000 }, { "epoch": 2.06, - "learning_rate": 4.1614488322391514e-05, - "loss": 0.3696, + "learning_rate": 4.161440012383797e-05, + "loss": 0.2786, "step": 3436500 }, { "epoch": 2.06, - "learning_rate": 4.161238835683095e-05, - "loss": 0.3551, + "learning_rate": 4.1612300158277405e-05, + "loss": 0.2719, "step": 3437000 }, { "epoch": 2.06, - "learning_rate": 4.161028839127038e-05, - "loss": 0.3514, + "learning_rate": 4.161020439264796e-05, + "loss": 0.2713, "step": 3437500 }, { "epoch": 2.06, - "learning_rate": 4.160818842570982e-05, - "loss": 0.3599, + "learning_rate": 4.160810442708739e-05, + "loss": 0.2741, "step": 3438000 }, { "epoch": 2.06, - "learning_rate": 4.1606088460149255e-05, - "loss": 0.3582, + "learning_rate": 4.160600446152683e-05, + "loss": 0.2706, "step": 3438500 }, { "epoch": 2.06, - "learning_rate": 4.160399269451981e-05, - "loss": 0.3511, + "learning_rate": 4.1603904495966265e-05, + "loss": 0.2704, "step": 3439000 }, { "epoch": 2.06, - "learning_rate": 4.160189692889036e-05, - "loss": 0.3485, + "learning_rate": 4.160180873033682e-05, + "loss": 0.2667, "step": 3439500 }, { "epoch": 2.06, - "learning_rate": 4.15997969633298e-05, - "loss": 0.3528, + "learning_rate": 4.159971296470738e-05, + "loss": 0.2633, "step": 3440000 }, { "epoch": 2.06, - "learning_rate": 4.1597696997769235e-05, - "loss": 0.3497, + "learning_rate": 4.159761719907793e-05, + "loss": 0.2702, "step": 3440500 }, { "epoch": 2.06, - "learning_rate": 4.159559703220867e-05, - "loss": 0.353, + "learning_rate": 4.159551723351737e-05, + "loss": 0.2722, "step": 3441000 }, { "epoch": 2.06, - "learning_rate": 4.159349706664811e-05, - "loss": 0.3527, + "learning_rate": 4.1593417267956807e-05, + "loss": 0.2678, "step": 3441500 }, { "epoch": 2.06, - "learning_rate": 4.159140130101866e-05, - "loss": 0.3616, + "learning_rate": 4.159131730239624e-05, + "loss": 0.2734, "step": 3442000 }, { "epoch": 2.06, - "learning_rate": 4.1589301335458096e-05, - "loss": 0.3509, + "learning_rate": 4.158921733683568e-05, + "loss": 0.2744, "step": 3442500 }, { "epoch": 2.06, - "learning_rate": 4.158720136989753e-05, - "loss": 0.3421, + "learning_rate": 4.1587117371275114e-05, + "loss": 0.2614, "step": 3443000 }, { "epoch": 2.06, - "learning_rate": 4.158510140433697e-05, - "loss": 0.357, + "learning_rate": 4.158501740571454e-05, + "loss": 0.2677, "step": 3443500 }, { "epoch": 2.06, - "learning_rate": 4.15830014387764e-05, - "loss": 0.3538, + "learning_rate": 4.158291744015398e-05, + "loss": 0.2683, "step": 3444000 }, { "epoch": 2.07, - "learning_rate": 4.158090567314696e-05, - "loss": 0.3598, + "learning_rate": 4.1580817474593414e-05, + "loss": 0.2733, "step": 3444500 }, { "epoch": 2.07, - "learning_rate": 4.157880570758639e-05, - "loss": 0.3511, + "learning_rate": 4.157871750903285e-05, + "loss": 0.2674, "step": 3445000 }, { "epoch": 2.07, - "learning_rate": 4.157670574202583e-05, - "loss": 0.364, + "learning_rate": 4.157661754347229e-05, + "loss": 0.2719, "step": 3445500 }, { "epoch": 2.07, - "learning_rate": 4.1574605776465264e-05, - "loss": 0.3551, + "learning_rate": 4.157451757791172e-05, + "loss": 0.27, "step": 3446000 }, { "epoch": 2.07, - "learning_rate": 4.15725058109047e-05, - "loss": 0.3549, + "learning_rate": 4.1572421812282275e-05, + "loss": 0.2692, "step": 3446500 }, { "epoch": 2.07, - "learning_rate": 4.157040584534414e-05, - "loss": 0.3485, + "learning_rate": 4.1570326046652835e-05, + "loss": 0.2654, "step": 3447000 }, { "epoch": 2.07, - "learning_rate": 4.156831007971469e-05, - "loss": 0.3496, + "learning_rate": 4.156822608109227e-05, + "loss": 0.2662, "step": 3447500 }, { "epoch": 2.07, - "learning_rate": 4.1566210114154125e-05, - "loss": 0.3545, + "learning_rate": 4.156612611553171e-05, + "loss": 0.2727, "step": 3448000 }, { "epoch": 2.07, - "learning_rate": 4.1564110148593565e-05, - "loss": 0.3531, + "learning_rate": 4.1564026149971136e-05, + "loss": 0.2707, "step": 3448500 }, { "epoch": 2.07, - "learning_rate": 4.156201438296412e-05, - "loss": 0.3488, + "learning_rate": 4.1561926184410576e-05, + "loss": 0.2642, "step": 3449000 }, { "epoch": 2.07, - "learning_rate": 4.155991441740355e-05, - "loss": 0.3525, + "learning_rate": 4.155982621885001e-05, + "loss": 0.2678, "step": 3449500 }, { "epoch": 2.07, - "learning_rate": 4.1557814451842985e-05, - "loss": 0.3593, + "learning_rate": 4.155772625328944e-05, + "loss": 0.2725, "step": 3450000 }, { "epoch": 2.07, - "learning_rate": 4.1555714486282426e-05, - "loss": 0.3543, + "learning_rate": 4.155562628772888e-05, + "loss": 0.269, "step": 3450500 }, { "epoch": 2.07, - "learning_rate": 4.155361452072186e-05, - "loss": 0.3583, + "learning_rate": 4.1553526322168316e-05, + "loss": 0.2735, "step": 3451000 }, { "epoch": 2.07, - "learning_rate": 4.155151455516129e-05, - "loss": 0.3474, + "learning_rate": 4.155143055653887e-05, + "loss": 0.27, "step": 3451500 }, { "epoch": 2.07, - "learning_rate": 4.154941458960073e-05, - "loss": 0.3521, + "learning_rate": 4.1549330590978303e-05, + "loss": 0.263, "step": 3452000 }, { "epoch": 2.07, - "learning_rate": 4.1547314624040166e-05, - "loss": 0.3535, + "learning_rate": 4.1547230625417744e-05, + "loss": 0.2708, "step": 3452500 }, { "epoch": 2.07, - "learning_rate": 4.15452146584796e-05, - "loss": 0.3464, + "learning_rate": 4.154513065985718e-05, + "loss": 0.2648, "step": 3453000 }, { "epoch": 2.07, - "learning_rate": 4.154311889285015e-05, - "loss": 0.3482, + "learning_rate": 4.154303489422773e-05, + "loss": 0.2725, "step": 3453500 }, { "epoch": 2.07, - "learning_rate": 4.1541018927289594e-05, - "loss": 0.3537, + "learning_rate": 4.1540934928667164e-05, + "loss": 0.2694, "step": 3454000 }, { "epoch": 2.07, - "learning_rate": 4.153891896172903e-05, - "loss": 0.3523, + "learning_rate": 4.1538834963106604e-05, + "loss": 0.2704, "step": 3454500 }, { "epoch": 2.07, - "learning_rate": 4.153681899616846e-05, - "loss": 0.3518, + "learning_rate": 4.153673499754604e-05, + "loss": 0.2698, "step": 3455000 }, { "epoch": 2.07, - "learning_rate": 4.15347190306079e-05, - "loss": 0.3505, + "learning_rate": 4.153463503198548e-05, + "loss": 0.2687, "step": 3455500 }, { "epoch": 2.07, - "learning_rate": 4.1532619065047334e-05, - "loss": 0.3548, + "learning_rate": 4.153253926635603e-05, + "loss": 0.2703, "step": 3456000 }, { "epoch": 2.07, - "learning_rate": 4.153052329941789e-05, - "loss": 0.3605, + "learning_rate": 4.1530439300795465e-05, + "loss": 0.2737, "step": 3456500 }, { "epoch": 2.07, - "learning_rate": 4.152842333385733e-05, - "loss": 0.3586, + "learning_rate": 4.15283393352349e-05, + "loss": 0.2694, "step": 3457000 }, { "epoch": 2.07, - "learning_rate": 4.152632336829676e-05, - "loss": 0.3636, + "learning_rate": 4.152624356960546e-05, + "loss": 0.2745, "step": 3457500 }, { "epoch": 2.07, - "learning_rate": 4.1524223402736195e-05, - "loss": 0.3533, + "learning_rate": 4.152414360404489e-05, + "loss": 0.2665, "step": 3458000 }, { "epoch": 2.07, - "learning_rate": 4.1522123437175635e-05, - "loss": 0.3595, + "learning_rate": 4.1522043638484326e-05, + "loss": 0.2717, "step": 3458500 }, { "epoch": 2.07, - "learning_rate": 4.152002347161506e-05, - "loss": 0.3445, + "learning_rate": 4.151994367292376e-05, + "loss": 0.2668, "step": 3459000 }, { "epoch": 2.07, - "learning_rate": 4.1517923506054495e-05, - "loss": 0.3527, + "learning_rate": 4.15178437073632e-05, + "loss": 0.2666, "step": 3459500 }, { "epoch": 2.07, - "learning_rate": 4.1515823540493936e-05, - "loss": 0.359, + "learning_rate": 4.151574374180263e-05, + "loss": 0.27, "step": 3460000 }, { "epoch": 2.07, - "learning_rate": 4.1513727774864496e-05, - "loss": 0.3449, + "learning_rate": 4.1513643776242066e-05, + "loss": 0.2582, "step": 3460500 }, { "epoch": 2.08, - "learning_rate": 4.151162780930393e-05, - "loss": 0.3464, + "learning_rate": 4.151154381068151e-05, + "loss": 0.2663, "step": 3461000 }, { "epoch": 2.08, - "learning_rate": 4.1509527843743356e-05, - "loss": 0.3475, + "learning_rate": 4.150944804505206e-05, + "loss": 0.266, "step": 3461500 }, { "epoch": 2.08, - "learning_rate": 4.1507427878182796e-05, - "loss": 0.3543, + "learning_rate": 4.1507348079491494e-05, + "loss": 0.2713, "step": 3462000 }, { "epoch": 2.08, - "learning_rate": 4.1505332112553357e-05, - "loss": 0.3454, + "learning_rate": 4.1505248113930934e-05, + "loss": 0.2704, "step": 3462500 }, { "epoch": 2.08, - "learning_rate": 4.150323214699279e-05, - "loss": 0.3532, + "learning_rate": 4.150314814837037e-05, + "loss": 0.2745, "step": 3463000 }, { "epoch": 2.08, - "learning_rate": 4.150113218143223e-05, - "loss": 0.3458, + "learning_rate": 4.150105238274092e-05, + "loss": 0.2693, "step": 3463500 }, { "epoch": 2.08, - "learning_rate": 4.149903221587166e-05, - "loss": 0.3533, + "learning_rate": 4.1498952417180354e-05, + "loss": 0.2687, "step": 3464000 }, { "epoch": 2.08, - "learning_rate": 4.149693645024222e-05, - "loss": 0.3558, + "learning_rate": 4.1496852451619795e-05, + "loss": 0.2696, "step": 3464500 }, { "epoch": 2.08, - "learning_rate": 4.149483648468165e-05, - "loss": 0.3447, + "learning_rate": 4.149475248605923e-05, + "loss": 0.2648, "step": 3465000 }, { "epoch": 2.08, - "learning_rate": 4.149273651912109e-05, - "loss": 0.3524, + "learning_rate": 4.149265252049866e-05, + "loss": 0.2712, "step": 3465500 }, { "epoch": 2.08, - "learning_rate": 4.149063655356052e-05, - "loss": 0.3472, + "learning_rate": 4.149056095480034e-05, + "loss": 0.2739, "step": 3466000 }, { "epoch": 2.08, - "learning_rate": 4.148854078793108e-05, - "loss": 0.3524, + "learning_rate": 4.1488460989239775e-05, + "loss": 0.2705, "step": 3466500 }, { "epoch": 2.08, - "learning_rate": 4.148644502230163e-05, - "loss": 0.3569, + "learning_rate": 4.1486361023679216e-05, + "loss": 0.2699, "step": 3467000 }, { "epoch": 2.08, - "learning_rate": 4.1484345056741065e-05, - "loss": 0.3495, + "learning_rate": 4.148426105811864e-05, + "loss": 0.2732, "step": 3467500 }, { "epoch": 2.08, - "learning_rate": 4.1482245091180505e-05, - "loss": 0.3522, + "learning_rate": 4.148216109255808e-05, + "loss": 0.2697, "step": 3468000 }, { "epoch": 2.08, - "learning_rate": 4.148014512561994e-05, - "loss": 0.3521, + "learning_rate": 4.148006532692864e-05, + "loss": 0.2718, "step": 3468500 }, { "epoch": 2.08, - "learning_rate": 4.147804516005938e-05, - "loss": 0.3532, + "learning_rate": 4.1477965361368076e-05, + "loss": 0.267, "step": 3469000 }, { "epoch": 2.08, - "learning_rate": 4.147594519449881e-05, - "loss": 0.3619, + "learning_rate": 4.147586539580751e-05, + "loss": 0.2711, "step": 3469500 }, { "epoch": 2.08, - "learning_rate": 4.1473845228938246e-05, - "loss": 0.3519, + "learning_rate": 4.147376543024694e-05, + "loss": 0.2705, "step": 3470000 }, { "epoch": 2.08, - "learning_rate": 4.1471745263377686e-05, - "loss": 0.3523, + "learning_rate": 4.147166546468638e-05, + "loss": 0.27, "step": 3470500 }, { "epoch": 2.08, - "learning_rate": 4.146964949774824e-05, - "loss": 0.35, + "learning_rate": 4.146956549912581e-05, + "loss": 0.2712, "step": 3471000 }, { "epoch": 2.08, - "learning_rate": 4.146754953218767e-05, - "loss": 0.3501, + "learning_rate": 4.146746553356525e-05, + "loss": 0.2719, "step": 3471500 }, { "epoch": 2.08, - "learning_rate": 4.146544956662711e-05, - "loss": 0.3496, + "learning_rate": 4.1465365568004684e-05, + "loss": 0.2664, "step": 3472000 }, { "epoch": 2.08, - "learning_rate": 4.146334960106655e-05, - "loss": 0.3592, + "learning_rate": 4.146326560244412e-05, + "loss": 0.2718, "step": 3472500 }, { "epoch": 2.08, - "learning_rate": 4.1461258035368214e-05, - "loss": 0.3551, + "learning_rate": 4.146116983681467e-05, + "loss": 0.2679, "step": 3473000 }, { "epoch": 2.08, - "learning_rate": 4.1459158069807654e-05, - "loss": 0.3505, + "learning_rate": 4.145906987125411e-05, + "loss": 0.263, "step": 3473500 }, { "epoch": 2.08, - "learning_rate": 4.145705810424709e-05, - "loss": 0.3442, + "learning_rate": 4.1456969905693545e-05, + "loss": 0.2649, "step": 3474000 }, { "epoch": 2.08, - "learning_rate": 4.145495813868652e-05, - "loss": 0.3501, + "learning_rate": 4.14548741400641e-05, + "loss": 0.2737, "step": 3474500 }, { "epoch": 2.08, - "learning_rate": 4.145286237305708e-05, - "loss": 0.3507, + "learning_rate": 4.145277417450354e-05, + "loss": 0.2693, "step": 3475000 }, { "epoch": 2.08, - "learning_rate": 4.1450762407496515e-05, - "loss": 0.3564, + "learning_rate": 4.145067420894297e-05, + "loss": 0.2703, "step": 3475500 }, { "epoch": 2.08, - "learning_rate": 4.144866244193595e-05, - "loss": 0.3599, + "learning_rate": 4.1448574243382405e-05, + "loss": 0.2734, "step": 3476000 }, { "epoch": 2.08, - "learning_rate": 4.144656247637539e-05, - "loss": 0.3538, + "learning_rate": 4.1446474277821846e-05, + "loss": 0.2717, "step": 3476500 }, { "epoch": 2.08, - "learning_rate": 4.144446251081482e-05, - "loss": 0.361, + "learning_rate": 4.144437431226128e-05, + "loss": 0.2702, "step": 3477000 }, { "epoch": 2.08, - "learning_rate": 4.1442362545254255e-05, - "loss": 0.3603, + "learning_rate": 4.144227434670071e-05, + "loss": 0.2689, "step": 3477500 }, { "epoch": 2.09, - "learning_rate": 4.144026677962481e-05, - "loss": 0.3498, + "learning_rate": 4.144017438114015e-05, + "loss": 0.2661, "step": 3478000 }, { "epoch": 2.09, - "learning_rate": 4.143816681406425e-05, - "loss": 0.3508, + "learning_rate": 4.1438078615510706e-05, + "loss": 0.2735, "step": 3478500 }, { "epoch": 2.09, - "learning_rate": 4.143606684850368e-05, - "loss": 0.3502, + "learning_rate": 4.143597864995014e-05, + "loss": 0.2669, "step": 3479000 }, { "epoch": 2.09, - "learning_rate": 4.1433966882943116e-05, - "loss": 0.3516, + "learning_rate": 4.1433882884320693e-05, + "loss": 0.279, "step": 3479500 }, { "epoch": 2.09, - "learning_rate": 4.1431866917382556e-05, - "loss": 0.3604, + "learning_rate": 4.143178291876013e-05, + "loss": 0.2736, "step": 3480000 }, { "epoch": 2.09, - "learning_rate": 4.142976695182199e-05, - "loss": 0.3436, + "learning_rate": 4.142968295319957e-05, + "loss": 0.2688, "step": 3480500 }, { "epoch": 2.09, - "learning_rate": 4.142766698626142e-05, - "loss": 0.357, + "learning_rate": 4.142758718757013e-05, + "loss": 0.2688, "step": 3481000 }, { "epoch": 2.09, - "learning_rate": 4.142557122063198e-05, - "loss": 0.352, + "learning_rate": 4.1425487222009554e-05, + "loss": 0.2708, "step": 3481500 }, { "epoch": 2.09, - "learning_rate": 4.142347125507142e-05, - "loss": 0.3547, + "learning_rate": 4.1423387256448994e-05, + "loss": 0.2712, "step": 3482000 }, { "epoch": 2.09, - "learning_rate": 4.142137128951085e-05, - "loss": 0.3501, + "learning_rate": 4.142128729088843e-05, + "loss": 0.2706, "step": 3482500 }, { "epoch": 2.09, - "learning_rate": 4.141927132395029e-05, - "loss": 0.3534, + "learning_rate": 4.141918732532786e-05, + "loss": 0.2709, "step": 3483000 }, { "epoch": 2.09, - "learning_rate": 4.1417171358389724e-05, - "loss": 0.3469, + "learning_rate": 4.14170873597673e-05, + "loss": 0.2711, "step": 3483500 }, { "epoch": 2.09, - "learning_rate": 4.141507559276028e-05, - "loss": 0.3548, + "learning_rate": 4.1414987394206735e-05, + "loss": 0.2661, "step": 3484000 }, { "epoch": 2.09, - "learning_rate": 4.141297562719971e-05, - "loss": 0.3508, + "learning_rate": 4.141288742864617e-05, + "loss": 0.2696, "step": 3484500 }, { "epoch": 2.09, - "learning_rate": 4.141087566163915e-05, - "loss": 0.3543, + "learning_rate": 4.141078746308561e-05, + "loss": 0.2662, "step": 3485000 }, { "epoch": 2.09, - "learning_rate": 4.1408775696078585e-05, - "loss": 0.3453, + "learning_rate": 4.140868749752504e-05, + "loss": 0.2671, "step": 3485500 }, { "epoch": 2.09, - "learning_rate": 4.140667573051802e-05, - "loss": 0.3493, + "learning_rate": 4.1406587531964476e-05, + "loss": 0.2651, "step": 3486000 }, { "epoch": 2.09, - "learning_rate": 4.140457996488857e-05, - "loss": 0.3561, + "learning_rate": 4.140449176633503e-05, + "loss": 0.2748, "step": 3486500 }, { "epoch": 2.09, - "learning_rate": 4.140247999932801e-05, - "loss": 0.3485, + "learning_rate": 4.140239180077447e-05, + "loss": 0.2659, "step": 3487000 }, { "epoch": 2.09, - "learning_rate": 4.1400380033767446e-05, - "loss": 0.3558, + "learning_rate": 4.14002918352139e-05, + "loss": 0.2691, "step": 3487500 }, { "epoch": 2.09, - "learning_rate": 4.139828006820688e-05, - "loss": 0.3626, + "learning_rate": 4.1398191869653336e-05, + "loss": 0.2754, "step": 3488000 }, { "epoch": 2.09, - "learning_rate": 4.139618010264632e-05, - "loss": 0.3545, + "learning_rate": 4.1396091904092777e-05, + "loss": 0.2703, "step": 3488500 }, { "epoch": 2.09, - "learning_rate": 4.139408013708575e-05, - "loss": 0.3467, + "learning_rate": 4.139399193853221e-05, + "loss": 0.2703, "step": 3489000 }, { "epoch": 2.09, - "learning_rate": 4.1391980171525186e-05, - "loss": 0.3578, + "learning_rate": 4.139189197297164e-05, + "loss": 0.2726, "step": 3489500 }, { "epoch": 2.09, - "learning_rate": 4.138988020596462e-05, - "loss": 0.3501, + "learning_rate": 4.138979200741108e-05, + "loss": 0.2657, "step": 3490000 }, { "epoch": 2.09, - "learning_rate": 4.138778444033518e-05, - "loss": 0.3621, + "learning_rate": 4.138769204185051e-05, + "loss": 0.2691, "step": 3490500 }, { "epoch": 2.09, - "learning_rate": 4.1385684474774614e-05, - "loss": 0.3543, + "learning_rate": 4.138559627622107e-05, + "loss": 0.2686, "step": 3491000 }, { "epoch": 2.09, - "learning_rate": 4.138358870914517e-05, - "loss": 0.3553, + "learning_rate": 4.1383496310660504e-05, + "loss": 0.272, "step": 3491500 }, { "epoch": 2.09, - "learning_rate": 4.138148874358461e-05, - "loss": 0.3605, + "learning_rate": 4.138139634509994e-05, + "loss": 0.268, "step": 3492000 }, { "epoch": 2.09, - "learning_rate": 4.137938877802404e-05, - "loss": 0.3518, + "learning_rate": 4.137929637953937e-05, + "loss": 0.2726, "step": 3492500 }, { "epoch": 2.09, - "learning_rate": 4.1377288812463474e-05, - "loss": 0.3443, + "learning_rate": 4.137720061390993e-05, + "loss": 0.2659, "step": 3493000 }, { "epoch": 2.09, - "learning_rate": 4.1375188846902915e-05, - "loss": 0.3448, + "learning_rate": 4.137510064834937e-05, + "loss": 0.2609, "step": 3493500 }, { "epoch": 2.09, - "learning_rate": 4.137308888134235e-05, - "loss": 0.3544, + "learning_rate": 4.1373000682788805e-05, + "loss": 0.2737, "step": 3494000 }, { "epoch": 2.1, - "learning_rate": 4.137098891578178e-05, - "loss": 0.3594, + "learning_rate": 4.137090071722823e-05, + "loss": 0.2743, "step": 3494500 }, { "epoch": 2.1, - "learning_rate": 4.1368888950221215e-05, - "loss": 0.3489, + "learning_rate": 4.136880075166767e-05, + "loss": 0.2657, "step": 3495000 }, { "epoch": 2.1, - "learning_rate": 4.1366793184591775e-05, - "loss": 0.3573, + "learning_rate": 4.1366700786107106e-05, + "loss": 0.2812, "step": 3495500 }, { "epoch": 2.1, - "learning_rate": 4.136469741896233e-05, - "loss": 0.356, + "learning_rate": 4.136460082054654e-05, + "loss": 0.2726, "step": 3496000 }, { "epoch": 2.1, - "learning_rate": 4.136259745340176e-05, - "loss": 0.3495, + "learning_rate": 4.136250085498598e-05, + "loss": 0.2658, "step": 3496500 }, { "epoch": 2.1, - "learning_rate": 4.13604974878412e-05, - "loss": 0.3533, + "learning_rate": 4.136040508935653e-05, + "loss": 0.2674, "step": 3497000 }, { "epoch": 2.1, - "learning_rate": 4.1358397522280636e-05, - "loss": 0.3512, + "learning_rate": 4.1358305123795966e-05, + "loss": 0.2671, "step": 3497500 }, { "epoch": 2.1, - "learning_rate": 4.135629755672007e-05, - "loss": 0.351, + "learning_rate": 4.13562051582354e-05, + "loss": 0.2713, "step": 3498000 }, { "epoch": 2.1, - "learning_rate": 4.135419759115951e-05, - "loss": 0.3498, + "learning_rate": 4.135410519267484e-05, + "loss": 0.2648, "step": 3498500 }, { "epoch": 2.1, - "learning_rate": 4.135209762559894e-05, - "loss": 0.3454, + "learning_rate": 4.1352009427045394e-05, + "loss": 0.2717, "step": 3499000 }, { "epoch": 2.1, - "learning_rate": 4.134999766003837e-05, - "loss": 0.3484, + "learning_rate": 4.134990946148483e-05, + "loss": 0.2675, "step": 3499500 }, { "epoch": 2.1, - "learning_rate": 4.134789769447781e-05, - "loss": 0.3501, + "learning_rate": 4.134780949592427e-05, + "loss": 0.2686, "step": 3500000 }, { "epoch": 2.1, - "eval_loss": 0.34503260254859924, - "eval_runtime": 1117.4396, - "eval_samples_per_second": 471.363, - "eval_steps_per_second": 78.561, + "eval_loss": 0.24986667931079865, + "eval_runtime": 1459.8444, + "eval_samples_per_second": 360.806, + "eval_steps_per_second": 60.134, "step": 3500000 }, { "epoch": 2.1, - "learning_rate": 4.134580192884837e-05, - "loss": 0.3546, + "learning_rate": 4.134571373029483e-05, + "loss": 0.2725, "step": 3500500 }, { "epoch": 2.1, - "learning_rate": 4.1343706163218924e-05, - "loss": 0.356, + "learning_rate": 4.134361376473426e-05, + "loss": 0.2692, "step": 3501000 }, { "epoch": 2.1, - "learning_rate": 4.134160619765836e-05, - "loss": 0.3565, + "learning_rate": 4.134151379917369e-05, + "loss": 0.2697, "step": 3501500 }, { "epoch": 2.1, - "learning_rate": 4.133950623209779e-05, - "loss": 0.3592, + "learning_rate": 4.133941383361313e-05, + "loss": 0.2703, "step": 3502000 }, { "epoch": 2.1, - "learning_rate": 4.133740626653723e-05, - "loss": 0.3566, + "learning_rate": 4.133731386805256e-05, + "loss": 0.2658, "step": 3502500 }, { "epoch": 2.1, - "learning_rate": 4.1335310500907785e-05, - "loss": 0.3474, + "learning_rate": 4.1335213902491995e-05, + "loss": 0.2699, "step": 3503000 }, { "epoch": 2.1, - "learning_rate": 4.133321473527834e-05, - "loss": 0.3567, + "learning_rate": 4.1333118136862555e-05, + "loss": 0.2737, "step": 3503500 }, { "epoch": 2.1, - "learning_rate": 4.133111476971777e-05, - "loss": 0.354, + "learning_rate": 4.133101817130199e-05, + "loss": 0.2706, "step": 3504000 }, { "epoch": 2.1, - "learning_rate": 4.132901480415721e-05, - "loss": 0.3509, + "learning_rate": 4.132891820574142e-05, + "loss": 0.266, "step": 3504500 }, { "epoch": 2.1, - "learning_rate": 4.1326914838596645e-05, - "loss": 0.3533, + "learning_rate": 4.1326818240180856e-05, + "loss": 0.2737, "step": 3505000 }, { "epoch": 2.1, - "learning_rate": 4.132481487303608e-05, - "loss": 0.3493, + "learning_rate": 4.1324718274620296e-05, + "loss": 0.2685, "step": 3505500 }, { "epoch": 2.1, - "learning_rate": 4.132271490747552e-05, - "loss": 0.3527, + "learning_rate": 4.132261830905973e-05, + "loss": 0.2679, "step": 3506000 }, { "epoch": 2.1, - "learning_rate": 4.132061494191495e-05, - "loss": 0.3445, + "learning_rate": 4.132051834349916e-05, + "loss": 0.2662, "step": 3506500 }, { "epoch": 2.1, - "learning_rate": 4.1318514976354386e-05, - "loss": 0.3498, + "learning_rate": 4.13184183779386e-05, + "loss": 0.2742, "step": 3507000 }, { "epoch": 2.1, - "learning_rate": 4.1316415010793826e-05, - "loss": 0.3519, + "learning_rate": 4.1316322612309157e-05, + "loss": 0.2716, "step": 3507500 }, { "epoch": 2.1, - "learning_rate": 4.131431504523326e-05, - "loss": 0.3545, + "learning_rate": 4.131422264674859e-05, + "loss": 0.2741, "step": 3508000 }, { "epoch": 2.1, - "learning_rate": 4.131221507967269e-05, - "loss": 0.3506, + "learning_rate": 4.131212268118803e-05, + "loss": 0.266, "step": 3508500 }, { "epoch": 2.1, - "learning_rate": 4.131011511411213e-05, - "loss": 0.3566, + "learning_rate": 4.1310022715627464e-05, + "loss": 0.2736, "step": 3509000 }, { "epoch": 2.1, - "learning_rate": 4.130801934848269e-05, - "loss": 0.37, + "learning_rate": 4.13079227500669e-05, + "loss": 0.2752, "step": 3509500 }, { "epoch": 2.1, - "learning_rate": 4.130591938292212e-05, - "loss": 0.3542, + "learning_rate": 4.130582278450634e-05, + "loss": 0.2691, "step": 3510000 }, { "epoch": 2.1, - "learning_rate": 4.1303819417361554e-05, - "loss": 0.3482, + "learning_rate": 4.130372701887689e-05, + "loss": 0.2674, "step": 3510500 }, { "epoch": 2.1, - "learning_rate": 4.1301719451800994e-05, - "loss": 0.3488, + "learning_rate": 4.1301627053316324e-05, + "loss": 0.2678, "step": 3511000 }, { "epoch": 2.11, - "learning_rate": 4.129962368617155e-05, - "loss": 0.3468, + "learning_rate": 4.129952708775576e-05, + "loss": 0.2694, "step": 3511500 }, { "epoch": 2.11, - "learning_rate": 4.129752372061098e-05, - "loss": 0.344, + "learning_rate": 4.12974271221952e-05, + "loss": 0.2674, "step": 3512000 }, { "epoch": 2.11, - "learning_rate": 4.1295427954981535e-05, - "loss": 0.3537, + "learning_rate": 4.129533135656575e-05, + "loss": 0.2705, "step": 3512500 }, { "epoch": 2.11, - "learning_rate": 4.1293327989420975e-05, - "loss": 0.3517, + "learning_rate": 4.1293231391005185e-05, + "loss": 0.2663, "step": 3513000 }, { "epoch": 2.11, - "learning_rate": 4.129122802386041e-05, - "loss": 0.3532, + "learning_rate": 4.129113562537574e-05, + "loss": 0.2682, "step": 3513500 }, { "epoch": 2.11, - "learning_rate": 4.128912805829984e-05, - "loss": 0.3536, + "learning_rate": 4.128903565981518e-05, + "loss": 0.2693, "step": 3514000 }, { "epoch": 2.11, - "learning_rate": 4.1287032292670395e-05, - "loss": 0.356, + "learning_rate": 4.128693569425461e-05, + "loss": 0.2702, "step": 3514500 }, { "epoch": 2.11, - "learning_rate": 4.1284932327109836e-05, - "loss": 0.3575, + "learning_rate": 4.1284835728694046e-05, + "loss": 0.2737, "step": 3515000 }, { "epoch": 2.11, - "learning_rate": 4.128283236154927e-05, - "loss": 0.3437, + "learning_rate": 4.1282735763133486e-05, + "loss": 0.2659, "step": 3515500 }, { "epoch": 2.11, - "learning_rate": 4.12807323959887e-05, - "loss": 0.3553, + "learning_rate": 4.128063579757292e-05, + "loss": 0.2747, "step": 3516000 }, { "epoch": 2.11, - "learning_rate": 4.127863243042814e-05, - "loss": 0.353, + "learning_rate": 4.127853583201235e-05, + "loss": 0.2717, "step": 3516500 }, { "epoch": 2.11, - "learning_rate": 4.1276536664798696e-05, - "loss": 0.3583, + "learning_rate": 4.127643586645179e-05, + "loss": 0.2734, "step": 3517000 }, { "epoch": 2.11, - "learning_rate": 4.127443669923813e-05, - "loss": 0.3603, + "learning_rate": 4.127434010082235e-05, + "loss": 0.2748, "step": 3517500 }, { "epoch": 2.11, - "learning_rate": 4.127233673367757e-05, - "loss": 0.3544, + "learning_rate": 4.127224013526178e-05, + "loss": 0.2691, "step": 3518000 }, { "epoch": 2.11, - "learning_rate": 4.1270236768117004e-05, - "loss": 0.3577, + "learning_rate": 4.1270140169701214e-05, + "loss": 0.2693, "step": 3518500 }, { "epoch": 2.11, - "learning_rate": 4.126813680255644e-05, - "loss": 0.3403, + "learning_rate": 4.1268040204140654e-05, + "loss": 0.2645, "step": 3519000 }, { "epoch": 2.11, - "learning_rate": 4.126603683699588e-05, - "loss": 0.3511, + "learning_rate": 4.126594023858009e-05, + "loss": 0.2695, "step": 3519500 }, { "epoch": 2.11, - "learning_rate": 4.126394107136643e-05, - "loss": 0.3449, + "learning_rate": 4.126384447295064e-05, + "loss": 0.2681, "step": 3520000 }, { "epoch": 2.11, - "learning_rate": 4.1261841105805864e-05, - "loss": 0.355, + "learning_rate": 4.126174450739008e-05, + "loss": 0.2731, "step": 3520500 }, { "epoch": 2.11, - "learning_rate": 4.125974534017642e-05, - "loss": 0.3518, + "learning_rate": 4.1259644541829515e-05, + "loss": 0.2707, "step": 3521000 }, { "epoch": 2.11, - "learning_rate": 4.125764537461585e-05, - "loss": 0.3447, + "learning_rate": 4.125754457626895e-05, + "loss": 0.2661, "step": 3521500 }, { "epoch": 2.11, - "learning_rate": 4.125554540905529e-05, - "loss": 0.3532, + "learning_rate": 4.12554488106395e-05, + "loss": 0.2691, "step": 3522000 }, { "epoch": 2.11, - "learning_rate": 4.1253445443494725e-05, - "loss": 0.3522, + "learning_rate": 4.125334884507894e-05, + "loss": 0.2667, "step": 3522500 }, { "epoch": 2.11, - "learning_rate": 4.125134547793416e-05, - "loss": 0.3575, + "learning_rate": 4.1251248879518376e-05, + "loss": 0.2725, "step": 3523000 }, { "epoch": 2.11, - "learning_rate": 4.12492455123736e-05, - "loss": 0.3528, + "learning_rate": 4.124914891395781e-05, + "loss": 0.2669, "step": 3523500 }, { "epoch": 2.11, - "learning_rate": 4.124714554681303e-05, - "loss": 0.3547, + "learning_rate": 4.124705314832836e-05, + "loss": 0.2718, "step": 3524000 }, { "epoch": 2.11, - "learning_rate": 4.1245045581252466e-05, - "loss": 0.3611, + "learning_rate": 4.12449531827678e-05, + "loss": 0.2702, "step": 3524500 }, { "epoch": 2.11, - "learning_rate": 4.1242945615691906e-05, - "loss": 0.3548, + "learning_rate": 4.1242853217207236e-05, + "loss": 0.2702, "step": 3525000 }, { "epoch": 2.11, - "learning_rate": 4.124084565013133e-05, - "loss": 0.3514, + "learning_rate": 4.124075325164667e-05, + "loss": 0.2673, "step": 3525500 }, { "epoch": 2.11, - "learning_rate": 4.123874988450189e-05, - "loss": 0.3572, + "learning_rate": 4.123865328608611e-05, + "loss": 0.2713, "step": 3526000 }, { "epoch": 2.11, - "learning_rate": 4.123664991894133e-05, - "loss": 0.35, + "learning_rate": 4.1236557520456664e-05, + "loss": 0.271, "step": 3526500 }, { "epoch": 2.11, - "learning_rate": 4.123454995338077e-05, - "loss": 0.3525, + "learning_rate": 4.12344575548961e-05, + "loss": 0.2683, "step": 3527000 }, { "epoch": 2.11, - "learning_rate": 4.12324499878202e-05, - "loss": 0.3551, + "learning_rate": 4.123235758933554e-05, + "loss": 0.2718, "step": 3527500 }, { "epoch": 2.12, - "learning_rate": 4.1230354222190754e-05, - "loss": 0.3557, + "learning_rate": 4.123025762377497e-05, + "loss": 0.2746, "step": 3528000 }, { "epoch": 2.12, - "learning_rate": 4.1228254256630194e-05, - "loss": 0.3615, + "learning_rate": 4.1228161858145524e-05, + "loss": 0.2729, "step": 3528500 }, { "epoch": 2.12, - "learning_rate": 4.122615429106963e-05, - "loss": 0.3516, + "learning_rate": 4.122606189258496e-05, + "loss": 0.2681, "step": 3529000 }, { "epoch": 2.12, - "learning_rate": 4.122405432550906e-05, - "loss": 0.344, + "learning_rate": 4.12239619270244e-05, + "loss": 0.2697, "step": 3529500 }, { "epoch": 2.12, - "learning_rate": 4.12219543599485e-05, - "loss": 0.3484, + "learning_rate": 4.122186196146383e-05, + "loss": 0.2684, "step": 3530000 }, { "epoch": 2.12, - "learning_rate": 4.1219858594319055e-05, - "loss": 0.3544, + "learning_rate": 4.1219766195834385e-05, + "loss": 0.2713, "step": 3530500 }, { "epoch": 2.12, - "learning_rate": 4.121775862875849e-05, - "loss": 0.3525, + "learning_rate": 4.121766623027382e-05, + "loss": 0.2694, "step": 3531000 }, { "epoch": 2.12, - "learning_rate": 4.121565866319792e-05, - "loss": 0.3511, + "learning_rate": 4.121556626471326e-05, + "loss": 0.2685, "step": 3531500 }, { "epoch": 2.12, - "learning_rate": 4.121355869763736e-05, - "loss": 0.3539, + "learning_rate": 4.121346629915269e-05, + "loss": 0.2707, "step": 3532000 }, { "epoch": 2.12, - "learning_rate": 4.1211458732076795e-05, - "loss": 0.3512, + "learning_rate": 4.1211370533523246e-05, + "loss": 0.2649, "step": 3532500 }, { "epoch": 2.12, - "learning_rate": 4.120935876651623e-05, - "loss": 0.3478, + "learning_rate": 4.1209270567962686e-05, + "loss": 0.2601, "step": 3533000 }, { "epoch": 2.12, - "learning_rate": 4.120725880095566e-05, - "loss": 0.3532, + "learning_rate": 4.120717060240212e-05, + "loss": 0.2697, "step": 3533500 }, { "epoch": 2.12, - "learning_rate": 4.1205158835395096e-05, - "loss": 0.3554, + "learning_rate": 4.120507063684155e-05, + "loss": 0.2703, "step": 3534000 }, { "epoch": 2.12, - "learning_rate": 4.1203063069765656e-05, - "loss": 0.3571, + "learning_rate": 4.120297067128099e-05, + "loss": 0.2738, "step": 3534500 }, { "epoch": 2.12, - "learning_rate": 4.1200963104205096e-05, - "loss": 0.354, + "learning_rate": 4.120087490565155e-05, + "loss": 0.2708, "step": 3535000 }, { "epoch": 2.12, - "learning_rate": 4.119886313864452e-05, - "loss": 0.3568, + "learning_rate": 4.119877914002211e-05, + "loss": 0.2769, "step": 3535500 }, { "epoch": 2.12, - "learning_rate": 4.1196763173083956e-05, - "loss": 0.3464, + "learning_rate": 4.119667917446154e-05, + "loss": 0.2712, "step": 3536000 }, { "epoch": 2.12, - "learning_rate": 4.1194663207523397e-05, - "loss": 0.3587, + "learning_rate": 4.1194579208900974e-05, + "loss": 0.2723, "step": 3536500 }, { "epoch": 2.12, - "learning_rate": 4.119256744189396e-05, - "loss": 0.3615, + "learning_rate": 4.119247924334041e-05, + "loss": 0.274, "step": 3537000 }, { "epoch": 2.12, - "learning_rate": 4.1190467476333384e-05, - "loss": 0.3673, + "learning_rate": 4.119037927777984e-05, + "loss": 0.2738, "step": 3537500 }, { "epoch": 2.12, - "learning_rate": 4.118836751077282e-05, - "loss": 0.3536, + "learning_rate": 4.1188279312219274e-05, + "loss": 0.2714, "step": 3538000 }, { "epoch": 2.12, - "learning_rate": 4.118626754521226e-05, - "loss": 0.3529, + "learning_rate": 4.1186179346658715e-05, + "loss": 0.2655, "step": 3538500 }, { "epoch": 2.12, - "learning_rate": 4.118416757965169e-05, - "loss": 0.3682, + "learning_rate": 4.1184083581029275e-05, + "loss": 0.2738, "step": 3539000 }, { "epoch": 2.12, - "learning_rate": 4.1182067614091124e-05, - "loss": 0.3608, + "learning_rate": 4.11819836154687e-05, + "loss": 0.2777, "step": 3539500 }, { "epoch": 2.12, - "learning_rate": 4.1179967648530564e-05, - "loss": 0.3625, + "learning_rate": 4.117988364990814e-05, + "loss": 0.2714, "step": 3540000 }, { "epoch": 2.12, - "learning_rate": 4.117787188290112e-05, - "loss": 0.3531, + "learning_rate": 4.1177783684347575e-05, + "loss": 0.2682, "step": 3540500 }, { "epoch": 2.12, - "learning_rate": 4.117577191734055e-05, - "loss": 0.3612, + "learning_rate": 4.117568371878701e-05, + "loss": 0.2742, "step": 3541000 }, { "epoch": 2.12, - "learning_rate": 4.117367195177999e-05, - "loss": 0.3604, + "learning_rate": 4.117358375322645e-05, + "loss": 0.2741, "step": 3541500 }, { "epoch": 2.12, - "learning_rate": 4.1171571986219425e-05, - "loss": 0.3443, + "learning_rate": 4.117148378766588e-05, + "loss": 0.267, "step": 3542000 }, { "epoch": 2.12, - "learning_rate": 4.116947202065886e-05, - "loss": 0.3578, + "learning_rate": 4.1169383822105316e-05, + "loss": 0.2686, "step": 3542500 }, { "epoch": 2.12, - "learning_rate": 4.11673720550983e-05, - "loss": 0.356, + "learning_rate": 4.116728805647587e-05, + "loss": 0.2713, "step": 3543000 }, { "epoch": 2.12, - "learning_rate": 4.116527628946885e-05, - "loss": 0.3555, + "learning_rate": 4.116518809091531e-05, + "loss": 0.2672, "step": 3543500 }, { "epoch": 2.12, - "learning_rate": 4.1163176323908286e-05, - "loss": 0.3541, + "learning_rate": 4.116308812535474e-05, + "loss": 0.2715, "step": 3544000 }, { "epoch": 2.13, - "learning_rate": 4.116107635834772e-05, - "loss": 0.3421, + "learning_rate": 4.116098815979418e-05, + "loss": 0.2666, "step": 3544500 }, { "epoch": 2.13, - "learning_rate": 4.115897639278716e-05, - "loss": 0.3547, + "learning_rate": 4.115888819423362e-05, + "loss": 0.2732, "step": 3545000 }, { "epoch": 2.13, - "learning_rate": 4.115687642722659e-05, - "loss": 0.3587, + "learning_rate": 4.115679242860417e-05, + "loss": 0.2696, "step": 3545500 }, { "epoch": 2.13, - "learning_rate": 4.1154776461666027e-05, - "loss": 0.3429, + "learning_rate": 4.1154692463043604e-05, + "loss": 0.2686, "step": 3546000 }, { "epoch": 2.13, - "learning_rate": 4.115267649610547e-05, - "loss": 0.3569, + "learning_rate": 4.115259249748304e-05, + "loss": 0.266, "step": 3546500 }, { "epoch": 2.13, - "learning_rate": 4.115058073047602e-05, - "loss": 0.3614, + "learning_rate": 4.115049253192248e-05, + "loss": 0.2748, "step": 3547000 }, { "epoch": 2.13, - "learning_rate": 4.1148480764915454e-05, - "loss": 0.352, + "learning_rate": 4.114839676629303e-05, + "loss": 0.2657, "step": 3547500 }, { "epoch": 2.13, - "learning_rate": 4.1146380799354894e-05, - "loss": 0.3492, + "learning_rate": 4.1146296800732465e-05, + "loss": 0.2714, "step": 3548000 }, { "epoch": 2.13, - "learning_rate": 4.114428083379433e-05, - "loss": 0.3466, + "learning_rate": 4.1144196835171905e-05, + "loss": 0.2638, "step": 3548500 }, { "epoch": 2.13, - "learning_rate": 4.114218086823376e-05, - "loss": 0.3561, + "learning_rate": 4.114209686961134e-05, + "loss": 0.2668, "step": 3549000 }, { "epoch": 2.13, - "learning_rate": 4.11400809026732e-05, - "loss": 0.3532, + "learning_rate": 4.113999690405077e-05, + "loss": 0.2695, "step": 3549500 }, { "epoch": 2.13, - "learning_rate": 4.1137980937112635e-05, - "loss": 0.3601, + "learning_rate": 4.113789693849021e-05, + "loss": 0.2686, "step": 3550000 }, { "epoch": 2.13, - "learning_rate": 4.113588517148319e-05, - "loss": 0.3477, + "learning_rate": 4.1135796972929645e-05, + "loss": 0.2678, "step": 3550500 }, { "epoch": 2.13, - "learning_rate": 4.113378520592262e-05, - "loss": 0.3559, + "learning_rate": 4.11337012073002e-05, + "loss": 0.2708, "step": 3551000 }, { "epoch": 2.13, - "learning_rate": 4.113168524036206e-05, - "loss": 0.3644, + "learning_rate": 4.113160124173963e-05, + "loss": 0.2746, "step": 3551500 }, { "epoch": 2.13, - "learning_rate": 4.1129585274801495e-05, - "loss": 0.3505, + "learning_rate": 4.112950127617907e-05, + "loss": 0.273, "step": 3552000 }, { "epoch": 2.13, - "learning_rate": 4.112748530924092e-05, - "loss": 0.3448, + "learning_rate": 4.1127401310618506e-05, + "loss": 0.2645, "step": 3552500 }, { "epoch": 2.13, - "learning_rate": 4.112538534368036e-05, - "loss": 0.3517, + "learning_rate": 4.112530134505794e-05, + "loss": 0.2669, "step": 3553000 }, { "epoch": 2.13, - "learning_rate": 4.1123285378119796e-05, - "loss": 0.3498, + "learning_rate": 4.112320137949738e-05, + "loss": 0.268, "step": 3553500 }, { "epoch": 2.13, - "learning_rate": 4.112118541255923e-05, - "loss": 0.3541, + "learning_rate": 4.112110141393681e-05, + "loss": 0.2717, "step": 3554000 }, { "epoch": 2.13, - "learning_rate": 4.111908964692979e-05, - "loss": 0.3547, + "learning_rate": 4.111900144837624e-05, + "loss": 0.272, "step": 3554500 }, { "epoch": 2.13, - "learning_rate": 4.111699388130035e-05, - "loss": 0.3557, + "learning_rate": 4.11169056827468e-05, + "loss": 0.2739, "step": 3555000 }, { "epoch": 2.13, - "learning_rate": 4.111489391573978e-05, - "loss": 0.3533, + "learning_rate": 4.111480571718624e-05, + "loss": 0.2733, "step": 3555500 }, { "epoch": 2.13, - "learning_rate": 4.111279395017922e-05, - "loss": 0.3529, + "learning_rate": 4.1112705751625674e-05, + "loss": 0.2685, "step": 3556000 }, { "epoch": 2.13, - "learning_rate": 4.111069398461866e-05, - "loss": 0.3524, + "learning_rate": 4.111060578606511e-05, + "loss": 0.2711, "step": 3556500 }, { "epoch": 2.13, - "learning_rate": 4.110859401905809e-05, - "loss": 0.3542, + "learning_rate": 4.110850582050454e-05, + "loss": 0.2718, "step": 3557000 }, { "epoch": 2.13, - "learning_rate": 4.110649405349752e-05, - "loss": 0.3519, + "learning_rate": 4.11064100548751e-05, + "loss": 0.272, "step": 3557500 }, { "epoch": 2.13, - "learning_rate": 4.110439408793696e-05, - "loss": 0.3618, + "learning_rate": 4.1104310089314535e-05, + "loss": 0.2701, "step": 3558000 }, { "epoch": 2.13, - "learning_rate": 4.110229412237639e-05, - "loss": 0.3572, + "learning_rate": 4.1102210123753975e-05, + "loss": 0.2684, "step": 3558500 }, { "epoch": 2.13, - "learning_rate": 4.110019835674695e-05, - "loss": 0.3628, + "learning_rate": 4.110011015819341e-05, + "loss": 0.276, "step": 3559000 }, { "epoch": 2.13, - "learning_rate": 4.1098098391186385e-05, - "loss": 0.3519, + "learning_rate": 4.109801439256396e-05, + "loss": 0.271, "step": 3559500 }, { "epoch": 2.13, - "learning_rate": 4.109599842562582e-05, - "loss": 0.3478, + "learning_rate": 4.1095914427003396e-05, + "loss": 0.272, "step": 3560000 }, { "epoch": 2.13, - "learning_rate": 4.109389846006525e-05, - "loss": 0.359, + "learning_rate": 4.109381866137395e-05, + "loss": 0.2711, "step": 3560500 }, { "epoch": 2.13, - "learning_rate": 4.109180269443581e-05, - "loss": 0.3565, + "learning_rate": 4.109171869581339e-05, + "loss": 0.2719, "step": 3561000 }, { "epoch": 2.14, - "learning_rate": 4.1089702728875245e-05, - "loss": 0.3588, + "learning_rate": 4.108961873025282e-05, + "loss": 0.2759, "step": 3561500 }, { "epoch": 2.14, - "learning_rate": 4.108760276331468e-05, - "loss": 0.3603, + "learning_rate": 4.1087518764692256e-05, + "loss": 0.2715, "step": 3562000 }, { "epoch": 2.14, - "learning_rate": 4.108550279775411e-05, - "loss": 0.349, + "learning_rate": 4.1085418799131696e-05, + "loss": 0.2645, "step": 3562500 }, { "epoch": 2.14, - "learning_rate": 4.108340703212467e-05, - "loss": 0.3529, + "learning_rate": 4.108331883357113e-05, + "loss": 0.2798, "step": 3563000 }, { "epoch": 2.14, - "learning_rate": 4.108130706656411e-05, - "loss": 0.3595, + "learning_rate": 4.1081218868010563e-05, + "loss": 0.2666, "step": 3563500 }, { "epoch": 2.14, - "learning_rate": 4.1079207101003546e-05, - "loss": 0.3501, + "learning_rate": 4.107911890245e-05, + "loss": 0.2695, "step": 3564000 }, { "epoch": 2.14, - "learning_rate": 4.107710713544297e-05, - "loss": 0.3511, + "learning_rate": 4.107702313682056e-05, + "loss": 0.2702, "step": 3564500 }, { "epoch": 2.14, - "learning_rate": 4.1075015569744654e-05, - "loss": 0.3493, + "learning_rate": 4.107492317125999e-05, + "loss": 0.2724, "step": 3565000 }, { "epoch": 2.14, - "learning_rate": 4.107291560418409e-05, - "loss": 0.3601, + "learning_rate": 4.107282320569943e-05, + "loss": 0.2726, "step": 3565500 }, { "epoch": 2.14, - "learning_rate": 4.107081563862353e-05, - "loss": 0.3576, + "learning_rate": 4.1070723240138864e-05, + "loss": 0.2685, "step": 3566000 }, { "epoch": 2.14, - "learning_rate": 4.106871567306296e-05, - "loss": 0.3574, + "learning_rate": 4.106862747450942e-05, + "loss": 0.2726, "step": 3566500 }, { "epoch": 2.14, - "learning_rate": 4.1066615707502394e-05, - "loss": 0.3543, + "learning_rate": 4.106652750894885e-05, + "loss": 0.2702, "step": 3567000 }, { "epoch": 2.14, - "learning_rate": 4.1064515741941834e-05, - "loss": 0.3553, + "learning_rate": 4.106442754338829e-05, + "loss": 0.2687, "step": 3567500 }, { "epoch": 2.14, - "learning_rate": 4.106241577638127e-05, - "loss": 0.3492, + "learning_rate": 4.1062327577827725e-05, + "loss": 0.2704, "step": 3568000 }, { "epoch": 2.14, - "learning_rate": 4.10603158108207e-05, - "loss": 0.3469, + "learning_rate": 4.106022761226716e-05, + "loss": 0.2661, "step": 3568500 }, { "epoch": 2.14, - "learning_rate": 4.105822004519126e-05, - "loss": 0.3548, + "learning_rate": 4.105813184663771e-05, + "loss": 0.2691, "step": 3569000 }, { "epoch": 2.14, - "learning_rate": 4.1056120079630695e-05, - "loss": 0.3564, + "learning_rate": 4.105603608100827e-05, + "loss": 0.2766, "step": 3569500 }, { "epoch": 2.14, - "learning_rate": 4.105402011407013e-05, - "loss": 0.3543, + "learning_rate": 4.1053936115447706e-05, + "loss": 0.2734, "step": 3570000 }, { "epoch": 2.14, - "learning_rate": 4.105192434844068e-05, - "loss": 0.3597, + "learning_rate": 4.105183614988714e-05, + "loss": 0.2737, "step": 3570500 }, { "epoch": 2.14, - "learning_rate": 4.104982438288012e-05, - "loss": 0.3573, + "learning_rate": 4.104973618432658e-05, + "loss": 0.2739, "step": 3571000 }, { "epoch": 2.14, - "learning_rate": 4.1047724417319556e-05, - "loss": 0.3516, + "learning_rate": 4.104763621876601e-05, + "loss": 0.2659, "step": 3571500 }, { "epoch": 2.14, - "learning_rate": 4.104562445175899e-05, - "loss": 0.3439, + "learning_rate": 4.1045536253205447e-05, + "loss": 0.2644, "step": 3572000 }, { "epoch": 2.14, - "learning_rate": 4.104352448619843e-05, - "loss": 0.3478, + "learning_rate": 4.104343628764489e-05, + "loss": 0.2637, "step": 3572500 }, { "epoch": 2.14, - "learning_rate": 4.104142452063786e-05, - "loss": 0.337, + "learning_rate": 4.104133632208432e-05, + "loss": 0.2605, "step": 3573000 }, { "epoch": 2.14, - "learning_rate": 4.1039324555077296e-05, - "loss": 0.3531, + "learning_rate": 4.1039240556454874e-05, + "loss": 0.2673, "step": 3573500 }, { "epoch": 2.14, - "learning_rate": 4.103722878944785e-05, - "loss": 0.3526, + "learning_rate": 4.103714059089431e-05, + "loss": 0.269, "step": 3574000 }, { "epoch": 2.14, - "learning_rate": 4.103512882388729e-05, - "loss": 0.3487, + "learning_rate": 4.103504062533375e-05, + "loss": 0.2686, "step": 3574500 }, { "epoch": 2.14, - "learning_rate": 4.1033028858326724e-05, - "loss": 0.354, + "learning_rate": 4.103294065977318e-05, + "loss": 0.2702, "step": 3575000 }, { "epoch": 2.14, - "learning_rate": 4.103092889276616e-05, - "loss": 0.3431, + "learning_rate": 4.1030840694212614e-05, + "loss": 0.2617, "step": 3575500 }, { "epoch": 2.14, - "learning_rate": 4.10288289272056e-05, - "loss": 0.362, + "learning_rate": 4.102874492858317e-05, + "loss": 0.2686, "step": 3576000 }, { "epoch": 2.14, - "learning_rate": 4.1026728961645024e-05, - "loss": 0.3575, + "learning_rate": 4.102664496302261e-05, + "loss": 0.274, "step": 3576500 }, { "epoch": 2.14, - "learning_rate": 4.1024628996084464e-05, - "loss": 0.3548, + "learning_rate": 4.102454499746204e-05, + "loss": 0.2624, "step": 3577000 }, { "epoch": 2.14, - "learning_rate": 4.1022533230455025e-05, - "loss": 0.3451, + "learning_rate": 4.1022445031901475e-05, + "loss": 0.2702, "step": 3577500 }, { "epoch": 2.15, - "learning_rate": 4.102043326489446e-05, - "loss": 0.3442, + "learning_rate": 4.1020349266272036e-05, + "loss": 0.263, "step": 3578000 }, { "epoch": 2.15, - "learning_rate": 4.101833329933389e-05, - "loss": 0.3429, + "learning_rate": 4.101824930071147e-05, + "loss": 0.266, "step": 3578500 }, { "epoch": 2.15, - "learning_rate": 4.1016233333773325e-05, - "loss": 0.3536, + "learning_rate": 4.10161493351509e-05, + "loss": 0.2703, "step": 3579000 }, { "epoch": 2.15, - "learning_rate": 4.101413336821276e-05, - "loss": 0.3529, + "learning_rate": 4.101404936959034e-05, + "loss": 0.2645, "step": 3579500 }, { "epoch": 2.15, - "learning_rate": 4.101203760258332e-05, - "loss": 0.3645, + "learning_rate": 4.1011953603960896e-05, + "loss": 0.2641, "step": 3580000 }, { "epoch": 2.15, - "learning_rate": 4.100993763702275e-05, - "loss": 0.3533, + "learning_rate": 4.100985363840033e-05, + "loss": 0.2704, "step": 3580500 }, { "epoch": 2.15, - "learning_rate": 4.100783767146219e-05, - "loss": 0.3564, + "learning_rate": 4.100775367283976e-05, + "loss": 0.273, "step": 3581000 }, { "epoch": 2.15, - "learning_rate": 4.100573770590162e-05, - "loss": 0.352, + "learning_rate": 4.1005653707279203e-05, + "loss": 0.27, "step": 3581500 }, { "epoch": 2.15, - "learning_rate": 4.100363774034105e-05, - "loss": 0.3555, + "learning_rate": 4.100355794164976e-05, + "loss": 0.2707, "step": 3582000 }, { "epoch": 2.15, - "learning_rate": 4.100154197471161e-05, - "loss": 0.3473, + "learning_rate": 4.100145797608919e-05, + "loss": 0.267, "step": 3582500 }, { "epoch": 2.15, - "learning_rate": 4.099944200915105e-05, - "loss": 0.3577, + "learning_rate": 4.0999358010528624e-05, + "loss": 0.2724, "step": 3583000 }, { "epoch": 2.15, - "learning_rate": 4.099734204359048e-05, - "loss": 0.3504, + "learning_rate": 4.0997258044968064e-05, + "loss": 0.2659, "step": 3583500 }, { "epoch": 2.15, - "learning_rate": 4.099524207802992e-05, - "loss": 0.3501, + "learning_rate": 4.099516227933862e-05, + "loss": 0.2639, "step": 3584000 }, { "epoch": 2.15, - "learning_rate": 4.0993142112469354e-05, - "loss": 0.3578, + "learning_rate": 4.099306231377805e-05, + "loss": 0.2692, "step": 3584500 }, { "epoch": 2.15, - "learning_rate": 4.099104214690879e-05, - "loss": 0.3541, + "learning_rate": 4.099096234821749e-05, + "loss": 0.2706, "step": 3585000 }, { "epoch": 2.15, - "learning_rate": 4.098894218134823e-05, - "loss": 0.3476, + "learning_rate": 4.0988862382656925e-05, + "loss": 0.2719, "step": 3585500 }, { "epoch": 2.15, - "learning_rate": 4.098684641571878e-05, - "loss": 0.3526, + "learning_rate": 4.098676241709636e-05, + "loss": 0.2697, "step": 3586000 }, { "epoch": 2.15, - "learning_rate": 4.0984746450158214e-05, - "loss": 0.3484, + "learning_rate": 4.09846624515358e-05, + "loss": 0.2648, "step": 3586500 }, { "epoch": 2.15, - "learning_rate": 4.098264648459765e-05, - "loss": 0.3538, + "learning_rate": 4.098256248597523e-05, + "loss": 0.2701, "step": 3587000 }, { "epoch": 2.15, - "learning_rate": 4.098054651903709e-05, - "loss": 0.3678, + "learning_rate": 4.0980462520414665e-05, + "loss": 0.2768, "step": 3587500 }, { "epoch": 2.15, - "learning_rate": 4.097844655347652e-05, - "loss": 0.3411, + "learning_rate": 4.097836675478522e-05, + "loss": 0.2658, "step": 3588000 }, { "epoch": 2.15, - "learning_rate": 4.0976346587915955e-05, - "loss": 0.3536, + "learning_rate": 4.097626678922466e-05, + "loss": 0.2683, "step": 3588500 }, { "epoch": 2.15, - "learning_rate": 4.0974246622355395e-05, - "loss": 0.3551, + "learning_rate": 4.097416682366409e-05, + "loss": 0.2687, "step": 3589000 }, { "epoch": 2.15, - "learning_rate": 4.097214665679483e-05, - "loss": 0.346, + "learning_rate": 4.0972066858103526e-05, + "loss": 0.2636, "step": 3589500 }, { "epoch": 2.15, - "learning_rate": 4.097005509109651e-05, - "loss": 0.3474, + "learning_rate": 4.096997109247408e-05, + "loss": 0.2631, "step": 3590000 }, { "epoch": 2.15, - "learning_rate": 4.096795512553594e-05, - "loss": 0.3514, + "learning_rate": 4.096787112691352e-05, + "loss": 0.2732, "step": 3590500 }, { "epoch": 2.15, - "learning_rate": 4.0965855159975376e-05, - "loss": 0.3498, + "learning_rate": 4.0965771161352953e-05, + "loss": 0.2665, "step": 3591000 }, { "epoch": 2.15, - "learning_rate": 4.096375519441481e-05, - "loss": 0.3462, + "learning_rate": 4.096367119579239e-05, + "loss": 0.2616, "step": 3591500 }, { "epoch": 2.15, - "learning_rate": 4.096165522885424e-05, - "loss": 0.3495, + "learning_rate": 4.096157123023183e-05, + "loss": 0.2729, "step": 3592000 }, { "epoch": 2.15, - "learning_rate": 4.0959559463224803e-05, - "loss": 0.3485, + "learning_rate": 4.095947546460238e-05, + "loss": 0.2659, "step": 3592500 }, { "epoch": 2.15, - "learning_rate": 4.095745949766424e-05, - "loss": 0.3596, + "learning_rate": 4.0957375499041814e-05, + "loss": 0.2724, "step": 3593000 }, { "epoch": 2.15, - "learning_rate": 4.095535953210367e-05, - "loss": 0.3588, + "learning_rate": 4.0955275533481254e-05, + "loss": 0.2759, "step": 3593500 }, { "epoch": 2.15, - "learning_rate": 4.0953259566543104e-05, - "loss": 0.3504, + "learning_rate": 4.095317976785181e-05, + "loss": 0.2701, "step": 3594000 }, { "epoch": 2.16, - "learning_rate": 4.0951159600982544e-05, - "loss": 0.3685, + "learning_rate": 4.095107980229124e-05, + "loss": 0.2748, "step": 3594500 }, { "epoch": 2.16, - "learning_rate": 4.0949063835353104e-05, - "loss": 0.3504, + "learning_rate": 4.0948979836730675e-05, + "loss": 0.2681, "step": 3595000 }, { "epoch": 2.16, - "learning_rate": 4.094696386979253e-05, - "loss": 0.3546, + "learning_rate": 4.0946879871170115e-05, + "loss": 0.2729, "step": 3595500 }, { "epoch": 2.16, - "learning_rate": 4.0944863904231964e-05, - "loss": 0.3539, + "learning_rate": 4.094477990560955e-05, + "loss": 0.2679, "step": 3596000 }, { "epoch": 2.16, - "learning_rate": 4.0942763938671405e-05, - "loss": 0.3326, + "learning_rate": 4.094267994004898e-05, + "loss": 0.2633, "step": 3596500 }, { "epoch": 2.16, - "learning_rate": 4.094066397311084e-05, - "loss": 0.3473, + "learning_rate": 4.094057997448842e-05, + "loss": 0.2685, "step": 3597000 }, { "epoch": 2.16, - "learning_rate": 4.09385682074814e-05, - "loss": 0.3527, + "learning_rate": 4.093848000892785e-05, + "loss": 0.2692, "step": 3597500 }, { "epoch": 2.16, - "learning_rate": 4.093646824192083e-05, - "loss": 0.3496, + "learning_rate": 4.093638004336728e-05, + "loss": 0.2711, "step": 3598000 }, { "epoch": 2.16, - "learning_rate": 4.0934368276360265e-05, - "loss": 0.3435, + "learning_rate": 4.093428427773785e-05, + "loss": 0.2626, "step": 3598500 }, { "epoch": 2.16, - "learning_rate": 4.09322683107997e-05, - "loss": 0.3522, + "learning_rate": 4.09321885121084e-05, + "loss": 0.2664, "step": 3599000 }, { "epoch": 2.16, - "learning_rate": 4.093016834523914e-05, - "loss": 0.3535, + "learning_rate": 4.0930088546547837e-05, + "loss": 0.2727, "step": 3599500 }, { "epoch": 2.16, - "learning_rate": 4.09280725796097e-05, - "loss": 0.3483, + "learning_rate": 4.092798858098727e-05, + "loss": 0.2662, "step": 3600000 }, { "epoch": 2.16, - "eval_loss": 0.3436375558376312, - "eval_runtime": 1120.5227, - "eval_samples_per_second": 470.066, - "eval_steps_per_second": 78.345, + "eval_loss": 0.24784673750400543, + "eval_runtime": 1466.7303, + "eval_samples_per_second": 359.112, + "eval_steps_per_second": 59.852, "step": 3600000 }, { "epoch": 2.16, - "learning_rate": 4.0925972614049126e-05, - "loss": 0.3604, + "learning_rate": 4.092588861542671e-05, + "loss": 0.2682, "step": 3600500 }, { "epoch": 2.16, - "learning_rate": 4.092387264848856e-05, - "loss": 0.347, + "learning_rate": 4.0923788649866144e-05, + "loss": 0.2684, "step": 3601000 }, { "epoch": 2.16, - "learning_rate": 4.092177688285912e-05, - "loss": 0.3591, + "learning_rate": 4.092168868430558e-05, + "loss": 0.2771, "step": 3601500 }, { "epoch": 2.16, - "learning_rate": 4.091967691729856e-05, - "loss": 0.3539, + "learning_rate": 4.091958871874502e-05, + "loss": 0.2649, "step": 3602000 }, { "epoch": 2.16, - "learning_rate": 4.0917576951737994e-05, - "loss": 0.3507, + "learning_rate": 4.0917488753184444e-05, + "loss": 0.2679, "step": 3602500 }, { "epoch": 2.16, - "learning_rate": 4.091547698617742e-05, - "loss": 0.3558, + "learning_rate": 4.091538878762388e-05, + "loss": 0.2688, "step": 3603000 }, { "epoch": 2.16, - "learning_rate": 4.091337702061686e-05, - "loss": 0.3687, + "learning_rate": 4.091329302199444e-05, + "loss": 0.278, "step": 3603500 }, { "epoch": 2.16, - "learning_rate": 4.0911277055056294e-05, - "loss": 0.3462, + "learning_rate": 4.091119305643388e-05, + "loss": 0.2628, "step": 3604000 }, { "epoch": 2.16, - "learning_rate": 4.090917708949573e-05, - "loss": 0.3461, + "learning_rate": 4.0909093090873305e-05, + "loss": 0.2669, "step": 3604500 }, { "epoch": 2.16, - "learning_rate": 4.090707712393517e-05, - "loss": 0.3578, + "learning_rate": 4.0906997325243865e-05, + "loss": 0.2752, "step": 3605000 }, { "epoch": 2.16, - "learning_rate": 4.09049771583746e-05, - "loss": 0.3603, + "learning_rate": 4.0904897359683305e-05, + "loss": 0.2669, "step": 3605500 }, { "epoch": 2.16, - "learning_rate": 4.0902881392745155e-05, - "loss": 0.358, + "learning_rate": 4.090279739412274e-05, + "loss": 0.2746, "step": 3606000 }, { "epoch": 2.16, - "learning_rate": 4.0900781427184595e-05, - "loss": 0.3639, + "learning_rate": 4.090069742856217e-05, + "loss": 0.2752, "step": 3606500 }, { "epoch": 2.16, - "learning_rate": 4.089868146162403e-05, - "loss": 0.3461, + "learning_rate": 4.0898597463001606e-05, + "loss": 0.2682, "step": 3607000 }, { "epoch": 2.16, - "learning_rate": 4.089658149606346e-05, - "loss": 0.3558, + "learning_rate": 4.089649749744104e-05, + "loss": 0.2765, "step": 3607500 }, { "epoch": 2.16, - "learning_rate": 4.08944815305029e-05, - "loss": 0.3626, + "learning_rate": 4.089439753188047e-05, + "loss": 0.2743, "step": 3608000 }, { "epoch": 2.16, - "learning_rate": 4.0892385764873456e-05, - "loss": 0.353, + "learning_rate": 4.089229756631991e-05, + "loss": 0.2679, "step": 3608500 }, { "epoch": 2.16, - "learning_rate": 4.089028579931289e-05, - "loss": 0.3489, + "learning_rate": 4.0890197600759346e-05, + "loss": 0.2679, "step": 3609000 }, { "epoch": 2.16, - "learning_rate": 4.088818583375232e-05, - "loss": 0.3402, + "learning_rate": 4.08881018351299e-05, + "loss": 0.2627, "step": 3609500 }, { "epoch": 2.16, - "learning_rate": 4.088608586819176e-05, - "loss": 0.3399, + "learning_rate": 4.088601026943158e-05, + "loss": 0.2619, "step": 3610000 }, { "epoch": 2.16, - "learning_rate": 4.0883985902631196e-05, - "loss": 0.3496, + "learning_rate": 4.0883910303871014e-05, + "loss": 0.2704, "step": 3610500 }, { "epoch": 2.16, - "learning_rate": 4.088188593707063e-05, - "loss": 0.3459, + "learning_rate": 4.0881810338310454e-05, + "loss": 0.2675, "step": 3611000 }, { "epoch": 2.17, - "learning_rate": 4.087978597151007e-05, - "loss": 0.3395, + "learning_rate": 4.087971037274989e-05, + "loss": 0.2688, "step": 3611500 }, { "epoch": 2.17, - "learning_rate": 4.0877686005949504e-05, - "loss": 0.35, + "learning_rate": 4.087761040718932e-05, + "loss": 0.2697, "step": 3612000 }, { "epoch": 2.17, - "learning_rate": 4.087559024032006e-05, - "loss": 0.3542, + "learning_rate": 4.087551044162876e-05, + "loss": 0.269, "step": 3612500 }, { "epoch": 2.17, - "learning_rate": 4.08734902747595e-05, - "loss": 0.3483, + "learning_rate": 4.0873410476068195e-05, + "loss": 0.27, "step": 3613000 }, { "epoch": 2.17, - "learning_rate": 4.087139030919893e-05, - "loss": 0.3571, + "learning_rate": 4.087131051050763e-05, + "loss": 0.2647, "step": 3613500 }, { "epoch": 2.17, - "learning_rate": 4.0869290343638364e-05, - "loss": 0.3559, + "learning_rate": 4.086921054494707e-05, + "loss": 0.2708, "step": 3614000 }, { "epoch": 2.17, - "learning_rate": 4.086719457800892e-05, - "loss": 0.3523, + "learning_rate": 4.0867110579386495e-05, + "loss": 0.2747, "step": 3614500 }, { "epoch": 2.17, - "learning_rate": 4.086509461244836e-05, - "loss": 0.35, + "learning_rate": 4.086501061382593e-05, + "loss": 0.2723, "step": 3615000 }, { "epoch": 2.17, - "learning_rate": 4.086299464688779e-05, - "loss": 0.3491, + "learning_rate": 4.086291064826537e-05, + "loss": 0.2649, "step": 3615500 }, { "epoch": 2.17, - "learning_rate": 4.0860894681327225e-05, - "loss": 0.3522, + "learning_rate": 4.086081488263593e-05, + "loss": 0.266, "step": 3616000 }, { "epoch": 2.17, - "learning_rate": 4.085879891569778e-05, - "loss": 0.3542, + "learning_rate": 4.0858714917075356e-05, + "loss": 0.276, "step": 3616500 }, { "epoch": 2.17, - "learning_rate": 4.085669895013722e-05, - "loss": 0.3505, + "learning_rate": 4.085661495151479e-05, + "loss": 0.2676, "step": 3617000 }, { "epoch": 2.17, - "learning_rate": 4.085459898457665e-05, - "loss": 0.3555, + "learning_rate": 4.085451498595423e-05, + "loss": 0.2701, "step": 3617500 }, { "epoch": 2.17, - "learning_rate": 4.0852503218947206e-05, - "loss": 0.362, + "learning_rate": 4.085241922032479e-05, + "loss": 0.2786, "step": 3618000 }, { "epoch": 2.17, - "learning_rate": 4.085040325338664e-05, - "loss": 0.351, + "learning_rate": 4.0850319254764223e-05, + "loss": 0.2663, "step": 3618500 }, { "epoch": 2.17, - "learning_rate": 4.084830328782608e-05, - "loss": 0.3463, + "learning_rate": 4.084821928920366e-05, + "loss": 0.2659, "step": 3619000 }, { "epoch": 2.17, - "learning_rate": 4.084620752219663e-05, - "loss": 0.355, + "learning_rate": 4.084611932364309e-05, + "loss": 0.2708, "step": 3619500 }, { "epoch": 2.17, - "learning_rate": 4.0844107556636067e-05, - "loss": 0.3561, + "learning_rate": 4.0844019358082524e-05, + "loss": 0.2656, "step": 3620000 }, { "epoch": 2.17, - "learning_rate": 4.084200759107551e-05, - "loss": 0.3553, + "learning_rate": 4.0841919392521964e-05, + "loss": 0.2728, "step": 3620500 }, { "epoch": 2.17, - "learning_rate": 4.083990762551494e-05, - "loss": 0.3674, + "learning_rate": 4.08398194269614e-05, + "loss": 0.2719, "step": 3621000 }, { "epoch": 2.17, - "learning_rate": 4.0837807659954374e-05, - "loss": 0.3578, + "learning_rate": 4.083771946140083e-05, + "loss": 0.2692, "step": 3621500 }, { "epoch": 2.17, - "learning_rate": 4.0835707694393814e-05, - "loss": 0.3548, + "learning_rate": 4.0835623695771385e-05, + "loss": 0.2697, "step": 3622000 }, { "epoch": 2.17, - "learning_rate": 4.083360772883325e-05, - "loss": 0.3478, + "learning_rate": 4.0833523730210825e-05, + "loss": 0.2658, "step": 3622500 }, { "epoch": 2.17, - "learning_rate": 4.083150776327268e-05, - "loss": 0.3489, + "learning_rate": 4.083142376465026e-05, + "loss": 0.2635, "step": 3623000 }, { "epoch": 2.17, - "learning_rate": 4.082940779771212e-05, - "loss": 0.3521, + "learning_rate": 4.082932379908969e-05, + "loss": 0.2712, "step": 3623500 }, { "epoch": 2.17, - "learning_rate": 4.0827307832151555e-05, - "loss": 0.3453, + "learning_rate": 4.082722383352913e-05, + "loss": 0.2668, "step": 3624000 }, { "epoch": 2.17, - "learning_rate": 4.082520786659099e-05, - "loss": 0.356, + "learning_rate": 4.0825128067899685e-05, + "loss": 0.2675, "step": 3624500 }, { "epoch": 2.17, - "learning_rate": 4.082310790103042e-05, - "loss": 0.3488, + "learning_rate": 4.082302810233912e-05, + "loss": 0.2681, "step": 3625000 }, { "epoch": 2.17, - "learning_rate": 4.082101213540098e-05, - "loss": 0.3431, + "learning_rate": 4.082092813677855e-05, + "loss": 0.261, "step": 3625500 }, { "epoch": 2.17, - "learning_rate": 4.0818912169840415e-05, - "loss": 0.3541, + "learning_rate": 4.081882817121799e-05, + "loss": 0.2663, "step": 3626000 }, { "epoch": 2.17, - "learning_rate": 4.081681220427985e-05, - "loss": 0.3549, + "learning_rate": 4.0816732405588546e-05, + "loss": 0.2725, "step": 3626500 }, { "epoch": 2.17, - "learning_rate": 4.081471223871929e-05, - "loss": 0.3575, + "learning_rate": 4.081463244002798e-05, + "loss": 0.2737, "step": 3627000 }, { "epoch": 2.17, - "learning_rate": 4.081261647308984e-05, - "loss": 0.3464, + "learning_rate": 4.081253247446742e-05, + "loss": 0.2647, "step": 3627500 }, { "epoch": 2.18, - "learning_rate": 4.0810516507529276e-05, - "loss": 0.3489, + "learning_rate": 4.081043250890685e-05, + "loss": 0.2656, "step": 3628000 }, { "epoch": 2.18, - "learning_rate": 4.080842074189983e-05, - "loss": 0.3466, + "learning_rate": 4.080833674327741e-05, + "loss": 0.2647, "step": 3628500 }, { "epoch": 2.18, - "learning_rate": 4.080632077633927e-05, - "loss": 0.3506, + "learning_rate": 4.080623677771684e-05, + "loss": 0.2737, "step": 3629000 }, { "epoch": 2.18, - "learning_rate": 4.08042208107787e-05, - "loss": 0.3456, + "learning_rate": 4.080413681215628e-05, + "loss": 0.2696, "step": 3629500 }, { "epoch": 2.18, - "learning_rate": 4.080212084521814e-05, - "loss": 0.3593, + "learning_rate": 4.0802036846595714e-05, + "loss": 0.2751, "step": 3630000 }, { "epoch": 2.18, - "learning_rate": 4.080002087965758e-05, - "loss": 0.3585, + "learning_rate": 4.0799941080966274e-05, + "loss": 0.2701, "step": 3630500 }, { "epoch": 2.18, - "learning_rate": 4.079792091409701e-05, - "loss": 0.3525, + "learning_rate": 4.07978411154057e-05, + "loss": 0.2711, "step": 3631000 }, { "epoch": 2.18, - "learning_rate": 4.0795820948536444e-05, - "loss": 0.3563, + "learning_rate": 4.079574114984514e-05, + "loss": 0.2707, "step": 3631500 }, { "epoch": 2.18, - "learning_rate": 4.079372098297588e-05, - "loss": 0.3521, + "learning_rate": 4.0793641184284575e-05, + "loss": 0.2702, "step": 3632000 }, { "epoch": 2.18, - "learning_rate": 4.079162521734644e-05, - "loss": 0.3517, + "learning_rate": 4.079154121872401e-05, + "loss": 0.2659, "step": 3632500 }, { "epoch": 2.18, - "learning_rate": 4.078952525178587e-05, - "loss": 0.3511, + "learning_rate": 4.078944545309457e-05, + "loss": 0.2692, "step": 3633000 }, { "epoch": 2.18, - "learning_rate": 4.0787425286225305e-05, - "loss": 0.3503, + "learning_rate": 4.0787345487534e-05, + "loss": 0.2695, "step": 3633500 }, { "epoch": 2.18, - "learning_rate": 4.0785329520595865e-05, - "loss": 0.3447, + "learning_rate": 4.0785245521973436e-05, + "loss": 0.2652, "step": 3634000 }, { "epoch": 2.18, - "learning_rate": 4.07832295550353e-05, - "loss": 0.3561, + "learning_rate": 4.0783145556412876e-05, + "loss": 0.2681, "step": 3634500 }, { "epoch": 2.18, - "learning_rate": 4.078112958947473e-05, - "loss": 0.3517, + "learning_rate": 4.078104559085231e-05, + "loss": 0.2663, "step": 3635000 }, { "epoch": 2.18, - "learning_rate": 4.077902962391417e-05, - "loss": 0.3559, + "learning_rate": 4.077894982522286e-05, + "loss": 0.2711, "step": 3635500 }, { "epoch": 2.18, - "learning_rate": 4.0776929658353606e-05, - "loss": 0.3514, + "learning_rate": 4.0776849859662296e-05, + "loss": 0.2639, "step": 3636000 }, { "epoch": 2.18, - "learning_rate": 4.077482969279304e-05, - "loss": 0.3568, + "learning_rate": 4.0774749894101736e-05, + "loss": 0.279, "step": 3636500 }, { "epoch": 2.18, - "learning_rate": 4.077272972723247e-05, - "loss": 0.3601, + "learning_rate": 4.077264992854117e-05, + "loss": 0.269, "step": 3637000 }, { "epoch": 2.18, - "learning_rate": 4.077063396160303e-05, - "loss": 0.3548, + "learning_rate": 4.0770549962980603e-05, + "loss": 0.2723, "step": 3637500 }, { "epoch": 2.18, - "learning_rate": 4.0768533996042466e-05, - "loss": 0.3546, + "learning_rate": 4.076845419735116e-05, + "loss": 0.265, "step": 3638000 }, { "epoch": 2.18, - "learning_rate": 4.07664340304819e-05, - "loss": 0.3532, + "learning_rate": 4.07663542317906e-05, + "loss": 0.2671, "step": 3638500 }, { "epoch": 2.18, - "learning_rate": 4.076433406492134e-05, - "loss": 0.3534, + "learning_rate": 4.076425426623003e-05, + "loss": 0.267, "step": 3639000 }, { "epoch": 2.18, - "learning_rate": 4.076223409936077e-05, - "loss": 0.3562, + "learning_rate": 4.0762154300669464e-05, + "loss": 0.2677, "step": 3639500 }, { "epoch": 2.18, - "learning_rate": 4.07601341338002e-05, - "loss": 0.3502, + "learning_rate": 4.0760058535040024e-05, + "loss": 0.2691, "step": 3640000 }, { "epoch": 2.18, - "learning_rate": 4.075803416823964e-05, - "loss": 0.3471, + "learning_rate": 4.075795856947946e-05, + "loss": 0.2654, "step": 3640500 }, { "epoch": 2.18, - "learning_rate": 4.07559384026102e-05, - "loss": 0.3623, + "learning_rate": 4.075585860391889e-05, + "loss": 0.2689, "step": 3641000 }, { "epoch": 2.18, - "learning_rate": 4.075383843704963e-05, - "loss": 0.3519, + "learning_rate": 4.075375863835833e-05, + "loss": 0.2665, "step": 3641500 }, { "epoch": 2.18, - "learning_rate": 4.075173847148907e-05, - "loss": 0.3509, + "learning_rate": 4.075166287272889e-05, + "loss": 0.2644, "step": 3642000 }, { "epoch": 2.18, - "learning_rate": 4.07496385059285e-05, - "loss": 0.3581, + "learning_rate": 4.0749562907168325e-05, + "loss": 0.2755, "step": 3642500 }, { "epoch": 2.18, - "learning_rate": 4.0747538540367935e-05, - "loss": 0.3574, + "learning_rate": 4.074746294160775e-05, + "loss": 0.2749, "step": 3643000 }, { "epoch": 2.18, - "learning_rate": 4.0745438574807375e-05, - "loss": 0.3502, + "learning_rate": 4.074536297604719e-05, + "loss": 0.2732, "step": 3643500 }, { "epoch": 2.18, - "learning_rate": 4.074334280917793e-05, - "loss": 0.3544, + "learning_rate": 4.074326721041775e-05, + "loss": 0.2705, "step": 3644000 }, { "epoch": 2.19, - "learning_rate": 4.074124284361736e-05, - "loss": 0.3512, + "learning_rate": 4.0741167244857186e-05, + "loss": 0.2704, "step": 3644500 }, { "epoch": 2.19, - "learning_rate": 4.0739142878056795e-05, - "loss": 0.3611, + "learning_rate": 4.073906727929662e-05, + "loss": 0.2723, "step": 3645000 }, { "epoch": 2.19, - "learning_rate": 4.0737042912496236e-05, - "loss": 0.3563, + "learning_rate": 4.073696731373605e-05, + "loss": 0.2694, "step": 3645500 }, { "epoch": 2.19, - "learning_rate": 4.073494294693567e-05, - "loss": 0.3456, + "learning_rate": 4.0734867348175487e-05, + "loss": 0.2675, "step": 3646000 }, { "epoch": 2.19, - "learning_rate": 4.07328429813751e-05, - "loss": 0.3555, + "learning_rate": 4.073276738261492e-05, + "loss": 0.2707, "step": 3646500 }, { "epoch": 2.19, - "learning_rate": 4.073074301581454e-05, - "loss": 0.3508, + "learning_rate": 4.073066741705436e-05, + "loss": 0.2684, "step": 3647000 }, { "epoch": 2.19, - "learning_rate": 4.0728643050253976e-05, - "loss": 0.3574, + "learning_rate": 4.0728567451493794e-05, + "loss": 0.2753, "step": 3647500 }, { "epoch": 2.19, - "learning_rate": 4.072655568448678e-05, - "loss": 0.3474, + "learning_rate": 4.072647168586435e-05, + "loss": 0.2689, "step": 3648000 }, { "epoch": 2.19, - "learning_rate": 4.072445571892621e-05, - "loss": 0.3511, + "learning_rate": 4.072437172030379e-05, + "loss": 0.2686, "step": 3648500 }, { "epoch": 2.19, - "learning_rate": 4.0722355753365644e-05, - "loss": 0.3468, + "learning_rate": 4.072227175474322e-05, + "loss": 0.272, "step": 3649000 }, { "epoch": 2.19, - "learning_rate": 4.0720255787805084e-05, - "loss": 0.3615, + "learning_rate": 4.0720171789182654e-05, + "loss": 0.2688, "step": 3649500 }, { "epoch": 2.19, - "learning_rate": 4.071815582224452e-05, - "loss": 0.3575, + "learning_rate": 4.071807602355321e-05, + "loss": 0.2728, "step": 3650000 }, { "epoch": 2.19, - "learning_rate": 4.071605585668395e-05, - "loss": 0.3514, + "learning_rate": 4.071597605799265e-05, + "loss": 0.2668, "step": 3650500 }, { "epoch": 2.19, - "learning_rate": 4.0713955891123384e-05, - "loss": 0.3429, + "learning_rate": 4.071387609243208e-05, + "loss": 0.266, "step": 3651000 }, { "epoch": 2.19, - "learning_rate": 4.071185592556282e-05, - "loss": 0.3563, + "learning_rate": 4.0711776126871515e-05, + "loss": 0.2696, "step": 3651500 }, { "epoch": 2.19, - "learning_rate": 4.070975596000225e-05, - "loss": 0.3578, + "learning_rate": 4.0709684561173196e-05, + "loss": 0.2718, "step": 3652000 }, { "epoch": 2.19, - "learning_rate": 4.070765599444169e-05, - "loss": 0.355, + "learning_rate": 4.070758459561263e-05, + "loss": 0.2652, "step": 3652500 }, { "epoch": 2.19, - "learning_rate": 4.0705556028881125e-05, - "loss": 0.3511, + "learning_rate": 4.070548463005207e-05, + "loss": 0.2726, "step": 3653000 }, { "epoch": 2.19, - "learning_rate": 4.070345606332056e-05, - "loss": 0.3531, + "learning_rate": 4.07033846644915e-05, + "loss": 0.2692, "step": 3653500 }, { "epoch": 2.19, - "learning_rate": 4.070135609776e-05, - "loss": 0.3475, + "learning_rate": 4.0701284698930936e-05, + "loss": 0.2621, "step": 3654000 }, { "epoch": 2.19, - "learning_rate": 4.069926453206167e-05, - "loss": 0.3575, + "learning_rate": 4.0699184733370376e-05, + "loss": 0.2678, "step": 3654500 }, { "epoch": 2.19, - "learning_rate": 4.069716456650111e-05, - "loss": 0.3712, + "learning_rate": 4.069708896774093e-05, + "loss": 0.2758, "step": 3655000 }, { "epoch": 2.19, - "learning_rate": 4.0695068800871666e-05, - "loss": 0.3678, + "learning_rate": 4.0694989002180364e-05, + "loss": 0.2699, "step": 3655500 }, { "epoch": 2.19, - "learning_rate": 4.06929688353111e-05, - "loss": 0.3474, + "learning_rate": 4.0692889036619804e-05, + "loss": 0.2608, "step": 3656000 }, { "epoch": 2.19, - "learning_rate": 4.069086886975054e-05, - "loss": 0.3502, + "learning_rate": 4.069078907105924e-05, + "loss": 0.2671, "step": 3656500 }, { "epoch": 2.19, - "learning_rate": 4.068876890418997e-05, - "loss": 0.3527, + "learning_rate": 4.0688689105498664e-05, + "loss": 0.2713, "step": 3657000 }, { "epoch": 2.19, - "learning_rate": 4.068666893862941e-05, - "loss": 0.3504, + "learning_rate": 4.0686589139938104e-05, + "loss": 0.2649, "step": 3657500 }, { "epoch": 2.19, - "learning_rate": 4.068456897306885e-05, - "loss": 0.3511, + "learning_rate": 4.0684493374308664e-05, + "loss": 0.2661, "step": 3658000 }, { "epoch": 2.19, - "learning_rate": 4.0682469007508274e-05, - "loss": 0.3577, + "learning_rate": 4.06823934087481e-05, + "loss": 0.2683, "step": 3658500 }, { "epoch": 2.19, - "learning_rate": 4.068036904194771e-05, - "loss": 0.3462, + "learning_rate": 4.068029344318753e-05, + "loss": 0.2637, "step": 3659000 }, { "epoch": 2.19, - "learning_rate": 4.067826907638715e-05, - "loss": 0.3611, + "learning_rate": 4.0678193477626965e-05, + "loss": 0.2747, "step": 3659500 }, { "epoch": 2.19, - "learning_rate": 4.067616911082658e-05, - "loss": 0.3477, + "learning_rate": 4.06760935120664e-05, + "loss": 0.265, "step": 3660000 }, { "epoch": 2.19, - "learning_rate": 4.0674069145266014e-05, - "loss": 0.361, + "learning_rate": 4.067399354650583e-05, + "loss": 0.272, "step": 3660500 }, { "epoch": 2.19, - "learning_rate": 4.067197337963657e-05, - "loss": 0.3645, + "learning_rate": 4.067189358094527e-05, + "loss": 0.2731, "step": 3661000 }, { "epoch": 2.2, - "learning_rate": 4.066987341407601e-05, - "loss": 0.3538, + "learning_rate": 4.0669793615384705e-05, + "loss": 0.2727, "step": 3661500 }, { "epoch": 2.2, - "learning_rate": 4.066777344851544e-05, - "loss": 0.3503, + "learning_rate": 4.066769784975526e-05, + "loss": 0.2626, "step": 3662000 }, { "epoch": 2.2, - "learning_rate": 4.0665673482954875e-05, - "loss": 0.3406, + "learning_rate": 4.06655978841947e-05, + "loss": 0.2638, "step": 3662500 }, { "epoch": 2.2, - "learning_rate": 4.0663573517394315e-05, - "loss": 0.3548, + "learning_rate": 4.066349791863413e-05, + "loss": 0.2703, "step": 3663000 }, { "epoch": 2.2, - "learning_rate": 4.066147355183375e-05, - "loss": 0.3466, + "learning_rate": 4.0661397953073566e-05, + "loss": 0.2696, "step": 3663500 }, { "epoch": 2.2, - "learning_rate": 4.065937358627318e-05, - "loss": 0.3498, + "learning_rate": 4.0659297987513006e-05, + "loss": 0.2667, "step": 3664000 }, { "epoch": 2.2, - "learning_rate": 4.065727362071262e-05, - "loss": 0.3593, + "learning_rate": 4.065720222188356e-05, + "loss": 0.2736, "step": 3664500 }, { "epoch": 2.2, - "learning_rate": 4.0655177855083176e-05, - "loss": 0.3588, + "learning_rate": 4.0655102256322993e-05, + "loss": 0.2736, "step": 3665000 }, { "epoch": 2.2, - "learning_rate": 4.065307788952261e-05, - "loss": 0.3614, + "learning_rate": 4.065300229076243e-05, + "loss": 0.2716, "step": 3665500 }, { "epoch": 2.2, - "learning_rate": 4.065097792396205e-05, - "loss": 0.3556, + "learning_rate": 4.065090232520187e-05, + "loss": 0.2731, "step": 3666000 }, { "epoch": 2.2, - "learning_rate": 4.06488821583326e-05, - "loss": 0.3523, + "learning_rate": 4.064880655957242e-05, + "loss": 0.2675, "step": 3666500 }, { "epoch": 2.2, - "learning_rate": 4.064678219277204e-05, - "loss": 0.3564, + "learning_rate": 4.0646706594011854e-05, + "loss": 0.2668, "step": 3667000 }, { "epoch": 2.2, - "learning_rate": 4.064468222721147e-05, - "loss": 0.3501, + "learning_rate": 4.064460662845129e-05, + "loss": 0.2705, "step": 3667500 }, { "epoch": 2.2, - "learning_rate": 4.064258226165091e-05, - "loss": 0.3532, + "learning_rate": 4.064250666289073e-05, + "loss": 0.2683, "step": 3668000 }, { "epoch": 2.2, - "learning_rate": 4.0640482296090344e-05, - "loss": 0.3457, + "learning_rate": 4.064041089726129e-05, + "loss": 0.2636, "step": 3668500 }, { "epoch": 2.2, - "learning_rate": 4.063838233052978e-05, - "loss": 0.359, + "learning_rate": 4.0638310931700715e-05, + "loss": 0.2716, "step": 3669000 }, { "epoch": 2.2, - "learning_rate": 4.063628236496922e-05, - "loss": 0.3627, + "learning_rate": 4.0636215166071275e-05, + "loss": 0.2733, "step": 3669500 }, { "epoch": 2.2, - "learning_rate": 4.063418239940865e-05, - "loss": 0.3568, + "learning_rate": 4.0634115200510716e-05, + "loss": 0.2744, "step": 3670000 }, { "epoch": 2.2, - "learning_rate": 4.0632086633779205e-05, - "loss": 0.348, + "learning_rate": 4.063201523495015e-05, + "loss": 0.2655, "step": 3670500 }, { "epoch": 2.2, - "learning_rate": 4.062998666821864e-05, - "loss": 0.3465, + "learning_rate": 4.062991526938958e-05, + "loss": 0.2715, "step": 3671000 }, { "epoch": 2.2, - "learning_rate": 4.062788670265808e-05, - "loss": 0.3566, + "learning_rate": 4.0627815303829016e-05, + "loss": 0.2674, "step": 3671500 }, { "epoch": 2.2, - "learning_rate": 4.062579093702863e-05, - "loss": 0.3493, + "learning_rate": 4.062571533826845e-05, + "loss": 0.2678, "step": 3672000 }, { "epoch": 2.2, - "learning_rate": 4.0623690971468065e-05, - "loss": 0.3516, + "learning_rate": 4.062361537270788e-05, + "loss": 0.2704, "step": 3672500 }, { "epoch": 2.2, - "learning_rate": 4.0621591005907505e-05, - "loss": 0.3522, + "learning_rate": 4.062151540714732e-05, + "loss": 0.2681, "step": 3673000 }, { "epoch": 2.2, - "learning_rate": 4.061949104034694e-05, - "loss": 0.3473, + "learning_rate": 4.0619419641517883e-05, + "loss": 0.2646, "step": 3673500 }, { "epoch": 2.2, - "learning_rate": 4.061739107478637e-05, - "loss": 0.3577, + "learning_rate": 4.061732387588844e-05, + "loss": 0.2686, "step": 3674000 }, { "epoch": 2.2, - "learning_rate": 4.061529110922581e-05, - "loss": 0.3544, + "learning_rate": 4.061522391032787e-05, + "loss": 0.267, "step": 3674500 }, { "epoch": 2.2, - "learning_rate": 4.0613191143665246e-05, - "loss": 0.3515, + "learning_rate": 4.0613123944767304e-05, + "loss": 0.2718, "step": 3675000 }, { "epoch": 2.2, - "learning_rate": 4.061109117810468e-05, - "loss": 0.351, + "learning_rate": 4.0611023979206744e-05, + "loss": 0.2687, "step": 3675500 }, { "epoch": 2.2, - "learning_rate": 4.060899541247523e-05, - "loss": 0.357, + "learning_rate": 4.06089282135773e-05, + "loss": 0.2711, "step": 3676000 }, { "epoch": 2.2, - "learning_rate": 4.060689544691467e-05, - "loss": 0.3531, + "learning_rate": 4.060682824801673e-05, + "loss": 0.2666, "step": 3676500 }, { "epoch": 2.2, - "learning_rate": 4.060479548135411e-05, - "loss": 0.3564, + "learning_rate": 4.060472828245617e-05, + "loss": 0.2707, "step": 3677000 }, { "epoch": 2.2, - "learning_rate": 4.060269551579354e-05, - "loss": 0.347, + "learning_rate": 4.0602628316895605e-05, + "loss": 0.2687, "step": 3677500 }, { "epoch": 2.21, - "learning_rate": 4.0600599750164094e-05, - "loss": 0.3516, + "learning_rate": 4.060052835133504e-05, + "loss": 0.2625, "step": 3678000 }, { "epoch": 2.21, - "learning_rate": 4.0598499784603534e-05, - "loss": 0.3581, + "learning_rate": 4.059842838577447e-05, + "loss": 0.2734, "step": 3678500 }, { "epoch": 2.21, - "learning_rate": 4.059639981904297e-05, - "loss": 0.3565, + "learning_rate": 4.059633262014503e-05, + "loss": 0.2716, "step": 3679000 }, { "epoch": 2.21, - "learning_rate": 4.059429985348241e-05, - "loss": 0.3491, + "learning_rate": 4.0594232654584466e-05, + "loss": 0.2637, "step": 3679500 }, { "epoch": 2.21, - "learning_rate": 4.059219988792184e-05, - "loss": 0.3498, + "learning_rate": 4.05921326890239e-05, + "loss": 0.2759, "step": 3680000 }, { "epoch": 2.21, - "learning_rate": 4.059009992236127e-05, - "loss": 0.3522, + "learning_rate": 4.059003272346334e-05, + "loss": 0.267, "step": 3680500 }, { "epoch": 2.21, - "learning_rate": 4.058800415673183e-05, - "loss": 0.3505, + "learning_rate": 4.0587932757902766e-05, + "loss": 0.2648, "step": 3681000 }, { "epoch": 2.21, - "learning_rate": 4.058590419117127e-05, - "loss": 0.3452, + "learning_rate": 4.05858327923422e-05, + "loss": 0.2701, "step": 3681500 }, { "epoch": 2.21, - "learning_rate": 4.05838042256107e-05, - "loss": 0.3591, + "learning_rate": 4.058373282678164e-05, + "loss": 0.2751, "step": 3682000 }, { "epoch": 2.21, - "learning_rate": 4.0581704260050135e-05, - "loss": 0.3471, + "learning_rate": 4.058163286122107e-05, + "loss": 0.2648, "step": 3682500 }, { "epoch": 2.21, - "learning_rate": 4.057960849442069e-05, - "loss": 0.3487, + "learning_rate": 4.057953289566051e-05, + "loss": 0.2696, "step": 3683000 }, { "epoch": 2.21, - "learning_rate": 4.057750852886013e-05, - "loss": 0.3523, + "learning_rate": 4.057744132996219e-05, + "loss": 0.2677, "step": 3683500 }, { "epoch": 2.21, - "learning_rate": 4.057540856329956e-05, - "loss": 0.3503, + "learning_rate": 4.057534136440163e-05, + "loss": 0.2666, "step": 3684000 }, { "epoch": 2.21, - "learning_rate": 4.0573308597738996e-05, - "loss": 0.3506, + "learning_rate": 4.057324139884106e-05, + "loss": 0.2682, "step": 3684500 }, { "epoch": 2.21, - "learning_rate": 4.0571208632178436e-05, - "loss": 0.3538, + "learning_rate": 4.0571141433280494e-05, + "loss": 0.2712, "step": 3685000 }, { "epoch": 2.21, - "learning_rate": 4.056910866661786e-05, - "loss": 0.3449, + "learning_rate": 4.056904566765105e-05, + "loss": 0.266, "step": 3685500 }, { "epoch": 2.21, - "learning_rate": 4.05670087010573e-05, - "loss": 0.3455, + "learning_rate": 4.056694570209049e-05, + "loss": 0.2619, "step": 3686000 }, { "epoch": 2.21, - "learning_rate": 4.056490873549674e-05, - "loss": 0.3469, + "learning_rate": 4.056484573652992e-05, + "loss": 0.2624, "step": 3686500 }, { "epoch": 2.21, - "learning_rate": 4.05628129698673e-05, - "loss": 0.3514, + "learning_rate": 4.0562745770969355e-05, + "loss": 0.2693, "step": 3687000 }, { "epoch": 2.21, - "learning_rate": 4.0560713004306724e-05, - "loss": 0.3478, + "learning_rate": 4.0560645805408795e-05, + "loss": 0.2601, "step": 3687500 }, { "epoch": 2.21, - "learning_rate": 4.0558613038746164e-05, - "loss": 0.3611, + "learning_rate": 4.055854583984822e-05, + "loss": 0.2705, "step": 3688000 }, { "epoch": 2.21, - "learning_rate": 4.05565130731856e-05, - "loss": 0.3491, + "learning_rate": 4.0556445874287655e-05, + "loss": 0.2657, "step": 3688500 }, { "epoch": 2.21, - "learning_rate": 4.055441730755616e-05, - "loss": 0.3609, + "learning_rate": 4.0554345908727096e-05, + "loss": 0.2714, "step": 3689000 }, { "epoch": 2.21, - "learning_rate": 4.055231734199559e-05, - "loss": 0.358, + "learning_rate": 4.055224594316653e-05, + "loss": 0.2755, "step": 3689500 }, { "epoch": 2.21, - "learning_rate": 4.0550221576366145e-05, - "loss": 0.361, + "learning_rate": 4.055014597760597e-05, + "loss": 0.2728, "step": 3690000 }, { "epoch": 2.21, - "learning_rate": 4.0548121610805585e-05, - "loss": 0.3631, + "learning_rate": 4.05480460120454e-05, + "loss": 0.2719, "step": 3690500 }, { "epoch": 2.21, - "learning_rate": 4.054602164524502e-05, - "loss": 0.3586, + "learning_rate": 4.0545946046484836e-05, + "loss": 0.2745, "step": 3691000 }, { "epoch": 2.21, - "learning_rate": 4.054392167968445e-05, - "loss": 0.3449, + "learning_rate": 4.054385028085539e-05, + "loss": 0.2621, "step": 3691500 }, { "epoch": 2.21, - "learning_rate": 4.054182591405501e-05, - "loss": 0.3579, + "learning_rate": 4.054175031529483e-05, + "loss": 0.2691, "step": 3692000 }, { "epoch": 2.21, - "learning_rate": 4.0539725948494446e-05, - "loss": 0.356, + "learning_rate": 4.0539650349734263e-05, + "loss": 0.2645, "step": 3692500 }, { "epoch": 2.21, - "learning_rate": 4.053762598293388e-05, - "loss": 0.3544, + "learning_rate": 4.05375503841737e-05, + "loss": 0.2698, "step": 3693000 }, { "epoch": 2.21, - "learning_rate": 4.053552601737332e-05, - "loss": 0.354, + "learning_rate": 4.053545461854425e-05, + "loss": 0.2664, "step": 3693500 }, { "epoch": 2.21, - "learning_rate": 4.053342605181275e-05, - "loss": 0.3488, + "learning_rate": 4.053335465298369e-05, + "loss": 0.2685, "step": 3694000 }, { "epoch": 2.22, - "learning_rate": 4.0531326086252186e-05, - "loss": 0.3495, + "learning_rate": 4.0531254687423124e-05, + "loss": 0.268, "step": 3694500 }, { "epoch": 2.22, - "learning_rate": 4.052923032062274e-05, - "loss": 0.3507, + "learning_rate": 4.052915472186256e-05, + "loss": 0.2699, "step": 3695000 }, { "epoch": 2.22, - "learning_rate": 4.052713035506218e-05, - "loss": 0.3526, + "learning_rate": 4.052705895623311e-05, + "loss": 0.2689, "step": 3695500 }, { "epoch": 2.22, - "learning_rate": 4.0525030389501614e-05, - "loss": 0.3451, + "learning_rate": 4.052495899067255e-05, + "loss": 0.2639, "step": 3696000 }, { "epoch": 2.22, - "learning_rate": 4.052293042394105e-05, - "loss": 0.3466, + "learning_rate": 4.052286322504311e-05, + "loss": 0.2663, "step": 3696500 }, { "epoch": 2.22, - "learning_rate": 4.052083045838048e-05, - "loss": 0.3444, + "learning_rate": 4.0520767459413665e-05, + "loss": 0.2635, "step": 3697000 }, { "epoch": 2.22, - "learning_rate": 4.0518730492819914e-05, - "loss": 0.3536, + "learning_rate": 4.05186674938531e-05, + "loss": 0.2698, "step": 3697500 }, { "epoch": 2.22, - "learning_rate": 4.0516634727190474e-05, - "loss": 0.3428, + "learning_rate": 4.051656752829254e-05, + "loss": 0.2687, "step": 3698000 }, { "epoch": 2.22, - "learning_rate": 4.051453896156103e-05, - "loss": 0.3638, + "learning_rate": 4.051446756273197e-05, + "loss": 0.2751, "step": 3698500 }, { "epoch": 2.22, - "learning_rate": 4.051243899600047e-05, - "loss": 0.3536, + "learning_rate": 4.0512367597171406e-05, + "loss": 0.2667, "step": 3699000 }, { "epoch": 2.22, - "learning_rate": 4.05103390304399e-05, - "loss": 0.3588, + "learning_rate": 4.0510267631610846e-05, + "loss": 0.2719, "step": 3699500 }, { "epoch": 2.22, - "learning_rate": 4.0508239064879335e-05, - "loss": 0.3497, + "learning_rate": 4.050816766605027e-05, + "loss": 0.2667, "step": 3700000 }, { "epoch": 2.22, - "eval_loss": 0.34326010942459106, - "eval_runtime": 1122.4896, - "eval_samples_per_second": 469.243, - "eval_steps_per_second": 78.207, + "eval_loss": 0.24800211191177368, + "eval_runtime": 1464.5788, + "eval_samples_per_second": 359.639, + "eval_steps_per_second": 59.94, "step": 3700000 }, { "epoch": 2.22, - "learning_rate": 4.0506139099318775e-05, - "loss": 0.3543, + "learning_rate": 4.0506067700489706e-05, + "loss": 0.2725, "step": 3700500 }, { "epoch": 2.22, - "learning_rate": 4.050404333368933e-05, - "loss": 0.357, + "learning_rate": 4.0503967734929147e-05, + "loss": 0.2703, "step": 3701000 }, { "epoch": 2.22, - "learning_rate": 4.050194336812876e-05, - "loss": 0.3614, + "learning_rate": 4.050186776936858e-05, + "loss": 0.2645, "step": 3701500 }, { "epoch": 2.22, - "learning_rate": 4.0499843402568196e-05, - "loss": 0.3606, + "learning_rate": 4.0499767803808013e-05, + "loss": 0.2728, "step": 3702000 }, { "epoch": 2.22, - "learning_rate": 4.0497743437007636e-05, - "loss": 0.3546, + "learning_rate": 4.0497667838247454e-05, + "loss": 0.2712, "step": 3702500 }, { "epoch": 2.22, - "learning_rate": 4.049564347144707e-05, - "loss": 0.356, + "learning_rate": 4.049557207261801e-05, + "loss": 0.2701, "step": 3703000 }, { "epoch": 2.22, - "learning_rate": 4.04935435058865e-05, - "loss": 0.3575, + "learning_rate": 4.049347210705744e-05, + "loss": 0.273, "step": 3703500 }, { "epoch": 2.22, - "learning_rate": 4.049144354032594e-05, - "loss": 0.3593, + "learning_rate": 4.049137214149688e-05, + "loss": 0.2689, "step": 3704000 }, { "epoch": 2.22, - "learning_rate": 4.048934357476537e-05, - "loss": 0.3489, + "learning_rate": 4.0489272175936314e-05, + "loss": 0.2691, "step": 3704500 }, { "epoch": 2.22, - "learning_rate": 4.0487243609204803e-05, - "loss": 0.348, + "learning_rate": 4.048717641030687e-05, + "loss": 0.2666, "step": 3705000 }, { "epoch": 2.22, - "learning_rate": 4.0485147843575364e-05, - "loss": 0.3645, + "learning_rate": 4.04850764447463e-05, + "loss": 0.2694, "step": 3705500 }, { "epoch": 2.22, - "learning_rate": 4.0483047878014804e-05, - "loss": 0.3512, + "learning_rate": 4.048297647918574e-05, + "loss": 0.2657, "step": 3706000 }, { "epoch": 2.22, - "learning_rate": 4.048094791245424e-05, - "loss": 0.3562, + "learning_rate": 4.0480876513625175e-05, + "loss": 0.2691, "step": 3706500 }, { "epoch": 2.22, - "learning_rate": 4.047884794689367e-05, - "loss": 0.3501, + "learning_rate": 4.047877654806461e-05, + "loss": 0.2647, "step": 3707000 }, { "epoch": 2.22, - "learning_rate": 4.0476747981333104e-05, - "loss": 0.3525, + "learning_rate": 4.047668078243516e-05, + "loss": 0.2717, "step": 3707500 }, { "epoch": 2.22, - "learning_rate": 4.047464801577254e-05, - "loss": 0.3539, + "learning_rate": 4.04745808168746e-05, + "loss": 0.269, "step": 3708000 }, { "epoch": 2.22, - "learning_rate": 4.047254805021198e-05, - "loss": 0.3505, + "learning_rate": 4.0472480851314036e-05, + "loss": 0.2668, "step": 3708500 }, { "epoch": 2.22, - "learning_rate": 4.047044808465141e-05, - "loss": 0.3471, + "learning_rate": 4.047038088575347e-05, + "loss": 0.2739, "step": 3709000 }, { "epoch": 2.22, - "learning_rate": 4.0468352319021965e-05, - "loss": 0.3583, + "learning_rate": 4.046828512012403e-05, + "loss": 0.2685, "step": 3709500 }, { "epoch": 2.22, - "learning_rate": 4.04662523534614e-05, - "loss": 0.3458, + "learning_rate": 4.046618515456346e-05, + "loss": 0.2678, "step": 3710000 }, { "epoch": 2.22, - "learning_rate": 4.046416078776308e-05, - "loss": 0.3577, + "learning_rate": 4.0464093588865144e-05, + "loss": 0.2718, "step": 3710500 }, { "epoch": 2.22, - "learning_rate": 4.046206082220251e-05, - "loss": 0.3483, + "learning_rate": 4.046199362330458e-05, + "loss": 0.2696, "step": 3711000 }, { "epoch": 2.23, - "learning_rate": 4.045996085664195e-05, - "loss": 0.363, + "learning_rate": 4.045989365774401e-05, + "loss": 0.27, "step": 3711500 }, { "epoch": 2.23, - "learning_rate": 4.0457860891081386e-05, - "loss": 0.3572, + "learning_rate": 4.045779369218345e-05, + "loss": 0.2696, "step": 3712000 }, { "epoch": 2.23, - "learning_rate": 4.045576092552082e-05, - "loss": 0.3602, + "learning_rate": 4.0455693726622884e-05, + "loss": 0.2702, "step": 3712500 }, { "epoch": 2.23, - "learning_rate": 4.045366515989138e-05, - "loss": 0.354, + "learning_rate": 4.045359376106232e-05, + "loss": 0.2695, "step": 3713000 }, { "epoch": 2.23, - "learning_rate": 4.0451565194330813e-05, - "loss": 0.3478, + "learning_rate": 4.045149379550176e-05, + "loss": 0.2651, "step": 3713500 }, { "epoch": 2.23, - "learning_rate": 4.044946522877025e-05, - "loss": 0.3576, + "learning_rate": 4.044939382994119e-05, + "loss": 0.2697, "step": 3714000 }, { "epoch": 2.23, - "learning_rate": 4.044736526320969e-05, - "loss": 0.3481, + "learning_rate": 4.044729386438062e-05, + "loss": 0.2651, "step": 3714500 }, { "epoch": 2.23, - "learning_rate": 4.044526529764912e-05, - "loss": 0.3448, + "learning_rate": 4.044519389882006e-05, + "loss": 0.2659, "step": 3715000 }, { "epoch": 2.23, - "learning_rate": 4.0443165332088554e-05, - "loss": 0.3514, + "learning_rate": 4.044309393325949e-05, + "loss": 0.2691, "step": 3715500 }, { "epoch": 2.23, - "learning_rate": 4.0441065366527994e-05, - "loss": 0.3518, + "learning_rate": 4.0440993967698925e-05, + "loss": 0.2662, "step": 3716000 }, { "epoch": 2.23, - "learning_rate": 4.043896540096742e-05, - "loss": 0.3559, + "learning_rate": 4.0438898202069486e-05, + "loss": 0.2704, "step": 3716500 }, { "epoch": 2.23, - "learning_rate": 4.0436865435406854e-05, - "loss": 0.3571, + "learning_rate": 4.043679823650892e-05, + "loss": 0.2684, "step": 3717000 }, { "epoch": 2.23, - "learning_rate": 4.0434769669777415e-05, - "loss": 0.3621, + "learning_rate": 4.043469827094835e-05, + "loss": 0.272, "step": 3717500 }, { "epoch": 2.23, - "learning_rate": 4.0432669704216855e-05, - "loss": 0.3487, + "learning_rate": 4.043259830538779e-05, + "loss": 0.2718, "step": 3718000 }, { "epoch": 2.23, - "learning_rate": 4.043056973865628e-05, - "loss": 0.3481, + "learning_rate": 4.043050253975835e-05, + "loss": 0.2681, "step": 3718500 }, { "epoch": 2.23, - "learning_rate": 4.0428469773095715e-05, - "loss": 0.3582, + "learning_rate": 4.042840257419778e-05, + "loss": 0.275, "step": 3719000 }, { "epoch": 2.23, - "learning_rate": 4.0426369807535155e-05, - "loss": 0.3509, + "learning_rate": 4.042630260863721e-05, + "loss": 0.2695, "step": 3719500 }, { "epoch": 2.23, - "learning_rate": 4.042426984197459e-05, - "loss": 0.3582, + "learning_rate": 4.0424202643076653e-05, + "loss": 0.2743, "step": 3720000 }, { "epoch": 2.23, - "learning_rate": 4.042216987641402e-05, - "loss": 0.3487, + "learning_rate": 4.042210267751609e-05, + "loss": 0.2624, "step": 3720500 }, { "epoch": 2.23, - "learning_rate": 4.042007411078458e-05, - "loss": 0.3515, + "learning_rate": 4.042001111181777e-05, + "loss": 0.2695, "step": 3721000 }, { "epoch": 2.23, - "learning_rate": 4.0417974145224016e-05, - "loss": 0.352, + "learning_rate": 4.04179111462572e-05, + "loss": 0.2701, "step": 3721500 }, { "epoch": 2.23, - "learning_rate": 4.041587417966345e-05, - "loss": 0.3565, + "learning_rate": 4.0415811180696634e-05, + "loss": 0.2699, "step": 3722000 }, { "epoch": 2.23, - "learning_rate": 4.041377421410289e-05, - "loss": 0.3523, + "learning_rate": 4.0413711215136075e-05, + "loss": 0.2702, "step": 3722500 }, { "epoch": 2.23, - "learning_rate": 4.041167424854232e-05, - "loss": 0.3598, + "learning_rate": 4.041161124957551e-05, + "loss": 0.2714, "step": 3723000 }, { "epoch": 2.23, - "learning_rate": 4.040957428298176e-05, - "loss": 0.3499, + "learning_rate": 4.040951128401494e-05, + "loss": 0.2682, "step": 3723500 }, { "epoch": 2.23, - "learning_rate": 4.040747851735231e-05, - "loss": 0.3548, + "learning_rate": 4.0407411318454375e-05, + "loss": 0.2715, "step": 3724000 }, { "epoch": 2.23, - "learning_rate": 4.040537855179175e-05, - "loss": 0.3571, + "learning_rate": 4.040531135289381e-05, + "loss": 0.2728, "step": 3724500 }, { "epoch": 2.23, - "learning_rate": 4.0403278586231184e-05, - "loss": 0.3506, + "learning_rate": 4.040321558726437e-05, + "loss": 0.2729, "step": 3725000 }, { "epoch": 2.23, - "learning_rate": 4.040117862067062e-05, - "loss": 0.3606, + "learning_rate": 4.040111982163492e-05, + "loss": 0.2709, "step": 3725500 }, { "epoch": 2.23, - "learning_rate": 4.039907865511006e-05, - "loss": 0.3451, + "learning_rate": 4.039901985607436e-05, + "loss": 0.2684, "step": 3726000 }, { "epoch": 2.23, - "learning_rate": 4.039697868954949e-05, - "loss": 0.3587, + "learning_rate": 4.0396919890513796e-05, + "loss": 0.273, "step": 3726500 }, { "epoch": 2.23, - "learning_rate": 4.0394878723988925e-05, - "loss": 0.3522, + "learning_rate": 4.039481992495323e-05, + "loss": 0.2743, "step": 3727000 }, { "epoch": 2.23, - "learning_rate": 4.0392778758428365e-05, - "loss": 0.3491, + "learning_rate": 4.039271995939267e-05, + "loss": 0.2657, "step": 3727500 }, { "epoch": 2.24, - "learning_rate": 4.039068299279892e-05, - "loss": 0.3503, + "learning_rate": 4.03906199938321e-05, + "loss": 0.2733, "step": 3728000 }, { "epoch": 2.24, - "learning_rate": 4.038858722716947e-05, - "loss": 0.3515, + "learning_rate": 4.038852002827153e-05, + "loss": 0.2664, "step": 3728500 }, { "epoch": 2.24, - "learning_rate": 4.038649146154003e-05, - "loss": 0.3421, + "learning_rate": 4.038642426264209e-05, + "loss": 0.2654, "step": 3729000 }, { "epoch": 2.24, - "learning_rate": 4.0384391495979466e-05, - "loss": 0.3483, + "learning_rate": 4.038432429708153e-05, + "loss": 0.2701, "step": 3729500 }, { "epoch": 2.24, - "learning_rate": 4.0382291530418906e-05, - "loss": 0.3469, + "learning_rate": 4.0382224331520964e-05, + "loss": 0.2669, "step": 3730000 }, { "epoch": 2.24, - "learning_rate": 4.038019156485833e-05, - "loss": 0.3584, + "learning_rate": 4.03801243659604e-05, + "loss": 0.27, "step": 3730500 }, { "epoch": 2.24, - "learning_rate": 4.0378091599297766e-05, - "loss": 0.347, + "learning_rate": 4.037802440039983e-05, + "loss": 0.2701, "step": 3731000 }, { "epoch": 2.24, - "learning_rate": 4.0375991633737206e-05, - "loss": 0.3402, + "learning_rate": 4.0375924434839264e-05, + "loss": 0.2617, "step": 3731500 }, { "epoch": 2.24, - "learning_rate": 4.037389166817664e-05, - "loss": 0.3595, + "learning_rate": 4.0373824469278704e-05, + "loss": 0.2679, "step": 3732000 }, { "epoch": 2.24, - "learning_rate": 4.037179170261607e-05, - "loss": 0.35, + "learning_rate": 4.037172450371814e-05, + "loss": 0.2669, "step": 3732500 }, { "epoch": 2.24, - "learning_rate": 4.0369691737055514e-05, - "loss": 0.3484, + "learning_rate": 4.036962453815757e-05, + "loss": 0.2692, "step": 3733000 }, { "epoch": 2.24, - "learning_rate": 4.036759177149495e-05, - "loss": 0.3588, + "learning_rate": 4.0367528772528125e-05, + "loss": 0.2734, "step": 3733500 }, { "epoch": 2.24, - "learning_rate": 4.036549180593438e-05, - "loss": 0.336, + "learning_rate": 4.0365428806967565e-05, + "loss": 0.261, "step": 3734000 }, { "epoch": 2.24, - "learning_rate": 4.036339184037382e-05, - "loss": 0.3491, + "learning_rate": 4.0363328841407e-05, + "loss": 0.264, "step": 3734500 }, { "epoch": 2.24, - "learning_rate": 4.0361296074744374e-05, - "loss": 0.3528, + "learning_rate": 4.036122887584643e-05, + "loss": 0.2679, "step": 3735000 }, { "epoch": 2.24, - "learning_rate": 4.035919610918381e-05, - "loss": 0.3465, + "learning_rate": 4.035913311021699e-05, + "loss": 0.2706, "step": 3735500 }, { "epoch": 2.24, - "learning_rate": 4.035709614362324e-05, - "loss": 0.3567, + "learning_rate": 4.0357033144656426e-05, + "loss": 0.2707, "step": 3736000 }, { "epoch": 2.24, - "learning_rate": 4.03550003779938e-05, - "loss": 0.3606, + "learning_rate": 4.0354941578958106e-05, + "loss": 0.2709, "step": 3736500 }, { "epoch": 2.24, - "learning_rate": 4.0352900412433235e-05, - "loss": 0.3579, + "learning_rate": 4.035284161339754e-05, + "loss": 0.2664, "step": 3737000 }, { "epoch": 2.24, - "learning_rate": 4.035080044687267e-05, - "loss": 0.3617, + "learning_rate": 4.035074164783697e-05, + "loss": 0.2701, "step": 3737500 }, { "epoch": 2.24, - "learning_rate": 4.034870048131211e-05, - "loss": 0.3445, + "learning_rate": 4.0348641682276414e-05, + "loss": 0.2653, "step": 3738000 }, { "epoch": 2.24, - "learning_rate": 4.034660051575154e-05, - "loss": 0.3569, + "learning_rate": 4.034654171671585e-05, + "loss": 0.271, "step": 3738500 }, { "epoch": 2.24, - "learning_rate": 4.0344500550190976e-05, - "loss": 0.3597, + "learning_rate": 4.03444459510864e-05, + "loss": 0.2692, "step": 3739000 }, { "epoch": 2.24, - "learning_rate": 4.0342400584630416e-05, - "loss": 0.3633, + "learning_rate": 4.0342345985525834e-05, + "loss": 0.2691, "step": 3739500 }, { "epoch": 2.24, - "learning_rate": 4.034030061906985e-05, - "loss": 0.3489, + "learning_rate": 4.0340246019965274e-05, + "loss": 0.2623, "step": 3740000 }, { "epoch": 2.24, - "learning_rate": 4.033820905337152e-05, - "loss": 0.3524, + "learning_rate": 4.033814605440471e-05, + "loss": 0.264, "step": 3740500 }, { "epoch": 2.24, - "learning_rate": 4.0336109087810957e-05, - "loss": 0.3556, + "learning_rate": 4.033604608884414e-05, + "loss": 0.2713, "step": 3741000 }, { "epoch": 2.24, - "learning_rate": 4.033400912225039e-05, - "loss": 0.3608, + "learning_rate": 4.033394612328358e-05, + "loss": 0.2715, "step": 3741500 }, { "epoch": 2.24, - "learning_rate": 4.033190915668983e-05, - "loss": 0.3557, + "learning_rate": 4.0331846157723015e-05, + "loss": 0.2726, "step": 3742000 }, { "epoch": 2.24, - "learning_rate": 4.0329809191129264e-05, - "loss": 0.3487, + "learning_rate": 4.032974619216245e-05, + "loss": 0.2674, "step": 3742500 }, { "epoch": 2.24, - "learning_rate": 4.032771342549982e-05, - "loss": 0.3476, + "learning_rate": 4.032764622660188e-05, + "loss": 0.2659, "step": 3743000 }, { "epoch": 2.24, - "learning_rate": 4.032561765987038e-05, - "loss": 0.357, + "learning_rate": 4.0325546261041315e-05, + "loss": 0.2676, "step": 3743500 }, { "epoch": 2.24, - "learning_rate": 4.032352189424093e-05, - "loss": 0.3587, + "learning_rate": 4.032344629548075e-05, + "loss": 0.2681, "step": 3744000 }, { "epoch": 2.24, - "learning_rate": 4.032142192868037e-05, - "loss": 0.3646, + "learning_rate": 4.032135052985131e-05, + "loss": 0.2751, "step": 3744500 }, { "epoch": 2.25, - "learning_rate": 4.0319321963119805e-05, - "loss": 0.3534, + "learning_rate": 4.031925056429075e-05, + "loss": 0.2652, "step": 3745000 }, { "epoch": 2.25, - "learning_rate": 4.031722199755924e-05, - "loss": 0.3471, + "learning_rate": 4.0317150598730176e-05, + "loss": 0.2672, "step": 3745500 }, { "epoch": 2.25, - "learning_rate": 4.031512203199868e-05, - "loss": 0.3406, + "learning_rate": 4.0315054833100736e-05, + "loss": 0.2623, "step": 3746000 }, { "epoch": 2.25, - "learning_rate": 4.031302206643811e-05, - "loss": 0.3641, + "learning_rate": 4.0312954867540177e-05, + "loss": 0.2681, "step": 3746500 }, { "epoch": 2.25, - "learning_rate": 4.0310922100877545e-05, - "loss": 0.3564, + "learning_rate": 4.031085490197961e-05, + "loss": 0.2705, "step": 3747000 }, { "epoch": 2.25, - "learning_rate": 4.030882213531698e-05, - "loss": 0.3577, + "learning_rate": 4.0308754936419044e-05, + "loss": 0.2647, "step": 3747500 }, { "epoch": 2.25, - "learning_rate": 4.030672636968754e-05, - "loss": 0.3461, + "learning_rate": 4.030665497085848e-05, + "loss": 0.2638, "step": 3748000 }, { "epoch": 2.25, - "learning_rate": 4.030462640412697e-05, - "loss": 0.3464, + "learning_rate": 4.030455500529791e-05, + "loss": 0.2668, "step": 3748500 }, { "epoch": 2.25, - "learning_rate": 4.0302526438566406e-05, - "loss": 0.3483, + "learning_rate": 4.0302455039737344e-05, + "loss": 0.2662, "step": 3749000 }, { "epoch": 2.25, - "learning_rate": 4.030042647300584e-05, - "loss": 0.3426, + "learning_rate": 4.0300355074176784e-05, + "loss": 0.2688, "step": 3749500 }, { "epoch": 2.25, - "learning_rate": 4.029832650744527e-05, - "loss": 0.3551, + "learning_rate": 4.029825510861622e-05, + "loss": 0.2711, "step": 3750000 }, { "epoch": 2.25, - "learning_rate": 4.029622654188471e-05, - "loss": 0.3487, + "learning_rate": 4.029615514305565e-05, + "loss": 0.2669, "step": 3750500 }, { "epoch": 2.25, - "learning_rate": 4.029412657632415e-05, - "loss": 0.3459, + "learning_rate": 4.029405517749509e-05, + "loss": 0.2653, "step": 3751000 }, { "epoch": 2.25, - "learning_rate": 4.029202661076358e-05, - "loss": 0.3592, + "learning_rate": 4.0291955211934525e-05, + "loss": 0.271, "step": 3751500 }, { "epoch": 2.25, - "learning_rate": 4.0289930845134134e-05, - "loss": 0.3485, + "learning_rate": 4.028985944630508e-05, + "loss": 0.2649, "step": 3752000 }, { "epoch": 2.25, - "learning_rate": 4.0287830879573574e-05, - "loss": 0.3608, + "learning_rate": 4.028775948074451e-05, + "loss": 0.2648, "step": 3752500 }, { "epoch": 2.25, - "learning_rate": 4.028573091401301e-05, - "loss": 0.3625, + "learning_rate": 4.028565951518395e-05, + "loss": 0.2772, "step": 3753000 }, { "epoch": 2.25, - "learning_rate": 4.028363514838357e-05, - "loss": 0.348, + "learning_rate": 4.0283559549623385e-05, + "loss": 0.2693, "step": 3753500 }, { "epoch": 2.25, - "learning_rate": 4.0281535182823e-05, - "loss": 0.3437, + "learning_rate": 4.028145958406282e-05, + "loss": 0.2635, "step": 3754000 }, { "epoch": 2.25, - "learning_rate": 4.0279435217262435e-05, - "loss": 0.3558, + "learning_rate": 4.027935961850226e-05, + "loss": 0.2691, "step": 3754500 }, { "epoch": 2.25, - "learning_rate": 4.027733525170187e-05, - "loss": 0.3595, + "learning_rate": 4.027725965294169e-05, + "loss": 0.2719, "step": 3755000 }, { "epoch": 2.25, - "learning_rate": 4.02752352861413e-05, - "loss": 0.3471, + "learning_rate": 4.0275159687381126e-05, + "loss": 0.263, "step": 3755500 }, { "epoch": 2.25, - "learning_rate": 4.027313532058074e-05, - "loss": 0.3435, + "learning_rate": 4.0273063921751686e-05, + "loss": 0.2681, "step": 3756000 }, { "epoch": 2.25, - "learning_rate": 4.0271035355020175e-05, - "loss": 0.3395, + "learning_rate": 4.027096395619112e-05, + "loss": 0.265, "step": 3756500 }, { "epoch": 2.25, - "learning_rate": 4.0268935389459616e-05, - "loss": 0.3606, + "learning_rate": 4.026886399063055e-05, + "loss": 0.2691, "step": 3757000 }, { "epoch": 2.25, - "learning_rate": 4.026683542389905e-05, - "loss": 0.3551, + "learning_rate": 4.0266764025069994e-05, + "loss": 0.2696, "step": 3757500 }, { "epoch": 2.25, - "learning_rate": 4.026473545833848e-05, - "loss": 0.3579, + "learning_rate": 4.026466825944055e-05, + "loss": 0.2727, "step": 3758000 }, { "epoch": 2.25, - "learning_rate": 4.026263549277792e-05, - "loss": 0.3619, + "learning_rate": 4.02625724938111e-05, + "loss": 0.266, "step": 3758500 }, { "epoch": 2.25, - "learning_rate": 4.0260539727148476e-05, - "loss": 0.3611, + "learning_rate": 4.0260472528250534e-05, + "loss": 0.2739, "step": 3759000 }, { "epoch": 2.25, - "learning_rate": 4.025843976158791e-05, - "loss": 0.3477, + "learning_rate": 4.025837256268997e-05, + "loss": 0.2624, "step": 3759500 }, { "epoch": 2.25, - "learning_rate": 4.025633979602734e-05, - "loss": 0.3534, + "learning_rate": 4.025627259712941e-05, + "loss": 0.2689, "step": 3760000 }, { "epoch": 2.25, - "learning_rate": 4.0254239830466784e-05, - "loss": 0.3454, + "learning_rate": 4.025417263156884e-05, + "loss": 0.2598, "step": 3760500 }, { "epoch": 2.25, - "learning_rate": 4.025214406483734e-05, - "loss": 0.3559, + "learning_rate": 4.025207266600828e-05, + "loss": 0.2713, "step": 3761000 }, { "epoch": 2.26, - "learning_rate": 4.025004409927677e-05, - "loss": 0.3506, + "learning_rate": 4.0249972700447715e-05, + "loss": 0.2694, "step": 3761500 }, { "epoch": 2.26, - "learning_rate": 4.0247944133716204e-05, - "loss": 0.3463, + "learning_rate": 4.024787273488715e-05, + "loss": 0.265, "step": 3762000 }, { "epoch": 2.26, - "learning_rate": 4.0245844168155644e-05, - "loss": 0.3505, + "learning_rate": 4.02457769692577e-05, + "loss": 0.2645, "step": 3762500 }, { "epoch": 2.26, - "learning_rate": 4.024374420259508e-05, - "loss": 0.349, + "learning_rate": 4.024367700369714e-05, + "loss": 0.2674, "step": 3763000 }, { "epoch": 2.26, - "learning_rate": 4.024164423703451e-05, - "loss": 0.3578, + "learning_rate": 4.0241577038136576e-05, + "loss": 0.2754, "step": 3763500 }, { "epoch": 2.26, - "learning_rate": 4.023954427147395e-05, - "loss": 0.3589, + "learning_rate": 4.023947707257601e-05, + "loss": 0.2728, "step": 3764000 }, { "epoch": 2.26, - "learning_rate": 4.023744430591338e-05, - "loss": 0.3456, + "learning_rate": 4.023738130694656e-05, + "loss": 0.2676, "step": 3764500 }, { "epoch": 2.26, - "learning_rate": 4.023534434035282e-05, - "loss": 0.3535, + "learning_rate": 4.0235281341386e-05, + "loss": 0.2682, "step": 3765000 }, { "epoch": 2.26, - "learning_rate": 4.023324857472338e-05, - "loss": 0.3462, + "learning_rate": 4.0233181375825436e-05, + "loss": 0.2643, "step": 3765500 }, { "epoch": 2.26, - "learning_rate": 4.023114860916281e-05, - "loss": 0.3477, + "learning_rate": 4.023108141026487e-05, + "loss": 0.2624, "step": 3766000 }, { "epoch": 2.26, - "learning_rate": 4.0229048643602246e-05, - "loss": 0.3472, + "learning_rate": 4.022898144470431e-05, + "loss": 0.2745, "step": 3766500 }, { "epoch": 2.26, - "learning_rate": 4.022694867804168e-05, - "loss": 0.3583, + "learning_rate": 4.0226885679074864e-05, + "loss": 0.2754, "step": 3767000 }, { "epoch": 2.26, - "learning_rate": 4.022485291241224e-05, - "loss": 0.3518, + "learning_rate": 4.02247857135143e-05, + "loss": 0.2727, "step": 3767500 }, { "epoch": 2.26, - "learning_rate": 4.022275294685167e-05, - "loss": 0.3517, + "learning_rate": 4.022268574795374e-05, + "loss": 0.2695, "step": 3768000 }, { "epoch": 2.26, - "learning_rate": 4.0220657181222226e-05, - "loss": 0.3558, + "learning_rate": 4.022058578239317e-05, + "loss": 0.2708, "step": 3768500 }, { "epoch": 2.26, - "learning_rate": 4.021855721566166e-05, - "loss": 0.3488, + "learning_rate": 4.0218490016763724e-05, + "loss": 0.2643, "step": 3769000 }, { "epoch": 2.26, - "learning_rate": 4.02164572501011e-05, - "loss": 0.3535, + "learning_rate": 4.021639005120316e-05, + "loss": 0.2694, "step": 3769500 }, { "epoch": 2.26, - "learning_rate": 4.0214357284540534e-05, - "loss": 0.3552, + "learning_rate": 4.02142900856426e-05, + "loss": 0.2715, "step": 3770000 }, { "epoch": 2.26, - "learning_rate": 4.021226151891109e-05, - "loss": 0.3503, + "learning_rate": 4.021219012008203e-05, + "loss": 0.2691, "step": 3770500 }, { "epoch": 2.26, - "learning_rate": 4.021016155335053e-05, - "loss": 0.3528, + "learning_rate": 4.0210094354452585e-05, + "loss": 0.2662, "step": 3771000 }, { "epoch": 2.26, - "learning_rate": 4.020806158778996e-05, - "loss": 0.3614, + "learning_rate": 4.020799438889202e-05, + "loss": 0.272, "step": 3771500 }, { "epoch": 2.26, - "learning_rate": 4.0205961622229394e-05, - "loss": 0.3461, + "learning_rate": 4.020589442333146e-05, + "loss": 0.2608, "step": 3772000 }, { "epoch": 2.26, - "learning_rate": 4.0203861656668835e-05, - "loss": 0.3587, + "learning_rate": 4.020379445777089e-05, + "loss": 0.2696, "step": 3772500 }, { "epoch": 2.26, - "learning_rate": 4.020176169110827e-05, - "loss": 0.3497, + "learning_rate": 4.0201698692141446e-05, + "loss": 0.27, "step": 3773000 }, { "epoch": 2.26, - "learning_rate": 4.01996617255477e-05, - "loss": 0.3485, + "learning_rate": 4.0199598726580886e-05, + "loss": 0.2699, "step": 3773500 }, { "epoch": 2.26, - "learning_rate": 4.019756175998714e-05, - "loss": 0.3533, + "learning_rate": 4.019749876102032e-05, + "loss": 0.2668, "step": 3774000 }, { "epoch": 2.26, - "learning_rate": 4.019546179442657e-05, - "loss": 0.3638, + "learning_rate": 4.019539879545975e-05, + "loss": 0.2707, "step": 3774500 }, { "epoch": 2.26, - "learning_rate": 4.0193361828866e-05, - "loss": 0.3475, + "learning_rate": 4.019330302983031e-05, + "loss": 0.2678, "step": 3775000 }, { "epoch": 2.26, - "learning_rate": 4.019126186330544e-05, - "loss": 0.3449, + "learning_rate": 4.019120306426975e-05, + "loss": 0.2615, "step": 3775500 }, { "epoch": 2.26, - "learning_rate": 4.0189161897744876e-05, - "loss": 0.3469, + "learning_rate": 4.018910309870918e-05, + "loss": 0.2617, "step": 3776000 }, { "epoch": 2.26, - "learning_rate": 4.018706613211543e-05, - "loss": 0.3544, + "learning_rate": 4.0187003133148614e-05, + "loss": 0.27, "step": 3776500 }, { "epoch": 2.26, - "learning_rate": 4.018496616655486e-05, - "loss": 0.3482, + "learning_rate": 4.0184903167588054e-05, + "loss": 0.2702, "step": 3777000 }, { "epoch": 2.26, - "learning_rate": 4.01828662009943e-05, - "loss": 0.343, + "learning_rate": 4.018280740195861e-05, + "loss": 0.2659, "step": 3777500 }, { "epoch": 2.27, - "learning_rate": 4.0180766235433736e-05, - "loss": 0.3583, + "learning_rate": 4.018070743639804e-05, + "loss": 0.2672, "step": 3778000 }, { "epoch": 2.27, - "learning_rate": 4.017866626987317e-05, - "loss": 0.3552, + "learning_rate": 4.0178607470837475e-05, + "loss": 0.2729, "step": 3778500 }, { "epoch": 2.27, - "learning_rate": 4.017656630431261e-05, - "loss": 0.3409, + "learning_rate": 4.017651170520803e-05, + "loss": 0.263, "step": 3779000 }, { "epoch": 2.27, - "learning_rate": 4.0174466338752043e-05, - "loss": 0.3525, + "learning_rate": 4.017441173964747e-05, + "loss": 0.2634, "step": 3779500 }, { "epoch": 2.27, - "learning_rate": 4.017236637319148e-05, - "loss": 0.3529, + "learning_rate": 4.01723117740869e-05, + "loss": 0.2676, "step": 3780000 }, { "epoch": 2.27, - "learning_rate": 4.017027060756204e-05, - "loss": 0.3525, + "learning_rate": 4.017021180852634e-05, + "loss": 0.2701, "step": 3780500 }, { "epoch": 2.27, - "learning_rate": 4.016817064200147e-05, - "loss": 0.3479, + "learning_rate": 4.0168111842965776e-05, + "loss": 0.2643, "step": 3781000 }, { "epoch": 2.27, - "learning_rate": 4.0166070676440904e-05, - "loss": 0.3521, + "learning_rate": 4.016601187740521e-05, + "loss": 0.2744, "step": 3781500 }, { "epoch": 2.27, - "learning_rate": 4.0163970710880344e-05, - "loss": 0.3527, + "learning_rate": 4.016391191184465e-05, + "loss": 0.2697, "step": 3782000 }, { "epoch": 2.27, - "learning_rate": 4.016187074531978e-05, - "loss": 0.3469, + "learning_rate": 4.016181194628408e-05, + "loss": 0.2648, "step": 3782500 }, { "epoch": 2.27, - "learning_rate": 4.015977077975921e-05, - "loss": 0.3504, + "learning_rate": 4.0159716180654636e-05, + "loss": 0.2724, "step": 3783000 }, { "epoch": 2.27, - "learning_rate": 4.015767081419865e-05, - "loss": 0.3428, + "learning_rate": 4.015761621509407e-05, + "loss": 0.2668, "step": 3783500 }, { "epoch": 2.27, - "learning_rate": 4.0155570848638085e-05, - "loss": 0.3448, + "learning_rate": 4.015551624953351e-05, + "loss": 0.2654, "step": 3784000 }, { "epoch": 2.27, - "learning_rate": 4.015347508300864e-05, - "loss": 0.3496, + "learning_rate": 4.0153416283972943e-05, + "loss": 0.2658, "step": 3784500 }, { "epoch": 2.27, - "learning_rate": 4.015137511744807e-05, - "loss": 0.351, + "learning_rate": 4.01513205183435e-05, + "loss": 0.2693, "step": 3785000 }, { "epoch": 2.27, - "learning_rate": 4.014927515188751e-05, - "loss": 0.3429, + "learning_rate": 4.014922055278293e-05, + "loss": 0.266, "step": 3785500 }, { "epoch": 2.27, - "learning_rate": 4.0147175186326946e-05, - "loss": 0.3482, + "learning_rate": 4.014712058722237e-05, + "loss": 0.2628, "step": 3786000 }, { "epoch": 2.27, - "learning_rate": 4.01450794206975e-05, - "loss": 0.3498, + "learning_rate": 4.0145020621661804e-05, + "loss": 0.2677, "step": 3786500 }, { "epoch": 2.27, - "learning_rate": 4.014297945513693e-05, - "loss": 0.3485, + "learning_rate": 4.014292485603236e-05, + "loss": 0.2632, "step": 3787000 }, { "epoch": 2.27, - "learning_rate": 4.014087948957637e-05, - "loss": 0.3557, + "learning_rate": 4.01408248904718e-05, + "loss": 0.2649, "step": 3787500 }, { "epoch": 2.27, - "learning_rate": 4.0138779524015806e-05, - "loss": 0.3641, + "learning_rate": 4.013872492491123e-05, + "loss": 0.2757, "step": 3788000 }, { "epoch": 2.27, - "learning_rate": 4.013668375838636e-05, - "loss": 0.353, + "learning_rate": 4.0136624959350665e-05, + "loss": 0.2706, "step": 3788500 }, { "epoch": 2.27, - "learning_rate": 4.01345837928258e-05, - "loss": 0.3564, + "learning_rate": 4.0134524993790105e-05, + "loss": 0.2702, "step": 3789000 }, { "epoch": 2.27, - "learning_rate": 4.0132483827265234e-05, - "loss": 0.3448, + "learning_rate": 4.013242922816066e-05, + "loss": 0.267, "step": 3789500 }, { "epoch": 2.27, - "learning_rate": 4.013038386170467e-05, - "loss": 0.3459, + "learning_rate": 4.013032926260009e-05, + "loss": 0.2632, "step": 3790000 }, { "epoch": 2.27, - "learning_rate": 4.012828809607522e-05, - "loss": 0.3492, + "learning_rate": 4.0128229297039526e-05, + "loss": 0.2672, "step": 3790500 }, { "epoch": 2.27, - "learning_rate": 4.012618813051466e-05, - "loss": 0.3563, + "learning_rate": 4.0126129331478966e-05, + "loss": 0.2666, "step": 3791000 }, { "epoch": 2.27, - "learning_rate": 4.0124088164954094e-05, - "loss": 0.362, + "learning_rate": 4.012403356584952e-05, + "loss": 0.271, "step": 3791500 }, { "epoch": 2.27, - "learning_rate": 4.012199239932465e-05, - "loss": 0.3567, + "learning_rate": 4.012193360028895e-05, + "loss": 0.2687, "step": 3792000 }, { "epoch": 2.27, - "learning_rate": 4.011989243376408e-05, - "loss": 0.3553, + "learning_rate": 4.0119833634728386e-05, + "loss": 0.2688, "step": 3792500 }, { "epoch": 2.27, - "learning_rate": 4.011779246820352e-05, - "loss": 0.353, + "learning_rate": 4.0117733669167827e-05, + "loss": 0.2652, "step": 3793000 }, { "epoch": 2.27, - "learning_rate": 4.0115692502642955e-05, - "loss": 0.3572, + "learning_rate": 4.011563790353838e-05, + "loss": 0.2728, "step": 3793500 }, { "epoch": 2.27, - "learning_rate": 4.011359253708239e-05, - "loss": 0.35, + "learning_rate": 4.0113537937977814e-05, + "loss": 0.2676, "step": 3794000 }, { "epoch": 2.27, - "learning_rate": 4.011149257152183e-05, - "loss": 0.3468, + "learning_rate": 4.0111437972417254e-05, + "loss": 0.2673, "step": 3794500 }, { "epoch": 2.28, - "learning_rate": 4.010939260596126e-05, - "loss": 0.35, + "learning_rate": 4.010933800685669e-05, + "loss": 0.267, "step": 3795000 }, { "epoch": 2.28, - "learning_rate": 4.0107292640400696e-05, - "loss": 0.3552, + "learning_rate": 4.010724224122724e-05, + "loss": 0.2679, "step": 3795500 }, { "epoch": 2.28, - "learning_rate": 4.0105192674840136e-05, - "loss": 0.3521, + "learning_rate": 4.01051464755978e-05, + "loss": 0.2687, "step": 3796000 }, { "epoch": 2.28, - "learning_rate": 4.010309270927956e-05, - "loss": 0.3547, + "learning_rate": 4.0103046510037235e-05, + "loss": 0.2713, "step": 3796500 }, { "epoch": 2.28, - "learning_rate": 4.010099694365012e-05, - "loss": 0.3596, + "learning_rate": 4.010095074440779e-05, + "loss": 0.274, "step": 3797000 }, { "epoch": 2.28, - "learning_rate": 4.009889697808956e-05, - "loss": 0.3541, + "learning_rate": 4.009885077884723e-05, + "loss": 0.2689, "step": 3797500 }, { "epoch": 2.28, - "learning_rate": 4.0096797012529e-05, - "loss": 0.351, + "learning_rate": 4.009675081328666e-05, + "loss": 0.2696, "step": 3798000 }, { "epoch": 2.28, - "learning_rate": 4.009469704696843e-05, - "loss": 0.3438, + "learning_rate": 4.0094650847726095e-05, + "loss": 0.262, "step": 3798500 }, { "epoch": 2.28, - "learning_rate": 4.0092601281338984e-05, - "loss": 0.3554, + "learning_rate": 4.0092550882165536e-05, + "loss": 0.2671, "step": 3799000 }, { "epoch": 2.28, - "learning_rate": 4.0090501315778424e-05, - "loss": 0.3512, + "learning_rate": 4.009045091660497e-05, + "loss": 0.2703, "step": 3799500 }, { "epoch": 2.28, - "learning_rate": 4.008840135021786e-05, - "loss": 0.3533, + "learning_rate": 4.00883509510444e-05, + "loss": 0.2676, "step": 3800000 }, { "epoch": 2.28, - "eval_loss": 0.343013197183609, - "eval_runtime": 1123.1851, - "eval_samples_per_second": 468.952, - "eval_steps_per_second": 78.159, + "eval_loss": 0.24739134311676025, + "eval_runtime": 1489.6136, + "eval_samples_per_second": 353.595, + "eval_steps_per_second": 58.933, "step": 3800000 }, { "epoch": 2.28, - "learning_rate": 4.008630138465729e-05, - "loss": 0.3532, + "learning_rate": 4.0086250985483836e-05, + "loss": 0.2687, "step": 3800500 }, { "epoch": 2.28, - "learning_rate": 4.0084201419096724e-05, - "loss": 0.3649, + "learning_rate": 4.0084155219854396e-05, + "loss": 0.2761, "step": 3801000 }, { "epoch": 2.28, - "learning_rate": 4.008210145353616e-05, - "loss": 0.3557, + "learning_rate": 4.008205525429383e-05, + "loss": 0.2719, "step": 3801500 }, { "epoch": 2.28, - "learning_rate": 4.008000568790672e-05, - "loss": 0.3606, + "learning_rate": 4.007995528873327e-05, + "loss": 0.2688, "step": 3802000 }, { "epoch": 2.28, - "learning_rate": 4.007790572234615e-05, - "loss": 0.3594, + "learning_rate": 4.00778553231727e-05, + "loss": 0.2712, "step": 3802500 }, { "epoch": 2.28, - "learning_rate": 4.007580575678559e-05, - "loss": 0.3547, + "learning_rate": 4.007575535761213e-05, + "loss": 0.2684, "step": 3803000 }, { "epoch": 2.28, - "learning_rate": 4.007370579122502e-05, - "loss": 0.3554, + "learning_rate": 4.007365539205157e-05, + "loss": 0.2714, "step": 3803500 }, { "epoch": 2.28, - "learning_rate": 4.007161002559558e-05, - "loss": 0.3507, + "learning_rate": 4.0071555426491004e-05, + "loss": 0.27, "step": 3804000 }, { "epoch": 2.28, - "learning_rate": 4.006951006003502e-05, - "loss": 0.36, + "learning_rate": 4.006945546093044e-05, + "loss": 0.2738, "step": 3804500 }, { "epoch": 2.28, - "learning_rate": 4.006741009447445e-05, - "loss": 0.345, + "learning_rate": 4.006735969530099e-05, + "loss": 0.2639, "step": 3805000 }, { "epoch": 2.28, - "learning_rate": 4.0065310128913886e-05, - "loss": 0.3546, + "learning_rate": 4.006525972974043e-05, + "loss": 0.2726, "step": 3805500 }, { "epoch": 2.28, - "learning_rate": 4.006321436328444e-05, - "loss": 0.3564, + "learning_rate": 4.0063159764179865e-05, + "loss": 0.2719, "step": 3806000 }, { "epoch": 2.28, - "learning_rate": 4.006111439772388e-05, - "loss": 0.3572, + "learning_rate": 4.00610597986193e-05, + "loss": 0.2676, "step": 3806500 }, { "epoch": 2.28, - "learning_rate": 4.005901443216331e-05, - "loss": 0.3526, + "learning_rate": 4.005895983305874e-05, + "loss": 0.2678, "step": 3807000 }, { "epoch": 2.28, - "learning_rate": 4.005691446660275e-05, - "loss": 0.3514, + "learning_rate": 4.005686406742929e-05, + "loss": 0.2705, "step": 3807500 }, { "epoch": 2.28, - "learning_rate": 4.005481450104219e-05, - "loss": 0.348, + "learning_rate": 4.0054764101868725e-05, + "loss": 0.2634, "step": 3808000 }, { "epoch": 2.28, - "learning_rate": 4.005271873541274e-05, - "loss": 0.3542, + "learning_rate": 4.0052664136308166e-05, + "loss": 0.2703, "step": 3808500 }, { "epoch": 2.28, - "learning_rate": 4.0050618769852174e-05, - "loss": 0.3566, + "learning_rate": 4.00505641707476e-05, + "loss": 0.2721, "step": 3809000 }, { "epoch": 2.28, - "learning_rate": 4.0048518804291614e-05, - "loss": 0.3577, + "learning_rate": 4.004846840511815e-05, + "loss": 0.2703, "step": 3809500 }, { "epoch": 2.28, - "learning_rate": 4.004641883873105e-05, - "loss": 0.3479, + "learning_rate": 4.0046368439557586e-05, + "loss": 0.268, "step": 3810000 }, { "epoch": 2.28, - "learning_rate": 4.004431887317048e-05, - "loss": 0.3537, + "learning_rate": 4.0044268473997026e-05, + "loss": 0.2671, "step": 3810500 }, { "epoch": 2.28, - "learning_rate": 4.0042223107541035e-05, - "loss": 0.3477, + "learning_rate": 4.004216850843646e-05, + "loss": 0.2666, "step": 3811000 }, { "epoch": 2.29, - "learning_rate": 4.0040123141980475e-05, - "loss": 0.3538, + "learning_rate": 4.004007274280702e-05, + "loss": 0.2736, "step": 3811500 }, { "epoch": 2.29, - "learning_rate": 4.003802317641991e-05, - "loss": 0.3519, + "learning_rate": 4.003797277724645e-05, + "loss": 0.2652, "step": 3812000 }, { "epoch": 2.29, - "learning_rate": 4.003592321085934e-05, - "loss": 0.3544, + "learning_rate": 4.003587281168589e-05, + "loss": 0.2698, "step": 3812500 }, { "epoch": 2.29, - "learning_rate": 4.0033823245298775e-05, - "loss": 0.3534, + "learning_rate": 4.003377284612532e-05, + "loss": 0.2755, "step": 3813000 }, { "epoch": 2.29, - "learning_rate": 4.003172327973821e-05, - "loss": 0.3513, + "learning_rate": 4.003167708049588e-05, + "loss": 0.2675, "step": 3813500 }, { "epoch": 2.29, - "learning_rate": 4.002962331417764e-05, - "loss": 0.3424, + "learning_rate": 4.0029577114935314e-05, + "loss": 0.2646, "step": 3814000 }, { "epoch": 2.29, - "learning_rate": 4.002752334861708e-05, - "loss": 0.3561, + "learning_rate": 4.0027481349305875e-05, + "loss": 0.2729, "step": 3814500 }, { "epoch": 2.29, - "learning_rate": 4.0025431782918756e-05, - "loss": 0.3433, + "learning_rate": 4.002538138374531e-05, + "loss": 0.2653, "step": 3815000 }, { "epoch": 2.29, - "learning_rate": 4.0023331817358197e-05, - "loss": 0.3484, + "learning_rate": 4.002328141818474e-05, + "loss": 0.2652, "step": 3815500 }, { "epoch": 2.29, - "learning_rate": 4.002123185179763e-05, - "loss": 0.3546, + "learning_rate": 4.002118145262418e-05, + "loss": 0.271, "step": 3816000 }, { "epoch": 2.29, - "learning_rate": 4.0019136086168184e-05, - "loss": 0.3574, + "learning_rate": 4.0019081487063615e-05, + "loss": 0.2676, "step": 3816500 }, { "epoch": 2.29, - "learning_rate": 4.0017036120607624e-05, - "loss": 0.3535, + "learning_rate": 4.001698152150304e-05, + "loss": 0.2728, "step": 3817000 }, { "epoch": 2.29, - "learning_rate": 4.001493615504706e-05, - "loss": 0.3603, + "learning_rate": 4.001488155594248e-05, + "loss": 0.2658, "step": 3817500 }, { "epoch": 2.29, - "learning_rate": 4.001283618948649e-05, - "loss": 0.3481, + "learning_rate": 4.0012781590381916e-05, + "loss": 0.2656, "step": 3818000 }, { "epoch": 2.29, - "learning_rate": 4.001073622392593e-05, - "loss": 0.3541, + "learning_rate": 4.0010685824752476e-05, + "loss": 0.2681, "step": 3818500 }, { "epoch": 2.29, - "learning_rate": 4.0008636258365364e-05, - "loss": 0.3504, + "learning_rate": 4.000858585919191e-05, + "loss": 0.2702, "step": 3819000 }, { "epoch": 2.29, - "learning_rate": 4.00065362928048e-05, - "loss": 0.3459, + "learning_rate": 4.000648589363134e-05, + "loss": 0.2668, "step": 3819500 }, { "epoch": 2.29, - "learning_rate": 4.000443632724424e-05, - "loss": 0.3478, + "learning_rate": 4.0004385928070776e-05, + "loss": 0.2675, "step": 3820000 }, { "epoch": 2.29, - "learning_rate": 4.0002336361683665e-05, - "loss": 0.3537, + "learning_rate": 4.000228596251021e-05, + "loss": 0.2668, "step": 3820500 }, { "epoch": 2.29, - "learning_rate": 4.00002363961231e-05, - "loss": 0.3479, + "learning_rate": 4.000019019688077e-05, + "loss": 0.2627, "step": 3821000 }, { "epoch": 2.29, - "learning_rate": 3.999813643056254e-05, - "loss": 0.3533, + "learning_rate": 3.9998090231320204e-05, + "loss": 0.2659, "step": 3821500 }, { "epoch": 2.29, - "learning_rate": 3.999603646500197e-05, - "loss": 0.3533, + "learning_rate": 3.999599026575964e-05, + "loss": 0.2691, "step": 3822000 }, { "epoch": 2.29, - "learning_rate": 3.9993936499441405e-05, - "loss": 0.3491, + "learning_rate": 3.999389030019908e-05, + "loss": 0.2645, "step": 3822500 }, { "epoch": 2.29, - "learning_rate": 3.9991840733811966e-05, - "loss": 0.3528, + "learning_rate": 3.999179453456964e-05, + "loss": 0.2715, "step": 3823000 }, { "epoch": 2.29, - "learning_rate": 3.99897407682514e-05, - "loss": 0.3425, + "learning_rate": 3.998969456900907e-05, + "loss": 0.2672, "step": 3823500 }, { "epoch": 2.29, - "learning_rate": 3.998764080269083e-05, - "loss": 0.3516, + "learning_rate": 3.99875946034485e-05, + "loss": 0.2715, "step": 3824000 }, { "epoch": 2.29, - "learning_rate": 3.998554083713027e-05, - "loss": 0.3548, + "learning_rate": 3.998549463788794e-05, + "loss": 0.2692, "step": 3824500 }, { "epoch": 2.29, - "learning_rate": 3.9983445071500826e-05, - "loss": 0.3478, + "learning_rate": 3.99833988722585e-05, + "loss": 0.2645, "step": 3825000 }, { "epoch": 2.29, - "learning_rate": 3.998134510594026e-05, - "loss": 0.3603, + "learning_rate": 3.998129890669793e-05, + "loss": 0.2726, "step": 3825500 }, { "epoch": 2.29, - "learning_rate": 3.9979245140379693e-05, - "loss": 0.355, + "learning_rate": 3.9979198941137365e-05, + "loss": 0.2687, "step": 3826000 }, { "epoch": 2.29, - "learning_rate": 3.9977145174819134e-05, - "loss": 0.3509, + "learning_rate": 3.99770989755768e-05, + "loss": 0.2666, "step": 3826500 }, { "epoch": 2.29, - "learning_rate": 3.997505360912081e-05, - "loss": 0.3489, + "learning_rate": 3.997499901001623e-05, + "loss": 0.2673, "step": 3827000 }, { "epoch": 2.29, - "learning_rate": 3.997295364356025e-05, - "loss": 0.3453, + "learning_rate": 3.9972899044455666e-05, + "loss": 0.2643, "step": 3827500 }, { "epoch": 2.3, - "learning_rate": 3.99708578779308e-05, - "loss": 0.3486, + "learning_rate": 3.9970799078895106e-05, + "loss": 0.2738, "step": 3828000 }, { "epoch": 2.3, - "learning_rate": 3.9968757912370235e-05, - "loss": 0.3484, + "learning_rate": 3.996869911333454e-05, + "loss": 0.2668, "step": 3828500 }, { "epoch": 2.3, - "learning_rate": 3.9966657946809675e-05, - "loss": 0.348, + "learning_rate": 3.996660334770509e-05, + "loss": 0.266, "step": 3829000 }, { "epoch": 2.3, - "learning_rate": 3.996455798124911e-05, - "loss": 0.344, + "learning_rate": 3.996450338214453e-05, + "loss": 0.2693, "step": 3829500 }, { "epoch": 2.3, - "learning_rate": 3.996245801568854e-05, - "loss": 0.3557, + "learning_rate": 3.996240341658397e-05, + "loss": 0.2666, "step": 3830000 }, { "epoch": 2.3, - "learning_rate": 3.996035805012798e-05, - "loss": 0.347, + "learning_rate": 3.99603034510234e-05, + "loss": 0.2668, "step": 3830500 }, { "epoch": 2.3, - "learning_rate": 3.9958258084567415e-05, - "loss": 0.3534, + "learning_rate": 3.9958207685393954e-05, + "loss": 0.2667, "step": 3831000 }, { "epoch": 2.3, - "learning_rate": 3.995616231893797e-05, - "loss": 0.3509, + "learning_rate": 3.9956107719833394e-05, + "loss": 0.2714, "step": 3831500 }, { "epoch": 2.3, - "learning_rate": 3.99540623533774e-05, - "loss": 0.3502, + "learning_rate": 3.995400775427283e-05, + "loss": 0.2643, "step": 3832000 }, { "epoch": 2.3, - "learning_rate": 3.995196238781684e-05, - "loss": 0.3657, + "learning_rate": 3.995190778871226e-05, + "loss": 0.2724, "step": 3832500 }, { "epoch": 2.3, - "learning_rate": 3.9949862422256276e-05, - "loss": 0.3452, + "learning_rate": 3.994981202308282e-05, + "loss": 0.2685, "step": 3833000 }, { "epoch": 2.3, - "learning_rate": 3.994776245669571e-05, - "loss": 0.3419, + "learning_rate": 3.9947712057522255e-05, + "loss": 0.2648, "step": 3833500 }, { "epoch": 2.3, - "learning_rate": 3.994566249113515e-05, - "loss": 0.3616, + "learning_rate": 3.994561209196169e-05, + "loss": 0.2732, "step": 3834000 }, { "epoch": 2.3, - "learning_rate": 3.9943562525574577e-05, - "loss": 0.3451, + "learning_rate": 3.994351632633225e-05, + "loss": 0.2681, "step": 3834500 }, { "epoch": 2.3, - "learning_rate": 3.994146256001401e-05, - "loss": 0.3535, + "learning_rate": 3.99414205607028e-05, + "loss": 0.2747, "step": 3835000 }, { "epoch": 2.3, - "learning_rate": 3.993936259445345e-05, - "loss": 0.3444, + "learning_rate": 3.993932059514224e-05, + "loss": 0.2653, "step": 3835500 }, { "epoch": 2.3, - "learning_rate": 3.9937262628892884e-05, - "loss": 0.3414, + "learning_rate": 3.9937220629581676e-05, + "loss": 0.2605, "step": 3836000 }, { "epoch": 2.3, - "learning_rate": 3.993516266333232e-05, - "loss": 0.354, + "learning_rate": 3.993512066402111e-05, + "loss": 0.2673, "step": 3836500 }, { "epoch": 2.3, - "learning_rate": 3.993306689770288e-05, - "loss": 0.3508, + "learning_rate": 3.993302069846055e-05, + "loss": 0.2681, "step": 3837000 }, { "epoch": 2.3, - "learning_rate": 3.993096693214231e-05, - "loss": 0.3522, + "learning_rate": 3.993092073289998e-05, + "loss": 0.2667, "step": 3837500 }, { "epoch": 2.3, - "learning_rate": 3.9928866966581744e-05, - "loss": 0.3559, + "learning_rate": 3.9928820767339416e-05, + "loss": 0.271, "step": 3838000 }, { "epoch": 2.3, - "learning_rate": 3.9926767001021185e-05, - "loss": 0.3565, + "learning_rate": 3.992672080177885e-05, + "loss": 0.2677, "step": 3838500 }, { "epoch": 2.3, - "learning_rate": 3.992466703546062e-05, - "loss": 0.3503, + "learning_rate": 3.992462083621828e-05, + "loss": 0.2695, "step": 3839000 }, { "epoch": 2.3, - "learning_rate": 3.992256706990005e-05, - "loss": 0.3566, + "learning_rate": 3.992252087065772e-05, + "loss": 0.269, "step": 3839500 }, { "epoch": 2.3, - "learning_rate": 3.992046710433949e-05, - "loss": 0.3536, + "learning_rate": 3.992042510502828e-05, + "loss": 0.2649, "step": 3840000 }, { "epoch": 2.3, - "learning_rate": 3.9918367138778925e-05, - "loss": 0.3485, + "learning_rate": 3.991832513946771e-05, + "loss": 0.2661, "step": 3840500 }, { "epoch": 2.3, - "learning_rate": 3.991627137314948e-05, - "loss": 0.3461, + "learning_rate": 3.9916225173907144e-05, + "loss": 0.2667, "step": 3841000 }, { "epoch": 2.3, - "learning_rate": 3.991417140758891e-05, - "loss": 0.3542, + "learning_rate": 3.991412520834658e-05, + "loss": 0.2678, "step": 3841500 }, { "epoch": 2.3, - "learning_rate": 3.9912075641959466e-05, - "loss": 0.3532, + "learning_rate": 3.991202524278602e-05, + "loss": 0.2711, "step": 3842000 }, { "epoch": 2.3, - "learning_rate": 3.9909975676398906e-05, - "loss": 0.3464, + "learning_rate": 3.990992527722545e-05, + "loss": 0.2645, "step": 3842500 }, { "epoch": 2.3, - "learning_rate": 3.990787571083834e-05, - "loss": 0.3538, + "learning_rate": 3.9907825311664885e-05, + "loss": 0.2676, "step": 3843000 }, { "epoch": 2.3, - "learning_rate": 3.990577574527777e-05, - "loss": 0.3547, + "learning_rate": 3.9905725346104325e-05, + "loss": 0.2677, "step": 3843500 }, { "epoch": 2.3, - "learning_rate": 3.990367577971721e-05, - "loss": 0.3596, + "learning_rate": 3.990362958047488e-05, + "loss": 0.2661, "step": 3844000 }, { "epoch": 2.3, - "learning_rate": 3.990158001408777e-05, - "loss": 0.3554, + "learning_rate": 3.990152961491431e-05, + "loss": 0.2669, "step": 3844500 }, { "epoch": 2.31, - "learning_rate": 3.98994800485272e-05, - "loss": 0.3443, + "learning_rate": 3.989942964935375e-05, + "loss": 0.2636, "step": 3845000 }, { "epoch": 2.31, - "learning_rate": 3.989738008296664e-05, - "loss": 0.3387, + "learning_rate": 3.9897329683793186e-05, + "loss": 0.2572, "step": 3845500 }, { "epoch": 2.31, - "learning_rate": 3.9895280117406074e-05, - "loss": 0.3406, + "learning_rate": 3.989522971823262e-05, + "loss": 0.262, "step": 3846000 }, { "epoch": 2.31, - "learning_rate": 3.989318015184551e-05, - "loss": 0.3449, + "learning_rate": 3.989313395260317e-05, + "loss": 0.2645, "step": 3846500 }, { "epoch": 2.31, - "learning_rate": 3.989108438621606e-05, - "loss": 0.3511, + "learning_rate": 3.989103398704261e-05, + "loss": 0.2687, "step": 3847000 }, { "epoch": 2.31, - "learning_rate": 3.98889844206555e-05, - "loss": 0.3519, + "learning_rate": 3.9888934021482046e-05, + "loss": 0.2665, "step": 3847500 }, { "epoch": 2.31, - "learning_rate": 3.9886884455094935e-05, - "loss": 0.3542, + "learning_rate": 3.988683405592148e-05, + "loss": 0.2718, "step": 3848000 }, { "epoch": 2.31, - "learning_rate": 3.988478448953437e-05, - "loss": 0.3579, + "learning_rate": 3.988473829029203e-05, + "loss": 0.2695, "step": 3848500 }, { "epoch": 2.31, - "learning_rate": 3.988268452397381e-05, - "loss": 0.3459, + "learning_rate": 3.9882638324731474e-05, + "loss": 0.269, "step": 3849000 }, { "epoch": 2.31, - "learning_rate": 3.988058875834436e-05, - "loss": 0.3642, + "learning_rate": 3.988053835917091e-05, + "loss": 0.2716, "step": 3849500 }, { "epoch": 2.31, - "learning_rate": 3.9878488792783795e-05, - "loss": 0.3511, + "learning_rate": 3.987843839361034e-05, + "loss": 0.2636, "step": 3850000 }, { "epoch": 2.31, - "learning_rate": 3.987638882722323e-05, - "loss": 0.3496, + "learning_rate": 3.98763426279809e-05, + "loss": 0.2699, "step": 3850500 }, { "epoch": 2.31, - "learning_rate": 3.987428886166267e-05, - "loss": 0.3539, + "learning_rate": 3.9874242662420334e-05, + "loss": 0.2755, "step": 3851000 }, { "epoch": 2.31, - "learning_rate": 3.98721888961021e-05, - "loss": 0.3527, + "learning_rate": 3.987214269685977e-05, + "loss": 0.2657, "step": 3851500 }, { "epoch": 2.31, - "learning_rate": 3.9870093130472656e-05, - "loss": 0.3457, + "learning_rate": 3.987004273129921e-05, + "loss": 0.2658, "step": 3852000 }, { "epoch": 2.31, - "learning_rate": 3.9867993164912096e-05, - "loss": 0.3436, + "learning_rate": 3.986794696566976e-05, + "loss": 0.2679, "step": 3852500 }, { "epoch": 2.31, - "learning_rate": 3.986589319935153e-05, - "loss": 0.35, + "learning_rate": 3.9865847000109195e-05, + "loss": 0.2664, "step": 3853000 }, { "epoch": 2.31, - "learning_rate": 3.986379323379096e-05, - "loss": 0.3437, + "learning_rate": 3.986374703454863e-05, + "loss": 0.2648, "step": 3853500 }, { "epoch": 2.31, - "learning_rate": 3.986169746816152e-05, - "loss": 0.3514, + "learning_rate": 3.986164706898807e-05, + "loss": 0.2679, "step": 3854000 }, { "epoch": 2.31, - "learning_rate": 3.985959750260096e-05, - "loss": 0.352, + "learning_rate": 3.98595471034275e-05, + "loss": 0.2671, "step": 3854500 }, { "epoch": 2.31, - "learning_rate": 3.985749753704039e-05, - "loss": 0.3473, + "learning_rate": 3.9857451337798056e-05, + "loss": 0.2704, "step": 3855000 }, { "epoch": 2.31, - "learning_rate": 3.9855397571479824e-05, - "loss": 0.3453, + "learning_rate": 3.985535137223749e-05, + "loss": 0.2651, "step": 3855500 }, { "epoch": 2.31, - "learning_rate": 3.9853297605919264e-05, - "loss": 0.3482, + "learning_rate": 3.985325560660805e-05, + "loss": 0.2689, "step": 3856000 }, { "epoch": 2.31, - "learning_rate": 3.98511976403587e-05, - "loss": 0.3567, + "learning_rate": 3.985115564104749e-05, + "loss": 0.2716, "step": 3856500 }, { "epoch": 2.31, - "learning_rate": 3.984910187472925e-05, - "loss": 0.3453, + "learning_rate": 3.984905567548692e-05, + "loss": 0.2684, "step": 3857000 }, { "epoch": 2.31, - "learning_rate": 3.9847001909168685e-05, - "loss": 0.3527, + "learning_rate": 3.984695570992636e-05, + "loss": 0.2667, "step": 3857500 }, { "epoch": 2.31, - "learning_rate": 3.9844901943608125e-05, - "loss": 0.3503, + "learning_rate": 3.984485574436579e-05, + "loss": 0.2667, "step": 3858000 }, { "epoch": 2.31, - "learning_rate": 3.984280197804756e-05, - "loss": 0.3585, + "learning_rate": 3.9842755778805224e-05, + "loss": 0.27, "step": 3858500 }, { "epoch": 2.31, - "learning_rate": 3.984070201248699e-05, - "loss": 0.3449, + "learning_rate": 3.9840655813244664e-05, + "loss": 0.2659, "step": 3859000 }, { "epoch": 2.31, - "learning_rate": 3.983860204692643e-05, - "loss": 0.3476, + "learning_rate": 3.98385558476841e-05, + "loss": 0.2652, "step": 3859500 }, { "epoch": 2.31, - "learning_rate": 3.9836502081365866e-05, - "loss": 0.356, + "learning_rate": 3.983646008205465e-05, + "loss": 0.2683, "step": 3860000 }, { "epoch": 2.31, - "learning_rate": 3.98344021158053e-05, - "loss": 0.3563, + "learning_rate": 3.9834360116494084e-05, + "loss": 0.2699, "step": 3860500 }, { "epoch": 2.31, - "learning_rate": 3.983230635017586e-05, - "loss": 0.3518, + "learning_rate": 3.9832260150933525e-05, + "loss": 0.2675, "step": 3861000 }, { "epoch": 2.32, - "learning_rate": 3.983020638461529e-05, - "loss": 0.3536, + "learning_rate": 3.983016018537296e-05, + "loss": 0.2719, "step": 3861500 }, { "epoch": 2.32, - "learning_rate": 3.9828106419054726e-05, - "loss": 0.342, + "learning_rate": 3.982806441974351e-05, + "loss": 0.2637, "step": 3862000 }, { "epoch": 2.32, - "learning_rate": 3.9826006453494167e-05, - "loss": 0.3498, + "learning_rate": 3.9825964454182945e-05, + "loss": 0.2708, "step": 3862500 }, { "epoch": 2.32, - "learning_rate": 3.982391068786472e-05, - "loss": 0.3559, + "learning_rate": 3.9823864488622385e-05, + "loss": 0.273, "step": 3863000 }, { "epoch": 2.32, - "learning_rate": 3.9821814922235274e-05, - "loss": 0.3406, + "learning_rate": 3.982176452306182e-05, + "loss": 0.2642, "step": 3863500 }, { "epoch": 2.32, - "learning_rate": 3.981971495667471e-05, - "loss": 0.3539, + "learning_rate": 3.981966875743238e-05, + "loss": 0.2706, "step": 3864000 }, { "epoch": 2.32, - "learning_rate": 3.981761499111414e-05, - "loss": 0.3463, + "learning_rate": 3.981756879187181e-05, + "loss": 0.2665, "step": 3864500 }, { "epoch": 2.32, - "learning_rate": 3.981551502555358e-05, - "loss": 0.3507, + "learning_rate": 3.9815468826311246e-05, + "loss": 0.2633, "step": 3865000 }, { "epoch": 2.32, - "learning_rate": 3.9813415059993014e-05, - "loss": 0.3422, + "learning_rate": 3.981336886075068e-05, + "loss": 0.2634, "step": 3865500 }, { "epoch": 2.32, - "learning_rate": 3.981131509443245e-05, - "loss": 0.3538, + "learning_rate": 3.981127309512124e-05, + "loss": 0.2732, "step": 3866000 }, { "epoch": 2.32, - "learning_rate": 3.980921932880301e-05, - "loss": 0.3524, + "learning_rate": 3.9809177329491793e-05, + "loss": 0.2717, "step": 3866500 }, { "epoch": 2.32, - "learning_rate": 3.980711936324244e-05, - "loss": 0.3532, + "learning_rate": 3.9807077363931234e-05, + "loss": 0.2652, "step": 3867000 }, { "epoch": 2.32, - "learning_rate": 3.9805019397681875e-05, - "loss": 0.3557, + "learning_rate": 3.980497739837067e-05, + "loss": 0.2731, "step": 3867500 }, { "epoch": 2.32, - "learning_rate": 3.9802919432121315e-05, - "loss": 0.351, + "learning_rate": 3.98028774328101e-05, + "loss": 0.2643, "step": 3868000 }, { "epoch": 2.32, - "learning_rate": 3.980081946656075e-05, - "loss": 0.3495, + "learning_rate": 3.980077746724954e-05, + "loss": 0.2649, "step": 3868500 }, { "epoch": 2.32, - "learning_rate": 3.979871950100018e-05, - "loss": 0.3436, + "learning_rate": 3.9798677501688974e-05, + "loss": 0.2645, "step": 3869000 }, { "epoch": 2.32, - "learning_rate": 3.979661953543962e-05, - "loss": 0.3451, + "learning_rate": 3.97965775361284e-05, + "loss": 0.2697, "step": 3869500 }, { "epoch": 2.32, - "learning_rate": 3.9794519569879056e-05, - "loss": 0.3499, + "learning_rate": 3.979448177049896e-05, + "loss": 0.2664, "step": 3870000 }, { "epoch": 2.32, - "learning_rate": 3.979242380424961e-05, - "loss": 0.3598, + "learning_rate": 3.97923818049384e-05, + "loss": 0.2701, "step": 3870500 }, { "epoch": 2.32, - "learning_rate": 3.979032383868904e-05, - "loss": 0.3512, + "learning_rate": 3.9790281839377835e-05, + "loss": 0.2713, "step": 3871000 }, { "epoch": 2.32, - "learning_rate": 3.978822387312848e-05, - "loss": 0.3548, + "learning_rate": 3.978818187381727e-05, + "loss": 0.2703, "step": 3871500 }, { "epoch": 2.32, - "learning_rate": 3.978612390756792e-05, - "loss": 0.3408, + "learning_rate": 3.97860819082567e-05, + "loss": 0.2646, "step": 3872000 }, { "epoch": 2.32, - "learning_rate": 3.978402394200735e-05, - "loss": 0.3399, + "learning_rate": 3.9783981942696135e-05, + "loss": 0.2624, "step": 3872500 }, { "epoch": 2.32, - "learning_rate": 3.978192397644679e-05, - "loss": 0.3651, + "learning_rate": 3.9781881977135576e-05, + "loss": 0.2771, "step": 3873000 }, { "epoch": 2.32, - "learning_rate": 3.9779828210817344e-05, - "loss": 0.3613, + "learning_rate": 3.977978201157501e-05, + "loss": 0.2725, "step": 3873500 }, { "epoch": 2.32, - "learning_rate": 3.977772824525678e-05, - "loss": 0.344, + "learning_rate": 3.977768624594556e-05, + "loss": 0.2643, "step": 3874000 }, { "epoch": 2.32, - "learning_rate": 3.977562827969622e-05, - "loss": 0.356, + "learning_rate": 3.9775586280384996e-05, + "loss": 0.266, "step": 3874500 }, { "epoch": 2.32, - "learning_rate": 3.977352831413565e-05, - "loss": 0.3515, + "learning_rate": 3.9773486314824436e-05, + "loss": 0.2687, "step": 3875000 }, { "epoch": 2.32, - "learning_rate": 3.9771428348575085e-05, - "loss": 0.3458, + "learning_rate": 3.977138634926387e-05, + "loss": 0.2615, "step": 3875500 }, { "epoch": 2.32, - "learning_rate": 3.976932838301452e-05, - "loss": 0.3525, + "learning_rate": 3.97692863837033e-05, + "loss": 0.2646, "step": 3876000 }, { "epoch": 2.32, - "learning_rate": 3.976722841745395e-05, - "loss": 0.34, + "learning_rate": 3.976719061807386e-05, + "loss": 0.2636, "step": 3876500 }, { "epoch": 2.32, - "learning_rate": 3.9765128451893385e-05, - "loss": 0.344, + "learning_rate": 3.97650906525133e-05, + "loss": 0.2688, "step": 3877000 }, { "epoch": 2.32, - "learning_rate": 3.9763032686263945e-05, - "loss": 0.3519, + "learning_rate": 3.976299068695273e-05, + "loss": 0.2665, "step": 3877500 }, { "epoch": 2.33, - "learning_rate": 3.976093272070338e-05, - "loss": 0.351, + "learning_rate": 3.976089072139217e-05, + "loss": 0.2665, "step": 3878000 }, { "epoch": 2.33, - "learning_rate": 3.975883275514281e-05, - "loss": 0.3521, + "learning_rate": 3.975879495576273e-05, + "loss": 0.269, "step": 3878500 }, { "epoch": 2.33, - "learning_rate": 3.9756732789582246e-05, - "loss": 0.3453, + "learning_rate": 3.975669499020216e-05, + "loss": 0.2665, "step": 3879000 }, { "epoch": 2.33, - "learning_rate": 3.9754641223883926e-05, - "loss": 0.3461, + "learning_rate": 3.975459502464159e-05, + "loss": 0.2714, "step": 3879500 }, { "epoch": 2.33, - "learning_rate": 3.975254125832336e-05, - "loss": 0.3556, + "learning_rate": 3.975249505908103e-05, + "loss": 0.2657, "step": 3880000 }, { "epoch": 2.33, - "learning_rate": 3.97504412927628e-05, - "loss": 0.3558, + "learning_rate": 3.975039929345159e-05, + "loss": 0.2717, "step": 3880500 }, { "epoch": 2.33, - "learning_rate": 3.974834132720223e-05, - "loss": 0.3527, + "learning_rate": 3.974829932789102e-05, + "loss": 0.2697, "step": 3881000 }, { "epoch": 2.33, - "learning_rate": 3.9746241361641674e-05, - "loss": 0.3517, + "learning_rate": 3.974619936233045e-05, + "loss": 0.2709, "step": 3881500 }, { "epoch": 2.33, - "learning_rate": 3.974414139608111e-05, - "loss": 0.3579, + "learning_rate": 3.974409939676989e-05, + "loss": 0.2667, "step": 3882000 }, { "epoch": 2.33, - "learning_rate": 3.974204563045166e-05, - "loss": 0.3572, + "learning_rate": 3.9741999431209326e-05, + "loss": 0.2637, "step": 3882500 }, { "epoch": 2.33, - "learning_rate": 3.9739945664891094e-05, - "loss": 0.3539, + "learning_rate": 3.9739903665579886e-05, + "loss": 0.268, "step": 3883000 }, { "epoch": 2.33, - "learning_rate": 3.9737845699330534e-05, - "loss": 0.3415, + "learning_rate": 3.973780370001932e-05, + "loss": 0.2632, "step": 3883500 }, { "epoch": 2.33, - "learning_rate": 3.973574993370109e-05, - "loss": 0.3637, + "learning_rate": 3.973570373445875e-05, + "loss": 0.2713, "step": 3884000 }, { "epoch": 2.33, - "learning_rate": 3.973364996814052e-05, - "loss": 0.3571, + "learning_rate": 3.973360796882931e-05, + "loss": 0.2727, "step": 3884500 }, { "epoch": 2.33, - "learning_rate": 3.9731550002579955e-05, - "loss": 0.3501, + "learning_rate": 3.973150800326875e-05, + "loss": 0.2716, "step": 3885000 }, { "epoch": 2.33, - "learning_rate": 3.9729450037019395e-05, - "loss": 0.3578, + "learning_rate": 3.972940803770819e-05, + "loss": 0.2676, "step": 3885500 }, { "epoch": 2.33, - "learning_rate": 3.972735007145883e-05, - "loss": 0.3552, + "learning_rate": 3.9727308072147614e-05, + "loss": 0.2676, "step": 3886000 }, { "epoch": 2.33, - "learning_rate": 3.972525010589826e-05, - "loss": 0.3528, + "learning_rate": 3.972520810658705e-05, + "loss": 0.2661, "step": 3886500 }, { "epoch": 2.33, - "learning_rate": 3.97231501403377e-05, - "loss": 0.3538, + "learning_rate": 3.972311234095761e-05, + "loss": 0.2684, "step": 3887000 }, { "epoch": 2.33, - "learning_rate": 3.9721050174777136e-05, - "loss": 0.3501, + "learning_rate": 3.972101237539705e-05, + "loss": 0.2628, "step": 3887500 }, { "epoch": 2.33, - "learning_rate": 3.971895440914769e-05, - "loss": 0.3556, + "learning_rate": 3.97189166097676e-05, + "loss": 0.2772, "step": 3888000 }, { "epoch": 2.33, - "learning_rate": 3.971685444358713e-05, - "loss": 0.3459, + "learning_rate": 3.9716816644207035e-05, + "loss": 0.2655, "step": 3888500 }, { "epoch": 2.33, - "learning_rate": 3.971475447802656e-05, - "loss": 0.357, + "learning_rate": 3.971471667864647e-05, + "loss": 0.2695, "step": 3889000 }, { "epoch": 2.33, - "learning_rate": 3.9712654512465996e-05, - "loss": 0.3497, + "learning_rate": 3.971261671308591e-05, + "loss": 0.2679, "step": 3889500 }, { "epoch": 2.33, - "learning_rate": 3.971055454690543e-05, - "loss": 0.3621, + "learning_rate": 3.971051674752534e-05, + "loss": 0.2689, "step": 3890000 }, { "epoch": 2.33, - "learning_rate": 3.970845878127599e-05, - "loss": 0.359, + "learning_rate": 3.9708416781964775e-05, + "loss": 0.2753, "step": 3890500 }, { "epoch": 2.33, - "learning_rate": 3.9706358815715424e-05, - "loss": 0.3517, + "learning_rate": 3.970631681640421e-05, + "loss": 0.2671, "step": 3891000 }, { "epoch": 2.33, - "learning_rate": 3.970425885015486e-05, - "loss": 0.3464, + "learning_rate": 3.970421685084364e-05, + "loss": 0.2696, "step": 3891500 }, { "epoch": 2.33, - "learning_rate": 3.97021588845943e-05, - "loss": 0.3558, + "learning_rate": 3.970211688528308e-05, + "loss": 0.2666, "step": 3892000 }, { "epoch": 2.33, - "learning_rate": 3.9700058919033724e-05, - "loss": 0.3536, + "learning_rate": 3.9700016919722516e-05, + "loss": 0.265, "step": 3892500 }, { "epoch": 2.33, - "learning_rate": 3.969795895347316e-05, - "loss": 0.3472, + "learning_rate": 3.969791695416195e-05, + "loss": 0.2664, "step": 3893000 }, { "epoch": 2.33, - "learning_rate": 3.96958589879126e-05, - "loss": 0.3456, + "learning_rate": 3.969581698860139e-05, + "loss": 0.265, "step": 3893500 }, { "epoch": 2.33, - "learning_rate": 3.969375902235203e-05, - "loss": 0.3413, + "learning_rate": 3.969372542290306e-05, + "loss": 0.2641, "step": 3894000 }, { "epoch": 2.33, - "learning_rate": 3.969166745665371e-05, - "loss": 0.3457, + "learning_rate": 3.9691625457342504e-05, + "loss": 0.2611, "step": 3894500 }, { "epoch": 2.34, - "learning_rate": 3.9689567491093145e-05, - "loss": 0.3418, + "learning_rate": 3.968952549178194e-05, + "loss": 0.266, "step": 3895000 }, { "epoch": 2.34, - "learning_rate": 3.9687467525532585e-05, - "loss": 0.3556, + "learning_rate": 3.9687425526221364e-05, + "loss": 0.2736, "step": 3895500 }, { "epoch": 2.34, - "learning_rate": 3.968536755997202e-05, - "loss": 0.3569, + "learning_rate": 3.9685325560660804e-05, + "loss": 0.2708, "step": 3896000 }, { "epoch": 2.34, - "learning_rate": 3.968326759441145e-05, - "loss": 0.3508, + "learning_rate": 3.968322559510024e-05, + "loss": 0.2706, "step": 3896500 }, { "epoch": 2.34, - "learning_rate": 3.968116762885089e-05, - "loss": 0.3414, + "learning_rate": 3.968112562953967e-05, + "loss": 0.2612, "step": 3897000 }, { "epoch": 2.34, - "learning_rate": 3.967906766329032e-05, - "loss": 0.3505, + "learning_rate": 3.967902566397911e-05, + "loss": 0.2647, "step": 3897500 }, { "epoch": 2.34, - "learning_rate": 3.967696769772975e-05, - "loss": 0.3514, + "learning_rate": 3.9676929898349665e-05, + "loss": 0.2647, "step": 3898000 }, { "epoch": 2.34, - "learning_rate": 3.967487613203143e-05, - "loss": 0.3548, + "learning_rate": 3.96748299327891e-05, + "loss": 0.2667, "step": 3898500 }, { "epoch": 2.34, - "learning_rate": 3.9672776166470866e-05, - "loss": 0.3397, + "learning_rate": 3.967272996722854e-05, + "loss": 0.2633, "step": 3899000 }, { "epoch": 2.34, - "learning_rate": 3.967067620091031e-05, - "loss": 0.3488, + "learning_rate": 3.967063000166797e-05, + "loss": 0.2661, "step": 3899500 }, { "epoch": 2.34, - "learning_rate": 3.966857623534974e-05, - "loss": 0.3551, + "learning_rate": 3.966853423603853e-05, + "loss": 0.2696, "step": 3900000 }, { "epoch": 2.34, - "eval_loss": 0.34067773818969727, - "eval_runtime": 1122.4626, - "eval_samples_per_second": 469.254, - "eval_steps_per_second": 78.209, + "eval_loss": 0.24585743248462677, + "eval_runtime": 1453.3301, + "eval_samples_per_second": 362.423, + "eval_steps_per_second": 60.404, "step": 3900000 }, { "epoch": 2.34, - "learning_rate": 3.9666476269789174e-05, - "loss": 0.3501, + "learning_rate": 3.966643427047796e-05, + "loss": 0.2671, "step": 3900500 }, { "epoch": 2.34, - "learning_rate": 3.9664376304228614e-05, - "loss": 0.347, + "learning_rate": 3.96643343049174e-05, + "loss": 0.2694, "step": 3901000 }, { "epoch": 2.34, - "learning_rate": 3.966227633866805e-05, - "loss": 0.348, + "learning_rate": 3.966223433935683e-05, + "loss": 0.2649, "step": 3901500 }, { "epoch": 2.34, - "learning_rate": 3.966017637310748e-05, - "loss": 0.3535, + "learning_rate": 3.9660134373796266e-05, + "loss": 0.2733, "step": 3902000 }, { "epoch": 2.34, - "learning_rate": 3.965808060747804e-05, - "loss": 0.3533, + "learning_rate": 3.965803860816682e-05, + "loss": 0.2689, "step": 3902500 }, { "epoch": 2.34, - "learning_rate": 3.9655980641917475e-05, - "loss": 0.3552, + "learning_rate": 3.965593864260626e-05, + "loss": 0.2671, "step": 3903000 }, { "epoch": 2.34, - "learning_rate": 3.965388487628803e-05, - "loss": 0.3576, + "learning_rate": 3.965383867704569e-05, + "loss": 0.2694, "step": 3903500 }, { "epoch": 2.34, - "learning_rate": 3.965178491072746e-05, - "loss": 0.3488, + "learning_rate": 3.965173871148513e-05, + "loss": 0.2679, "step": 3904000 }, { "epoch": 2.34, - "learning_rate": 3.96496849451669e-05, - "loss": 0.3517, + "learning_rate": 3.964964294585569e-05, + "loss": 0.2691, "step": 3904500 }, { "epoch": 2.34, - "learning_rate": 3.9647584979606335e-05, - "loss": 0.3463, + "learning_rate": 3.964754298029512e-05, + "loss": 0.2601, "step": 3905000 }, { "epoch": 2.34, - "learning_rate": 3.964548501404577e-05, - "loss": 0.3451, + "learning_rate": 3.9645443014734554e-05, + "loss": 0.2682, "step": 3905500 }, { "epoch": 2.34, - "learning_rate": 3.964338924841632e-05, - "loss": 0.35, + "learning_rate": 3.9643343049173994e-05, + "loss": 0.2685, "step": 3906000 }, { "epoch": 2.34, - "learning_rate": 3.964129348278688e-05, - "loss": 0.3516, + "learning_rate": 3.964124308361343e-05, + "loss": 0.2746, "step": 3906500 }, { "epoch": 2.34, - "learning_rate": 3.9639193517226316e-05, - "loss": 0.3464, + "learning_rate": 3.963914731798399e-05, + "loss": 0.2677, "step": 3907000 }, { "epoch": 2.34, - "learning_rate": 3.963709355166575e-05, - "loss": 0.3483, + "learning_rate": 3.9637047352423415e-05, + "loss": 0.2661, "step": 3907500 }, { "epoch": 2.34, - "learning_rate": 3.963499358610519e-05, - "loss": 0.3477, + "learning_rate": 3.9634947386862855e-05, + "loss": 0.267, "step": 3908000 }, { "epoch": 2.34, - "learning_rate": 3.963289362054462e-05, - "loss": 0.3448, + "learning_rate": 3.963284742130229e-05, + "loss": 0.2683, "step": 3908500 }, { "epoch": 2.34, - "learning_rate": 3.963079365498406e-05, - "loss": 0.3605, + "learning_rate": 3.963075165567285e-05, + "loss": 0.2733, "step": 3909000 }, { "epoch": 2.34, - "learning_rate": 3.96286936894235e-05, - "loss": 0.3459, + "learning_rate": 3.962865169011228e-05, + "loss": 0.2694, "step": 3909500 }, { "epoch": 2.34, - "learning_rate": 3.962659372386293e-05, - "loss": 0.3468, + "learning_rate": 3.9626551724551716e-05, + "loss": 0.2627, "step": 3910000 }, { "epoch": 2.34, - "learning_rate": 3.9624493758302364e-05, - "loss": 0.3456, + "learning_rate": 3.962445175899115e-05, + "loss": 0.2635, "step": 3910500 }, { "epoch": 2.34, - "learning_rate": 3.9622393792741804e-05, - "loss": 0.3456, + "learning_rate": 3.962235179343058e-05, + "loss": 0.267, "step": 3911000 }, { "epoch": 2.35, - "learning_rate": 3.962029802711236e-05, - "loss": 0.3562, + "learning_rate": 3.962025602780114e-05, + "loss": 0.2749, "step": 3911500 }, { "epoch": 2.35, - "learning_rate": 3.961819806155179e-05, - "loss": 0.3563, + "learning_rate": 3.9618156062240576e-05, + "loss": 0.2656, "step": 3912000 }, { "epoch": 2.35, - "learning_rate": 3.9616098095991225e-05, - "loss": 0.3485, + "learning_rate": 3.961605609668001e-05, + "loss": 0.2654, "step": 3912500 }, { "epoch": 2.35, - "learning_rate": 3.9613998130430665e-05, - "loss": 0.3501, + "learning_rate": 3.961396033105057e-05, + "loss": 0.2654, "step": 3913000 }, { "epoch": 2.35, - "learning_rate": 3.96118981648701e-05, - "loss": 0.3481, + "learning_rate": 3.961186036549001e-05, + "loss": 0.2656, "step": 3913500 }, { "epoch": 2.35, - "learning_rate": 3.9609798199309525e-05, - "loss": 0.3474, + "learning_rate": 3.9609760399929444e-05, + "loss": 0.2689, "step": 3914000 }, { "epoch": 2.35, - "learning_rate": 3.9607702433680085e-05, - "loss": 0.3481, + "learning_rate": 3.960766043436887e-05, + "loss": 0.2743, "step": 3914500 }, { "epoch": 2.35, - "learning_rate": 3.9605602468119526e-05, - "loss": 0.3499, + "learning_rate": 3.960556466873943e-05, + "loss": 0.2645, "step": 3915000 }, { "epoch": 2.35, - "learning_rate": 3.960350250255896e-05, - "loss": 0.3517, + "learning_rate": 3.960346470317887e-05, + "loss": 0.2685, "step": 3915500 }, { "epoch": 2.35, - "learning_rate": 3.960140673692951e-05, - "loss": 0.3451, + "learning_rate": 3.9601364737618305e-05, + "loss": 0.2637, "step": 3916000 }, { "epoch": 2.35, - "learning_rate": 3.959930677136895e-05, - "loss": 0.3541, + "learning_rate": 3.959926477205774e-05, + "loss": 0.2686, "step": 3916500 }, { "epoch": 2.35, - "learning_rate": 3.9597206805808386e-05, - "loss": 0.353, + "learning_rate": 3.959716480649717e-05, + "loss": 0.2659, "step": 3917000 }, { "epoch": 2.35, - "learning_rate": 3.959510684024782e-05, - "loss": 0.3535, + "learning_rate": 3.9595064840936605e-05, + "loss": 0.2646, "step": 3917500 }, { "epoch": 2.35, - "learning_rate": 3.959300687468726e-05, - "loss": 0.3518, + "learning_rate": 3.959296487537604e-05, + "loss": 0.2701, "step": 3918000 }, { "epoch": 2.35, - "learning_rate": 3.9590906909126694e-05, - "loss": 0.3546, + "learning_rate": 3.959086490981548e-05, + "loss": 0.2702, "step": 3918500 }, { "epoch": 2.35, - "learning_rate": 3.958880694356612e-05, - "loss": 0.3445, + "learning_rate": 3.958876494425491e-05, + "loss": 0.2659, "step": 3919000 }, { "epoch": 2.35, - "learning_rate": 3.958670697800556e-05, - "loss": 0.3539, + "learning_rate": 3.9586669178625466e-05, + "loss": 0.2689, "step": 3919500 }, { "epoch": 2.35, - "learning_rate": 3.9584607012444994e-05, - "loss": 0.3396, + "learning_rate": 3.9584569213064906e-05, + "loss": 0.2631, "step": 3920000 }, { "epoch": 2.35, - "learning_rate": 3.958250704688443e-05, - "loss": 0.3458, + "learning_rate": 3.9582473447435466e-05, + "loss": 0.2661, "step": 3920500 }, { "epoch": 2.35, - "learning_rate": 3.958040708132387e-05, - "loss": 0.3515, + "learning_rate": 3.95803734818749e-05, + "loss": 0.2699, "step": 3921000 }, { "epoch": 2.35, - "learning_rate": 3.957831131569442e-05, - "loss": 0.3507, + "learning_rate": 3.957827351631433e-05, + "loss": 0.2706, "step": 3921500 }, { "epoch": 2.35, - "learning_rate": 3.9576211350133855e-05, - "loss": 0.354, + "learning_rate": 3.957617355075377e-05, + "loss": 0.2708, "step": 3922000 }, { "epoch": 2.35, - "learning_rate": 3.957411138457329e-05, - "loss": 0.3522, + "learning_rate": 3.95740735851932e-05, + "loss": 0.2709, "step": 3922500 }, { "epoch": 2.35, - "learning_rate": 3.957201141901273e-05, - "loss": 0.3609, + "learning_rate": 3.957197781956376e-05, + "loss": 0.2698, "step": 3923000 }, { "epoch": 2.35, - "learning_rate": 3.956991145345216e-05, - "loss": 0.3476, + "learning_rate": 3.9569877854003194e-05, + "loss": 0.27, "step": 3923500 }, { "epoch": 2.35, - "learning_rate": 3.9567811487891595e-05, - "loss": 0.3571, + "learning_rate": 3.956777788844263e-05, + "loss": 0.2733, "step": 3924000 }, { "epoch": 2.35, - "learning_rate": 3.9565711522331035e-05, - "loss": 0.3518, + "learning_rate": 3.956567792288206e-05, + "loss": 0.2636, "step": 3924500 }, { "epoch": 2.35, - "learning_rate": 3.956361155677047e-05, - "loss": 0.3518, + "learning_rate": 3.9563577957321494e-05, + "loss": 0.2689, "step": 3925000 }, { "epoch": 2.35, - "learning_rate": 3.956151579114102e-05, - "loss": 0.3496, + "learning_rate": 3.9561477991760935e-05, + "loss": 0.2634, "step": 3925500 }, { "epoch": 2.35, - "learning_rate": 3.955941582558046e-05, - "loss": 0.3601, + "learning_rate": 3.955937802620037e-05, + "loss": 0.2692, "step": 3926000 }, { "epoch": 2.35, - "learning_rate": 3.9557315860019896e-05, - "loss": 0.3504, + "learning_rate": 3.95572780606398e-05, + "loss": 0.2694, "step": 3926500 }, { "epoch": 2.35, - "learning_rate": 3.955521589445933e-05, - "loss": 0.3587, + "learning_rate": 3.955518649494148e-05, + "loss": 0.2704, "step": 3927000 }, { "epoch": 2.35, - "learning_rate": 3.955311592889877e-05, - "loss": 0.3585, + "learning_rate": 3.955308652938092e-05, + "loss": 0.2703, "step": 3927500 }, { "epoch": 2.35, - "learning_rate": 3.95510159633382e-05, - "loss": 0.3505, + "learning_rate": 3.9550986563820356e-05, + "loss": 0.2626, "step": 3928000 }, { "epoch": 2.36, - "learning_rate": 3.954891599777764e-05, - "loss": 0.3371, + "learning_rate": 3.954888659825979e-05, + "loss": 0.259, "step": 3928500 }, { "epoch": 2.36, - "learning_rate": 3.954681603221707e-05, - "loss": 0.3514, + "learning_rate": 3.954678663269922e-05, + "loss": 0.2703, "step": 3929000 }, { "epoch": 2.36, - "learning_rate": 3.954472026658763e-05, - "loss": 0.3515, + "learning_rate": 3.9544686667138656e-05, + "loss": 0.2702, "step": 3929500 }, { "epoch": 2.36, - "learning_rate": 3.9542620301027064e-05, - "loss": 0.345, + "learning_rate": 3.954258670157809e-05, + "loss": 0.2638, "step": 3930000 }, { "epoch": 2.36, - "learning_rate": 3.954052453539762e-05, - "loss": 0.3461, + "learning_rate": 3.954049093594865e-05, + "loss": 0.2624, "step": 3930500 }, { "epoch": 2.36, - "learning_rate": 3.953842456983705e-05, - "loss": 0.3539, + "learning_rate": 3.953839097038809e-05, + "loss": 0.2722, "step": 3931000 }, { "epoch": 2.36, - "learning_rate": 3.953632880420761e-05, - "loss": 0.3468, + "learning_rate": 3.953629100482752e-05, + "loss": 0.2675, "step": 3931500 }, { "epoch": 2.36, - "learning_rate": 3.9534228838647045e-05, - "loss": 0.3464, + "learning_rate": 3.953419103926695e-05, + "loss": 0.269, "step": 3932000 }, { "epoch": 2.36, - "learning_rate": 3.953212887308648e-05, - "loss": 0.3628, + "learning_rate": 3.953209107370639e-05, + "loss": 0.2667, "step": 3932500 }, { "epoch": 2.36, - "learning_rate": 3.953002890752592e-05, - "loss": 0.3537, + "learning_rate": 3.9529991108145824e-05, + "loss": 0.266, "step": 3933000 }, { "epoch": 2.36, - "learning_rate": 3.952792894196535e-05, - "loss": 0.3411, + "learning_rate": 3.952789114258526e-05, + "loss": 0.262, "step": 3933500 }, { "epoch": 2.36, - "learning_rate": 3.9525828976404786e-05, - "loss": 0.3518, + "learning_rate": 3.95257911770247e-05, + "loss": 0.2676, "step": 3934000 }, { "epoch": 2.36, - "learning_rate": 3.9523729010844226e-05, - "loss": 0.3458, + "learning_rate": 3.952369541139525e-05, + "loss": 0.2681, "step": 3934500 }, { "epoch": 2.36, - "learning_rate": 3.952162904528366e-05, - "loss": 0.3437, + "learning_rate": 3.9521595445834685e-05, + "loss": 0.2663, "step": 3935000 }, { "epoch": 2.36, - "learning_rate": 3.951952907972309e-05, - "loss": 0.3541, + "learning_rate": 3.9519495480274125e-05, + "loss": 0.2712, "step": 3935500 }, { "epoch": 2.36, - "learning_rate": 3.9517429114162526e-05, - "loss": 0.3611, + "learning_rate": 3.951739551471356e-05, + "loss": 0.2733, "step": 3936000 }, { "epoch": 2.36, - "learning_rate": 3.9515333348533086e-05, - "loss": 0.3498, + "learning_rate": 3.951529974908411e-05, + "loss": 0.266, "step": 3936500 }, { "epoch": 2.36, - "learning_rate": 3.951323338297252e-05, - "loss": 0.3528, + "learning_rate": 3.9513199783523545e-05, + "loss": 0.2679, "step": 3937000 }, { "epoch": 2.36, - "learning_rate": 3.9511133417411953e-05, - "loss": 0.3601, + "learning_rate": 3.9511099817962986e-05, + "loss": 0.267, "step": 3937500 }, { "epoch": 2.36, - "learning_rate": 3.9509033451851394e-05, - "loss": 0.3596, + "learning_rate": 3.950900825226466e-05, + "loss": 0.2713, "step": 3938000 }, { "epoch": 2.36, - "learning_rate": 3.950693348629082e-05, - "loss": 0.3563, + "learning_rate": 3.95069082867041e-05, + "loss": 0.2711, "step": 3938500 }, { "epoch": 2.36, - "learning_rate": 3.9504833520730254e-05, - "loss": 0.3442, + "learning_rate": 3.950480832114353e-05, + "loss": 0.2663, "step": 3939000 }, { "epoch": 2.36, - "learning_rate": 3.9502737755100814e-05, - "loss": 0.3475, + "learning_rate": 3.9502708355582967e-05, + "loss": 0.2679, "step": 3939500 }, { "epoch": 2.36, - "learning_rate": 3.9500637789540254e-05, - "loss": 0.3545, + "learning_rate": 3.950060839002241e-05, + "loss": 0.2688, "step": 3940000 }, { "epoch": 2.36, - "learning_rate": 3.949853782397969e-05, - "loss": 0.3545, + "learning_rate": 3.949850842446184e-05, + "loss": 0.2662, "step": 3940500 }, { "epoch": 2.36, - "learning_rate": 3.949643785841912e-05, - "loss": 0.356, + "learning_rate": 3.9496408458901274e-05, + "loss": 0.2699, "step": 3941000 }, { "epoch": 2.36, - "learning_rate": 3.9494337892858555e-05, - "loss": 0.3483, + "learning_rate": 3.9494312693271834e-05, + "loss": 0.2722, "step": 3941500 }, { "epoch": 2.36, - "learning_rate": 3.949223792729799e-05, - "loss": 0.3519, + "learning_rate": 3.949221272771127e-05, + "loss": 0.2716, "step": 3942000 }, { "epoch": 2.36, - "learning_rate": 3.949013796173743e-05, - "loss": 0.3496, + "learning_rate": 3.94901127621507e-05, + "loss": 0.265, "step": 3942500 }, { "epoch": 2.36, - "learning_rate": 3.948803799617686e-05, - "loss": 0.3531, + "learning_rate": 3.9488012796590134e-05, + "loss": 0.2682, "step": 3943000 }, { "epoch": 2.36, - "learning_rate": 3.9485942230547415e-05, - "loss": 0.3455, + "learning_rate": 3.948591283102957e-05, + "loss": 0.2688, "step": 3943500 }, { "epoch": 2.36, - "learning_rate": 3.948384226498685e-05, - "loss": 0.3575, + "learning_rate": 3.9483812865469e-05, + "loss": 0.2716, "step": 3944000 }, { "epoch": 2.36, - "learning_rate": 3.948174229942629e-05, - "loss": 0.3419, + "learning_rate": 3.948171289990844e-05, + "loss": 0.2602, "step": 3944500 }, { "epoch": 2.37, - "learning_rate": 3.947964233386572e-05, - "loss": 0.3461, + "learning_rate": 3.9479612934347875e-05, + "loss": 0.2661, "step": 3945000 }, { "epoch": 2.37, - "learning_rate": 3.94775507681674e-05, - "loss": 0.3508, + "learning_rate": 3.947751296878731e-05, + "loss": 0.2701, "step": 3945500 }, { "epoch": 2.37, - "learning_rate": 3.9475450802606837e-05, - "loss": 0.3429, + "learning_rate": 3.947541300322675e-05, + "loss": 0.2666, "step": 3946000 }, { "epoch": 2.37, - "learning_rate": 3.947335083704628e-05, - "loss": 0.3357, + "learning_rate": 3.947331303766618e-05, + "loss": 0.2602, "step": 3946500 }, { "epoch": 2.37, - "learning_rate": 3.947125087148571e-05, - "loss": 0.3553, + "learning_rate": 3.9471213072105616e-05, + "loss": 0.2678, "step": 3947000 }, { "epoch": 2.37, - "learning_rate": 3.9469150905925144e-05, - "loss": 0.3509, + "learning_rate": 3.946911730647617e-05, + "loss": 0.2654, "step": 3947500 }, { "epoch": 2.37, - "learning_rate": 3.946705094036458e-05, - "loss": 0.3658, + "learning_rate": 3.946701734091561e-05, + "loss": 0.2684, "step": 3948000 }, { "epoch": 2.37, - "learning_rate": 3.946495097480401e-05, - "loss": 0.3393, + "learning_rate": 3.946491737535504e-05, + "loss": 0.2611, "step": 3948500 }, { "epoch": 2.37, - "learning_rate": 3.946285520917457e-05, - "loss": 0.3584, + "learning_rate": 3.946281740979448e-05, + "loss": 0.2707, "step": 3949000 }, { "epoch": 2.37, - "learning_rate": 3.9460755243614004e-05, - "loss": 0.3447, + "learning_rate": 3.9460717444233917e-05, + "loss": 0.265, "step": 3949500 }, { "epoch": 2.37, - "learning_rate": 3.9458655278053445e-05, - "loss": 0.3487, + "learning_rate": 3.945861747867335e-05, + "loss": 0.2617, "step": 3950000 }, { "epoch": 2.37, - "learning_rate": 3.945655531249287e-05, - "loss": 0.3528, + "learning_rate": 3.9456521713043904e-05, + "loss": 0.2669, "step": 3950500 }, { "epoch": 2.37, - "learning_rate": 3.9454455346932305e-05, - "loss": 0.3457, + "learning_rate": 3.9454421747483344e-05, + "loss": 0.268, "step": 3951000 }, { "epoch": 2.37, - "learning_rate": 3.9452355381371745e-05, - "loss": 0.3559, + "learning_rate": 3.945232178192278e-05, + "loss": 0.2697, "step": 3951500 }, { "epoch": 2.37, - "learning_rate": 3.945025541581118e-05, - "loss": 0.3548, + "learning_rate": 3.945022181636221e-05, + "loss": 0.2706, "step": 3952000 }, { "epoch": 2.37, - "learning_rate": 3.944815545025061e-05, - "loss": 0.3541, + "learning_rate": 3.9448126050732764e-05, + "loss": 0.2704, "step": 3952500 }, { "epoch": 2.37, - "learning_rate": 3.944605968462117e-05, - "loss": 0.3411, + "learning_rate": 3.9446026085172205e-05, + "loss": 0.2596, "step": 3953000 }, { "epoch": 2.37, - "learning_rate": 3.9443959719060606e-05, - "loss": 0.3493, + "learning_rate": 3.944392611961164e-05, + "loss": 0.2661, "step": 3953500 }, { "epoch": 2.37, - "learning_rate": 3.944185975350004e-05, - "loss": 0.3549, + "learning_rate": 3.944182615405107e-05, + "loss": 0.2678, "step": 3954000 }, { "epoch": 2.37, - "learning_rate": 3.943975978793948e-05, - "loss": 0.3547, + "learning_rate": 3.9439730388421625e-05, + "loss": 0.2718, "step": 3954500 }, { "epoch": 2.37, - "learning_rate": 3.943766402231004e-05, - "loss": 0.3514, + "learning_rate": 3.9437630422861065e-05, + "loss": 0.2648, "step": 3955000 }, { "epoch": 2.37, - "learning_rate": 3.9435564056749466e-05, - "loss": 0.3535, + "learning_rate": 3.94355304573005e-05, + "loss": 0.2713, "step": 3955500 }, { "epoch": 2.37, - "learning_rate": 3.94334640911889e-05, - "loss": 0.3424, + "learning_rate": 3.943343049173994e-05, + "loss": 0.2651, "step": 3956000 }, { "epoch": 2.37, - "learning_rate": 3.943136832555946e-05, - "loss": 0.3518, + "learning_rate": 3.943133472611049e-05, + "loss": 0.2696, "step": 3956500 }, { "epoch": 2.37, - "learning_rate": 3.94292683599989e-05, - "loss": 0.343, + "learning_rate": 3.9429234760549926e-05, + "loss": 0.26, "step": 3957000 }, { "epoch": 2.37, - "learning_rate": 3.942716839443833e-05, - "loss": 0.3416, + "learning_rate": 3.942713479498936e-05, + "loss": 0.2623, "step": 3957500 }, { "epoch": 2.37, - "learning_rate": 3.942506842887776e-05, - "loss": 0.3451, + "learning_rate": 3.94250348294288e-05, + "loss": 0.2691, "step": 3958000 }, { "epoch": 2.37, - "learning_rate": 3.94229684633172e-05, - "loss": 0.3552, + "learning_rate": 3.942293906379935e-05, + "loss": 0.2714, "step": 3958500 }, { "epoch": 2.37, - "learning_rate": 3.942087269768776e-05, - "loss": 0.3449, + "learning_rate": 3.942083909823879e-05, + "loss": 0.2692, "step": 3959000 }, { "epoch": 2.37, - "learning_rate": 3.9418772732127195e-05, - "loss": 0.3464, + "learning_rate": 3.941873913267822e-05, + "loss": 0.263, "step": 3959500 }, { "epoch": 2.37, - "learning_rate": 3.941667276656663e-05, - "loss": 0.3471, + "learning_rate": 3.941663916711766e-05, + "loss": 0.2653, "step": 3960000 }, { "epoch": 2.37, - "learning_rate": 3.941457280100606e-05, - "loss": 0.346, + "learning_rate": 3.9414543401488214e-05, + "loss": 0.2603, "step": 3960500 }, { "epoch": 2.37, - "learning_rate": 3.9412472835445495e-05, - "loss": 0.3494, + "learning_rate": 3.9412447635858774e-05, + "loss": 0.266, "step": 3961000 }, { "epoch": 2.38, - "learning_rate": 3.9410372869884935e-05, - "loss": 0.3513, + "learning_rate": 3.941034767029821e-05, + "loss": 0.2683, "step": 3961500 }, { "epoch": 2.38, - "learning_rate": 3.940827290432437e-05, - "loss": 0.3465, + "learning_rate": 3.940824770473765e-05, + "loss": 0.2622, "step": 3962000 }, { "epoch": 2.38, - "learning_rate": 3.94061729387638e-05, - "loss": 0.3447, + "learning_rate": 3.9406147739177075e-05, + "loss": 0.2607, "step": 3962500 }, { "epoch": 2.38, - "learning_rate": 3.9404077173134356e-05, - "loss": 0.3589, + "learning_rate": 3.940404777361651e-05, + "loss": 0.2741, "step": 3963000 }, { "epoch": 2.38, - "learning_rate": 3.9401977207573796e-05, - "loss": 0.3459, + "learning_rate": 3.940194780805595e-05, + "loss": 0.2683, "step": 3963500 }, { "epoch": 2.38, - "learning_rate": 3.939987724201323e-05, - "loss": 0.3516, + "learning_rate": 3.939984784249538e-05, + "loss": 0.2663, "step": 3964000 }, { "epoch": 2.38, - "learning_rate": 3.939778147638379e-05, - "loss": 0.3438, + "learning_rate": 3.9397747876934815e-05, + "loss": 0.2663, "step": 3964500 }, { "epoch": 2.38, - "learning_rate": 3.9395681510823217e-05, - "loss": 0.3451, + "learning_rate": 3.9395647911374256e-05, + "loss": 0.2643, "step": 3965000 }, { "epoch": 2.38, - "learning_rate": 3.939358154526266e-05, - "loss": 0.3543, + "learning_rate": 3.939355214574481e-05, + "loss": 0.267, "step": 3965500 }, { "epoch": 2.38, - "learning_rate": 3.939148157970209e-05, - "loss": 0.3501, + "learning_rate": 3.939145218018424e-05, + "loss": 0.2671, "step": 3966000 }, { "epoch": 2.38, - "learning_rate": 3.9389381614141524e-05, - "loss": 0.3484, + "learning_rate": 3.9389352214623676e-05, + "loss": 0.2661, "step": 3966500 }, { "epoch": 2.38, - "learning_rate": 3.9387281648580964e-05, - "loss": 0.3426, + "learning_rate": 3.9387252249063116e-05, + "loss": 0.2635, "step": 3967000 }, { "epoch": 2.38, - "learning_rate": 3.93851816830204e-05, - "loss": 0.3423, + "learning_rate": 3.938515648343367e-05, + "loss": 0.2651, "step": 3967500 }, { "epoch": 2.38, - "learning_rate": 3.938308171745983e-05, - "loss": 0.3504, + "learning_rate": 3.9383056517873103e-05, + "loss": 0.2671, "step": 3968000 }, { "epoch": 2.38, - "learning_rate": 3.938098595183039e-05, - "loss": 0.3528, + "learning_rate": 3.9380956552312544e-05, + "loss": 0.2663, "step": 3968500 }, { "epoch": 2.38, - "learning_rate": 3.9378885986269825e-05, - "loss": 0.354, + "learning_rate": 3.937885658675198e-05, + "loss": 0.2693, "step": 3969000 }, { "epoch": 2.38, - "learning_rate": 3.937678602070926e-05, - "loss": 0.3466, + "learning_rate": 3.937676082112253e-05, + "loss": 0.2679, "step": 3969500 }, { "epoch": 2.38, - "learning_rate": 3.93746860551487e-05, - "loss": 0.3504, + "learning_rate": 3.9374660855561964e-05, + "loss": 0.2672, "step": 3970000 }, { "epoch": 2.38, - "learning_rate": 3.937259028951925e-05, - "loss": 0.3522, + "learning_rate": 3.9372560890001404e-05, + "loss": 0.2655, "step": 3970500 }, { "epoch": 2.38, - "learning_rate": 3.9370490323958685e-05, - "loss": 0.344, + "learning_rate": 3.937046092444084e-05, + "loss": 0.2605, "step": 3971000 }, { "epoch": 2.38, - "learning_rate": 3.9368394558329246e-05, - "loss": 0.3465, + "learning_rate": 3.93683651588114e-05, + "loss": 0.2633, "step": 3971500 }, { "epoch": 2.38, - "learning_rate": 3.936629459276867e-05, - "loss": 0.3542, + "learning_rate": 3.936626939318195e-05, + "loss": 0.2708, "step": 3972000 }, { "epoch": 2.38, - "learning_rate": 3.936419462720811e-05, - "loss": 0.3457, + "learning_rate": 3.9364169427621385e-05, + "loss": 0.2633, "step": 3972500 }, { "epoch": 2.38, - "learning_rate": 3.9362094661647546e-05, - "loss": 0.3512, + "learning_rate": 3.9362069462060825e-05, + "loss": 0.2726, "step": 3973000 }, { "epoch": 2.38, - "learning_rate": 3.935999469608698e-05, - "loss": 0.3541, + "learning_rate": 3.935996949650026e-05, + "loss": 0.27, "step": 3973500 }, { "epoch": 2.38, - "learning_rate": 3.935789473052642e-05, - "loss": 0.3526, + "learning_rate": 3.9357869530939686e-05, + "loss": 0.2666, "step": 3974000 }, { "epoch": 2.38, - "learning_rate": 3.935579476496585e-05, - "loss": 0.3482, + "learning_rate": 3.9355769565379126e-05, + "loss": 0.2683, "step": 3974500 }, { "epoch": 2.38, - "learning_rate": 3.935369479940529e-05, - "loss": 0.3557, + "learning_rate": 3.935366959981856e-05, + "loss": 0.2712, "step": 3975000 }, { "epoch": 2.38, - "learning_rate": 3.935159903377585e-05, - "loss": 0.3473, + "learning_rate": 3.9351569634258e-05, + "loss": 0.2659, "step": 3975500 }, { "epoch": 2.38, - "learning_rate": 3.934950326814641e-05, - "loss": 0.3494, + "learning_rate": 3.934947386862856e-05, + "loss": 0.2694, "step": 3976000 }, { "epoch": 2.38, - "learning_rate": 3.9347403302585834e-05, - "loss": 0.3408, + "learning_rate": 3.9347373903067987e-05, + "loss": 0.2667, "step": 3976500 }, { "epoch": 2.38, - "learning_rate": 3.934530333702527e-05, - "loss": 0.3608, + "learning_rate": 3.934527393750742e-05, + "loss": 0.2751, "step": 3977000 }, { "epoch": 2.38, - "learning_rate": 3.934320337146471e-05, - "loss": 0.3558, + "learning_rate": 3.934317397194686e-05, + "loss": 0.2677, "step": 3977500 }, { "epoch": 2.38, - "learning_rate": 3.934110340590414e-05, - "loss": 0.3496, + "learning_rate": 3.9341074006386294e-05, + "loss": 0.2671, "step": 3978000 }, { "epoch": 2.39, - "learning_rate": 3.9339003440343575e-05, - "loss": 0.3489, + "learning_rate": 3.9338978240756854e-05, + "loss": 0.2674, "step": 3978500 }, { "epoch": 2.39, - "learning_rate": 3.933690767471413e-05, - "loss": 0.3557, + "learning_rate": 3.933687827519628e-05, + "loss": 0.2657, "step": 3979000 }, { "epoch": 2.39, - "learning_rate": 3.933480770915357e-05, - "loss": 0.3339, + "learning_rate": 3.933477830963572e-05, + "loss": 0.2598, "step": 3979500 }, { "epoch": 2.39, - "learning_rate": 3.9332707743593e-05, - "loss": 0.3451, + "learning_rate": 3.9332678344075154e-05, + "loss": 0.2652, "step": 3980000 }, { "epoch": 2.39, - "learning_rate": 3.9330607778032435e-05, - "loss": 0.3417, + "learning_rate": 3.9330582578445715e-05, + "loss": 0.2638, "step": 3980500 }, { "epoch": 2.39, - "learning_rate": 3.9328507812471876e-05, - "loss": 0.3504, + "learning_rate": 3.932848261288515e-05, + "loss": 0.2667, "step": 3981000 }, { "epoch": 2.39, - "learning_rate": 3.932640784691131e-05, - "loss": 0.3472, + "learning_rate": 3.932638264732458e-05, + "loss": 0.2636, "step": 3981500 }, { "epoch": 2.39, - "learning_rate": 3.932430788135074e-05, - "loss": 0.3539, + "learning_rate": 3.9324282681764015e-05, + "loss": 0.2676, "step": 3982000 }, { "epoch": 2.39, - "learning_rate": 3.932220791579018e-05, - "loss": 0.356, + "learning_rate": 3.9322186916134576e-05, + "loss": 0.2702, "step": 3982500 }, { "epoch": 2.39, - "learning_rate": 3.932011635009186e-05, - "loss": 0.3619, + "learning_rate": 3.9320086950574016e-05, + "loss": 0.274, "step": 3983000 }, { "epoch": 2.39, - "learning_rate": 3.93180163845313e-05, - "loss": 0.3545, + "learning_rate": 3.931798698501344e-05, + "loss": 0.271, "step": 3983500 }, { "epoch": 2.39, - "learning_rate": 3.9315916418970723e-05, - "loss": 0.355, + "learning_rate": 3.9315887019452876e-05, + "loss": 0.2739, "step": 3984000 }, { "epoch": 2.39, - "learning_rate": 3.9313816453410164e-05, - "loss": 0.341, + "learning_rate": 3.9313791253823436e-05, + "loss": 0.2611, "step": 3984500 }, { "epoch": 2.39, - "learning_rate": 3.93117164878496e-05, - "loss": 0.3472, + "learning_rate": 3.9311691288262876e-05, + "loss": 0.2619, "step": 3985000 }, { "epoch": 2.39, - "learning_rate": 3.930961652228903e-05, - "loss": 0.3482, + "learning_rate": 3.930959132270231e-05, + "loss": 0.2598, "step": 3985500 }, { "epoch": 2.39, - "learning_rate": 3.930751655672847e-05, - "loss": 0.3449, + "learning_rate": 3.9307495557072864e-05, + "loss": 0.264, "step": 3986000 }, { "epoch": 2.39, - "learning_rate": 3.9305416591167904e-05, - "loss": 0.3524, + "learning_rate": 3.93053955915123e-05, + "loss": 0.2658, "step": 3986500 }, { "epoch": 2.39, - "learning_rate": 3.930331662560734e-05, - "loss": 0.3468, + "learning_rate": 3.930329562595174e-05, + "loss": 0.2637, "step": 3987000 }, { "epoch": 2.39, - "learning_rate": 3.930121666004678e-05, - "loss": 0.3407, + "learning_rate": 3.930119566039117e-05, + "loss": 0.262, "step": 3987500 }, { "epoch": 2.39, - "learning_rate": 3.929911669448621e-05, - "loss": 0.3521, + "learning_rate": 3.9299095694830604e-05, + "loss": 0.2697, "step": 3988000 }, { "epoch": 2.39, - "learning_rate": 3.9297016728925645e-05, - "loss": 0.349, + "learning_rate": 3.929699572927004e-05, + "loss": 0.2642, "step": 3988500 }, { "epoch": 2.39, - "learning_rate": 3.92949209632962e-05, - "loss": 0.3544, + "learning_rate": 3.929489576370947e-05, + "loss": 0.2657, "step": 3989000 }, { "epoch": 2.39, - "learning_rate": 3.929282099773564e-05, - "loss": 0.3413, + "learning_rate": 3.929279579814891e-05, + "loss": 0.2638, "step": 3989500 }, { "epoch": 2.39, - "learning_rate": 3.929072103217507e-05, - "loss": 0.3454, + "learning_rate": 3.9290695832588345e-05, + "loss": 0.2665, "step": 3990000 }, { "epoch": 2.39, - "learning_rate": 3.9288621066614506e-05, - "loss": 0.3483, + "learning_rate": 3.9288600066958905e-05, + "loss": 0.2678, "step": 3990500 }, { "epoch": 2.39, - "learning_rate": 3.9286521101053946e-05, - "loss": 0.3458, + "learning_rate": 3.928650010139833e-05, + "loss": 0.2618, "step": 3991000 }, { "epoch": 2.39, - "learning_rate": 3.928442113549338e-05, - "loss": 0.3537, + "learning_rate": 3.928440013583777e-05, + "loss": 0.2722, "step": 3991500 }, { "epoch": 2.39, - "learning_rate": 3.928232116993281e-05, - "loss": 0.3408, + "learning_rate": 3.9282300170277205e-05, + "loss": 0.2636, "step": 3992000 }, { "epoch": 2.39, - "learning_rate": 3.9280221204372246e-05, - "loss": 0.3583, + "learning_rate": 3.9280204404647766e-05, + "loss": 0.2731, "step": 3992500 }, { "epoch": 2.39, - "learning_rate": 3.927812543874281e-05, - "loss": 0.3463, + "learning_rate": 3.92781044390872e-05, + "loss": 0.269, "step": 3993000 }, { "epoch": 2.39, - "learning_rate": 3.927602547318224e-05, - "loss": 0.3457, + "learning_rate": 3.927600447352663e-05, + "loss": 0.2658, "step": 3993500 }, { "epoch": 2.39, - "learning_rate": 3.9273925507621674e-05, - "loss": 0.3519, + "learning_rate": 3.9273904507966066e-05, + "loss": 0.269, "step": 3994000 }, { "epoch": 2.39, - "learning_rate": 3.927182554206111e-05, - "loss": 0.3517, + "learning_rate": 3.9271808742336627e-05, + "loss": 0.2695, "step": 3994500 }, { "epoch": 2.4, - "learning_rate": 3.926972557650054e-05, - "loss": 0.3444, + "learning_rate": 3.926970877677606e-05, + "loss": 0.2621, "step": 3995000 }, { "epoch": 2.4, - "learning_rate": 3.926762561093998e-05, - "loss": 0.3458, + "learning_rate": 3.9267608811215493e-05, + "loss": 0.265, "step": 3995500 }, { "epoch": 2.4, - "learning_rate": 3.9265525645379414e-05, - "loss": 0.3466, + "learning_rate": 3.926550884565493e-05, + "loss": 0.2661, "step": 3996000 }, { "epoch": 2.4, - "learning_rate": 3.926342987974997e-05, - "loss": 0.3476, + "learning_rate": 3.926341308002549e-05, + "loss": 0.2666, "step": 3996500 }, { "epoch": 2.4, - "learning_rate": 3.92613299141894e-05, - "loss": 0.3477, + "learning_rate": 3.926131311446493e-05, + "loss": 0.2605, "step": 3997000 }, { "epoch": 2.4, - "learning_rate": 3.925922994862884e-05, - "loss": 0.339, + "learning_rate": 3.925921314890436e-05, + "loss": 0.2612, "step": 3997500 }, { "epoch": 2.4, - "learning_rate": 3.9257129983068275e-05, - "loss": 0.3598, + "learning_rate": 3.925711318334379e-05, + "loss": 0.2727, "step": 3998000 }, { "epoch": 2.4, - "learning_rate": 3.925503001750771e-05, - "loss": 0.3497, + "learning_rate": 3.925501741771435e-05, + "loss": 0.2648, "step": 3998500 }, { "epoch": 2.4, - "learning_rate": 3.925293005194715e-05, - "loss": 0.3415, + "learning_rate": 3.925291745215379e-05, + "loss": 0.2664, "step": 3999000 }, { "epoch": 2.4, - "learning_rate": 3.925083008638658e-05, - "loss": 0.3483, + "learning_rate": 3.925081748659322e-05, + "loss": 0.2687, "step": 3999500 }, { "epoch": 2.4, - "learning_rate": 3.924873012082602e-05, - "loss": 0.3535, + "learning_rate": 3.9248717521032655e-05, + "loss": 0.2657, "step": 4000000 }, { "epoch": 2.4, - "eval_loss": 0.3390848636627197, - "eval_runtime": 1120.7204, - "eval_samples_per_second": 469.983, - "eval_steps_per_second": 78.331, + "eval_loss": 0.24457141757011414, + "eval_runtime": 1456.0304, + "eval_samples_per_second": 361.751, + "eval_steps_per_second": 60.292, "step": 4000000 }, { "epoch": 2.4, - "learning_rate": 3.9246634355196576e-05, - "loss": 0.3493, + "learning_rate": 3.924662175540321e-05, + "loss": 0.2631, "step": 4000500 }, { "epoch": 2.4, - "learning_rate": 3.924453438963601e-05, - "loss": 0.3542, + "learning_rate": 3.924452178984265e-05, + "loss": 0.2683, "step": 4001000 }, { "epoch": 2.4, - "learning_rate": 3.924243442407544e-05, - "loss": 0.3466, + "learning_rate": 3.924242182428208e-05, + "loss": 0.2608, "step": 4001500 }, { "epoch": 2.4, - "learning_rate": 3.9240338658445996e-05, - "loss": 0.3511, + "learning_rate": 3.9240321858721516e-05, + "loss": 0.2633, "step": 4002000 }, { "epoch": 2.4, - "learning_rate": 3.9238238692885437e-05, - "loss": 0.3562, + "learning_rate": 3.9238221893160956e-05, + "loss": 0.272, "step": 4002500 }, { "epoch": 2.4, - "learning_rate": 3.923613872732487e-05, - "loss": 0.347, + "learning_rate": 3.923612192760038e-05, + "loss": 0.2654, "step": 4003000 }, { "epoch": 2.4, - "learning_rate": 3.9234038761764303e-05, - "loss": 0.3421, + "learning_rate": 3.923402196203982e-05, + "loss": 0.2685, "step": 4003500 }, { "epoch": 2.4, - "learning_rate": 3.9231938796203744e-05, - "loss": 0.3518, + "learning_rate": 3.9231921996479256e-05, + "loss": 0.2626, "step": 4004000 }, { "epoch": 2.4, - "learning_rate": 3.922983883064318e-05, - "loss": 0.3472, + "learning_rate": 3.922982623084982e-05, + "loss": 0.2683, "step": 4004500 }, { "epoch": 2.4, - "learning_rate": 3.922773886508261e-05, - "loss": 0.3464, + "learning_rate": 3.9227726265289244e-05, + "loss": 0.2661, "step": 4005000 }, { "epoch": 2.4, - "learning_rate": 3.9225643099453164e-05, - "loss": 0.3463, + "learning_rate": 3.9225626299728684e-05, + "loss": 0.2667, "step": 4005500 }, { "epoch": 2.4, - "learning_rate": 3.9223543133892604e-05, - "loss": 0.3412, + "learning_rate": 3.922352633416812e-05, + "loss": 0.2608, "step": 4006000 }, { "epoch": 2.4, - "learning_rate": 3.922144316833204e-05, - "loss": 0.3514, + "learning_rate": 3.922143056853868e-05, + "loss": 0.2658, "step": 4006500 }, { "epoch": 2.4, - "learning_rate": 3.921934320277148e-05, - "loss": 0.3544, + "learning_rate": 3.921933060297811e-05, + "loss": 0.2669, "step": 4007000 }, { "epoch": 2.4, - "learning_rate": 3.921724323721091e-05, - "loss": 0.3506, + "learning_rate": 3.9217230637417544e-05, + "loss": 0.265, "step": 4007500 }, { "epoch": 2.4, - "learning_rate": 3.9215143271650345e-05, - "loss": 0.3442, + "learning_rate": 3.921513067185698e-05, + "loss": 0.2665, "step": 4008000 }, { "epoch": 2.4, - "learning_rate": 3.9213043306089785e-05, - "loss": 0.3438, + "learning_rate": 3.921303490622754e-05, + "loss": 0.2606, "step": 4008500 }, { "epoch": 2.4, - "learning_rate": 3.921094334052921e-05, - "loss": 0.3456, + "learning_rate": 3.921093494066697e-05, + "loss": 0.2649, "step": 4009000 }, { "epoch": 2.4, - "learning_rate": 3.920884757489977e-05, - "loss": 0.3482, + "learning_rate": 3.920883497510641e-05, + "loss": 0.2639, "step": 4009500 }, { "epoch": 2.4, - "learning_rate": 3.9206747609339206e-05, - "loss": 0.3536, + "learning_rate": 3.920673500954584e-05, + "loss": 0.2655, "step": 4010000 }, { "epoch": 2.4, - "learning_rate": 3.9204647643778646e-05, - "loss": 0.3668, + "learning_rate": 3.92046392439164e-05, + "loss": 0.2722, "step": 4010500 }, { "epoch": 2.4, - "learning_rate": 3.920254767821808e-05, - "loss": 0.3442, + "learning_rate": 3.920253927835584e-05, + "loss": 0.2623, "step": 4011000 }, { "epoch": 2.41, - "learning_rate": 3.920045191258863e-05, - "loss": 0.3416, + "learning_rate": 3.920043931279527e-05, + "loss": 0.2646, "step": 4011500 }, { "epoch": 2.41, - "learning_rate": 3.919835614695919e-05, - "loss": 0.3488, + "learning_rate": 3.9198339347234706e-05, + "loss": 0.264, "step": 4012000 }, { "epoch": 2.41, - "learning_rate": 3.919625618139863e-05, - "loss": 0.3482, + "learning_rate": 3.919623938167414e-05, + "loss": 0.2688, "step": 4012500 }, { "epoch": 2.41, - "learning_rate": 3.919415621583806e-05, - "loss": 0.3417, + "learning_rate": 3.919413941611357e-05, + "loss": 0.2618, "step": 4013000 }, { "epoch": 2.41, - "learning_rate": 3.9192056250277494e-05, - "loss": 0.3474, + "learning_rate": 3.9192039450553007e-05, + "loss": 0.2644, "step": 4013500 }, { "epoch": 2.41, - "learning_rate": 3.918996048464805e-05, - "loss": 0.3511, + "learning_rate": 3.918993948499245e-05, + "loss": 0.2645, "step": 4014000 }, { "epoch": 2.41, - "learning_rate": 3.918786051908749e-05, - "loss": 0.3606, + "learning_rate": 3.9187843719363e-05, + "loss": 0.2705, "step": 4014500 }, { "epoch": 2.41, - "learning_rate": 3.918576055352692e-05, - "loss": 0.3507, + "learning_rate": 3.9185743753802434e-05, + "loss": 0.266, "step": 4015000 }, { "epoch": 2.41, - "learning_rate": 3.9183660587966355e-05, - "loss": 0.3467, + "learning_rate": 3.918364378824187e-05, + "loss": 0.263, "step": 4015500 }, { "epoch": 2.41, - "learning_rate": 3.9181560622405795e-05, - "loss": 0.3453, + "learning_rate": 3.918154382268131e-05, + "loss": 0.2659, "step": 4016000 }, { "epoch": 2.41, - "learning_rate": 3.917946065684523e-05, - "loss": 0.3356, + "learning_rate": 3.917945225698299e-05, + "loss": 0.2584, "step": 4016500 }, { "epoch": 2.41, - "learning_rate": 3.917736489121578e-05, - "loss": 0.3546, + "learning_rate": 3.917735229142242e-05, + "loss": 0.2677, "step": 4017000 }, { "epoch": 2.41, - "learning_rate": 3.9175264925655215e-05, - "loss": 0.3481, + "learning_rate": 3.9175252325861855e-05, + "loss": 0.2691, "step": 4017500 }, { "epoch": 2.41, - "learning_rate": 3.917316916002577e-05, - "loss": 0.3452, + "learning_rate": 3.9173152360301295e-05, + "loss": 0.2637, "step": 4018000 }, { "epoch": 2.41, - "learning_rate": 3.917106919446521e-05, - "loss": 0.35, + "learning_rate": 3.917105239474073e-05, + "loss": 0.2693, "step": 4018500 }, { "epoch": 2.41, - "learning_rate": 3.916896922890464e-05, - "loss": 0.3542, + "learning_rate": 3.916895242918016e-05, + "loss": 0.268, "step": 4019000 }, { "epoch": 2.41, - "learning_rate": 3.916686926334408e-05, - "loss": 0.3514, + "learning_rate": 3.9166852463619596e-05, + "loss": 0.2648, "step": 4019500 }, { "epoch": 2.41, - "learning_rate": 3.9164769297783516e-05, - "loss": 0.3447, + "learning_rate": 3.916475249805903e-05, + "loss": 0.2702, "step": 4020000 }, { "epoch": 2.41, - "learning_rate": 3.916266933222295e-05, - "loss": 0.3438, + "learning_rate": 3.916265253249846e-05, + "loss": 0.262, "step": 4020500 }, { "epoch": 2.41, - "learning_rate": 3.916056936666239e-05, - "loss": 0.3535, + "learning_rate": 3.916055676686902e-05, + "loss": 0.268, "step": 4021000 }, { "epoch": 2.41, - "learning_rate": 3.915846940110182e-05, - "loss": 0.3414, + "learning_rate": 3.915845680130846e-05, + "loss": 0.265, "step": 4021500 }, { "epoch": 2.41, - "learning_rate": 3.915637363547238e-05, - "loss": 0.3505, + "learning_rate": 3.915635683574789e-05, + "loss": 0.2658, "step": 4022000 }, { "epoch": 2.41, - "learning_rate": 3.915427366991181e-05, - "loss": 0.3454, + "learning_rate": 3.915425687018732e-05, + "loss": 0.2655, "step": 4022500 }, { "epoch": 2.41, - "learning_rate": 3.9152177904282364e-05, - "loss": 0.3457, + "learning_rate": 3.9152161104557884e-05, + "loss": 0.2644, "step": 4023000 }, { "epoch": 2.41, - "learning_rate": 3.9150077938721804e-05, - "loss": 0.3439, + "learning_rate": 3.9150061138997324e-05, + "loss": 0.2629, "step": 4023500 }, { "epoch": 2.41, - "learning_rate": 3.914797797316124e-05, - "loss": 0.3537, + "learning_rate": 3.914796117343676e-05, + "loss": 0.2724, "step": 4024000 }, { "epoch": 2.41, - "learning_rate": 3.914587800760067e-05, - "loss": 0.3487, + "learning_rate": 3.914586120787619e-05, + "loss": 0.2633, "step": 4024500 }, { "epoch": 2.41, - "learning_rate": 3.914377804204011e-05, - "loss": 0.3543, + "learning_rate": 3.914376544224675e-05, + "loss": 0.2687, "step": 4025000 }, { "epoch": 2.41, - "learning_rate": 3.9141678076479545e-05, - "loss": 0.358, + "learning_rate": 3.9141665476686184e-05, + "loss": 0.2739, "step": 4025500 }, { "epoch": 2.41, - "learning_rate": 3.913957811091898e-05, - "loss": 0.3399, + "learning_rate": 3.913956551112562e-05, + "loss": 0.2665, "step": 4026000 }, { "epoch": 2.41, - "learning_rate": 3.913747814535842e-05, - "loss": 0.3561, + "learning_rate": 3.913746554556505e-05, + "loss": 0.2658, "step": 4026500 }, { "epoch": 2.41, - "learning_rate": 3.913537817979785e-05, - "loss": 0.3454, + "learning_rate": 3.913536977993561e-05, + "loss": 0.2647, "step": 4027000 }, { "epoch": 2.41, - "learning_rate": 3.9133278214237285e-05, - "loss": 0.3495, + "learning_rate": 3.9133274014306165e-05, + "loss": 0.2666, "step": 4027500 }, { "epoch": 2.41, - "learning_rate": 3.9131182448607846e-05, - "loss": 0.3555, + "learning_rate": 3.91311740487456e-05, + "loss": 0.2645, "step": 4028000 }, { "epoch": 2.42, - "learning_rate": 3.912908248304728e-05, - "loss": 0.3516, + "learning_rate": 3.912907408318503e-05, + "loss": 0.2667, "step": 4028500 }, { "epoch": 2.42, - "learning_rate": 3.912698251748671e-05, - "loss": 0.3534, + "learning_rate": 3.912697411762447e-05, + "loss": 0.2659, "step": 4029000 }, { "epoch": 2.42, - "learning_rate": 3.912488255192615e-05, - "loss": 0.3555, + "learning_rate": 3.9124874152063906e-05, + "loss": 0.2735, "step": 4029500 }, { "epoch": 2.42, - "learning_rate": 3.9122782586365586e-05, - "loss": 0.3518, + "learning_rate": 3.912277418650334e-05, + "loss": 0.267, "step": 4030000 }, { "epoch": 2.42, - "learning_rate": 3.912068262080501e-05, - "loss": 0.3565, + "learning_rate": 3.912067422094278e-05, + "loss": 0.2715, "step": 4030500 }, { "epoch": 2.42, - "learning_rate": 3.9118586855175573e-05, - "loss": 0.347, + "learning_rate": 3.911857425538221e-05, + "loss": 0.2657, "step": 4031000 }, { "epoch": 2.42, - "learning_rate": 3.9116486889615014e-05, - "loss": 0.3424, + "learning_rate": 3.911647848975277e-05, + "loss": 0.2663, "step": 4031500 }, { "epoch": 2.42, - "learning_rate": 3.911438692405445e-05, - "loss": 0.363, + "learning_rate": 3.911437852419221e-05, + "loss": 0.2742, "step": 4032000 }, { "epoch": 2.42, - "learning_rate": 3.911228695849388e-05, - "loss": 0.3485, + "learning_rate": 3.911227855863164e-05, + "loss": 0.2645, "step": 4032500 }, { "epoch": 2.42, - "learning_rate": 3.9110186992933314e-05, - "loss": 0.351, + "learning_rate": 3.9110182793002194e-05, + "loss": 0.2697, "step": 4033000 }, { "epoch": 2.42, - "learning_rate": 3.910808702737275e-05, - "loss": 0.3405, + "learning_rate": 3.910808282744163e-05, + "loss": 0.2666, "step": 4033500 }, { "epoch": 2.42, - "learning_rate": 3.910599546167443e-05, - "loss": 0.3656, + "learning_rate": 3.910598286188107e-05, + "loss": 0.2746, "step": 4034000 }, { "epoch": 2.42, - "learning_rate": 3.910389549611386e-05, - "loss": 0.3583, + "learning_rate": 3.91038828963205e-05, + "loss": 0.2653, "step": 4034500 }, { "epoch": 2.42, - "learning_rate": 3.91017955305533e-05, - "loss": 0.3516, + "learning_rate": 3.9101782930759935e-05, + "loss": 0.2705, "step": 4035000 }, { "epoch": 2.42, - "learning_rate": 3.9099695564992735e-05, - "loss": 0.3528, + "learning_rate": 3.9099682965199375e-05, + "loss": 0.2716, "step": 4035500 }, { "epoch": 2.42, - "learning_rate": 3.909759559943217e-05, - "loss": 0.3414, + "learning_rate": 3.90975829996388e-05, + "loss": 0.2619, "step": 4036000 }, { "epoch": 2.42, - "learning_rate": 3.909549563387161e-05, - "loss": 0.3454, + "learning_rate": 3.9095483034078235e-05, + "loss": 0.2647, "step": 4036500 }, { "epoch": 2.42, - "learning_rate": 3.909339566831104e-05, - "loss": 0.3547, + "learning_rate": 3.9093387268448795e-05, + "loss": 0.2683, "step": 4037000 }, { "epoch": 2.42, - "learning_rate": 3.9091295702750476e-05, - "loss": 0.3511, + "learning_rate": 3.9091287302888235e-05, + "loss": 0.2681, "step": 4037500 }, { "epoch": 2.42, - "learning_rate": 3.908919573718991e-05, - "loss": 0.3595, + "learning_rate": 3.908918733732767e-05, + "loss": 0.265, "step": 4038000 }, { "epoch": 2.42, - "learning_rate": 3.908709577162934e-05, - "loss": 0.3504, + "learning_rate": 3.90870873717671e-05, + "loss": 0.2743, "step": 4038500 }, { "epoch": 2.42, - "learning_rate": 3.9084995806068776e-05, - "loss": 0.3595, + "learning_rate": 3.908499160613766e-05, + "loss": 0.2698, "step": 4039000 }, { "epoch": 2.42, - "learning_rate": 3.9082895840508216e-05, - "loss": 0.3467, + "learning_rate": 3.9082891640577096e-05, + "loss": 0.2654, "step": 4039500 }, { "epoch": 2.42, - "learning_rate": 3.908080007487877e-05, - "loss": 0.3406, + "learning_rate": 3.908079167501653e-05, + "loss": 0.263, "step": 4040000 }, { "epoch": 2.42, - "learning_rate": 3.90787001093182e-05, - "loss": 0.3609, + "learning_rate": 3.907869590938708e-05, + "loss": 0.2741, "step": 4040500 }, { "epoch": 2.42, - "learning_rate": 3.907660014375764e-05, - "loss": 0.3465, + "learning_rate": 3.9076595943826523e-05, + "loss": 0.2665, "step": 4041000 }, { "epoch": 2.42, - "learning_rate": 3.907450017819708e-05, - "loss": 0.3418, + "learning_rate": 3.907449597826596e-05, + "loss": 0.2641, "step": 4041500 }, { "epoch": 2.42, - "learning_rate": 3.907240441256764e-05, - "loss": 0.3352, + "learning_rate": 3.907239601270539e-05, + "loss": 0.258, "step": 4042000 }, { "epoch": 2.42, - "learning_rate": 3.9070304447007064e-05, - "loss": 0.3501, + "learning_rate": 3.907029604714483e-05, + "loss": 0.2633, "step": 4042500 }, { "epoch": 2.42, - "learning_rate": 3.9068204481446504e-05, - "loss": 0.35, + "learning_rate": 3.9068196081584264e-05, + "loss": 0.2685, "step": 4043000 }, { "epoch": 2.42, - "learning_rate": 3.906610451588594e-05, - "loss": 0.3524, + "learning_rate": 3.906609611602369e-05, + "loss": 0.2676, "step": 4043500 }, { "epoch": 2.42, - "learning_rate": 3.906401295018762e-05, - "loss": 0.3495, + "learning_rate": 3.906399615046313e-05, + "loss": 0.2635, "step": 4044000 }, { "epoch": 2.42, - "learning_rate": 3.906191298462705e-05, - "loss": 0.3557, + "learning_rate": 3.906190038483369e-05, + "loss": 0.2698, "step": 4044500 }, { "epoch": 2.43, - "learning_rate": 3.9059813019066485e-05, - "loss": 0.3501, + "learning_rate": 3.9059800419273125e-05, + "loss": 0.2658, "step": 4045000 }, { "epoch": 2.43, - "learning_rate": 3.9057713053505925e-05, - "loss": 0.3573, + "learning_rate": 3.905770045371256e-05, + "loss": 0.2672, "step": 4045500 }, { "epoch": 2.43, - "learning_rate": 3.905561308794536e-05, - "loss": 0.3547, + "learning_rate": 3.905560048815199e-05, + "loss": 0.2734, "step": 4046000 }, { "epoch": 2.43, - "learning_rate": 3.905351312238479e-05, - "loss": 0.3573, + "learning_rate": 3.905350892245367e-05, + "loss": 0.2673, "step": 4046500 }, { "epoch": 2.43, - "learning_rate": 3.905141315682423e-05, - "loss": 0.3615, + "learning_rate": 3.9051408956893106e-05, + "loss": 0.2759, "step": 4047000 }, { "epoch": 2.43, - "learning_rate": 3.9049317391194786e-05, - "loss": 0.3452, + "learning_rate": 3.904930899133254e-05, + "loss": 0.2697, "step": 4047500 }, { "epoch": 2.43, - "learning_rate": 3.904721742563422e-05, - "loss": 0.3561, + "learning_rate": 3.904720902577198e-05, + "loss": 0.2681, "step": 4048000 }, { "epoch": 2.43, - "learning_rate": 3.904511746007365e-05, - "loss": 0.3481, + "learning_rate": 3.904510906021141e-05, + "loss": 0.2613, "step": 4048500 }, { "epoch": 2.43, - "learning_rate": 3.904301749451309e-05, - "loss": 0.3533, + "learning_rate": 3.9043009094650846e-05, + "loss": 0.2716, "step": 4049000 }, { "epoch": 2.43, - "learning_rate": 3.904091752895252e-05, - "loss": 0.3506, + "learning_rate": 3.9040909129090287e-05, + "loss": 0.2698, "step": 4049500 }, { "epoch": 2.43, - "learning_rate": 3.903881756339196e-05, - "loss": 0.3435, + "learning_rate": 3.903880916352972e-05, + "loss": 0.2684, "step": 4050000 }, { "epoch": 2.43, - "learning_rate": 3.9036717597831394e-05, - "loss": 0.3443, + "learning_rate": 3.9036713397900274e-05, + "loss": 0.2629, "step": 4050500 }, { "epoch": 2.43, - "learning_rate": 3.903461763227083e-05, - "loss": 0.3613, + "learning_rate": 3.903461343233971e-05, + "loss": 0.2699, "step": 4051000 }, { "epoch": 2.43, - "learning_rate": 3.903252186664139e-05, - "loss": 0.3446, + "learning_rate": 3.903251346677915e-05, + "loss": 0.2627, "step": 4051500 }, { "epoch": 2.43, - "learning_rate": 3.903042190108082e-05, - "loss": 0.3525, + "learning_rate": 3.903041350121858e-05, + "loss": 0.2669, "step": 4052000 }, { "epoch": 2.43, - "learning_rate": 3.902832613545138e-05, - "loss": 0.3557, + "learning_rate": 3.9028317735589134e-05, + "loss": 0.2615, "step": 4052500 }, { "epoch": 2.43, - "learning_rate": 3.9026226169890815e-05, - "loss": 0.3436, + "learning_rate": 3.9026217770028575e-05, + "loss": 0.2587, "step": 4053000 }, { "epoch": 2.43, - "learning_rate": 3.902412620433025e-05, - "loss": 0.3558, + "learning_rate": 3.902411780446801e-05, + "loss": 0.2674, "step": 4053500 }, { "epoch": 2.43, - "learning_rate": 3.902202623876969e-05, - "loss": 0.3518, + "learning_rate": 3.902201783890744e-05, + "loss": 0.2722, "step": 4054000 }, { "epoch": 2.43, - "learning_rate": 3.9019926273209115e-05, - "loss": 0.3403, + "learning_rate": 3.9019922073277995e-05, + "loss": 0.2661, "step": 4054500 }, { "epoch": 2.43, - "learning_rate": 3.901782630764855e-05, - "loss": 0.348, + "learning_rate": 3.9017822107717435e-05, + "loss": 0.2654, "step": 4055000 }, { "epoch": 2.43, - "learning_rate": 3.901572634208799e-05, - "loss": 0.3543, + "learning_rate": 3.901572214215687e-05, + "loss": 0.2665, "step": 4055500 }, { "epoch": 2.43, "learning_rate": 3.901362637652742e-05, - "loss": 0.3404, + "loss": 0.2644, "step": 4056000 }, { "epoch": 2.43, - "learning_rate": 3.901153061089798e-05, - "loss": 0.3567, + "learning_rate": 3.9011526410966856e-05, + "loss": 0.2686, "step": 4056500 }, { "epoch": 2.43, - "learning_rate": 3.9009430645337416e-05, - "loss": 0.3373, + "learning_rate": 3.9009426445406296e-05, + "loss": 0.2633, "step": 4057000 }, { "epoch": 2.43, - "learning_rate": 3.900733067977685e-05, - "loss": 0.3503, + "learning_rate": 3.900732647984573e-05, + "loss": 0.2692, "step": 4057500 }, { "epoch": 2.43, - "learning_rate": 3.900523071421628e-05, - "loss": 0.3389, + "learning_rate": 3.900522651428516e-05, + "loss": 0.2664, "step": 4058000 }, { "epoch": 2.43, - "learning_rate": 3.900313494858684e-05, - "loss": 0.348, + "learning_rate": 3.90031265487246e-05, + "loss": 0.263, "step": 4058500 }, { "epoch": 2.43, - "learning_rate": 3.9001034983026284e-05, - "loss": 0.3475, + "learning_rate": 3.9001026583164037e-05, + "loss": 0.2677, "step": 4059000 }, { "epoch": 2.43, - "learning_rate": 3.899893501746571e-05, - "loss": 0.3586, + "learning_rate": 3.899892661760348e-05, + "loss": 0.268, "step": 4059500 }, { "epoch": 2.43, - "learning_rate": 3.899683925183627e-05, - "loss": 0.3489, + "learning_rate": 3.899683085197403e-05, + "loss": 0.2687, "step": 4060000 }, { "epoch": 2.43, - "learning_rate": 3.8994739286275704e-05, - "loss": 0.3526, + "learning_rate": 3.8994730886413464e-05, + "loss": 0.2696, "step": 4060500 }, { "epoch": 2.43, - "learning_rate": 3.899264352064626e-05, - "loss": 0.3406, + "learning_rate": 3.89926309208529e-05, + "loss": 0.2634, "step": 4061000 }, { "epoch": 2.44, - "learning_rate": 3.89905435550857e-05, - "loss": 0.3418, + "learning_rate": 3.899053095529234e-05, + "loss": 0.2606, "step": 4061500 }, { "epoch": 2.44, - "learning_rate": 3.898844358952513e-05, - "loss": 0.3559, + "learning_rate": 3.898843518966289e-05, + "loss": 0.2727, "step": 4062000 }, { "epoch": 2.44, - "learning_rate": 3.8986343623964565e-05, - "loss": 0.3424, + "learning_rate": 3.8986335224102325e-05, + "loss": 0.2625, "step": 4062500 }, { "epoch": 2.44, - "learning_rate": 3.8984243658404005e-05, - "loss": 0.3462, + "learning_rate": 3.898423525854176e-05, + "loss": 0.2624, "step": 4063000 }, { "epoch": 2.44, - "learning_rate": 3.898214369284344e-05, - "loss": 0.3599, + "learning_rate": 3.898213949291231e-05, + "loss": 0.27, "step": 4063500 }, { "epoch": 2.44, - "learning_rate": 3.898004372728287e-05, - "loss": 0.3454, + "learning_rate": 3.898003952735175e-05, + "loss": 0.2644, "step": 4064000 }, { "epoch": 2.44, - "learning_rate": 3.8977943761722305e-05, - "loss": 0.3508, + "learning_rate": 3.8977939561791185e-05, + "loss": 0.271, "step": 4064500 }, { "epoch": 2.44, - "learning_rate": 3.897584379616174e-05, - "loss": 0.3507, + "learning_rate": 3.8975839596230626e-05, + "loss": 0.2705, "step": 4065000 }, { "epoch": 2.44, - "learning_rate": 3.897374383060118e-05, - "loss": 0.3504, + "learning_rate": 3.897373963067006e-05, + "loss": 0.2616, "step": 4065500 }, { "epoch": 2.44, - "learning_rate": 3.897164386504061e-05, - "loss": 0.3411, + "learning_rate": 3.897163966510949e-05, + "loss": 0.2636, "step": 4066000 }, { "epoch": 2.44, - "learning_rate": 3.8969543899480046e-05, - "loss": 0.345, + "learning_rate": 3.896953969954893e-05, + "loss": 0.266, "step": 4066500 }, { "epoch": 2.44, - "learning_rate": 3.89674481338506e-05, - "loss": 0.3457, + "learning_rate": 3.896743973398836e-05, + "loss": 0.2636, "step": 4067000 }, { "epoch": 2.44, - "learning_rate": 3.896534816829004e-05, - "loss": 0.3476, + "learning_rate": 3.896534396835892e-05, + "loss": 0.2663, "step": 4067500 }, { "epoch": 2.44, - "learning_rate": 3.896324820272947e-05, - "loss": 0.3434, + "learning_rate": 3.896324400279835e-05, + "loss": 0.2644, "step": 4068000 }, { "epoch": 2.44, - "learning_rate": 3.896114823716891e-05, - "loss": 0.3487, + "learning_rate": 3.8961144037237793e-05, + "loss": 0.2665, "step": 4068500 }, { "epoch": 2.44, - "learning_rate": 3.895905247153946e-05, - "loss": 0.3489, + "learning_rate": 3.895904407167723e-05, + "loss": 0.2655, "step": 4069000 }, { "epoch": 2.44, "learning_rate": 3.89569525059789e-05, - "loss": 0.3508, + "loss": 0.2693, "step": 4069500 }, { "epoch": 2.44, "learning_rate": 3.8954852540418334e-05, - "loss": 0.3544, + "loss": 0.2701, "step": 4070000 }, { "epoch": 2.44, - "learning_rate": 3.8952756774788894e-05, - "loss": 0.3554, + "learning_rate": 3.895275257485777e-05, + "loss": 0.2716, "step": 4070500 }, { "epoch": 2.44, - "learning_rate": 3.895065680922833e-05, - "loss": 0.3495, + "learning_rate": 3.895065260929721e-05, + "loss": 0.2654, "step": 4071000 }, { "epoch": 2.44, - "learning_rate": 3.894855684366776e-05, - "loss": 0.357, + "learning_rate": 3.894855264373664e-05, + "loss": 0.271, "step": 4071500 }, { "epoch": 2.44, - "learning_rate": 3.8946456878107195e-05, - "loss": 0.347, + "learning_rate": 3.894645267817608e-05, + "loss": 0.2677, "step": 4072000 }, { "epoch": 2.44, - "learning_rate": 3.8944356912546635e-05, - "loss": 0.3504, + "learning_rate": 3.8944352712615515e-05, + "loss": 0.2646, "step": 4072500 }, { "epoch": 2.44, - "learning_rate": 3.894225694698607e-05, - "loss": 0.3481, + "learning_rate": 3.894225274705495e-05, + "loss": 0.2622, "step": 4073000 }, { "epoch": 2.44, - "learning_rate": 3.894016118135662e-05, - "loss": 0.3454, + "learning_rate": 3.89401569814255e-05, + "loss": 0.265, "step": 4073500 }, { "epoch": 2.44, - "learning_rate": 3.8938061215796055e-05, - "loss": 0.3481, + "learning_rate": 3.893805701586494e-05, + "loss": 0.2668, "step": 4074000 }, { "epoch": 2.44, - "learning_rate": 3.8935961250235496e-05, - "loss": 0.3415, + "learning_rate": 3.8935957050304376e-05, + "loss": 0.2658, "step": 4074500 }, { "epoch": 2.44, - "learning_rate": 3.8933865484606056e-05, - "loss": 0.3527, + "learning_rate": 3.893385708474381e-05, + "loss": 0.2657, "step": 4075000 }, { "epoch": 2.44, - "learning_rate": 3.893176551904549e-05, - "loss": 0.3436, + "learning_rate": 3.893176131911436e-05, + "loss": 0.2624, "step": 4075500 }, { "epoch": 2.44, - "learning_rate": 3.8929665553484916e-05, - "loss": 0.3549, + "learning_rate": 3.89296613535538e-05, + "loss": 0.2718, "step": 4076000 }, { "epoch": 2.44, - "learning_rate": 3.8927565587924356e-05, - "loss": 0.3478, + "learning_rate": 3.8927561387993236e-05, + "loss": 0.269, "step": 4076500 }, { "epoch": 2.44, - "learning_rate": 3.892546562236379e-05, - "loss": 0.3444, + "learning_rate": 3.892546142243267e-05, + "loss": 0.2631, "step": 4077000 }, { "epoch": 2.44, "learning_rate": 3.892336565680323e-05, - "loss": 0.349, + "loss": 0.2628, "step": 4077500 }, { "epoch": 2.44, "learning_rate": 3.8921265691242664e-05, - "loss": 0.3484, + "loss": 0.2656, "step": 4078000 }, { "epoch": 2.45, "learning_rate": 3.89191657256821e-05, - "loss": 0.3466, + "loss": 0.2666, "step": 4078500 }, { "epoch": 2.45, "learning_rate": 3.891706996005265e-05, - "loss": 0.3482, + "loss": 0.2665, "step": 4079000 }, { "epoch": 2.45, "learning_rate": 3.891496999449209e-05, - "loss": 0.3466, + "loss": 0.2682, "step": 4079500 }, { "epoch": 2.45, - "learning_rate": 3.891287422886265e-05, - "loss": 0.346, + "learning_rate": 3.8912870028931524e-05, + "loss": 0.2614, "step": 4080000 }, { "epoch": 2.45, - "learning_rate": 3.891077426330208e-05, - "loss": 0.3438, + "learning_rate": 3.891077006337096e-05, + "loss": 0.2662, "step": 4080500 }, { "epoch": 2.45, - "learning_rate": 3.890867429774151e-05, - "loss": 0.3577, + "learning_rate": 3.89086700978104e-05, + "loss": 0.271, "step": 4081000 }, { "epoch": 2.45, - "learning_rate": 3.890657433218095e-05, - "loss": 0.3409, + "learning_rate": 3.890657013224983e-05, + "loss": 0.267, "step": 4081500 }, { "epoch": 2.45, - "learning_rate": 3.8904474366620385e-05, - "loss": 0.3443, + "learning_rate": 3.8904470166689265e-05, + "loss": 0.2655, "step": 4082000 }, { "epoch": 2.45, - "learning_rate": 3.890237440105982e-05, - "loss": 0.3457, + "learning_rate": 3.8902370201128705e-05, + "loss": 0.2682, "step": 4082500 }, { "epoch": 2.45, "learning_rate": 3.890027443549926e-05, - "loss": 0.3526, + "loss": 0.2709, "step": 4083000 }, { "epoch": 2.45, "learning_rate": 3.889817446993869e-05, - "loss": 0.3428, + "loss": 0.2605, "step": 4083500 }, { "epoch": 2.45, "learning_rate": 3.8896074504378126e-05, - "loss": 0.357, + "loss": 0.2669, "step": 4084000 }, { "epoch": 2.45, "learning_rate": 3.8893974538817566e-05, - "loss": 0.3434, + "loss": 0.2575, "step": 4084500 }, { "epoch": 2.45, "learning_rate": 3.8891874573257e-05, - "loss": 0.3466, + "loss": 0.2657, "step": 4085000 }, { "epoch": 2.45, - "learning_rate": 3.888977460769643e-05, - "loss": 0.3467, + "learning_rate": 3.888977880762755e-05, + "loss": 0.265, "step": 4085500 }, { "epoch": 2.45, "learning_rate": 3.888767884206699e-05, - "loss": 0.3506, + "loss": 0.2665, "step": 4086000 }, { "epoch": 2.45, "learning_rate": 3.888557887650643e-05, - "loss": 0.3478, + "loss": 0.2676, "step": 4086500 }, { "epoch": 2.45, "learning_rate": 3.888347891094586e-05, - "loss": 0.3491, + "loss": 0.2654, "step": 4087000 }, { "epoch": 2.45, - "learning_rate": 3.88813789453853e-05, - "loss": 0.3475, + "learning_rate": 3.8881383145316414e-05, + "loss": 0.2652, "step": 4087500 }, { "epoch": 2.45, "learning_rate": 3.8879283179755854e-05, - "loss": 0.3572, + "loss": 0.2647, "step": 4088000 }, { "epoch": 2.45, - "learning_rate": 3.887718741412641e-05, - "loss": 0.3599, + "learning_rate": 3.887718321419529e-05, + "loss": 0.2695, "step": 4088500 }, { "epoch": 2.45, - "learning_rate": 3.887508744856584e-05, - "loss": 0.3414, + "learning_rate": 3.887508324863472e-05, + "loss": 0.2597, "step": 4089000 }, { "epoch": 2.45, "learning_rate": 3.8872987483005274e-05, - "loss": 0.3493, + "loss": 0.2703, "step": 4089500 }, { "epoch": 2.45, "learning_rate": 3.8870887517444715e-05, - "loss": 0.3574, + "loss": 0.2702, "step": 4090000 }, { "epoch": 2.45, - "learning_rate": 3.886878755188415e-05, - "loss": 0.3556, + "learning_rate": 3.886879175181527e-05, + "loss": 0.2703, "step": 4090500 }, { "epoch": 2.45, - "learning_rate": 3.886668758632358e-05, - "loss": 0.3461, + "learning_rate": 3.88666917862547e-05, + "loss": 0.2673, "step": 4091000 }, { "epoch": 2.45, - "learning_rate": 3.886458762076302e-05, - "loss": 0.3492, + "learning_rate": 3.886459182069414e-05, + "loss": 0.2731, "step": 4091500 }, { "epoch": 2.45, - "learning_rate": 3.8862487655202455e-05, - "loss": 0.3521, + "learning_rate": 3.8862491855133575e-05, + "loss": 0.2712, "step": 4092000 }, { "epoch": 2.45, - "learning_rate": 3.886038768964189e-05, - "loss": 0.35, + "learning_rate": 3.886039188957301e-05, + "loss": 0.269, "step": 4092500 }, { "epoch": 2.45, - "learning_rate": 3.885828772408133e-05, - "loss": 0.3502, + "learning_rate": 3.885829192401245e-05, + "loss": 0.2703, "step": 4093000 }, { "epoch": 2.45, "learning_rate": 3.885619195845188e-05, - "loss": 0.3449, + "loss": 0.2668, "step": 4093500 }, { "epoch": 2.45, "learning_rate": 3.8854091992891316e-05, - "loss": 0.3532, + "loss": 0.2654, "step": 4094000 }, { "epoch": 2.45, - "learning_rate": 3.8851992027330756e-05, - "loss": 0.3453, + "learning_rate": 3.885199622726187e-05, + "loss": 0.2646, "step": 4094500 }, { "epoch": 2.46, - "learning_rate": 3.884989206177019e-05, - "loss": 0.3456, + "learning_rate": 3.884989626170131e-05, + "loss": 0.2646, "step": 4095000 }, { "epoch": 2.46, - "learning_rate": 3.884779209620962e-05, - "loss": 0.3429, + "learning_rate": 3.884779629614074e-05, + "loss": 0.2604, "step": 4095500 }, { "epoch": 2.46, - "learning_rate": 3.8845692130649057e-05, - "loss": 0.3458, + "learning_rate": 3.884569633058018e-05, + "loss": 0.2645, "step": 4096000 }, { "epoch": 2.46, - "learning_rate": 3.884359636501962e-05, - "loss": 0.3518, + "learning_rate": 3.884360056495073e-05, + "loss": 0.2704, "step": 4096500 }, { "epoch": 2.46, - "learning_rate": 3.884149639945905e-05, - "loss": 0.3486, + "learning_rate": 3.884150059939017e-05, + "loss": 0.2684, "step": 4097000 }, { "epoch": 2.46, - "learning_rate": 3.8839396433898484e-05, - "loss": 0.346, + "learning_rate": 3.8839400633829604e-05, + "loss": 0.2654, "step": 4097500 }, { "epoch": 2.46, - "learning_rate": 3.883729646833792e-05, - "loss": 0.3493, + "learning_rate": 3.883730066826904e-05, + "loss": 0.2601, "step": 4098000 }, { "epoch": 2.46, - "learning_rate": 3.883519650277735e-05, - "loss": 0.3484, + "learning_rate": 3.88352049026396e-05, + "loss": 0.2651, "step": 4098500 }, { "epoch": 2.46, - "learning_rate": 3.8833096537216784e-05, - "loss": 0.3447, + "learning_rate": 3.883310493707903e-05, + "loss": 0.259, "step": 4099000 }, { "epoch": 2.46, - "learning_rate": 3.8831000771587345e-05, - "loss": 0.3528, + "learning_rate": 3.8831004971518465e-05, + "loss": 0.2675, "step": 4099500 }, { "epoch": 2.46, - "learning_rate": 3.8828900806026785e-05, - "loss": 0.3536, + "learning_rate": 3.882890920588902e-05, + "loss": 0.272, "step": 4100000 }, { "epoch": 2.46, - "eval_loss": 0.3373085558414459, - "eval_runtime": 1111.3303, - "eval_samples_per_second": 473.954, - "eval_steps_per_second": 78.993, + "eval_loss": 0.24459972977638245, + "eval_runtime": 1452.9658, + "eval_samples_per_second": 362.514, + "eval_steps_per_second": 60.419, "step": 4100000 }, { "epoch": 2.46, - "learning_rate": 3.882680084046621e-05, - "loss": 0.349, + "learning_rate": 3.882680924032846e-05, + "loss": 0.2622, "step": 4100500 }, { "epoch": 2.46, - "learning_rate": 3.882470087490565e-05, - "loss": 0.3522, + "learning_rate": 3.882470927476789e-05, + "loss": 0.2672, "step": 4101000 }, { "epoch": 2.46, - "learning_rate": 3.8822600909345085e-05, - "loss": 0.3485, + "learning_rate": 3.8822609309207325e-05, + "loss": 0.2624, "step": 4101500 }, { "epoch": 2.46, - "learning_rate": 3.882050094378452e-05, - "loss": 0.358, + "learning_rate": 3.8820509343646766e-05, + "loss": 0.267, "step": 4102000 }, { "epoch": 2.46, - "learning_rate": 3.881840097822396e-05, - "loss": 0.3685, + "learning_rate": 3.88184093780862e-05, + "loss": 0.2712, "step": 4102500 }, { "epoch": 2.46, - "learning_rate": 3.881630521259451e-05, - "loss": 0.3537, + "learning_rate": 3.881630941252563e-05, + "loss": 0.2689, "step": 4103000 }, { "epoch": 2.46, - "learning_rate": 3.8814205247033946e-05, - "loss": 0.3451, + "learning_rate": 3.881420944696507e-05, + "loss": 0.2656, "step": 4103500 }, { "epoch": 2.46, - "learning_rate": 3.881210528147338e-05, - "loss": 0.3456, + "learning_rate": 3.8812113681335626e-05, + "loss": 0.2664, "step": 4104000 }, { "epoch": 2.46, - "learning_rate": 3.881000531591282e-05, - "loss": 0.3448, + "learning_rate": 3.881001371577506e-05, + "loss": 0.2639, "step": 4104500 }, { "epoch": 2.46, - "learning_rate": 3.880790955028338e-05, - "loss": 0.3594, + "learning_rate": 3.880791375021449e-05, + "loss": 0.2663, "step": 4105000 }, { "epoch": 2.46, - "learning_rate": 3.880580958472281e-05, - "loss": 0.3482, + "learning_rate": 3.8805813784653934e-05, + "loss": 0.269, "step": 4105500 }, { "epoch": 2.46, - "learning_rate": 3.880370961916224e-05, - "loss": 0.3469, + "learning_rate": 3.880371381909337e-05, + "loss": 0.2616, "step": 4106000 }, { "epoch": 2.46, - "learning_rate": 3.880160965360168e-05, - "loss": 0.3476, + "learning_rate": 3.880161805346392e-05, + "loss": 0.2657, "step": 4106500 }, { "epoch": 2.46, - "learning_rate": 3.8799509688041114e-05, - "loss": 0.3494, + "learning_rate": 3.879951808790336e-05, + "loss": 0.2706, "step": 4107000 }, { "epoch": 2.46, - "learning_rate": 3.879740972248055e-05, - "loss": 0.3451, + "learning_rate": 3.8797418122342794e-05, + "loss": 0.261, "step": 4107500 }, { "epoch": 2.46, - "learning_rate": 3.879531395685111e-05, - "loss": 0.3467, + "learning_rate": 3.879531815678223e-05, + "loss": 0.2696, "step": 4108000 }, { "epoch": 2.46, - "learning_rate": 3.879321399129054e-05, - "loss": 0.339, + "learning_rate": 3.879322239115278e-05, + "loss": 0.2654, "step": 4108500 }, { "epoch": 2.46, - "learning_rate": 3.8791114025729975e-05, - "loss": 0.344, + "learning_rate": 3.879112242559222e-05, + "loss": 0.2617, "step": 4109000 }, { "epoch": 2.46, - "learning_rate": 3.8789014060169415e-05, - "loss": 0.3361, + "learning_rate": 3.8789022460031655e-05, + "loss": 0.2627, "step": 4109500 }, { "epoch": 2.46, - "learning_rate": 3.878691409460885e-05, - "loss": 0.3516, + "learning_rate": 3.878692249447109e-05, + "loss": 0.2688, "step": 4110000 }, { "epoch": 2.46, - "learning_rate": 3.878481412904828e-05, - "loss": 0.3557, + "learning_rate": 3.878482252891053e-05, + "loss": 0.2681, "step": 4110500 }, { "epoch": 2.46, - "learning_rate": 3.878271416348772e-05, - "loss": 0.3508, + "learning_rate": 3.878272676328108e-05, + "loss": 0.268, "step": 4111000 }, { "epoch": 2.47, - "learning_rate": 3.8780614197927155e-05, - "loss": 0.3511, + "learning_rate": 3.8780626797720516e-05, + "loss": 0.2645, "step": 4111500 }, { "epoch": 2.47, - "learning_rate": 3.877851843229771e-05, - "loss": 0.3489, + "learning_rate": 3.877852683215995e-05, + "loss": 0.262, "step": 4112000 }, { "epoch": 2.47, - "learning_rate": 3.877641846673714e-05, - "loss": 0.352, + "learning_rate": 3.877643106653051e-05, + "loss": 0.2734, "step": 4112500 }, { "epoch": 2.47, - "learning_rate": 3.8774322701107696e-05, - "loss": 0.3583, + "learning_rate": 3.877433110096994e-05, + "loss": 0.2722, "step": 4113000 }, { "epoch": 2.47, - "learning_rate": 3.8772222735547136e-05, - "loss": 0.3439, + "learning_rate": 3.8772231135409376e-05, + "loss": 0.2653, "step": 4113500 }, { "epoch": 2.47, - "learning_rate": 3.877012276998657e-05, - "loss": 0.3468, + "learning_rate": 3.877013116984882e-05, + "loss": 0.26, "step": 4114000 }, { "epoch": 2.47, - "learning_rate": 3.8768022804426e-05, - "loss": 0.3591, + "learning_rate": 3.876803120428825e-05, + "loss": 0.2696, "step": 4114500 }, { "epoch": 2.47, - "learning_rate": 3.876592283886544e-05, - "loss": 0.3548, + "learning_rate": 3.8765935438658804e-05, + "loss": 0.268, "step": 4115000 }, { "epoch": 2.47, - "learning_rate": 3.8763827073236e-05, - "loss": 0.3488, + "learning_rate": 3.876383547309824e-05, + "loss": 0.2654, "step": 4115500 }, { "epoch": 2.47, - "learning_rate": 3.876172710767543e-05, - "loss": 0.3482, + "learning_rate": 3.876173550753768e-05, + "loss": 0.2676, "step": 4116000 }, { "epoch": 2.47, - "learning_rate": 3.875962714211487e-05, - "loss": 0.3492, + "learning_rate": 3.875963554197711e-05, + "loss": 0.2589, "step": 4116500 }, { "epoch": 2.47, - "learning_rate": 3.8757527176554304e-05, - "loss": 0.3579, + "learning_rate": 3.8757539776347664e-05, + "loss": 0.2726, "step": 4117000 }, { "epoch": 2.47, - "learning_rate": 3.875542721099374e-05, - "loss": 0.359, + "learning_rate": 3.87554398107871e-05, + "loss": 0.2694, "step": 4117500 }, { "epoch": 2.47, - "learning_rate": 3.875333144536429e-05, - "loss": 0.3453, + "learning_rate": 3.875333984522654e-05, + "loss": 0.2632, "step": 4118000 }, { "epoch": 2.47, - "learning_rate": 3.875123147980373e-05, - "loss": 0.3622, + "learning_rate": 3.875123987966597e-05, + "loss": 0.2684, "step": 4118500 }, { "epoch": 2.47, - "learning_rate": 3.8749131514243165e-05, - "loss": 0.3428, + "learning_rate": 3.8749139914105405e-05, + "loss": 0.2679, "step": 4119000 }, { "epoch": 2.47, - "learning_rate": 3.87470315486826e-05, - "loss": 0.3484, + "learning_rate": 3.8747039948544845e-05, + "loss": 0.2625, "step": 4119500 }, { "epoch": 2.47, - "learning_rate": 3.874493158312204e-05, - "loss": 0.3509, + "learning_rate": 3.87449441829154e-05, + "loss": 0.2707, "step": 4120000 }, { "epoch": 2.47, - "learning_rate": 3.874283161756147e-05, - "loss": 0.3435, + "learning_rate": 3.874284421735483e-05, + "loss": 0.2627, "step": 4120500 }, { "epoch": 2.47, - "learning_rate": 3.8740731652000905e-05, - "loss": 0.3415, + "learning_rate": 3.874074425179427e-05, + "loss": 0.2598, "step": 4121000 }, { "epoch": 2.47, - "learning_rate": 3.8738631686440346e-05, - "loss": 0.3513, + "learning_rate": 3.8738644286233706e-05, + "loss": 0.2668, "step": 4121500 }, { "epoch": 2.47, - "learning_rate": 3.873654012074202e-05, - "loss": 0.3418, + "learning_rate": 3.873654432067314e-05, + "loss": 0.2645, "step": 4122000 }, { "epoch": 2.47, - "learning_rate": 3.873444015518145e-05, - "loss": 0.3498, + "learning_rate": 3.873444435511258e-05, + "loss": 0.2675, "step": 4122500 }, { "epoch": 2.47, - "learning_rate": 3.8732340189620886e-05, - "loss": 0.3521, + "learning_rate": 3.873234438955201e-05, + "loss": 0.2657, "step": 4123000 }, { "epoch": 2.47, - "learning_rate": 3.8730240224060327e-05, - "loss": 0.349, + "learning_rate": 3.873024442399145e-05, + "loss": 0.2678, "step": 4123500 }, { "epoch": 2.47, - "learning_rate": 3.872814445843089e-05, - "loss": 0.3503, + "learning_rate": 3.8728148658362e-05, + "loss": 0.2674, "step": 4124000 }, { "epoch": 2.47, - "learning_rate": 3.8726044492870314e-05, - "loss": 0.3621, + "learning_rate": 3.872604869280144e-05, + "loss": 0.2755, "step": 4124500 }, { "epoch": 2.47, - "learning_rate": 3.872394452730975e-05, - "loss": 0.3384, + "learning_rate": 3.8723948727240874e-05, + "loss": 0.263, "step": 4125000 }, { "epoch": 2.47, - "learning_rate": 3.872184456174919e-05, - "loss": 0.3445, + "learning_rate": 3.872184876168031e-05, + "loss": 0.2657, "step": 4125500 }, { "epoch": 2.47, - "learning_rate": 3.871974459618862e-05, - "loss": 0.3412, + "learning_rate": 3.871975299605086e-05, + "loss": 0.2595, "step": 4126000 }, { "epoch": 2.47, - "learning_rate": 3.8717648830559174e-05, - "loss": 0.3571, + "learning_rate": 3.871765723042142e-05, + "loss": 0.27, "step": 4126500 }, { "epoch": 2.47, - "learning_rate": 3.871554886499861e-05, - "loss": 0.3424, + "learning_rate": 3.8715557264860855e-05, + "loss": 0.2665, "step": 4127000 }, { "epoch": 2.47, - "learning_rate": 3.871344889943805e-05, - "loss": 0.3495, + "learning_rate": 3.871345729930029e-05, + "loss": 0.2686, "step": 4127500 }, { "epoch": 2.47, - "learning_rate": 3.871134893387748e-05, - "loss": 0.3533, + "learning_rate": 3.871135733373973e-05, + "loss": 0.2714, "step": 4128000 }, { "epoch": 2.48, - "learning_rate": 3.8709248968316915e-05, - "loss": 0.3523, + "learning_rate": 3.870925736817916e-05, + "loss": 0.2677, "step": 4128500 }, { "epoch": 2.48, - "learning_rate": 3.8707149002756355e-05, - "loss": 0.3502, + "learning_rate": 3.8707157402618595e-05, + "loss": 0.2679, "step": 4129000 }, { "epoch": 2.48, - "learning_rate": 3.870504903719579e-05, - "loss": 0.3465, + "learning_rate": 3.8705057437058036e-05, + "loss": 0.2683, "step": 4129500 }, { "epoch": 2.48, - "learning_rate": 3.870294907163522e-05, - "loss": 0.352, + "learning_rate": 3.870295747149747e-05, + "loss": 0.2684, "step": 4130000 }, { "epoch": 2.48, - "learning_rate": 3.870085330600578e-05, - "loss": 0.3511, + "learning_rate": 3.870086170586802e-05, + "loss": 0.2662, "step": 4130500 }, { "epoch": 2.48, - "learning_rate": 3.869875754037634e-05, - "loss": 0.3493, + "learning_rate": 3.8698761740307456e-05, + "loss": 0.265, "step": 4131000 }, { "epoch": 2.48, - "learning_rate": 3.869665757481577e-05, - "loss": 0.3481, + "learning_rate": 3.8696661774746896e-05, + "loss": 0.261, "step": 4131500 }, { "epoch": 2.48, - "learning_rate": 3.86945576092552e-05, - "loss": 0.351, + "learning_rate": 3.869456180918633e-05, + "loss": 0.2751, "step": 4132000 }, { "epoch": 2.48, - "learning_rate": 3.869245764369464e-05, - "loss": 0.3514, + "learning_rate": 3.869246604355688e-05, + "loss": 0.2648, "step": 4132500 }, { "epoch": 2.48, - "learning_rate": 3.8690357678134077e-05, - "loss": 0.3371, + "learning_rate": 3.869036607799632e-05, + "loss": 0.2584, "step": 4133000 }, { "epoch": 2.48, - "learning_rate": 3.868826191250464e-05, - "loss": 0.3461, + "learning_rate": 3.868827031236688e-05, + "loss": 0.2626, "step": 4133500 }, { "epoch": 2.48, - "learning_rate": 3.8686161946944064e-05, - "loss": 0.341, + "learning_rate": 3.868617034680631e-05, + "loss": 0.2598, "step": 4134000 }, { "epoch": 2.48, - "learning_rate": 3.8684061981383504e-05, - "loss": 0.3473, + "learning_rate": 3.8684070381245744e-05, + "loss": 0.2653, "step": 4134500 }, { "epoch": 2.48, - "learning_rate": 3.868196201582294e-05, - "loss": 0.3405, + "learning_rate": 3.8681970415685184e-05, + "loss": 0.2668, "step": 4135000 }, { "epoch": 2.48, - "learning_rate": 3.86798662501935e-05, - "loss": 0.3531, + "learning_rate": 3.867987045012462e-05, + "loss": 0.2701, "step": 4135500 }, { "epoch": 2.48, - "learning_rate": 3.867776628463293e-05, - "loss": 0.3515, + "learning_rate": 3.867777048456405e-05, + "loss": 0.2653, "step": 4136000 }, { "epoch": 2.48, - "learning_rate": 3.8675666319072365e-05, - "loss": 0.351, + "learning_rate": 3.867567051900349e-05, + "loss": 0.271, "step": 4136500 }, { "epoch": 2.48, - "learning_rate": 3.86735663535118e-05, - "loss": 0.3545, + "learning_rate": 3.8673570553442925e-05, + "loss": 0.2696, "step": 4137000 }, { "epoch": 2.48, - "learning_rate": 3.867146638795124e-05, - "loss": 0.347, + "learning_rate": 3.867147478781348e-05, + "loss": 0.2658, "step": 4137500 }, { "epoch": 2.48, - "learning_rate": 3.866936642239067e-05, - "loss": 0.3408, + "learning_rate": 3.866937482225291e-05, + "loss": 0.2652, "step": 4138000 }, { "epoch": 2.48, - "learning_rate": 3.8667266456830105e-05, - "loss": 0.3529, + "learning_rate": 3.866727485669235e-05, + "loss": 0.2708, "step": 4138500 }, { "epoch": 2.48, - "learning_rate": 3.8665166491269545e-05, - "loss": 0.3439, + "learning_rate": 3.8665174891131786e-05, + "loss": 0.2582, "step": 4139000 }, { "epoch": 2.48, - "learning_rate": 3.86630707256401e-05, - "loss": 0.3503, + "learning_rate": 3.866307492557122e-05, + "loss": 0.2678, "step": 4139500 }, { "epoch": 2.48, - "learning_rate": 3.866097076007953e-05, - "loss": 0.3629, + "learning_rate": 3.866097915994177e-05, + "loss": 0.2705, "step": 4140000 }, { "epoch": 2.48, - "learning_rate": 3.8658870794518966e-05, - "loss": 0.3495, + "learning_rate": 3.865887919438121e-05, + "loss": 0.2676, "step": 4140500 }, { "epoch": 2.48, - "learning_rate": 3.8656770828958406e-05, - "loss": 0.3411, + "learning_rate": 3.8656779228820646e-05, + "loss": 0.2619, "step": 4141000 }, { "epoch": 2.48, - "learning_rate": 3.865467506332896e-05, - "loss": 0.3493, + "learning_rate": 3.865467926326008e-05, + "loss": 0.2682, "step": 4141500 }, { "epoch": 2.48, - "learning_rate": 3.865257509776839e-05, - "loss": 0.3529, + "learning_rate": 3.865258349763064e-05, + "loss": 0.2749, "step": 4142000 }, { "epoch": 2.48, - "learning_rate": 3.865047513220783e-05, - "loss": 0.3481, + "learning_rate": 3.8650483532070074e-05, + "loss": 0.2664, "step": 4142500 }, { "epoch": 2.48, - "learning_rate": 3.864837516664727e-05, - "loss": 0.3588, + "learning_rate": 3.864838356650951e-05, + "loss": 0.271, "step": 4143000 }, { "epoch": 2.48, - "learning_rate": 3.864627940101782e-05, - "loss": 0.3605, + "learning_rate": 3.864628360094895e-05, + "loss": 0.2676, "step": 4143500 }, { "epoch": 2.48, - "learning_rate": 3.8644179435457254e-05, - "loss": 0.3546, + "learning_rate": 3.86441878353195e-05, + "loss": 0.27, "step": 4144000 }, { "epoch": 2.48, - "learning_rate": 3.8642079469896694e-05, - "loss": 0.3513, + "learning_rate": 3.8642087869758934e-05, + "loss": 0.2664, "step": 4144500 }, { "epoch": 2.49, - "learning_rate": 3.863997950433613e-05, - "loss": 0.3533, + "learning_rate": 3.863998790419837e-05, + "loss": 0.2639, "step": 4145000 }, { "epoch": 2.49, - "learning_rate": 3.863787953877556e-05, - "loss": 0.3456, + "learning_rate": 3.863788793863781e-05, + "loss": 0.2642, "step": 4145500 }, { "epoch": 2.49, - "learning_rate": 3.8635779573215e-05, - "loss": 0.3483, + "learning_rate": 3.863579217300836e-05, + "loss": 0.2659, "step": 4146000 }, { "epoch": 2.49, - "learning_rate": 3.8633679607654435e-05, - "loss": 0.3421, + "learning_rate": 3.8633692207447795e-05, + "loss": 0.2645, "step": 4146500 }, { "epoch": 2.49, - "learning_rate": 3.863157964209387e-05, - "loss": 0.3387, + "learning_rate": 3.863159224188723e-05, + "loss": 0.2655, "step": 4147000 }, { "epoch": 2.49, - "learning_rate": 3.862948387646442e-05, - "loss": 0.3518, + "learning_rate": 3.862949227632667e-05, + "loss": 0.27, "step": 4147500 }, { "epoch": 2.49, - "learning_rate": 3.862738391090386e-05, - "loss": 0.35, + "learning_rate": 3.86273923107661e-05, + "loss": 0.2658, "step": 4148000 }, { "epoch": 2.49, - "learning_rate": 3.8625283945343296e-05, - "loss": 0.3491, + "learning_rate": 3.8625296545136656e-05, + "loss": 0.2648, "step": 4148500 }, { "epoch": 2.49, - "learning_rate": 3.862318397978273e-05, - "loss": 0.3437, + "learning_rate": 3.8623196579576096e-05, + "loss": 0.2595, "step": 4149000 }, { "epoch": 2.49, - "learning_rate": 3.862108821415329e-05, - "loss": 0.3447, + "learning_rate": 3.862109661401553e-05, + "loss": 0.2616, "step": 4149500 }, { "epoch": 2.49, - "learning_rate": 3.861898824859272e-05, - "loss": 0.3501, + "learning_rate": 3.861899664845496e-05, + "loss": 0.2661, "step": 4150000 }, { "epoch": 2.49, - "learning_rate": 3.8616888283032156e-05, - "loss": 0.355, + "learning_rate": 3.8616900882825517e-05, + "loss": 0.2719, "step": 4150500 }, { "epoch": 2.49, - "learning_rate": 3.8614788317471596e-05, - "loss": 0.3469, + "learning_rate": 3.861480091726496e-05, + "loss": 0.264, "step": 4151000 }, { "epoch": 2.49, - "learning_rate": 3.861268835191103e-05, - "loss": 0.3474, + "learning_rate": 3.861270095170439e-05, + "loss": 0.2663, "step": 4151500 }, { "epoch": 2.49, - "learning_rate": 3.8610588386350463e-05, - "loss": 0.3464, + "learning_rate": 3.8610600986143824e-05, + "loss": 0.2631, "step": 4152000 }, { "epoch": 2.49, - "learning_rate": 3.860849262072102e-05, - "loss": 0.352, + "learning_rate": 3.860850522051438e-05, + "loss": 0.2716, "step": 4152500 }, { "epoch": 2.49, - "learning_rate": 3.860639265516046e-05, - "loss": 0.3399, + "learning_rate": 3.860640525495382e-05, + "loss": 0.2661, "step": 4153000 }, { "epoch": 2.49, - "learning_rate": 3.860429268959989e-05, - "loss": 0.353, + "learning_rate": 3.860430528939325e-05, + "loss": 0.2666, "step": 4153500 }, { "epoch": 2.49, - "learning_rate": 3.8602192724039324e-05, - "loss": 0.35, + "learning_rate": 3.860220952376381e-05, + "loss": 0.2638, "step": 4154000 }, { "epoch": 2.49, - "learning_rate": 3.8600092758478764e-05, - "loss": 0.3486, + "learning_rate": 3.8600109558203245e-05, + "loss": 0.2676, "step": 4154500 }, { "epoch": 2.49, - "learning_rate": 3.85979927929182e-05, - "loss": 0.3407, + "learning_rate": 3.859800959264268e-05, + "loss": 0.26, "step": 4155000 }, { "epoch": 2.49, - "learning_rate": 3.859589282735763e-05, - "loss": 0.3509, + "learning_rate": 3.859590962708211e-05, + "loss": 0.2672, "step": 4155500 }, { "epoch": 2.49, - "learning_rate": 3.8593792861797065e-05, - "loss": 0.3416, + "learning_rate": 3.859380966152155e-05, + "loss": 0.2637, "step": 4156000 }, { "epoch": 2.49, - "learning_rate": 3.8591697096167625e-05, - "loss": 0.3583, + "learning_rate": 3.8591709695960985e-05, + "loss": 0.2731, "step": 4156500 }, { "epoch": 2.49, - "learning_rate": 3.858959713060706e-05, - "loss": 0.3478, + "learning_rate": 3.858960973040042e-05, + "loss": 0.2657, "step": 4157000 }, { "epoch": 2.49, - "learning_rate": 3.858749716504649e-05, - "loss": 0.359, + "learning_rate": 3.858750976483986e-05, + "loss": 0.2678, "step": 4157500 }, { "epoch": 2.49, - "learning_rate": 3.858539719948593e-05, - "loss": 0.347, + "learning_rate": 3.858541399921041e-05, + "loss": 0.2657, "step": 4158000 }, { "epoch": 2.49, - "learning_rate": 3.8583305633787606e-05, - "loss": 0.3456, + "learning_rate": 3.8583314033649846e-05, + "loss": 0.263, "step": 4158500 }, { "epoch": 2.49, - "learning_rate": 3.858120566822704e-05, - "loss": 0.3453, + "learning_rate": 3.858121406808928e-05, + "loss": 0.269, "step": 4159000 }, { "epoch": 2.49, - "learning_rate": 3.857910570266647e-05, - "loss": 0.351, + "learning_rate": 3.857911410252872e-05, + "loss": 0.2695, "step": 4159500 }, { "epoch": 2.49, - "learning_rate": 3.857700573710591e-05, - "loss": 0.3529, + "learning_rate": 3.857701413696815e-05, + "loss": 0.2647, "step": 4160000 }, { "epoch": 2.49, - "learning_rate": 3.8574905771545347e-05, - "loss": 0.348, + "learning_rate": 3.857491417140759e-05, + "loss": 0.264, "step": 4160500 }, { "epoch": 2.49, - "learning_rate": 3.857280580598478e-05, - "loss": 0.3483, + "learning_rate": 3.857281420584703e-05, + "loss": 0.2645, "step": 4161000 }, { "epoch": 2.49, - "learning_rate": 3.8570710040355334e-05, - "loss": 0.3484, + "learning_rate": 3.857071424028646e-05, + "loss": 0.2655, "step": 4161500 }, { "epoch": 2.5, - "learning_rate": 3.8568610074794774e-05, - "loss": 0.3526, + "learning_rate": 3.8568618474657014e-05, + "loss": 0.2646, "step": 4162000 }, { "epoch": 2.5, - "learning_rate": 3.856651010923421e-05, - "loss": 0.3579, + "learning_rate": 3.8566518509096454e-05, + "loss": 0.2701, "step": 4162500 }, { "epoch": 2.5, - "learning_rate": 3.856441014367364e-05, - "loss": 0.3434, + "learning_rate": 3.856441854353589e-05, + "loss": 0.2636, "step": 4163000 }, { "epoch": 2.5, - "learning_rate": 3.85623143780442e-05, - "loss": 0.351, + "learning_rate": 3.856231857797532e-05, + "loss": 0.2674, "step": 4163500 }, { "epoch": 2.5, - "learning_rate": 3.8560214412483635e-05, - "loss": 0.3491, + "learning_rate": 3.856021861241476e-05, + "loss": 0.2647, "step": 4164000 }, { "epoch": 2.5, - "learning_rate": 3.855811444692307e-05, - "loss": 0.3535, + "learning_rate": 3.8558122846785315e-05, + "loss": 0.2681, "step": 4164500 }, { "epoch": 2.5, - "learning_rate": 3.855601448136251e-05, - "loss": 0.3515, + "learning_rate": 3.855602288122475e-05, + "loss": 0.267, "step": 4165000 }, { "epoch": 2.5, - "learning_rate": 3.855391451580194e-05, - "loss": 0.3403, + "learning_rate": 3.855392291566418e-05, + "loss": 0.2679, "step": 4165500 }, { "epoch": 2.5, - "learning_rate": 3.8551818750172495e-05, - "loss": 0.3585, + "learning_rate": 3.8551827150034735e-05, + "loss": 0.2737, "step": 4166000 }, { "epoch": 2.5, - "learning_rate": 3.854971878461193e-05, - "loss": 0.3485, + "learning_rate": 3.8549727184474176e-05, + "loss": 0.2686, "step": 4166500 }, { "epoch": 2.5, - "learning_rate": 3.854761881905137e-05, - "loss": 0.3415, + "learning_rate": 3.854762721891361e-05, + "loss": 0.2657, "step": 4167000 }, { "epoch": 2.5, - "learning_rate": 3.85455188534908e-05, - "loss": 0.3482, + "learning_rate": 3.854552725335304e-05, + "loss": 0.2668, "step": 4167500 }, { "epoch": 2.5, - "learning_rate": 3.8543418887930236e-05, - "loss": 0.3458, + "learning_rate": 3.854342728779248e-05, + "loss": 0.2664, "step": 4168000 }, { "epoch": 2.5, - "learning_rate": 3.8541318922369676e-05, - "loss": 0.3513, + "learning_rate": 3.8541327322231916e-05, + "loss": 0.2688, "step": 4168500 }, { "epoch": 2.5, - "learning_rate": 3.853921895680911e-05, - "loss": 0.3477, + "learning_rate": 3.853922735667135e-05, + "loss": 0.2667, "step": 4169000 }, { "epoch": 2.5, - "learning_rate": 3.853711899124854e-05, - "loss": 0.3475, + "learning_rate": 3.853712739111078e-05, + "loss": 0.2676, "step": 4169500 }, { "epoch": 2.5, - "learning_rate": 3.8535023225619097e-05, - "loss": 0.3497, + "learning_rate": 3.8535031625481344e-05, + "loss": 0.2651, "step": 4170000 }, { "epoch": 2.5, - "learning_rate": 3.853292326005854e-05, - "loss": 0.3488, + "learning_rate": 3.853293165992078e-05, + "loss": 0.2644, "step": 4170500 }, { "epoch": 2.5, - "learning_rate": 3.853082749442909e-05, - "loss": 0.3408, + "learning_rate": 3.853083589429133e-05, + "loss": 0.269, "step": 4171000 }, { "epoch": 2.5, - "learning_rate": 3.8528727528868524e-05, - "loss": 0.3482, + "learning_rate": 3.852873592873077e-05, + "loss": 0.2699, "step": 4171500 }, { "epoch": 2.5, - "learning_rate": 3.852663176323908e-05, - "loss": 0.3481, + "learning_rate": 3.8526635963170204e-05, + "loss": 0.2671, "step": 4172000 }, { "epoch": 2.5, - "learning_rate": 3.852453179767852e-05, - "loss": 0.339, + "learning_rate": 3.852453599760964e-05, + "loss": 0.2576, "step": 4172500 }, { "epoch": 2.5, - "learning_rate": 3.852243183211795e-05, - "loss": 0.3548, + "learning_rate": 3.852243603204908e-05, + "loss": 0.2661, "step": 4173000 }, { "epoch": 2.5, - "learning_rate": 3.8520331866557385e-05, - "loss": 0.3488, + "learning_rate": 3.852033606648851e-05, + "loss": 0.2714, "step": 4173500 }, { "epoch": 2.5, - "learning_rate": 3.8518231900996825e-05, - "loss": 0.3461, + "learning_rate": 3.8518236100927945e-05, + "loss": 0.2649, "step": 4174000 }, { "epoch": 2.5, - "learning_rate": 3.851613193543626e-05, - "loss": 0.3471, + "learning_rate": 3.851613613536738e-05, + "loss": 0.2616, "step": 4174500 }, { "epoch": 2.5, - "learning_rate": 3.851403196987569e-05, - "loss": 0.3455, + "learning_rate": 3.851404036973794e-05, + "loss": 0.2632, "step": 4175000 }, { "epoch": 2.5, - "learning_rate": 3.851193200431513e-05, - "loss": 0.3463, + "learning_rate": 3.851194040417737e-05, + "loss": 0.2654, "step": 4175500 }, { "epoch": 2.5, - "learning_rate": 3.8509832038754565e-05, - "loss": 0.3397, + "learning_rate": 3.8509840438616806e-05, + "loss": 0.2601, "step": 4176000 }, { "epoch": 2.5, - "learning_rate": 3.8507732073194e-05, - "loss": 0.3497, + "learning_rate": 3.8507740473056246e-05, + "loss": 0.2664, "step": 4176500 }, { "epoch": 2.5, - "learning_rate": 3.850563210763344e-05, - "loss": 0.3488, + "learning_rate": 3.85056447074268e-05, + "loss": 0.261, "step": 4177000 }, { "epoch": 2.5, - "learning_rate": 3.8503532142072866e-05, - "loss": 0.3468, + "learning_rate": 3.850354474186623e-05, + "loss": 0.2676, "step": 4177500 }, { "epoch": 2.5, - "learning_rate": 3.8501436376443426e-05, - "loss": 0.3513, + "learning_rate": 3.850144477630567e-05, + "loss": 0.2719, "step": 4178000 }, { "epoch": 2.51, - "learning_rate": 3.849933641088286e-05, - "loss": 0.3525, + "learning_rate": 3.849934901067623e-05, + "loss": 0.2628, "step": 4178500 }, { "epoch": 2.51, - "learning_rate": 3.849724064525342e-05, - "loss": 0.3514, + "learning_rate": 3.849724904511566e-05, + "loss": 0.2667, "step": 4179000 }, { "epoch": 2.51, - "learning_rate": 3.8495140679692853e-05, - "loss": 0.3508, + "learning_rate": 3.8495149079555094e-05, + "loss": 0.2679, "step": 4179500 }, { "epoch": 2.51, - "learning_rate": 3.849304071413229e-05, - "loss": 0.3535, + "learning_rate": 3.8493049113994534e-05, + "loss": 0.2646, "step": 4180000 }, { "epoch": 2.51, - "learning_rate": 3.849094074857173e-05, - "loss": 0.3488, + "learning_rate": 3.849094914843397e-05, + "loss": 0.2642, "step": 4180500 }, { "epoch": 2.51, - "learning_rate": 3.848884078301116e-05, - "loss": 0.3414, + "learning_rate": 3.84888491828734e-05, + "loss": 0.266, "step": 4181000 }, { "epoch": 2.51, - "learning_rate": 3.8486740817450594e-05, - "loss": 0.3519, + "learning_rate": 3.8486749217312834e-05, + "loss": 0.2614, "step": 4181500 }, { "epoch": 2.51, - "learning_rate": 3.8484640851890034e-05, - "loss": 0.3395, + "learning_rate": 3.848464925175227e-05, + "loss": 0.2607, "step": 4182000 }, { "epoch": 2.51, - "learning_rate": 3.848254508626059e-05, - "loss": 0.3391, + "learning_rate": 3.848255348612283e-05, + "loss": 0.2587, "step": 4182500 }, { "epoch": 2.51, - "learning_rate": 3.848044512070002e-05, - "loss": 0.3499, + "learning_rate": 3.848045772049338e-05, + "loss": 0.2673, "step": 4183000 }, { "epoch": 2.51, - "learning_rate": 3.8478345155139455e-05, - "loss": 0.3456, + "learning_rate": 3.8478361954863935e-05, + "loss": 0.2687, "step": 4183500 }, { "epoch": 2.51, - "learning_rate": 3.8476245189578895e-05, - "loss": 0.3271, + "learning_rate": 3.8476261989303375e-05, + "loss": 0.2583, "step": 4184000 }, { "epoch": 2.51, - "learning_rate": 3.847414522401832e-05, - "loss": 0.3543, + "learning_rate": 3.847416202374281e-05, + "loss": 0.2682, "step": 4184500 }, { "epoch": 2.51, - "learning_rate": 3.8472045258457755e-05, - "loss": 0.3582, + "learning_rate": 3.847206205818224e-05, + "loss": 0.2692, "step": 4185000 }, { "epoch": 2.51, - "learning_rate": 3.8469949492828316e-05, - "loss": 0.3376, + "learning_rate": 3.846996209262168e-05, + "loss": 0.2636, "step": 4185500 }, { "epoch": 2.51, - "learning_rate": 3.8467849527267756e-05, - "loss": 0.3441, + "learning_rate": 3.8467862127061116e-05, + "loss": 0.2669, "step": 4186000 }, { "epoch": 2.51, - "learning_rate": 3.846574956170719e-05, - "loss": 0.3506, + "learning_rate": 3.846576216150055e-05, + "loss": 0.2722, "step": 4186500 }, { "epoch": 2.51, - "learning_rate": 3.846364959614662e-05, - "loss": 0.3519, + "learning_rate": 3.846366219593999e-05, + "loss": 0.2684, "step": 4187000 }, { "epoch": 2.51, - "learning_rate": 3.8461549630586056e-05, - "loss": 0.3438, + "learning_rate": 3.846156223037942e-05, + "loss": 0.2636, "step": 4187500 }, { "epoch": 2.51, - "learning_rate": 3.845944966502549e-05, - "loss": 0.3498, + "learning_rate": 3.845946226481886e-05, + "loss": 0.2677, "step": 4188000 }, { "epoch": 2.51, - "learning_rate": 3.845734969946493e-05, - "loss": 0.3539, + "learning_rate": 3.845736229925829e-05, + "loss": 0.273, "step": 4188500 }, { "epoch": 2.51, - "learning_rate": 3.845525393383549e-05, - "loss": 0.3474, + "learning_rate": 3.8455262333697724e-05, + "loss": 0.269, "step": 4189000 }, { "epoch": 2.51, - "learning_rate": 3.845315396827492e-05, - "loss": 0.3577, + "learning_rate": 3.845316236813716e-05, + "loss": 0.2714, "step": 4189500 }, { "epoch": 2.51, - "learning_rate": 3.845105400271435e-05, - "loss": 0.3466, + "learning_rate": 3.845106660250772e-05, + "loss": 0.2649, "step": 4190000 }, { "epoch": 2.51, - "learning_rate": 3.844895403715379e-05, - "loss": 0.3405, + "learning_rate": 3.844897083687828e-05, + "loss": 0.2642, "step": 4190500 }, { "epoch": 2.51, - "learning_rate": 3.844685827152435e-05, - "loss": 0.3478, + "learning_rate": 3.844687087131771e-05, + "loss": 0.2666, "step": 4191000 }, { "epoch": 2.51, - "learning_rate": 3.8444758305963784e-05, - "loss": 0.3477, + "learning_rate": 3.8444770905757145e-05, + "loss": 0.2662, "step": 4191500 }, { "epoch": 2.51, - "learning_rate": 3.844265834040321e-05, - "loss": 0.3384, + "learning_rate": 3.8442670940196585e-05, + "loss": 0.26, "step": 4192000 }, { "epoch": 2.51, - "learning_rate": 3.844055837484265e-05, - "loss": 0.3461, + "learning_rate": 3.844057097463602e-05, + "loss": 0.2617, "step": 4192500 }, { "epoch": 2.51, - "learning_rate": 3.8438458409282085e-05, - "loss": 0.3404, + "learning_rate": 3.843847100907545e-05, + "loss": 0.265, "step": 4193000 }, { "epoch": 2.51, - "learning_rate": 3.843635844372152e-05, - "loss": 0.3481, + "learning_rate": 3.8436371043514885e-05, + "loss": 0.2703, "step": 4193500 }, { "epoch": 2.51, - "learning_rate": 3.843426267809208e-05, - "loss": 0.3435, + "learning_rate": 3.843427107795432e-05, + "loss": 0.2665, "step": 4194000 }, { "epoch": 2.51, - "learning_rate": 3.843216271253151e-05, - "loss": 0.3519, + "learning_rate": 3.843217531232488e-05, + "loss": 0.2646, "step": 4194500 }, { "epoch": 2.52, - "learning_rate": 3.8430062746970945e-05, - "loss": 0.3579, + "learning_rate": 3.843007954669543e-05, + "loss": 0.2671, "step": 4195000 }, { "epoch": 2.52, - "learning_rate": 3.8427962781410386e-05, - "loss": 0.3392, + "learning_rate": 3.8427979581134866e-05, + "loss": 0.2607, "step": 4195500 }, { "epoch": 2.52, - "learning_rate": 3.842586281584982e-05, - "loss": 0.3491, + "learning_rate": 3.8425879615574306e-05, + "loss": 0.2669, "step": 4196000 }, { "epoch": 2.52, - "learning_rate": 3.842376285028925e-05, - "loss": 0.3439, + "learning_rate": 3.842377965001374e-05, + "loss": 0.2582, "step": 4196500 }, { "epoch": 2.52, - "learning_rate": 3.8421667084659806e-05, - "loss": 0.354, + "learning_rate": 3.842167968445317e-05, + "loss": 0.2643, "step": 4197000 }, { "epoch": 2.52, - "learning_rate": 3.8419567119099246e-05, - "loss": 0.3542, + "learning_rate": 3.8419579718892614e-05, + "loss": 0.2654, "step": 4197500 }, { "epoch": 2.52, - "learning_rate": 3.841746715353868e-05, - "loss": 0.3434, + "learning_rate": 3.841747975333205e-05, + "loss": 0.2627, "step": 4198000 }, { "epoch": 2.52, - "learning_rate": 3.841537138790924e-05, - "loss": 0.3564, + "learning_rate": 3.841537978777148e-05, + "loss": 0.2676, "step": 4198500 }, { "epoch": 2.52, - "learning_rate": 3.841327142234867e-05, - "loss": 0.3387, + "learning_rate": 3.841328402214204e-05, + "loss": 0.2575, "step": 4199000 }, { "epoch": 2.52, - "learning_rate": 3.841117145678811e-05, - "loss": 0.3438, + "learning_rate": 3.8411188256512594e-05, + "loss": 0.269, "step": 4199500 }, { "epoch": 2.52, - "learning_rate": 3.840907149122754e-05, - "loss": 0.344, + "learning_rate": 3.840908829095203e-05, + "loss": 0.2604, "step": 4200000 }, { "epoch": 2.52, - "eval_loss": 0.33738669753074646, - "eval_runtime": 1113.5514, - "eval_samples_per_second": 473.009, - "eval_steps_per_second": 78.835, + "eval_loss": 0.2439488023519516, + "eval_runtime": 1437.9082, + "eval_samples_per_second": 366.31, + "eval_steps_per_second": 61.052, "step": 4200000 }, { "epoch": 2.52, - "learning_rate": 3.8406971525666974e-05, - "loss": 0.3491, + "learning_rate": 3.840698832539146e-05, + "loss": 0.2626, "step": 4200500 }, { "epoch": 2.52, - "learning_rate": 3.8404871560106414e-05, - "loss": 0.3482, + "learning_rate": 3.84048883598309e-05, + "loss": 0.2645, "step": 4201000 }, { "epoch": 2.52, - "learning_rate": 3.840277159454585e-05, - "loss": 0.3563, + "learning_rate": 3.8402788394270335e-05, + "loss": 0.2661, "step": 4201500 }, { "epoch": 2.52, - "learning_rate": 3.840067162898529e-05, - "loss": 0.3503, + "learning_rate": 3.840068842870977e-05, + "loss": 0.2626, "step": 4202000 }, { "epoch": 2.52, - "learning_rate": 3.839857166342472e-05, - "loss": 0.3481, + "learning_rate": 3.839859266308032e-05, + "loss": 0.2616, "step": 4202500 }, { "epoch": 2.52, - "learning_rate": 3.8396475897795275e-05, - "loss": 0.341, + "learning_rate": 3.839649269751976e-05, + "loss": 0.2621, "step": 4203000 }, { "epoch": 2.52, - "learning_rate": 3.839437593223471e-05, - "loss": 0.3478, + "learning_rate": 3.8394392731959196e-05, + "loss": 0.2664, "step": 4203500 }, { "epoch": 2.52, - "learning_rate": 3.839227596667415e-05, - "loss": 0.3421, + "learning_rate": 3.839229276639863e-05, + "loss": 0.2614, "step": 4204000 }, { "epoch": 2.52, - "learning_rate": 3.83901802010447e-05, - "loss": 0.3565, + "learning_rate": 3.839019280083807e-05, + "loss": 0.2594, "step": 4204500 }, { "epoch": 2.52, - "learning_rate": 3.8388080235484136e-05, - "loss": 0.3396, + "learning_rate": 3.83880928352775e-05, + "loss": 0.2644, "step": 4205000 }, { "epoch": 2.52, - "learning_rate": 3.838598026992357e-05, - "loss": 0.3424, + "learning_rate": 3.8385992869716936e-05, + "loss": 0.2636, "step": 4205500 }, { "epoch": 2.52, - "learning_rate": 3.838388030436301e-05, - "loss": 0.3539, + "learning_rate": 3.838389290415637e-05, + "loss": 0.265, "step": 4206000 }, { "epoch": 2.52, - "learning_rate": 3.838178033880244e-05, - "loss": 0.3472, + "learning_rate": 3.838179713852693e-05, + "loss": 0.2638, "step": 4206500 }, { "epoch": 2.52, - "learning_rate": 3.8379680373241876e-05, - "loss": 0.343, + "learning_rate": 3.8379697172966364e-05, + "loss": 0.2607, "step": 4207000 }, { "epoch": 2.52, - "learning_rate": 3.8377580407681317e-05, - "loss": 0.3504, + "learning_rate": 3.837760140733692e-05, + "loss": 0.2676, "step": 4207500 }, { "epoch": 2.52, - "learning_rate": 3.837548044212075e-05, - "loss": 0.3562, + "learning_rate": 3.837550144177636e-05, + "loss": 0.2729, "step": 4208000 }, { "epoch": 2.52, - "learning_rate": 3.8373380476560184e-05, - "loss": 0.3525, + "learning_rate": 3.837340147621579e-05, + "loss": 0.2703, "step": 4208500 }, { "epoch": 2.52, - "learning_rate": 3.8371280510999624e-05, - "loss": 0.3439, + "learning_rate": 3.8371301510655224e-05, + "loss": 0.2631, "step": 4209000 }, { "epoch": 2.52, - "learning_rate": 3.836918054543905e-05, - "loss": 0.3477, + "learning_rate": 3.8369201545094665e-05, + "loss": 0.2634, "step": 4209500 }, { "epoch": 2.52, - "learning_rate": 3.836708057987849e-05, - "loss": 0.3506, + "learning_rate": 3.836710157953409e-05, + "loss": 0.2643, "step": 4210000 }, { "epoch": 2.52, - "learning_rate": 3.836498481424905e-05, - "loss": 0.3509, + "learning_rate": 3.8365001613973525e-05, + "loss": 0.2663, "step": 4210500 }, { "epoch": 2.52, - "learning_rate": 3.8362884848688484e-05, - "loss": 0.3588, + "learning_rate": 3.8362905848344085e-05, + "loss": 0.2671, "step": 4211000 }, { "epoch": 2.52, - "learning_rate": 3.836078908305904e-05, - "loss": 0.345, + "learning_rate": 3.8360805882783525e-05, + "loss": 0.2603, "step": 4211500 }, { "epoch": 2.53, - "learning_rate": 3.835868911749847e-05, - "loss": 0.3386, + "learning_rate": 3.835870591722296e-05, + "loss": 0.2641, "step": 4212000 }, { "epoch": 2.53, - "learning_rate": 3.835658915193791e-05, - "loss": 0.3497, + "learning_rate": 3.835660595166239e-05, + "loss": 0.2679, "step": 4212500 }, { "epoch": 2.53, - "learning_rate": 3.8354489186377345e-05, - "loss": 0.3456, + "learning_rate": 3.8354505986101826e-05, + "loss": 0.2622, "step": 4213000 }, { "epoch": 2.53, - "learning_rate": 3.835238922081678e-05, - "loss": 0.3459, + "learning_rate": 3.835240602054126e-05, + "loss": 0.2612, "step": 4213500 }, { "epoch": 2.53, - "learning_rate": 3.835028925525621e-05, - "loss": 0.3447, + "learning_rate": 3.83503060549807e-05, + "loss": 0.2668, "step": 4214000 }, { "epoch": 2.53, - "learning_rate": 3.834819348962677e-05, - "loss": 0.3483, + "learning_rate": 3.834820608942013e-05, + "loss": 0.2638, "step": 4214500 }, { "epoch": 2.53, - "learning_rate": 3.8346093524066206e-05, - "loss": 0.3482, + "learning_rate": 3.8346110323790686e-05, + "loss": 0.2637, "step": 4215000 }, { "epoch": 2.53, - "learning_rate": 3.834399355850564e-05, - "loss": 0.3517, + "learning_rate": 3.834401035823012e-05, + "loss": 0.2663, "step": 4215500 }, { "epoch": 2.53, - "learning_rate": 3.834189359294508e-05, - "loss": 0.3504, + "learning_rate": 3.834191459260068e-05, + "loss": 0.2698, "step": 4216000 }, { "epoch": 2.53, - "learning_rate": 3.8339793627384506e-05, - "loss": 0.3465, + "learning_rate": 3.833981462704012e-05, + "loss": 0.2665, "step": 4216500 }, { "epoch": 2.53, - "learning_rate": 3.8337693661823947e-05, - "loss": 0.3481, + "learning_rate": 3.8337714661479554e-05, + "loss": 0.2667, "step": 4217000 }, { "epoch": 2.53, - "learning_rate": 3.833559789619451e-05, - "loss": 0.3425, + "learning_rate": 3.833561469591898e-05, + "loss": 0.2618, "step": 4217500 }, { "epoch": 2.53, - "learning_rate": 3.833349793063394e-05, - "loss": 0.3462, + "learning_rate": 3.833351473035842e-05, + "loss": 0.2652, "step": 4218000 }, { "epoch": 2.53, - "learning_rate": 3.8331397965073374e-05, - "loss": 0.3521, + "learning_rate": 3.8331414764797854e-05, + "loss": 0.2637, "step": 4218500 }, { "epoch": 2.53, - "learning_rate": 3.832929799951281e-05, - "loss": 0.3527, + "learning_rate": 3.832931479923729e-05, + "loss": 0.2667, "step": 4219000 }, { "epoch": 2.53, - "learning_rate": 3.832719803395224e-05, - "loss": 0.3472, + "learning_rate": 3.832721483367673e-05, + "loss": 0.2707, "step": 4219500 }, { "epoch": 2.53, - "learning_rate": 3.8325098068391674e-05, - "loss": 0.3374, + "learning_rate": 3.832511906804728e-05, + "loss": 0.26, "step": 4220000 }, { "epoch": 2.53, - "learning_rate": 3.8323002302762235e-05, - "loss": 0.3397, + "learning_rate": 3.8323019102486715e-05, + "loss": 0.2616, "step": 4220500 }, { "epoch": 2.53, - "learning_rate": 3.832090233720167e-05, - "loss": 0.3447, + "learning_rate": 3.8320919136926155e-05, + "loss": 0.26, "step": 4221000 }, { "epoch": 2.53, - "learning_rate": 3.83188023716411e-05, - "loss": 0.3496, + "learning_rate": 3.831881917136559e-05, + "loss": 0.2685, "step": 4221500 }, { "epoch": 2.53, - "learning_rate": 3.8316702406080535e-05, - "loss": 0.3456, + "learning_rate": 3.831672340573614e-05, + "loss": 0.2614, "step": 4222000 }, { "epoch": 2.53, - "learning_rate": 3.8314606640451095e-05, - "loss": 0.3415, + "learning_rate": 3.83146276401067e-05, + "loss": 0.2612, "step": 4222500 }, { "epoch": 2.53, - "learning_rate": 3.8312506674890536e-05, - "loss": 0.35, + "learning_rate": 3.8312527674546136e-05, + "loss": 0.2681, "step": 4223000 }, { "epoch": 2.53, - "learning_rate": 3.831040670932996e-05, - "loss": 0.3404, + "learning_rate": 3.8310427708985576e-05, + "loss": 0.2596, "step": 4223500 }, { "epoch": 2.53, - "learning_rate": 3.83083067437694e-05, - "loss": 0.3444, + "learning_rate": 3.830832774342501e-05, + "loss": 0.2615, "step": 4224000 }, { "epoch": 2.53, - "learning_rate": 3.8306206778208836e-05, - "loss": 0.3483, + "learning_rate": 3.8306227777864436e-05, + "loss": 0.2664, "step": 4224500 }, { "epoch": 2.53, - "learning_rate": 3.830410681264827e-05, - "loss": 0.3481, + "learning_rate": 3.830412781230388e-05, + "loss": 0.2654, "step": 4225000 }, { "epoch": 2.53, - "learning_rate": 3.830200684708771e-05, - "loss": 0.3377, + "learning_rate": 3.830202784674331e-05, + "loss": 0.2638, "step": 4225500 }, { "epoch": 2.53, - "learning_rate": 3.829990688152714e-05, - "loss": 0.3435, + "learning_rate": 3.8299927881182744e-05, + "loss": 0.2628, "step": 4226000 }, { "epoch": 2.53, - "learning_rate": 3.82978111158977e-05, - "loss": 0.35, + "learning_rate": 3.8297827915622184e-05, + "loss": 0.2684, "step": 4226500 }, { "epoch": 2.53, - "learning_rate": 3.829571115033713e-05, - "loss": 0.344, + "learning_rate": 3.8295736349923864e-05, + "loss": 0.2661, "step": 4227000 }, { "epoch": 2.53, - "learning_rate": 3.829361118477657e-05, - "loss": 0.3436, + "learning_rate": 3.82936363843633e-05, + "loss": 0.2563, "step": 4227500 }, { "epoch": 2.53, - "learning_rate": 3.8291511219216004e-05, - "loss": 0.3491, + "learning_rate": 3.829153641880273e-05, + "loss": 0.2669, "step": 4228000 }, { "epoch": 2.54, - "learning_rate": 3.828941125365544e-05, - "loss": 0.3444, + "learning_rate": 3.828943645324217e-05, + "loss": 0.2618, "step": 4228500 }, { "epoch": 2.54, - "learning_rate": 3.828731548802599e-05, - "loss": 0.3457, + "learning_rate": 3.82873364876816e-05, + "loss": 0.2708, "step": 4229000 }, { "epoch": 2.54, - "learning_rate": 3.828521552246543e-05, - "loss": 0.3448, + "learning_rate": 3.828523652212103e-05, + "loss": 0.2607, "step": 4229500 }, { "epoch": 2.54, - "learning_rate": 3.8283115556904865e-05, - "loss": 0.3517, + "learning_rate": 3.828313655656047e-05, + "loss": 0.2639, "step": 4230000 }, { "epoch": 2.54, - "learning_rate": 3.82810155913443e-05, - "loss": 0.3417, + "learning_rate": 3.828104079093103e-05, + "loss": 0.2637, "step": 4230500 }, { "epoch": 2.54, - "learning_rate": 3.827891982571486e-05, - "loss": 0.3467, + "learning_rate": 3.8278940825370466e-05, + "loss": 0.2659, "step": 4231000 }, { "epoch": 2.54, - "learning_rate": 3.827681986015429e-05, - "loss": 0.3405, + "learning_rate": 3.827684085980989e-05, + "loss": 0.2584, "step": 4231500 }, { "epoch": 2.54, - "learning_rate": 3.8274719894593725e-05, - "loss": 0.3512, + "learning_rate": 3.827474089424933e-05, + "loss": 0.2646, "step": 4232000 }, { "epoch": 2.54, - "learning_rate": 3.8272619929033165e-05, - "loss": 0.3388, + "learning_rate": 3.8272640928688766e-05, + "loss": 0.2628, "step": 4232500 }, { "epoch": 2.54, - "learning_rate": 3.82705199634726e-05, - "loss": 0.3527, + "learning_rate": 3.82705409631282e-05, + "loss": 0.2654, "step": 4233000 }, { "epoch": 2.54, - "learning_rate": 3.826841999791203e-05, - "loss": 0.3484, + "learning_rate": 3.826844099756764e-05, + "loss": 0.2668, "step": 4233500 }, { "epoch": 2.54, - "learning_rate": 3.826632003235147e-05, - "loss": 0.343, + "learning_rate": 3.826634103200707e-05, + "loss": 0.2641, "step": 4234000 }, { "epoch": 2.54, - "learning_rate": 3.8264220066790906e-05, - "loss": 0.348, + "learning_rate": 3.826424526637763e-05, + "loss": 0.2667, "step": 4234500 }, { "epoch": 2.54, - "learning_rate": 3.826212430116146e-05, - "loss": 0.3459, + "learning_rate": 3.826214530081707e-05, + "loss": 0.2662, "step": 4235000 }, { "epoch": 2.54, - "learning_rate": 3.826002433560089e-05, - "loss": 0.3482, + "learning_rate": 3.82600453352565e-05, + "loss": 0.265, "step": 4235500 }, { "epoch": 2.54, - "learning_rate": 3.825792856997145e-05, - "loss": 0.35, + "learning_rate": 3.8257945369695934e-05, + "loss": 0.2632, "step": 4236000 }, { "epoch": 2.54, - "learning_rate": 3.825582860441089e-05, - "loss": 0.3523, + "learning_rate": 3.825584960406649e-05, + "loss": 0.2675, "step": 4236500 }, { "epoch": 2.54, - "learning_rate": 3.825372863885032e-05, - "loss": 0.3416, + "learning_rate": 3.825374963850593e-05, + "loss": 0.2633, "step": 4237000 }, { "epoch": 2.54, - "learning_rate": 3.8251628673289754e-05, - "loss": 0.3435, + "learning_rate": 3.825164967294536e-05, + "loss": 0.2623, "step": 4237500 }, { "epoch": 2.54, - "learning_rate": 3.8249528707729194e-05, - "loss": 0.3433, + "learning_rate": 3.8249549707384795e-05, + "loss": 0.2674, "step": 4238000 }, { "epoch": 2.54, - "learning_rate": 3.824742874216863e-05, - "loss": 0.3508, + "learning_rate": 3.8247453941755355e-05, + "loss": 0.2654, "step": 4238500 }, { "epoch": 2.54, - "learning_rate": 3.824532877660806e-05, - "loss": 0.3492, + "learning_rate": 3.824535817612591e-05, + "loss": 0.2627, "step": 4239000 }, { "epoch": 2.54, - "learning_rate": 3.82432288110475e-05, - "loss": 0.351, + "learning_rate": 3.824325821056535e-05, + "loss": 0.2669, "step": 4239500 }, { "epoch": 2.54, - "learning_rate": 3.8241133045418055e-05, - "loss": 0.3567, + "learning_rate": 3.824115824500478e-05, + "loss": 0.2665, "step": 4240000 }, { "epoch": 2.54, - "learning_rate": 3.823903307985749e-05, - "loss": 0.3543, + "learning_rate": 3.823905827944422e-05, + "loss": 0.2684, "step": 4240500 }, { "epoch": 2.54, - "learning_rate": 3.823693311429693e-05, - "loss": 0.3481, + "learning_rate": 3.823695831388365e-05, + "loss": 0.2662, "step": 4241000 }, { "epoch": 2.54, - "learning_rate": 3.823483314873636e-05, - "loss": 0.3479, + "learning_rate": 3.823485834832308e-05, + "loss": 0.265, "step": 4241500 }, { "epoch": 2.54, - "learning_rate": 3.8232737383106916e-05, - "loss": 0.3496, + "learning_rate": 3.823275838276252e-05, + "loss": 0.2616, "step": 4242000 }, { "epoch": 2.54, - "learning_rate": 3.823063741754635e-05, - "loss": 0.3502, + "learning_rate": 3.8230658417201956e-05, + "loss": 0.2623, "step": 4242500 }, { "epoch": 2.54, - "learning_rate": 3.822853745198579e-05, - "loss": 0.351, + "learning_rate": 3.822855845164139e-05, + "loss": 0.2678, "step": 4243000 }, { "epoch": 2.54, - "learning_rate": 3.822643748642522e-05, - "loss": 0.3419, + "learning_rate": 3.822646688594307e-05, + "loss": 0.2608, "step": 4243500 }, { "epoch": 2.54, - "learning_rate": 3.8224341720795776e-05, - "loss": 0.3437, + "learning_rate": 3.8224366920382504e-05, + "loss": 0.2641, "step": 4244000 }, { "epoch": 2.54, - "learning_rate": 3.822224175523521e-05, - "loss": 0.3598, + "learning_rate": 3.8222266954821944e-05, + "loss": 0.2692, "step": 4244500 }, { "epoch": 2.55, - "learning_rate": 3.822014178967465e-05, - "loss": 0.3513, + "learning_rate": 3.822016698926138e-05, + "loss": 0.2679, "step": 4245000 }, { "epoch": 2.55, - "learning_rate": 3.8218041824114083e-05, - "loss": 0.3567, + "learning_rate": 3.821806702370081e-05, + "loss": 0.2638, "step": 4245500 }, { "epoch": 2.55, - "learning_rate": 3.821594605848464e-05, - "loss": 0.3458, + "learning_rate": 3.8215967058140244e-05, + "loss": 0.2673, "step": 4246000 }, { "epoch": 2.55, - "learning_rate": 3.821384609292408e-05, - "loss": 0.3416, + "learning_rate": 3.821386709257968e-05, + "loss": 0.2661, "step": 4246500 }, { "epoch": 2.55, - "learning_rate": 3.821174612736351e-05, - "loss": 0.3502, + "learning_rate": 3.821176712701912e-05, + "loss": 0.2637, "step": 4247000 }, { "epoch": 2.55, - "learning_rate": 3.8209646161802944e-05, - "loss": 0.3452, + "learning_rate": 3.820967136138968e-05, + "loss": 0.2643, "step": 4247500 }, { "epoch": 2.55, - "learning_rate": 3.8207546196242384e-05, - "loss": 0.353, + "learning_rate": 3.820757139582911e-05, + "loss": 0.2641, "step": 4248000 }, { "epoch": 2.55, - "learning_rate": 3.820545043061294e-05, - "loss": 0.3539, + "learning_rate": 3.820547143026854e-05, + "loss": 0.267, "step": 4248500 }, { "epoch": 2.55, - "learning_rate": 3.820335046505237e-05, - "loss": 0.3441, + "learning_rate": 3.820337146470798e-05, + "loss": 0.2627, "step": 4249000 }, { "epoch": 2.55, - "learning_rate": 3.8201250499491805e-05, - "loss": 0.3499, + "learning_rate": 3.820127149914741e-05, + "loss": 0.266, "step": 4249500 }, { "epoch": 2.55, - "learning_rate": 3.8199150533931245e-05, - "loss": 0.3409, + "learning_rate": 3.819917573351797e-05, + "loss": 0.2592, "step": 4250000 }, { "epoch": 2.55, - "learning_rate": 3.819705056837068e-05, - "loss": 0.3493, + "learning_rate": 3.81970757679574e-05, + "loss": 0.2574, "step": 4250500 }, { "epoch": 2.55, - "learning_rate": 3.819495060281011e-05, - "loss": 0.3446, + "learning_rate": 3.819497580239684e-05, + "loss": 0.2604, "step": 4251000 }, { "epoch": 2.55, - "learning_rate": 3.819285063724955e-05, - "loss": 0.3506, + "learning_rate": 3.819287583683627e-05, + "loss": 0.2686, "step": 4251500 }, { "epoch": 2.55, - "learning_rate": 3.8190750671688986e-05, - "loss": 0.3473, + "learning_rate": 3.819078007120683e-05, + "loss": 0.2648, "step": 4252000 }, { "epoch": 2.55, - "learning_rate": 3.818865490605954e-05, - "loss": 0.351, + "learning_rate": 3.818868010564627e-05, + "loss": 0.2593, "step": 4252500 }, { "epoch": 2.55, - "learning_rate": 3.818655494049897e-05, - "loss": 0.3443, + "learning_rate": 3.81865801400857e-05, + "loss": 0.2619, "step": 4253000 }, { "epoch": 2.55, - "learning_rate": 3.818445497493841e-05, - "loss": 0.3483, + "learning_rate": 3.8184480174525134e-05, + "loss": 0.2611, "step": 4253500 }, { "epoch": 2.55, - "learning_rate": 3.8182355009377846e-05, - "loss": 0.3494, + "learning_rate": 3.8182384408895694e-05, + "loss": 0.2597, "step": 4254000 }, { "epoch": 2.55, - "learning_rate": 3.81802592437484e-05, - "loss": 0.3512, + "learning_rate": 3.8180284443335134e-05, + "loss": 0.2674, "step": 4254500 }, { "epoch": 2.55, - "learning_rate": 3.817815927818784e-05, - "loss": 0.3543, + "learning_rate": 3.817818447777457e-05, + "loss": 0.27, "step": 4255000 }, { "epoch": 2.55, - "learning_rate": 3.8176059312627274e-05, - "loss": 0.3397, + "learning_rate": 3.8176084512213994e-05, + "loss": 0.2628, "step": 4255500 }, { "epoch": 2.55, - "learning_rate": 3.817396354699783e-05, - "loss": 0.3489, + "learning_rate": 3.8173988746584555e-05, + "loss": 0.2635, "step": 4256000 }, { "epoch": 2.55, - "learning_rate": 3.817186358143726e-05, - "loss": 0.3317, + "learning_rate": 3.8171888781023995e-05, + "loss": 0.2522, "step": 4256500 }, { "epoch": 2.55, - "learning_rate": 3.81697636158767e-05, - "loss": 0.361, + "learning_rate": 3.816978881546343e-05, + "loss": 0.2731, "step": 4257000 }, { "epoch": 2.55, - "learning_rate": 3.8167663650316134e-05, - "loss": 0.3323, + "learning_rate": 3.816769304983398e-05, + "loss": 0.2536, "step": 4257500 }, { "epoch": 2.55, - "learning_rate": 3.816556368475557e-05, - "loss": 0.348, + "learning_rate": 3.8165593084273415e-05, + "loss": 0.2664, "step": 4258000 }, { "epoch": 2.55, - "learning_rate": 3.816346371919501e-05, - "loss": 0.3458, + "learning_rate": 3.8163493118712856e-05, + "loss": 0.2634, "step": 4258500 }, { "epoch": 2.55, - "learning_rate": 3.816136795356556e-05, - "loss": 0.3468, + "learning_rate": 3.816139315315229e-05, + "loss": 0.269, "step": 4259000 }, { "epoch": 2.55, - "learning_rate": 3.8159267988004995e-05, - "loss": 0.3538, + "learning_rate": 3.815929318759172e-05, + "loss": 0.2676, "step": 4259500 }, { "epoch": 2.55, - "learning_rate": 3.815716802244443e-05, - "loss": 0.3574, + "learning_rate": 3.8157193222031156e-05, + "loss": 0.2743, "step": 4260000 }, { "epoch": 2.55, - "learning_rate": 3.815506805688387e-05, - "loss": 0.3418, + "learning_rate": 3.815509325647059e-05, + "loss": 0.2626, "step": 4260500 }, { "epoch": 2.55, - "learning_rate": 3.81529680913233e-05, - "loss": 0.3478, + "learning_rate": 3.815299329091003e-05, + "loss": 0.2673, "step": 4261000 }, { "epoch": 2.55, - "learning_rate": 3.8150872325693856e-05, - "loss": 0.3461, + "learning_rate": 3.815089752528059e-05, + "loss": 0.2694, "step": 4261500 }, { "epoch": 2.56, - "learning_rate": 3.8148772360133296e-05, - "loss": 0.3426, + "learning_rate": 3.8148797559720024e-05, + "loss": 0.2578, "step": 4262000 }, { "epoch": 2.56, - "learning_rate": 3.814667239457273e-05, - "loss": 0.3459, + "learning_rate": 3.814670179409058e-05, + "loss": 0.2643, "step": 4262500 }, { "epoch": 2.56, - "learning_rate": 3.814457242901216e-05, - "loss": 0.3551, + "learning_rate": 3.814460182853001e-05, + "loss": 0.2698, "step": 4263000 }, { "epoch": 2.56, - "learning_rate": 3.81424724634516e-05, - "loss": 0.3481, + "learning_rate": 3.814250186296945e-05, + "loss": 0.2646, "step": 4263500 }, { "epoch": 2.56, - "learning_rate": 3.814037669782216e-05, - "loss": 0.3407, + "learning_rate": 3.8140401897408884e-05, + "loss": 0.2631, "step": 4264000 }, { "epoch": 2.56, - "learning_rate": 3.813827673226159e-05, - "loss": 0.3568, + "learning_rate": 3.813830193184832e-05, + "loss": 0.2661, "step": 4264500 }, { "epoch": 2.56, - "learning_rate": 3.8136176766701024e-05, - "loss": 0.3486, + "learning_rate": 3.813620196628775e-05, + "loss": 0.2674, "step": 4265000 }, { "epoch": 2.56, - "learning_rate": 3.8134076801140464e-05, - "loss": 0.3508, + "learning_rate": 3.8134102000727185e-05, + "loss": 0.2624, "step": 4265500 }, { "epoch": 2.56, - "learning_rate": 3.81319768355799e-05, - "loss": 0.3447, + "learning_rate": 3.813200203516662e-05, + "loss": 0.2625, "step": 4266000 }, { "epoch": 2.56, - "learning_rate": 3.812988106995045e-05, - "loss": 0.353, + "learning_rate": 3.812990206960606e-05, + "loss": 0.265, "step": 4266500 }, { "epoch": 2.56, - "learning_rate": 3.8127781104389885e-05, - "loss": 0.3517, + "learning_rate": 3.812781050390774e-05, + "loss": 0.2683, "step": 4267000 }, { "epoch": 2.56, - "learning_rate": 3.8125681138829325e-05, - "loss": 0.3502, + "learning_rate": 3.812571053834717e-05, + "loss": 0.2692, "step": 4267500 }, { "epoch": 2.56, - "learning_rate": 3.812358537319988e-05, - "loss": 0.3434, + "learning_rate": 3.8123610572786606e-05, + "loss": 0.2636, "step": 4268000 }, { "epoch": 2.56, - "learning_rate": 3.812148540763931e-05, - "loss": 0.3483, + "learning_rate": 3.8121510607226046e-05, + "loss": 0.2687, "step": 4268500 }, { "epoch": 2.56, - "learning_rate": 3.811938544207875e-05, - "loss": 0.3401, + "learning_rate": 3.811941064166548e-05, + "loss": 0.2629, "step": 4269000 }, { "epoch": 2.56, - "learning_rate": 3.8117285476518185e-05, - "loss": 0.3389, + "learning_rate": 3.811731067610491e-05, + "loss": 0.2573, "step": 4269500 }, { "epoch": 2.56, - "learning_rate": 3.811518551095762e-05, - "loss": 0.3448, + "learning_rate": 3.8115210710544346e-05, + "loss": 0.2611, "step": 4270000 }, { "epoch": 2.56, - "learning_rate": 3.811308554539706e-05, - "loss": 0.3448, + "learning_rate": 3.811311074498378e-05, + "loss": 0.2675, "step": 4270500 }, { "epoch": 2.56, - "learning_rate": 3.811098557983649e-05, - "loss": 0.3536, + "learning_rate": 3.811101077942321e-05, + "loss": 0.2673, "step": 4271000 }, { "epoch": 2.56, - "learning_rate": 3.8108889814207046e-05, - "loss": 0.3452, + "learning_rate": 3.8108915013793774e-05, + "loss": 0.2642, "step": 4271500 }, { "epoch": 2.56, - "learning_rate": 3.810678984864648e-05, - "loss": 0.3507, + "learning_rate": 3.810681504823321e-05, + "loss": 0.264, "step": 4272000 }, { "epoch": 2.56, - "learning_rate": 3.810468988308592e-05, - "loss": 0.3512, + "learning_rate": 3.810471508267264e-05, + "loss": 0.2657, "step": 4272500 }, { "epoch": 2.56, - "learning_rate": 3.810258991752535e-05, - "loss": 0.3543, + "learning_rate": 3.8102615117112074e-05, + "loss": 0.2678, "step": 4273000 }, { "epoch": 2.56, - "learning_rate": 3.810048995196479e-05, - "loss": 0.3411, + "learning_rate": 3.8100519351482634e-05, + "loss": 0.2653, "step": 4273500 }, { "epoch": 2.56, - "learning_rate": 3.809838998640423e-05, - "loss": 0.3547, + "learning_rate": 3.8098419385922075e-05, + "loss": 0.2692, "step": 4274000 }, { "epoch": 2.56, - "learning_rate": 3.8096290020843654e-05, - "loss": 0.3463, + "learning_rate": 3.80963194203615e-05, + "loss": 0.2607, "step": 4274500 }, { "epoch": 2.56, - "learning_rate": 3.8094190055283094e-05, - "loss": 0.3478, + "learning_rate": 3.809421945480094e-05, + "loss": 0.2693, "step": 4275000 }, { "epoch": 2.56, - "learning_rate": 3.809209008972253e-05, - "loss": 0.3485, + "learning_rate": 3.80921236891715e-05, + "loss": 0.2643, "step": 4275500 }, { "epoch": 2.56, - "learning_rate": 3.808999012416196e-05, - "loss": 0.3492, + "learning_rate": 3.8090023723610935e-05, + "loss": 0.266, "step": 4276000 }, { "epoch": 2.56, - "learning_rate": 3.80878901586014e-05, - "loss": 0.3418, + "learning_rate": 3.808792375805037e-05, + "loss": 0.2645, "step": 4276500 }, { "epoch": 2.56, - "learning_rate": 3.8085790193040835e-05, - "loss": 0.337, + "learning_rate": 3.80858237924898e-05, + "loss": 0.2568, "step": 4277000 }, { "epoch": 2.56, - "learning_rate": 3.808369442741139e-05, - "loss": 0.3381, + "learning_rate": 3.808372802686036e-05, + "loss": 0.262, "step": 4277500 }, { "epoch": 2.56, - "learning_rate": 3.808159446185082e-05, - "loss": 0.348, + "learning_rate": 3.8081628061299796e-05, + "loss": 0.2733, "step": 4278000 }, { "epoch": 2.57, - "learning_rate": 3.807949449629026e-05, - "loss": 0.338, + "learning_rate": 3.807952809573923e-05, + "loss": 0.2639, "step": 4278500 }, { "epoch": 2.57, - "learning_rate": 3.8077394530729695e-05, - "loss": 0.3345, + "learning_rate": 3.807742813017867e-05, + "loss": 0.264, "step": 4279000 }, { "epoch": 2.57, - "learning_rate": 3.807529876510025e-05, - "loss": 0.3516, + "learning_rate": 3.807533236454922e-05, + "loss": 0.2635, "step": 4279500 }, { "epoch": 2.57, - "learning_rate": 3.807319879953968e-05, - "loss": 0.3428, + "learning_rate": 3.807323239898866e-05, + "loss": 0.2589, "step": 4280000 }, { "epoch": 2.57, - "learning_rate": 3.807110303391024e-05, - "loss": 0.3508, + "learning_rate": 3.807113243342809e-05, + "loss": 0.2628, "step": 4280500 }, { "epoch": 2.57, - "learning_rate": 3.806900306834968e-05, - "loss": 0.3551, + "learning_rate": 3.806903246786753e-05, + "loss": 0.2712, "step": 4281000 }, { "epoch": 2.57, - "learning_rate": 3.806690310278911e-05, - "loss": 0.3412, + "learning_rate": 3.806693250230696e-05, + "loss": 0.2595, "step": 4281500 }, { "epoch": 2.57, - "learning_rate": 3.806480313722855e-05, - "loss": 0.3571, + "learning_rate": 3.806483673667752e-05, + "loss": 0.2682, "step": 4282000 }, { "epoch": 2.57, - "learning_rate": 3.806270317166798e-05, - "loss": 0.3514, + "learning_rate": 3.806273677111696e-05, + "loss": 0.2686, "step": 4282500 }, { "epoch": 2.57, - "learning_rate": 3.8060607406038544e-05, - "loss": 0.3458, + "learning_rate": 3.806063680555639e-05, + "loss": 0.2669, "step": 4283000 }, { "epoch": 2.57, - "learning_rate": 3.805850744047798e-05, - "loss": 0.3355, + "learning_rate": 3.8058541039926945e-05, + "loss": 0.258, "step": 4283500 }, { "epoch": 2.57, - "learning_rate": 3.805640747491741e-05, - "loss": 0.3518, + "learning_rate": 3.805644107436638e-05, + "loss": 0.2679, "step": 4284000 }, { "epoch": 2.57, - "learning_rate": 3.8054307509356844e-05, - "loss": 0.3428, + "learning_rate": 3.805434110880582e-05, + "loss": 0.2609, "step": 4284500 }, { "epoch": 2.57, - "learning_rate": 3.805220754379628e-05, - "loss": 0.3417, + "learning_rate": 3.805224114324525e-05, + "loss": 0.2639, "step": 4285000 }, { "epoch": 2.57, - "learning_rate": 3.805010757823572e-05, - "loss": 0.3473, + "learning_rate": 3.8050141177684685e-05, + "loss": 0.2627, "step": 4285500 }, { "epoch": 2.57, - "learning_rate": 3.804800761267515e-05, - "loss": 0.3447, + "learning_rate": 3.8048041212124126e-05, + "loss": 0.2601, "step": 4286000 }, { "epoch": 2.57, - "learning_rate": 3.8045907647114585e-05, - "loss": 0.357, + "learning_rate": 3.804594124656355e-05, + "loss": 0.2693, "step": 4286500 }, { "epoch": 2.57, - "learning_rate": 3.8043807681554025e-05, - "loss": 0.3426, + "learning_rate": 3.8043841281002986e-05, + "loss": 0.264, "step": 4287000 }, { "epoch": 2.57, - "learning_rate": 3.80417161158557e-05, - "loss": 0.3493, + "learning_rate": 3.8041745515373546e-05, + "loss": 0.2638, "step": 4287500 }, { "epoch": 2.57, - "learning_rate": 3.803961615029514e-05, - "loss": 0.3502, + "learning_rate": 3.8039645549812986e-05, + "loss": 0.2628, "step": 4288000 }, { "epoch": 2.57, - "learning_rate": 3.8037516184734565e-05, - "loss": 0.3429, + "learning_rate": 3.803754558425242e-05, + "loss": 0.264, "step": 4288500 }, { "epoch": 2.57, - "learning_rate": 3.8035416219174006e-05, - "loss": 0.3442, + "learning_rate": 3.803544561869185e-05, + "loss": 0.2639, "step": 4289000 }, { "epoch": 2.57, - "learning_rate": 3.803331625361344e-05, - "loss": 0.3515, + "learning_rate": 3.8033349853062414e-05, + "loss": 0.2704, "step": 4289500 }, { "epoch": 2.57, - "learning_rate": 3.803121628805287e-05, - "loss": 0.3406, + "learning_rate": 3.803124988750185e-05, + "loss": 0.2675, "step": 4290000 }, { "epoch": 2.57, - "learning_rate": 3.802912052242343e-05, - "loss": 0.348, + "learning_rate": 3.802914992194128e-05, + "loss": 0.2629, "step": 4290500 }, { "epoch": 2.57, - "learning_rate": 3.8027020556862866e-05, - "loss": 0.3455, + "learning_rate": 3.8027049956380714e-05, + "loss": 0.2665, "step": 4291000 }, { "epoch": 2.57, - "learning_rate": 3.80249205913023e-05, - "loss": 0.34, + "learning_rate": 3.8024954190751274e-05, + "loss": 0.2594, "step": 4291500 }, { "epoch": 2.57, - "learning_rate": 3.802282062574173e-05, - "loss": 0.3498, + "learning_rate": 3.802285422519071e-05, + "loss": 0.2646, "step": 4292000 }, { "epoch": 2.57, - "learning_rate": 3.8020724860112294e-05, - "loss": 0.3529, + "learning_rate": 3.802075425963014e-05, + "loss": 0.2708, "step": 4292500 }, { "epoch": 2.57, - "learning_rate": 3.8018624894551734e-05, - "loss": 0.3491, + "learning_rate": 3.801865429406958e-05, + "loss": 0.2672, "step": 4293000 }, { "epoch": 2.57, - "learning_rate": 3.801652492899116e-05, - "loss": 0.3503, + "learning_rate": 3.801655432850901e-05, + "loss": 0.2672, "step": 4293500 }, { "epoch": 2.57, - "learning_rate": 3.8014424963430594e-05, - "loss": 0.3494, + "learning_rate": 3.801445856287957e-05, + "loss": 0.2649, "step": 4294000 }, { "epoch": 2.57, - "learning_rate": 3.8012324997870034e-05, - "loss": 0.3484, + "learning_rate": 3.8012358597319e-05, + "loss": 0.2658, "step": 4294500 }, { "epoch": 2.58, - "learning_rate": 3.801022503230947e-05, - "loss": 0.3404, + "learning_rate": 3.801025863175844e-05, + "loss": 0.2602, "step": 4295000 }, { "epoch": 2.58, - "learning_rate": 3.80081250667489e-05, - "loss": 0.345, + "learning_rate": 3.8008158666197876e-05, + "loss": 0.2602, "step": 4295500 }, { "epoch": 2.58, - "learning_rate": 3.800602510118834e-05, - "loss": 0.3422, + "learning_rate": 3.800606710049955e-05, + "loss": 0.2658, "step": 4296000 }, { "epoch": 2.58, - "learning_rate": 3.8003929335558895e-05, - "loss": 0.3441, + "learning_rate": 3.800396713493898e-05, + "loss": 0.2677, "step": 4296500 }, { "epoch": 2.58, - "learning_rate": 3.800182936999833e-05, - "loss": 0.3453, + "learning_rate": 3.800186716937842e-05, + "loss": 0.2626, "step": 4297000 }, { "epoch": 2.58, - "learning_rate": 3.799972940443777e-05, - "loss": 0.3545, + "learning_rate": 3.7999767203817857e-05, + "loss": 0.265, "step": 4297500 }, { "epoch": 2.58, - "learning_rate": 3.79976294388772e-05, - "loss": 0.3523, + "learning_rate": 3.799766723825729e-05, + "loss": 0.2651, "step": 4298000 }, { "epoch": 2.58, - "learning_rate": 3.7995529473316636e-05, - "loss": 0.3403, + "learning_rate": 3.799556727269673e-05, + "loss": 0.2625, "step": 4298500 }, { "epoch": 2.58, - "learning_rate": 3.799343370768719e-05, - "loss": 0.342, + "learning_rate": 3.7993467307136164e-05, + "loss": 0.2654, "step": 4299000 }, { "epoch": 2.58, - "learning_rate": 3.799133374212663e-05, - "loss": 0.3515, + "learning_rate": 3.79913673415756e-05, + "loss": 0.267, "step": 4299500 }, { "epoch": 2.58, - "learning_rate": 3.798923377656606e-05, - "loss": 0.3408, + "learning_rate": 3.798927157594615e-05, + "loss": 0.2618, "step": 4300000 }, { "epoch": 2.58, - "eval_loss": 0.33534136414527893, - "eval_runtime": 1116.3539, - "eval_samples_per_second": 471.822, - "eval_steps_per_second": 78.637, + "eval_loss": 0.24335187673568726, + "eval_runtime": 1434.6421, + "eval_samples_per_second": 367.144, + "eval_steps_per_second": 61.191, "step": 4300000 - }, - { - "epoch": 2.58, - "learning_rate": 3.7987133811005496e-05, - "loss": 0.3505, - "step": 4300500 - }, - { - "epoch": 2.58, - "learning_rate": 3.798503804537605e-05, - "loss": 0.3482, - "step": 4301000 - }, - { - "epoch": 2.58, - "learning_rate": 3.798293807981549e-05, - "loss": 0.3335, - "step": 4301500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7980838114254924e-05, - "loss": 0.3431, - "step": 4302000 - }, - { - "epoch": 2.58, - "learning_rate": 3.797873814869436e-05, - "loss": 0.3535, - "step": 4302500 - }, - { - "epoch": 2.58, - "learning_rate": 3.797664238306492e-05, - "loss": 0.3382, - "step": 4303000 - }, - { - "epoch": 2.58, - "learning_rate": 3.797454241750435e-05, - "loss": 0.3428, - "step": 4303500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7972442451943784e-05, - "loss": 0.356, - "step": 4304000 - }, - { - "epoch": 2.58, - "learning_rate": 3.7970342486383225e-05, - "loss": 0.3498, - "step": 4304500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7968246720753785e-05, - "loss": 0.3386, - "step": 4305000 - }, - { - "epoch": 2.58, - "learning_rate": 3.796615095512434e-05, - "loss": 0.3477, - "step": 4305500 - }, - { - "epoch": 2.58, - "learning_rate": 3.796405098956377e-05, - "loss": 0.352, - "step": 4306000 - }, - { - "epoch": 2.58, - "learning_rate": 3.7961951024003205e-05, - "loss": 0.3598, - "step": 4306500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7959851058442646e-05, - "loss": 0.3581, - "step": 4307000 - }, - { - "epoch": 2.58, - "learning_rate": 3.795775109288208e-05, - "loss": 0.3509, - "step": 4307500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7955651127321506e-05, - "loss": 0.3465, - "step": 4308000 - }, - { - "epoch": 2.58, - "learning_rate": 3.7953555361692066e-05, - "loss": 0.3446, - "step": 4308500 - }, - { - "epoch": 2.58, - "learning_rate": 3.7951455396131506e-05, - "loss": 0.3446, - "step": 4309000 - }, - { - "epoch": 2.58, - "learning_rate": 3.794935543057094e-05, - "loss": 0.3441, - "step": 4309500 - }, - { - "epoch": 2.58, - "learning_rate": 3.794725546501037e-05, - "loss": 0.3484, - "step": 4310000 - }, - { - "epoch": 2.58, - "learning_rate": 3.794515549944981e-05, - "loss": 0.3505, - "step": 4310500 - }, - { - "epoch": 2.58, - "learning_rate": 3.794305973382037e-05, - "loss": 0.3536, - "step": 4311000 - }, - { - "epoch": 2.58, - "learning_rate": 3.79409597682598e-05, - "loss": 0.3431, - "step": 4311500 - }, - { - "epoch": 2.59, - "learning_rate": 3.793885980269924e-05, - "loss": 0.3401, - "step": 4312000 - }, - { - "epoch": 2.59, - "learning_rate": 3.793675983713867e-05, - "loss": 0.3493, - "step": 4312500 - }, - { - "epoch": 2.59, - "learning_rate": 3.79346598715781e-05, - "loss": 0.3491, - "step": 4313000 - }, - { - "epoch": 2.59, - "learning_rate": 3.793256410594866e-05, - "loss": 0.3404, - "step": 4313500 - }, - { - "epoch": 2.59, - "learning_rate": 3.79304641403881e-05, - "loss": 0.3402, - "step": 4314000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7928364174827535e-05, - "loss": 0.3502, - "step": 4314500 - }, - { - "epoch": 2.59, - "learning_rate": 3.792626420926696e-05, - "loss": 0.3608, - "step": 4315000 - }, - { - "epoch": 2.59, - "learning_rate": 3.792416844363752e-05, - "loss": 0.3601, - "step": 4315500 - }, - { - "epoch": 2.59, - "learning_rate": 3.792206847807696e-05, - "loss": 0.3464, - "step": 4316000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7919968512516396e-05, - "loss": 0.3451, - "step": 4316500 - }, - { - "epoch": 2.59, - "learning_rate": 3.791786854695583e-05, - "loss": 0.3454, - "step": 4317000 - }, - { - "epoch": 2.59, - "learning_rate": 3.791577278132639e-05, - "loss": 0.3551, - "step": 4317500 - }, - { - "epoch": 2.59, - "learning_rate": 3.791367281576582e-05, - "loss": 0.3481, - "step": 4318000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7911572850205257e-05, - "loss": 0.3456, - "step": 4318500 - }, - { - "epoch": 2.59, - "learning_rate": 3.79094728846447e-05, - "loss": 0.3462, - "step": 4319000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7907372919084123e-05, - "loss": 0.3465, - "step": 4319500 - }, - { - "epoch": 2.59, - "learning_rate": 3.790527295352356e-05, - "loss": 0.3464, - "step": 4320000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7903172987963e-05, - "loss": 0.3571, - "step": 4320500 - }, - { - "epoch": 2.59, - "learning_rate": 3.790107302240243e-05, - "loss": 0.355, - "step": 4321000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7898973056841864e-05, - "loss": 0.3509, - "step": 4321500 - }, - { - "epoch": 2.59, - "learning_rate": 3.7896873091281304e-05, - "loss": 0.3482, - "step": 4322000 - }, - { - "epoch": 2.59, - "learning_rate": 3.789477312572074e-05, - "loss": 0.3537, - "step": 4322500 - }, - { - "epoch": 2.59, - "learning_rate": 3.789267316016017e-05, - "loss": 0.3442, - "step": 4323000 - }, - { - "epoch": 2.59, - "learning_rate": 3.7890577394530725e-05, - "loss": 0.3488, - "step": 4323500 - }, - { - "epoch": 2.59, - "learning_rate": 3.7888477428970165e-05, - "loss": 0.3492, - "step": 4324000 - }, - { - "epoch": 2.59, - "learning_rate": 3.78863774634096e-05, - "loss": 0.3461, - "step": 4324500 - }, - { - "epoch": 2.59, - "learning_rate": 3.788427749784903e-05, - "loss": 0.3405, - "step": 4325000 - }, - { - "epoch": 2.59, - "learning_rate": 3.788218173221959e-05, - "loss": 0.3384, - "step": 4325500 - }, - { - "epoch": 2.59, - "learning_rate": 3.7880081766659026e-05, - "loss": 0.3461, - "step": 4326000 - }, - { - "epoch": 2.59, - "learning_rate": 3.787798180109846e-05, - "loss": 0.3507, - "step": 4326500 - }, - { - "epoch": 2.59, - "learning_rate": 3.78758818355379e-05, - "loss": 0.3512, - "step": 4327000 - }, - { - "epoch": 2.59, - "learning_rate": 3.787378606990845e-05, - "loss": 0.3394, - "step": 4327500 - }, - { - "epoch": 2.59, - "learning_rate": 3.7871686104347886e-05, - "loss": 0.3443, - "step": 4328000 - }, - { - "epoch": 2.6, - "learning_rate": 3.786958613878732e-05, - "loss": 0.3509, - "step": 4328500 - }, - { - "epoch": 2.6, - "learning_rate": 3.786748617322676e-05, - "loss": 0.345, - "step": 4329000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7865390407597314e-05, - "loss": 0.3452, - "step": 4329500 - }, - { - "epoch": 2.6, - "learning_rate": 3.7863294641967874e-05, - "loss": 0.3439, - "step": 4330000 - }, - { - "epoch": 2.6, - "learning_rate": 3.786119467640731e-05, - "loss": 0.3431, - "step": 4330500 - }, - { - "epoch": 2.6, - "learning_rate": 3.785909471084674e-05, - "loss": 0.352, - "step": 4331000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7856994745286174e-05, - "loss": 0.3557, - "step": 4331500 - }, - { - "epoch": 2.6, - "learning_rate": 3.785489477972561e-05, - "loss": 0.3469, - "step": 4332000 - }, - { - "epoch": 2.6, - "learning_rate": 3.785279481416505e-05, - "loss": 0.3474, - "step": 4332500 - }, - { - "epoch": 2.6, - "learning_rate": 3.785069484860448e-05, - "loss": 0.3547, - "step": 4333000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7848594883043915e-05, - "loss": 0.3502, - "step": 4333500 - }, - { - "epoch": 2.6, - "learning_rate": 3.784649911741447e-05, - "loss": 0.3429, - "step": 4334000 - }, - { - "epoch": 2.6, - "learning_rate": 3.784439915185391e-05, - "loss": 0.342, - "step": 4334500 - }, - { - "epoch": 2.6, - "learning_rate": 3.784229918629334e-05, - "loss": 0.3458, - "step": 4335000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7840199220732776e-05, - "loss": 0.3502, - "step": 4335500 - }, - { - "epoch": 2.6, - "learning_rate": 3.7838099255172216e-05, - "loss": 0.3446, - "step": 4336000 - }, - { - "epoch": 2.6, - "learning_rate": 3.783600348954277e-05, - "loss": 0.3531, - "step": 4336500 - }, - { - "epoch": 2.6, - "learning_rate": 3.78339035239822e-05, - "loss": 0.3488, - "step": 4337000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7831803558421637e-05, - "loss": 0.3498, - "step": 4337500 - }, - { - "epoch": 2.6, - "learning_rate": 3.782970359286108e-05, - "loss": 0.3527, - "step": 4338000 - }, - { - "epoch": 2.6, - "learning_rate": 3.782760362730051e-05, - "loss": 0.3384, - "step": 4338500 - }, - { - "epoch": 2.6, - "learning_rate": 3.7825507861671064e-05, - "loss": 0.3407, - "step": 4339000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7823407896110504e-05, - "loss": 0.3416, - "step": 4339500 - }, - { - "epoch": 2.6, - "learning_rate": 3.782130793054994e-05, - "loss": 0.341, - "step": 4340000 - }, - { - "epoch": 2.6, - "learning_rate": 3.781920796498937e-05, - "loss": 0.3501, - "step": 4340500 - }, - { - "epoch": 2.6, - "learning_rate": 3.781710799942881e-05, - "loss": 0.3479, - "step": 4341000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7815008033868245e-05, - "loss": 0.3483, - "step": 4341500 - }, - { - "epoch": 2.6, - "learning_rate": 3.781290806830768e-05, - "loss": 0.337, - "step": 4342000 - }, - { - "epoch": 2.6, - "learning_rate": 3.781080810274712e-05, - "loss": 0.3445, - "step": 4342500 - }, - { - "epoch": 2.6, - "learning_rate": 3.780871233711767e-05, - "loss": 0.3475, - "step": 4343000 - }, - { - "epoch": 2.6, - "learning_rate": 3.7806616571488225e-05, - "loss": 0.3482, - "step": 4343500 - }, - { - "epoch": 2.6, - "learning_rate": 3.780451660592766e-05, - "loss": 0.3536, - "step": 4344000 - }, - { - "epoch": 2.6, - "learning_rate": 3.780241664036709e-05, - "loss": 0.3435, - "step": 4344500 - }, - { - "epoch": 2.61, - "learning_rate": 3.780031667480653e-05, - "loss": 0.3513, - "step": 4345000 - }, - { - "epoch": 2.61, - "learning_rate": 3.7798216709245966e-05, - "loss": 0.3503, - "step": 4345500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7796116743685406e-05, - "loss": 0.3464, - "step": 4346000 - }, - { - "epoch": 2.61, - "learning_rate": 3.779402097805596e-05, - "loss": 0.3587, - "step": 4346500 - }, - { - "epoch": 2.61, - "learning_rate": 3.779192101249539e-05, - "loss": 0.3456, - "step": 4347000 - }, - { - "epoch": 2.61, - "learning_rate": 3.778982104693483e-05, - "loss": 0.3505, - "step": 4347500 - }, - { - "epoch": 2.61, - "learning_rate": 3.778772108137427e-05, - "loss": 0.3479, - "step": 4348000 - }, - { - "epoch": 2.61, - "learning_rate": 3.77856211158137e-05, - "loss": 0.3504, - "step": 4348500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7783521150253134e-05, - "loss": 0.3474, - "step": 4349000 - }, - { - "epoch": 2.61, - "learning_rate": 3.778142538462369e-05, - "loss": 0.3466, - "step": 4349500 - }, - { - "epoch": 2.61, - "learning_rate": 3.777932541906313e-05, - "loss": 0.3392, - "step": 4350000 - }, - { - "epoch": 2.61, - "learning_rate": 3.777722545350256e-05, - "loss": 0.3404, - "step": 4350500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7775125487941995e-05, - "loss": 0.3503, - "step": 4351000 - }, - { - "epoch": 2.61, - "learning_rate": 3.7773025522381435e-05, - "loss": 0.3494, - "step": 4351500 - }, - { - "epoch": 2.61, - "learning_rate": 3.777092555682087e-05, - "loss": 0.3464, - "step": 4352000 - }, - { - "epoch": 2.61, - "learning_rate": 3.77688255912603e-05, - "loss": 0.3549, - "step": 4352500 - }, - { - "epoch": 2.61, - "learning_rate": 3.776672562569974e-05, - "loss": 0.3507, - "step": 4353000 - }, - { - "epoch": 2.61, - "learning_rate": 3.7764629860070296e-05, - "loss": 0.3574, - "step": 4353500 - }, - { - "epoch": 2.61, - "learning_rate": 3.776252989450973e-05, - "loss": 0.345, - "step": 4354000 - }, - { - "epoch": 2.61, - "learning_rate": 3.776042992894917e-05, - "loss": 0.354, - "step": 4354500 - }, - { - "epoch": 2.61, - "learning_rate": 3.77583299633886e-05, - "loss": 0.3434, - "step": 4355000 - }, - { - "epoch": 2.61, - "learning_rate": 3.7756234197759156e-05, - "loss": 0.3477, - "step": 4355500 - }, - { - "epoch": 2.61, - "learning_rate": 3.775413843212971e-05, - "loss": 0.3554, - "step": 4356000 - }, - { - "epoch": 2.61, - "learning_rate": 3.7752038466569143e-05, - "loss": 0.3524, - "step": 4356500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7749938501008584e-05, - "loss": 0.3491, - "step": 4357000 - }, - { - "epoch": 2.61, - "learning_rate": 3.774783853544802e-05, - "loss": 0.3412, - "step": 4357500 - }, - { - "epoch": 2.61, - "learning_rate": 3.774573856988745e-05, - "loss": 0.3448, - "step": 4358000 - }, - { - "epoch": 2.61, - "learning_rate": 3.774363860432689e-05, - "loss": 0.3435, - "step": 4358500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7741542838697444e-05, - "loss": 0.3456, - "step": 4359000 - }, - { - "epoch": 2.61, - "learning_rate": 3.773944287313688e-05, - "loss": 0.3425, - "step": 4359500 - }, - { - "epoch": 2.61, - "learning_rate": 3.773734290757632e-05, - "loss": 0.3442, - "step": 4360000 - }, - { - "epoch": 2.61, - "learning_rate": 3.773524714194687e-05, - "loss": 0.3426, - "step": 4360500 - }, - { - "epoch": 2.61, - "learning_rate": 3.7733147176386305e-05, - "loss": 0.3441, - "step": 4361000 - }, - { - "epoch": 2.61, - "learning_rate": 3.773104721082574e-05, - "loss": 0.3499, - "step": 4361500 - }, - { - "epoch": 2.62, - "learning_rate": 3.772894724526518e-05, - "loss": 0.3384, - "step": 4362000 - }, - { - "epoch": 2.62, - "learning_rate": 3.772684727970461e-05, - "loss": 0.3501, - "step": 4362500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7724747314144046e-05, - "loss": 0.3369, - "step": 4363000 - }, - { - "epoch": 2.62, - "learning_rate": 3.7722647348583486e-05, - "loss": 0.3476, - "step": 4363500 - }, - { - "epoch": 2.62, - "learning_rate": 3.772054738302292e-05, - "loss": 0.3503, - "step": 4364000 - }, - { - "epoch": 2.62, - "learning_rate": 3.771845161739347e-05, - "loss": 0.3553, - "step": 4364500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7716351651832906e-05, - "loss": 0.3513, - "step": 4365000 - }, - { - "epoch": 2.62, - "learning_rate": 3.771425168627235e-05, - "loss": 0.3422, - "step": 4365500 - }, - { - "epoch": 2.62, - "learning_rate": 3.771215172071178e-05, - "loss": 0.346, - "step": 4366000 - }, - { - "epoch": 2.62, - "learning_rate": 3.7710051755151214e-05, - "loss": 0.3446, - "step": 4366500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7707951789590654e-05, - "loss": 0.3488, - "step": 4367000 - }, - { - "epoch": 2.62, - "learning_rate": 3.770585182403009e-05, - "loss": 0.344, - "step": 4367500 - }, - { - "epoch": 2.62, - "learning_rate": 3.770375185846952e-05, - "loss": 0.3455, - "step": 4368000 - }, - { - "epoch": 2.62, - "learning_rate": 3.7701651892908954e-05, - "loss": 0.3503, - "step": 4368500 - }, - { - "epoch": 2.62, - "learning_rate": 3.769955192734839e-05, - "loss": 0.3467, - "step": 4369000 - }, - { - "epoch": 2.62, - "learning_rate": 3.769745196178783e-05, - "loss": 0.3488, - "step": 4369500 - }, - { - "epoch": 2.62, - "learning_rate": 3.769535619615839e-05, - "loss": 0.3478, - "step": 4370000 - }, - { - "epoch": 2.62, - "learning_rate": 3.7693256230597815e-05, - "loss": 0.3523, - "step": 4370500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7691160464968375e-05, - "loss": 0.3535, - "step": 4371000 - }, - { - "epoch": 2.62, - "learning_rate": 3.768906049940781e-05, - "loss": 0.3469, - "step": 4371500 - }, - { - "epoch": 2.62, - "learning_rate": 3.768696053384725e-05, - "loss": 0.3474, - "step": 4372000 - }, - { - "epoch": 2.62, - "learning_rate": 3.768486056828668e-05, - "loss": 0.3495, - "step": 4372500 - }, - { - "epoch": 2.62, - "learning_rate": 3.768276060272611e-05, - "loss": 0.3451, - "step": 4373000 - }, - { - "epoch": 2.62, - "learning_rate": 3.768066063716555e-05, - "loss": 0.3486, - "step": 4373500 - }, - { - "epoch": 2.62, - "learning_rate": 3.767856067160498e-05, - "loss": 0.3418, - "step": 4374000 - }, - { - "epoch": 2.62, - "learning_rate": 3.7676460706044416e-05, - "loss": 0.3509, - "step": 4374500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7674360740483857e-05, - "loss": 0.3478, - "step": 4375000 - }, - { - "epoch": 2.62, - "learning_rate": 3.767226077492329e-05, - "loss": 0.3501, - "step": 4375500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7670160809362723e-05, - "loss": 0.3342, - "step": 4376000 - }, - { - "epoch": 2.62, - "learning_rate": 3.7668060843802164e-05, - "loss": 0.3432, - "step": 4376500 - }, - { - "epoch": 2.62, - "learning_rate": 3.766596507817272e-05, - "loss": 0.3426, - "step": 4377000 - }, - { - "epoch": 2.62, - "learning_rate": 3.766386931254327e-05, - "loss": 0.3513, - "step": 4377500 - }, - { - "epoch": 2.62, - "learning_rate": 3.7661769346982704e-05, - "loss": 0.35, - "step": 4378000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7659669381422145e-05, - "loss": 0.3454, - "step": 4378500 - }, - { - "epoch": 2.63, - "learning_rate": 3.765756941586158e-05, - "loss": 0.3464, - "step": 4379000 - }, - { - "epoch": 2.63, - "learning_rate": 3.765546945030101e-05, - "loss": 0.3506, - "step": 4379500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7653373684671565e-05, - "loss": 0.3521, - "step": 4380000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7651273719111005e-05, - "loss": 0.3458, - "step": 4380500 - }, - { - "epoch": 2.63, - "learning_rate": 3.764917375355044e-05, - "loss": 0.3482, - "step": 4381000 - }, - { - "epoch": 2.63, - "learning_rate": 3.764707378798987e-05, - "loss": 0.3469, - "step": 4381500 - }, - { - "epoch": 2.63, - "learning_rate": 3.764497382242931e-05, - "loss": 0.3433, - "step": 4382000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7642878056799866e-05, - "loss": 0.3454, - "step": 4382500 - }, - { - "epoch": 2.63, - "learning_rate": 3.76407780912393e-05, - "loss": 0.3398, - "step": 4383000 - }, - { - "epoch": 2.63, - "learning_rate": 3.763867812567874e-05, - "loss": 0.344, - "step": 4383500 - }, - { - "epoch": 2.63, - "learning_rate": 3.763657816011817e-05, - "loss": 0.345, - "step": 4384000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7634478194557607e-05, - "loss": 0.3421, - "step": 4384500 - }, - { - "epoch": 2.63, - "learning_rate": 3.763237822899705e-05, - "loss": 0.3501, - "step": 4385000 - }, - { - "epoch": 2.63, - "learning_rate": 3.76302824633676e-05, - "loss": 0.3477, - "step": 4385500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7628182497807034e-05, - "loss": 0.3508, - "step": 4386000 - }, - { - "epoch": 2.63, - "learning_rate": 3.762608253224647e-05, - "loss": 0.3614, - "step": 4386500 - }, - { - "epoch": 2.63, - "learning_rate": 3.762398256668591e-05, - "loss": 0.3417, - "step": 4387000 - }, - { - "epoch": 2.63, - "learning_rate": 3.762188260112534e-05, - "loss": 0.345, - "step": 4387500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7619782635564774e-05, - "loss": 0.3531, - "step": 4388000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7617682670004215e-05, - "loss": 0.3481, - "step": 4388500 - }, - { - "epoch": 2.63, - "learning_rate": 3.761559110430589e-05, - "loss": 0.3494, - "step": 4389000 - }, - { - "epoch": 2.63, - "learning_rate": 3.761349113874532e-05, - "loss": 0.3407, - "step": 4389500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7611391173184755e-05, - "loss": 0.3503, - "step": 4390000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7609295407555316e-05, - "loss": 0.3484, - "step": 4390500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7607195441994756e-05, - "loss": 0.3389, - "step": 4391000 - }, - { - "epoch": 2.63, - "learning_rate": 3.760509547643419e-05, - "loss": 0.3447, - "step": 4391500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7602995510873616e-05, - "loss": 0.3456, - "step": 4392000 - }, - { - "epoch": 2.63, - "learning_rate": 3.7600895545313056e-05, - "loss": 0.3487, - "step": 4392500 - }, - { - "epoch": 2.63, - "learning_rate": 3.759879557975249e-05, - "loss": 0.3403, - "step": 4393000 - }, - { - "epoch": 2.63, - "learning_rate": 3.759669561419192e-05, - "loss": 0.3547, - "step": 4393500 - }, - { - "epoch": 2.63, - "learning_rate": 3.7594595648631363e-05, - "loss": 0.3339, - "step": 4394000 - }, - { - "epoch": 2.63, - "learning_rate": 3.75924956830708e-05, - "loss": 0.3437, - "step": 4394500 - }, - { - "epoch": 2.63, - "learning_rate": 3.759039571751023e-05, - "loss": 0.3509, - "step": 4395000 - }, - { - "epoch": 2.64, - "learning_rate": 3.758829575194967e-05, - "loss": 0.3444, - "step": 4395500 - }, - { - "epoch": 2.64, - "learning_rate": 3.7586195786389104e-05, - "loss": 0.3363, - "step": 4396000 - }, - { - "epoch": 2.64, - "learning_rate": 3.758409582082854e-05, - "loss": 0.3529, - "step": 4396500 - }, - { - "epoch": 2.64, - "learning_rate": 3.758199585526798e-05, - "loss": 0.3397, - "step": 4397000 - }, - { - "epoch": 2.64, - "learning_rate": 3.757990008963853e-05, - "loss": 0.3511, - "step": 4397500 - }, - { - "epoch": 2.64, - "learning_rate": 3.7577800124077965e-05, - "loss": 0.3452, - "step": 4398000 - }, - { - "epoch": 2.64, - "learning_rate": 3.7575700158517405e-05, - "loss": 0.35, - "step": 4398500 - }, - { - "epoch": 2.64, - "learning_rate": 3.757360019295684e-05, - "loss": 0.3364, - "step": 4399000 - }, - { - "epoch": 2.64, - "learning_rate": 3.757150022739627e-05, - "loss": 0.3456, - "step": 4399500 - }, - { - "epoch": 2.64, - "learning_rate": 3.7569400261835705e-05, - "loss": 0.3535, - "step": 4400000 - }, - { - "epoch": 2.64, - "eval_loss": 0.33438417315483093, - "eval_runtime": 1120.157, - "eval_samples_per_second": 470.22, - "eval_steps_per_second": 78.37, - "step": 4400000 - }, - { - "epoch": 2.64, - "learning_rate": 3.756730029627514e-05, - "loss": 0.361, - "step": 4400500 - }, - { - "epoch": 2.64, - "learning_rate": 3.756520033071457e-05, - "loss": 0.3414, - "step": 4401000 - }, - { - "epoch": 2.64, - "learning_rate": 3.756310456508513e-05, - "loss": 0.3421, - "step": 4401500 - }, - { - "epoch": 2.64, - "learning_rate": 3.7561004599524566e-05, - "loss": 0.3546, - "step": 4402000 - }, - { - "epoch": 2.64, - "learning_rate": 3.7558904633964e-05, - "loss": 0.3477, - "step": 4402500 - }, - { - "epoch": 2.64, - "learning_rate": 3.755680466840343e-05, - "loss": 0.342, - "step": 4403000 - }, - { - "epoch": 2.64, - "learning_rate": 3.755470470284287e-05, - "loss": 0.3524, - "step": 4403500 - }, - { - "epoch": 2.64, - "learning_rate": 3.755260473728231e-05, - "loss": 0.353, - "step": 4404000 - }, - { - "epoch": 2.64, - "learning_rate": 3.755050477172174e-05, - "loss": 0.3433, - "step": 4404500 - }, - { - "epoch": 2.64, - "learning_rate": 3.7548409006092294e-05, - "loss": 0.3643, - "step": 4405000 - }, - { - "epoch": 2.64, - "learning_rate": 3.7546309040531734e-05, - "loss": 0.3458, - "step": 4405500 - }, - { - "epoch": 2.64, - "learning_rate": 3.754420907497117e-05, - "loss": 0.3549, - "step": 4406000 - }, - { - "epoch": 2.64, - "learning_rate": 3.754210910941061e-05, - "loss": 0.3416, - "step": 4406500 - }, - { - "epoch": 2.64, - "learning_rate": 3.754000914385004e-05, - "loss": 0.3425, - "step": 4407000 - }, - { - "epoch": 2.64, - "learning_rate": 3.7537913378220595e-05, - "loss": 0.3555, - "step": 4407500 - }, - { - "epoch": 2.64, - "learning_rate": 3.753581341266003e-05, - "loss": 0.3459, - "step": 4408000 - }, - { - "epoch": 2.64, - "learning_rate": 3.753371344709947e-05, - "loss": 0.3497, - "step": 4408500 - }, - { - "epoch": 2.64, - "learning_rate": 3.75316134815389e-05, - "loss": 0.3408, - "step": 4409000 - }, - { - "epoch": 2.64, - "learning_rate": 3.7529517715909455e-05, - "loss": 0.3571, - "step": 4409500 - }, - { - "epoch": 2.64, - "learning_rate": 3.752741775034889e-05, - "loss": 0.3634, - "step": 4410000 - }, - { - "epoch": 2.64, - "learning_rate": 3.752531778478833e-05, - "loss": 0.3498, - "step": 4410500 - }, - { - "epoch": 2.64, - "learning_rate": 3.752321781922776e-05, - "loss": 0.3519, - "step": 4411000 - }, - { - "epoch": 2.64, - "learning_rate": 3.7521117853667196e-05, - "loss": 0.3345, - "step": 4411500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7519017888106636e-05, - "loss": 0.3445, - "step": 4412000 - }, - { - "epoch": 2.65, - "learning_rate": 3.751692212247719e-05, - "loss": 0.3499, - "step": 4412500 - }, - { - "epoch": 2.65, - "learning_rate": 3.751482215691662e-05, - "loss": 0.3356, - "step": 4413000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7512722191356064e-05, - "loss": 0.3533, - "step": 4413500 - }, - { - "epoch": 2.65, - "learning_rate": 3.75106222257955e-05, - "loss": 0.3435, - "step": 4414000 - }, - { - "epoch": 2.65, - "learning_rate": 3.750852646016605e-05, - "loss": 0.3397, - "step": 4414500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7506426494605484e-05, - "loss": 0.3454, - "step": 4415000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7504326529044924e-05, - "loss": 0.3424, - "step": 4415500 - }, - { - "epoch": 2.65, - "learning_rate": 3.750222656348436e-05, - "loss": 0.3416, - "step": 4416000 - }, - { - "epoch": 2.65, - "learning_rate": 3.750012659792379e-05, - "loss": 0.3448, - "step": 4416500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7498030832294345e-05, - "loss": 0.3412, - "step": 4417000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7495930866733785e-05, - "loss": 0.3478, - "step": 4417500 - }, - { - "epoch": 2.65, - "learning_rate": 3.749383090117322e-05, - "loss": 0.3417, - "step": 4418000 - }, - { - "epoch": 2.65, - "learning_rate": 3.749173093561265e-05, - "loss": 0.3512, - "step": 4418500 - }, - { - "epoch": 2.65, - "learning_rate": 3.748963516998321e-05, - "loss": 0.3438, - "step": 4419000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7487535204422646e-05, - "loss": 0.3456, - "step": 4419500 - }, - { - "epoch": 2.65, - "learning_rate": 3.748543523886208e-05, - "loss": 0.3506, - "step": 4420000 - }, - { - "epoch": 2.65, - "learning_rate": 3.748333527330152e-05, - "loss": 0.3362, - "step": 4420500 - }, - { - "epoch": 2.65, - "learning_rate": 3.748123530774095e-05, - "loss": 0.3443, - "step": 4421000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7479135342180386e-05, - "loss": 0.3429, - "step": 4421500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7477035376619827e-05, - "loss": 0.3413, - "step": 4422000 - }, - { - "epoch": 2.65, - "learning_rate": 3.747493961099038e-05, - "loss": 0.3477, - "step": 4422500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7472839645429814e-05, - "loss": 0.3374, - "step": 4423000 - }, - { - "epoch": 2.65, - "learning_rate": 3.747073967986925e-05, - "loss": 0.3541, - "step": 4423500 - }, - { - "epoch": 2.65, - "learning_rate": 3.746863971430869e-05, - "loss": 0.3473, - "step": 4424000 - }, - { - "epoch": 2.65, - "learning_rate": 3.746653974874812e-05, - "loss": 0.349, - "step": 4424500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7464439783187554e-05, - "loss": 0.3483, - "step": 4425000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7462339817626994e-05, - "loss": 0.3411, - "step": 4425500 - }, - { - "epoch": 2.65, - "learning_rate": 3.746024405199755e-05, - "loss": 0.3388, - "step": 4426000 - }, - { - "epoch": 2.65, - "learning_rate": 3.74581482863681e-05, - "loss": 0.3521, - "step": 4426500 - }, - { - "epoch": 2.65, - "learning_rate": 3.7456048320807535e-05, - "loss": 0.3371, - "step": 4427000 - }, - { - "epoch": 2.65, - "learning_rate": 3.7453948355246975e-05, - "loss": 0.3443, - "step": 4427500 - }, - { - "epoch": 2.65, - "learning_rate": 3.745184838968641e-05, - "loss": 0.355, - "step": 4428000 - }, - { - "epoch": 2.66, - "learning_rate": 3.744974842412584e-05, - "loss": 0.3442, - "step": 4428500 - }, - { - "epoch": 2.66, - "learning_rate": 3.744764845856528e-05, - "loss": 0.3527, - "step": 4429000 - }, - { - "epoch": 2.66, - "learning_rate": 3.7445548493004716e-05, - "loss": 0.3425, - "step": 4429500 - }, - { - "epoch": 2.66, - "learning_rate": 3.744344852744415e-05, - "loss": 0.3496, - "step": 4430000 - }, - { - "epoch": 2.66, - "learning_rate": 3.744134856188359e-05, - "loss": 0.3373, - "step": 4430500 - }, - { - "epoch": 2.66, - "learning_rate": 3.743925279625414e-05, - "loss": 0.3479, - "step": 4431000 - }, - { - "epoch": 2.66, - "learning_rate": 3.74371570306247e-05, - "loss": 0.349, - "step": 4431500 - }, - { - "epoch": 2.66, - "learning_rate": 3.743505706506413e-05, - "loss": 0.3473, - "step": 4432000 - }, - { - "epoch": 2.66, - "learning_rate": 3.7432957099503564e-05, - "loss": 0.3483, - "step": 4432500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7430857133943004e-05, - "loss": 0.3529, - "step": 4433000 - }, - { - "epoch": 2.66, - "learning_rate": 3.742875716838244e-05, - "loss": 0.3475, - "step": 4433500 - }, - { - "epoch": 2.66, - "learning_rate": 3.742665720282187e-05, - "loss": 0.3454, - "step": 4434000 - }, - { - "epoch": 2.66, - "learning_rate": 3.742456143719243e-05, - "loss": 0.3511, - "step": 4434500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7422461471631865e-05, - "loss": 0.3444, - "step": 4435000 - }, - { - "epoch": 2.66, - "learning_rate": 3.74203615060713e-05, - "loss": 0.3479, - "step": 4435500 - }, - { - "epoch": 2.66, - "learning_rate": 3.741826154051074e-05, - "loss": 0.3495, - "step": 4436000 - }, - { - "epoch": 2.66, - "learning_rate": 3.741616157495017e-05, - "loss": 0.3475, - "step": 4436500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7414061609389605e-05, - "loss": 0.3532, - "step": 4437000 - }, - { - "epoch": 2.66, - "learning_rate": 3.7411961643829046e-05, - "loss": 0.3425, - "step": 4437500 - }, - { - "epoch": 2.66, - "learning_rate": 3.740986167826848e-05, - "loss": 0.3523, - "step": 4438000 - }, - { - "epoch": 2.66, - "learning_rate": 3.7407761712707906e-05, - "loss": 0.3484, - "step": 4438500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7405661747147346e-05, - "loss": 0.3459, - "step": 4439000 - }, - { - "epoch": 2.66, - "learning_rate": 3.740356178158678e-05, - "loss": 0.3525, - "step": 4439500 - }, - { - "epoch": 2.66, - "learning_rate": 3.740146181602621e-05, - "loss": 0.3427, - "step": 4440000 - }, - { - "epoch": 2.66, - "learning_rate": 3.739936605039677e-05, - "loss": 0.3441, - "step": 4440500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7397266084836207e-05, - "loss": 0.3496, - "step": 4441000 - }, - { - "epoch": 2.66, - "learning_rate": 3.739516611927564e-05, - "loss": 0.3424, - "step": 4441500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7393066153715074e-05, - "loss": 0.339, - "step": 4442000 - }, - { - "epoch": 2.66, - "learning_rate": 3.7390970388085634e-05, - "loss": 0.3488, - "step": 4442500 - }, - { - "epoch": 2.66, - "learning_rate": 3.7388870422525074e-05, - "loss": 0.3457, - "step": 4443000 - }, - { - "epoch": 2.66, - "learning_rate": 3.73867704569645e-05, - "loss": 0.3505, - "step": 4443500 - }, - { - "epoch": 2.66, - "learning_rate": 3.738467049140394e-05, - "loss": 0.3596, - "step": 4444000 - }, - { - "epoch": 2.66, - "learning_rate": 3.7382570525843374e-05, - "loss": 0.34, - "step": 4444500 - }, - { - "epoch": 2.66, - "learning_rate": 3.738047056028281e-05, - "loss": 0.3396, - "step": 4445000 - }, - { - "epoch": 2.67, - "learning_rate": 3.737837059472225e-05, - "loss": 0.3406, - "step": 4445500 - }, - { - "epoch": 2.67, - "learning_rate": 3.737627062916168e-05, - "loss": 0.3458, - "step": 4446000 - }, - { - "epoch": 2.67, - "learning_rate": 3.7374174863532235e-05, - "loss": 0.3394, - "step": 4446500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7372079097902796e-05, - "loss": 0.3399, - "step": 4447000 - }, - { - "epoch": 2.67, - "learning_rate": 3.736997913234223e-05, - "loss": 0.336, - "step": 4447500 - }, - { - "epoch": 2.67, - "learning_rate": 3.736787916678167e-05, - "loss": 0.3615, - "step": 4448000 - }, - { - "epoch": 2.67, - "learning_rate": 3.7365779201221096e-05, - "loss": 0.3405, - "step": 4448500 - }, - { - "epoch": 2.67, - "learning_rate": 3.736367923566053e-05, - "loss": 0.3404, - "step": 4449000 - }, - { - "epoch": 2.67, - "learning_rate": 3.736157927009997e-05, - "loss": 0.3386, - "step": 4449500 - }, - { - "epoch": 2.67, - "learning_rate": 3.735948350447053e-05, - "loss": 0.3365, - "step": 4450000 - }, - { - "epoch": 2.67, - "learning_rate": 3.735738353890996e-05, - "loss": 0.3494, - "step": 4450500 - }, - { - "epoch": 2.67, - "learning_rate": 3.73552835733494e-05, - "loss": 0.3441, - "step": 4451000 - }, - { - "epoch": 2.67, - "learning_rate": 3.735318360778883e-05, - "loss": 0.3432, - "step": 4451500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7351083642228264e-05, - "loss": 0.3443, - "step": 4452000 - }, - { - "epoch": 2.67, - "learning_rate": 3.7348983676667704e-05, - "loss": 0.343, - "step": 4452500 - }, - { - "epoch": 2.67, - "learning_rate": 3.734688371110714e-05, - "loss": 0.3428, - "step": 4453000 - }, - { - "epoch": 2.67, - "learning_rate": 3.734478374554657e-05, - "loss": 0.3383, - "step": 4453500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7342687979917125e-05, - "loss": 0.3435, - "step": 4454000 - }, - { - "epoch": 2.67, - "learning_rate": 3.7340588014356565e-05, - "loss": 0.3534, - "step": 4454500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7338492248727125e-05, - "loss": 0.341, - "step": 4455000 - }, - { - "epoch": 2.67, - "learning_rate": 3.733639228316655e-05, - "loss": 0.3442, - "step": 4455500 - }, - { - "epoch": 2.67, - "learning_rate": 3.733429651753711e-05, - "loss": 0.3454, - "step": 4456000 - }, - { - "epoch": 2.67, - "learning_rate": 3.7332196551976546e-05, - "loss": 0.3382, - "step": 4456500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7330096586415986e-05, - "loss": 0.3529, - "step": 4457000 - }, - { - "epoch": 2.67, - "learning_rate": 3.732799662085542e-05, - "loss": 0.3387, - "step": 4457500 - }, - { - "epoch": 2.67, - "learning_rate": 3.732589665529485e-05, - "loss": 0.3437, - "step": 4458000 - }, - { - "epoch": 2.67, - "learning_rate": 3.7323796689734286e-05, - "loss": 0.3544, - "step": 4458500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7321700924104847e-05, - "loss": 0.3455, - "step": 4459000 - }, - { - "epoch": 2.67, - "learning_rate": 3.731960095854428e-05, - "loss": 0.356, - "step": 4459500 - }, - { - "epoch": 2.67, - "learning_rate": 3.7317500992983714e-05, - "loss": 0.3413, - "step": 4460000 - }, - { - "epoch": 2.67, - "learning_rate": 3.731540102742315e-05, - "loss": 0.3432, - "step": 4460500 - }, - { - "epoch": 2.67, - "learning_rate": 3.731330106186258e-05, - "loss": 0.3423, - "step": 4461000 - }, - { - "epoch": 2.67, - "learning_rate": 3.731120109630202e-05, - "loss": 0.3571, - "step": 4461500 - }, - { - "epoch": 2.68, - "learning_rate": 3.730910533067258e-05, - "loss": 0.3403, - "step": 4462000 - }, - { - "epoch": 2.68, - "learning_rate": 3.730700536511201e-05, - "loss": 0.341, - "step": 4462500 - }, - { - "epoch": 2.68, - "learning_rate": 3.730490959948257e-05, - "loss": 0.3532, - "step": 4463000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7302809633922e-05, - "loss": 0.3352, - "step": 4463500 - }, - { - "epoch": 2.68, - "learning_rate": 3.730070966836144e-05, - "loss": 0.342, - "step": 4464000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7298609702800875e-05, - "loss": 0.3404, - "step": 4464500 - }, - { - "epoch": 2.68, - "learning_rate": 3.729650973724031e-05, - "loss": 0.3438, - "step": 4465000 - }, - { - "epoch": 2.68, - "learning_rate": 3.729440977167974e-05, - "loss": 0.3454, - "step": 4465500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7292309806119176e-05, - "loss": 0.3421, - "step": 4466000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7290214040489736e-05, - "loss": 0.3559, - "step": 4466500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7288114074929176e-05, - "loss": 0.3504, - "step": 4467000 - }, - { - "epoch": 2.68, - "learning_rate": 3.72860141093686e-05, - "loss": 0.3524, - "step": 4467500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7283914143808036e-05, - "loss": 0.3491, - "step": 4468000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7281814178247477e-05, - "loss": 0.3556, - "step": 4468500 - }, - { - "epoch": 2.68, - "learning_rate": 3.727971421268691e-05, - "loss": 0.3556, - "step": 4469000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7277614247126343e-05, - "loss": 0.3384, - "step": 4469500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7275514281565784e-05, - "loss": 0.3533, - "step": 4470000 - }, - { - "epoch": 2.68, - "learning_rate": 3.727341431600522e-05, - "loss": 0.3408, - "step": 4470500 - }, - { - "epoch": 2.68, - "learning_rate": 3.727131435044465e-05, - "loss": 0.355, - "step": 4471000 - }, - { - "epoch": 2.68, - "learning_rate": 3.726921438488409e-05, - "loss": 0.3463, - "step": 4471500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7267114419323524e-05, - "loss": 0.3637, - "step": 4472000 - }, - { - "epoch": 2.68, - "learning_rate": 3.726501445376296e-05, - "loss": 0.3396, - "step": 4472500 - }, - { - "epoch": 2.68, - "learning_rate": 3.726291448820239e-05, - "loss": 0.3475, - "step": 4473000 - }, - { - "epoch": 2.68, - "learning_rate": 3.726081872257295e-05, - "loss": 0.3367, - "step": 4473500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7258718757012385e-05, - "loss": 0.3506, - "step": 4474000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7256618791451825e-05, - "loss": 0.3336, - "step": 4474500 - }, - { - "epoch": 2.68, - "learning_rate": 3.725451882589125e-05, - "loss": 0.3475, - "step": 4475000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7252418860330685e-05, - "loss": 0.3373, - "step": 4475500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7250318894770126e-05, - "loss": 0.3433, - "step": 4476000 - }, - { - "epoch": 2.68, - "learning_rate": 3.724821892920956e-05, - "loss": 0.3431, - "step": 4476500 - }, - { - "epoch": 2.68, - "learning_rate": 3.724611896364899e-05, - "loss": 0.3417, - "step": 4477000 - }, - { - "epoch": 2.68, - "learning_rate": 3.7244023198019546e-05, - "loss": 0.3468, - "step": 4477500 - }, - { - "epoch": 2.68, - "learning_rate": 3.7241923232458986e-05, - "loss": 0.3364, - "step": 4478000 - }, - { - "epoch": 2.69, - "learning_rate": 3.723982326689842e-05, - "loss": 0.3512, - "step": 4478500 - }, - { - "epoch": 2.69, - "learning_rate": 3.723772330133785e-05, - "loss": 0.3473, - "step": 4479000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7235623335777294e-05, - "loss": 0.3444, - "step": 4479500 - }, - { - "epoch": 2.69, - "learning_rate": 3.723352337021673e-05, - "loss": 0.3326, - "step": 4480000 - }, - { - "epoch": 2.69, - "learning_rate": 3.723142340465616e-05, - "loss": 0.3467, - "step": 4480500 - }, - { - "epoch": 2.69, - "learning_rate": 3.72293234390956e-05, - "loss": 0.3422, - "step": 4481000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7227227673466154e-05, - "loss": 0.3363, - "step": 4481500 - }, - { - "epoch": 2.69, - "learning_rate": 3.722512770790559e-05, - "loss": 0.3455, - "step": 4482000 - }, - { - "epoch": 2.69, - "learning_rate": 3.722302774234503e-05, - "loss": 0.3492, - "step": 4482500 - }, - { - "epoch": 2.69, - "learning_rate": 3.722092777678446e-05, - "loss": 0.3383, - "step": 4483000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7218832011155015e-05, - "loss": 0.3482, - "step": 4483500 - }, - { - "epoch": 2.69, - "learning_rate": 3.7216736245525575e-05, - "loss": 0.3511, - "step": 4484000 - }, - { - "epoch": 2.69, - "learning_rate": 3.721463627996501e-05, - "loss": 0.346, - "step": 4484500 - }, - { - "epoch": 2.69, - "learning_rate": 3.721253631440444e-05, - "loss": 0.3567, - "step": 4485000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7210436348843876e-05, - "loss": 0.3433, - "step": 4485500 - }, - { - "epoch": 2.69, - "learning_rate": 3.720833638328331e-05, - "loss": 0.3423, - "step": 4486000 - }, - { - "epoch": 2.69, - "learning_rate": 3.720624061765387e-05, - "loss": 0.3598, - "step": 4486500 - }, - { - "epoch": 2.69, - "learning_rate": 3.72041406520933e-05, - "loss": 0.3549, - "step": 4487000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7202040686532736e-05, - "loss": 0.3433, - "step": 4487500 - }, - { - "epoch": 2.69, - "learning_rate": 3.719994072097218e-05, - "loss": 0.3427, - "step": 4488000 - }, - { - "epoch": 2.69, - "learning_rate": 3.719784075541161e-05, - "loss": 0.3435, - "step": 4488500 - }, - { - "epoch": 2.69, - "learning_rate": 3.7195740789851044e-05, - "loss": 0.34, - "step": 4489000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7193640824290484e-05, - "loss": 0.343, - "step": 4489500 - }, - { - "epoch": 2.69, - "learning_rate": 3.719154085872992e-05, - "loss": 0.338, - "step": 4490000 - }, - { - "epoch": 2.69, - "learning_rate": 3.71894492930316e-05, - "loss": 0.3482, - "step": 4490500 - }, - { - "epoch": 2.69, - "learning_rate": 3.718734932747103e-05, - "loss": 0.3492, - "step": 4491000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7185249361910465e-05, - "loss": 0.3454, - "step": 4491500 - }, - { - "epoch": 2.69, - "learning_rate": 3.71831493963499e-05, - "loss": 0.3411, - "step": 4492000 - }, - { - "epoch": 2.69, - "learning_rate": 3.718105363072046e-05, - "loss": 0.353, - "step": 4492500 - }, - { - "epoch": 2.69, - "learning_rate": 3.717895366515989e-05, - "loss": 0.3437, - "step": 4493000 - }, - { - "epoch": 2.69, - "learning_rate": 3.7176853699599325e-05, - "loss": 0.3385, - "step": 4493500 - }, - { - "epoch": 2.69, - "learning_rate": 3.7174753734038766e-05, - "loss": 0.3456, - "step": 4494000 - }, - { - "epoch": 2.69, - "learning_rate": 3.717265376847819e-05, - "loss": 0.3466, - "step": 4494500 - }, - { - "epoch": 2.69, - "learning_rate": 3.717055800284875e-05, - "loss": 0.3422, - "step": 4495000 - }, - { - "epoch": 2.7, - "learning_rate": 3.716845803728819e-05, - "loss": 0.3425, - "step": 4495500 - }, - { - "epoch": 2.7, - "learning_rate": 3.7166358071727626e-05, - "loss": 0.3409, - "step": 4496000 - }, - { - "epoch": 2.7, - "learning_rate": 3.716425810616705e-05, - "loss": 0.3455, - "step": 4496500 - }, - { - "epoch": 2.7, - "learning_rate": 3.716215814060649e-05, - "loss": 0.3486, - "step": 4497000 - }, - { - "epoch": 2.7, - "learning_rate": 3.716005817504593e-05, - "loss": 0.3432, - "step": 4497500 - }, - { - "epoch": 2.7, - "learning_rate": 3.715795820948536e-05, - "loss": 0.3506, - "step": 4498000 - }, - { - "epoch": 2.7, - "learning_rate": 3.71558582439248e-05, - "loss": 0.3463, - "step": 4498500 - }, - { - "epoch": 2.7, - "learning_rate": 3.7153762478295354e-05, - "loss": 0.3441, - "step": 4499000 - }, - { - "epoch": 2.7, - "learning_rate": 3.7151666712665914e-05, - "loss": 0.3542, - "step": 4499500 - }, - { - "epoch": 2.7, - "learning_rate": 3.714956674710535e-05, - "loss": 0.347, - "step": 4500000 - }, - { - "epoch": 2.7, - "eval_loss": 0.33365142345428467, - "eval_runtime": 1119.1708, - "eval_samples_per_second": 470.634, - "eval_steps_per_second": 78.439, - "step": 4500000 - }, - { - "epoch": 2.7, - "learning_rate": 3.714746678154478e-05, - "loss": 0.3464, - "step": 4500500 - }, - { - "epoch": 2.7, - "learning_rate": 3.714536681598422e-05, - "loss": 0.3466, - "step": 4501000 - }, - { - "epoch": 2.7, - "learning_rate": 3.714326685042365e-05, - "loss": 0.3465, - "step": 4501500 - }, - { - "epoch": 2.7, - "learning_rate": 3.714116688486309e-05, - "loss": 0.3339, - "step": 4502000 - }, - { - "epoch": 2.7, - "learning_rate": 3.713906691930252e-05, - "loss": 0.3432, - "step": 4502500 - }, - { - "epoch": 2.7, - "learning_rate": 3.7136966953741955e-05, - "loss": 0.3485, - "step": 4503000 - }, - { - "epoch": 2.7, - "learning_rate": 3.7134871188112516e-05, - "loss": 0.3518, - "step": 4503500 - }, - { - "epoch": 2.7, - "learning_rate": 3.713277542248307e-05, - "loss": 0.3489, - "step": 4504000 - }, - { - "epoch": 2.7, - "learning_rate": 3.713067545692251e-05, - "loss": 0.3507, - "step": 4504500 - }, - { - "epoch": 2.7, - "learning_rate": 3.712857549136194e-05, - "loss": 0.3516, - "step": 4505000 - }, - { - "epoch": 2.7, - "learning_rate": 3.7126475525801376e-05, - "loss": 0.3364, - "step": 4505500 - }, - { - "epoch": 2.7, - "learning_rate": 3.712437556024081e-05, - "loss": 0.3461, - "step": 4506000 - }, - { - "epoch": 2.7, - "learning_rate": 3.712227559468024e-05, - "loss": 0.3406, - "step": 4506500 - }, - { - "epoch": 2.7, - "learning_rate": 3.712017562911968e-05, - "loss": 0.3438, - "step": 4507000 - }, - { - "epoch": 2.7, - "learning_rate": 3.711807566355912e-05, - "loss": 0.3368, - "step": 4507500 - }, - { - "epoch": 2.7, - "learning_rate": 3.71159840978608e-05, - "loss": 0.3484, - "step": 4508000 - }, - { - "epoch": 2.7, - "learning_rate": 3.711388413230023e-05, - "loss": 0.3424, - "step": 4508500 - }, - { - "epoch": 2.7, - "learning_rate": 3.7111784166739664e-05, - "loss": 0.3434, - "step": 4509000 - }, - { - "epoch": 2.7, - "learning_rate": 3.7109684201179105e-05, - "loss": 0.3373, - "step": 4509500 - }, - { - "epoch": 2.7, - "learning_rate": 3.710758423561854e-05, - "loss": 0.3457, - "step": 4510000 - }, - { - "epoch": 2.7, - "learning_rate": 3.710548427005797e-05, - "loss": 0.3463, - "step": 4510500 - }, - { - "epoch": 2.7, - "learning_rate": 3.7103384304497405e-05, - "loss": 0.3429, - "step": 4511000 - }, - { - "epoch": 2.7, - "learning_rate": 3.710128433893684e-05, - "loss": 0.3478, - "step": 4511500 - }, - { - "epoch": 2.71, - "learning_rate": 3.70991885733074e-05, - "loss": 0.3382, - "step": 4512000 - }, - { - "epoch": 2.71, - "learning_rate": 3.709708860774683e-05, - "loss": 0.3406, - "step": 4512500 - }, - { - "epoch": 2.71, - "learning_rate": 3.709498864218627e-05, - "loss": 0.3481, - "step": 4513000 - }, - { - "epoch": 2.71, - "learning_rate": 3.70928886766257e-05, - "loss": 0.3378, - "step": 4513500 - }, - { - "epoch": 2.71, - "learning_rate": 3.709078871106513e-05, - "loss": 0.338, - "step": 4514000 - }, - { - "epoch": 2.71, - "learning_rate": 3.708868874550457e-05, - "loss": 0.3504, - "step": 4514500 - }, - { - "epoch": 2.71, - "learning_rate": 3.7086588779944006e-05, - "loss": 0.3421, - "step": 4515000 - }, - { - "epoch": 2.71, - "learning_rate": 3.708448881438344e-05, - "loss": 0.346, - "step": 4515500 - }, - { - "epoch": 2.71, - "learning_rate": 3.7082393048754e-05, - "loss": 0.3389, - "step": 4516000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7080293083193434e-05, - "loss": 0.3495, - "step": 4516500 - }, - { - "epoch": 2.71, - "learning_rate": 3.707819311763287e-05, - "loss": 0.3454, - "step": 4517000 - }, - { - "epoch": 2.71, - "learning_rate": 3.707609315207231e-05, - "loss": 0.3463, - "step": 4517500 - }, - { - "epoch": 2.71, - "learning_rate": 3.707399738644286e-05, - "loss": 0.3505, - "step": 4518000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7071897420882294e-05, - "loss": 0.3404, - "step": 4518500 - }, - { - "epoch": 2.71, - "learning_rate": 3.7069801655252855e-05, - "loss": 0.347, - "step": 4519000 - }, - { - "epoch": 2.71, - "learning_rate": 3.706770168969229e-05, - "loss": 0.349, - "step": 4519500 - }, - { - "epoch": 2.71, - "learning_rate": 3.706560172413173e-05, - "loss": 0.3439, - "step": 4520000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7063501758571155e-05, - "loss": 0.3478, - "step": 4520500 - }, - { - "epoch": 2.71, - "learning_rate": 3.706140179301059e-05, - "loss": 0.3443, - "step": 4521000 - }, - { - "epoch": 2.71, - "learning_rate": 3.705930182745003e-05, - "loss": 0.3587, - "step": 4521500 - }, - { - "epoch": 2.71, - "learning_rate": 3.705720186188946e-05, - "loss": 0.3398, - "step": 4522000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7055101896328896e-05, - "loss": 0.3384, - "step": 4522500 - }, - { - "epoch": 2.71, - "learning_rate": 3.7053010330630576e-05, - "loss": 0.3529, - "step": 4523000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7050910365070016e-05, - "loss": 0.3446, - "step": 4523500 - }, - { - "epoch": 2.71, - "learning_rate": 3.704881459944057e-05, - "loss": 0.3574, - "step": 4524000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7046714633880003e-05, - "loss": 0.3546, - "step": 4524500 - }, - { - "epoch": 2.71, - "learning_rate": 3.704461466831944e-05, - "loss": 0.3499, - "step": 4525000 - }, - { - "epoch": 2.71, - "learning_rate": 3.704251470275888e-05, - "loss": 0.346, - "step": 4525500 - }, - { - "epoch": 2.71, - "learning_rate": 3.704041473719831e-05, - "loss": 0.3459, - "step": 4526000 - }, - { - "epoch": 2.71, - "learning_rate": 3.7038318971568864e-05, - "loss": 0.3527, - "step": 4526500 - }, - { - "epoch": 2.71, - "learning_rate": 3.70362190060083e-05, - "loss": 0.3422, - "step": 4527000 - }, - { - "epoch": 2.71, - "learning_rate": 3.703411904044774e-05, - "loss": 0.3362, - "step": 4527500 - }, - { - "epoch": 2.71, - "learning_rate": 3.703201907488717e-05, - "loss": 0.3412, - "step": 4528000 - }, - { - "epoch": 2.72, - "learning_rate": 3.7029919109326605e-05, - "loss": 0.3459, - "step": 4528500 - }, - { - "epoch": 2.72, - "learning_rate": 3.7027819143766045e-05, - "loss": 0.343, - "step": 4529000 - }, - { - "epoch": 2.72, - "learning_rate": 3.702571917820548e-05, - "loss": 0.3553, - "step": 4529500 - }, - { - "epoch": 2.72, - "learning_rate": 3.702361921264491e-05, - "loss": 0.3511, - "step": 4530000 - }, - { - "epoch": 2.72, - "learning_rate": 3.7021519247084345e-05, - "loss": 0.3413, - "step": 4530500 - }, - { - "epoch": 2.72, - "learning_rate": 3.701941928152378e-05, - "loss": 0.3491, - "step": 4531000 - }, - { - "epoch": 2.72, - "learning_rate": 3.701732351589434e-05, - "loss": 0.3463, - "step": 4531500 - }, - { - "epoch": 2.72, - "learning_rate": 3.701522355033378e-05, - "loss": 0.3485, - "step": 4532000 - }, - { - "epoch": 2.72, - "learning_rate": 3.7013123584773206e-05, - "loss": 0.3518, - "step": 4532500 - }, - { - "epoch": 2.72, - "learning_rate": 3.701102361921264e-05, - "loss": 0.357, - "step": 4533000 - }, - { - "epoch": 2.72, - "learning_rate": 3.700892365365208e-05, - "loss": 0.3372, - "step": 4533500 - }, - { - "epoch": 2.72, - "learning_rate": 3.700682368809151e-05, - "loss": 0.3426, - "step": 4534000 - }, - { - "epoch": 2.72, - "learning_rate": 3.700472372253095e-05, - "loss": 0.3456, - "step": 4534500 - }, - { - "epoch": 2.72, - "learning_rate": 3.700262375697039e-05, - "loss": 0.3448, - "step": 4535000 - }, - { - "epoch": 2.72, - "learning_rate": 3.700052799134094e-05, - "loss": 0.3513, - "step": 4535500 - }, - { - "epoch": 2.72, - "learning_rate": 3.69984322257115e-05, - "loss": 0.342, - "step": 4536000 - }, - { - "epoch": 2.72, - "learning_rate": 3.6996332260150934e-05, - "loss": 0.3362, - "step": 4536500 - }, - { - "epoch": 2.72, - "learning_rate": 3.699423229459037e-05, - "loss": 0.3478, - "step": 4537000 - }, - { - "epoch": 2.72, - "learning_rate": 3.69921323290298e-05, - "loss": 0.345, - "step": 4537500 - }, - { - "epoch": 2.72, - "learning_rate": 3.6990032363469235e-05, - "loss": 0.3466, - "step": 4538000 - }, - { - "epoch": 2.72, - "learning_rate": 3.6987932397908675e-05, - "loss": 0.3343, - "step": 4538500 - }, - { - "epoch": 2.72, - "learning_rate": 3.698583243234811e-05, - "loss": 0.3435, - "step": 4539000 - }, - { - "epoch": 2.72, - "learning_rate": 3.698373246678754e-05, - "loss": 0.3381, - "step": 4539500 - }, - { - "epoch": 2.72, - "learning_rate": 3.698164090108922e-05, - "loss": 0.3438, - "step": 4540000 - }, - { - "epoch": 2.72, - "learning_rate": 3.6979540935528656e-05, - "loss": 0.3414, - "step": 4540500 - }, - { - "epoch": 2.72, - "learning_rate": 3.6977440969968096e-05, - "loss": 0.3438, - "step": 4541000 - }, - { - "epoch": 2.72, - "learning_rate": 3.697534100440753e-05, - "loss": 0.3526, - "step": 4541500 - }, - { - "epoch": 2.72, - "learning_rate": 3.697324523877808e-05, - "loss": 0.3516, - "step": 4542000 - }, - { - "epoch": 2.72, - "learning_rate": 3.697114527321752e-05, - "loss": 0.3368, - "step": 4542500 - }, - { - "epoch": 2.72, - "learning_rate": 3.696904530765696e-05, - "loss": 0.3374, - "step": 4543000 - }, - { - "epoch": 2.72, - "learning_rate": 3.696694534209639e-05, - "loss": 0.3538, - "step": 4543500 - }, - { - "epoch": 2.72, - "learning_rate": 3.696484537653583e-05, - "loss": 0.3529, - "step": 4544000 - }, - { - "epoch": 2.72, - "learning_rate": 3.6962749610906384e-05, - "loss": 0.3433, - "step": 4544500 - }, - { - "epoch": 2.72, - "learning_rate": 3.696064964534582e-05, - "loss": 0.3523, - "step": 4545000 - }, - { - "epoch": 2.73, - "learning_rate": 3.695854967978525e-05, - "loss": 0.3431, - "step": 4545500 - }, - { - "epoch": 2.73, - "learning_rate": 3.695644971422469e-05, - "loss": 0.3523, - "step": 4546000 - }, - { - "epoch": 2.73, - "learning_rate": 3.695434974866412e-05, - "loss": 0.3488, - "step": 4546500 - }, - { - "epoch": 2.73, - "learning_rate": 3.695224978310355e-05, - "loss": 0.3419, - "step": 4547000 - }, - { - "epoch": 2.73, - "learning_rate": 3.695014981754299e-05, - "loss": 0.3418, - "step": 4547500 - }, - { - "epoch": 2.73, - "learning_rate": 3.6948049851982425e-05, - "loss": 0.3388, - "step": 4548000 - }, - { - "epoch": 2.73, - "learning_rate": 3.6945954086352985e-05, - "loss": 0.3384, - "step": 4548500 - }, - { - "epoch": 2.73, - "learning_rate": 3.694385412079242e-05, - "loss": 0.3412, - "step": 4549000 - }, - { - "epoch": 2.73, - "learning_rate": 3.694175835516298e-05, - "loss": 0.3404, - "step": 4549500 - }, - { - "epoch": 2.73, - "learning_rate": 3.693965838960241e-05, - "loss": 0.3396, - "step": 4550000 - }, - { - "epoch": 2.73, - "learning_rate": 3.6937558424041846e-05, - "loss": 0.3348, - "step": 4550500 - }, - { - "epoch": 2.73, - "learning_rate": 3.6935458458481286e-05, - "loss": 0.3352, - "step": 4551000 - }, - { - "epoch": 2.73, - "learning_rate": 3.693335849292071e-05, - "loss": 0.3383, - "step": 4551500 - }, - { - "epoch": 2.73, - "learning_rate": 3.6931258527360147e-05, - "loss": 0.3363, - "step": 4552000 - }, - { - "epoch": 2.73, - "learning_rate": 3.692915856179959e-05, - "loss": 0.3459, - "step": 4552500 - }, - { - "epoch": 2.73, - "learning_rate": 3.692705859623902e-05, - "loss": 0.3455, - "step": 4553000 - }, - { - "epoch": 2.73, - "learning_rate": 3.692496283060958e-05, - "loss": 0.3498, - "step": 4553500 - }, - { - "epoch": 2.73, - "learning_rate": 3.692286286504901e-05, - "loss": 0.3354, - "step": 4554000 - }, - { - "epoch": 2.73, - "learning_rate": 3.692076289948845e-05, - "loss": 0.345, - "step": 4554500 - }, - { - "epoch": 2.73, - "learning_rate": 3.691866293392788e-05, - "loss": 0.3431, - "step": 4555000 - }, - { - "epoch": 2.73, - "learning_rate": 3.6916562968367314e-05, - "loss": 0.3401, - "step": 4555500 - }, - { - "epoch": 2.73, - "learning_rate": 3.6914463002806755e-05, - "loss": 0.3487, - "step": 4556000 - }, - { - "epoch": 2.73, - "learning_rate": 3.691236303724619e-05, - "loss": 0.3473, - "step": 4556500 - }, - { - "epoch": 2.73, - "learning_rate": 3.691026307168562e-05, - "loss": 0.3467, - "step": 4557000 - }, - { - "epoch": 2.73, - "learning_rate": 3.690816730605618e-05, - "loss": 0.3365, - "step": 4557500 - }, - { - "epoch": 2.73, - "learning_rate": 3.690607154042674e-05, - "loss": 0.3454, - "step": 4558000 - }, - { - "epoch": 2.73, - "learning_rate": 3.690397157486617e-05, - "loss": 0.3419, - "step": 4558500 - }, - { - "epoch": 2.73, - "learning_rate": 3.69018716093056e-05, - "loss": 0.3407, - "step": 4559000 - }, - { - "epoch": 2.73, - "learning_rate": 3.689977164374504e-05, - "loss": 0.3509, - "step": 4559500 - }, - { - "epoch": 2.73, - "learning_rate": 3.6897671678184476e-05, - "loss": 0.3515, - "step": 4560000 - }, - { - "epoch": 2.73, - "learning_rate": 3.689557171262391e-05, - "loss": 0.3406, - "step": 4560500 - }, - { - "epoch": 2.73, - "learning_rate": 3.689347594699446e-05, - "loss": 0.3454, - "step": 4561000 - }, - { - "epoch": 2.73, - "learning_rate": 3.68913759814339e-05, - "loss": 0.3369, - "step": 4561500 - }, - { - "epoch": 2.74, - "learning_rate": 3.688927601587334e-05, - "loss": 0.3412, - "step": 4562000 - }, - { - "epoch": 2.74, - "learning_rate": 3.688717605031277e-05, - "loss": 0.3322, - "step": 4562500 - }, - { - "epoch": 2.74, - "learning_rate": 3.688507608475221e-05, - "loss": 0.3401, - "step": 4563000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6882980319122764e-05, - "loss": 0.3608, - "step": 4563500 - }, - { - "epoch": 2.74, - "learning_rate": 3.68808803535622e-05, - "loss": 0.3449, - "step": 4564000 - }, - { - "epoch": 2.74, - "learning_rate": 3.687878038800164e-05, - "loss": 0.3442, - "step": 4564500 - }, - { - "epoch": 2.74, - "learning_rate": 3.687668042244107e-05, - "loss": 0.3343, - "step": 4565000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6874580456880505e-05, - "loss": 0.3454, - "step": 4565500 - }, - { - "epoch": 2.74, - "learning_rate": 3.687248469125106e-05, - "loss": 0.3371, - "step": 4566000 - }, - { - "epoch": 2.74, - "learning_rate": 3.68703847256905e-05, - "loss": 0.336, - "step": 4566500 - }, - { - "epoch": 2.74, - "learning_rate": 3.686828476012993e-05, - "loss": 0.3449, - "step": 4567000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6866184794569365e-05, - "loss": 0.3537, - "step": 4567500 - }, - { - "epoch": 2.74, - "learning_rate": 3.6864084829008806e-05, - "loss": 0.3436, - "step": 4568000 - }, - { - "epoch": 2.74, - "learning_rate": 3.686198906337936e-05, - "loss": 0.3585, - "step": 4568500 - }, - { - "epoch": 2.74, - "learning_rate": 3.685988909781879e-05, - "loss": 0.3536, - "step": 4569000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6857789132258226e-05, - "loss": 0.3406, - "step": 4569500 - }, - { - "epoch": 2.74, - "learning_rate": 3.6855693366628786e-05, - "loss": 0.3512, - "step": 4570000 - }, - { - "epoch": 2.74, - "learning_rate": 3.685359340106822e-05, - "loss": 0.356, - "step": 4570500 - }, - { - "epoch": 2.74, - "learning_rate": 3.6851493435507653e-05, - "loss": 0.3366, - "step": 4571000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6849393469947094e-05, - "loss": 0.3434, - "step": 4571500 - }, - { - "epoch": 2.74, - "learning_rate": 3.684729350438653e-05, - "loss": 0.3409, - "step": 4572000 - }, - { - "epoch": 2.74, - "learning_rate": 3.684519353882596e-05, - "loss": 0.3394, - "step": 4572500 - }, - { - "epoch": 2.74, - "learning_rate": 3.68430935732654e-05, - "loss": 0.3437, - "step": 4573000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6840993607704834e-05, - "loss": 0.3465, - "step": 4573500 - }, - { - "epoch": 2.74, - "learning_rate": 3.683889364214427e-05, - "loss": 0.3429, - "step": 4574000 - }, - { - "epoch": 2.74, - "learning_rate": 3.683679367658371e-05, - "loss": 0.342, - "step": 4574500 - }, - { - "epoch": 2.74, - "learning_rate": 3.683469371102314e-05, - "loss": 0.3358, - "step": 4575000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6832593745462575e-05, - "loss": 0.3515, - "step": 4575500 - }, - { - "epoch": 2.74, - "learning_rate": 3.683049797983313e-05, - "loss": 0.3441, - "step": 4576000 - }, - { - "epoch": 2.74, - "learning_rate": 3.682839801427257e-05, - "loss": 0.3516, - "step": 4576500 - }, - { - "epoch": 2.74, - "learning_rate": 3.6826298048712e-05, - "loss": 0.3423, - "step": 4577000 - }, - { - "epoch": 2.74, - "learning_rate": 3.6824202283082556e-05, - "loss": 0.3373, - "step": 4577500 - }, - { - "epoch": 2.74, - "learning_rate": 3.682210231752199e-05, - "loss": 0.3472, - "step": 4578000 - }, - { - "epoch": 2.74, - "learning_rate": 3.682000235196143e-05, - "loss": 0.3419, - "step": 4578500 - }, - { - "epoch": 2.75, - "learning_rate": 3.681790238640086e-05, - "loss": 0.3386, - "step": 4579000 - }, - { - "epoch": 2.75, - "learning_rate": 3.6815802420840296e-05, - "loss": 0.3574, - "step": 4579500 - }, - { - "epoch": 2.75, - "learning_rate": 3.6813702455279737e-05, - "loss": 0.3428, - "step": 4580000 - }, - { - "epoch": 2.75, - "learning_rate": 3.681160668965029e-05, - "loss": 0.3457, - "step": 4580500 - }, - { - "epoch": 2.75, - "learning_rate": 3.6809506724089724e-05, - "loss": 0.3458, - "step": 4581000 - }, - { - "epoch": 2.75, - "learning_rate": 3.6807406758529164e-05, - "loss": 0.345, - "step": 4581500 - }, - { - "epoch": 2.75, - "learning_rate": 3.68053067929686e-05, - "loss": 0.3325, - "step": 4582000 - }, - { - "epoch": 2.75, - "learning_rate": 3.680320682740803e-05, - "loss": 0.3367, - "step": 4582500 - }, - { - "epoch": 2.75, - "learning_rate": 3.6801106861847464e-05, - "loss": 0.3464, - "step": 4583000 - }, - { - "epoch": 2.75, - "learning_rate": 3.67990068962869e-05, - "loss": 0.3381, - "step": 4583500 - }, - { - "epoch": 2.75, - "learning_rate": 3.679690693072633e-05, - "loss": 0.3477, - "step": 4584000 - }, - { - "epoch": 2.75, - "learning_rate": 3.679481116509689e-05, - "loss": 0.3369, - "step": 4584500 - }, - { - "epoch": 2.75, - "learning_rate": 3.679271119953633e-05, - "loss": 0.3515, - "step": 4585000 - }, - { - "epoch": 2.75, - "learning_rate": 3.679061123397576e-05, - "loss": 0.3455, - "step": 4585500 - }, - { - "epoch": 2.75, - "learning_rate": 3.678851546834632e-05, - "loss": 0.3473, - "step": 4586000 - }, - { - "epoch": 2.75, - "learning_rate": 3.678641970271687e-05, - "loss": 0.3337, - "step": 4586500 - }, - { - "epoch": 2.75, - "learning_rate": 3.678431973715631e-05, - "loss": 0.3346, - "step": 4587000 - }, - { - "epoch": 2.75, - "learning_rate": 3.6782219771595746e-05, - "loss": 0.3532, - "step": 4587500 - }, - { - "epoch": 2.75, - "learning_rate": 3.678011980603518e-05, - "loss": 0.3455, - "step": 4588000 - }, - { - "epoch": 2.75, - "learning_rate": 3.677801984047462e-05, - "loss": 0.3382, - "step": 4588500 - }, - { - "epoch": 2.75, - "learning_rate": 3.677591987491405e-05, - "loss": 0.3401, - "step": 4589000 - }, - { - "epoch": 2.75, - "learning_rate": 3.677381990935349e-05, - "loss": 0.343, - "step": 4589500 - }, - { - "epoch": 2.75, - "learning_rate": 3.677171994379293e-05, - "loss": 0.3463, - "step": 4590000 - }, - { - "epoch": 2.75, - "learning_rate": 3.6769619978232354e-05, - "loss": 0.3455, - "step": 4590500 - }, - { - "epoch": 2.75, - "learning_rate": 3.6767524212602914e-05, - "loss": 0.3369, - "step": 4591000 - }, - { - "epoch": 2.75, - "learning_rate": 3.676542424704235e-05, - "loss": 0.3446, - "step": 4591500 - }, - { - "epoch": 2.75, - "learning_rate": 3.676332428148179e-05, - "loss": 0.3455, - "step": 4592000 - }, - { - "epoch": 2.75, - "learning_rate": 3.676122431592122e-05, - "loss": 0.3432, - "step": 4592500 - }, - { - "epoch": 2.75, - "learning_rate": 3.6759132750222895e-05, - "loss": 0.3487, - "step": 4593000 - }, - { - "epoch": 2.75, - "learning_rate": 3.675703278466233e-05, - "loss": 0.337, - "step": 4593500 - }, - { - "epoch": 2.75, - "learning_rate": 3.675493281910177e-05, - "loss": 0.3486, - "step": 4594000 - }, - { - "epoch": 2.75, - "learning_rate": 3.67528328535412e-05, - "loss": 0.3417, - "step": 4594500 - }, - { - "epoch": 2.75, - "learning_rate": 3.6750732887980635e-05, - "loss": 0.3396, - "step": 4595000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6748632922420076e-05, - "loss": 0.3486, - "step": 4595500 - }, - { - "epoch": 2.76, - "learning_rate": 3.674653295685951e-05, - "loss": 0.3468, - "step": 4596000 - }, - { - "epoch": 2.76, - "learning_rate": 3.674443299129894e-05, - "loss": 0.3465, - "step": 4596500 - }, - { - "epoch": 2.76, - "learning_rate": 3.674233302573838e-05, - "loss": 0.3333, - "step": 4597000 - }, - { - "epoch": 2.76, - "learning_rate": 3.674023306017781e-05, - "loss": 0.3483, - "step": 4597500 - }, - { - "epoch": 2.76, - "learning_rate": 3.673813729454837e-05, - "loss": 0.3393, - "step": 4598000 - }, - { - "epoch": 2.76, - "learning_rate": 3.67360373289878e-05, - "loss": 0.3412, - "step": 4598500 - }, - { - "epoch": 2.76, - "learning_rate": 3.6733937363427243e-05, - "loss": 0.3411, - "step": 4599000 - }, - { - "epoch": 2.76, - "learning_rate": 3.673183739786668e-05, - "loss": 0.3354, - "step": 4599500 - }, - { - "epoch": 2.76, - "learning_rate": 3.672974163223723e-05, - "loss": 0.3462, - "step": 4600000 - }, - { - "epoch": 2.76, - "eval_loss": 0.3324766755104065, - "eval_runtime": 1118.3498, - "eval_samples_per_second": 470.98, - "eval_steps_per_second": 78.497, - "step": 4600000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6727641666676664e-05, - "loss": 0.3462, - "step": 4600500 - }, - { - "epoch": 2.76, - "learning_rate": 3.6725541701116104e-05, - "loss": 0.3484, - "step": 4601000 - }, - { - "epoch": 2.76, - "learning_rate": 3.672344173555554e-05, - "loss": 0.3479, - "step": 4601500 - }, - { - "epoch": 2.76, - "learning_rate": 3.672134176999498e-05, - "loss": 0.3413, - "step": 4602000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6719241804434405e-05, - "loss": 0.3512, - "step": 4602500 - }, - { - "epoch": 2.76, - "learning_rate": 3.6717146038804965e-05, - "loss": 0.3417, - "step": 4603000 - }, - { - "epoch": 2.76, - "learning_rate": 3.67150460732444e-05, - "loss": 0.3425, - "step": 4603500 - }, - { - "epoch": 2.76, - "learning_rate": 3.671294610768384e-05, - "loss": 0.3446, - "step": 4604000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6710846142123265e-05, - "loss": 0.3415, - "step": 4604500 - }, - { - "epoch": 2.76, - "learning_rate": 3.67087461765627e-05, - "loss": 0.3485, - "step": 4605000 - }, - { - "epoch": 2.76, - "learning_rate": 3.670665041093326e-05, - "loss": 0.3444, - "step": 4605500 - }, - { - "epoch": 2.76, - "learning_rate": 3.67045504453727e-05, - "loss": 0.3429, - "step": 4606000 - }, - { - "epoch": 2.76, - "learning_rate": 3.670245047981213e-05, - "loss": 0.3408, - "step": 4606500 - }, - { - "epoch": 2.76, - "learning_rate": 3.670035051425156e-05, - "loss": 0.3403, - "step": 4607000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6698250548691e-05, - "loss": 0.344, - "step": 4607500 - }, - { - "epoch": 2.76, - "learning_rate": 3.669615058313043e-05, - "loss": 0.3493, - "step": 4608000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6694054817500994e-05, - "loss": 0.3511, - "step": 4608500 - }, - { - "epoch": 2.76, - "learning_rate": 3.6691954851940434e-05, - "loss": 0.3416, - "step": 4609000 - }, - { - "epoch": 2.76, - "learning_rate": 3.668985488637986e-05, - "loss": 0.3394, - "step": 4609500 - }, - { - "epoch": 2.76, - "learning_rate": 3.6687754920819294e-05, - "loss": 0.3412, - "step": 4610000 - }, - { - "epoch": 2.76, - "learning_rate": 3.6685654955258734e-05, - "loss": 0.3365, - "step": 4610500 - }, - { - "epoch": 2.76, - "learning_rate": 3.668355498969817e-05, - "loss": 0.3442, - "step": 4611000 - }, - { - "epoch": 2.76, - "learning_rate": 3.668145922406873e-05, - "loss": 0.347, - "step": 4611500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6679359258508155e-05, - "loss": 0.3473, - "step": 4612000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6677259292947595e-05, - "loss": 0.3401, - "step": 4612500 - }, - { - "epoch": 2.77, - "learning_rate": 3.667515932738703e-05, - "loss": 0.3313, - "step": 4613000 - }, - { - "epoch": 2.77, - "learning_rate": 3.667306356175759e-05, - "loss": 0.3418, - "step": 4613500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6670963596197015e-05, - "loss": 0.3465, - "step": 4614000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6668863630636456e-05, - "loss": 0.342, - "step": 4614500 - }, - { - "epoch": 2.77, - "learning_rate": 3.666676366507589e-05, - "loss": 0.3494, - "step": 4615000 - }, - { - "epoch": 2.77, - "learning_rate": 3.666466369951533e-05, - "loss": 0.3392, - "step": 4615500 - }, - { - "epoch": 2.77, - "learning_rate": 3.666256373395476e-05, - "loss": 0.3489, - "step": 4616000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6660463768394196e-05, - "loss": 0.3375, - "step": 4616500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6658363802833636e-05, - "loss": 0.3416, - "step": 4617000 - }, - { - "epoch": 2.77, - "learning_rate": 3.665626803720419e-05, - "loss": 0.3476, - "step": 4617500 - }, - { - "epoch": 2.77, - "learning_rate": 3.665417227157475e-05, - "loss": 0.3473, - "step": 4618000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6652072306014184e-05, - "loss": 0.3474, - "step": 4618500 - }, - { - "epoch": 2.77, - "learning_rate": 3.664997234045361e-05, - "loss": 0.3377, - "step": 4619000 - }, - { - "epoch": 2.77, - "learning_rate": 3.664787237489305e-05, - "loss": 0.3359, - "step": 4619500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6645772409332484e-05, - "loss": 0.3473, - "step": 4620000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6643676643703045e-05, - "loss": 0.3487, - "step": 4620500 - }, - { - "epoch": 2.77, - "learning_rate": 3.664157667814248e-05, - "loss": 0.3428, - "step": 4621000 - }, - { - "epoch": 2.77, - "learning_rate": 3.663948091251304e-05, - "loss": 0.349, - "step": 4621500 - }, - { - "epoch": 2.77, - "learning_rate": 3.663738094695247e-05, - "loss": 0.3503, - "step": 4622000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6635280981391905e-05, - "loss": 0.342, - "step": 4622500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6633181015831346e-05, - "loss": 0.343, - "step": 4623000 - }, - { - "epoch": 2.77, - "learning_rate": 3.663108105027078e-05, - "loss": 0.3386, - "step": 4623500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6628981084710206e-05, - "loss": 0.3465, - "step": 4624000 - }, - { - "epoch": 2.77, - "learning_rate": 3.6626881119149646e-05, - "loss": 0.3517, - "step": 4624500 - }, - { - "epoch": 2.77, - "learning_rate": 3.662478115358908e-05, - "loss": 0.343, - "step": 4625000 - }, - { - "epoch": 2.77, - "learning_rate": 3.662268118802851e-05, - "loss": 0.3486, - "step": 4625500 - }, - { - "epoch": 2.77, - "learning_rate": 3.662058962233019e-05, - "loss": 0.3422, - "step": 4626000 - }, - { - "epoch": 2.77, - "learning_rate": 3.661848965676963e-05, - "loss": 0.3439, - "step": 4626500 - }, - { - "epoch": 2.77, - "learning_rate": 3.661638969120907e-05, - "loss": 0.3355, - "step": 4627000 - }, - { - "epoch": 2.77, - "learning_rate": 3.66142897256485e-05, - "loss": 0.3434, - "step": 4627500 - }, - { - "epoch": 2.77, - "learning_rate": 3.6612189760087934e-05, - "loss": 0.34, - "step": 4628000 - }, - { - "epoch": 2.77, - "learning_rate": 3.661008979452737e-05, - "loss": 0.3427, - "step": 4628500 - }, - { - "epoch": 2.78, - "learning_rate": 3.66079898289668e-05, - "loss": 0.3437, - "step": 4629000 - }, - { - "epoch": 2.78, - "learning_rate": 3.660588986340624e-05, - "loss": 0.3368, - "step": 4629500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6603789897845675e-05, - "loss": 0.346, - "step": 4630000 - }, - { - "epoch": 2.78, - "learning_rate": 3.6601694132216235e-05, - "loss": 0.351, - "step": 4630500 - }, - { - "epoch": 2.78, - "learning_rate": 3.659959416665566e-05, - "loss": 0.3448, - "step": 4631000 - }, - { - "epoch": 2.78, - "learning_rate": 3.659749840102622e-05, - "loss": 0.3539, - "step": 4631500 - }, - { - "epoch": 2.78, - "learning_rate": 3.659539843546566e-05, - "loss": 0.3411, - "step": 4632000 - }, - { - "epoch": 2.78, - "learning_rate": 3.6593298469905096e-05, - "loss": 0.3437, - "step": 4632500 - }, - { - "epoch": 2.78, - "learning_rate": 3.659119850434453e-05, - "loss": 0.3447, - "step": 4633000 - }, - { - "epoch": 2.78, - "learning_rate": 3.658909853878396e-05, - "loss": 0.3328, - "step": 4633500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6586998573223396e-05, - "loss": 0.3369, - "step": 4634000 - }, - { - "epoch": 2.78, - "learning_rate": 3.658489860766283e-05, - "loss": 0.3389, - "step": 4634500 - }, - { - "epoch": 2.78, - "learning_rate": 3.658279864210227e-05, - "loss": 0.3434, - "step": 4635000 - }, - { - "epoch": 2.78, - "learning_rate": 3.65806986765417e-05, - "loss": 0.3435, - "step": 4635500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6578598710981137e-05, - "loss": 0.3317, - "step": 4636000 - }, - { - "epoch": 2.78, - "learning_rate": 3.657649874542058e-05, - "loss": 0.3411, - "step": 4636500 - }, - { - "epoch": 2.78, - "learning_rate": 3.657440297979113e-05, - "loss": 0.3369, - "step": 4637000 - }, - { - "epoch": 2.78, - "learning_rate": 3.6572303014230564e-05, - "loss": 0.3401, - "step": 4637500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6570203048670004e-05, - "loss": 0.3433, - "step": 4638000 - }, - { - "epoch": 2.78, - "learning_rate": 3.656810308310944e-05, - "loss": 0.3333, - "step": 4638500 - }, - { - "epoch": 2.78, - "learning_rate": 3.656600311754887e-05, - "loss": 0.342, - "step": 4639000 - }, - { - "epoch": 2.78, - "learning_rate": 3.656390315198831e-05, - "loss": 0.3436, - "step": 4639500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6561803186427745e-05, - "loss": 0.336, - "step": 4640000 - }, - { - "epoch": 2.78, - "learning_rate": 3.65597074207983e-05, - "loss": 0.3399, - "step": 4640500 - }, - { - "epoch": 2.78, - "learning_rate": 3.655760745523773e-05, - "loss": 0.3441, - "step": 4641000 - }, - { - "epoch": 2.78, - "learning_rate": 3.655550748967717e-05, - "loss": 0.3428, - "step": 4641500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6553407524116605e-05, - "loss": 0.3391, - "step": 4642000 - }, - { - "epoch": 2.78, - "learning_rate": 3.655130755855604e-05, - "loss": 0.3412, - "step": 4642500 - }, - { - "epoch": 2.78, - "learning_rate": 3.654920759299548e-05, - "loss": 0.3445, - "step": 4643000 - }, - { - "epoch": 2.78, - "learning_rate": 3.6547107627434906e-05, - "loss": 0.3436, - "step": 4643500 - }, - { - "epoch": 2.78, - "learning_rate": 3.6545011861805466e-05, - "loss": 0.3487, - "step": 4644000 - }, - { - "epoch": 2.78, - "learning_rate": 3.65429118962449e-05, - "loss": 0.3392, - "step": 4644500 - }, - { - "epoch": 2.78, - "learning_rate": 3.654081193068434e-05, - "loss": 0.3433, - "step": 4645000 - }, - { - "epoch": 2.79, - "learning_rate": 3.653871196512377e-05, - "loss": 0.3447, - "step": 4645500 - }, - { - "epoch": 2.79, - "learning_rate": 3.653661199956321e-05, - "loss": 0.3402, - "step": 4646000 - }, - { - "epoch": 2.79, - "learning_rate": 3.653451623393377e-05, - "loss": 0.3393, - "step": 4646500 - }, - { - "epoch": 2.79, - "learning_rate": 3.65324162683732e-05, - "loss": 0.3499, - "step": 4647000 - }, - { - "epoch": 2.79, - "learning_rate": 3.6530316302812634e-05, - "loss": 0.3452, - "step": 4647500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6528216337252074e-05, - "loss": 0.3376, - "step": 4648000 - }, - { - "epoch": 2.79, - "learning_rate": 3.65261163716915e-05, - "loss": 0.3501, - "step": 4648500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6524016406130934e-05, - "loss": 0.3452, - "step": 4649000 - }, - { - "epoch": 2.79, - "learning_rate": 3.6521916440570375e-05, - "loss": 0.3485, - "step": 4649500 - }, - { - "epoch": 2.79, - "learning_rate": 3.651981647500981e-05, - "loss": 0.346, - "step": 4650000 - }, - { - "epoch": 2.79, - "learning_rate": 3.651772070938036e-05, - "loss": 0.3438, - "step": 4650500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6515620743819795e-05, - "loss": 0.339, - "step": 4651000 - }, - { - "epoch": 2.79, - "learning_rate": 3.6513524978190355e-05, - "loss": 0.3386, - "step": 4651500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6511425012629796e-05, - "loss": 0.3529, - "step": 4652000 - }, - { - "epoch": 2.79, - "learning_rate": 3.650932504706923e-05, - "loss": 0.3315, - "step": 4652500 - }, - { - "epoch": 2.79, - "learning_rate": 3.650722508150866e-05, - "loss": 0.3445, - "step": 4653000 - }, - { - "epoch": 2.79, - "learning_rate": 3.650512931587922e-05, - "loss": 0.3459, - "step": 4653500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6503029350318656e-05, - "loss": 0.3443, - "step": 4654000 - }, - { - "epoch": 2.79, - "learning_rate": 3.650092938475809e-05, - "loss": 0.3391, - "step": 4654500 - }, - { - "epoch": 2.79, - "learning_rate": 3.649882941919753e-05, - "loss": 0.3453, - "step": 4655000 - }, - { - "epoch": 2.79, - "learning_rate": 3.649672945363696e-05, - "loss": 0.3487, - "step": 4655500 - }, - { - "epoch": 2.79, - "learning_rate": 3.649462948807639e-05, - "loss": 0.3319, - "step": 4656000 - }, - { - "epoch": 2.79, - "learning_rate": 3.649252952251583e-05, - "loss": 0.3511, - "step": 4656500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6490429556955264e-05, - "loss": 0.3387, - "step": 4657000 - }, - { - "epoch": 2.79, - "learning_rate": 3.6488333791325824e-05, - "loss": 0.3398, - "step": 4657500 - }, - { - "epoch": 2.79, - "learning_rate": 3.648623382576525e-05, - "loss": 0.3481, - "step": 4658000 - }, - { - "epoch": 2.79, - "learning_rate": 3.648413386020469e-05, - "loss": 0.3543, - "step": 4658500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6482033894644125e-05, - "loss": 0.3388, - "step": 4659000 - }, - { - "epoch": 2.79, - "learning_rate": 3.647993392908356e-05, - "loss": 0.3382, - "step": 4659500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6477833963523e-05, - "loss": 0.3449, - "step": 4660000 - }, - { - "epoch": 2.79, - "learning_rate": 3.647573399796243e-05, - "loss": 0.3443, - "step": 4660500 - }, - { - "epoch": 2.79, - "learning_rate": 3.6473638232332985e-05, - "loss": 0.3451, - "step": 4661000 - }, - { - "epoch": 2.79, - "learning_rate": 3.6471538266772426e-05, - "loss": 0.3436, - "step": 4661500 - }, - { - "epoch": 2.8, - "learning_rate": 3.646943830121186e-05, - "loss": 0.3432, - "step": 4662000 - }, - { - "epoch": 2.8, - "learning_rate": 3.646733833565129e-05, - "loss": 0.3422, - "step": 4662500 - }, - { - "epoch": 2.8, - "learning_rate": 3.646523837009073e-05, - "loss": 0.343, - "step": 4663000 - }, - { - "epoch": 2.8, - "learning_rate": 3.6463142604461286e-05, - "loss": 0.347, - "step": 4663500 - }, - { - "epoch": 2.8, - "learning_rate": 3.646104683883185e-05, - "loss": 0.3387, - "step": 4664000 - }, - { - "epoch": 2.8, - "learning_rate": 3.645894687327128e-05, - "loss": 0.3433, - "step": 4664500 - }, - { - "epoch": 2.8, - "learning_rate": 3.645684690771071e-05, - "loss": 0.3389, - "step": 4665000 - }, - { - "epoch": 2.8, - "learning_rate": 3.645474694215015e-05, - "loss": 0.3501, - "step": 4665500 - }, - { - "epoch": 2.8, - "learning_rate": 3.645264697658958e-05, - "loss": 0.3441, - "step": 4666000 - }, - { - "epoch": 2.8, - "learning_rate": 3.6450547011029014e-05, - "loss": 0.3455, - "step": 4666500 - }, - { - "epoch": 2.8, - "learning_rate": 3.6448447045468454e-05, - "loss": 0.3426, - "step": 4667000 - }, - { - "epoch": 2.8, - "learning_rate": 3.644634707990789e-05, - "loss": 0.3388, - "step": 4667500 - }, - { - "epoch": 2.8, - "learning_rate": 3.644424711434733e-05, - "loss": 0.3372, - "step": 4668000 - }, - { - "epoch": 2.8, - "learning_rate": 3.644214714878676e-05, - "loss": 0.3462, - "step": 4668500 - }, - { - "epoch": 2.8, - "learning_rate": 3.6440047183226195e-05, - "loss": 0.3304, - "step": 4669000 - }, - { - "epoch": 2.8, - "learning_rate": 3.6437947217665635e-05, - "loss": 0.3454, - "step": 4669500 - }, - { - "epoch": 2.8, - "learning_rate": 3.643585145203619e-05, - "loss": 0.3415, - "step": 4670000 - }, - { - "epoch": 2.8, - "learning_rate": 3.643375148647562e-05, - "loss": 0.3403, - "step": 4670500 - }, - { - "epoch": 2.8, - "learning_rate": 3.6431651520915056e-05, - "loss": 0.3418, - "step": 4671000 - }, - { - "epoch": 2.8, - "learning_rate": 3.642955575528561e-05, - "loss": 0.3413, - "step": 4671500 - }, - { - "epoch": 2.8, - "learning_rate": 3.642745578972505e-05, - "loss": 0.3498, - "step": 4672000 - }, - { - "epoch": 2.8, - "learning_rate": 3.642535582416448e-05, - "loss": 0.3436, - "step": 4672500 - }, - { - "epoch": 2.8, - "learning_rate": 3.6423255858603916e-05, - "loss": 0.339, - "step": 4673000 - }, - { - "epoch": 2.8, - "learning_rate": 3.642116009297448e-05, - "loss": 0.356, - "step": 4673500 - }, - { - "epoch": 2.8, - "learning_rate": 3.641906012741391e-05, - "loss": 0.3483, - "step": 4674000 - }, - { - "epoch": 2.8, - "learning_rate": 3.6416960161853344e-05, - "loss": 0.3373, - "step": 4674500 - }, - { - "epoch": 2.8, - "learning_rate": 3.6414860196292784e-05, - "loss": 0.3528, - "step": 4675000 - }, - { - "epoch": 2.8, - "learning_rate": 3.641276023073222e-05, - "loss": 0.3412, - "step": 4675500 - }, - { - "epoch": 2.8, - "learning_rate": 3.641066026517165e-05, - "loss": 0.3441, - "step": 4676000 - }, - { - "epoch": 2.8, - "learning_rate": 3.640856029961109e-05, - "loss": 0.348, - "step": 4676500 - }, - { - "epoch": 2.8, - "learning_rate": 3.6406460334050524e-05, - "loss": 0.3456, - "step": 4677000 - }, - { - "epoch": 2.8, - "learning_rate": 3.64043687683522e-05, - "loss": 0.3415, - "step": 4677500 - }, - { - "epoch": 2.8, - "learning_rate": 3.640226880279163e-05, - "loss": 0.326, - "step": 4678000 - }, - { - "epoch": 2.8, - "learning_rate": 3.6400168837231065e-05, - "loss": 0.3429, - "step": 4678500 - }, - { - "epoch": 2.81, - "learning_rate": 3.6398068871670505e-05, - "loss": 0.3446, - "step": 4679000 - }, - { - "epoch": 2.81, - "learning_rate": 3.639597310604106e-05, - "loss": 0.3498, - "step": 4679500 - }, - { - "epoch": 2.81, - "learning_rate": 3.639387314048049e-05, - "loss": 0.3533, - "step": 4680000 - }, - { - "epoch": 2.81, - "learning_rate": 3.639177317491993e-05, - "loss": 0.3349, - "step": 4680500 - }, - { - "epoch": 2.81, - "learning_rate": 3.6389673209359366e-05, - "loss": 0.3406, - "step": 4681000 - }, - { - "epoch": 2.81, - "learning_rate": 3.638757744372992e-05, - "loss": 0.341, - "step": 4681500 - }, - { - "epoch": 2.81, - "learning_rate": 3.638547747816935e-05, - "loss": 0.3421, - "step": 4682000 - }, - { - "epoch": 2.81, - "learning_rate": 3.638337751260879e-05, - "loss": 0.3422, - "step": 4682500 - }, - { - "epoch": 2.81, - "learning_rate": 3.638127754704823e-05, - "loss": 0.3434, - "step": 4683000 - }, - { - "epoch": 2.81, - "learning_rate": 3.637917758148766e-05, - "loss": 0.3396, - "step": 4683500 - }, - { - "epoch": 2.81, - "learning_rate": 3.6377081815858214e-05, - "loss": 0.3468, - "step": 4684000 - }, - { - "epoch": 2.81, - "learning_rate": 3.6374981850297654e-05, - "loss": 0.3411, - "step": 4684500 - }, - { - "epoch": 2.81, - "learning_rate": 3.637288188473709e-05, - "loss": 0.3473, - "step": 4685000 - }, - { - "epoch": 2.81, - "learning_rate": 3.637078191917652e-05, - "loss": 0.3375, - "step": 4685500 - }, - { - "epoch": 2.81, - "learning_rate": 3.636868195361596e-05, - "loss": 0.3427, - "step": 4686000 - }, - { - "epoch": 2.81, - "learning_rate": 3.6366581988055395e-05, - "loss": 0.3471, - "step": 4686500 - }, - { - "epoch": 2.81, - "learning_rate": 3.636448202249483e-05, - "loss": 0.349, - "step": 4687000 - }, - { - "epoch": 2.81, - "learning_rate": 3.636238205693427e-05, - "loss": 0.3472, - "step": 4687500 - }, - { - "epoch": 2.81, - "learning_rate": 3.636028629130482e-05, - "loss": 0.3432, - "step": 4688000 - }, - { - "epoch": 2.81, - "learning_rate": 3.6358186325744255e-05, - "loss": 0.3434, - "step": 4688500 - }, - { - "epoch": 2.81, - "learning_rate": 3.6356086360183696e-05, - "loss": 0.3393, - "step": 4689000 - }, - { - "epoch": 2.81, - "learning_rate": 3.635398639462313e-05, - "loss": 0.3411, - "step": 4689500 - }, - { - "epoch": 2.81, - "learning_rate": 3.635188642906256e-05, - "loss": 0.3422, - "step": 4690000 - }, - { - "epoch": 2.81, - "learning_rate": 3.6349786463502e-05, - "loss": 0.3472, - "step": 4690500 - }, - { - "epoch": 2.81, - "learning_rate": 3.6347686497941436e-05, - "loss": 0.3415, - "step": 4691000 - }, - { - "epoch": 2.81, - "learning_rate": 3.634558653238087e-05, - "loss": 0.3426, - "step": 4691500 - }, - { - "epoch": 2.81, - "learning_rate": 3.634349076675142e-05, - "loss": 0.3313, - "step": 4692000 - }, - { - "epoch": 2.81, - "learning_rate": 3.6341390801190863e-05, - "loss": 0.3495, - "step": 4692500 - }, - { - "epoch": 2.81, - "learning_rate": 3.63392908356303e-05, - "loss": 0.3439, - "step": 4693000 - }, - { - "epoch": 2.81, - "learning_rate": 3.633719087006973e-05, - "loss": 0.3481, - "step": 4693500 - }, - { - "epoch": 2.81, - "learning_rate": 3.6335099304371404e-05, - "loss": 0.3413, - "step": 4694000 - }, - { - "epoch": 2.81, - "learning_rate": 3.6332999338810844e-05, - "loss": 0.3412, - "step": 4694500 - }, - { - "epoch": 2.81, - "learning_rate": 3.633089937325028e-05, - "loss": 0.3394, - "step": 4695000 - }, - { - "epoch": 2.82, - "learning_rate": 3.632879940768971e-05, - "loss": 0.3382, - "step": 4695500 - }, - { - "epoch": 2.82, - "learning_rate": 3.632669944212915e-05, - "loss": 0.3451, - "step": 4696000 - }, - { - "epoch": 2.82, - "learning_rate": 3.6324599476568585e-05, - "loss": 0.3418, - "step": 4696500 - }, - { - "epoch": 2.82, - "learning_rate": 3.632249951100802e-05, - "loss": 0.3372, - "step": 4697000 - }, - { - "epoch": 2.82, - "learning_rate": 3.632039954544746e-05, - "loss": 0.3337, - "step": 4697500 - }, - { - "epoch": 2.82, - "learning_rate": 3.631830377981801e-05, - "loss": 0.344, - "step": 4698000 - }, - { - "epoch": 2.82, - "learning_rate": 3.6316203814257446e-05, - "loss": 0.3466, - "step": 4698500 - }, - { - "epoch": 2.82, - "learning_rate": 3.6314108048628e-05, - "loss": 0.3416, - "step": 4699000 - }, - { - "epoch": 2.82, - "learning_rate": 3.631200808306743e-05, - "loss": 0.3482, - "step": 4699500 - }, - { - "epoch": 2.82, - "learning_rate": 3.630990811750687e-05, - "loss": 0.3419, - "step": 4700000 - }, - { - "epoch": 2.82, - "eval_loss": 0.3312073349952698, - "eval_runtime": 1117.6715, - "eval_samples_per_second": 471.265, - "eval_steps_per_second": 78.545, - "step": 4700000 - }, - { - "epoch": 2.82, - "learning_rate": 3.6307808151946306e-05, - "loss": 0.3483, - "step": 4700500 - }, - { - "epoch": 2.82, - "learning_rate": 3.630570818638574e-05, - "loss": 0.3387, - "step": 4701000 - }, - { - "epoch": 2.82, - "learning_rate": 3.630360822082518e-05, - "loss": 0.3387, - "step": 4701500 - }, - { - "epoch": 2.82, - "learning_rate": 3.6301508255264614e-05, - "loss": 0.341, - "step": 4702000 - }, - { - "epoch": 2.82, - "learning_rate": 3.629940828970405e-05, - "loss": 0.3437, - "step": 4702500 - }, - { - "epoch": 2.82, - "learning_rate": 3.629731672400572e-05, - "loss": 0.3426, - "step": 4703000 - }, - { - "epoch": 2.82, - "learning_rate": 3.629521675844516e-05, - "loss": 0.3429, - "step": 4703500 - }, - { - "epoch": 2.82, - "learning_rate": 3.6293116792884594e-05, - "loss": 0.3402, - "step": 4704000 - }, - { - "epoch": 2.82, - "learning_rate": 3.629101682732403e-05, - "loss": 0.3385, - "step": 4704500 - }, - { - "epoch": 2.82, - "learning_rate": 3.628891686176347e-05, - "loss": 0.3436, - "step": 4705000 - }, - { - "epoch": 2.82, - "learning_rate": 3.62868168962029e-05, - "loss": 0.3432, - "step": 4705500 - }, - { - "epoch": 2.82, - "learning_rate": 3.6284716930642335e-05, - "loss": 0.3438, - "step": 4706000 - }, - { - "epoch": 2.82, - "learning_rate": 3.6282616965081775e-05, - "loss": 0.3417, - "step": 4706500 - }, - { - "epoch": 2.82, - "learning_rate": 3.628052119945233e-05, - "loss": 0.3386, - "step": 4707000 - }, - { - "epoch": 2.82, - "learning_rate": 3.627842123389176e-05, - "loss": 0.3411, - "step": 4707500 - }, - { - "epoch": 2.82, - "learning_rate": 3.6276325468262316e-05, - "loss": 0.3445, - "step": 4708000 - }, - { - "epoch": 2.82, - "learning_rate": 3.6274229702632876e-05, - "loss": 0.3455, - "step": 4708500 - }, - { - "epoch": 2.82, - "learning_rate": 3.6272129737072316e-05, - "loss": 0.3432, - "step": 4709000 - }, - { - "epoch": 2.82, - "learning_rate": 3.627002977151175e-05, - "loss": 0.3498, - "step": 4709500 - }, - { - "epoch": 2.82, - "learning_rate": 3.626792980595118e-05, - "loss": 0.3352, - "step": 4710000 - }, - { - "epoch": 2.82, - "learning_rate": 3.626582984039062e-05, - "loss": 0.34, - "step": 4710500 - }, - { - "epoch": 2.82, - "learning_rate": 3.626373407476118e-05, - "loss": 0.3451, - "step": 4711000 - }, - { - "epoch": 2.82, - "learning_rate": 3.626163410920061e-05, - "loss": 0.3398, - "step": 4711500 - }, - { - "epoch": 2.83, - "learning_rate": 3.6259534143640044e-05, - "loss": 0.3409, - "step": 4712000 - }, - { - "epoch": 2.83, - "learning_rate": 3.625743417807948e-05, - "loss": 0.3379, - "step": 4712500 - }, - { - "epoch": 2.83, - "learning_rate": 3.625533421251891e-05, - "loss": 0.3434, - "step": 4713000 - }, - { - "epoch": 2.83, - "learning_rate": 3.6253234246958344e-05, - "loss": 0.3377, - "step": 4713500 - }, - { - "epoch": 2.83, - "learning_rate": 3.6251134281397785e-05, - "loss": 0.3416, - "step": 4714000 - }, - { - "epoch": 2.83, - "learning_rate": 3.624903431583722e-05, - "loss": 0.3458, - "step": 4714500 - }, - { - "epoch": 2.83, - "learning_rate": 3.624693435027665e-05, - "loss": 0.3585, - "step": 4715000 - }, - { - "epoch": 2.83, - "learning_rate": 3.624483438471609e-05, - "loss": 0.3396, - "step": 4715500 - }, - { - "epoch": 2.83, - "learning_rate": 3.6242734419155525e-05, - "loss": 0.3479, - "step": 4716000 - }, - { - "epoch": 2.83, - "learning_rate": 3.624063445359496e-05, - "loss": 0.3416, - "step": 4716500 - }, - { - "epoch": 2.83, - "learning_rate": 3.62385344880344e-05, - "loss": 0.3428, - "step": 4717000 - }, - { - "epoch": 2.83, - "learning_rate": 3.623643452247383e-05, - "loss": 0.3462, - "step": 4717500 - }, - { - "epoch": 2.83, - "learning_rate": 3.6234338756844386e-05, - "loss": 0.3409, - "step": 4718000 - }, - { - "epoch": 2.83, - "learning_rate": 3.623224299121494e-05, - "loss": 0.343, - "step": 4718500 - }, - { - "epoch": 2.83, - "learning_rate": 3.623014302565438e-05, - "loss": 0.3411, - "step": 4719000 - }, - { - "epoch": 2.83, - "learning_rate": 3.622804306009381e-05, - "loss": 0.3373, - "step": 4719500 - }, - { - "epoch": 2.83, - "learning_rate": 3.622594309453325e-05, - "loss": 0.3432, - "step": 4720000 - }, - { - "epoch": 2.83, - "learning_rate": 3.622384312897269e-05, - "loss": 0.3386, - "step": 4720500 - }, - { - "epoch": 2.83, - "learning_rate": 3.622174316341212e-05, - "loss": 0.3435, - "step": 4721000 - }, - { - "epoch": 2.83, - "learning_rate": 3.6219643197851554e-05, - "loss": 0.3292, - "step": 4721500 - }, - { - "epoch": 2.83, - "learning_rate": 3.6217543232290994e-05, - "loss": 0.349, - "step": 4722000 - }, - { - "epoch": 2.83, - "learning_rate": 3.621544326673043e-05, - "loss": 0.3394, - "step": 4722500 - }, - { - "epoch": 2.83, - "learning_rate": 3.621334750110098e-05, - "loss": 0.3428, - "step": 4723000 - }, - { - "epoch": 2.83, - "learning_rate": 3.6211247535540415e-05, - "loss": 0.3413, - "step": 4723500 - }, - { - "epoch": 2.83, - "learning_rate": 3.6209147569979855e-05, - "loss": 0.34, - "step": 4724000 - }, - { - "epoch": 2.83, - "learning_rate": 3.620704760441929e-05, - "loss": 0.3453, - "step": 4724500 - }, - { - "epoch": 2.83, - "learning_rate": 3.620494763885872e-05, - "loss": 0.3522, - "step": 4725000 - }, - { - "epoch": 2.83, - "learning_rate": 3.6202847673298155e-05, - "loss": 0.3314, - "step": 4725500 - }, - { - "epoch": 2.83, - "learning_rate": 3.620074770773759e-05, - "loss": 0.3316, - "step": 4726000 - }, - { - "epoch": 2.83, - "learning_rate": 3.619865194210815e-05, - "loss": 0.3396, - "step": 4726500 - }, - { - "epoch": 2.83, - "learning_rate": 3.619655197654759e-05, - "loss": 0.3382, - "step": 4727000 - }, - { - "epoch": 2.83, - "learning_rate": 3.619445201098702e-05, - "loss": 0.3413, - "step": 4727500 - }, - { - "epoch": 2.83, - "learning_rate": 3.619235204542645e-05, - "loss": 0.3473, - "step": 4728000 - }, - { - "epoch": 2.83, - "learning_rate": 3.619025207986589e-05, - "loss": 0.3464, - "step": 4728500 - }, - { - "epoch": 2.84, - "learning_rate": 3.618815211430532e-05, - "loss": 0.3326, - "step": 4729000 - }, - { - "epoch": 2.84, - "learning_rate": 3.6186052148744757e-05, - "loss": 0.3374, - "step": 4729500 - }, - { - "epoch": 2.84, - "learning_rate": 3.618395638311531e-05, - "loss": 0.3454, - "step": 4730000 - }, - { - "epoch": 2.84, - "learning_rate": 3.618185641755475e-05, - "loss": 0.3441, - "step": 4730500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6179756451994184e-05, - "loss": 0.3453, - "step": 4731000 - }, - { - "epoch": 2.84, - "learning_rate": 3.617765648643362e-05, - "loss": 0.3429, - "step": 4731500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6175560720804184e-05, - "loss": 0.3474, - "step": 4732000 - }, - { - "epoch": 2.84, - "learning_rate": 3.617346075524361e-05, - "loss": 0.3527, - "step": 4732500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6171360789683045e-05, - "loss": 0.3378, - "step": 4733000 - }, - { - "epoch": 2.84, - "learning_rate": 3.6169260824122485e-05, - "loss": 0.3509, - "step": 4733500 - }, - { - "epoch": 2.84, - "learning_rate": 3.616716085856192e-05, - "loss": 0.3423, - "step": 4734000 - }, - { - "epoch": 2.84, - "learning_rate": 3.616506509293248e-05, - "loss": 0.3468, - "step": 4734500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6162965127371905e-05, - "loss": 0.3459, - "step": 4735000 - }, - { - "epoch": 2.84, - "learning_rate": 3.6160865161811346e-05, - "loss": 0.3432, - "step": 4735500 - }, - { - "epoch": 2.84, - "learning_rate": 3.615876519625078e-05, - "loss": 0.3369, - "step": 4736000 - }, - { - "epoch": 2.84, - "learning_rate": 3.615666523069021e-05, - "loss": 0.3409, - "step": 4736500 - }, - { - "epoch": 2.84, - "learning_rate": 3.615456526512965e-05, - "loss": 0.3447, - "step": 4737000 - }, - { - "epoch": 2.84, - "learning_rate": 3.6152465299569086e-05, - "loss": 0.3437, - "step": 4737500 - }, - { - "epoch": 2.84, - "learning_rate": 3.615036953393964e-05, - "loss": 0.3383, - "step": 4738000 - }, - { - "epoch": 2.84, - "learning_rate": 3.614826956837907e-05, - "loss": 0.3325, - "step": 4738500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6146169602818513e-05, - "loss": 0.3463, - "step": 4739000 - }, - { - "epoch": 2.84, - "learning_rate": 3.614406963725795e-05, - "loss": 0.347, - "step": 4739500 - }, - { - "epoch": 2.84, - "learning_rate": 3.614196967169739e-05, - "loss": 0.3392, - "step": 4740000 - }, - { - "epoch": 2.84, - "learning_rate": 3.613986970613682e-05, - "loss": 0.3379, - "step": 4740500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6137769740576254e-05, - "loss": 0.3416, - "step": 4741000 - }, - { - "epoch": 2.84, - "learning_rate": 3.6135669775015694e-05, - "loss": 0.3431, - "step": 4741500 - }, - { - "epoch": 2.84, - "learning_rate": 3.613357400938625e-05, - "loss": 0.3519, - "step": 4742000 - }, - { - "epoch": 2.84, - "learning_rate": 3.61314782437568e-05, - "loss": 0.3455, - "step": 4742500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6129378278196235e-05, - "loss": 0.3422, - "step": 4743000 - }, - { - "epoch": 2.84, - "learning_rate": 3.612727831263567e-05, - "loss": 0.3394, - "step": 4743500 - }, - { - "epoch": 2.84, - "learning_rate": 3.612517834707511e-05, - "loss": 0.3527, - "step": 4744000 - }, - { - "epoch": 2.84, - "learning_rate": 3.612307838151454e-05, - "loss": 0.3329, - "step": 4744500 - }, - { - "epoch": 2.84, - "learning_rate": 3.6120978415953976e-05, - "loss": 0.3415, - "step": 4745000 - }, - { - "epoch": 2.85, - "learning_rate": 3.6118878450393416e-05, - "loss": 0.3469, - "step": 4745500 - }, - { - "epoch": 2.85, - "learning_rate": 3.611677848483285e-05, - "loss": 0.3533, - "step": 4746000 - }, - { - "epoch": 2.85, - "learning_rate": 3.611468691913453e-05, - "loss": 0.3368, - "step": 4746500 - }, - { - "epoch": 2.85, - "learning_rate": 3.6112586953573956e-05, - "loss": 0.3431, - "step": 4747000 - }, - { - "epoch": 2.85, - "learning_rate": 3.6110486988013397e-05, - "loss": 0.3488, - "step": 4747500 - }, - { - "epoch": 2.85, - "learning_rate": 3.610838702245283e-05, - "loss": 0.3409, - "step": 4748000 - }, - { - "epoch": 2.85, - "learning_rate": 3.6106287056892264e-05, - "loss": 0.3402, - "step": 4748500 - }, - { - "epoch": 2.85, - "learning_rate": 3.610419129126282e-05, - "loss": 0.3472, - "step": 4749000 - }, - { - "epoch": 2.85, - "learning_rate": 3.610209132570226e-05, - "loss": 0.3455, - "step": 4749500 - }, - { - "epoch": 2.85, - "learning_rate": 3.609999136014169e-05, - "loss": 0.3365, - "step": 4750000 - }, - { - "epoch": 2.85, - "learning_rate": 3.6097891394581124e-05, - "loss": 0.3333, - "step": 4750500 - }, - { - "epoch": 2.85, - "learning_rate": 3.6095791429020564e-05, - "loss": 0.3444, - "step": 4751000 - }, - { - "epoch": 2.85, - "learning_rate": 3.609369146346e-05, - "loss": 0.3397, - "step": 4751500 - }, - { - "epoch": 2.85, - "learning_rate": 3.609159149789943e-05, - "loss": 0.3505, - "step": 4752000 - }, - { - "epoch": 2.85, - "learning_rate": 3.608949573226999e-05, - "loss": 0.3351, - "step": 4752500 - }, - { - "epoch": 2.85, - "learning_rate": 3.6087395766709425e-05, - "loss": 0.3379, - "step": 4753000 - }, - { - "epoch": 2.85, - "learning_rate": 3.608529580114886e-05, - "loss": 0.3467, - "step": 4753500 - }, - { - "epoch": 2.85, - "learning_rate": 3.608320003551941e-05, - "loss": 0.3561, - "step": 4754000 - }, - { - "epoch": 2.85, - "learning_rate": 3.608110006995885e-05, - "loss": 0.3414, - "step": 4754500 - }, - { - "epoch": 2.85, - "learning_rate": 3.6079000104398286e-05, - "loss": 0.3328, - "step": 4755000 - }, - { - "epoch": 2.85, - "learning_rate": 3.607690013883772e-05, - "loss": 0.3419, - "step": 4755500 - }, - { - "epoch": 2.85, - "learning_rate": 3.607480017327716e-05, - "loss": 0.3502, - "step": 4756000 - }, - { - "epoch": 2.85, - "learning_rate": 3.607270020771659e-05, - "loss": 0.3374, - "step": 4756500 - }, - { - "epoch": 2.85, - "learning_rate": 3.6070600242156027e-05, - "loss": 0.3475, - "step": 4757000 - }, - { - "epoch": 2.85, - "learning_rate": 3.606850027659547e-05, - "loss": 0.3491, - "step": 4757500 - }, - { - "epoch": 2.85, - "learning_rate": 3.606640451096602e-05, - "loss": 0.3416, - "step": 4758000 - }, - { - "epoch": 2.85, - "learning_rate": 3.6064304545405454e-05, - "loss": 0.3337, - "step": 4758500 - }, - { - "epoch": 2.85, - "learning_rate": 3.606220457984489e-05, - "loss": 0.3356, - "step": 4759000 - }, - { - "epoch": 2.85, - "learning_rate": 3.606010461428433e-05, - "loss": 0.3377, - "step": 4759500 - }, - { - "epoch": 2.85, - "learning_rate": 3.605800464872376e-05, - "loss": 0.3431, - "step": 4760000 - }, - { - "epoch": 2.85, - "learning_rate": 3.6055904683163194e-05, - "loss": 0.3483, - "step": 4760500 - }, - { - "epoch": 2.85, - "learning_rate": 3.6053808917533755e-05, - "loss": 0.3539, - "step": 4761000 - }, - { - "epoch": 2.85, - "learning_rate": 3.605170895197319e-05, - "loss": 0.3419, - "step": 4761500 - }, - { - "epoch": 2.86, - "learning_rate": 3.604960898641262e-05, - "loss": 0.3376, - "step": 4762000 - }, - { - "epoch": 2.86, - "learning_rate": 3.604750902085206e-05, - "loss": 0.3315, - "step": 4762500 - }, - { - "epoch": 2.86, - "learning_rate": 3.6045409055291495e-05, - "loss": 0.3579, - "step": 4763000 - }, - { - "epoch": 2.86, - "learning_rate": 3.604330908973093e-05, - "loss": 0.3539, - "step": 4763500 - }, - { - "epoch": 2.86, - "learning_rate": 3.604120912417037e-05, - "loss": 0.3506, - "step": 4764000 - }, - { - "epoch": 2.86, - "learning_rate": 3.6039109158609796e-05, - "loss": 0.3405, - "step": 4764500 - }, - { - "epoch": 2.86, - "learning_rate": 3.6037013392980356e-05, - "loss": 0.3489, - "step": 4765000 - }, - { - "epoch": 2.86, - "learning_rate": 3.603491342741979e-05, - "loss": 0.3446, - "step": 4765500 - }, - { - "epoch": 2.86, - "learning_rate": 3.603281346185923e-05, - "loss": 0.3367, - "step": 4766000 - }, - { - "epoch": 2.86, - "learning_rate": 3.6030713496298656e-05, - "loss": 0.338, - "step": 4766500 - }, - { - "epoch": 2.86, - "learning_rate": 3.602861353073809e-05, - "loss": 0.3422, - "step": 4767000 - }, - { - "epoch": 2.86, - "learning_rate": 3.602651356517753e-05, - "loss": 0.3427, - "step": 4767500 - }, - { - "epoch": 2.86, - "learning_rate": 3.6024413599616964e-05, - "loss": 0.3447, - "step": 4768000 - }, - { - "epoch": 2.86, - "learning_rate": 3.60223136340564e-05, - "loss": 0.3385, - "step": 4768500 - }, - { - "epoch": 2.86, - "learning_rate": 3.602021786842696e-05, - "loss": 0.3606, - "step": 4769000 - }, - { - "epoch": 2.86, - "learning_rate": 3.601811790286639e-05, - "loss": 0.3434, - "step": 4769500 - }, - { - "epoch": 2.86, - "learning_rate": 3.6016017937305824e-05, - "loss": 0.3432, - "step": 4770000 - }, - { - "epoch": 2.86, - "learning_rate": 3.6013917971745265e-05, - "loss": 0.3387, - "step": 4770500 - }, - { - "epoch": 2.86, - "learning_rate": 3.60118180061847e-05, - "loss": 0.346, - "step": 4771000 - }, - { - "epoch": 2.86, - "learning_rate": 3.600972224055525e-05, - "loss": 0.3428, - "step": 4771500 - }, - { - "epoch": 2.86, - "learning_rate": 3.6007622274994685e-05, - "loss": 0.3458, - "step": 4772000 - }, - { - "epoch": 2.86, - "learning_rate": 3.6005522309434125e-05, - "loss": 0.3426, - "step": 4772500 - }, - { - "epoch": 2.86, - "learning_rate": 3.600342234387356e-05, - "loss": 0.3395, - "step": 4773000 - }, - { - "epoch": 2.86, - "learning_rate": 3.600132237831299e-05, - "loss": 0.3427, - "step": 4773500 - }, - { - "epoch": 2.86, - "learning_rate": 3.5999226612683546e-05, - "loss": 0.3463, - "step": 4774000 - }, - { - "epoch": 2.86, - "learning_rate": 3.5997126647122986e-05, - "loss": 0.3454, - "step": 4774500 - }, - { - "epoch": 2.86, - "learning_rate": 3.599502668156242e-05, - "loss": 0.3401, - "step": 4775000 - }, - { - "epoch": 2.86, - "learning_rate": 3.599292671600185e-05, - "loss": 0.3381, - "step": 4775500 - }, - { - "epoch": 2.86, - "learning_rate": 3.599082675044129e-05, - "loss": 0.3426, - "step": 4776000 - }, - { - "epoch": 2.86, - "learning_rate": 3.598872678488073e-05, - "loss": 0.3366, - "step": 4776500 - }, - { - "epoch": 2.86, - "learning_rate": 3.598662681932016e-05, - "loss": 0.3419, - "step": 4777000 - }, - { - "epoch": 2.86, - "learning_rate": 3.598453105369072e-05, - "loss": 0.3397, - "step": 4777500 - }, - { - "epoch": 2.86, - "learning_rate": 3.5982431088130154e-05, - "loss": 0.3445, - "step": 4778000 - }, - { - "epoch": 2.86, - "learning_rate": 3.598033532250071e-05, - "loss": 0.3352, - "step": 4778500 - }, - { - "epoch": 2.87, - "learning_rate": 3.597823535694014e-05, - "loss": 0.3414, - "step": 4779000 - }, - { - "epoch": 2.87, - "learning_rate": 3.597613539137958e-05, - "loss": 0.3542, - "step": 4779500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5974035425819015e-05, - "loss": 0.3502, - "step": 4780000 - }, - { - "epoch": 2.87, - "learning_rate": 3.597193546025845e-05, - "loss": 0.3383, - "step": 4780500 - }, - { - "epoch": 2.87, - "learning_rate": 3.596983549469789e-05, - "loss": 0.3458, - "step": 4781000 - }, - { - "epoch": 2.87, - "learning_rate": 3.596773552913732e-05, - "loss": 0.3404, - "step": 4781500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5965635563576755e-05, - "loss": 0.3366, - "step": 4782000 - }, - { - "epoch": 2.87, - "learning_rate": 3.5963535598016196e-05, - "loss": 0.3371, - "step": 4782500 - }, - { - "epoch": 2.87, - "learning_rate": 3.596143563245563e-05, - "loss": 0.3396, - "step": 4783000 - }, - { - "epoch": 2.87, - "learning_rate": 3.595933986682618e-05, - "loss": 0.3392, - "step": 4783500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5957239901265616e-05, - "loss": 0.3394, - "step": 4784000 - }, - { - "epoch": 2.87, - "learning_rate": 3.5955139935705056e-05, - "loss": 0.3451, - "step": 4784500 - }, - { - "epoch": 2.87, - "learning_rate": 3.595303997014449e-05, - "loss": 0.3298, - "step": 4785000 - }, - { - "epoch": 2.87, - "learning_rate": 3.595094420451504e-05, - "loss": 0.3537, - "step": 4785500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5948844238954484e-05, - "loss": 0.3382, - "step": 4786000 - }, - { - "epoch": 2.87, - "learning_rate": 3.594674427339392e-05, - "loss": 0.345, - "step": 4786500 - }, - { - "epoch": 2.87, - "learning_rate": 3.594464430783335e-05, - "loss": 0.3346, - "step": 4787000 - }, - { - "epoch": 2.87, - "learning_rate": 3.594254434227279e-05, - "loss": 0.3455, - "step": 4787500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5940448576643344e-05, - "loss": 0.3408, - "step": 4788000 - }, - { - "epoch": 2.87, - "learning_rate": 3.593834861108278e-05, - "loss": 0.3422, - "step": 4788500 - }, - { - "epoch": 2.87, - "learning_rate": 3.593624864552221e-05, - "loss": 0.3528, - "step": 4789000 - }, - { - "epoch": 2.87, - "learning_rate": 3.593414867996165e-05, - "loss": 0.3407, - "step": 4789500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5932048714401085e-05, - "loss": 0.3431, - "step": 4790000 - }, - { - "epoch": 2.87, - "learning_rate": 3.592994874884052e-05, - "loss": 0.3435, - "step": 4790500 - }, - { - "epoch": 2.87, - "learning_rate": 3.592785298321107e-05, - "loss": 0.3421, - "step": 4791000 - }, - { - "epoch": 2.87, - "learning_rate": 3.592575301765051e-05, - "loss": 0.343, - "step": 4791500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5923653052089946e-05, - "loss": 0.3447, - "step": 4792000 - }, - { - "epoch": 2.87, - "learning_rate": 3.5921553086529386e-05, - "loss": 0.3377, - "step": 4792500 - }, - { - "epoch": 2.87, - "learning_rate": 3.591945312096882e-05, - "loss": 0.3406, - "step": 4793000 - }, - { - "epoch": 2.87, - "learning_rate": 3.5917353155408246e-05, - "loss": 0.3388, - "step": 4793500 - }, - { - "epoch": 2.87, - "learning_rate": 3.5915257389778806e-05, - "loss": 0.3408, - "step": 4794000 - }, - { - "epoch": 2.87, - "learning_rate": 3.5913157424218247e-05, - "loss": 0.3414, - "step": 4794500 - }, - { - "epoch": 2.87, - "learning_rate": 3.591105745865768e-05, - "loss": 0.3515, - "step": 4795000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5908957493097113e-05, - "loss": 0.3438, - "step": 4795500 - }, - { - "epoch": 2.88, - "learning_rate": 3.590685752753655e-05, - "loss": 0.3471, - "step": 4796000 - }, - { - "epoch": 2.88, - "learning_rate": 3.590476176190711e-05, - "loss": 0.3376, - "step": 4796500 - }, - { - "epoch": 2.88, - "learning_rate": 3.590266179634654e-05, - "loss": 0.3329, - "step": 4797000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5900561830785974e-05, - "loss": 0.3409, - "step": 4797500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5898461865225414e-05, - "loss": 0.3399, - "step": 4798000 - }, - { - "epoch": 2.88, - "learning_rate": 3.589636189966484e-05, - "loss": 0.3437, - "step": 4798500 - }, - { - "epoch": 2.88, - "learning_rate": 3.589426193410428e-05, - "loss": 0.341, - "step": 4799000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5892161968543715e-05, - "loss": 0.337, - "step": 4799500 - }, - { - "epoch": 2.88, - "learning_rate": 3.589006200298315e-05, - "loss": 0.334, - "step": 4800000 - }, - { - "epoch": 2.88, - "eval_loss": 0.33047863841056824, - "eval_runtime": 1120.6954, - "eval_samples_per_second": 469.994, - "eval_steps_per_second": 78.333, - "step": 4800000 - }, - { - "epoch": 2.88, - "learning_rate": 3.588796623735371e-05, - "loss": 0.3471, - "step": 4800500 - }, - { - "epoch": 2.88, - "learning_rate": 3.588587047172426e-05, - "loss": 0.3363, - "step": 4801000 - }, - { - "epoch": 2.88, - "learning_rate": 3.58837705061637e-05, - "loss": 0.3381, - "step": 4801500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5881670540603136e-05, - "loss": 0.3363, - "step": 4802000 - }, - { - "epoch": 2.88, - "learning_rate": 3.587957057504257e-05, - "loss": 0.3478, - "step": 4802500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5877470609482e-05, - "loss": 0.347, - "step": 4803000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5875370643921436e-05, - "loss": 0.3462, - "step": 4803500 - }, - { - "epoch": 2.88, - "learning_rate": 3.587327067836087e-05, - "loss": 0.3476, - "step": 4804000 - }, - { - "epoch": 2.88, - "learning_rate": 3.587117071280031e-05, - "loss": 0.3369, - "step": 4804500 - }, - { - "epoch": 2.88, - "learning_rate": 3.586907494717087e-05, - "loss": 0.3403, - "step": 4805000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5866979181541424e-05, - "loss": 0.3448, - "step": 4805500 - }, - { - "epoch": 2.88, - "learning_rate": 3.586487921598086e-05, - "loss": 0.3339, - "step": 4806000 - }, - { - "epoch": 2.88, - "learning_rate": 3.58627792504203e-05, - "loss": 0.3382, - "step": 4806500 - }, - { - "epoch": 2.88, - "learning_rate": 3.586067928485973e-05, - "loss": 0.341, - "step": 4807000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5858579319299164e-05, - "loss": 0.3402, - "step": 4807500 - }, - { - "epoch": 2.88, - "learning_rate": 3.58564793537386e-05, - "loss": 0.3396, - "step": 4808000 - }, - { - "epoch": 2.88, - "learning_rate": 3.585437938817803e-05, - "loss": 0.3341, - "step": 4808500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5852279422617465e-05, - "loss": 0.3527, - "step": 4809000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5850183656988025e-05, - "loss": 0.3442, - "step": 4809500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5848083691427465e-05, - "loss": 0.3447, - "step": 4810000 - }, - { - "epoch": 2.88, - "learning_rate": 3.584598372586689e-05, - "loss": 0.3478, - "step": 4810500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5843883760306326e-05, - "loss": 0.3317, - "step": 4811000 - }, - { - "epoch": 2.88, - "learning_rate": 3.5841787994676886e-05, - "loss": 0.3335, - "step": 4811500 - }, - { - "epoch": 2.88, - "learning_rate": 3.5839688029116326e-05, - "loss": 0.3351, - "step": 4812000 - }, - { - "epoch": 2.89, - "learning_rate": 3.583758806355575e-05, - "loss": 0.3392, - "step": 4812500 - }, - { - "epoch": 2.89, - "learning_rate": 3.583548809799519e-05, - "loss": 0.3289, - "step": 4813000 - }, - { - "epoch": 2.89, - "learning_rate": 3.5833388132434627e-05, - "loss": 0.3478, - "step": 4813500 - }, - { - "epoch": 2.89, - "learning_rate": 3.583129236680519e-05, - "loss": 0.3313, - "step": 4814000 - }, - { - "epoch": 2.89, - "learning_rate": 3.582919240124462e-05, - "loss": 0.3349, - "step": 4814500 - }, - { - "epoch": 2.89, - "learning_rate": 3.5827092435684054e-05, - "loss": 0.3412, - "step": 4815000 - }, - { - "epoch": 2.89, - "learning_rate": 3.582499247012349e-05, - "loss": 0.3342, - "step": 4815500 - }, - { - "epoch": 2.89, - "learning_rate": 3.582289670449405e-05, - "loss": 0.3466, - "step": 4816000 - }, - { - "epoch": 2.89, - "learning_rate": 3.58208009388646e-05, - "loss": 0.3465, - "step": 4816500 - }, - { - "epoch": 2.89, - "learning_rate": 3.5818700973304035e-05, - "loss": 0.3368, - "step": 4817000 - }, - { - "epoch": 2.89, - "learning_rate": 3.5816601007743475e-05, - "loss": 0.3463, - "step": 4817500 - }, - { - "epoch": 2.89, - "learning_rate": 3.581450104218291e-05, - "loss": 0.3332, - "step": 4818000 - }, - { - "epoch": 2.89, - "learning_rate": 3.581240527655346e-05, - "loss": 0.3443, - "step": 4818500 - }, - { - "epoch": 2.89, - "learning_rate": 3.58103053109929e-05, - "loss": 0.3278, - "step": 4819000 - }, - { - "epoch": 2.89, - "learning_rate": 3.5808205345432336e-05, - "loss": 0.3429, - "step": 4819500 - }, - { - "epoch": 2.89, - "learning_rate": 3.580610537987177e-05, - "loss": 0.3324, - "step": 4820000 - }, - { - "epoch": 2.89, - "learning_rate": 3.580400541431121e-05, - "loss": 0.3459, - "step": 4820500 - }, - { - "epoch": 2.89, - "learning_rate": 3.580190544875064e-05, - "loss": 0.3415, - "step": 4821000 - }, - { - "epoch": 2.89, - "learning_rate": 3.5799805483190076e-05, - "loss": 0.3396, - "step": 4821500 - }, - { - "epoch": 2.89, - "learning_rate": 3.579770551762951e-05, - "loss": 0.3406, - "step": 4822000 - }, - { - "epoch": 2.89, - "learning_rate": 3.579560975200007e-05, - "loss": 0.3385, - "step": 4822500 - }, - { - "epoch": 2.89, - "learning_rate": 3.5793509786439504e-05, - "loss": 0.346, - "step": 4823000 - }, - { - "epoch": 2.89, - "learning_rate": 3.579140982087894e-05, - "loss": 0.3406, - "step": 4823500 - }, - { - "epoch": 2.89, - "learning_rate": 3.578930985531838e-05, - "loss": 0.3422, - "step": 4824000 - }, - { - "epoch": 2.89, - "learning_rate": 3.5787209889757804e-05, - "loss": 0.3415, - "step": 4824500 - }, - { - "epoch": 2.89, - "learning_rate": 3.578510992419724e-05, - "loss": 0.3405, - "step": 4825000 - }, - { - "epoch": 2.89, - "learning_rate": 3.57830141585678e-05, - "loss": 0.3421, - "step": 4825500 - }, - { - "epoch": 2.89, - "learning_rate": 3.578091419300724e-05, - "loss": 0.3379, - "step": 4826000 - }, - { - "epoch": 2.89, - "learning_rate": 3.577881842737779e-05, - "loss": 0.345, - "step": 4826500 - }, - { - "epoch": 2.89, - "learning_rate": 3.5776718461817225e-05, - "loss": 0.3375, - "step": 4827000 - }, - { - "epoch": 2.89, - "learning_rate": 3.5774618496256665e-05, - "loss": 0.3389, - "step": 4827500 - }, - { - "epoch": 2.89, - "learning_rate": 3.57725185306961e-05, - "loss": 0.3421, - "step": 4828000 - }, - { - "epoch": 2.89, - "learning_rate": 3.577041856513553e-05, - "loss": 0.3519, - "step": 4828500 - }, - { - "epoch": 2.9, - "learning_rate": 3.5768322799506086e-05, - "loss": 0.3353, - "step": 4829000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5766222833945526e-05, - "loss": 0.3333, - "step": 4829500 - }, - { - "epoch": 2.9, - "learning_rate": 3.576412286838496e-05, - "loss": 0.3373, - "step": 4830000 - }, - { - "epoch": 2.9, - "learning_rate": 3.576202710275551e-05, - "loss": 0.3462, - "step": 4830500 - }, - { - "epoch": 2.9, - "learning_rate": 3.5759927137194946e-05, - "loss": 0.3412, - "step": 4831000 - }, - { - "epoch": 2.9, - "learning_rate": 3.575782717163439e-05, - "loss": 0.3371, - "step": 4831500 - }, - { - "epoch": 2.9, - "learning_rate": 3.575572720607382e-05, - "loss": 0.3495, - "step": 4832000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5753627240513254e-05, - "loss": 0.3405, - "step": 4832500 - }, - { - "epoch": 2.9, - "learning_rate": 3.5751527274952694e-05, - "loss": 0.3512, - "step": 4833000 - }, - { - "epoch": 2.9, - "learning_rate": 3.574942730939213e-05, - "loss": 0.3441, - "step": 4833500 - }, - { - "epoch": 2.9, - "learning_rate": 3.574732734383156e-05, - "loss": 0.3443, - "step": 4834000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5745227378270994e-05, - "loss": 0.3425, - "step": 4834500 - }, - { - "epoch": 2.9, - "learning_rate": 3.574312741271043e-05, - "loss": 0.3486, - "step": 4835000 - }, - { - "epoch": 2.9, - "learning_rate": 3.574102744714987e-05, - "loss": 0.3352, - "step": 4835500 - }, - { - "epoch": 2.9, - "learning_rate": 3.57389274815893e-05, - "loss": 0.3352, - "step": 4836000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5736827516028735e-05, - "loss": 0.3391, - "step": 4836500 - }, - { - "epoch": 2.9, - "learning_rate": 3.5734727550468175e-05, - "loss": 0.3406, - "step": 4837000 - }, - { - "epoch": 2.9, - "learning_rate": 3.573262758490761e-05, - "loss": 0.3482, - "step": 4837500 - }, - { - "epoch": 2.9, - "learning_rate": 3.573052761934704e-05, - "loss": 0.3464, - "step": 4838000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5728431853717596e-05, - "loss": 0.3369, - "step": 4838500 - }, - { - "epoch": 2.9, - "learning_rate": 3.572633608808815e-05, - "loss": 0.3456, - "step": 4839000 - }, - { - "epoch": 2.9, - "learning_rate": 3.572424032245871e-05, - "loss": 0.3485, - "step": 4839500 - }, - { - "epoch": 2.9, - "learning_rate": 3.572214035689815e-05, - "loss": 0.3364, - "step": 4840000 - }, - { - "epoch": 2.9, - "learning_rate": 3.572004039133758e-05, - "loss": 0.3484, - "step": 4840500 - }, - { - "epoch": 2.9, - "learning_rate": 3.571794042577702e-05, - "loss": 0.3361, - "step": 4841000 - }, - { - "epoch": 2.9, - "learning_rate": 3.571584046021645e-05, - "loss": 0.3418, - "step": 4841500 - }, - { - "epoch": 2.9, - "learning_rate": 3.5713740494655884e-05, - "loss": 0.3528, - "step": 4842000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5711640529095324e-05, - "loss": 0.3402, - "step": 4842500 - }, - { - "epoch": 2.9, - "learning_rate": 3.570954056353476e-05, - "loss": 0.3448, - "step": 4843000 - }, - { - "epoch": 2.9, - "learning_rate": 3.570744059797419e-05, - "loss": 0.3321, - "step": 4843500 - }, - { - "epoch": 2.9, - "learning_rate": 3.570534063241363e-05, - "loss": 0.3349, - "step": 4844000 - }, - { - "epoch": 2.9, - "learning_rate": 3.5703240666853064e-05, - "loss": 0.342, - "step": 4844500 - }, - { - "epoch": 2.9, - "learning_rate": 3.57011407012925e-05, - "loss": 0.3451, - "step": 4845000 - }, - { - "epoch": 2.91, - "learning_rate": 3.569904073573194e-05, - "loss": 0.3386, - "step": 4845500 - }, - { - "epoch": 2.91, - "learning_rate": 3.569694497010249e-05, - "loss": 0.3449, - "step": 4846000 - }, - { - "epoch": 2.91, - "learning_rate": 3.5694845004541925e-05, - "loss": 0.3423, - "step": 4846500 - }, - { - "epoch": 2.91, - "learning_rate": 3.569274923891248e-05, - "loss": 0.3477, - "step": 4847000 - }, - { - "epoch": 2.91, - "learning_rate": 3.569064927335191e-05, - "loss": 0.3401, - "step": 4847500 - }, - { - "epoch": 2.91, - "learning_rate": 3.568854930779135e-05, - "loss": 0.3437, - "step": 4848000 - }, - { - "epoch": 2.91, - "learning_rate": 3.5686449342230786e-05, - "loss": 0.3426, - "step": 4848500 - }, - { - "epoch": 2.91, - "learning_rate": 3.568434937667022e-05, - "loss": 0.3313, - "step": 4849000 - }, - { - "epoch": 2.91, - "learning_rate": 3.568225361104078e-05, - "loss": 0.3429, - "step": 4849500 - }, - { - "epoch": 2.91, - "learning_rate": 3.568015364548021e-05, - "loss": 0.3473, - "step": 4850000 - }, - { - "epoch": 2.91, - "learning_rate": 3.5678053679919647e-05, - "loss": 0.3426, - "step": 4850500 - }, - { - "epoch": 2.91, - "learning_rate": 3.567595371435909e-05, - "loss": 0.3459, - "step": 4851000 - }, - { - "epoch": 2.91, - "learning_rate": 3.567385794872964e-05, - "loss": 0.3321, - "step": 4851500 - }, - { - "epoch": 2.91, - "learning_rate": 3.5671757983169074e-05, - "loss": 0.3423, - "step": 4852000 - }, - { - "epoch": 2.91, - "learning_rate": 3.566965801760851e-05, - "loss": 0.3464, - "step": 4852500 - }, - { - "epoch": 2.91, - "learning_rate": 3.566755805204795e-05, - "loss": 0.3433, - "step": 4853000 - }, - { - "epoch": 2.91, - "learning_rate": 3.566545808648738e-05, - "loss": 0.3342, - "step": 4853500 - }, - { - "epoch": 2.91, - "learning_rate": 3.5663362320857935e-05, - "loss": 0.3343, - "step": 4854000 - }, - { - "epoch": 2.91, - "learning_rate": 3.566126235529737e-05, - "loss": 0.3516, - "step": 4854500 - }, - { - "epoch": 2.91, - "learning_rate": 3.565916238973681e-05, - "loss": 0.3265, - "step": 4855000 - }, - { - "epoch": 2.91, - "learning_rate": 3.565706242417624e-05, - "loss": 0.3382, - "step": 4855500 - }, - { - "epoch": 2.91, - "learning_rate": 3.5654962458615675e-05, - "loss": 0.3356, - "step": 4856000 - }, - { - "epoch": 2.91, - "learning_rate": 3.5652862493055115e-05, - "loss": 0.3372, - "step": 4856500 - }, - { - "epoch": 2.91, - "learning_rate": 3.565076672742567e-05, - "loss": 0.3454, - "step": 4857000 - }, - { - "epoch": 2.91, - "learning_rate": 3.56486667618651e-05, - "loss": 0.3425, - "step": 4857500 - }, - { - "epoch": 2.91, - "learning_rate": 3.564656679630454e-05, - "loss": 0.3311, - "step": 4858000 - }, - { - "epoch": 2.91, - "learning_rate": 3.5644466830743976e-05, - "loss": 0.3437, - "step": 4858500 - }, - { - "epoch": 2.91, - "learning_rate": 3.564237106511453e-05, - "loss": 0.3481, - "step": 4859000 - }, - { - "epoch": 2.91, - "learning_rate": 3.564027109955396e-05, - "loss": 0.3455, - "step": 4859500 - }, - { - "epoch": 2.91, - "learning_rate": 3.5638171133993403e-05, - "loss": 0.3479, - "step": 4860000 - }, - { - "epoch": 2.91, - "learning_rate": 3.563607116843284e-05, - "loss": 0.3407, - "step": 4860500 - }, - { - "epoch": 2.91, - "learning_rate": 3.563397120287227e-05, - "loss": 0.3444, - "step": 4861000 - }, - { - "epoch": 2.91, - "learning_rate": 3.563187123731171e-05, - "loss": 0.3443, - "step": 4861500 - }, - { - "epoch": 2.91, - "learning_rate": 3.5629771271751144e-05, - "loss": 0.3387, - "step": 4862000 - }, - { - "epoch": 2.92, - "learning_rate": 3.56276755061217e-05, - "loss": 0.3346, - "step": 4862500 - }, - { - "epoch": 2.92, - "learning_rate": 3.562557554056113e-05, - "loss": 0.3307, - "step": 4863000 - }, - { - "epoch": 2.92, - "learning_rate": 3.562347977493169e-05, - "loss": 0.3418, - "step": 4863500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5621379809371125e-05, - "loss": 0.345, - "step": 4864000 - }, - { - "epoch": 2.92, - "learning_rate": 3.561927984381056e-05, - "loss": 0.3482, - "step": 4864500 - }, - { - "epoch": 2.92, - "learning_rate": 3.561717987825e-05, - "loss": 0.3385, - "step": 4865000 - }, - { - "epoch": 2.92, - "learning_rate": 3.561507991268943e-05, - "loss": 0.347, - "step": 4865500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5612984147059986e-05, - "loss": 0.3456, - "step": 4866000 - }, - { - "epoch": 2.92, - "learning_rate": 3.561088418149942e-05, - "loss": 0.3427, - "step": 4866500 - }, - { - "epoch": 2.92, - "learning_rate": 3.560878421593886e-05, - "loss": 0.3484, - "step": 4867000 - }, - { - "epoch": 2.92, - "learning_rate": 3.560668425037829e-05, - "loss": 0.341, - "step": 4867500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5604584284817726e-05, - "loss": 0.3376, - "step": 4868000 - }, - { - "epoch": 2.92, - "learning_rate": 3.5602484319257166e-05, - "loss": 0.3405, - "step": 4868500 - }, - { - "epoch": 2.92, - "learning_rate": 3.56003843536966e-05, - "loss": 0.3469, - "step": 4869000 - }, - { - "epoch": 2.92, - "learning_rate": 3.559828438813603e-05, - "loss": 0.3483, - "step": 4869500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5596184422575474e-05, - "loss": 0.348, - "step": 4870000 - }, - { - "epoch": 2.92, - "learning_rate": 3.559408865694603e-05, - "loss": 0.3357, - "step": 4870500 - }, - { - "epoch": 2.92, - "learning_rate": 3.559198869138546e-05, - "loss": 0.3349, - "step": 4871000 - }, - { - "epoch": 2.92, - "learning_rate": 3.55898887258249e-05, - "loss": 0.3445, - "step": 4871500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5587788760264334e-05, - "loss": 0.3404, - "step": 4872000 - }, - { - "epoch": 2.92, - "learning_rate": 3.558568879470377e-05, - "loss": 0.3507, - "step": 4872500 - }, - { - "epoch": 2.92, - "learning_rate": 3.55835888291432e-05, - "loss": 0.3419, - "step": 4873000 - }, - { - "epoch": 2.92, - "learning_rate": 3.5581488863582635e-05, - "loss": 0.344, - "step": 4873500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5579397297884315e-05, - "loss": 0.3481, - "step": 4874000 - }, - { - "epoch": 2.92, - "learning_rate": 3.557729733232375e-05, - "loss": 0.3383, - "step": 4874500 - }, - { - "epoch": 2.92, - "learning_rate": 3.557519736676318e-05, - "loss": 0.3356, - "step": 4875000 - }, - { - "epoch": 2.92, - "learning_rate": 3.557309740120262e-05, - "loss": 0.3473, - "step": 4875500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5570997435642056e-05, - "loss": 0.3396, - "step": 4876000 - }, - { - "epoch": 2.92, - "learning_rate": 3.556889747008149e-05, - "loss": 0.342, - "step": 4876500 - }, - { - "epoch": 2.92, - "learning_rate": 3.556679750452093e-05, - "loss": 0.338, - "step": 4877000 - }, - { - "epoch": 2.92, - "learning_rate": 3.556469753896036e-05, - "loss": 0.3389, - "step": 4877500 - }, - { - "epoch": 2.92, - "learning_rate": 3.5562597573399796e-05, - "loss": 0.3527, - "step": 4878000 - }, - { - "epoch": 2.92, - "learning_rate": 3.556049760783923e-05, - "loss": 0.3404, - "step": 4878500 - }, - { - "epoch": 2.93, - "learning_rate": 3.555840184220979e-05, - "loss": 0.3294, - "step": 4879000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5556301876649224e-05, - "loss": 0.3451, - "step": 4879500 - }, - { - "epoch": 2.93, - "learning_rate": 3.555420191108866e-05, - "loss": 0.3253, - "step": 4880000 - }, - { - "epoch": 2.93, - "learning_rate": 3.555210194552809e-05, - "loss": 0.3411, - "step": 4880500 - }, - { - "epoch": 2.93, - "learning_rate": 3.5550001979967524e-05, - "loss": 0.3395, - "step": 4881000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5547902014406964e-05, - "loss": 0.3436, - "step": 4881500 - }, - { - "epoch": 2.93, - "learning_rate": 3.55458020488464e-05, - "loss": 0.3415, - "step": 4882000 - }, - { - "epoch": 2.93, - "learning_rate": 3.554370208328583e-05, - "loss": 0.333, - "step": 4882500 - }, - { - "epoch": 2.93, - "learning_rate": 3.5541606317656385e-05, - "loss": 0.3357, - "step": 4883000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5539510552026945e-05, - "loss": 0.3448, - "step": 4883500 - }, - { - "epoch": 2.93, - "learning_rate": 3.5537410586466385e-05, - "loss": 0.3385, - "step": 4884000 - }, - { - "epoch": 2.93, - "learning_rate": 3.553531062090582e-05, - "loss": 0.3501, - "step": 4884500 - }, - { - "epoch": 2.93, - "learning_rate": 3.553321065534525e-05, - "loss": 0.3388, - "step": 4885000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5531110689784686e-05, - "loss": 0.3396, - "step": 4885500 - }, - { - "epoch": 2.93, - "learning_rate": 3.5529014924155246e-05, - "loss": 0.3337, - "step": 4886000 - }, - { - "epoch": 2.93, - "learning_rate": 3.552691495859468e-05, - "loss": 0.3379, - "step": 4886500 - }, - { - "epoch": 2.93, - "learning_rate": 3.552481499303412e-05, - "loss": 0.3387, - "step": 4887000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5522715027473546e-05, - "loss": 0.3395, - "step": 4887500 - }, - { - "epoch": 2.93, - "learning_rate": 3.552061926184411e-05, - "loss": 0.34, - "step": 4888000 - }, - { - "epoch": 2.93, - "learning_rate": 3.551851929628354e-05, - "loss": 0.3441, - "step": 4888500 - }, - { - "epoch": 2.93, - "learning_rate": 3.551641933072298e-05, - "loss": 0.3389, - "step": 4889000 - }, - { - "epoch": 2.93, - "learning_rate": 3.551431936516241e-05, - "loss": 0.334, - "step": 4889500 - }, - { - "epoch": 2.93, - "learning_rate": 3.551221939960184e-05, - "loss": 0.347, - "step": 4890000 - }, - { - "epoch": 2.93, - "learning_rate": 3.55101236339724e-05, - "loss": 0.3489, - "step": 4890500 - }, - { - "epoch": 2.93, - "learning_rate": 3.550802366841184e-05, - "loss": 0.3519, - "step": 4891000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5505923702851275e-05, - "loss": 0.3409, - "step": 4891500 - }, - { - "epoch": 2.93, - "learning_rate": 3.550382373729071e-05, - "loss": 0.3364, - "step": 4892000 - }, - { - "epoch": 2.93, - "learning_rate": 3.550172377173014e-05, - "loss": 0.338, - "step": 4892500 - }, - { - "epoch": 2.93, - "learning_rate": 3.54996280061007e-05, - "loss": 0.3458, - "step": 4893000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5497528040540135e-05, - "loss": 0.3394, - "step": 4893500 - }, - { - "epoch": 2.93, - "learning_rate": 3.5495428074979576e-05, - "loss": 0.3438, - "step": 4894000 - }, - { - "epoch": 2.93, - "learning_rate": 3.5493328109419e-05, - "loss": 0.3481, - "step": 4894500 - }, - { - "epoch": 2.93, - "learning_rate": 3.5491228143858436e-05, - "loss": 0.3412, - "step": 4895000 - }, - { - "epoch": 2.94, - "learning_rate": 3.5489128178297876e-05, - "loss": 0.3403, - "step": 4895500 - }, - { - "epoch": 2.94, - "learning_rate": 3.548702821273731e-05, - "loss": 0.3388, - "step": 4896000 - }, - { - "epoch": 2.94, - "learning_rate": 3.548493244710787e-05, - "loss": 0.3488, - "step": 4896500 - }, - { - "epoch": 2.94, - "learning_rate": 3.5482832481547297e-05, - "loss": 0.3382, - "step": 4897000 - }, - { - "epoch": 2.94, - "learning_rate": 3.548073251598674e-05, - "loss": 0.3381, - "step": 4897500 - }, - { - "epoch": 2.94, - "learning_rate": 3.547863255042617e-05, - "loss": 0.3476, - "step": 4898000 - }, - { - "epoch": 2.94, - "learning_rate": 3.5476532584865604e-05, - "loss": 0.3351, - "step": 4898500 - }, - { - "epoch": 2.94, - "learning_rate": 3.5474436819236164e-05, - "loss": 0.3383, - "step": 4899000 - }, - { - "epoch": 2.94, - "learning_rate": 3.54723368536756e-05, - "loss": 0.3409, - "step": 4899500 - }, - { - "epoch": 2.94, - "learning_rate": 3.547023688811503e-05, - "loss": 0.3381, - "step": 4900000 - }, - { - "epoch": 2.94, - "eval_loss": 0.33070382475852966, - "eval_runtime": 1120.8105, - "eval_samples_per_second": 469.946, - "eval_steps_per_second": 78.325, - "step": 4900000 - }, - { - "epoch": 2.94, - "learning_rate": 3.546813692255447e-05, - "loss": 0.327, - "step": 4900500 - }, - { - "epoch": 2.94, - "learning_rate": 3.5466036956993905e-05, - "loss": 0.3382, - "step": 4901000 - }, - { - "epoch": 2.94, - "learning_rate": 3.546393699143334e-05, - "loss": 0.3389, - "step": 4901500 - }, - { - "epoch": 2.94, - "learning_rate": 3.546183702587278e-05, - "loss": 0.3382, - "step": 4902000 - }, - { - "epoch": 2.94, - "learning_rate": 3.545973706031221e-05, - "loss": 0.3503, - "step": 4902500 - }, - { - "epoch": 2.94, - "learning_rate": 3.5457641294682765e-05, - "loss": 0.3299, - "step": 4903000 - }, - { - "epoch": 2.94, - "learning_rate": 3.54555413291222e-05, - "loss": 0.3327, - "step": 4903500 - }, - { - "epoch": 2.94, - "learning_rate": 3.545344136356164e-05, - "loss": 0.3377, - "step": 4904000 - }, - { - "epoch": 2.94, - "learning_rate": 3.545134139800107e-05, - "loss": 0.3366, - "step": 4904500 - }, - { - "epoch": 2.94, - "learning_rate": 3.5449245632371626e-05, - "loss": 0.3396, - "step": 4905000 - }, - { - "epoch": 2.94, - "learning_rate": 3.544714566681106e-05, - "loss": 0.3399, - "step": 4905500 - }, - { - "epoch": 2.94, - "learning_rate": 3.54450457012505e-05, - "loss": 0.334, - "step": 4906000 - }, - { - "epoch": 2.94, - "learning_rate": 3.544294573568993e-05, - "loss": 0.3395, - "step": 4906500 - }, - { - "epoch": 2.94, - "learning_rate": 3.544084997006049e-05, - "loss": 0.3378, - "step": 4907000 - }, - { - "epoch": 2.94, - "learning_rate": 3.543875000449993e-05, - "loss": 0.3379, - "step": 4907500 - }, - { - "epoch": 2.94, - "learning_rate": 3.543665003893936e-05, - "loss": 0.3453, - "step": 4908000 - }, - { - "epoch": 2.94, - "learning_rate": 3.543455427330992e-05, - "loss": 0.3405, - "step": 4908500 - }, - { - "epoch": 2.94, - "learning_rate": 3.543245430774935e-05, - "loss": 0.3434, - "step": 4909000 - }, - { - "epoch": 2.94, - "learning_rate": 3.543035434218879e-05, - "loss": 0.346, - "step": 4909500 - }, - { - "epoch": 2.94, - "learning_rate": 3.542825437662822e-05, - "loss": 0.348, - "step": 4910000 - }, - { - "epoch": 2.94, - "learning_rate": 3.5426154411067655e-05, - "loss": 0.3419, - "step": 4910500 - }, - { - "epoch": 2.94, - "learning_rate": 3.5424054445507095e-05, - "loss": 0.3433, - "step": 4911000 - }, - { - "epoch": 2.94, - "learning_rate": 3.542195447994653e-05, - "loss": 0.3459, - "step": 4911500 - }, - { - "epoch": 2.94, - "learning_rate": 3.541985451438596e-05, - "loss": 0.3413, - "step": 4912000 - }, - { - "epoch": 2.95, - "learning_rate": 3.5417758748756515e-05, - "loss": 0.3429, - "step": 4912500 - }, - { - "epoch": 2.95, - "learning_rate": 3.5415658783195956e-05, - "loss": 0.3298, - "step": 4913000 - }, - { - "epoch": 2.95, - "learning_rate": 3.541355881763539e-05, - "loss": 0.3439, - "step": 4913500 - }, - { - "epoch": 2.95, - "learning_rate": 3.541145885207482e-05, - "loss": 0.3358, - "step": 4914000 - }, - { - "epoch": 2.95, - "learning_rate": 3.540936308644538e-05, - "loss": 0.3502, - "step": 4914500 - }, - { - "epoch": 2.95, - "learning_rate": 3.540726732081594e-05, - "loss": 0.3396, - "step": 4915000 - }, - { - "epoch": 2.95, - "learning_rate": 3.540516735525538e-05, - "loss": 0.3483, - "step": 4915500 - }, - { - "epoch": 2.95, - "learning_rate": 3.5403067389694803e-05, - "loss": 0.3446, - "step": 4916000 - }, - { - "epoch": 2.95, - "learning_rate": 3.5400967424134244e-05, - "loss": 0.3304, - "step": 4916500 - }, - { - "epoch": 2.95, - "learning_rate": 3.539886745857368e-05, - "loss": 0.3416, - "step": 4917000 - }, - { - "epoch": 2.95, - "learning_rate": 3.539676749301311e-05, - "loss": 0.3342, - "step": 4917500 - }, - { - "epoch": 2.95, - "learning_rate": 3.539466752745255e-05, - "loss": 0.3345, - "step": 4918000 - }, - { - "epoch": 2.95, - "learning_rate": 3.5392567561891984e-05, - "loss": 0.3381, - "step": 4918500 - }, - { - "epoch": 2.95, - "learning_rate": 3.539047179626254e-05, - "loss": 0.3403, - "step": 4919000 - }, - { - "epoch": 2.95, - "learning_rate": 3.538837183070197e-05, - "loss": 0.3424, - "step": 4919500 - }, - { - "epoch": 2.95, - "learning_rate": 3.538627606507253e-05, - "loss": 0.3378, - "step": 4920000 - }, - { - "epoch": 2.95, - "learning_rate": 3.5384176099511965e-05, - "loss": 0.3363, - "step": 4920500 - }, - { - "epoch": 2.95, - "learning_rate": 3.5382080333882525e-05, - "loss": 0.3503, - "step": 4921000 - }, - { - "epoch": 2.95, - "learning_rate": 3.537998036832196e-05, - "loss": 0.3405, - "step": 4921500 - }, - { - "epoch": 2.95, - "learning_rate": 3.53778804027614e-05, - "loss": 0.3463, - "step": 4922000 - }, - { - "epoch": 2.95, - "learning_rate": 3.537578043720083e-05, - "loss": 0.3484, - "step": 4922500 - }, - { - "epoch": 2.95, - "learning_rate": 3.537368047164026e-05, - "loss": 0.3401, - "step": 4923000 - }, - { - "epoch": 2.95, - "learning_rate": 3.53715805060797e-05, - "loss": 0.3415, - "step": 4923500 - }, - { - "epoch": 2.95, - "learning_rate": 3.536948054051913e-05, - "loss": 0.3375, - "step": 4924000 - }, - { - "epoch": 2.95, - "learning_rate": 3.5367380574958566e-05, - "loss": 0.3402, - "step": 4924500 - }, - { - "epoch": 2.95, - "learning_rate": 3.536528060939801e-05, - "loss": 0.3355, - "step": 4925000 - }, - { - "epoch": 2.95, - "learning_rate": 3.536318064383744e-05, - "loss": 0.3415, - "step": 4925500 - }, - { - "epoch": 2.95, - "learning_rate": 3.5361084878207994e-05, - "loss": 0.3419, - "step": 4926000 - }, - { - "epoch": 2.95, - "learning_rate": 3.535898491264743e-05, - "loss": 0.336, - "step": 4926500 - }, - { - "epoch": 2.95, - "learning_rate": 3.535688494708687e-05, - "loss": 0.3362, - "step": 4927000 - }, - { - "epoch": 2.95, - "learning_rate": 3.53547849815263e-05, - "loss": 0.3372, - "step": 4927500 - }, - { - "epoch": 2.95, - "learning_rate": 3.5352685015965734e-05, - "loss": 0.3377, - "step": 4928000 - }, - { - "epoch": 2.95, - "learning_rate": 3.5350585050405175e-05, - "loss": 0.3318, - "step": 4928500 - }, - { - "epoch": 2.96, - "learning_rate": 3.534848508484461e-05, - "loss": 0.3323, - "step": 4929000 - }, - { - "epoch": 2.96, - "learning_rate": 3.534638931921516e-05, - "loss": 0.3455, - "step": 4929500 - }, - { - "epoch": 2.96, - "learning_rate": 3.53442893536546e-05, - "loss": 0.3429, - "step": 4930000 - }, - { - "epoch": 2.96, - "learning_rate": 3.5342193588025155e-05, - "loss": 0.3481, - "step": 4930500 - }, - { - "epoch": 2.96, - "learning_rate": 3.534009362246459e-05, - "loss": 0.3416, - "step": 4931000 - }, - { - "epoch": 2.96, - "learning_rate": 3.533799365690402e-05, - "loss": 0.342, - "step": 4931500 - }, - { - "epoch": 2.96, - "learning_rate": 3.533589369134346e-05, - "loss": 0.3406, - "step": 4932000 - }, - { - "epoch": 2.96, - "learning_rate": 3.5333793725782896e-05, - "loss": 0.3396, - "step": 4932500 - }, - { - "epoch": 2.96, - "learning_rate": 3.533169376022233e-05, - "loss": 0.3386, - "step": 4933000 - }, - { - "epoch": 2.96, - "learning_rate": 3.532959379466177e-05, - "loss": 0.3375, - "step": 4933500 - }, - { - "epoch": 2.96, - "learning_rate": 3.53274938291012e-05, - "loss": 0.3446, - "step": 4934000 - }, - { - "epoch": 2.96, - "learning_rate": 3.532539386354064e-05, - "loss": 0.3285, - "step": 4934500 - }, - { - "epoch": 2.96, - "learning_rate": 3.532329389798008e-05, - "loss": 0.3417, - "step": 4935000 - }, - { - "epoch": 2.96, - "learning_rate": 3.532119813235063e-05, - "loss": 0.3459, - "step": 4935500 - }, - { - "epoch": 2.96, - "learning_rate": 3.5319098166790064e-05, - "loss": 0.3421, - "step": 4936000 - }, - { - "epoch": 2.96, - "learning_rate": 3.5316998201229504e-05, - "loss": 0.3377, - "step": 4936500 - }, - { - "epoch": 2.96, - "learning_rate": 3.531490243560006e-05, - "loss": 0.3378, - "step": 4937000 - }, - { - "epoch": 2.96, - "learning_rate": 3.531280247003949e-05, - "loss": 0.3428, - "step": 4937500 - }, - { - "epoch": 2.96, - "learning_rate": 3.5310702504478925e-05, - "loss": 0.3383, - "step": 4938000 - }, - { - "epoch": 2.96, - "learning_rate": 3.5308602538918365e-05, - "loss": 0.335, - "step": 4938500 - }, - { - "epoch": 2.96, - "learning_rate": 3.530650677328892e-05, - "loss": 0.3365, - "step": 4939000 - }, - { - "epoch": 2.96, - "learning_rate": 3.530440680772835e-05, - "loss": 0.345, - "step": 4939500 - }, - { - "epoch": 2.96, - "learning_rate": 3.5302306842167785e-05, - "loss": 0.3345, - "step": 4940000 - }, - { - "epoch": 2.96, - "learning_rate": 3.5300206876607226e-05, - "loss": 0.3406, - "step": 4940500 - }, - { - "epoch": 2.96, - "learning_rate": 3.529810691104666e-05, - "loss": 0.3419, - "step": 4941000 - }, - { - "epoch": 2.96, - "learning_rate": 3.529600694548609e-05, - "loss": 0.3371, - "step": 4941500 - }, - { - "epoch": 2.96, - "learning_rate": 3.529390697992553e-05, - "loss": 0.3398, - "step": 4942000 - }, - { - "epoch": 2.96, - "learning_rate": 3.5291807014364966e-05, - "loss": 0.3468, - "step": 4942500 - }, - { - "epoch": 2.96, - "learning_rate": 3.52897070488044e-05, - "loss": 0.3357, - "step": 4943000 - }, - { - "epoch": 2.96, - "learning_rate": 3.528760708324383e-05, - "loss": 0.3402, - "step": 4943500 - }, - { - "epoch": 2.96, - "learning_rate": 3.5285507117683267e-05, - "loss": 0.3426, - "step": 4944000 - }, - { - "epoch": 2.96, - "learning_rate": 3.528340715212271e-05, - "loss": 0.3402, - "step": 4944500 - }, - { - "epoch": 2.96, - "learning_rate": 3.528130718656214e-05, - "loss": 0.3364, - "step": 4945000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5279211420932694e-05, - "loss": 0.3338, - "step": 4945500 - }, - { - "epoch": 2.97, - "learning_rate": 3.527711145537213e-05, - "loss": 0.3345, - "step": 4946000 - }, - { - "epoch": 2.97, - "learning_rate": 3.527501148981157e-05, - "loss": 0.3329, - "step": 4946500 - }, - { - "epoch": 2.97, - "learning_rate": 3.5272911524251e-05, - "loss": 0.352, - "step": 4947000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5270811558690434e-05, - "loss": 0.3372, - "step": 4947500 - }, - { - "epoch": 2.97, - "learning_rate": 3.5268711593129875e-05, - "loss": 0.3426, - "step": 4948000 - }, - { - "epoch": 2.97, - "learning_rate": 3.526661162756931e-05, - "loss": 0.3414, - "step": 4948500 - }, - { - "epoch": 2.97, - "learning_rate": 3.526451586193986e-05, - "loss": 0.3407, - "step": 4949000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5262415896379295e-05, - "loss": 0.3369, - "step": 4949500 - }, - { - "epoch": 2.97, - "learning_rate": 3.5260315930818735e-05, - "loss": 0.3492, - "step": 4950000 - }, - { - "epoch": 2.97, - "learning_rate": 3.525821596525817e-05, - "loss": 0.3453, - "step": 4950500 - }, - { - "epoch": 2.97, - "learning_rate": 3.52561159996976e-05, - "loss": 0.3446, - "step": 4951000 - }, - { - "epoch": 2.97, - "learning_rate": 3.525402023406816e-05, - "loss": 0.3377, - "step": 4951500 - }, - { - "epoch": 2.97, - "learning_rate": 3.5251920268507596e-05, - "loss": 0.3498, - "step": 4952000 - }, - { - "epoch": 2.97, - "learning_rate": 3.524982030294703e-05, - "loss": 0.3407, - "step": 4952500 - }, - { - "epoch": 2.97, - "learning_rate": 3.524772033738647e-05, - "loss": 0.3436, - "step": 4953000 - }, - { - "epoch": 2.97, - "learning_rate": 3.52456203718259e-05, - "loss": 0.3488, - "step": 4953500 - }, - { - "epoch": 2.97, - "learning_rate": 3.524352040626534e-05, - "loss": 0.3342, - "step": 4954000 - }, - { - "epoch": 2.97, - "learning_rate": 3.524142044070478e-05, - "loss": 0.3326, - "step": 4954500 - }, - { - "epoch": 2.97, - "learning_rate": 3.523932047514421e-05, - "loss": 0.3393, - "step": 4955000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5237224709514764e-05, - "loss": 0.3517, - "step": 4955500 - }, - { - "epoch": 2.97, - "learning_rate": 3.52351247439542e-05, - "loss": 0.3416, - "step": 4956000 - }, - { - "epoch": 2.97, - "learning_rate": 3.523302477839364e-05, - "loss": 0.3374, - "step": 4956500 - }, - { - "epoch": 2.97, - "learning_rate": 3.523092481283307e-05, - "loss": 0.3432, - "step": 4957000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5228829047203625e-05, - "loss": 0.3443, - "step": 4957500 - }, - { - "epoch": 2.97, - "learning_rate": 3.522672908164306e-05, - "loss": 0.3434, - "step": 4958000 - }, - { - "epoch": 2.97, - "learning_rate": 3.52246291160825e-05, - "loss": 0.3468, - "step": 4958500 - }, - { - "epoch": 2.97, - "learning_rate": 3.522252915052193e-05, - "loss": 0.3441, - "step": 4959000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5220429184961365e-05, - "loss": 0.3469, - "step": 4959500 - }, - { - "epoch": 2.97, - "learning_rate": 3.5218329219400806e-05, - "loss": 0.3362, - "step": 4960000 - }, - { - "epoch": 2.97, - "learning_rate": 3.521623765370248e-05, - "loss": 0.3374, - "step": 4960500 - }, - { - "epoch": 2.97, - "learning_rate": 3.521413768814191e-05, - "loss": 0.3464, - "step": 4961000 - }, - { - "epoch": 2.97, - "learning_rate": 3.5212037722581346e-05, - "loss": 0.3423, - "step": 4961500 - }, - { - "epoch": 2.97, - "learning_rate": 3.5209937757020786e-05, - "loss": 0.3357, - "step": 4962000 - }, - { - "epoch": 2.98, - "learning_rate": 3.520783779146022e-05, - "loss": 0.3522, - "step": 4962500 - }, - { - "epoch": 2.98, - "learning_rate": 3.520573782589965e-05, - "loss": 0.3466, - "step": 4963000 - }, - { - "epoch": 2.98, - "learning_rate": 3.5203637860339094e-05, - "loss": 0.3349, - "step": 4963500 - }, - { - "epoch": 2.98, - "learning_rate": 3.520153789477853e-05, - "loss": 0.3423, - "step": 4964000 - }, - { - "epoch": 2.98, - "learning_rate": 3.519943792921796e-05, - "loss": 0.3379, - "step": 4964500 - }, - { - "epoch": 2.98, - "learning_rate": 3.5197337963657394e-05, - "loss": 0.3344, - "step": 4965000 - }, - { - "epoch": 2.98, - "learning_rate": 3.5195242198027954e-05, - "loss": 0.3374, - "step": 4965500 - }, - { - "epoch": 2.98, - "learning_rate": 3.519314223246739e-05, - "loss": 0.3415, - "step": 4966000 - }, - { - "epoch": 2.98, - "learning_rate": 3.519104226690682e-05, - "loss": 0.3484, - "step": 4966500 - }, - { - "epoch": 2.98, - "learning_rate": 3.518894650127738e-05, - "loss": 0.3384, - "step": 4967000 - }, - { - "epoch": 2.98, - "learning_rate": 3.5186846535716815e-05, - "loss": 0.3333, - "step": 4967500 - }, - { - "epoch": 2.98, - "learning_rate": 3.518474657015625e-05, - "loss": 0.3452, - "step": 4968000 - }, - { - "epoch": 2.98, - "learning_rate": 3.518264660459569e-05, - "loss": 0.335, - "step": 4968500 - }, - { - "epoch": 2.98, - "learning_rate": 3.518054663903512e-05, - "loss": 0.331, - "step": 4969000 - }, - { - "epoch": 2.98, - "learning_rate": 3.5178446673474556e-05, - "loss": 0.3386, - "step": 4969500 - }, - { - "epoch": 2.98, - "learning_rate": 3.517634670791399e-05, - "loss": 0.3435, - "step": 4970000 - }, - { - "epoch": 2.98, - "learning_rate": 3.517425094228455e-05, - "loss": 0.3357, - "step": 4970500 - }, - { - "epoch": 2.98, - "learning_rate": 3.517215097672398e-05, - "loss": 0.3327, - "step": 4971000 - }, - { - "epoch": 2.98, - "learning_rate": 3.5170051011163416e-05, - "loss": 0.3366, - "step": 4971500 - }, - { - "epoch": 2.98, - "learning_rate": 3.516795104560285e-05, - "loss": 0.335, - "step": 4972000 - }, - { - "epoch": 2.98, - "learning_rate": 3.516585108004228e-05, - "loss": 0.3436, - "step": 4972500 - }, - { - "epoch": 2.98, - "learning_rate": 3.516375111448172e-05, - "loss": 0.3455, - "step": 4973000 - }, - { - "epoch": 2.98, - "learning_rate": 3.516165114892116e-05, - "loss": 0.3414, - "step": 4973500 - }, - { - "epoch": 2.98, - "learning_rate": 3.515955118336059e-05, - "loss": 0.3387, - "step": 4974000 - }, - { - "epoch": 2.98, - "learning_rate": 3.5157451217800024e-05, - "loss": 0.3313, - "step": 4974500 - }, - { - "epoch": 2.98, - "learning_rate": 3.5155351252239464e-05, - "loss": 0.3492, - "step": 4975000 - }, - { - "epoch": 2.98, - "learning_rate": 3.515325548661002e-05, - "loss": 0.3413, - "step": 4975500 - }, - { - "epoch": 2.98, - "learning_rate": 3.515115552104945e-05, - "loss": 0.3388, - "step": 4976000 - }, - { - "epoch": 2.98, - "learning_rate": 3.514905555548889e-05, - "loss": 0.3426, - "step": 4976500 - }, - { - "epoch": 2.98, - "learning_rate": 3.5146955589928325e-05, - "loss": 0.3387, - "step": 4977000 - }, - { - "epoch": 2.98, - "learning_rate": 3.514485562436776e-05, - "loss": 0.3503, - "step": 4977500 - }, - { - "epoch": 2.98, - "learning_rate": 3.514275985873831e-05, - "loss": 0.3432, - "step": 4978000 - }, - { - "epoch": 2.98, - "learning_rate": 3.514065989317775e-05, - "loss": 0.3406, - "step": 4978500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5138559927617186e-05, - "loss": 0.3413, - "step": 4979000 - }, - { - "epoch": 2.99, - "learning_rate": 3.513645996205662e-05, - "loss": 0.3396, - "step": 4979500 - }, - { - "epoch": 2.99, - "learning_rate": 3.513436419642717e-05, - "loss": 0.3439, - "step": 4980000 - }, - { - "epoch": 2.99, - "learning_rate": 3.513226423086661e-05, - "loss": 0.331, - "step": 4980500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5130164265306046e-05, - "loss": 0.3443, - "step": 4981000 - }, - { - "epoch": 2.99, - "learning_rate": 3.512806429974548e-05, - "loss": 0.3364, - "step": 4981500 - }, - { - "epoch": 2.99, - "learning_rate": 3.512596433418492e-05, - "loss": 0.3305, - "step": 4982000 - }, - { - "epoch": 2.99, - "learning_rate": 3.5123864368624354e-05, - "loss": 0.3379, - "step": 4982500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5121764403063794e-05, - "loss": 0.3414, - "step": 4983000 - }, - { - "epoch": 2.99, - "learning_rate": 3.511966443750323e-05, - "loss": 0.3357, - "step": 4983500 - }, - { - "epoch": 2.99, - "learning_rate": 3.511756867187378e-05, - "loss": 0.3426, - "step": 4984000 - }, - { - "epoch": 2.99, - "learning_rate": 3.5115468706313214e-05, - "loss": 0.3472, - "step": 4984500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5113368740752654e-05, - "loss": 0.3371, - "step": 4985000 - }, - { - "epoch": 2.99, - "learning_rate": 3.511126877519209e-05, - "loss": 0.3346, - "step": 4985500 - }, - { - "epoch": 2.99, - "learning_rate": 3.510917300956264e-05, - "loss": 0.3419, - "step": 4986000 - }, - { - "epoch": 2.99, - "learning_rate": 3.5107073044002075e-05, - "loss": 0.3479, - "step": 4986500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5104973078441515e-05, - "loss": 0.3433, - "step": 4987000 - }, - { - "epoch": 2.99, - "learning_rate": 3.510287731281207e-05, - "loss": 0.3569, - "step": 4987500 - }, - { - "epoch": 2.99, - "learning_rate": 3.51007773472515e-05, - "loss": 0.3461, - "step": 4988000 - }, - { - "epoch": 2.99, - "learning_rate": 3.509867738169094e-05, - "loss": 0.3421, - "step": 4988500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5096577416130376e-05, - "loss": 0.3359, - "step": 4989000 - }, - { - "epoch": 2.99, - "learning_rate": 3.509447745056981e-05, - "loss": 0.3415, - "step": 4989500 - }, - { - "epoch": 2.99, - "learning_rate": 3.509237748500925e-05, - "loss": 0.3485, - "step": 4990000 - }, - { - "epoch": 2.99, - "learning_rate": 3.509027751944868e-05, - "loss": 0.3349, - "step": 4990500 - }, - { - "epoch": 2.99, - "learning_rate": 3.5088177553888117e-05, - "loss": 0.3505, - "step": 4991000 - }, - { - "epoch": 2.99, - "learning_rate": 3.508608178825867e-05, - "loss": 0.341, - "step": 4991500 - }, - { - "epoch": 2.99, - "learning_rate": 3.508398182269811e-05, - "loss": 0.3331, - "step": 4992000 - }, - { - "epoch": 2.99, - "learning_rate": 3.5081881857137544e-05, - "loss": 0.3475, - "step": 4992500 - }, - { - "epoch": 2.99, - "learning_rate": 3.50797860915081e-05, - "loss": 0.3364, - "step": 4993000 - }, - { - "epoch": 2.99, - "learning_rate": 3.507768612594753e-05, - "loss": 0.3375, - "step": 4993500 - }, - { - "epoch": 2.99, - "learning_rate": 3.507558616038697e-05, - "loss": 0.3433, - "step": 4994000 - }, - { - "epoch": 2.99, - "learning_rate": 3.5073490394757525e-05, - "loss": 0.3485, - "step": 4994500 - }, - { - "epoch": 2.99, - "learning_rate": 3.507139042919696e-05, - "loss": 0.3566, - "step": 4995000 - }, - { - "epoch": 3.0, - "learning_rate": 3.50692904636364e-05, - "loss": 0.3505, - "step": 4995500 - }, - { - "epoch": 3.0, - "learning_rate": 3.506719469800695e-05, - "loss": 0.336, - "step": 4996000 - }, - { - "epoch": 3.0, - "learning_rate": 3.5065094732446385e-05, - "loss": 0.3339, - "step": 4996500 - }, - { - "epoch": 3.0, - "learning_rate": 3.506299476688582e-05, - "loss": 0.3442, - "step": 4997000 - }, - { - "epoch": 3.0, - "learning_rate": 3.506089480132526e-05, - "loss": 0.3425, - "step": 4997500 - }, - { - "epoch": 3.0, - "learning_rate": 3.505879483576469e-05, - "loss": 0.3285, - "step": 4998000 - }, - { - "epoch": 3.0, - "learning_rate": 3.5056694870204126e-05, - "loss": 0.3422, - "step": 4998500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5054594904643566e-05, - "loss": 0.3358, - "step": 4999000 - }, - { - "epoch": 3.0, - "learning_rate": 3.5052494939083e-05, - "loss": 0.3369, - "step": 4999500 - }, - { - "epoch": 3.0, - "learning_rate": 3.505039497352243e-05, - "loss": 0.339, - "step": 5000000 - }, - { - "epoch": 3.0, - "eval_loss": 0.3291718661785126, - "eval_runtime": 1121.0384, - "eval_samples_per_second": 469.85, - "eval_steps_per_second": 78.309, - "step": 5000000 - }, - { - "epoch": 3.0, - "learning_rate": 3.504829500796187e-05, - "loss": 0.3354, - "step": 5000500 - }, - { - "epoch": 3.0, - "learning_rate": 3.504619504240131e-05, - "loss": 0.3497, - "step": 5001000 - }, - { - "epoch": 3.0, - "learning_rate": 3.5044095076840734e-05, - "loss": 0.3456, - "step": 5001500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5041995111280174e-05, - "loss": 0.3401, - "step": 5002000 - }, - { - "epoch": 3.0, - "learning_rate": 3.5039899345650734e-05, - "loss": 0.3412, - "step": 5002500 - }, - { - "epoch": 3.0, - "learning_rate": 3.503779938009017e-05, - "loss": 0.3382, - "step": 5003000 - }, - { - "epoch": 3.0, - "learning_rate": 3.50356994145296e-05, - "loss": 0.3387, - "step": 5003500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5033599448969034e-05, - "loss": 0.3377, - "step": 5004000 - }, - { - "epoch": 3.0, - "learning_rate": 3.503149948340847e-05, - "loss": 0.3226, - "step": 5004500 - }, - { - "epoch": 3.0, - "learning_rate": 3.502939951784791e-05, - "loss": 0.3159, - "step": 5005000 - }, - { - "epoch": 3.0, - "learning_rate": 3.502729955228734e-05, - "loss": 0.3247, - "step": 5005500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5025199586726775e-05, - "loss": 0.3298, - "step": 5006000 - }, - { - "epoch": 3.0, - "learning_rate": 3.502310382109733e-05, - "loss": 0.3218, - "step": 5006500 - }, - { - "epoch": 3.0, - "learning_rate": 3.502100385553677e-05, - "loss": 0.3299, - "step": 5007000 - }, - { - "epoch": 3.0, - "learning_rate": 3.50189038899762e-05, - "loss": 0.3296, - "step": 5007500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5016803924415636e-05, - "loss": 0.3237, - "step": 5008000 - }, - { - "epoch": 3.0, - "learning_rate": 3.501470815878619e-05, - "loss": 0.3258, - "step": 5008500 - }, - { - "epoch": 3.0, - "learning_rate": 3.501260819322563e-05, - "loss": 0.3215, - "step": 5009000 - }, - { - "epoch": 3.0, - "learning_rate": 3.501050822766506e-05, - "loss": 0.3223, - "step": 5009500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5008408262104497e-05, - "loss": 0.3316, - "step": 5010000 - }, - { - "epoch": 3.0, - "learning_rate": 3.500631249647506e-05, - "loss": 0.3378, - "step": 5010500 - }, - { - "epoch": 3.0, - "learning_rate": 3.500421673084562e-05, - "loss": 0.3298, - "step": 5011000 - }, - { - "epoch": 3.0, - "learning_rate": 3.500211676528505e-05, - "loss": 0.3247, - "step": 5011500 - }, - { - "epoch": 3.0, - "learning_rate": 3.5000016799724484e-05, - "loss": 0.3306, - "step": 5012000 - }, - { - "epoch": 3.01, - "learning_rate": 3.4997916834163924e-05, - "loss": 0.3243, - "step": 5012500 - }, - { - "epoch": 3.01, - "learning_rate": 3.499581686860336e-05, - "loss": 0.3294, - "step": 5013000 - }, - { - "epoch": 3.01, - "learning_rate": 3.4993716903042785e-05, - "loss": 0.3299, - "step": 5013500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4991616937482225e-05, - "loss": 0.3248, - "step": 5014000 - }, - { - "epoch": 3.01, - "learning_rate": 3.4989521171852785e-05, - "loss": 0.3295, - "step": 5014500 - }, - { - "epoch": 3.01, - "learning_rate": 3.498742120629222e-05, - "loss": 0.3232, - "step": 5015000 - }, - { - "epoch": 3.01, - "learning_rate": 3.498532124073165e-05, - "loss": 0.3285, - "step": 5015500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4983221275171086e-05, - "loss": 0.3353, - "step": 5016000 - }, - { - "epoch": 3.01, - "learning_rate": 3.498112130961052e-05, - "loss": 0.3242, - "step": 5016500 - }, - { - "epoch": 3.01, - "learning_rate": 3.497902134404995e-05, - "loss": 0.3198, - "step": 5017000 - }, - { - "epoch": 3.01, - "learning_rate": 3.497692137848939e-05, - "loss": 0.3278, - "step": 5017500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4974821412928826e-05, - "loss": 0.335, - "step": 5018000 - }, - { - "epoch": 3.01, - "learning_rate": 3.4972729847230507e-05, - "loss": 0.3249, - "step": 5018500 - }, - { - "epoch": 3.01, - "learning_rate": 3.497062988166994e-05, - "loss": 0.3348, - "step": 5019000 - }, - { - "epoch": 3.01, - "learning_rate": 3.496852991610938e-05, - "loss": 0.3334, - "step": 5019500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4966429950548814e-05, - "loss": 0.3286, - "step": 5020000 - }, - { - "epoch": 3.01, - "learning_rate": 3.496432998498824e-05, - "loss": 0.3282, - "step": 5020500 - }, - { - "epoch": 3.01, - "learning_rate": 3.496223001942768e-05, - "loss": 0.3286, - "step": 5021000 - }, - { - "epoch": 3.01, - "learning_rate": 3.4960130053867114e-05, - "loss": 0.3361, - "step": 5021500 - }, - { - "epoch": 3.01, - "learning_rate": 3.495803008830655e-05, - "loss": 0.3342, - "step": 5022000 - }, - { - "epoch": 3.01, - "learning_rate": 3.495593852260823e-05, - "loss": 0.3323, - "step": 5022500 - }, - { - "epoch": 3.01, - "learning_rate": 3.495383855704766e-05, - "loss": 0.3262, - "step": 5023000 - }, - { - "epoch": 3.01, - "learning_rate": 3.49517385914871e-05, - "loss": 0.3307, - "step": 5023500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4949638625926535e-05, - "loss": 0.3244, - "step": 5024000 - }, - { - "epoch": 3.01, - "learning_rate": 3.494753866036597e-05, - "loss": 0.3311, - "step": 5024500 - }, - { - "epoch": 3.01, - "learning_rate": 3.494543869480541e-05, - "loss": 0.3233, - "step": 5025000 - }, - { - "epoch": 3.01, - "learning_rate": 3.4943338729244836e-05, - "loss": 0.3299, - "step": 5025500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4941238763684276e-05, - "loss": 0.3227, - "step": 5026000 - }, - { - "epoch": 3.01, - "learning_rate": 3.493913879812371e-05, - "loss": 0.3236, - "step": 5026500 - }, - { - "epoch": 3.01, - "learning_rate": 3.493703883256314e-05, - "loss": 0.3307, - "step": 5027000 - }, - { - "epoch": 3.01, - "learning_rate": 3.493493886700258e-05, - "loss": 0.3275, - "step": 5027500 - }, - { - "epoch": 3.01, - "learning_rate": 3.4932843101373137e-05, - "loss": 0.3258, - "step": 5028000 - }, - { - "epoch": 3.01, - "learning_rate": 3.493074313581257e-05, - "loss": 0.3259, - "step": 5028500 - }, - { - "epoch": 3.02, - "learning_rate": 3.4928643170252003e-05, - "loss": 0.3347, - "step": 5029000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4926543204691444e-05, - "loss": 0.3265, - "step": 5029500 - }, - { - "epoch": 3.02, - "learning_rate": 3.492444323913088e-05, - "loss": 0.3352, - "step": 5030000 - }, - { - "epoch": 3.02, - "learning_rate": 3.492234327357031e-05, - "loss": 0.3329, - "step": 5030500 - }, - { - "epoch": 3.02, - "learning_rate": 3.4920247507940864e-05, - "loss": 0.3327, - "step": 5031000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4918147542380304e-05, - "loss": 0.3255, - "step": 5031500 - }, - { - "epoch": 3.02, - "learning_rate": 3.491604757681974e-05, - "loss": 0.3231, - "step": 5032000 - }, - { - "epoch": 3.02, - "learning_rate": 3.491394761125917e-05, - "loss": 0.3218, - "step": 5032500 - }, - { - "epoch": 3.02, - "learning_rate": 3.491184764569861e-05, - "loss": 0.3368, - "step": 5033000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4909747680138045e-05, - "loss": 0.3366, - "step": 5033500 - }, - { - "epoch": 3.02, - "learning_rate": 3.490764771457748e-05, - "loss": 0.3219, - "step": 5034000 - }, - { - "epoch": 3.02, - "learning_rate": 3.490554774901692e-05, - "loss": 0.3354, - "step": 5034500 - }, - { - "epoch": 3.02, - "learning_rate": 3.490345198338747e-05, - "loss": 0.3237, - "step": 5035000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4901352017826906e-05, - "loss": 0.3323, - "step": 5035500 - }, - { - "epoch": 3.02, - "learning_rate": 3.4899252052266346e-05, - "loss": 0.3282, - "step": 5036000 - }, - { - "epoch": 3.02, - "learning_rate": 3.489715208670578e-05, - "loss": 0.3363, - "step": 5036500 - }, - { - "epoch": 3.02, - "learning_rate": 3.489506052100746e-05, - "loss": 0.3293, - "step": 5037000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4892960555446887e-05, - "loss": 0.3259, - "step": 5037500 - }, - { - "epoch": 3.02, - "learning_rate": 3.489086058988632e-05, - "loss": 0.3309, - "step": 5038000 - }, - { - "epoch": 3.02, - "learning_rate": 3.488876062432576e-05, - "loss": 0.328, - "step": 5038500 - }, - { - "epoch": 3.02, - "learning_rate": 3.4886660658765194e-05, - "loss": 0.3307, - "step": 5039000 - }, - { - "epoch": 3.02, - "learning_rate": 3.488456069320463e-05, - "loss": 0.3284, - "step": 5039500 - }, - { - "epoch": 3.02, - "learning_rate": 3.488246072764407e-05, - "loss": 0.3264, - "step": 5040000 - }, - { - "epoch": 3.02, - "learning_rate": 3.488036496201462e-05, - "loss": 0.3313, - "step": 5040500 - }, - { - "epoch": 3.02, - "learning_rate": 3.4878264996454054e-05, - "loss": 0.3401, - "step": 5041000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4876165030893495e-05, - "loss": 0.3253, - "step": 5041500 - }, - { - "epoch": 3.02, - "learning_rate": 3.487406506533293e-05, - "loss": 0.3268, - "step": 5042000 - }, - { - "epoch": 3.02, - "learning_rate": 3.487196509977236e-05, - "loss": 0.337, - "step": 5042500 - }, - { - "epoch": 3.02, - "learning_rate": 3.48698651342118e-05, - "loss": 0.3267, - "step": 5043000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4867769368582355e-05, - "loss": 0.334, - "step": 5043500 - }, - { - "epoch": 3.02, - "learning_rate": 3.486566940302179e-05, - "loss": 0.3344, - "step": 5044000 - }, - { - "epoch": 3.02, - "learning_rate": 3.486356943746122e-05, - "loss": 0.3336, - "step": 5044500 - }, - { - "epoch": 3.02, - "learning_rate": 3.486146947190066e-05, - "loss": 0.3268, - "step": 5045000 - }, - { - "epoch": 3.02, - "learning_rate": 3.4859369506340096e-05, - "loss": 0.332, - "step": 5045500 - }, - { - "epoch": 3.03, - "learning_rate": 3.485726954077953e-05, - "loss": 0.3367, - "step": 5046000 - }, - { - "epoch": 3.03, - "learning_rate": 3.485517377515008e-05, - "loss": 0.3396, - "step": 5046500 - }, - { - "epoch": 3.03, - "learning_rate": 3.485307380958952e-05, - "loss": 0.3163, - "step": 5047000 - }, - { - "epoch": 3.03, - "learning_rate": 3.485097384402896e-05, - "loss": 0.3205, - "step": 5047500 - }, - { - "epoch": 3.03, - "learning_rate": 3.48488738784684e-05, - "loss": 0.3348, - "step": 5048000 - }, - { - "epoch": 3.03, - "learning_rate": 3.484677391290783e-05, - "loss": 0.3291, - "step": 5048500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4844678147278384e-05, - "loss": 0.3358, - "step": 5049000 - }, - { - "epoch": 3.03, - "learning_rate": 3.484257818171782e-05, - "loss": 0.3279, - "step": 5049500 - }, - { - "epoch": 3.03, - "learning_rate": 3.484047821615726e-05, - "loss": 0.33, - "step": 5050000 - }, - { - "epoch": 3.03, - "learning_rate": 3.483837825059669e-05, - "loss": 0.3322, - "step": 5050500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4836278285036125e-05, - "loss": 0.3257, - "step": 5051000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4834178319475565e-05, - "loss": 0.3285, - "step": 5051500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4832078353915e-05, - "loss": 0.3279, - "step": 5052000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4829978388354425e-05, - "loss": 0.3337, - "step": 5052500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4827882622724985e-05, - "loss": 0.3342, - "step": 5053000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4825786857095546e-05, - "loss": 0.3329, - "step": 5053500 - }, - { - "epoch": 3.03, - "learning_rate": 3.48236910914661e-05, - "loss": 0.3347, - "step": 5054000 - }, - { - "epoch": 3.03, - "learning_rate": 3.482159112590553e-05, - "loss": 0.3471, - "step": 5054500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4819491160344966e-05, - "loss": 0.3342, - "step": 5055000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4817391194784406e-05, - "loss": 0.3319, - "step": 5055500 - }, - { - "epoch": 3.03, - "learning_rate": 3.481529122922384e-05, - "loss": 0.3309, - "step": 5056000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4813195463594394e-05, - "loss": 0.3352, - "step": 5056500 - }, - { - "epoch": 3.03, - "learning_rate": 3.481109549803383e-05, - "loss": 0.3312, - "step": 5057000 - }, - { - "epoch": 3.03, - "learning_rate": 3.480899553247327e-05, - "loss": 0.3415, - "step": 5057500 - }, - { - "epoch": 3.03, - "learning_rate": 3.48068955669127e-05, - "loss": 0.3257, - "step": 5058000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4804795601352134e-05, - "loss": 0.3293, - "step": 5058500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4802695635791574e-05, - "loss": 0.3276, - "step": 5059000 - }, - { - "epoch": 3.03, - "learning_rate": 3.480059567023101e-05, - "loss": 0.3295, - "step": 5059500 - }, - { - "epoch": 3.03, - "learning_rate": 3.479849570467044e-05, - "loss": 0.3289, - "step": 5060000 - }, - { - "epoch": 3.03, - "learning_rate": 3.4796399939041e-05, - "loss": 0.347, - "step": 5060500 - }, - { - "epoch": 3.03, - "learning_rate": 3.4794304173411555e-05, - "loss": 0.3209, - "step": 5061000 - }, - { - "epoch": 3.03, - "learning_rate": 3.479220420785099e-05, - "loss": 0.333, - "step": 5061500 - }, - { - "epoch": 3.03, - "learning_rate": 3.479010424229042e-05, - "loss": 0.3282, - "step": 5062000 - }, - { - "epoch": 3.04, - "learning_rate": 3.478800427672986e-05, - "loss": 0.3267, - "step": 5062500 - }, - { - "epoch": 3.04, - "learning_rate": 3.4785904311169296e-05, - "loss": 0.3295, - "step": 5063000 - }, - { - "epoch": 3.04, - "learning_rate": 3.478380434560873e-05, - "loss": 0.3319, - "step": 5063500 - }, - { - "epoch": 3.04, - "learning_rate": 3.478170438004817e-05, - "loss": 0.3282, - "step": 5064000 - }, - { - "epoch": 3.04, - "learning_rate": 3.47796044144876e-05, - "loss": 0.3236, - "step": 5064500 - }, - { - "epoch": 3.04, - "learning_rate": 3.4777504448927036e-05, - "loss": 0.3352, - "step": 5065000 - }, - { - "epoch": 3.04, - "learning_rate": 3.477540448336648e-05, - "loss": 0.3188, - "step": 5065500 - }, - { - "epoch": 3.04, - "learning_rate": 3.477330451780591e-05, - "loss": 0.3301, - "step": 5066000 - }, - { - "epoch": 3.04, - "learning_rate": 3.477120455224534e-05, - "loss": 0.3349, - "step": 5066500 - }, - { - "epoch": 3.04, - "learning_rate": 3.47691087866159e-05, - "loss": 0.328, - "step": 5067000 - }, - { - "epoch": 3.04, - "learning_rate": 3.476700882105534e-05, - "loss": 0.3226, - "step": 5067500 - }, - { - "epoch": 3.04, - "learning_rate": 3.476490885549477e-05, - "loss": 0.3353, - "step": 5068000 - }, - { - "epoch": 3.04, - "learning_rate": 3.4762808889934204e-05, - "loss": 0.3346, - "step": 5068500 - }, - { - "epoch": 3.04, - "learning_rate": 3.4760713124304765e-05, - "loss": 0.337, - "step": 5069000 - }, - { - "epoch": 3.04, - "learning_rate": 3.47586131587442e-05, - "loss": 0.3397, - "step": 5069500 - }, - { - "epoch": 3.04, - "learning_rate": 3.475651739311475e-05, - "loss": 0.3332, - "step": 5070000 - }, - { - "epoch": 3.04, - "learning_rate": 3.4754417427554185e-05, - "loss": 0.3317, - "step": 5070500 - }, - { - "epoch": 3.04, - "learning_rate": 3.4752317461993625e-05, - "loss": 0.3271, - "step": 5071000 - }, - { - "epoch": 3.04, - "learning_rate": 3.475021749643306e-05, - "loss": 0.3366, - "step": 5071500 - }, - { - "epoch": 3.04, - "learning_rate": 3.474811753087249e-05, - "loss": 0.3365, - "step": 5072000 - }, - { - "epoch": 3.04, - "learning_rate": 3.474601756531193e-05, - "loss": 0.3325, - "step": 5072500 - }, - { - "epoch": 3.04, - "learning_rate": 3.4743917599751366e-05, - "loss": 0.3296, - "step": 5073000 - }, - { - "epoch": 3.04, - "learning_rate": 3.47418176341908e-05, - "loss": 0.3237, - "step": 5073500 - }, - { - "epoch": 3.04, - "learning_rate": 3.473972186856135e-05, - "loss": 0.3341, - "step": 5074000 - }, - { - "epoch": 3.04, - "learning_rate": 3.473762190300079e-05, - "loss": 0.3386, - "step": 5074500 - }, - { - "epoch": 3.04, - "learning_rate": 3.473552193744023e-05, - "loss": 0.3288, - "step": 5075000 - }, - { - "epoch": 3.04, - "learning_rate": 3.473342197187966e-05, - "loss": 0.3325, - "step": 5075500 - }, - { - "epoch": 3.04, - "learning_rate": 3.473132620625022e-05, - "loss": 0.343, - "step": 5076000 - }, - { - "epoch": 3.04, - "learning_rate": 3.4729230440620774e-05, - "loss": 0.3255, - "step": 5076500 - }, - { - "epoch": 3.04, - "learning_rate": 3.472713047506021e-05, - "loss": 0.3264, - "step": 5077000 - }, - { - "epoch": 3.04, - "learning_rate": 3.472503050949964e-05, - "loss": 0.3223, - "step": 5077500 - }, - { - "epoch": 3.04, - "learning_rate": 3.472293054393908e-05, - "loss": 0.3276, - "step": 5078000 - }, - { - "epoch": 3.04, - "learning_rate": 3.4720830578378515e-05, - "loss": 0.3375, - "step": 5078500 - }, - { - "epoch": 3.05, - "learning_rate": 3.471873061281795e-05, - "loss": 0.3292, - "step": 5079000 - }, - { - "epoch": 3.05, - "learning_rate": 3.471663064725739e-05, - "loss": 0.328, - "step": 5079500 - }, - { - "epoch": 3.05, - "learning_rate": 3.471453488162794e-05, - "loss": 0.3288, - "step": 5080000 - }, - { - "epoch": 3.05, - "learning_rate": 3.4712434916067375e-05, - "loss": 0.3355, - "step": 5080500 - }, - { - "epoch": 3.05, - "learning_rate": 3.471033495050681e-05, - "loss": 0.3262, - "step": 5081000 - }, - { - "epoch": 3.05, - "learning_rate": 3.470823498494625e-05, - "loss": 0.3273, - "step": 5081500 - }, - { - "epoch": 3.05, - "learning_rate": 3.470613501938568e-05, - "loss": 0.3374, - "step": 5082000 - }, - { - "epoch": 3.05, - "learning_rate": 3.4704035053825116e-05, - "loss": 0.3287, - "step": 5082500 - }, - { - "epoch": 3.05, - "learning_rate": 3.4701939288195676e-05, - "loss": 0.325, - "step": 5083000 - }, - { - "epoch": 3.05, - "learning_rate": 3.469983932263511e-05, - "loss": 0.3318, - "step": 5083500 - }, - { - "epoch": 3.05, - "learning_rate": 3.469773935707454e-05, - "loss": 0.3299, - "step": 5084000 - }, - { - "epoch": 3.05, - "learning_rate": 3.4695639391513984e-05, - "loss": 0.3329, - "step": 5084500 - }, - { - "epoch": 3.05, - "learning_rate": 3.469353942595342e-05, - "loss": 0.3305, - "step": 5085000 - }, - { - "epoch": 3.05, - "learning_rate": 3.469143946039285e-05, - "loss": 0.3323, - "step": 5085500 - }, - { - "epoch": 3.05, - "learning_rate": 3.4689343694763404e-05, - "loss": 0.3317, - "step": 5086000 - }, - { - "epoch": 3.05, - "learning_rate": 3.4687243729202844e-05, - "loss": 0.3386, - "step": 5086500 - }, - { - "epoch": 3.05, - "learning_rate": 3.468514376364228e-05, - "loss": 0.326, - "step": 5087000 - }, - { - "epoch": 3.05, - "learning_rate": 3.468304379808171e-05, - "loss": 0.3346, - "step": 5087500 - }, - { - "epoch": 3.05, - "learning_rate": 3.4680943832521145e-05, - "loss": 0.3227, - "step": 5088000 - }, - { - "epoch": 3.05, - "learning_rate": 3.4678848066891705e-05, - "loss": 0.333, - "step": 5088500 - }, - { - "epoch": 3.05, - "learning_rate": 3.467674810133114e-05, - "loss": 0.3299, - "step": 5089000 - }, - { - "epoch": 3.05, - "learning_rate": 3.467464813577057e-05, - "loss": 0.3402, - "step": 5089500 - }, - { - "epoch": 3.05, - "learning_rate": 3.467254817021001e-05, - "loss": 0.329, - "step": 5090000 - }, - { - "epoch": 3.05, - "learning_rate": 3.467044820464944e-05, - "loss": 0.3281, - "step": 5090500 - }, - { - "epoch": 3.05, - "learning_rate": 3.466834823908888e-05, - "loss": 0.3306, - "step": 5091000 - }, - { - "epoch": 3.05, - "learning_rate": 3.466624827352831e-05, - "loss": 0.3321, - "step": 5091500 - }, - { - "epoch": 3.05, - "learning_rate": 3.4664148307967746e-05, - "loss": 0.3387, - "step": 5092000 - }, - { - "epoch": 3.05, - "learning_rate": 3.4662052542338306e-05, - "loss": 0.3323, - "step": 5092500 - }, - { - "epoch": 3.05, - "learning_rate": 3.465995257677774e-05, - "loss": 0.3401, - "step": 5093000 - }, - { - "epoch": 3.05, - "learning_rate": 3.46578568111483e-05, - "loss": 0.3251, - "step": 5093500 - }, - { - "epoch": 3.05, - "learning_rate": 3.4655756845587734e-05, - "loss": 0.3334, - "step": 5094000 - }, - { - "epoch": 3.05, - "learning_rate": 3.465365688002717e-05, - "loss": 0.3291, - "step": 5094500 - }, - { - "epoch": 3.05, - "learning_rate": 3.465155691446661e-05, - "loss": 0.3347, - "step": 5095000 - }, - { - "epoch": 3.05, - "learning_rate": 3.464946114883716e-05, - "loss": 0.3377, - "step": 5095500 - }, - { - "epoch": 3.06, - "learning_rate": 3.4647361183276594e-05, - "loss": 0.3306, - "step": 5096000 - }, - { - "epoch": 3.06, - "learning_rate": 3.464526121771603e-05, - "loss": 0.3291, - "step": 5096500 - }, - { - "epoch": 3.06, - "learning_rate": 3.464316125215547e-05, - "loss": 0.3283, - "step": 5097000 - }, - { - "epoch": 3.06, - "learning_rate": 3.4641061286594895e-05, - "loss": 0.333, - "step": 5097500 - }, - { - "epoch": 3.06, - "learning_rate": 3.4638961321034335e-05, - "loss": 0.3229, - "step": 5098000 - }, - { - "epoch": 3.06, - "learning_rate": 3.463686135547377e-05, - "loss": 0.3309, - "step": 5098500 - }, - { - "epoch": 3.06, - "learning_rate": 3.46347613899132e-05, - "loss": 0.3356, - "step": 5099000 - }, - { - "epoch": 3.06, - "learning_rate": 3.463266562428376e-05, - "loss": 0.3252, - "step": 5099500 - }, - { - "epoch": 3.06, - "learning_rate": 3.4630565658723196e-05, - "loss": 0.3226, - "step": 5100000 - }, - { - "epoch": 3.06, - "eval_loss": 0.3272015154361725, - "eval_runtime": 1122.7478, - "eval_samples_per_second": 469.135, - "eval_steps_per_second": 78.189, - "step": 5100000 - }, - { - "epoch": 3.06, - "learning_rate": 3.462846569316263e-05, - "loss": 0.328, - "step": 5100500 - }, - { - "epoch": 3.06, - "learning_rate": 3.462636572760206e-05, - "loss": 0.3282, - "step": 5101000 - }, - { - "epoch": 3.06, - "learning_rate": 3.462426996197262e-05, - "loss": 0.3344, - "step": 5101500 - }, - { - "epoch": 3.06, - "learning_rate": 3.462216999641206e-05, - "loss": 0.3221, - "step": 5102000 - }, - { - "epoch": 3.06, - "learning_rate": 3.462007003085149e-05, - "loss": 0.3368, - "step": 5102500 - }, - { - "epoch": 3.06, - "learning_rate": 3.461797426522205e-05, - "loss": 0.3321, - "step": 5103000 - }, - { - "epoch": 3.06, - "learning_rate": 3.4615874299661484e-05, - "loss": 0.3355, - "step": 5103500 - }, - { - "epoch": 3.06, - "learning_rate": 3.4613774334100924e-05, - "loss": 0.3334, - "step": 5104000 - }, - { - "epoch": 3.06, - "learning_rate": 3.461167436854036e-05, - "loss": 0.3291, - "step": 5104500 - }, - { - "epoch": 3.06, - "learning_rate": 3.460957860291091e-05, - "loss": 0.3293, - "step": 5105000 - }, - { - "epoch": 3.06, - "learning_rate": 3.460747863735035e-05, - "loss": 0.3368, - "step": 5105500 - }, - { - "epoch": 3.06, - "learning_rate": 3.4605378671789785e-05, - "loss": 0.3243, - "step": 5106000 - }, - { - "epoch": 3.06, - "learning_rate": 3.460327870622922e-05, - "loss": 0.3313, - "step": 5106500 - }, - { - "epoch": 3.06, - "learning_rate": 3.460118294059977e-05, - "loss": 0.3349, - "step": 5107000 - }, - { - "epoch": 3.06, - "learning_rate": 3.459908297503921e-05, - "loss": 0.3352, - "step": 5107500 - }, - { - "epoch": 3.06, - "learning_rate": 3.4596983009478645e-05, - "loss": 0.3352, - "step": 5108000 - }, - { - "epoch": 3.06, - "learning_rate": 3.459488304391808e-05, - "loss": 0.3287, - "step": 5108500 - }, - { - "epoch": 3.06, - "learning_rate": 3.459278307835752e-05, - "loss": 0.3265, - "step": 5109000 - }, - { - "epoch": 3.06, - "learning_rate": 3.4590683112796946e-05, - "loss": 0.3362, - "step": 5109500 - }, - { - "epoch": 3.06, - "learning_rate": 3.458858314723638e-05, - "loss": 0.3348, - "step": 5110000 - }, - { - "epoch": 3.06, - "learning_rate": 3.458648318167582e-05, - "loss": 0.3276, - "step": 5110500 - }, - { - "epoch": 3.06, - "learning_rate": 3.458438741604638e-05, - "loss": 0.3243, - "step": 5111000 - }, - { - "epoch": 3.06, - "learning_rate": 3.458228745048581e-05, - "loss": 0.3301, - "step": 5111500 - }, - { - "epoch": 3.06, - "learning_rate": 3.458018748492525e-05, - "loss": 0.3311, - "step": 5112000 - }, - { - "epoch": 3.07, - "learning_rate": 3.457808751936468e-05, - "loss": 0.3288, - "step": 5112500 - }, - { - "epoch": 3.07, - "learning_rate": 3.4575987553804114e-05, - "loss": 0.3293, - "step": 5113000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4573895988105794e-05, - "loss": 0.34, - "step": 5113500 - }, - { - "epoch": 3.07, - "learning_rate": 3.457179602254523e-05, - "loss": 0.3386, - "step": 5114000 - }, - { - "epoch": 3.07, - "learning_rate": 3.456969605698467e-05, - "loss": 0.328, - "step": 5114500 - }, - { - "epoch": 3.07, - "learning_rate": 3.45675960914241e-05, - "loss": 0.3253, - "step": 5115000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4565496125863535e-05, - "loss": 0.3385, - "step": 5115500 - }, - { - "epoch": 3.07, - "learning_rate": 3.4563396160302975e-05, - "loss": 0.3275, - "step": 5116000 - }, - { - "epoch": 3.07, - "learning_rate": 3.456129619474241e-05, - "loss": 0.325, - "step": 5116500 - }, - { - "epoch": 3.07, - "learning_rate": 3.4559196229181835e-05, - "loss": 0.3292, - "step": 5117000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4557096263621275e-05, - "loss": 0.3321, - "step": 5117500 - }, - { - "epoch": 3.07, - "learning_rate": 3.4555000497991836e-05, - "loss": 0.3309, - "step": 5118000 - }, - { - "epoch": 3.07, - "learning_rate": 3.455290053243127e-05, - "loss": 0.3321, - "step": 5118500 - }, - { - "epoch": 3.07, - "learning_rate": 3.45508005668707e-05, - "loss": 0.3262, - "step": 5119000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4548700601310136e-05, - "loss": 0.3309, - "step": 5119500 - }, - { - "epoch": 3.07, - "learning_rate": 3.454660063574957e-05, - "loss": 0.3406, - "step": 5120000 - }, - { - "epoch": 3.07, - "learning_rate": 3.454450067018901e-05, - "loss": 0.3359, - "step": 5120500 - }, - { - "epoch": 3.07, - "learning_rate": 3.454240070462844e-05, - "loss": 0.3361, - "step": 5121000 - }, - { - "epoch": 3.07, - "learning_rate": 3.454030073906788e-05, - "loss": 0.337, - "step": 5121500 - }, - { - "epoch": 3.07, - "learning_rate": 3.453820497343843e-05, - "loss": 0.338, - "step": 5122000 - }, - { - "epoch": 3.07, - "learning_rate": 3.453610920780899e-05, - "loss": 0.3268, - "step": 5122500 - }, - { - "epoch": 3.07, - "learning_rate": 3.453400924224843e-05, - "loss": 0.33, - "step": 5123000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4531909276687864e-05, - "loss": 0.3243, - "step": 5123500 - }, - { - "epoch": 3.07, - "learning_rate": 3.452980931112729e-05, - "loss": 0.3392, - "step": 5124000 - }, - { - "epoch": 3.07, - "learning_rate": 3.452770934556673e-05, - "loss": 0.3332, - "step": 5124500 - }, - { - "epoch": 3.07, - "learning_rate": 3.4525609380006165e-05, - "loss": 0.324, - "step": 5125000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4523509414445605e-05, - "loss": 0.3344, - "step": 5125500 - }, - { - "epoch": 3.07, - "learning_rate": 3.452140944888504e-05, - "loss": 0.3233, - "step": 5126000 - }, - { - "epoch": 3.07, - "learning_rate": 3.451930948332447e-05, - "loss": 0.3272, - "step": 5126500 - }, - { - "epoch": 3.07, - "learning_rate": 3.4517213717695025e-05, - "loss": 0.3235, - "step": 5127000 - }, - { - "epoch": 3.07, - "learning_rate": 3.4515113752134466e-05, - "loss": 0.3272, - "step": 5127500 - }, - { - "epoch": 3.07, - "learning_rate": 3.45130137865739e-05, - "loss": 0.3331, - "step": 5128000 - }, - { - "epoch": 3.07, - "learning_rate": 3.451091802094445e-05, - "loss": 0.3375, - "step": 5128500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4508818055383886e-05, - "loss": 0.3377, - "step": 5129000 - }, - { - "epoch": 3.08, - "learning_rate": 3.4506718089823326e-05, - "loss": 0.3336, - "step": 5129500 - }, - { - "epoch": 3.08, - "learning_rate": 3.450461812426276e-05, - "loss": 0.3319, - "step": 5130000 - }, - { - "epoch": 3.08, - "learning_rate": 3.450251815870219e-05, - "loss": 0.3273, - "step": 5130500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4500418193141634e-05, - "loss": 0.3291, - "step": 5131000 - }, - { - "epoch": 3.08, - "learning_rate": 3.449831822758107e-05, - "loss": 0.327, - "step": 5131500 - }, - { - "epoch": 3.08, - "learning_rate": 3.44962182620205e-05, - "loss": 0.3255, - "step": 5132000 - }, - { - "epoch": 3.08, - "learning_rate": 3.449411829645994e-05, - "loss": 0.3233, - "step": 5132500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4492018330899374e-05, - "loss": 0.3372, - "step": 5133000 - }, - { - "epoch": 3.08, - "learning_rate": 3.448991836533881e-05, - "loss": 0.3184, - "step": 5133500 - }, - { - "epoch": 3.08, - "learning_rate": 3.448782259970937e-05, - "loss": 0.3377, - "step": 5134000 - }, - { - "epoch": 3.08, - "learning_rate": 3.44857226341488e-05, - "loss": 0.3324, - "step": 5134500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4483626868519355e-05, - "loss": 0.3338, - "step": 5135000 - }, - { - "epoch": 3.08, - "learning_rate": 3.448152690295879e-05, - "loss": 0.3267, - "step": 5135500 - }, - { - "epoch": 3.08, - "learning_rate": 3.447942693739823e-05, - "loss": 0.3258, - "step": 5136000 - }, - { - "epoch": 3.08, - "learning_rate": 3.447732697183766e-05, - "loss": 0.3332, - "step": 5136500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4475227006277096e-05, - "loss": 0.3293, - "step": 5137000 - }, - { - "epoch": 3.08, - "learning_rate": 3.447313124064765e-05, - "loss": 0.3352, - "step": 5137500 - }, - { - "epoch": 3.08, - "learning_rate": 3.447103127508709e-05, - "loss": 0.3252, - "step": 5138000 - }, - { - "epoch": 3.08, - "learning_rate": 3.446893130952652e-05, - "loss": 0.3246, - "step": 5138500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4466831343965956e-05, - "loss": 0.332, - "step": 5139000 - }, - { - "epoch": 3.08, - "learning_rate": 3.4464731378405397e-05, - "loss": 0.332, - "step": 5139500 - }, - { - "epoch": 3.08, - "learning_rate": 3.446263141284483e-05, - "loss": 0.3286, - "step": 5140000 - }, - { - "epoch": 3.08, - "learning_rate": 3.4460531447284263e-05, - "loss": 0.3222, - "step": 5140500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4458431481723704e-05, - "loss": 0.3213, - "step": 5141000 - }, - { - "epoch": 3.08, - "learning_rate": 3.445633151616313e-05, - "loss": 0.3344, - "step": 5141500 - }, - { - "epoch": 3.08, - "learning_rate": 3.445423575053369e-05, - "loss": 0.34, - "step": 5142000 - }, - { - "epoch": 3.08, - "learning_rate": 3.445213578497313e-05, - "loss": 0.3264, - "step": 5142500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4450035819412564e-05, - "loss": 0.3349, - "step": 5143000 - }, - { - "epoch": 3.08, - "learning_rate": 3.444793585385199e-05, - "loss": 0.3281, - "step": 5143500 - }, - { - "epoch": 3.08, - "learning_rate": 3.444583588829143e-05, - "loss": 0.3402, - "step": 5144000 - }, - { - "epoch": 3.08, - "learning_rate": 3.444374012266199e-05, - "loss": 0.35, - "step": 5144500 - }, - { - "epoch": 3.08, - "learning_rate": 3.4441640157101425e-05, - "loss": 0.3368, - "step": 5145000 - }, - { - "epoch": 3.08, - "learning_rate": 3.443954019154086e-05, - "loss": 0.3247, - "step": 5145500 - }, - { - "epoch": 3.09, - "learning_rate": 3.443744022598029e-05, - "loss": 0.3224, - "step": 5146000 - }, - { - "epoch": 3.09, - "learning_rate": 3.443534446035085e-05, - "loss": 0.3484, - "step": 5146500 - }, - { - "epoch": 3.09, - "learning_rate": 3.4433244494790286e-05, - "loss": 0.3236, - "step": 5147000 - }, - { - "epoch": 3.09, - "learning_rate": 3.443114452922972e-05, - "loss": 0.3316, - "step": 5147500 - }, - { - "epoch": 3.09, - "learning_rate": 3.442904456366916e-05, - "loss": 0.3351, - "step": 5148000 - }, - { - "epoch": 3.09, - "learning_rate": 3.442694879803971e-05, - "loss": 0.3204, - "step": 5148500 - }, - { - "epoch": 3.09, - "learning_rate": 3.442484883247915e-05, - "loss": 0.3286, - "step": 5149000 - }, - { - "epoch": 3.09, - "learning_rate": 3.442274886691859e-05, - "loss": 0.3335, - "step": 5149500 - }, - { - "epoch": 3.09, - "learning_rate": 3.442064890135802e-05, - "loss": 0.332, - "step": 5150000 - }, - { - "epoch": 3.09, - "learning_rate": 3.4418548935797454e-05, - "loss": 0.329, - "step": 5150500 - }, - { - "epoch": 3.09, - "learning_rate": 3.441644897023689e-05, - "loss": 0.3274, - "step": 5151000 - }, - { - "epoch": 3.09, - "learning_rate": 3.441434900467632e-05, - "loss": 0.3263, - "step": 5151500 - }, - { - "epoch": 3.09, - "learning_rate": 3.4412249039115754e-05, - "loss": 0.3203, - "step": 5152000 - }, - { - "epoch": 3.09, - "learning_rate": 3.4410149073555194e-05, - "loss": 0.3248, - "step": 5152500 - }, - { - "epoch": 3.09, - "learning_rate": 3.440804910799463e-05, - "loss": 0.334, - "step": 5153000 - }, - { - "epoch": 3.09, - "learning_rate": 3.440595334236518e-05, - "loss": 0.3309, - "step": 5153500 - }, - { - "epoch": 3.09, - "learning_rate": 3.4403853376804615e-05, - "loss": 0.3213, - "step": 5154000 - }, - { - "epoch": 3.09, - "learning_rate": 3.4401753411244055e-05, - "loss": 0.3351, - "step": 5154500 - }, - { - "epoch": 3.09, - "learning_rate": 3.439965344568349e-05, - "loss": 0.3308, - "step": 5155000 - }, - { - "epoch": 3.09, - "learning_rate": 3.439755348012292e-05, - "loss": 0.3331, - "step": 5155500 - }, - { - "epoch": 3.09, - "learning_rate": 3.439545351456236e-05, - "loss": 0.3314, - "step": 5156000 - }, - { - "epoch": 3.09, - "learning_rate": 3.4393357748932916e-05, - "loss": 0.3498, - "step": 5156500 - }, - { - "epoch": 3.09, - "learning_rate": 3.439125778337235e-05, - "loss": 0.3312, - "step": 5157000 - }, - { - "epoch": 3.09, - "learning_rate": 3.438915781781179e-05, - "loss": 0.3297, - "step": 5157500 - }, - { - "epoch": 3.09, - "learning_rate": 3.438705785225122e-05, - "loss": 0.3299, - "step": 5158000 - }, - { - "epoch": 3.09, - "learning_rate": 3.4384957886690656e-05, - "loss": 0.3357, - "step": 5158500 - }, - { - "epoch": 3.09, - "learning_rate": 3.438286212106121e-05, - "loss": 0.3312, - "step": 5159000 - }, - { - "epoch": 3.09, - "learning_rate": 3.438076215550065e-05, - "loss": 0.3274, - "step": 5159500 - }, - { - "epoch": 3.09, - "learning_rate": 3.4378662189940084e-05, - "loss": 0.3245, - "step": 5160000 - }, - { - "epoch": 3.09, - "learning_rate": 3.437656222437952e-05, - "loss": 0.3254, - "step": 5160500 - }, - { - "epoch": 3.09, - "learning_rate": 3.437446645875007e-05, - "loss": 0.3256, - "step": 5161000 - }, - { - "epoch": 3.09, - "learning_rate": 3.437236649318951e-05, - "loss": 0.3319, - "step": 5161500 - }, - { - "epoch": 3.09, - "learning_rate": 3.4370266527628944e-05, - "loss": 0.3308, - "step": 5162000 - }, - { - "epoch": 3.1, - "learning_rate": 3.436816656206838e-05, - "loss": 0.3349, - "step": 5162500 - }, - { - "epoch": 3.1, - "learning_rate": 3.436606659650782e-05, - "loss": 0.3316, - "step": 5163000 - }, - { - "epoch": 3.1, - "learning_rate": 3.436396663094725e-05, - "loss": 0.3332, - "step": 5163500 - }, - { - "epoch": 3.1, - "learning_rate": 3.4361870865317805e-05, - "loss": 0.3263, - "step": 5164000 - }, - { - "epoch": 3.1, - "learning_rate": 3.4359770899757245e-05, - "loss": 0.3412, - "step": 5164500 - }, - { - "epoch": 3.1, - "learning_rate": 3.435767093419668e-05, - "loss": 0.3453, - "step": 5165000 - }, - { - "epoch": 3.1, - "learning_rate": 3.435557096863611e-05, - "loss": 0.3286, - "step": 5165500 - }, - { - "epoch": 3.1, - "learning_rate": 3.435347100307555e-05, - "loss": 0.3245, - "step": 5166000 - }, - { - "epoch": 3.1, - "learning_rate": 3.4351371037514986e-05, - "loss": 0.3333, - "step": 5166500 - }, - { - "epoch": 3.1, - "learning_rate": 3.434927107195442e-05, - "loss": 0.3284, - "step": 5167000 - }, - { - "epoch": 3.1, - "learning_rate": 3.434717110639386e-05, - "loss": 0.3249, - "step": 5167500 - }, - { - "epoch": 3.1, - "learning_rate": 3.434507954069553e-05, - "loss": 0.3343, - "step": 5168000 - }, - { - "epoch": 3.1, - "learning_rate": 3.434297957513497e-05, - "loss": 0.3329, - "step": 5168500 - }, - { - "epoch": 3.1, - "learning_rate": 3.43408796095744e-05, - "loss": 0.3317, - "step": 5169000 - }, - { - "epoch": 3.1, - "learning_rate": 3.4338779644013834e-05, - "loss": 0.3232, - "step": 5169500 - }, - { - "epoch": 3.1, - "learning_rate": 3.4336679678453274e-05, - "loss": 0.3349, - "step": 5170000 - }, - { - "epoch": 3.1, - "learning_rate": 3.433458391282383e-05, - "loss": 0.3315, - "step": 5170500 - }, - { - "epoch": 3.1, - "learning_rate": 3.433248394726326e-05, - "loss": 0.3224, - "step": 5171000 - }, - { - "epoch": 3.1, - "learning_rate": 3.43303839817027e-05, - "loss": 0.333, - "step": 5171500 - }, - { - "epoch": 3.1, - "learning_rate": 3.4328284016142135e-05, - "loss": 0.327, - "step": 5172000 - }, - { - "epoch": 3.1, - "learning_rate": 3.432618405058157e-05, - "loss": 0.3259, - "step": 5172500 - }, - { - "epoch": 3.1, - "learning_rate": 3.432408828495212e-05, - "loss": 0.3286, - "step": 5173000 - }, - { - "epoch": 3.1, - "learning_rate": 3.432198831939156e-05, - "loss": 0.3293, - "step": 5173500 - }, - { - "epoch": 3.1, - "learning_rate": 3.4319888353830995e-05, - "loss": 0.3305, - "step": 5174000 - }, - { - "epoch": 3.1, - "learning_rate": 3.431778838827043e-05, - "loss": 0.3282, - "step": 5174500 - }, - { - "epoch": 3.1, - "learning_rate": 3.431568842270987e-05, - "loss": 0.3349, - "step": 5175000 - }, - { - "epoch": 3.1, - "learning_rate": 3.43135884571493e-05, - "loss": 0.3344, - "step": 5175500 - }, - { - "epoch": 3.1, - "learning_rate": 3.4311488491588736e-05, - "loss": 0.33, - "step": 5176000 - }, - { - "epoch": 3.1, - "learning_rate": 3.4309388526028176e-05, - "loss": 0.337, - "step": 5176500 - }, - { - "epoch": 3.1, - "learning_rate": 3.430729696032985e-05, - "loss": 0.3365, - "step": 5177000 - }, - { - "epoch": 3.1, - "learning_rate": 3.4305196994769283e-05, - "loss": 0.3205, - "step": 5177500 - }, - { - "epoch": 3.1, - "learning_rate": 3.430309702920872e-05, - "loss": 0.3301, - "step": 5178000 - }, - { - "epoch": 3.1, - "learning_rate": 3.430099706364816e-05, - "loss": 0.3302, - "step": 5178500 - }, - { - "epoch": 3.11, - "learning_rate": 3.429889709808759e-05, - "loss": 0.3278, - "step": 5179000 - }, - { - "epoch": 3.11, - "learning_rate": 3.4296797132527024e-05, - "loss": 0.3311, - "step": 5179500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4294697166966464e-05, - "loss": 0.3302, - "step": 5180000 - }, - { - "epoch": 3.11, - "learning_rate": 3.42925972014059e-05, - "loss": 0.3376, - "step": 5180500 - }, - { - "epoch": 3.11, - "learning_rate": 3.429049723584533e-05, - "loss": 0.3375, - "step": 5181000 - }, - { - "epoch": 3.11, - "learning_rate": 3.4288401470215885e-05, - "loss": 0.3242, - "step": 5181500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4286301504655325e-05, - "loss": 0.335, - "step": 5182000 - }, - { - "epoch": 3.11, - "learning_rate": 3.428420153909476e-05, - "loss": 0.3268, - "step": 5182500 - }, - { - "epoch": 3.11, - "learning_rate": 3.428210157353419e-05, - "loss": 0.324, - "step": 5183000 - }, - { - "epoch": 3.11, - "learning_rate": 3.428000160797363e-05, - "loss": 0.3313, - "step": 5183500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4277901642413066e-05, - "loss": 0.3364, - "step": 5184000 - }, - { - "epoch": 3.11, - "learning_rate": 3.42758016768525e-05, - "loss": 0.3363, - "step": 5184500 - }, - { - "epoch": 3.11, - "learning_rate": 3.427370171129193e-05, - "loss": 0.3305, - "step": 5185000 - }, - { - "epoch": 3.11, - "learning_rate": 3.427161014559361e-05, - "loss": 0.3347, - "step": 5185500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4269510180033047e-05, - "loss": 0.336, - "step": 5186000 - }, - { - "epoch": 3.11, - "learning_rate": 3.426741021447248e-05, - "loss": 0.3315, - "step": 5186500 - }, - { - "epoch": 3.11, - "learning_rate": 3.426531024891192e-05, - "loss": 0.3428, - "step": 5187000 - }, - { - "epoch": 3.11, - "learning_rate": 3.4263210283351354e-05, - "loss": 0.324, - "step": 5187500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4261118717653034e-05, - "loss": 0.3302, - "step": 5188000 - }, - { - "epoch": 3.11, - "learning_rate": 3.425901875209247e-05, - "loss": 0.3261, - "step": 5188500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4256918786531894e-05, - "loss": 0.3372, - "step": 5189000 - }, - { - "epoch": 3.11, - "learning_rate": 3.4254818820971335e-05, - "loss": 0.3339, - "step": 5189500 - }, - { - "epoch": 3.11, - "learning_rate": 3.425271885541077e-05, - "loss": 0.3303, - "step": 5190000 - }, - { - "epoch": 3.11, - "learning_rate": 3.42506188898502e-05, - "loss": 0.3316, - "step": 5190500 - }, - { - "epoch": 3.11, - "learning_rate": 3.424851892428964e-05, - "loss": 0.3382, - "step": 5191000 - }, - { - "epoch": 3.11, - "learning_rate": 3.4246418958729075e-05, - "loss": 0.3304, - "step": 5191500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4244318993168515e-05, - "loss": 0.3289, - "step": 5192000 - }, - { - "epoch": 3.11, - "learning_rate": 3.424221902760795e-05, - "loss": 0.3316, - "step": 5192500 - }, - { - "epoch": 3.11, - "learning_rate": 3.424011906204738e-05, - "loss": 0.3288, - "step": 5193000 - }, - { - "epoch": 3.11, - "learning_rate": 3.423801909648682e-05, - "loss": 0.3355, - "step": 5193500 - }, - { - "epoch": 3.11, - "learning_rate": 3.4235923330857376e-05, - "loss": 0.327, - "step": 5194000 - }, - { - "epoch": 3.11, - "learning_rate": 3.423382756522793e-05, - "loss": 0.3352, - "step": 5194500 - }, - { - "epoch": 3.11, - "learning_rate": 3.423172759966736e-05, - "loss": 0.3256, - "step": 5195000 - }, - { - "epoch": 3.11, - "learning_rate": 3.4229627634106797e-05, - "loss": 0.3344, - "step": 5195500 - }, - { - "epoch": 3.12, - "learning_rate": 3.422752766854624e-05, - "loss": 0.3334, - "step": 5196000 - }, - { - "epoch": 3.12, - "learning_rate": 3.422542770298567e-05, - "loss": 0.3391, - "step": 5196500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4223327737425104e-05, - "loss": 0.3366, - "step": 5197000 - }, - { - "epoch": 3.12, - "learning_rate": 3.4221231971795664e-05, - "loss": 0.3238, - "step": 5197500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4219136206166224e-05, - "loss": 0.3334, - "step": 5198000 - }, - { - "epoch": 3.12, - "learning_rate": 3.421703624060565e-05, - "loss": 0.3335, - "step": 5198500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4214936275045085e-05, - "loss": 0.3252, - "step": 5199000 - }, - { - "epoch": 3.12, - "learning_rate": 3.4212836309484525e-05, - "loss": 0.3319, - "step": 5199500 - }, - { - "epoch": 3.12, - "learning_rate": 3.421073634392396e-05, - "loss": 0.3408, - "step": 5200000 - }, - { - "epoch": 3.12, - "eval_loss": 0.3276987373828888, - "eval_runtime": 1118.9331, - "eval_samples_per_second": 470.734, - "eval_steps_per_second": 78.456, - "step": 5200000 - }, - { - "epoch": 3.12, - "learning_rate": 3.420863637836339e-05, - "loss": 0.3387, - "step": 5200500 - }, - { - "epoch": 3.12, - "learning_rate": 3.420653641280283e-05, - "loss": 0.3251, - "step": 5201000 - }, - { - "epoch": 3.12, - "learning_rate": 3.4204436447242265e-05, - "loss": 0.3279, - "step": 5201500 - }, - { - "epoch": 3.12, - "learning_rate": 3.420234068161282e-05, - "loss": 0.3385, - "step": 5202000 - }, - { - "epoch": 3.12, - "learning_rate": 3.420024071605225e-05, - "loss": 0.3291, - "step": 5202500 - }, - { - "epoch": 3.12, - "learning_rate": 3.419814075049169e-05, - "loss": 0.328, - "step": 5203000 - }, - { - "epoch": 3.12, - "learning_rate": 3.4196040784931126e-05, - "loss": 0.3264, - "step": 5203500 - }, - { - "epoch": 3.12, - "learning_rate": 3.419394081937056e-05, - "loss": 0.3359, - "step": 5204000 - }, - { - "epoch": 3.12, - "learning_rate": 3.419184505374112e-05, - "loss": 0.3415, - "step": 5204500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4189745088180553e-05, - "loss": 0.3232, - "step": 5205000 - }, - { - "epoch": 3.12, - "learning_rate": 3.418764512261999e-05, - "loss": 0.3374, - "step": 5205500 - }, - { - "epoch": 3.12, - "learning_rate": 3.418554515705943e-05, - "loss": 0.3305, - "step": 5206000 - }, - { - "epoch": 3.12, - "learning_rate": 3.418344519149886e-05, - "loss": 0.3348, - "step": 5206500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4181345225938294e-05, - "loss": 0.3331, - "step": 5207000 - }, - { - "epoch": 3.12, - "learning_rate": 3.4179245260377734e-05, - "loss": 0.3202, - "step": 5207500 - }, - { - "epoch": 3.12, - "learning_rate": 3.417714949474829e-05, - "loss": 0.3256, - "step": 5208000 - }, - { - "epoch": 3.12, - "learning_rate": 3.417504952918772e-05, - "loss": 0.3398, - "step": 5208500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4172949563627155e-05, - "loss": 0.3483, - "step": 5209000 - }, - { - "epoch": 3.12, - "learning_rate": 3.4170849598066595e-05, - "loss": 0.3318, - "step": 5209500 - }, - { - "epoch": 3.12, - "learning_rate": 3.416874963250603e-05, - "loss": 0.3293, - "step": 5210000 - }, - { - "epoch": 3.12, - "learning_rate": 3.416664966694546e-05, - "loss": 0.3284, - "step": 5210500 - }, - { - "epoch": 3.12, - "learning_rate": 3.4164549701384895e-05, - "loss": 0.327, - "step": 5211000 - }, - { - "epoch": 3.12, - "learning_rate": 3.416244973582433e-05, - "loss": 0.3334, - "step": 5211500 - }, - { - "epoch": 3.12, - "learning_rate": 3.416035397019489e-05, - "loss": 0.3408, - "step": 5212000 - }, - { - "epoch": 3.13, - "learning_rate": 3.415825400463432e-05, - "loss": 0.3298, - "step": 5212500 - }, - { - "epoch": 3.13, - "learning_rate": 3.415615403907376e-05, - "loss": 0.3221, - "step": 5213000 - }, - { - "epoch": 3.13, - "learning_rate": 3.415405407351319e-05, - "loss": 0.3308, - "step": 5213500 - }, - { - "epoch": 3.13, - "learning_rate": 3.415195410795263e-05, - "loss": 0.3299, - "step": 5214000 - }, - { - "epoch": 3.13, - "learning_rate": 3.414985414239206e-05, - "loss": 0.3338, - "step": 5214500 - }, - { - "epoch": 3.13, - "learning_rate": 3.41477541768315e-05, - "loss": 0.3295, - "step": 5215000 - }, - { - "epoch": 3.13, - "learning_rate": 3.414565841120206e-05, - "loss": 0.3281, - "step": 5215500 - }, - { - "epoch": 3.13, - "learning_rate": 3.414355844564149e-05, - "loss": 0.334, - "step": 5216000 - }, - { - "epoch": 3.13, - "learning_rate": 3.4141458480080924e-05, - "loss": 0.3265, - "step": 5216500 - }, - { - "epoch": 3.13, - "learning_rate": 3.413935851452036e-05, - "loss": 0.3358, - "step": 5217000 - }, - { - "epoch": 3.13, - "learning_rate": 3.41372585489598e-05, - "loss": 0.3415, - "step": 5217500 - }, - { - "epoch": 3.13, - "learning_rate": 3.413516278333036e-05, - "loss": 0.3279, - "step": 5218000 - }, - { - "epoch": 3.13, - "learning_rate": 3.4133062817769785e-05, - "loss": 0.3477, - "step": 5218500 - }, - { - "epoch": 3.13, - "learning_rate": 3.4130967052140345e-05, - "loss": 0.3349, - "step": 5219000 - }, - { - "epoch": 3.13, - "learning_rate": 3.412886708657978e-05, - "loss": 0.3208, - "step": 5219500 - }, - { - "epoch": 3.13, - "learning_rate": 3.412676712101922e-05, - "loss": 0.3301, - "step": 5220000 - }, - { - "epoch": 3.13, - "learning_rate": 3.4124667155458645e-05, - "loss": 0.3368, - "step": 5220500 - }, - { - "epoch": 3.13, - "learning_rate": 3.4122567189898086e-05, - "loss": 0.324, - "step": 5221000 - }, - { - "epoch": 3.13, - "learning_rate": 3.412046722433752e-05, - "loss": 0.33, - "step": 5221500 - }, - { - "epoch": 3.13, - "learning_rate": 3.411836725877695e-05, - "loss": 0.3368, - "step": 5222000 - }, - { - "epoch": 3.13, - "learning_rate": 3.411626729321639e-05, - "loss": 0.3293, - "step": 5222500 - }, - { - "epoch": 3.13, - "learning_rate": 3.4114167327655826e-05, - "loss": 0.3319, - "step": 5223000 - }, - { - "epoch": 3.13, - "learning_rate": 3.411207156202638e-05, - "loss": 0.3276, - "step": 5223500 - }, - { - "epoch": 3.13, - "learning_rate": 3.410997159646581e-05, - "loss": 0.3305, - "step": 5224000 - }, - { - "epoch": 3.13, - "learning_rate": 3.4107871630905254e-05, - "loss": 0.3428, - "step": 5224500 - }, - { - "epoch": 3.13, - "learning_rate": 3.410577166534469e-05, - "loss": 0.3285, - "step": 5225000 - }, - { - "epoch": 3.13, - "learning_rate": 3.410367169978412e-05, - "loss": 0.3306, - "step": 5225500 - }, - { - "epoch": 3.13, - "learning_rate": 3.4101575934154674e-05, - "loss": 0.3351, - "step": 5226000 - }, - { - "epoch": 3.13, - "learning_rate": 3.4099475968594114e-05, - "loss": 0.342, - "step": 5226500 - }, - { - "epoch": 3.13, - "learning_rate": 3.409737600303355e-05, - "loss": 0.3423, - "step": 5227000 - }, - { - "epoch": 3.13, - "learning_rate": 3.409527603747298e-05, - "loss": 0.3303, - "step": 5227500 - }, - { - "epoch": 3.13, - "learning_rate": 3.409317607191242e-05, - "loss": 0.3377, - "step": 5228000 - }, - { - "epoch": 3.13, - "learning_rate": 3.4091076106351855e-05, - "loss": 0.326, - "step": 5228500 - }, - { - "epoch": 3.13, - "learning_rate": 3.408897614079129e-05, - "loss": 0.3338, - "step": 5229000 - }, - { - "epoch": 3.14, - "learning_rate": 3.408688037516185e-05, - "loss": 0.3308, - "step": 5229500 - }, - { - "epoch": 3.14, - "learning_rate": 3.408478040960128e-05, - "loss": 0.3275, - "step": 5230000 - }, - { - "epoch": 3.14, - "learning_rate": 3.4082680444040716e-05, - "loss": 0.3248, - "step": 5230500 - }, - { - "epoch": 3.14, - "learning_rate": 3.4080580478480156e-05, - "loss": 0.3426, - "step": 5231000 - }, - { - "epoch": 3.14, - "learning_rate": 3.407848051291959e-05, - "loss": 0.3264, - "step": 5231500 - }, - { - "epoch": 3.14, - "learning_rate": 3.407638054735902e-05, - "loss": 0.3277, - "step": 5232000 - }, - { - "epoch": 3.14, - "learning_rate": 3.407428058179846e-05, - "loss": 0.3268, - "step": 5232500 - }, - { - "epoch": 3.14, - "learning_rate": 3.4072184816169017e-05, - "loss": 0.3322, - "step": 5233000 - }, - { - "epoch": 3.14, - "learning_rate": 3.407008485060845e-05, - "loss": 0.3327, - "step": 5233500 - }, - { - "epoch": 3.14, - "learning_rate": 3.4067984885047883e-05, - "loss": 0.3303, - "step": 5234000 - }, - { - "epoch": 3.14, - "learning_rate": 3.4065884919487324e-05, - "loss": 0.3334, - "step": 5234500 - }, - { - "epoch": 3.14, - "learning_rate": 3.406378495392676e-05, - "loss": 0.3247, - "step": 5235000 - }, - { - "epoch": 3.14, - "learning_rate": 3.406168498836619e-05, - "loss": 0.3287, - "step": 5235500 - }, - { - "epoch": 3.14, - "learning_rate": 3.4059585022805624e-05, - "loss": 0.3352, - "step": 5236000 - }, - { - "epoch": 3.14, - "learning_rate": 3.405748505724506e-05, - "loss": 0.3323, - "step": 5236500 - }, - { - "epoch": 3.14, - "learning_rate": 3.405538929161562e-05, - "loss": 0.3347, - "step": 5237000 - }, - { - "epoch": 3.14, - "learning_rate": 3.405328932605506e-05, - "loss": 0.3367, - "step": 5237500 - }, - { - "epoch": 3.14, - "learning_rate": 3.4051189360494485e-05, - "loss": 0.3275, - "step": 5238000 - }, - { - "epoch": 3.14, - "learning_rate": 3.404908939493392e-05, - "loss": 0.3295, - "step": 5238500 - }, - { - "epoch": 3.14, - "learning_rate": 3.404698942937336e-05, - "loss": 0.323, - "step": 5239000 - }, - { - "epoch": 3.14, - "learning_rate": 3.404489366374392e-05, - "loss": 0.3345, - "step": 5239500 - }, - { - "epoch": 3.14, - "learning_rate": 3.404279369818335e-05, - "loss": 0.3243, - "step": 5240000 - }, - { - "epoch": 3.14, - "learning_rate": 3.404069373262278e-05, - "loss": 0.328, - "step": 5240500 - }, - { - "epoch": 3.14, - "learning_rate": 3.403859796699334e-05, - "loss": 0.3289, - "step": 5241000 - }, - { - "epoch": 3.14, - "learning_rate": 3.403649800143278e-05, - "loss": 0.3293, - "step": 5241500 - }, - { - "epoch": 3.14, - "learning_rate": 3.403439803587221e-05, - "loss": 0.3307, - "step": 5242000 - }, - { - "epoch": 3.14, - "learning_rate": 3.4032298070311647e-05, - "loss": 0.3428, - "step": 5242500 - }, - { - "epoch": 3.14, - "learning_rate": 3.403019810475108e-05, - "loss": 0.3242, - "step": 5243000 - }, - { - "epoch": 3.14, - "learning_rate": 3.4028098139190513e-05, - "loss": 0.333, - "step": 5243500 - }, - { - "epoch": 3.14, - "learning_rate": 3.4025998173629954e-05, - "loss": 0.3387, - "step": 5244000 - }, - { - "epoch": 3.14, - "learning_rate": 3.4023902408000514e-05, - "loss": 0.3267, - "step": 5244500 - }, - { - "epoch": 3.14, - "learning_rate": 3.402180244243995e-05, - "loss": 0.3313, - "step": 5245000 - }, - { - "epoch": 3.14, - "learning_rate": 3.4019702476879374e-05, - "loss": 0.3284, - "step": 5245500 - }, - { - "epoch": 3.15, - "learning_rate": 3.4017602511318814e-05, - "loss": 0.3263, - "step": 5246000 - }, - { - "epoch": 3.15, - "learning_rate": 3.401550254575825e-05, - "loss": 0.3314, - "step": 5246500 - }, - { - "epoch": 3.15, - "learning_rate": 3.401340258019768e-05, - "loss": 0.3196, - "step": 5247000 - }, - { - "epoch": 3.15, - "learning_rate": 3.401130261463712e-05, - "loss": 0.3302, - "step": 5247500 - }, - { - "epoch": 3.15, - "learning_rate": 3.4009206849007675e-05, - "loss": 0.3314, - "step": 5248000 - }, - { - "epoch": 3.15, - "learning_rate": 3.400710688344711e-05, - "loss": 0.3375, - "step": 5248500 - }, - { - "epoch": 3.15, - "learning_rate": 3.400500691788654e-05, - "loss": 0.3242, - "step": 5249000 - }, - { - "epoch": 3.15, - "learning_rate": 3.400290695232598e-05, - "loss": 0.3301, - "step": 5249500 - }, - { - "epoch": 3.15, - "learning_rate": 3.4000806986765416e-05, - "loss": 0.3274, - "step": 5250000 - }, - { - "epoch": 3.15, - "learning_rate": 3.399870702120485e-05, - "loss": 0.3446, - "step": 5250500 - }, - { - "epoch": 3.15, - "learning_rate": 3.399661125557541e-05, - "loss": 0.3312, - "step": 5251000 - }, - { - "epoch": 3.15, - "learning_rate": 3.399451129001484e-05, - "loss": 0.3331, - "step": 5251500 - }, - { - "epoch": 3.15, - "learning_rate": 3.3992411324454276e-05, - "loss": 0.3293, - "step": 5252000 - }, - { - "epoch": 3.15, - "learning_rate": 3.399031135889372e-05, - "loss": 0.3361, - "step": 5252500 - }, - { - "epoch": 3.15, - "learning_rate": 3.398821139333315e-05, - "loss": 0.3356, - "step": 5253000 - }, - { - "epoch": 3.15, - "learning_rate": 3.3986111427772584e-05, - "loss": 0.3288, - "step": 5253500 - }, - { - "epoch": 3.15, - "learning_rate": 3.398401566214314e-05, - "loss": 0.3358, - "step": 5254000 - }, - { - "epoch": 3.15, - "learning_rate": 3.398191569658258e-05, - "loss": 0.3279, - "step": 5254500 - }, - { - "epoch": 3.15, - "learning_rate": 3.397981573102201e-05, - "loss": 0.3289, - "step": 5255000 - }, - { - "epoch": 3.15, - "learning_rate": 3.3977715765461444e-05, - "loss": 0.3309, - "step": 5255500 - }, - { - "epoch": 3.15, - "learning_rate": 3.3975615799900885e-05, - "loss": 0.3342, - "step": 5256000 - }, - { - "epoch": 3.15, - "learning_rate": 3.397352003427144e-05, - "loss": 0.3312, - "step": 5256500 - }, - { - "epoch": 3.15, - "learning_rate": 3.397142006871087e-05, - "loss": 0.3323, - "step": 5257000 - }, - { - "epoch": 3.15, - "learning_rate": 3.3969320103150305e-05, - "loss": 0.3345, - "step": 5257500 - }, - { - "epoch": 3.15, - "learning_rate": 3.3967220137589745e-05, - "loss": 0.3247, - "step": 5258000 - }, - { - "epoch": 3.15, - "learning_rate": 3.396512017202918e-05, - "loss": 0.3323, - "step": 5258500 - }, - { - "epoch": 3.15, - "learning_rate": 3.396302440639973e-05, - "loss": 0.3318, - "step": 5259000 - }, - { - "epoch": 3.15, - "learning_rate": 3.396092444083917e-05, - "loss": 0.339, - "step": 5259500 - }, - { - "epoch": 3.15, - "learning_rate": 3.3958824475278606e-05, - "loss": 0.3284, - "step": 5260000 - }, - { - "epoch": 3.15, - "learning_rate": 3.395672450971804e-05, - "loss": 0.3257, - "step": 5260500 - }, - { - "epoch": 3.15, - "learning_rate": 3.395462874408859e-05, - "loss": 0.3331, - "step": 5261000 - }, - { - "epoch": 3.15, - "learning_rate": 3.395252877852803e-05, - "loss": 0.3367, - "step": 5261500 - }, - { - "epoch": 3.15, - "learning_rate": 3.395042881296747e-05, - "loss": 0.3291, - "step": 5262000 - }, - { - "epoch": 3.16, - "learning_rate": 3.39483288474069e-05, - "loss": 0.3303, - "step": 5262500 - }, - { - "epoch": 3.16, - "learning_rate": 3.394622888184634e-05, - "loss": 0.3418, - "step": 5263000 - }, - { - "epoch": 3.16, - "learning_rate": 3.3944128916285774e-05, - "loss": 0.3316, - "step": 5263500 - }, - { - "epoch": 3.16, - "learning_rate": 3.394203315065633e-05, - "loss": 0.3396, - "step": 5264000 - }, - { - "epoch": 3.16, - "learning_rate": 3.393993318509576e-05, - "loss": 0.3449, - "step": 5264500 - }, - { - "epoch": 3.16, - "learning_rate": 3.39378332195352e-05, - "loss": 0.329, - "step": 5265000 - }, - { - "epoch": 3.16, - "learning_rate": 3.3935733253974635e-05, - "loss": 0.3341, - "step": 5265500 - }, - { - "epoch": 3.16, - "learning_rate": 3.393363328841407e-05, - "loss": 0.3359, - "step": 5266000 - }, - { - "epoch": 3.16, - "learning_rate": 3.393153332285351e-05, - "loss": 0.3242, - "step": 5266500 - }, - { - "epoch": 3.16, - "learning_rate": 3.392943335729294e-05, - "loss": 0.3335, - "step": 5267000 - }, - { - "epoch": 3.16, - "learning_rate": 3.3927337591663495e-05, - "loss": 0.3279, - "step": 5267500 - }, - { - "epoch": 3.16, - "learning_rate": 3.3925237626102936e-05, - "loss": 0.3324, - "step": 5268000 - }, - { - "epoch": 3.16, - "learning_rate": 3.392313766054237e-05, - "loss": 0.3391, - "step": 5268500 - }, - { - "epoch": 3.16, - "learning_rate": 3.39210376949818e-05, - "loss": 0.3258, - "step": 5269000 - }, - { - "epoch": 3.16, - "learning_rate": 3.391893772942124e-05, - "loss": 0.3339, - "step": 5269500 - }, - { - "epoch": 3.16, - "learning_rate": 3.391683776386067e-05, - "loss": 0.3317, - "step": 5270000 - }, - { - "epoch": 3.16, - "learning_rate": 3.391474199823123e-05, - "loss": 0.3344, - "step": 5270500 - }, - { - "epoch": 3.16, - "learning_rate": 3.391264203267066e-05, - "loss": 0.3412, - "step": 5271000 - }, - { - "epoch": 3.16, - "learning_rate": 3.3910542067110104e-05, - "loss": 0.3304, - "step": 5271500 - }, - { - "epoch": 3.16, - "learning_rate": 3.390844210154954e-05, - "loss": 0.3313, - "step": 5272000 - }, - { - "epoch": 3.16, - "learning_rate": 3.3906342135988964e-05, - "loss": 0.3382, - "step": 5272500 - }, - { - "epoch": 3.16, - "learning_rate": 3.3904242170428404e-05, - "loss": 0.3321, - "step": 5273000 - }, - { - "epoch": 3.16, - "learning_rate": 3.390214220486784e-05, - "loss": 0.3267, - "step": 5273500 - }, - { - "epoch": 3.16, - "learning_rate": 3.390004223930727e-05, - "loss": 0.3329, - "step": 5274000 - }, - { - "epoch": 3.16, - "learning_rate": 3.389794647367783e-05, - "loss": 0.3313, - "step": 5274500 - }, - { - "epoch": 3.16, - "learning_rate": 3.3895846508117265e-05, - "loss": 0.3376, - "step": 5275000 - }, - { - "epoch": 3.16, - "learning_rate": 3.38937465425567e-05, - "loss": 0.3339, - "step": 5275500 - }, - { - "epoch": 3.16, - "learning_rate": 3.389165077692726e-05, - "loss": 0.3379, - "step": 5276000 - }, - { - "epoch": 3.16, - "learning_rate": 3.38895508113667e-05, - "loss": 0.3154, - "step": 5276500 - }, - { - "epoch": 3.16, - "learning_rate": 3.3887450845806125e-05, - "loss": 0.3347, - "step": 5277000 - }, - { - "epoch": 3.16, - "learning_rate": 3.388535088024556e-05, - "loss": 0.3321, - "step": 5277500 - }, - { - "epoch": 3.16, - "learning_rate": 3.388325511461612e-05, - "loss": 0.3305, - "step": 5278000 - }, - { - "epoch": 3.16, - "learning_rate": 3.388115514905556e-05, - "loss": 0.3374, - "step": 5278500 - }, - { - "epoch": 3.16, - "learning_rate": 3.387905518349499e-05, - "loss": 0.3338, - "step": 5279000 - }, - { - "epoch": 3.17, - "learning_rate": 3.387695521793442e-05, - "loss": 0.3249, - "step": 5279500 - }, - { - "epoch": 3.17, - "learning_rate": 3.387485525237386e-05, - "loss": 0.3285, - "step": 5280000 - }, - { - "epoch": 3.17, - "learning_rate": 3.387275528681329e-05, - "loss": 0.34, - "step": 5280500 - }, - { - "epoch": 3.17, - "learning_rate": 3.387065532125273e-05, - "loss": 0.3256, - "step": 5281000 - }, - { - "epoch": 3.17, - "learning_rate": 3.386855955562329e-05, - "loss": 0.3414, - "step": 5281500 - }, - { - "epoch": 3.17, - "learning_rate": 3.386645959006272e-05, - "loss": 0.3369, - "step": 5282000 - }, - { - "epoch": 3.17, - "learning_rate": 3.3864359624502154e-05, - "loss": 0.339, - "step": 5282500 - }, - { - "epoch": 3.17, - "learning_rate": 3.3862259658941594e-05, - "loss": 0.3375, - "step": 5283000 - }, - { - "epoch": 3.17, - "learning_rate": 3.386015969338103e-05, - "loss": 0.3302, - "step": 5283500 - }, - { - "epoch": 3.17, - "learning_rate": 3.385806392775158e-05, - "loss": 0.34, - "step": 5284000 - }, - { - "epoch": 3.17, - "learning_rate": 3.3855963962191015e-05, - "loss": 0.3363, - "step": 5284500 - }, - { - "epoch": 3.17, - "learning_rate": 3.3853863996630455e-05, - "loss": 0.3352, - "step": 5285000 - }, - { - "epoch": 3.17, - "learning_rate": 3.385176403106989e-05, - "loss": 0.3381, - "step": 5285500 - }, - { - "epoch": 3.17, - "learning_rate": 3.384966406550932e-05, - "loss": 0.332, - "step": 5286000 - }, - { - "epoch": 3.17, - "learning_rate": 3.384756409994876e-05, - "loss": 0.3295, - "step": 5286500 - }, - { - "epoch": 3.17, - "learning_rate": 3.3845464134388196e-05, - "loss": 0.3266, - "step": 5287000 - }, - { - "epoch": 3.17, - "learning_rate": 3.384336836875875e-05, - "loss": 0.3396, - "step": 5287500 - }, - { - "epoch": 3.17, - "learning_rate": 3.384126840319818e-05, - "loss": 0.339, - "step": 5288000 - }, - { - "epoch": 3.17, - "learning_rate": 3.383916843763762e-05, - "loss": 0.3365, - "step": 5288500 - }, - { - "epoch": 3.17, - "learning_rate": 3.3837068472077056e-05, - "loss": 0.3351, - "step": 5289000 - }, - { - "epoch": 3.17, - "learning_rate": 3.383496850651649e-05, - "loss": 0.3315, - "step": 5289500 - }, - { - "epoch": 3.17, - "learning_rate": 3.383286854095593e-05, - "loss": 0.3315, - "step": 5290000 - }, - { - "epoch": 3.17, - "learning_rate": 3.3830772775326484e-05, - "loss": 0.339, - "step": 5290500 - }, - { - "epoch": 3.17, - "learning_rate": 3.382867280976592e-05, - "loss": 0.337, - "step": 5291000 - }, - { - "epoch": 3.17, - "learning_rate": 3.382657284420536e-05, - "loss": 0.3368, - "step": 5291500 - }, - { - "epoch": 3.17, - "learning_rate": 3.382447287864479e-05, - "loss": 0.3273, - "step": 5292000 - }, - { - "epoch": 3.17, - "learning_rate": 3.3822377113015344e-05, - "loss": 0.3259, - "step": 5292500 - }, - { - "epoch": 3.17, - "learning_rate": 3.382027714745478e-05, - "loss": 0.3309, - "step": 5293000 - }, - { - "epoch": 3.17, - "learning_rate": 3.381817718189422e-05, - "loss": 0.3311, - "step": 5293500 - }, - { - "epoch": 3.17, - "learning_rate": 3.381607721633365e-05, - "loss": 0.3211, - "step": 5294000 - }, - { - "epoch": 3.17, - "learning_rate": 3.3813977250773085e-05, - "loss": 0.3333, - "step": 5294500 - }, - { - "epoch": 3.17, - "learning_rate": 3.3811877285212525e-05, - "loss": 0.3257, - "step": 5295000 - }, - { - "epoch": 3.17, - "learning_rate": 3.380977731965196e-05, - "loss": 0.3305, - "step": 5295500 - }, - { - "epoch": 3.18, - "learning_rate": 3.380767735409139e-05, - "loss": 0.33, - "step": 5296000 - }, - { - "epoch": 3.18, - "learning_rate": 3.380557738853083e-05, - "loss": 0.3258, - "step": 5296500 - }, - { - "epoch": 3.18, - "learning_rate": 3.380347742297026e-05, - "loss": 0.3277, - "step": 5297000 - }, - { - "epoch": 3.18, - "learning_rate": 3.38013774574097e-05, - "loss": 0.3259, - "step": 5297500 - }, - { - "epoch": 3.18, - "learning_rate": 3.379927749184913e-05, - "loss": 0.3279, - "step": 5298000 - }, - { - "epoch": 3.18, - "learning_rate": 3.379718172621969e-05, - "loss": 0.3259, - "step": 5298500 - }, - { - "epoch": 3.18, - "learning_rate": 3.3795085960590247e-05, - "loss": 0.3303, - "step": 5299000 - }, - { - "epoch": 3.18, - "learning_rate": 3.37929901949608e-05, - "loss": 0.3234, - "step": 5299500 - }, - { - "epoch": 3.18, - "learning_rate": 3.3790890229400234e-05, - "loss": 0.327, - "step": 5300000 - }, - { - "epoch": 3.18, - "eval_loss": 0.3255656063556671, - "eval_runtime": 1119.0379, - "eval_samples_per_second": 470.69, - "eval_steps_per_second": 78.449, - "step": 5300000 - }, - { - "epoch": 3.18, - "learning_rate": 3.3788790263839674e-05, - "loss": 0.3288, - "step": 5300500 - }, - { - "epoch": 3.18, - "learning_rate": 3.378669029827911e-05, - "loss": 0.3356, - "step": 5301000 - }, - { - "epoch": 3.18, - "learning_rate": 3.378459033271854e-05, - "loss": 0.3257, - "step": 5301500 - }, - { - "epoch": 3.18, - "learning_rate": 3.378249036715798e-05, - "loss": 0.3322, - "step": 5302000 - }, - { - "epoch": 3.18, - "learning_rate": 3.3780390401597414e-05, - "loss": 0.3446, - "step": 5302500 - }, - { - "epoch": 3.18, - "learning_rate": 3.377829043603685e-05, - "loss": 0.3363, - "step": 5303000 - }, - { - "epoch": 3.18, - "learning_rate": 3.377619467040741e-05, - "loss": 0.3346, - "step": 5303500 - }, - { - "epoch": 3.18, - "learning_rate": 3.377409470484684e-05, - "loss": 0.3385, - "step": 5304000 - }, - { - "epoch": 3.18, - "learning_rate": 3.3771994739286275e-05, - "loss": 0.3288, - "step": 5304500 - }, - { - "epoch": 3.18, - "learning_rate": 3.3769894773725715e-05, - "loss": 0.3313, - "step": 5305000 - }, - { - "epoch": 3.18, - "learning_rate": 3.376779480816515e-05, - "loss": 0.3246, - "step": 5305500 - }, - { - "epoch": 3.18, - "learning_rate": 3.376569484260458e-05, - "loss": 0.3286, - "step": 5306000 - }, - { - "epoch": 3.18, - "learning_rate": 3.3763599076975136e-05, - "loss": 0.3325, - "step": 5306500 - }, - { - "epoch": 3.18, - "learning_rate": 3.3761499111414576e-05, - "loss": 0.3232, - "step": 5307000 - }, - { - "epoch": 3.18, - "learning_rate": 3.375939914585401e-05, - "loss": 0.3323, - "step": 5307500 - }, - { - "epoch": 3.18, - "learning_rate": 3.375729918029344e-05, - "loss": 0.3287, - "step": 5308000 - }, - { - "epoch": 3.18, - "learning_rate": 3.375519921473288e-05, - "loss": 0.3333, - "step": 5308500 - }, - { - "epoch": 3.18, - "learning_rate": 3.375309924917231e-05, - "loss": 0.3301, - "step": 5309000 - }, - { - "epoch": 3.18, - "learning_rate": 3.3750999283611743e-05, - "loss": 0.321, - "step": 5309500 - }, - { - "epoch": 3.18, - "learning_rate": 3.3748903517982304e-05, - "loss": 0.3358, - "step": 5310000 - }, - { - "epoch": 3.18, - "learning_rate": 3.3746807752352864e-05, - "loss": 0.3362, - "step": 5310500 - }, - { - "epoch": 3.18, - "learning_rate": 3.37447077867923e-05, - "loss": 0.3252, - "step": 5311000 - }, - { - "epoch": 3.18, - "learning_rate": 3.374260782123173e-05, - "loss": 0.3305, - "step": 5311500 - }, - { - "epoch": 3.18, - "learning_rate": 3.374050785567117e-05, - "loss": 0.3309, - "step": 5312000 - }, - { - "epoch": 3.19, - "learning_rate": 3.3738407890110605e-05, - "loss": 0.3313, - "step": 5312500 - }, - { - "epoch": 3.19, - "learning_rate": 3.373630792455004e-05, - "loss": 0.3358, - "step": 5313000 - }, - { - "epoch": 3.19, - "learning_rate": 3.373420795898947e-05, - "loss": 0.3374, - "step": 5313500 - }, - { - "epoch": 3.19, - "learning_rate": 3.3732107993428905e-05, - "loss": 0.3291, - "step": 5314000 - }, - { - "epoch": 3.19, - "learning_rate": 3.373000802786834e-05, - "loss": 0.3289, - "step": 5314500 - }, - { - "epoch": 3.19, - "learning_rate": 3.372790806230778e-05, - "loss": 0.3316, - "step": 5315000 - }, - { - "epoch": 3.19, - "learning_rate": 3.372580809674721e-05, - "loss": 0.3352, - "step": 5315500 - }, - { - "epoch": 3.19, - "learning_rate": 3.3723708131186646e-05, - "loss": 0.3296, - "step": 5316000 - }, - { - "epoch": 3.19, - "learning_rate": 3.3721616565488326e-05, - "loss": 0.3333, - "step": 5316500 - }, - { - "epoch": 3.19, - "learning_rate": 3.371951659992776e-05, - "loss": 0.3302, - "step": 5317000 - }, - { - "epoch": 3.19, - "learning_rate": 3.37174166343672e-05, - "loss": 0.3355, - "step": 5317500 - }, - { - "epoch": 3.19, - "learning_rate": 3.371531666880663e-05, - "loss": 0.3383, - "step": 5318000 - }, - { - "epoch": 3.19, - "learning_rate": 3.371321670324607e-05, - "loss": 0.3266, - "step": 5318500 - }, - { - "epoch": 3.19, - "learning_rate": 3.37111167376855e-05, - "loss": 0.3336, - "step": 5319000 - }, - { - "epoch": 3.19, - "learning_rate": 3.3709016772124934e-05, - "loss": 0.3208, - "step": 5319500 - }, - { - "epoch": 3.19, - "learning_rate": 3.3706921006495494e-05, - "loss": 0.3331, - "step": 5320000 - }, - { - "epoch": 3.19, - "learning_rate": 3.370482104093493e-05, - "loss": 0.3271, - "step": 5320500 - }, - { - "epoch": 3.19, - "learning_rate": 3.370272107537436e-05, - "loss": 0.3322, - "step": 5321000 - }, - { - "epoch": 3.19, - "learning_rate": 3.3700621109813794e-05, - "loss": 0.3195, - "step": 5321500 - }, - { - "epoch": 3.19, - "learning_rate": 3.3698521144253235e-05, - "loss": 0.3278, - "step": 5322000 - }, - { - "epoch": 3.19, - "learning_rate": 3.369642117869267e-05, - "loss": 0.3322, - "step": 5322500 - }, - { - "epoch": 3.19, - "learning_rate": 3.36943212131321e-05, - "loss": 0.3291, - "step": 5323000 - }, - { - "epoch": 3.19, - "learning_rate": 3.369222124757154e-05, - "loss": 0.3403, - "step": 5323500 - }, - { - "epoch": 3.19, - "learning_rate": 3.3690129681873216e-05, - "loss": 0.328, - "step": 5324000 - }, - { - "epoch": 3.19, - "learning_rate": 3.3688029716312656e-05, - "loss": 0.3271, - "step": 5324500 - }, - { - "epoch": 3.19, - "learning_rate": 3.368592975075209e-05, - "loss": 0.3302, - "step": 5325000 - }, - { - "epoch": 3.19, - "learning_rate": 3.368382978519152e-05, - "loss": 0.3392, - "step": 5325500 - }, - { - "epoch": 3.19, - "learning_rate": 3.3681729819630956e-05, - "loss": 0.328, - "step": 5326000 - }, - { - "epoch": 3.19, - "learning_rate": 3.367962985407039e-05, - "loss": 0.3235, - "step": 5326500 - }, - { - "epoch": 3.19, - "learning_rate": 3.367752988850983e-05, - "loss": 0.3353, - "step": 5327000 - }, - { - "epoch": 3.19, - "learning_rate": 3.367542992294926e-05, - "loss": 0.3303, - "step": 5327500 - }, - { - "epoch": 3.19, - "learning_rate": 3.367333415731982e-05, - "loss": 0.3335, - "step": 5328000 - }, - { - "epoch": 3.19, - "learning_rate": 3.367123419175925e-05, - "loss": 0.3288, - "step": 5328500 - }, - { - "epoch": 3.19, - "learning_rate": 3.366913422619869e-05, - "loss": 0.3275, - "step": 5329000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3667034260638124e-05, - "loss": 0.3224, - "step": 5329500 - }, - { - "epoch": 3.2, - "learning_rate": 3.3664938495008684e-05, - "loss": 0.3289, - "step": 5330000 - }, - { - "epoch": 3.2, - "learning_rate": 3.366283852944811e-05, - "loss": 0.3244, - "step": 5330500 - }, - { - "epoch": 3.2, - "learning_rate": 3.366073856388755e-05, - "loss": 0.3407, - "step": 5331000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3658638598326985e-05, - "loss": 0.3283, - "step": 5331500 - }, - { - "epoch": 3.2, - "learning_rate": 3.3656542832697545e-05, - "loss": 0.3404, - "step": 5332000 - }, - { - "epoch": 3.2, - "learning_rate": 3.365444286713698e-05, - "loss": 0.3342, - "step": 5332500 - }, - { - "epoch": 3.2, - "learning_rate": 3.365234290157641e-05, - "loss": 0.3336, - "step": 5333000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3650242936015845e-05, - "loss": 0.3335, - "step": 5333500 - }, - { - "epoch": 3.2, - "learning_rate": 3.3648142970455286e-05, - "loss": 0.3373, - "step": 5334000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3646047204825846e-05, - "loss": 0.3358, - "step": 5334500 - }, - { - "epoch": 3.2, - "learning_rate": 3.364394723926527e-05, - "loss": 0.3249, - "step": 5335000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3641847273704706e-05, - "loss": 0.337, - "step": 5335500 - }, - { - "epoch": 3.2, - "learning_rate": 3.3639747308144146e-05, - "loss": 0.3243, - "step": 5336000 - }, - { - "epoch": 3.2, - "learning_rate": 3.363764734258358e-05, - "loss": 0.3266, - "step": 5336500 - }, - { - "epoch": 3.2, - "learning_rate": 3.363555157695414e-05, - "loss": 0.3352, - "step": 5337000 - }, - { - "epoch": 3.2, - "learning_rate": 3.363345161139357e-05, - "loss": 0.3274, - "step": 5337500 - }, - { - "epoch": 3.2, - "learning_rate": 3.363135164583301e-05, - "loss": 0.3323, - "step": 5338000 - }, - { - "epoch": 3.2, - "learning_rate": 3.362925168027244e-05, - "loss": 0.327, - "step": 5338500 - }, - { - "epoch": 3.2, - "learning_rate": 3.3627151714711874e-05, - "loss": 0.3241, - "step": 5339000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3625051749151314e-05, - "loss": 0.325, - "step": 5339500 - }, - { - "epoch": 3.2, - "learning_rate": 3.362295178359075e-05, - "loss": 0.3324, - "step": 5340000 - }, - { - "epoch": 3.2, - "learning_rate": 3.362085181803018e-05, - "loss": 0.34, - "step": 5340500 - }, - { - "epoch": 3.2, - "learning_rate": 3.361875605240074e-05, - "loss": 0.3274, - "step": 5341000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3616656086840175e-05, - "loss": 0.3349, - "step": 5341500 - }, - { - "epoch": 3.2, - "learning_rate": 3.361455612127961e-05, - "loss": 0.3326, - "step": 5342000 - }, - { - "epoch": 3.2, - "learning_rate": 3.361245615571905e-05, - "loss": 0.3364, - "step": 5342500 - }, - { - "epoch": 3.2, - "learning_rate": 3.36103603900896e-05, - "loss": 0.3304, - "step": 5343000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3608260424529036e-05, - "loss": 0.3342, - "step": 5343500 - }, - { - "epoch": 3.2, - "learning_rate": 3.360616045896847e-05, - "loss": 0.3195, - "step": 5344000 - }, - { - "epoch": 3.2, - "learning_rate": 3.360406049340791e-05, - "loss": 0.3245, - "step": 5344500 - }, - { - "epoch": 3.2, - "learning_rate": 3.360196472777846e-05, - "loss": 0.3346, - "step": 5345000 - }, - { - "epoch": 3.2, - "learning_rate": 3.3599864762217897e-05, - "loss": 0.3311, - "step": 5345500 - }, - { - "epoch": 3.21, - "learning_rate": 3.359776899658846e-05, - "loss": 0.3318, - "step": 5346000 - }, - { - "epoch": 3.21, - "learning_rate": 3.359566903102789e-05, - "loss": 0.336, - "step": 5346500 - }, - { - "epoch": 3.21, - "learning_rate": 3.3593569065467324e-05, - "loss": 0.334, - "step": 5347000 - }, - { - "epoch": 3.21, - "learning_rate": 3.359146909990676e-05, - "loss": 0.3343, - "step": 5347500 - }, - { - "epoch": 3.21, - "learning_rate": 3.35893691343462e-05, - "loss": 0.3245, - "step": 5348000 - }, - { - "epoch": 3.21, - "learning_rate": 3.358726916878563e-05, - "loss": 0.3308, - "step": 5348500 - }, - { - "epoch": 3.21, - "learning_rate": 3.358517340315619e-05, - "loss": 0.3321, - "step": 5349000 - }, - { - "epoch": 3.21, - "learning_rate": 3.358307343759562e-05, - "loss": 0.3305, - "step": 5349500 - }, - { - "epoch": 3.21, - "learning_rate": 3.358097347203506e-05, - "loss": 0.3405, - "step": 5350000 - }, - { - "epoch": 3.21, - "learning_rate": 3.357887350647449e-05, - "loss": 0.328, - "step": 5350500 - }, - { - "epoch": 3.21, - "learning_rate": 3.357677774084505e-05, - "loss": 0.3346, - "step": 5351000 - }, - { - "epoch": 3.21, - "learning_rate": 3.357467777528448e-05, - "loss": 0.3245, - "step": 5351500 - }, - { - "epoch": 3.21, - "learning_rate": 3.357257780972392e-05, - "loss": 0.3298, - "step": 5352000 - }, - { - "epoch": 3.21, - "learning_rate": 3.357047784416335e-05, - "loss": 0.3371, - "step": 5352500 - }, - { - "epoch": 3.21, - "learning_rate": 3.3568377878602786e-05, - "loss": 0.3334, - "step": 5353000 - }, - { - "epoch": 3.21, - "learning_rate": 3.3566277913042226e-05, - "loss": 0.3325, - "step": 5353500 - }, - { - "epoch": 3.21, - "learning_rate": 3.356417794748166e-05, - "loss": 0.3301, - "step": 5354000 - }, - { - "epoch": 3.21, - "learning_rate": 3.356208218185221e-05, - "loss": 0.3296, - "step": 5354500 - }, - { - "epoch": 3.21, - "learning_rate": 3.355998221629165e-05, - "loss": 0.3348, - "step": 5355000 - }, - { - "epoch": 3.21, - "learning_rate": 3.355788225073109e-05, - "loss": 0.3352, - "step": 5355500 - }, - { - "epoch": 3.21, - "learning_rate": 3.355578228517052e-05, - "loss": 0.3368, - "step": 5356000 - }, - { - "epoch": 3.21, - "learning_rate": 3.355368231960996e-05, - "loss": 0.3301, - "step": 5356500 - }, - { - "epoch": 3.21, - "learning_rate": 3.3551582354049394e-05, - "loss": 0.3291, - "step": 5357000 - }, - { - "epoch": 3.21, - "learning_rate": 3.354948658841995e-05, - "loss": 0.3359, - "step": 5357500 - }, - { - "epoch": 3.21, - "learning_rate": 3.354738662285938e-05, - "loss": 0.3297, - "step": 5358000 - }, - { - "epoch": 3.21, - "learning_rate": 3.354528665729882e-05, - "loss": 0.3269, - "step": 5358500 - }, - { - "epoch": 3.21, - "learning_rate": 3.3543186691738255e-05, - "loss": 0.3233, - "step": 5359000 - }, - { - "epoch": 3.21, - "learning_rate": 3.354108672617769e-05, - "loss": 0.3318, - "step": 5359500 - }, - { - "epoch": 3.21, - "learning_rate": 3.353899096054824e-05, - "loss": 0.3294, - "step": 5360000 - }, - { - "epoch": 3.21, - "learning_rate": 3.353689099498768e-05, - "loss": 0.3308, - "step": 5360500 - }, - { - "epoch": 3.21, - "learning_rate": 3.3534791029427115e-05, - "loss": 0.3305, - "step": 5361000 - }, - { - "epoch": 3.21, - "learning_rate": 3.353269106386655e-05, - "loss": 0.3331, - "step": 5361500 - }, - { - "epoch": 3.21, - "learning_rate": 3.353059109830599e-05, - "loss": 0.3263, - "step": 5362000 - }, - { - "epoch": 3.22, - "learning_rate": 3.352849113274542e-05, - "loss": 0.3291, - "step": 5362500 - }, - { - "epoch": 3.22, - "learning_rate": 3.352639116718486e-05, - "loss": 0.3283, - "step": 5363000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3524291201624296e-05, - "loss": 0.3364, - "step": 5363500 - }, - { - "epoch": 3.22, - "learning_rate": 3.352219543599485e-05, - "loss": 0.3343, - "step": 5364000 - }, - { - "epoch": 3.22, - "learning_rate": 3.352009547043428e-05, - "loss": 0.3454, - "step": 5364500 - }, - { - "epoch": 3.22, - "learning_rate": 3.3517995504873724e-05, - "loss": 0.3351, - "step": 5365000 - }, - { - "epoch": 3.22, - "learning_rate": 3.351589553931316e-05, - "loss": 0.3373, - "step": 5365500 - }, - { - "epoch": 3.22, - "learning_rate": 3.351379977368371e-05, - "loss": 0.3321, - "step": 5366000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3511699808123144e-05, - "loss": 0.3309, - "step": 5366500 - }, - { - "epoch": 3.22, - "learning_rate": 3.3509599842562584e-05, - "loss": 0.3256, - "step": 5367000 - }, - { - "epoch": 3.22, - "learning_rate": 3.350749987700202e-05, - "loss": 0.3307, - "step": 5367500 - }, - { - "epoch": 3.22, - "learning_rate": 3.35054083113037e-05, - "loss": 0.32, - "step": 5368000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3503308345743125e-05, - "loss": 0.3467, - "step": 5368500 - }, - { - "epoch": 3.22, - "learning_rate": 3.3501208380182565e-05, - "loss": 0.3279, - "step": 5369000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3499108414622e-05, - "loss": 0.3271, - "step": 5369500 - }, - { - "epoch": 3.22, - "learning_rate": 3.349700844906143e-05, - "loss": 0.3334, - "step": 5370000 - }, - { - "epoch": 3.22, - "learning_rate": 3.349490848350087e-05, - "loss": 0.3407, - "step": 5370500 - }, - { - "epoch": 3.22, - "learning_rate": 3.3492812717871426e-05, - "loss": 0.3207, - "step": 5371000 - }, - { - "epoch": 3.22, - "learning_rate": 3.349071275231086e-05, - "loss": 0.3358, - "step": 5371500 - }, - { - "epoch": 3.22, - "learning_rate": 3.348861278675029e-05, - "loss": 0.3373, - "step": 5372000 - }, - { - "epoch": 3.22, - "learning_rate": 3.348651282118973e-05, - "loss": 0.3364, - "step": 5372500 - }, - { - "epoch": 3.22, - "learning_rate": 3.3484412855629166e-05, - "loss": 0.33, - "step": 5373000 - }, - { - "epoch": 3.22, - "learning_rate": 3.348231708999972e-05, - "loss": 0.335, - "step": 5373500 - }, - { - "epoch": 3.22, - "learning_rate": 3.3480217124439153e-05, - "loss": 0.3181, - "step": 5374000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3478117158878594e-05, - "loss": 0.3365, - "step": 5374500 - }, - { - "epoch": 3.22, - "learning_rate": 3.347601719331803e-05, - "loss": 0.3245, - "step": 5375000 - }, - { - "epoch": 3.22, - "learning_rate": 3.347391722775747e-05, - "loss": 0.3332, - "step": 5375500 - }, - { - "epoch": 3.22, - "learning_rate": 3.347182146212802e-05, - "loss": 0.3301, - "step": 5376000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3469721496567454e-05, - "loss": 0.3252, - "step": 5376500 - }, - { - "epoch": 3.22, - "learning_rate": 3.346762153100689e-05, - "loss": 0.3327, - "step": 5377000 - }, - { - "epoch": 3.22, - "learning_rate": 3.346552156544633e-05, - "loss": 0.3397, - "step": 5377500 - }, - { - "epoch": 3.22, - "learning_rate": 3.346342159988576e-05, - "loss": 0.3341, - "step": 5378000 - }, - { - "epoch": 3.22, - "learning_rate": 3.3461325834256315e-05, - "loss": 0.3224, - "step": 5378500 - }, - { - "epoch": 3.22, - "learning_rate": 3.345922586869575e-05, - "loss": 0.334, - "step": 5379000 - }, - { - "epoch": 3.23, - "learning_rate": 3.345712590313519e-05, - "loss": 0.3196, - "step": 5379500 - }, - { - "epoch": 3.23, - "learning_rate": 3.345502593757462e-05, - "loss": 0.3314, - "step": 5380000 - }, - { - "epoch": 3.23, - "learning_rate": 3.3452925972014056e-05, - "loss": 0.3372, - "step": 5380500 - }, - { - "epoch": 3.23, - "learning_rate": 3.3450826006453496e-05, - "loss": 0.3265, - "step": 5381000 - }, - { - "epoch": 3.23, - "learning_rate": 3.344872604089293e-05, - "loss": 0.3282, - "step": 5381500 - }, - { - "epoch": 3.23, - "learning_rate": 3.344662607533236e-05, - "loss": 0.3345, - "step": 5382000 - }, - { - "epoch": 3.23, - "learning_rate": 3.344453030970292e-05, - "loss": 0.3298, - "step": 5382500 - }, - { - "epoch": 3.23, - "learning_rate": 3.344243034414236e-05, - "loss": 0.3254, - "step": 5383000 - }, - { - "epoch": 3.23, - "learning_rate": 3.344033457851291e-05, - "loss": 0.3327, - "step": 5383500 - }, - { - "epoch": 3.23, - "learning_rate": 3.3438234612952344e-05, - "loss": 0.3359, - "step": 5384000 - }, - { - "epoch": 3.23, - "learning_rate": 3.3436134647391784e-05, - "loss": 0.3337, - "step": 5384500 - }, - { - "epoch": 3.23, - "learning_rate": 3.343403468183122e-05, - "loss": 0.3269, - "step": 5385000 - }, - { - "epoch": 3.23, - "learning_rate": 3.343193471627065e-05, - "loss": 0.3333, - "step": 5385500 - }, - { - "epoch": 3.23, - "learning_rate": 3.3429838950641205e-05, - "loss": 0.3336, - "step": 5386000 - }, - { - "epoch": 3.23, - "learning_rate": 3.3427738985080645e-05, - "loss": 0.3239, - "step": 5386500 - }, - { - "epoch": 3.23, - "learning_rate": 3.342563901952008e-05, - "loss": 0.3326, - "step": 5387000 - }, - { - "epoch": 3.23, - "learning_rate": 3.342353905395951e-05, - "loss": 0.3465, - "step": 5387500 - }, - { - "epoch": 3.23, - "learning_rate": 3.342143908839895e-05, - "loss": 0.3255, - "step": 5388000 - }, - { - "epoch": 3.23, - "learning_rate": 3.3419343322769505e-05, - "loss": 0.3323, - "step": 5388500 - }, - { - "epoch": 3.23, - "learning_rate": 3.341724335720894e-05, - "loss": 0.3359, - "step": 5389000 - }, - { - "epoch": 3.23, - "learning_rate": 3.341514339164838e-05, - "loss": 0.3255, - "step": 5389500 - }, - { - "epoch": 3.23, - "learning_rate": 3.341304342608781e-05, - "loss": 0.3367, - "step": 5390000 - }, - { - "epoch": 3.23, - "learning_rate": 3.3410943460527246e-05, - "loss": 0.3263, - "step": 5390500 - }, - { - "epoch": 3.23, - "learning_rate": 3.3408843494966686e-05, - "loss": 0.3353, - "step": 5391000 - }, - { - "epoch": 3.23, - "learning_rate": 3.340674352940612e-05, - "loss": 0.3366, - "step": 5391500 - }, - { - "epoch": 3.23, - "learning_rate": 3.340464776377667e-05, - "loss": 0.3292, - "step": 5392000 - }, - { - "epoch": 3.23, - "learning_rate": 3.340254779821611e-05, - "loss": 0.3346, - "step": 5392500 - }, - { - "epoch": 3.23, - "learning_rate": 3.340044783265555e-05, - "loss": 0.3291, - "step": 5393000 - }, - { - "epoch": 3.23, - "learning_rate": 3.339834786709498e-05, - "loss": 0.3333, - "step": 5393500 - }, - { - "epoch": 3.23, - "learning_rate": 3.3396247901534414e-05, - "loss": 0.3406, - "step": 5394000 - }, - { - "epoch": 3.23, - "learning_rate": 3.339415213590497e-05, - "loss": 0.3308, - "step": 5394500 - }, - { - "epoch": 3.23, - "learning_rate": 3.339205217034441e-05, - "loss": 0.3247, - "step": 5395000 - }, - { - "epoch": 3.23, - "learning_rate": 3.338995220478384e-05, - "loss": 0.3274, - "step": 5395500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3387852239223275e-05, - "loss": 0.3332, - "step": 5396000 - }, - { - "epoch": 3.24, - "learning_rate": 3.3385752273662715e-05, - "loss": 0.3318, - "step": 5396500 - }, - { - "epoch": 3.24, - "learning_rate": 3.338365230810215e-05, - "loss": 0.3252, - "step": 5397000 - }, - { - "epoch": 3.24, - "learning_rate": 3.338155234254158e-05, - "loss": 0.3345, - "step": 5397500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3379452376981015e-05, - "loss": 0.3324, - "step": 5398000 - }, - { - "epoch": 3.24, - "learning_rate": 3.3377356611351576e-05, - "loss": 0.3354, - "step": 5398500 - }, - { - "epoch": 3.24, - "learning_rate": 3.337526084572213e-05, - "loss": 0.3307, - "step": 5399000 - }, - { - "epoch": 3.24, - "learning_rate": 3.337316088016156e-05, - "loss": 0.3386, - "step": 5399500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3371060914601e-05, - "loss": 0.3299, - "step": 5400000 - }, - { - "epoch": 3.24, - "eval_loss": 0.32459577918052673, - "eval_runtime": 1114.6554, - "eval_samples_per_second": 472.541, - "eval_steps_per_second": 78.757, - "step": 5400000 - }, - { - "epoch": 3.24, - "learning_rate": 3.3368960949040436e-05, - "loss": 0.3301, - "step": 5400500 - }, - { - "epoch": 3.24, - "learning_rate": 3.336686098347987e-05, - "loss": 0.3289, - "step": 5401000 - }, - { - "epoch": 3.24, - "learning_rate": 3.336476101791931e-05, - "loss": 0.3331, - "step": 5401500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3362661052358744e-05, - "loss": 0.3413, - "step": 5402000 - }, - { - "epoch": 3.24, - "learning_rate": 3.336056108679817e-05, - "loss": 0.3335, - "step": 5402500 - }, - { - "epoch": 3.24, - "learning_rate": 3.335846532116873e-05, - "loss": 0.3361, - "step": 5403000 - }, - { - "epoch": 3.24, - "learning_rate": 3.335636535560817e-05, - "loss": 0.3364, - "step": 5403500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3354265390047604e-05, - "loss": 0.3306, - "step": 5404000 - }, - { - "epoch": 3.24, - "learning_rate": 3.335216542448704e-05, - "loss": 0.3376, - "step": 5404500 - }, - { - "epoch": 3.24, - "learning_rate": 3.335006545892647e-05, - "loss": 0.3378, - "step": 5405000 - }, - { - "epoch": 3.24, - "learning_rate": 3.3347965493365905e-05, - "loss": 0.3262, - "step": 5405500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3345865527805345e-05, - "loss": 0.3259, - "step": 5406000 - }, - { - "epoch": 3.24, - "learning_rate": 3.334376556224478e-05, - "loss": 0.3379, - "step": 5406500 - }, - { - "epoch": 3.24, - "learning_rate": 3.334167399654646e-05, - "loss": 0.3302, - "step": 5407000 - }, - { - "epoch": 3.24, - "learning_rate": 3.333957403098589e-05, - "loss": 0.3287, - "step": 5407500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3337474065425326e-05, - "loss": 0.3277, - "step": 5408000 - }, - { - "epoch": 3.24, - "learning_rate": 3.3335374099864766e-05, - "loss": 0.3225, - "step": 5408500 - }, - { - "epoch": 3.24, - "learning_rate": 3.33332741343042e-05, - "loss": 0.3294, - "step": 5409000 - }, - { - "epoch": 3.24, - "learning_rate": 3.333118256860587e-05, - "loss": 0.3317, - "step": 5409500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3329091002907554e-05, - "loss": 0.3266, - "step": 5410000 - }, - { - "epoch": 3.24, - "learning_rate": 3.332699103734699e-05, - "loss": 0.3337, - "step": 5410500 - }, - { - "epoch": 3.24, - "learning_rate": 3.332489107178642e-05, - "loss": 0.3325, - "step": 5411000 - }, - { - "epoch": 3.24, - "learning_rate": 3.332279110622586e-05, - "loss": 0.3286, - "step": 5411500 - }, - { - "epoch": 3.24, - "learning_rate": 3.3320691140665294e-05, - "loss": 0.3303, - "step": 5412000 - }, - { - "epoch": 3.25, - "learning_rate": 3.331859117510473e-05, - "loss": 0.3283, - "step": 5412500 - }, - { - "epoch": 3.25, - "learning_rate": 3.331649120954417e-05, - "loss": 0.3266, - "step": 5413000 - }, - { - "epoch": 3.25, - "learning_rate": 3.3314391243983595e-05, - "loss": 0.3201, - "step": 5413500 - }, - { - "epoch": 3.25, - "learning_rate": 3.331229127842303e-05, - "loss": 0.3424, - "step": 5414000 - }, - { - "epoch": 3.25, - "learning_rate": 3.331019131286247e-05, - "loss": 0.3308, - "step": 5414500 - }, - { - "epoch": 3.25, - "learning_rate": 3.33080913473019e-05, - "loss": 0.3263, - "step": 5415000 - }, - { - "epoch": 3.25, - "learning_rate": 3.3305991381741335e-05, - "loss": 0.3429, - "step": 5415500 - }, - { - "epoch": 3.25, - "learning_rate": 3.3303891416180775e-05, - "loss": 0.3258, - "step": 5416000 - }, - { - "epoch": 3.25, - "learning_rate": 3.330179145062021e-05, - "loss": 0.337, - "step": 5416500 - }, - { - "epoch": 3.25, - "learning_rate": 3.329969568499076e-05, - "loss": 0.3346, - "step": 5417000 - }, - { - "epoch": 3.25, - "learning_rate": 3.32975957194302e-05, - "loss": 0.3366, - "step": 5417500 - }, - { - "epoch": 3.25, - "learning_rate": 3.3295495753869636e-05, - "loss": 0.3324, - "step": 5418000 - }, - { - "epoch": 3.25, - "learning_rate": 3.329339578830907e-05, - "loss": 0.335, - "step": 5418500 - }, - { - "epoch": 3.25, - "learning_rate": 3.329129582274851e-05, - "loss": 0.3285, - "step": 5419000 - }, - { - "epoch": 3.25, - "learning_rate": 3.328919585718794e-05, - "loss": 0.3353, - "step": 5419500 - }, - { - "epoch": 3.25, - "learning_rate": 3.328709589162738e-05, - "loss": 0.3339, - "step": 5420000 - }, - { - "epoch": 3.25, - "learning_rate": 3.328499592606682e-05, - "loss": 0.3187, - "step": 5420500 - }, - { - "epoch": 3.25, - "learning_rate": 3.328289596050625e-05, - "loss": 0.3235, - "step": 5421000 - }, - { - "epoch": 3.25, - "learning_rate": 3.328079599494568e-05, - "loss": 0.3289, - "step": 5421500 - }, - { - "epoch": 3.25, - "learning_rate": 3.327869602938512e-05, - "loss": 0.3313, - "step": 5422000 - }, - { - "epoch": 3.25, - "learning_rate": 3.327659606382455e-05, - "loss": 0.3273, - "step": 5422500 - }, - { - "epoch": 3.25, - "learning_rate": 3.327450449812623e-05, - "loss": 0.3303, - "step": 5423000 - }, - { - "epoch": 3.25, - "learning_rate": 3.3272404532565665e-05, - "loss": 0.3315, - "step": 5423500 - }, - { - "epoch": 3.25, - "learning_rate": 3.32703045670051e-05, - "loss": 0.3303, - "step": 5424000 - }, - { - "epoch": 3.25, - "learning_rate": 3.326820460144454e-05, - "loss": 0.3305, - "step": 5424500 - }, - { - "epoch": 3.25, - "learning_rate": 3.326610883581509e-05, - "loss": 0.3301, - "step": 5425000 - }, - { - "epoch": 3.25, - "learning_rate": 3.3264008870254525e-05, - "loss": 0.3229, - "step": 5425500 - }, - { - "epoch": 3.25, - "learning_rate": 3.3261908904693966e-05, - "loss": 0.3228, - "step": 5426000 - }, - { - "epoch": 3.25, - "learning_rate": 3.32598089391334e-05, - "loss": 0.3247, - "step": 5426500 - }, - { - "epoch": 3.25, - "learning_rate": 3.325770897357283e-05, - "loss": 0.3348, - "step": 5427000 - }, - { - "epoch": 3.25, - "learning_rate": 3.3255613207943386e-05, - "loss": 0.3212, - "step": 5427500 - }, - { - "epoch": 3.25, - "learning_rate": 3.3253513242382826e-05, - "loss": 0.3328, - "step": 5428000 - }, - { - "epoch": 3.25, - "learning_rate": 3.325141327682226e-05, - "loss": 0.332, - "step": 5428500 - }, - { - "epoch": 3.25, - "learning_rate": 3.324931331126169e-05, - "loss": 0.3367, - "step": 5429000 - }, - { - "epoch": 3.26, - "learning_rate": 3.3247213345701134e-05, - "loss": 0.3411, - "step": 5429500 - }, - { - "epoch": 3.26, - "learning_rate": 3.324511338014057e-05, - "loss": 0.3235, - "step": 5430000 - }, - { - "epoch": 3.26, - "learning_rate": 3.324301341458e-05, - "loss": 0.3328, - "step": 5430500 - }, - { - "epoch": 3.26, - "learning_rate": 3.3240917648950554e-05, - "loss": 0.3353, - "step": 5431000 - }, - { - "epoch": 3.26, - "learning_rate": 3.3238821883321114e-05, - "loss": 0.3201, - "step": 5431500 - }, - { - "epoch": 3.26, - "learning_rate": 3.323672191776055e-05, - "loss": 0.3407, - "step": 5432000 - }, - { - "epoch": 3.26, - "learning_rate": 3.323462195219998e-05, - "loss": 0.3331, - "step": 5432500 - }, - { - "epoch": 3.26, - "learning_rate": 3.323252198663942e-05, - "loss": 0.3334, - "step": 5433000 - }, - { - "epoch": 3.26, - "learning_rate": 3.3230422021078855e-05, - "loss": 0.3443, - "step": 5433500 - }, - { - "epoch": 3.26, - "learning_rate": 3.322832205551829e-05, - "loss": 0.3244, - "step": 5434000 - }, - { - "epoch": 3.26, - "learning_rate": 3.322622208995773e-05, - "loss": 0.3389, - "step": 5434500 - }, - { - "epoch": 3.26, - "learning_rate": 3.322412212439716e-05, - "loss": 0.3308, - "step": 5435000 - }, - { - "epoch": 3.26, - "learning_rate": 3.3222022158836596e-05, - "loss": 0.3289, - "step": 5435500 - }, - { - "epoch": 3.26, - "learning_rate": 3.321992639320715e-05, - "loss": 0.3398, - "step": 5436000 - }, - { - "epoch": 3.26, - "learning_rate": 3.321782642764659e-05, - "loss": 0.3292, - "step": 5436500 - }, - { - "epoch": 3.26, - "learning_rate": 3.321572646208602e-05, - "loss": 0.3363, - "step": 5437000 - }, - { - "epoch": 3.26, - "learning_rate": 3.3213626496525456e-05, - "loss": 0.3244, - "step": 5437500 - }, - { - "epoch": 3.26, - "learning_rate": 3.32115265309649e-05, - "loss": 0.3284, - "step": 5438000 - }, - { - "epoch": 3.26, - "learning_rate": 3.320942656540432e-05, - "loss": 0.3333, - "step": 5438500 - }, - { - "epoch": 3.26, - "learning_rate": 3.3207330799774884e-05, - "loss": 0.3381, - "step": 5439000 - }, - { - "epoch": 3.26, - "learning_rate": 3.320523503414544e-05, - "loss": 0.3274, - "step": 5439500 - }, - { - "epoch": 3.26, - "learning_rate": 3.320313506858488e-05, - "loss": 0.3345, - "step": 5440000 - }, - { - "epoch": 3.26, - "learning_rate": 3.320103510302431e-05, - "loss": 0.3269, - "step": 5440500 - }, - { - "epoch": 3.26, - "learning_rate": 3.3198935137463744e-05, - "loss": 0.332, - "step": 5441000 - }, - { - "epoch": 3.26, - "learning_rate": 3.3196835171903185e-05, - "loss": 0.322, - "step": 5441500 - }, - { - "epoch": 3.26, - "learning_rate": 3.319473520634262e-05, - "loss": 0.3359, - "step": 5442000 - }, - { - "epoch": 3.26, - "learning_rate": 3.319263524078205e-05, - "loss": 0.3329, - "step": 5442500 - }, - { - "epoch": 3.26, - "learning_rate": 3.3190535275221485e-05, - "loss": 0.3341, - "step": 5443000 - }, - { - "epoch": 3.26, - "learning_rate": 3.318843530966092e-05, - "loss": 0.3373, - "step": 5443500 - }, - { - "epoch": 3.26, - "learning_rate": 3.318633534410035e-05, - "loss": 0.331, - "step": 5444000 - }, - { - "epoch": 3.26, - "learning_rate": 3.318423537853979e-05, - "loss": 0.3273, - "step": 5444500 - }, - { - "epoch": 3.26, - "learning_rate": 3.3182135412979226e-05, - "loss": 0.3239, - "step": 5445000 - }, - { - "epoch": 3.26, - "learning_rate": 3.318003964734978e-05, - "loss": 0.3296, - "step": 5445500 - }, - { - "epoch": 3.27, - "learning_rate": 3.317794388172034e-05, - "loss": 0.3457, - "step": 5446000 - }, - { - "epoch": 3.27, - "learning_rate": 3.317584391615978e-05, - "loss": 0.3303, - "step": 5446500 - }, - { - "epoch": 3.27, - "learning_rate": 3.317374395059921e-05, - "loss": 0.3238, - "step": 5447000 - }, - { - "epoch": 3.27, - "learning_rate": 3.317164398503865e-05, - "loss": 0.3352, - "step": 5447500 - }, - { - "epoch": 3.27, - "learning_rate": 3.316954401947808e-05, - "loss": 0.3223, - "step": 5448000 - }, - { - "epoch": 3.27, - "learning_rate": 3.3167444053917514e-05, - "loss": 0.3277, - "step": 5448500 - }, - { - "epoch": 3.27, - "learning_rate": 3.3165348288288074e-05, - "loss": 0.3406, - "step": 5449000 - }, - { - "epoch": 3.27, - "learning_rate": 3.316324832272751e-05, - "loss": 0.3253, - "step": 5449500 - }, - { - "epoch": 3.27, - "learning_rate": 3.316114835716694e-05, - "loss": 0.3325, - "step": 5450000 - }, - { - "epoch": 3.27, - "learning_rate": 3.3159048391606374e-05, - "loss": 0.336, - "step": 5450500 - }, - { - "epoch": 3.27, - "learning_rate": 3.315694842604581e-05, - "loss": 0.3299, - "step": 5451000 - }, - { - "epoch": 3.27, - "learning_rate": 3.315484846048525e-05, - "loss": 0.3349, - "step": 5451500 - }, - { - "epoch": 3.27, - "learning_rate": 3.315275269485581e-05, - "loss": 0.3373, - "step": 5452000 - }, - { - "epoch": 3.27, - "learning_rate": 3.3150652729295235e-05, - "loss": 0.3272, - "step": 5452500 - }, - { - "epoch": 3.27, - "learning_rate": 3.3148552763734675e-05, - "loss": 0.3311, - "step": 5453000 - }, - { - "epoch": 3.27, - "learning_rate": 3.314645279817411e-05, - "loss": 0.3255, - "step": 5453500 - }, - { - "epoch": 3.27, - "learning_rate": 3.314435283261354e-05, - "loss": 0.3305, - "step": 5454000 - }, - { - "epoch": 3.27, - "learning_rate": 3.314225286705298e-05, - "loss": 0.3367, - "step": 5454500 - }, - { - "epoch": 3.27, - "learning_rate": 3.3140152901492416e-05, - "loss": 0.3361, - "step": 5455000 - }, - { - "epoch": 3.27, - "learning_rate": 3.313805293593185e-05, - "loss": 0.3357, - "step": 5455500 - }, - { - "epoch": 3.27, - "learning_rate": 3.31359571703024e-05, - "loss": 0.3314, - "step": 5456000 - }, - { - "epoch": 3.27, - "learning_rate": 3.313385720474184e-05, - "loss": 0.3232, - "step": 5456500 - }, - { - "epoch": 3.27, - "learning_rate": 3.313175723918128e-05, - "loss": 0.3308, - "step": 5457000 - }, - { - "epoch": 3.27, - "learning_rate": 3.312965727362071e-05, - "loss": 0.3274, - "step": 5457500 - }, - { - "epoch": 3.27, - "learning_rate": 3.312755730806015e-05, - "loss": 0.3402, - "step": 5458000 - }, - { - "epoch": 3.27, - "learning_rate": 3.3125457342499584e-05, - "loss": 0.3352, - "step": 5458500 - }, - { - "epoch": 3.27, - "learning_rate": 3.312335737693902e-05, - "loss": 0.3397, - "step": 5459000 - }, - { - "epoch": 3.27, - "learning_rate": 3.312126161130957e-05, - "loss": 0.3327, - "step": 5459500 - }, - { - "epoch": 3.27, - "learning_rate": 3.311916164574901e-05, - "loss": 0.3262, - "step": 5460000 - }, - { - "epoch": 3.27, - "learning_rate": 3.3117061680188445e-05, - "loss": 0.3297, - "step": 5460500 - }, - { - "epoch": 3.27, - "learning_rate": 3.311496171462788e-05, - "loss": 0.3322, - "step": 5461000 - }, - { - "epoch": 3.27, - "learning_rate": 3.311286174906732e-05, - "loss": 0.335, - "step": 5461500 - }, - { - "epoch": 3.27, - "learning_rate": 3.311076178350675e-05, - "loss": 0.3365, - "step": 5462000 - }, - { - "epoch": 3.27, - "learning_rate": 3.3108661817946185e-05, - "loss": 0.3239, - "step": 5462500 - }, - { - "epoch": 3.28, - "learning_rate": 3.3106566052316745e-05, - "loss": 0.3297, - "step": 5463000 - }, - { - "epoch": 3.28, - "learning_rate": 3.310446608675618e-05, - "loss": 0.3298, - "step": 5463500 - }, - { - "epoch": 3.28, - "learning_rate": 3.310236612119561e-05, - "loss": 0.3335, - "step": 5464000 - }, - { - "epoch": 3.28, - "learning_rate": 3.310026615563505e-05, - "loss": 0.3271, - "step": 5464500 - }, - { - "epoch": 3.28, - "learning_rate": 3.309816619007448e-05, - "loss": 0.3253, - "step": 5465000 - }, - { - "epoch": 3.28, - "learning_rate": 3.309607042444504e-05, - "loss": 0.3266, - "step": 5465500 - }, - { - "epoch": 3.28, - "learning_rate": 3.309397045888447e-05, - "loss": 0.3261, - "step": 5466000 - }, - { - "epoch": 3.28, - "learning_rate": 3.309187049332391e-05, - "loss": 0.3345, - "step": 5466500 - }, - { - "epoch": 3.28, - "learning_rate": 3.308977052776335e-05, - "loss": 0.3304, - "step": 5467000 - }, - { - "epoch": 3.28, - "learning_rate": 3.3087670562202773e-05, - "loss": 0.3242, - "step": 5467500 - }, - { - "epoch": 3.28, - "learning_rate": 3.3085570596642214e-05, - "loss": 0.3382, - "step": 5468000 - }, - { - "epoch": 3.28, - "learning_rate": 3.308347063108165e-05, - "loss": 0.3355, - "step": 5468500 - }, - { - "epoch": 3.28, - "learning_rate": 3.308137066552108e-05, - "loss": 0.3251, - "step": 5469000 - }, - { - "epoch": 3.28, - "learning_rate": 3.307927489989164e-05, - "loss": 0.3242, - "step": 5469500 - }, - { - "epoch": 3.28, - "learning_rate": 3.3077174934331074e-05, - "loss": 0.3339, - "step": 5470000 - }, - { - "epoch": 3.28, - "learning_rate": 3.3075079168701635e-05, - "loss": 0.3265, - "step": 5470500 - }, - { - "epoch": 3.28, - "learning_rate": 3.307297920314107e-05, - "loss": 0.3304, - "step": 5471000 - }, - { - "epoch": 3.28, - "learning_rate": 3.307087923758051e-05, - "loss": 0.3267, - "step": 5471500 - }, - { - "epoch": 3.28, - "learning_rate": 3.306877927201994e-05, - "loss": 0.3199, - "step": 5472000 - }, - { - "epoch": 3.28, - "learning_rate": 3.306667930645937e-05, - "loss": 0.3325, - "step": 5472500 - }, - { - "epoch": 3.28, - "learning_rate": 3.306458354082993e-05, - "loss": 0.3286, - "step": 5473000 - }, - { - "epoch": 3.28, - "learning_rate": 3.306248357526937e-05, - "loss": 0.3231, - "step": 5473500 - }, - { - "epoch": 3.28, - "learning_rate": 3.30603836097088e-05, - "loss": 0.3307, - "step": 5474000 - }, - { - "epoch": 3.28, - "learning_rate": 3.3058283644148236e-05, - "loss": 0.3367, - "step": 5474500 - }, - { - "epoch": 3.28, - "learning_rate": 3.305618367858767e-05, - "loss": 0.3329, - "step": 5475000 - }, - { - "epoch": 3.28, - "learning_rate": 3.305408791295823e-05, - "loss": 0.335, - "step": 5475500 - }, - { - "epoch": 3.28, - "learning_rate": 3.3051987947397663e-05, - "loss": 0.3337, - "step": 5476000 - }, - { - "epoch": 3.28, - "learning_rate": 3.30498879818371e-05, - "loss": 0.3297, - "step": 5476500 - }, - { - "epoch": 3.28, - "learning_rate": 3.304778801627653e-05, - "loss": 0.3333, - "step": 5477000 - }, - { - "epoch": 3.28, - "learning_rate": 3.3045688050715964e-05, - "loss": 0.3238, - "step": 5477500 - }, - { - "epoch": 3.28, - "learning_rate": 3.3043592285086524e-05, - "loss": 0.3352, - "step": 5478000 - }, - { - "epoch": 3.28, - "learning_rate": 3.3041492319525964e-05, - "loss": 0.3218, - "step": 5478500 - }, - { - "epoch": 3.28, - "learning_rate": 3.30393923539654e-05, - "loss": 0.3238, - "step": 5479000 - }, - { - "epoch": 3.29, - "learning_rate": 3.3037292388404825e-05, - "loss": 0.3335, - "step": 5479500 - }, - { - "epoch": 3.29, - "learning_rate": 3.3035192422844265e-05, - "loss": 0.3225, - "step": 5480000 - }, - { - "epoch": 3.29, - "learning_rate": 3.30330924572837e-05, - "loss": 0.3268, - "step": 5480500 - }, - { - "epoch": 3.29, - "learning_rate": 3.303099669165426e-05, - "loss": 0.3223, - "step": 5481000 - }, - { - "epoch": 3.29, - "learning_rate": 3.302890092602481e-05, - "loss": 0.3283, - "step": 5481500 - }, - { - "epoch": 3.29, - "learning_rate": 3.3026800960464246e-05, - "loss": 0.3302, - "step": 5482000 - }, - { - "epoch": 3.29, - "learning_rate": 3.3024700994903686e-05, - "loss": 0.329, - "step": 5482500 - }, - { - "epoch": 3.29, - "learning_rate": 3.302260102934312e-05, - "loss": 0.3349, - "step": 5483000 - }, - { - "epoch": 3.29, - "learning_rate": 3.302050106378255e-05, - "loss": 0.3319, - "step": 5483500 - }, - { - "epoch": 3.29, - "learning_rate": 3.301840109822199e-05, - "loss": 0.3308, - "step": 5484000 - }, - { - "epoch": 3.29, - "learning_rate": 3.301630113266142e-05, - "loss": 0.3275, - "step": 5484500 - }, - { - "epoch": 3.29, - "learning_rate": 3.301420116710086e-05, - "loss": 0.3346, - "step": 5485000 - }, - { - "epoch": 3.29, - "learning_rate": 3.301210120154029e-05, - "loss": 0.3323, - "step": 5485500 - }, - { - "epoch": 3.29, - "learning_rate": 3.3010005435910854e-05, - "loss": 0.3325, - "step": 5486000 - }, - { - "epoch": 3.29, - "learning_rate": 3.300790547035028e-05, - "loss": 0.3232, - "step": 5486500 - }, - { - "epoch": 3.29, - "learning_rate": 3.300580550478972e-05, - "loss": 0.3218, - "step": 5487000 - }, - { - "epoch": 3.29, - "learning_rate": 3.3003705539229154e-05, - "loss": 0.3357, - "step": 5487500 - }, - { - "epoch": 3.29, - "learning_rate": 3.300160557366859e-05, - "loss": 0.3492, - "step": 5488000 - }, - { - "epoch": 3.29, - "learning_rate": 3.299950560810803e-05, - "loss": 0.3303, - "step": 5488500 - }, - { - "epoch": 3.29, - "learning_rate": 3.299740564254746e-05, - "loss": 0.3372, - "step": 5489000 - }, - { - "epoch": 3.29, - "learning_rate": 3.2995305676986895e-05, - "loss": 0.3275, - "step": 5489500 - }, - { - "epoch": 3.29, - "learning_rate": 3.299320991135745e-05, - "loss": 0.3252, - "step": 5490000 - }, - { - "epoch": 3.29, - "learning_rate": 3.299110994579689e-05, - "loss": 0.3332, - "step": 5490500 - }, - { - "epoch": 3.29, - "learning_rate": 3.298900998023632e-05, - "loss": 0.3324, - "step": 5491000 - }, - { - "epoch": 3.29, - "learning_rate": 3.2986910014675755e-05, - "loss": 0.3292, - "step": 5491500 - }, - { - "epoch": 3.29, - "learning_rate": 3.2984814249046316e-05, - "loss": 0.329, - "step": 5492000 - }, - { - "epoch": 3.29, - "learning_rate": 3.298271428348575e-05, - "loss": 0.3269, - "step": 5492500 - }, - { - "epoch": 3.29, - "learning_rate": 3.298061851785631e-05, - "loss": 0.3314, - "step": 5493000 - }, - { - "epoch": 3.29, - "learning_rate": 3.297851855229574e-05, - "loss": 0.339, - "step": 5493500 - }, - { - "epoch": 3.29, - "learning_rate": 3.2976418586735177e-05, - "loss": 0.3264, - "step": 5494000 - }, - { - "epoch": 3.29, - "learning_rate": 3.297431862117461e-05, - "loss": 0.3267, - "step": 5494500 - }, - { - "epoch": 3.29, - "learning_rate": 3.2972218655614043e-05, - "loss": 0.3481, - "step": 5495000 - }, - { - "epoch": 3.29, - "learning_rate": 3.2970118690053484e-05, - "loss": 0.3291, - "step": 5495500 - }, - { - "epoch": 3.3, - "learning_rate": 3.296801872449292e-05, - "loss": 0.3313, - "step": 5496000 - }, - { - "epoch": 3.3, - "learning_rate": 3.296591875893235e-05, - "loss": 0.3328, - "step": 5496500 - }, - { - "epoch": 3.3, - "learning_rate": 3.296381879337179e-05, - "loss": 0.3291, - "step": 5497000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2961718827811224e-05, - "loss": 0.3344, - "step": 5497500 - }, - { - "epoch": 3.3, - "learning_rate": 3.295961886225066e-05, - "loss": 0.3278, - "step": 5498000 - }, - { - "epoch": 3.3, - "learning_rate": 3.295752309662121e-05, - "loss": 0.3358, - "step": 5498500 - }, - { - "epoch": 3.3, - "learning_rate": 3.295542313106065e-05, - "loss": 0.3318, - "step": 5499000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2953323165500085e-05, - "loss": 0.3316, - "step": 5499500 - }, - { - "epoch": 3.3, - "learning_rate": 3.2951223199939525e-05, - "loss": 0.3216, - "step": 5500000 - }, - { - "epoch": 3.3, - "eval_loss": 0.3251773416996002, - "eval_runtime": 1118.1228, - "eval_samples_per_second": 471.075, - "eval_steps_per_second": 78.513, - "step": 5500000 - }, - { - "epoch": 3.3, - "learning_rate": 3.294912743431008e-05, - "loss": 0.3249, - "step": 5500500 - }, - { - "epoch": 3.3, - "learning_rate": 3.294702746874951e-05, - "loss": 0.3392, - "step": 5501000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2944927503188946e-05, - "loss": 0.3261, - "step": 5501500 - }, - { - "epoch": 3.3, - "learning_rate": 3.2942827537628386e-05, - "loss": 0.3238, - "step": 5502000 - }, - { - "epoch": 3.3, - "learning_rate": 3.294073177199894e-05, - "loss": 0.3355, - "step": 5502500 - }, - { - "epoch": 3.3, - "learning_rate": 3.293863180643837e-05, - "loss": 0.3302, - "step": 5503000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2936531840877806e-05, - "loss": 0.3293, - "step": 5503500 - }, - { - "epoch": 3.3, - "learning_rate": 3.293443187531725e-05, - "loss": 0.3265, - "step": 5504000 - }, - { - "epoch": 3.3, - "learning_rate": 3.293233190975668e-05, - "loss": 0.3236, - "step": 5504500 - }, - { - "epoch": 3.3, - "learning_rate": 3.2930236144127234e-05, - "loss": 0.3169, - "step": 5505000 - }, - { - "epoch": 3.3, - "learning_rate": 3.292813617856667e-05, - "loss": 0.3369, - "step": 5505500 - }, - { - "epoch": 3.3, - "learning_rate": 3.292603621300611e-05, - "loss": 0.3322, - "step": 5506000 - }, - { - "epoch": 3.3, - "learning_rate": 3.292394044737666e-05, - "loss": 0.3428, - "step": 5506500 - }, - { - "epoch": 3.3, - "learning_rate": 3.2921840481816094e-05, - "loss": 0.3259, - "step": 5507000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2919740516255535e-05, - "loss": 0.3381, - "step": 5507500 - }, - { - "epoch": 3.3, - "learning_rate": 3.291764055069497e-05, - "loss": 0.3184, - "step": 5508000 - }, - { - "epoch": 3.3, - "learning_rate": 3.29155405851344e-05, - "loss": 0.3278, - "step": 5508500 - }, - { - "epoch": 3.3, - "learning_rate": 3.291344061957384e-05, - "loss": 0.3175, - "step": 5509000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2911340654013275e-05, - "loss": 0.3278, - "step": 5509500 - }, - { - "epoch": 3.3, - "learning_rate": 3.290924068845271e-05, - "loss": 0.3289, - "step": 5510000 - }, - { - "epoch": 3.3, - "learning_rate": 3.290714492282326e-05, - "loss": 0.3362, - "step": 5510500 - }, - { - "epoch": 3.3, - "learning_rate": 3.29050449572627e-05, - "loss": 0.3398, - "step": 5511000 - }, - { - "epoch": 3.3, - "learning_rate": 3.2902949191633256e-05, - "loss": 0.3321, - "step": 5511500 - }, - { - "epoch": 3.3, - "learning_rate": 3.290084922607269e-05, - "loss": 0.3325, - "step": 5512000 - }, - { - "epoch": 3.3, - "learning_rate": 3.289874926051213e-05, - "loss": 0.3277, - "step": 5512500 - }, - { - "epoch": 3.31, - "learning_rate": 3.289664929495156e-05, - "loss": 0.3265, - "step": 5513000 - }, - { - "epoch": 3.31, - "learning_rate": 3.2894549329391e-05, - "loss": 0.334, - "step": 5513500 - }, - { - "epoch": 3.31, - "learning_rate": 3.289244936383044e-05, - "loss": 0.3406, - "step": 5514000 - }, - { - "epoch": 3.31, - "learning_rate": 3.289034939826987e-05, - "loss": 0.337, - "step": 5514500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2888249432709304e-05, - "loss": 0.3297, - "step": 5515000 - }, - { - "epoch": 3.31, - "learning_rate": 3.2886149467148744e-05, - "loss": 0.326, - "step": 5515500 - }, - { - "epoch": 3.31, - "learning_rate": 3.28840537015193e-05, - "loss": 0.3235, - "step": 5516000 - }, - { - "epoch": 3.31, - "learning_rate": 3.288195373595873e-05, - "loss": 0.3248, - "step": 5516500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2879853770398165e-05, - "loss": 0.3249, - "step": 5517000 - }, - { - "epoch": 3.31, - "learning_rate": 3.2877753804837605e-05, - "loss": 0.3257, - "step": 5517500 - }, - { - "epoch": 3.31, - "learning_rate": 3.287565383927704e-05, - "loss": 0.3344, - "step": 5518000 - }, - { - "epoch": 3.31, - "learning_rate": 3.287355807364759e-05, - "loss": 0.3288, - "step": 5518500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2871458108087025e-05, - "loss": 0.3333, - "step": 5519000 - }, - { - "epoch": 3.31, - "learning_rate": 3.2869358142526466e-05, - "loss": 0.3305, - "step": 5519500 - }, - { - "epoch": 3.31, - "learning_rate": 3.28672581769659e-05, - "loss": 0.328, - "step": 5520000 - }, - { - "epoch": 3.31, - "learning_rate": 3.286516241133645e-05, - "loss": 0.3355, - "step": 5520500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2863066645707006e-05, - "loss": 0.3281, - "step": 5521000 - }, - { - "epoch": 3.31, - "learning_rate": 3.2860966680146446e-05, - "loss": 0.3283, - "step": 5521500 - }, - { - "epoch": 3.31, - "learning_rate": 3.285886671458588e-05, - "loss": 0.3311, - "step": 5522000 - }, - { - "epoch": 3.31, - "learning_rate": 3.285676674902531e-05, - "loss": 0.3285, - "step": 5522500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2854666783464754e-05, - "loss": 0.3224, - "step": 5523000 - }, - { - "epoch": 3.31, - "learning_rate": 3.285256681790419e-05, - "loss": 0.3236, - "step": 5523500 - }, - { - "epoch": 3.31, - "learning_rate": 3.285046685234362e-05, - "loss": 0.3248, - "step": 5524000 - }, - { - "epoch": 3.31, - "learning_rate": 3.284836688678306e-05, - "loss": 0.342, - "step": 5524500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2846266921222494e-05, - "loss": 0.3374, - "step": 5525000 - }, - { - "epoch": 3.31, - "learning_rate": 3.284417115559305e-05, - "loss": 0.3328, - "step": 5525500 - }, - { - "epoch": 3.31, - "learning_rate": 3.284207119003248e-05, - "loss": 0.3327, - "step": 5526000 - }, - { - "epoch": 3.31, - "learning_rate": 3.283997542440304e-05, - "loss": 0.3268, - "step": 5526500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2837875458842475e-05, - "loss": 0.3345, - "step": 5527000 - }, - { - "epoch": 3.31, - "learning_rate": 3.283577549328191e-05, - "loss": 0.3365, - "step": 5527500 - }, - { - "epoch": 3.31, - "learning_rate": 3.283367552772135e-05, - "loss": 0.3298, - "step": 5528000 - }, - { - "epoch": 3.31, - "learning_rate": 3.283157556216078e-05, - "loss": 0.3351, - "step": 5528500 - }, - { - "epoch": 3.31, - "learning_rate": 3.2829475596600216e-05, - "loss": 0.3323, - "step": 5529000 - }, - { - "epoch": 3.32, - "learning_rate": 3.2827375631039656e-05, - "loss": 0.3242, - "step": 5529500 - }, - { - "epoch": 3.32, - "learning_rate": 3.282527566547909e-05, - "loss": 0.3249, - "step": 5530000 - }, - { - "epoch": 3.32, - "learning_rate": 3.2823175699918516e-05, - "loss": 0.3361, - "step": 5530500 - }, - { - "epoch": 3.32, - "learning_rate": 3.2821079934289076e-05, - "loss": 0.3405, - "step": 5531000 - }, - { - "epoch": 3.32, - "learning_rate": 3.281897996872852e-05, - "loss": 0.327, - "step": 5531500 - }, - { - "epoch": 3.32, - "learning_rate": 3.281688000316795e-05, - "loss": 0.3384, - "step": 5532000 - }, - { - "epoch": 3.32, - "learning_rate": 3.281478003760738e-05, - "loss": 0.323, - "step": 5532500 - }, - { - "epoch": 3.32, - "learning_rate": 3.281268007204682e-05, - "loss": 0.3328, - "step": 5533000 - }, - { - "epoch": 3.32, - "learning_rate": 3.281058010648625e-05, - "loss": 0.3267, - "step": 5533500 - }, - { - "epoch": 3.32, - "learning_rate": 3.280848434085681e-05, - "loss": 0.3292, - "step": 5534000 - }, - { - "epoch": 3.32, - "learning_rate": 3.2806384375296244e-05, - "loss": 0.3375, - "step": 5534500 - }, - { - "epoch": 3.32, - "learning_rate": 3.280428440973568e-05, - "loss": 0.3283, - "step": 5535000 - }, - { - "epoch": 3.32, - "learning_rate": 3.280218444417511e-05, - "loss": 0.3415, - "step": 5535500 - }, - { - "epoch": 3.32, - "learning_rate": 3.280008867854567e-05, - "loss": 0.3366, - "step": 5536000 - }, - { - "epoch": 3.32, - "learning_rate": 3.279798871298511e-05, - "loss": 0.337, - "step": 5536500 - }, - { - "epoch": 3.32, - "learning_rate": 3.2795888747424545e-05, - "loss": 0.3242, - "step": 5537000 - }, - { - "epoch": 3.32, - "learning_rate": 3.279378878186397e-05, - "loss": 0.3287, - "step": 5537500 - }, - { - "epoch": 3.32, - "learning_rate": 3.279168881630341e-05, - "loss": 0.3277, - "step": 5538000 - }, - { - "epoch": 3.32, - "learning_rate": 3.2789588850742846e-05, - "loss": 0.335, - "step": 5538500 - }, - { - "epoch": 3.32, - "learning_rate": 3.278748888518228e-05, - "loss": 0.33, - "step": 5539000 - }, - { - "epoch": 3.32, - "learning_rate": 3.278538891962172e-05, - "loss": 0.3346, - "step": 5539500 - }, - { - "epoch": 3.32, - "learning_rate": 3.278329315399227e-05, - "loss": 0.3274, - "step": 5540000 - }, - { - "epoch": 3.32, - "learning_rate": 3.2781193188431706e-05, - "loss": 0.3263, - "step": 5540500 - }, - { - "epoch": 3.32, - "learning_rate": 3.277909322287114e-05, - "loss": 0.3195, - "step": 5541000 - }, - { - "epoch": 3.32, - "learning_rate": 3.277699325731058e-05, - "loss": 0.3258, - "step": 5541500 - }, - { - "epoch": 3.32, - "learning_rate": 3.277489749168114e-05, - "loss": 0.3361, - "step": 5542000 - }, - { - "epoch": 3.32, - "learning_rate": 3.277279752612057e-05, - "loss": 0.3386, - "step": 5542500 - }, - { - "epoch": 3.32, - "learning_rate": 3.277069756056001e-05, - "loss": 0.3367, - "step": 5543000 - }, - { - "epoch": 3.32, - "learning_rate": 3.276859759499944e-05, - "loss": 0.3204, - "step": 5543500 - }, - { - "epoch": 3.32, - "learning_rate": 3.2766497629438874e-05, - "loss": 0.331, - "step": 5544000 - }, - { - "epoch": 3.32, - "learning_rate": 3.2764397663878314e-05, - "loss": 0.3406, - "step": 5544500 - }, - { - "epoch": 3.32, - "learning_rate": 3.276229769831775e-05, - "loss": 0.3316, - "step": 5545000 - }, - { - "epoch": 3.32, - "learning_rate": 3.276019773275718e-05, - "loss": 0.3294, - "step": 5545500 - }, - { - "epoch": 3.33, - "learning_rate": 3.2758101967127735e-05, - "loss": 0.3214, - "step": 5546000 - }, - { - "epoch": 3.33, - "learning_rate": 3.2756002001567175e-05, - "loss": 0.334, - "step": 5546500 - }, - { - "epoch": 3.33, - "learning_rate": 3.275390203600661e-05, - "loss": 0.327, - "step": 5547000 - }, - { - "epoch": 3.33, - "learning_rate": 3.275180207044604e-05, - "loss": 0.3261, - "step": 5547500 - }, - { - "epoch": 3.33, - "learning_rate": 3.2749706304816596e-05, - "loss": 0.3324, - "step": 5548000 - }, - { - "epoch": 3.33, - "learning_rate": 3.2747606339256036e-05, - "loss": 0.3371, - "step": 5548500 - }, - { - "epoch": 3.33, - "learning_rate": 3.274550637369547e-05, - "loss": 0.3305, - "step": 5549000 - }, - { - "epoch": 3.33, - "learning_rate": 3.274341060806602e-05, - "loss": 0.3249, - "step": 5549500 - }, - { - "epoch": 3.33, - "learning_rate": 3.274131064250546e-05, - "loss": 0.3221, - "step": 5550000 - }, - { - "epoch": 3.33, - "learning_rate": 3.27392106769449e-05, - "loss": 0.3316, - "step": 5550500 - }, - { - "epoch": 3.33, - "learning_rate": 3.273711071138433e-05, - "loss": 0.3278, - "step": 5551000 - }, - { - "epoch": 3.33, - "learning_rate": 3.273501074582377e-05, - "loss": 0.3325, - "step": 5551500 - }, - { - "epoch": 3.33, - "learning_rate": 3.2732910780263204e-05, - "loss": 0.3294, - "step": 5552000 - }, - { - "epoch": 3.33, - "learning_rate": 3.273081081470264e-05, - "loss": 0.3324, - "step": 5552500 - }, - { - "epoch": 3.33, - "learning_rate": 3.272871084914208e-05, - "loss": 0.3294, - "step": 5553000 - }, - { - "epoch": 3.33, - "learning_rate": 3.272661928344375e-05, - "loss": 0.3306, - "step": 5553500 - }, - { - "epoch": 3.33, - "learning_rate": 3.2724519317883185e-05, - "loss": 0.3291, - "step": 5554000 - }, - { - "epoch": 3.33, - "learning_rate": 3.272241935232262e-05, - "loss": 0.3311, - "step": 5554500 - }, - { - "epoch": 3.33, - "learning_rate": 3.272031938676205e-05, - "loss": 0.3291, - "step": 5555000 - }, - { - "epoch": 3.33, - "learning_rate": 3.271821942120149e-05, - "loss": 0.326, - "step": 5555500 - }, - { - "epoch": 3.33, - "learning_rate": 3.271612365557205e-05, - "loss": 0.3228, - "step": 5556000 - }, - { - "epoch": 3.33, - "learning_rate": 3.271402369001148e-05, - "loss": 0.3324, - "step": 5556500 - }, - { - "epoch": 3.33, - "learning_rate": 3.271192372445092e-05, - "loss": 0.3307, - "step": 5557000 - }, - { - "epoch": 3.33, - "learning_rate": 3.270982375889035e-05, - "loss": 0.3306, - "step": 5557500 - }, - { - "epoch": 3.33, - "learning_rate": 3.2707723793329786e-05, - "loss": 0.3303, - "step": 5558000 - }, - { - "epoch": 3.33, - "learning_rate": 3.2705623827769226e-05, - "loss": 0.3371, - "step": 5558500 - }, - { - "epoch": 3.33, - "learning_rate": 3.270352386220866e-05, - "loss": 0.3347, - "step": 5559000 - }, - { - "epoch": 3.33, - "learning_rate": 3.270142809657921e-05, - "loss": 0.3302, - "step": 5559500 - }, - { - "epoch": 3.33, - "learning_rate": 3.269932813101865e-05, - "loss": 0.329, - "step": 5560000 - }, - { - "epoch": 3.33, - "learning_rate": 3.269722816545809e-05, - "loss": 0.3332, - "step": 5560500 - }, - { - "epoch": 3.33, - "learning_rate": 3.269512819989752e-05, - "loss": 0.3354, - "step": 5561000 - }, - { - "epoch": 3.33, - "learning_rate": 3.2693028234336954e-05, - "loss": 0.3285, - "step": 5561500 - }, - { - "epoch": 3.33, - "learning_rate": 3.2690928268776394e-05, - "loss": 0.3368, - "step": 5562000 - }, - { - "epoch": 3.33, - "learning_rate": 3.268883250314695e-05, - "loss": 0.3312, - "step": 5562500 - }, - { - "epoch": 3.34, - "learning_rate": 3.268673253758638e-05, - "loss": 0.3304, - "step": 5563000 - }, - { - "epoch": 3.34, - "learning_rate": 3.2684632572025815e-05, - "loss": 0.3292, - "step": 5563500 - }, - { - "epoch": 3.34, - "learning_rate": 3.2682532606465255e-05, - "loss": 0.328, - "step": 5564000 - }, - { - "epoch": 3.34, - "learning_rate": 3.268043264090469e-05, - "loss": 0.3352, - "step": 5564500 - }, - { - "epoch": 3.34, - "learning_rate": 3.267833267534413e-05, - "loss": 0.3308, - "step": 5565000 - }, - { - "epoch": 3.34, - "learning_rate": 3.267623690971468e-05, - "loss": 0.3411, - "step": 5565500 - }, - { - "epoch": 3.34, - "learning_rate": 3.2674141144085236e-05, - "loss": 0.333, - "step": 5566000 - }, - { - "epoch": 3.34, - "learning_rate": 3.267204117852467e-05, - "loss": 0.3263, - "step": 5566500 - }, - { - "epoch": 3.34, - "learning_rate": 3.26699412129641e-05, - "loss": 0.3302, - "step": 5567000 - }, - { - "epoch": 3.34, - "learning_rate": 3.266784124740354e-05, - "loss": 0.3279, - "step": 5567500 - }, - { - "epoch": 3.34, - "learning_rate": 3.2665741281842976e-05, - "loss": 0.3283, - "step": 5568000 - }, - { - "epoch": 3.34, - "learning_rate": 3.266364131628241e-05, - "loss": 0.3269, - "step": 5568500 - }, - { - "epoch": 3.34, - "learning_rate": 3.266154135072185e-05, - "loss": 0.331, - "step": 5569000 - }, - { - "epoch": 3.34, - "learning_rate": 3.2659441385161283e-05, - "loss": 0.3384, - "step": 5569500 - }, - { - "epoch": 3.34, - "learning_rate": 3.265734141960072e-05, - "loss": 0.3316, - "step": 5570000 - }, - { - "epoch": 3.34, - "learning_rate": 3.265524565397127e-05, - "loss": 0.3369, - "step": 5570500 - }, - { - "epoch": 3.34, - "learning_rate": 3.265314988834183e-05, - "loss": 0.3326, - "step": 5571000 - }, - { - "epoch": 3.34, - "learning_rate": 3.2651049922781264e-05, - "loss": 0.3376, - "step": 5571500 - }, - { - "epoch": 3.34, - "learning_rate": 3.26489499572207e-05, - "loss": 0.3394, - "step": 5572000 - }, - { - "epoch": 3.34, - "learning_rate": 3.264684999166014e-05, - "loss": 0.3291, - "step": 5572500 - }, - { - "epoch": 3.34, - "learning_rate": 3.264475002609957e-05, - "loss": 0.3278, - "step": 5573000 - }, - { - "epoch": 3.34, - "learning_rate": 3.2642650060539005e-05, - "loss": 0.336, - "step": 5573500 - }, - { - "epoch": 3.34, - "learning_rate": 3.2640550094978445e-05, - "loss": 0.3282, - "step": 5574000 - }, - { - "epoch": 3.34, - "learning_rate": 3.263845012941788e-05, - "loss": 0.33, - "step": 5574500 - }, - { - "epoch": 3.34, - "learning_rate": 3.263635016385731e-05, - "loss": 0.3228, - "step": 5575000 - }, - { - "epoch": 3.34, - "learning_rate": 3.2634254398227866e-05, - "loss": 0.3287, - "step": 5575500 - }, - { - "epoch": 3.34, - "learning_rate": 3.2632154432667306e-05, - "loss": 0.3322, - "step": 5576000 - }, - { - "epoch": 3.34, - "learning_rate": 3.263005446710674e-05, - "loss": 0.3205, - "step": 5576500 - }, - { - "epoch": 3.34, - "learning_rate": 3.262795870147729e-05, - "loss": 0.3463, - "step": 5577000 - }, - { - "epoch": 3.34, - "learning_rate": 3.262585873591673e-05, - "loss": 0.3348, - "step": 5577500 - }, - { - "epoch": 3.34, - "learning_rate": 3.2623758770356167e-05, - "loss": 0.33, - "step": 5578000 - }, - { - "epoch": 3.34, - "learning_rate": 3.26216588047956e-05, - "loss": 0.3351, - "step": 5578500 - }, - { - "epoch": 3.34, - "learning_rate": 3.261955883923504e-05, - "loss": 0.3421, - "step": 5579000 - }, - { - "epoch": 3.35, - "learning_rate": 3.2617458873674474e-05, - "loss": 0.3182, - "step": 5579500 - }, - { - "epoch": 3.35, - "learning_rate": 3.261535890811391e-05, - "loss": 0.3266, - "step": 5580000 - }, - { - "epoch": 3.35, - "learning_rate": 3.261325894255335e-05, - "loss": 0.3258, - "step": 5580500 - }, - { - "epoch": 3.35, - "learning_rate": 3.2611158976992774e-05, - "loss": 0.3224, - "step": 5581000 - }, - { - "epoch": 3.35, - "learning_rate": 3.260905901143221e-05, - "loss": 0.3281, - "step": 5581500 - }, - { - "epoch": 3.35, - "learning_rate": 3.260695904587165e-05, - "loss": 0.3268, - "step": 5582000 - }, - { - "epoch": 3.35, - "learning_rate": 3.260486328024221e-05, - "loss": 0.3245, - "step": 5582500 - }, - { - "epoch": 3.35, - "learning_rate": 3.260276331468164e-05, - "loss": 0.3225, - "step": 5583000 - }, - { - "epoch": 3.35, - "learning_rate": 3.260066334912107e-05, - "loss": 0.3245, - "step": 5583500 - }, - { - "epoch": 3.35, - "learning_rate": 3.259856338356051e-05, - "loss": 0.3234, - "step": 5584000 - }, - { - "epoch": 3.35, - "learning_rate": 3.259646341799994e-05, - "loss": 0.3321, - "step": 5584500 - }, - { - "epoch": 3.35, - "learning_rate": 3.2594363452439375e-05, - "loss": 0.3249, - "step": 5585000 - }, - { - "epoch": 3.35, - "learning_rate": 3.2592263486878816e-05, - "loss": 0.3263, - "step": 5585500 - }, - { - "epoch": 3.35, - "learning_rate": 3.259016772124937e-05, - "loss": 0.3291, - "step": 5586000 - }, - { - "epoch": 3.35, - "learning_rate": 3.25880677556888e-05, - "loss": 0.33, - "step": 5586500 - }, - { - "epoch": 3.35, - "learning_rate": 3.258596779012824e-05, - "loss": 0.3243, - "step": 5587000 - }, - { - "epoch": 3.35, - "learning_rate": 3.2583867824567676e-05, - "loss": 0.3314, - "step": 5587500 - }, - { - "epoch": 3.35, - "learning_rate": 3.258177205893824e-05, - "loss": 0.3293, - "step": 5588000 - }, - { - "epoch": 3.35, - "learning_rate": 3.2579672093377663e-05, - "loss": 0.333, - "step": 5588500 - }, - { - "epoch": 3.35, - "learning_rate": 3.2577572127817104e-05, - "loss": 0.325, - "step": 5589000 - }, - { - "epoch": 3.35, - "learning_rate": 3.257547216225654e-05, - "loss": 0.3317, - "step": 5589500 - }, - { - "epoch": 3.35, - "learning_rate": 3.257337219669597e-05, - "loss": 0.3226, - "step": 5590000 - }, - { - "epoch": 3.35, - "learning_rate": 3.257127223113541e-05, - "loss": 0.3255, - "step": 5590500 - }, - { - "epoch": 3.35, - "learning_rate": 3.2569172265574844e-05, - "loss": 0.3264, - "step": 5591000 - }, - { - "epoch": 3.35, - "learning_rate": 3.25670764999454e-05, - "loss": 0.3323, - "step": 5591500 - }, - { - "epoch": 3.35, - "learning_rate": 3.256497653438483e-05, - "loss": 0.3366, - "step": 5592000 - }, - { - "epoch": 3.35, - "learning_rate": 3.256287656882427e-05, - "loss": 0.3209, - "step": 5592500 - }, - { - "epoch": 3.35, - "learning_rate": 3.2560776603263705e-05, - "loss": 0.3329, - "step": 5593000 - }, - { - "epoch": 3.35, - "learning_rate": 3.255867663770314e-05, - "loss": 0.3363, - "step": 5593500 - }, - { - "epoch": 3.35, - "learning_rate": 3.255657667214258e-05, - "loss": 0.3372, - "step": 5594000 - }, - { - "epoch": 3.35, - "learning_rate": 3.255447670658201e-05, - "loss": 0.3205, - "step": 5594500 - }, - { - "epoch": 3.35, - "learning_rate": 3.2552380940952566e-05, - "loss": 0.3348, - "step": 5595000 - }, - { - "epoch": 3.35, - "learning_rate": 3.2550280975392006e-05, - "loss": 0.3324, - "step": 5595500 - }, - { - "epoch": 3.36, - "learning_rate": 3.254818100983144e-05, - "loss": 0.3291, - "step": 5596000 - }, - { - "epoch": 3.36, - "learning_rate": 3.254608104427087e-05, - "loss": 0.3283, - "step": 5596500 - }, - { - "epoch": 3.36, - "learning_rate": 3.254398107871031e-05, - "loss": 0.3305, - "step": 5597000 - }, - { - "epoch": 3.36, - "learning_rate": 3.2541881113149747e-05, - "loss": 0.3278, - "step": 5597500 - }, - { - "epoch": 3.36, - "learning_rate": 3.253978114758918e-05, - "loss": 0.3368, - "step": 5598000 - }, - { - "epoch": 3.36, - "learning_rate": 3.2537681182028614e-05, - "loss": 0.3352, - "step": 5598500 - }, - { - "epoch": 3.36, - "learning_rate": 3.2535585416399174e-05, - "loss": 0.3367, - "step": 5599000 - }, - { - "epoch": 3.36, - "learning_rate": 3.253348965076973e-05, - "loss": 0.3432, - "step": 5599500 - }, - { - "epoch": 3.36, - "learning_rate": 3.253138968520916e-05, - "loss": 0.3385, - "step": 5600000 - }, - { - "epoch": 3.36, - "eval_loss": 0.3224908411502838, - "eval_runtime": 1116.5259, - "eval_samples_per_second": 471.749, - "eval_steps_per_second": 78.625, - "step": 5600000 - }, - { - "epoch": 3.36, - "learning_rate": 3.2529289719648594e-05, - "loss": 0.3322, - "step": 5600500 - }, - { - "epoch": 3.36, - "learning_rate": 3.2527189754088035e-05, - "loss": 0.3274, - "step": 5601000 - }, - { - "epoch": 3.36, - "learning_rate": 3.252508978852747e-05, - "loss": 0.3269, - "step": 5601500 - }, - { - "epoch": 3.36, - "learning_rate": 3.252299402289802e-05, - "loss": 0.3204, - "step": 5602000 - }, - { - "epoch": 3.36, - "learning_rate": 3.252089405733746e-05, - "loss": 0.3278, - "step": 5602500 - }, - { - "epoch": 3.36, - "learning_rate": 3.2518794091776895e-05, - "loss": 0.3321, - "step": 5603000 - }, - { - "epoch": 3.36, - "learning_rate": 3.251669412621633e-05, - "loss": 0.3279, - "step": 5603500 - }, - { - "epoch": 3.36, - "learning_rate": 3.251459416065577e-05, - "loss": 0.3343, - "step": 5604000 - }, - { - "epoch": 3.36, - "learning_rate": 3.251249839502632e-05, - "loss": 0.327, - "step": 5604500 - }, - { - "epoch": 3.36, - "learning_rate": 3.2510398429465756e-05, - "loss": 0.3308, - "step": 5605000 - }, - { - "epoch": 3.36, - "learning_rate": 3.250829846390519e-05, - "loss": 0.3346, - "step": 5605500 - }, - { - "epoch": 3.36, - "learning_rate": 3.250619849834463e-05, - "loss": 0.3261, - "step": 5606000 - }, - { - "epoch": 3.36, - "learning_rate": 3.250410273271518e-05, - "loss": 0.3331, - "step": 5606500 - }, - { - "epoch": 3.36, - "learning_rate": 3.250200276715462e-05, - "loss": 0.3417, - "step": 5607000 - }, - { - "epoch": 3.36, - "learning_rate": 3.249990280159405e-05, - "loss": 0.3311, - "step": 5607500 - }, - { - "epoch": 3.36, - "learning_rate": 3.249780283603349e-05, - "loss": 0.3183, - "step": 5608000 - }, - { - "epoch": 3.36, - "learning_rate": 3.2495702870472924e-05, - "loss": 0.3322, - "step": 5608500 - }, - { - "epoch": 3.36, - "learning_rate": 3.249360710484348e-05, - "loss": 0.336, - "step": 5609000 - }, - { - "epoch": 3.36, - "learning_rate": 3.249150713928292e-05, - "loss": 0.3419, - "step": 5609500 - }, - { - "epoch": 3.36, - "learning_rate": 3.248940717372235e-05, - "loss": 0.3407, - "step": 5610000 - }, - { - "epoch": 3.36, - "learning_rate": 3.2487307208161785e-05, - "loss": 0.3371, - "step": 5610500 - }, - { - "epoch": 3.36, - "learning_rate": 3.2485207242601225e-05, - "loss": 0.3309, - "step": 5611000 - }, - { - "epoch": 3.36, - "learning_rate": 3.248310727704066e-05, - "loss": 0.3289, - "step": 5611500 - }, - { - "epoch": 3.36, - "learning_rate": 3.248101151141121e-05, - "loss": 0.3278, - "step": 5612000 - }, - { - "epoch": 3.36, - "learning_rate": 3.2478911545850645e-05, - "loss": 0.3328, - "step": 5612500 - }, - { - "epoch": 3.37, - "learning_rate": 3.2476811580290086e-05, - "loss": 0.33, - "step": 5613000 - }, - { - "epoch": 3.37, - "learning_rate": 3.247471161472952e-05, - "loss": 0.3284, - "step": 5613500 - }, - { - "epoch": 3.37, - "learning_rate": 3.247261164916895e-05, - "loss": 0.3338, - "step": 5614000 - }, - { - "epoch": 3.37, - "learning_rate": 3.247051168360839e-05, - "loss": 0.3291, - "step": 5614500 - }, - { - "epoch": 3.37, - "learning_rate": 3.2468411718047826e-05, - "loss": 0.3266, - "step": 5615000 - }, - { - "epoch": 3.37, - "learning_rate": 3.246631595241838e-05, - "loss": 0.3275, - "step": 5615500 - }, - { - "epoch": 3.37, - "learning_rate": 3.246421598685781e-05, - "loss": 0.341, - "step": 5616000 - }, - { - "epoch": 3.37, - "learning_rate": 3.2462116021297254e-05, - "loss": 0.335, - "step": 5616500 - }, - { - "epoch": 3.37, - "learning_rate": 3.246001605573669e-05, - "loss": 0.3248, - "step": 5617000 - }, - { - "epoch": 3.37, - "learning_rate": 3.245791609017612e-05, - "loss": 0.3351, - "step": 5617500 - }, - { - "epoch": 3.37, - "learning_rate": 3.2455816124615554e-05, - "loss": 0.3208, - "step": 5618000 - }, - { - "epoch": 3.37, - "learning_rate": 3.245371615905499e-05, - "loss": 0.3358, - "step": 5618500 - }, - { - "epoch": 3.37, - "learning_rate": 3.245161619349443e-05, - "loss": 0.325, - "step": 5619000 - }, - { - "epoch": 3.37, - "learning_rate": 3.244952042786499e-05, - "loss": 0.3278, - "step": 5619500 - }, - { - "epoch": 3.37, - "learning_rate": 3.244742466223554e-05, - "loss": 0.3342, - "step": 5620000 - }, - { - "epoch": 3.37, - "learning_rate": 3.2445324696674975e-05, - "loss": 0.3303, - "step": 5620500 - }, - { - "epoch": 3.37, - "learning_rate": 3.244322473111441e-05, - "loss": 0.3276, - "step": 5621000 - }, - { - "epoch": 3.37, - "learning_rate": 3.244112476555385e-05, - "loss": 0.3339, - "step": 5621500 - }, - { - "epoch": 3.37, - "learning_rate": 3.243902479999328e-05, - "loss": 0.3234, - "step": 5622000 - }, - { - "epoch": 3.37, - "learning_rate": 3.243692483443271e-05, - "loss": 0.3333, - "step": 5622500 - }, - { - "epoch": 3.37, - "learning_rate": 3.243482486887215e-05, - "loss": 0.3407, - "step": 5623000 - }, - { - "epoch": 3.37, - "learning_rate": 3.243272910324271e-05, - "loss": 0.3249, - "step": 5623500 - }, - { - "epoch": 3.37, - "learning_rate": 3.243062913768214e-05, - "loss": 0.3263, - "step": 5624000 - }, - { - "epoch": 3.37, - "learning_rate": 3.242852917212158e-05, - "loss": 0.3273, - "step": 5624500 - }, - { - "epoch": 3.37, - "learning_rate": 3.242642920656101e-05, - "loss": 0.3284, - "step": 5625000 - }, - { - "epoch": 3.37, - "learning_rate": 3.242432924100044e-05, - "loss": 0.3308, - "step": 5625500 - }, - { - "epoch": 3.37, - "learning_rate": 3.2422229275439883e-05, - "loss": 0.3459, - "step": 5626000 - }, - { - "epoch": 3.37, - "learning_rate": 3.242012930987932e-05, - "loss": 0.3321, - "step": 5626500 - }, - { - "epoch": 3.37, - "learning_rate": 3.241803354424987e-05, - "loss": 0.3251, - "step": 5627000 - }, - { - "epoch": 3.37, - "learning_rate": 3.2415933578689304e-05, - "loss": 0.3308, - "step": 5627500 - }, - { - "epoch": 3.37, - "learning_rate": 3.2413833613128744e-05, - "loss": 0.3258, - "step": 5628000 - }, - { - "epoch": 3.37, - "learning_rate": 3.241173364756818e-05, - "loss": 0.3222, - "step": 5628500 - }, - { - "epoch": 3.37, - "learning_rate": 3.240963368200761e-05, - "loss": 0.3361, - "step": 5629000 - }, - { - "epoch": 3.38, - "learning_rate": 3.240753371644705e-05, - "loss": 0.3284, - "step": 5629500 - }, - { - "epoch": 3.38, - "learning_rate": 3.2405437950817605e-05, - "loss": 0.3256, - "step": 5630000 - }, - { - "epoch": 3.38, - "learning_rate": 3.240333798525704e-05, - "loss": 0.3355, - "step": 5630500 - }, - { - "epoch": 3.38, - "learning_rate": 3.240123801969648e-05, - "loss": 0.328, - "step": 5631000 - }, - { - "epoch": 3.38, - "learning_rate": 3.239913805413591e-05, - "loss": 0.3412, - "step": 5631500 - }, - { - "epoch": 3.38, - "learning_rate": 3.2397038088575346e-05, - "loss": 0.326, - "step": 5632000 - }, - { - "epoch": 3.38, - "learning_rate": 3.2394938123014786e-05, - "loss": 0.3264, - "step": 5632500 - }, - { - "epoch": 3.38, - "learning_rate": 3.239283815745422e-05, - "loss": 0.3229, - "step": 5633000 - }, - { - "epoch": 3.38, - "learning_rate": 3.239073819189365e-05, - "loss": 0.3368, - "step": 5633500 - }, - { - "epoch": 3.38, - "learning_rate": 3.2388642426264206e-05, - "loss": 0.333, - "step": 5634000 - }, - { - "epoch": 3.38, - "learning_rate": 3.2386542460703646e-05, - "loss": 0.3301, - "step": 5634500 - }, - { - "epoch": 3.38, - "learning_rate": 3.238444249514308e-05, - "loss": 0.3255, - "step": 5635000 - }, - { - "epoch": 3.38, - "learning_rate": 3.2382346729513634e-05, - "loss": 0.3279, - "step": 5635500 - }, - { - "epoch": 3.38, - "learning_rate": 3.238024676395307e-05, - "loss": 0.3486, - "step": 5636000 - }, - { - "epoch": 3.38, - "learning_rate": 3.237814679839251e-05, - "loss": 0.3261, - "step": 5636500 - }, - { - "epoch": 3.38, - "learning_rate": 3.237604683283194e-05, - "loss": 0.3303, - "step": 5637000 - }, - { - "epoch": 3.38, - "learning_rate": 3.2373946867271374e-05, - "loss": 0.3317, - "step": 5637500 - }, - { - "epoch": 3.38, - "learning_rate": 3.2371846901710814e-05, - "loss": 0.3348, - "step": 5638000 - }, - { - "epoch": 3.38, - "learning_rate": 3.236975113608137e-05, - "loss": 0.3359, - "step": 5638500 - }, - { - "epoch": 3.38, - "learning_rate": 3.23676511705208e-05, - "loss": 0.3309, - "step": 5639000 - }, - { - "epoch": 3.38, - "learning_rate": 3.236555120496024e-05, - "loss": 0.3274, - "step": 5639500 - }, - { - "epoch": 3.38, - "learning_rate": 3.2363451239399675e-05, - "loss": 0.3305, - "step": 5640000 - }, - { - "epoch": 3.38, - "learning_rate": 3.236135127383911e-05, - "loss": 0.3284, - "step": 5640500 - }, - { - "epoch": 3.38, - "learning_rate": 3.235925130827855e-05, - "loss": 0.3241, - "step": 5641000 - }, - { - "epoch": 3.38, - "learning_rate": 3.235715134271798e-05, - "loss": 0.3266, - "step": 5641500 - }, - { - "epoch": 3.38, - "learning_rate": 3.235505137715741e-05, - "loss": 0.3367, - "step": 5642000 - }, - { - "epoch": 3.38, - "learning_rate": 3.235295561152797e-05, - "loss": 0.3309, - "step": 5642500 - }, - { - "epoch": 3.38, - "learning_rate": 3.235085564596741e-05, - "loss": 0.3341, - "step": 5643000 - }, - { - "epoch": 3.38, - "learning_rate": 3.234875568040684e-05, - "loss": 0.34, - "step": 5643500 - }, - { - "epoch": 3.38, - "learning_rate": 3.2346655714846276e-05, - "loss": 0.3297, - "step": 5644000 - }, - { - "epoch": 3.38, - "learning_rate": 3.234455574928571e-05, - "loss": 0.3225, - "step": 5644500 - }, - { - "epoch": 3.38, - "learning_rate": 3.234245578372514e-05, - "loss": 0.3265, - "step": 5645000 - }, - { - "epoch": 3.38, - "learning_rate": 3.234035581816458e-05, - "loss": 0.3356, - "step": 5645500 - }, - { - "epoch": 3.39, - "learning_rate": 3.233825585260402e-05, - "loss": 0.3287, - "step": 5646000 - }, - { - "epoch": 3.39, - "learning_rate": 3.233616008697458e-05, - "loss": 0.3287, - "step": 5646500 - }, - { - "epoch": 3.39, - "learning_rate": 3.2334060121414004e-05, - "loss": 0.3345, - "step": 5647000 - }, - { - "epoch": 3.39, - "learning_rate": 3.2331964355784564e-05, - "loss": 0.3385, - "step": 5647500 - }, - { - "epoch": 3.39, - "learning_rate": 3.2329864390224005e-05, - "loss": 0.3249, - "step": 5648000 - }, - { - "epoch": 3.39, - "learning_rate": 3.232776442466344e-05, - "loss": 0.3483, - "step": 5648500 - }, - { - "epoch": 3.39, - "learning_rate": 3.232566445910287e-05, - "loss": 0.3381, - "step": 5649000 - }, - { - "epoch": 3.39, - "learning_rate": 3.2323568693473425e-05, - "loss": 0.3299, - "step": 5649500 - }, - { - "epoch": 3.39, - "learning_rate": 3.2321468727912865e-05, - "loss": 0.3189, - "step": 5650000 - }, - { - "epoch": 3.39, - "learning_rate": 3.23193687623523e-05, - "loss": 0.3338, - "step": 5650500 - }, - { - "epoch": 3.39, - "learning_rate": 3.231726879679173e-05, - "loss": 0.3268, - "step": 5651000 - }, - { - "epoch": 3.39, - "learning_rate": 3.231516883123117e-05, - "loss": 0.3327, - "step": 5651500 - }, - { - "epoch": 3.39, - "learning_rate": 3.2313073065601726e-05, - "loss": 0.3314, - "step": 5652000 - }, - { - "epoch": 3.39, - "learning_rate": 3.231097310004116e-05, - "loss": 0.3334, - "step": 5652500 - }, - { - "epoch": 3.39, - "learning_rate": 3.230887313448059e-05, - "loss": 0.318, - "step": 5653000 - }, - { - "epoch": 3.39, - "learning_rate": 3.230677316892003e-05, - "loss": 0.3322, - "step": 5653500 - }, - { - "epoch": 3.39, - "learning_rate": 3.230467740329059e-05, - "loss": 0.332, - "step": 5654000 - }, - { - "epoch": 3.39, - "learning_rate": 3.230257743773002e-05, - "loss": 0.3237, - "step": 5654500 - }, - { - "epoch": 3.39, - "learning_rate": 3.2300481672100574e-05, - "loss": 0.3387, - "step": 5655000 - }, - { - "epoch": 3.39, - "learning_rate": 3.2298381706540014e-05, - "loss": 0.3333, - "step": 5655500 - }, - { - "epoch": 3.39, - "learning_rate": 3.229628174097945e-05, - "loss": 0.3295, - "step": 5656000 - }, - { - "epoch": 3.39, - "learning_rate": 3.229418177541888e-05, - "loss": 0.3351, - "step": 5656500 - }, - { - "epoch": 3.39, - "learning_rate": 3.229208180985832e-05, - "loss": 0.3227, - "step": 5657000 - }, - { - "epoch": 3.39, - "learning_rate": 3.2289981844297755e-05, - "loss": 0.3397, - "step": 5657500 - }, - { - "epoch": 3.39, - "learning_rate": 3.228788607866831e-05, - "loss": 0.3284, - "step": 5658000 - }, - { - "epoch": 3.39, - "learning_rate": 3.228578611310774e-05, - "loss": 0.3304, - "step": 5658500 - }, - { - "epoch": 3.39, - "learning_rate": 3.228368614754718e-05, - "loss": 0.3252, - "step": 5659000 - }, - { - "epoch": 3.39, - "learning_rate": 3.2281590381917736e-05, - "loss": 0.3331, - "step": 5659500 - }, - { - "epoch": 3.39, - "learning_rate": 3.227949041635717e-05, - "loss": 0.3333, - "step": 5660000 - }, - { - "epoch": 3.39, - "learning_rate": 3.227739045079661e-05, - "loss": 0.3275, - "step": 5660500 - }, - { - "epoch": 3.39, - "learning_rate": 3.227529048523604e-05, - "loss": 0.3313, - "step": 5661000 - }, - { - "epoch": 3.39, - "learning_rate": 3.2273190519675476e-05, - "loss": 0.3377, - "step": 5661500 - }, - { - "epoch": 3.39, - "learning_rate": 3.2271090554114916e-05, - "loss": 0.331, - "step": 5662000 - }, - { - "epoch": 3.39, - "learning_rate": 3.226899058855435e-05, - "loss": 0.3278, - "step": 5662500 - }, - { - "epoch": 3.4, - "learning_rate": 3.226689062299378e-05, - "loss": 0.3254, - "step": 5663000 - }, - { - "epoch": 3.4, - "learning_rate": 3.226479065743322e-05, - "loss": 0.3291, - "step": 5663500 - }, - { - "epoch": 3.4, - "learning_rate": 3.226269069187265e-05, - "loss": 0.3349, - "step": 5664000 - }, - { - "epoch": 3.4, - "learning_rate": 3.2260590726312084e-05, - "loss": 0.3351, - "step": 5664500 - }, - { - "epoch": 3.4, - "learning_rate": 3.2258490760751524e-05, - "loss": 0.3362, - "step": 5665000 - }, - { - "epoch": 3.4, - "learning_rate": 3.225639079519096e-05, - "loss": 0.3274, - "step": 5665500 - }, - { - "epoch": 3.4, - "learning_rate": 3.225429502956151e-05, - "loss": 0.3264, - "step": 5666000 - }, - { - "epoch": 3.4, - "learning_rate": 3.2252195064000944e-05, - "loss": 0.3348, - "step": 5666500 - }, - { - "epoch": 3.4, - "learning_rate": 3.2250095098440385e-05, - "loss": 0.3318, - "step": 5667000 - }, - { - "epoch": 3.4, - "learning_rate": 3.224799513287982e-05, - "loss": 0.3249, - "step": 5667500 - }, - { - "epoch": 3.4, - "learning_rate": 3.224589516731925e-05, - "loss": 0.3272, - "step": 5668000 - }, - { - "epoch": 3.4, - "learning_rate": 3.224379520175869e-05, - "loss": 0.3353, - "step": 5668500 - }, - { - "epoch": 3.4, - "learning_rate": 3.2241699436129245e-05, - "loss": 0.336, - "step": 5669000 - }, - { - "epoch": 3.4, - "learning_rate": 3.223959947056868e-05, - "loss": 0.3279, - "step": 5669500 - }, - { - "epoch": 3.4, - "learning_rate": 3.223749950500812e-05, - "loss": 0.3229, - "step": 5670000 - }, - { - "epoch": 3.4, - "learning_rate": 3.223539953944755e-05, - "loss": 0.3286, - "step": 5670500 - }, - { - "epoch": 3.4, - "learning_rate": 3.2233299573886986e-05, - "loss": 0.3241, - "step": 5671000 - }, - { - "epoch": 3.4, - "learning_rate": 3.2231199608326426e-05, - "loss": 0.3284, - "step": 5671500 - }, - { - "epoch": 3.4, - "learning_rate": 3.222909964276586e-05, - "loss": 0.3332, - "step": 5672000 - }, - { - "epoch": 3.4, - "learning_rate": 3.222700387713641e-05, - "loss": 0.3336, - "step": 5672500 - }, - { - "epoch": 3.4, - "learning_rate": 3.222490391157585e-05, - "loss": 0.3251, - "step": 5673000 - }, - { - "epoch": 3.4, - "learning_rate": 3.222280394601529e-05, - "loss": 0.3318, - "step": 5673500 - }, - { - "epoch": 3.4, - "learning_rate": 3.222070818038584e-05, - "loss": 0.3262, - "step": 5674000 - }, - { - "epoch": 3.4, - "learning_rate": 3.2218608214825274e-05, - "loss": 0.3326, - "step": 5674500 - }, - { - "epoch": 3.4, - "learning_rate": 3.221650824926471e-05, - "loss": 0.3313, - "step": 5675000 - }, - { - "epoch": 3.4, - "learning_rate": 3.221440828370415e-05, - "loss": 0.3201, - "step": 5675500 - }, - { - "epoch": 3.4, - "learning_rate": 3.221230831814358e-05, - "loss": 0.3317, - "step": 5676000 - }, - { - "epoch": 3.4, - "learning_rate": 3.2210208352583015e-05, - "loss": 0.3239, - "step": 5676500 - }, - { - "epoch": 3.4, - "learning_rate": 3.2208108387022455e-05, - "loss": 0.3277, - "step": 5677000 - }, - { - "epoch": 3.4, - "learning_rate": 3.220600842146189e-05, - "loss": 0.3332, - "step": 5677500 - }, - { - "epoch": 3.4, - "learning_rate": 3.220391265583244e-05, - "loss": 0.3333, - "step": 5678000 - }, - { - "epoch": 3.4, - "learning_rate": 3.220181269027188e-05, - "loss": 0.3339, - "step": 5678500 - }, - { - "epoch": 3.4, - "learning_rate": 3.2199712724711316e-05, - "loss": 0.3336, - "step": 5679000 - }, - { - "epoch": 3.41, - "learning_rate": 3.219761275915075e-05, - "loss": 0.3315, - "step": 5679500 - }, - { - "epoch": 3.41, - "learning_rate": 3.219551279359019e-05, - "loss": 0.3385, - "step": 5680000 - }, - { - "epoch": 3.41, - "learning_rate": 3.219341702796074e-05, - "loss": 0.3303, - "step": 5680500 - }, - { - "epoch": 3.41, - "learning_rate": 3.2191317062400176e-05, - "loss": 0.3255, - "step": 5681000 - }, - { - "epoch": 3.41, - "learning_rate": 3.218921709683961e-05, - "loss": 0.3325, - "step": 5681500 - }, - { - "epoch": 3.41, - "learning_rate": 3.218711713127905e-05, - "loss": 0.3265, - "step": 5682000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2185017165718483e-05, - "loss": 0.3362, - "step": 5682500 - }, - { - "epoch": 3.41, - "learning_rate": 3.218291720015792e-05, - "loss": 0.3343, - "step": 5683000 - }, - { - "epoch": 3.41, - "learning_rate": 3.218081723459735e-05, - "loss": 0.3266, - "step": 5683500 - }, - { - "epoch": 3.41, - "learning_rate": 3.217872146896791e-05, - "loss": 0.3296, - "step": 5684000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2176621503407344e-05, - "loss": 0.3269, - "step": 5684500 - }, - { - "epoch": 3.41, - "learning_rate": 3.2174521537846784e-05, - "loss": 0.3262, - "step": 5685000 - }, - { - "epoch": 3.41, - "learning_rate": 3.217242157228622e-05, - "loss": 0.3289, - "step": 5685500 - }, - { - "epoch": 3.41, - "learning_rate": 3.2170321606725645e-05, - "loss": 0.3347, - "step": 5686000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2168225841096205e-05, - "loss": 0.3317, - "step": 5686500 - }, - { - "epoch": 3.41, - "learning_rate": 3.2166125875535645e-05, - "loss": 0.3263, - "step": 5687000 - }, - { - "epoch": 3.41, - "learning_rate": 3.21640301099062e-05, - "loss": 0.3221, - "step": 5687500 - }, - { - "epoch": 3.41, - "learning_rate": 3.216193014434563e-05, - "loss": 0.3359, - "step": 5688000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2159830178785066e-05, - "loss": 0.3265, - "step": 5688500 - }, - { - "epoch": 3.41, - "learning_rate": 3.2157730213224506e-05, - "loss": 0.3307, - "step": 5689000 - }, - { - "epoch": 3.41, - "learning_rate": 3.215563024766394e-05, - "loss": 0.3238, - "step": 5689500 - }, - { - "epoch": 3.41, - "learning_rate": 3.215353028210337e-05, - "loss": 0.3364, - "step": 5690000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2151430316542806e-05, - "loss": 0.324, - "step": 5690500 - }, - { - "epoch": 3.41, - "learning_rate": 3.214933035098224e-05, - "loss": 0.3272, - "step": 5691000 - }, - { - "epoch": 3.41, - "learning_rate": 3.214723038542168e-05, - "loss": 0.3267, - "step": 5691500 - }, - { - "epoch": 3.41, - "learning_rate": 3.214513461979224e-05, - "loss": 0.3262, - "step": 5692000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2143034654231674e-05, - "loss": 0.3318, - "step": 5692500 - }, - { - "epoch": 3.41, - "learning_rate": 3.21409346886711e-05, - "loss": 0.3329, - "step": 5693000 - }, - { - "epoch": 3.41, - "learning_rate": 3.213883892304166e-05, - "loss": 0.3392, - "step": 5693500 - }, - { - "epoch": 3.41, - "learning_rate": 3.21367389574811e-05, - "loss": 0.3245, - "step": 5694000 - }, - { - "epoch": 3.41, - "learning_rate": 3.2134638991920535e-05, - "loss": 0.3399, - "step": 5694500 - }, - { - "epoch": 3.41, - "learning_rate": 3.213253902635997e-05, - "loss": 0.3277, - "step": 5695000 - }, - { - "epoch": 3.41, - "learning_rate": 3.21304390607994e-05, - "loss": 0.3207, - "step": 5695500 - }, - { - "epoch": 3.41, - "learning_rate": 3.2128339095238835e-05, - "loss": 0.3221, - "step": 5696000 - }, - { - "epoch": 3.42, - "learning_rate": 3.212623912967827e-05, - "loss": 0.3275, - "step": 5696500 - }, - { - "epoch": 3.42, - "learning_rate": 3.212413916411771e-05, - "loss": 0.3354, - "step": 5697000 - }, - { - "epoch": 3.42, - "learning_rate": 3.212203919855714e-05, - "loss": 0.333, - "step": 5697500 - }, - { - "epoch": 3.42, - "learning_rate": 3.2119939232996576e-05, - "loss": 0.3304, - "step": 5698000 - }, - { - "epoch": 3.42, - "learning_rate": 3.2117843467367136e-05, - "loss": 0.3291, - "step": 5698500 - }, - { - "epoch": 3.42, - "learning_rate": 3.211574350180657e-05, - "loss": 0.3352, - "step": 5699000 - }, - { - "epoch": 3.42, - "learning_rate": 3.2113643536246e-05, - "loss": 0.3277, - "step": 5699500 - }, - { - "epoch": 3.42, - "learning_rate": 3.211154357068544e-05, - "loss": 0.3252, - "step": 5700000 - }, - { - "epoch": 3.42, - "eval_loss": 0.32316854596138, - "eval_runtime": 1119.4683, - "eval_samples_per_second": 470.509, - "eval_steps_per_second": 78.418, - "step": 5700000 - }, - { - "epoch": 3.42, - "learning_rate": 3.2109443605124876e-05, - "loss": 0.3347, - "step": 5700500 - }, - { - "epoch": 3.42, - "learning_rate": 3.210734363956431e-05, - "loss": 0.3145, - "step": 5701000 - }, - { - "epoch": 3.42, - "learning_rate": 3.210524367400375e-05, - "loss": 0.3391, - "step": 5701500 - }, - { - "epoch": 3.42, - "learning_rate": 3.2103147908374304e-05, - "loss": 0.3218, - "step": 5702000 - }, - { - "epoch": 3.42, - "learning_rate": 3.210104794281374e-05, - "loss": 0.3225, - "step": 5702500 - }, - { - "epoch": 3.42, - "learning_rate": 3.209894797725317e-05, - "loss": 0.3307, - "step": 5703000 - }, - { - "epoch": 3.42, - "learning_rate": 3.209684801169261e-05, - "loss": 0.324, - "step": 5703500 - }, - { - "epoch": 3.42, - "learning_rate": 3.2094748046132044e-05, - "loss": 0.3306, - "step": 5704000 - }, - { - "epoch": 3.42, - "learning_rate": 3.209264808057148e-05, - "loss": 0.3319, - "step": 5704500 - }, - { - "epoch": 3.42, - "learning_rate": 3.209054811501092e-05, - "loss": 0.318, - "step": 5705000 - }, - { - "epoch": 3.42, - "learning_rate": 3.2088448149450345e-05, - "loss": 0.334, - "step": 5705500 - }, - { - "epoch": 3.42, - "learning_rate": 3.2086352383820905e-05, - "loss": 0.3261, - "step": 5706000 - }, - { - "epoch": 3.42, - "learning_rate": 3.208425661819146e-05, - "loss": 0.3428, - "step": 5706500 - }, - { - "epoch": 3.42, - "learning_rate": 3.20821566526309e-05, - "loss": 0.333, - "step": 5707000 - }, - { - "epoch": 3.42, - "learning_rate": 3.208005668707033e-05, - "loss": 0.3407, - "step": 5707500 - }, - { - "epoch": 3.42, - "learning_rate": 3.2077956721509766e-05, - "loss": 0.3275, - "step": 5708000 - }, - { - "epoch": 3.42, - "learning_rate": 3.2075856755949206e-05, - "loss": 0.3247, - "step": 5708500 - }, - { - "epoch": 3.42, - "learning_rate": 3.207375679038864e-05, - "loss": 0.3274, - "step": 5709000 - }, - { - "epoch": 3.42, - "learning_rate": 3.207165682482807e-05, - "loss": 0.3275, - "step": 5709500 - }, - { - "epoch": 3.42, - "learning_rate": 3.206955685926751e-05, - "loss": 0.323, - "step": 5710000 - }, - { - "epoch": 3.42, - "learning_rate": 3.206746109363807e-05, - "loss": 0.3323, - "step": 5710500 - }, - { - "epoch": 3.42, - "learning_rate": 3.20653611280775e-05, - "loss": 0.3409, - "step": 5711000 - }, - { - "epoch": 3.42, - "learning_rate": 3.2063261162516934e-05, - "loss": 0.3283, - "step": 5711500 - }, - { - "epoch": 3.42, - "learning_rate": 3.206116539688749e-05, - "loss": 0.3297, - "step": 5712000 - }, - { - "epoch": 3.42, - "learning_rate": 3.205906543132693e-05, - "loss": 0.3275, - "step": 5712500 - }, - { - "epoch": 3.43, - "learning_rate": 3.205696546576636e-05, - "loss": 0.3279, - "step": 5713000 - }, - { - "epoch": 3.43, - "learning_rate": 3.2054865500205794e-05, - "loss": 0.3251, - "step": 5713500 - }, - { - "epoch": 3.43, - "learning_rate": 3.2052769734576355e-05, - "loss": 0.3277, - "step": 5714000 - }, - { - "epoch": 3.43, - "learning_rate": 3.205066976901579e-05, - "loss": 0.3338, - "step": 5714500 - }, - { - "epoch": 3.43, - "learning_rate": 3.204856980345522e-05, - "loss": 0.3327, - "step": 5715000 - }, - { - "epoch": 3.43, - "learning_rate": 3.204646983789466e-05, - "loss": 0.3299, - "step": 5715500 - }, - { - "epoch": 3.43, - "learning_rate": 3.2044369872334095e-05, - "loss": 0.3334, - "step": 5716000 - }, - { - "epoch": 3.43, - "learning_rate": 3.204226990677353e-05, - "loss": 0.3313, - "step": 5716500 - }, - { - "epoch": 3.43, - "learning_rate": 3.204016994121297e-05, - "loss": 0.3347, - "step": 5717000 - }, - { - "epoch": 3.43, - "learning_rate": 3.2038069975652396e-05, - "loss": 0.3369, - "step": 5717500 - }, - { - "epoch": 3.43, - "learning_rate": 3.2035978409954076e-05, - "loss": 0.3269, - "step": 5718000 - }, - { - "epoch": 3.43, - "learning_rate": 3.203387844439351e-05, - "loss": 0.3183, - "step": 5718500 - }, - { - "epoch": 3.43, - "learning_rate": 3.203177847883294e-05, - "loss": 0.3335, - "step": 5719000 - }, - { - "epoch": 3.43, - "learning_rate": 3.202967851327238e-05, - "loss": 0.3209, - "step": 5719500 - }, - { - "epoch": 3.43, - "learning_rate": 3.202757854771182e-05, - "loss": 0.3277, - "step": 5720000 - }, - { - "epoch": 3.43, - "learning_rate": 3.202547858215125e-05, - "loss": 0.3279, - "step": 5720500 - }, - { - "epoch": 3.43, - "learning_rate": 3.202338281652181e-05, - "loss": 0.3268, - "step": 5721000 - }, - { - "epoch": 3.43, - "learning_rate": 3.2021282850961244e-05, - "loss": 0.3351, - "step": 5721500 - }, - { - "epoch": 3.43, - "learning_rate": 3.201918288540068e-05, - "loss": 0.3287, - "step": 5722000 - }, - { - "epoch": 3.43, - "learning_rate": 3.201708291984012e-05, - "loss": 0.3304, - "step": 5722500 - }, - { - "epoch": 3.43, - "learning_rate": 3.201498295427955e-05, - "loss": 0.3222, - "step": 5723000 - }, - { - "epoch": 3.43, - "learning_rate": 3.2012887188650105e-05, - "loss": 0.3265, - "step": 5723500 - }, - { - "epoch": 3.43, - "learning_rate": 3.201078722308954e-05, - "loss": 0.3262, - "step": 5724000 - }, - { - "epoch": 3.43, - "learning_rate": 3.200868725752898e-05, - "loss": 0.3274, - "step": 5724500 - }, - { - "epoch": 3.43, - "learning_rate": 3.200659149189953e-05, - "loss": 0.3255, - "step": 5725000 - }, - { - "epoch": 3.43, - "learning_rate": 3.2004491526338966e-05, - "loss": 0.338, - "step": 5725500 - }, - { - "epoch": 3.43, - "learning_rate": 3.2002395760709526e-05, - "loss": 0.3435, - "step": 5726000 - }, - { - "epoch": 3.43, - "learning_rate": 3.200029579514896e-05, - "loss": 0.3247, - "step": 5726500 - }, - { - "epoch": 3.43, - "learning_rate": 3.199819582958839e-05, - "loss": 0.3228, - "step": 5727000 - }, - { - "epoch": 3.43, - "learning_rate": 3.1996095864027826e-05, - "loss": 0.3197, - "step": 5727500 - }, - { - "epoch": 3.43, - "learning_rate": 3.1993995898467267e-05, - "loss": 0.3221, - "step": 5728000 - }, - { - "epoch": 3.43, - "learning_rate": 3.19918959329067e-05, - "loss": 0.3236, - "step": 5728500 - }, - { - "epoch": 3.43, - "learning_rate": 3.1989795967346133e-05, - "loss": 0.3313, - "step": 5729000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1987696001785574e-05, - "loss": 0.3264, - "step": 5729500 - }, - { - "epoch": 3.44, - "learning_rate": 3.198559603622501e-05, - "loss": 0.3358, - "step": 5730000 - }, - { - "epoch": 3.44, - "learning_rate": 3.198349607066444e-05, - "loss": 0.3321, - "step": 5730500 - }, - { - "epoch": 3.44, - "learning_rate": 3.198139610510388e-05, - "loss": 0.3392, - "step": 5731000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1979296139543314e-05, - "loss": 0.3285, - "step": 5731500 - }, - { - "epoch": 3.44, - "learning_rate": 3.197719617398274e-05, - "loss": 0.3321, - "step": 5732000 - }, - { - "epoch": 3.44, - "learning_rate": 3.197509620842218e-05, - "loss": 0.3233, - "step": 5732500 - }, - { - "epoch": 3.44, - "learning_rate": 3.197300044279274e-05, - "loss": 0.3325, - "step": 5733000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1970900477232175e-05, - "loss": 0.3308, - "step": 5733500 - }, - { - "epoch": 3.44, - "learning_rate": 3.196880051167161e-05, - "loss": 0.3265, - "step": 5734000 - }, - { - "epoch": 3.44, - "learning_rate": 3.196670054611104e-05, - "loss": 0.3338, - "step": 5734500 - }, - { - "epoch": 3.44, - "learning_rate": 3.1964600580550475e-05, - "loss": 0.3269, - "step": 5735000 - }, - { - "epoch": 3.44, - "learning_rate": 3.196250061498991e-05, - "loss": 0.331, - "step": 5735500 - }, - { - "epoch": 3.44, - "learning_rate": 3.196040064942935e-05, - "loss": 0.3281, - "step": 5736000 - }, - { - "epoch": 3.44, - "learning_rate": 3.19583048837999e-05, - "loss": 0.3239, - "step": 5736500 - }, - { - "epoch": 3.44, - "learning_rate": 3.1956204918239336e-05, - "loss": 0.3243, - "step": 5737000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1954104952678776e-05, - "loss": 0.3271, - "step": 5737500 - }, - { - "epoch": 3.44, - "learning_rate": 3.195200498711821e-05, - "loss": 0.3255, - "step": 5738000 - }, - { - "epoch": 3.44, - "learning_rate": 3.194990502155764e-05, - "loss": 0.3353, - "step": 5738500 - }, - { - "epoch": 3.44, - "learning_rate": 3.19478092559282e-05, - "loss": 0.3297, - "step": 5739000 - }, - { - "epoch": 3.44, - "learning_rate": 3.194570929036764e-05, - "loss": 0.3336, - "step": 5739500 - }, - { - "epoch": 3.44, - "learning_rate": 3.194360932480707e-05, - "loss": 0.3328, - "step": 5740000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1941509359246504e-05, - "loss": 0.3223, - "step": 5740500 - }, - { - "epoch": 3.44, - "learning_rate": 3.1939413593617064e-05, - "loss": 0.3223, - "step": 5741000 - }, - { - "epoch": 3.44, - "learning_rate": 3.19373136280565e-05, - "loss": 0.3241, - "step": 5741500 - }, - { - "epoch": 3.44, - "learning_rate": 3.193521366249593e-05, - "loss": 0.3292, - "step": 5742000 - }, - { - "epoch": 3.44, - "learning_rate": 3.193311369693537e-05, - "loss": 0.327, - "step": 5742500 - }, - { - "epoch": 3.44, - "learning_rate": 3.193101793130593e-05, - "loss": 0.3304, - "step": 5743000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1928917965745365e-05, - "loss": 0.3389, - "step": 5743500 - }, - { - "epoch": 3.44, - "learning_rate": 3.192681800018479e-05, - "loss": 0.3325, - "step": 5744000 - }, - { - "epoch": 3.44, - "learning_rate": 3.192471803462423e-05, - "loss": 0.3286, - "step": 5744500 - }, - { - "epoch": 3.44, - "learning_rate": 3.1922618069063666e-05, - "loss": 0.3248, - "step": 5745000 - }, - { - "epoch": 3.44, - "learning_rate": 3.1920522303434226e-05, - "loss": 0.3322, - "step": 5745500 - }, - { - "epoch": 3.44, - "learning_rate": 3.191842233787365e-05, - "loss": 0.3276, - "step": 5746000 - }, - { - "epoch": 3.45, - "learning_rate": 3.191632237231309e-05, - "loss": 0.3261, - "step": 5746500 - }, - { - "epoch": 3.45, - "learning_rate": 3.1914222406752526e-05, - "loss": 0.3319, - "step": 5747000 - }, - { - "epoch": 3.45, - "learning_rate": 3.191212244119196e-05, - "loss": 0.3177, - "step": 5747500 - }, - { - "epoch": 3.45, - "learning_rate": 3.19100224756314e-05, - "loss": 0.3279, - "step": 5748000 - }, - { - "epoch": 3.45, - "learning_rate": 3.1907922510070834e-05, - "loss": 0.3384, - "step": 5748500 - }, - { - "epoch": 3.45, - "learning_rate": 3.190582254451027e-05, - "loss": 0.3262, - "step": 5749000 - }, - { - "epoch": 3.45, - "learning_rate": 3.190373097881195e-05, - "loss": 0.3298, - "step": 5749500 - }, - { - "epoch": 3.45, - "learning_rate": 3.190163101325139e-05, - "loss": 0.3292, - "step": 5750000 - }, - { - "epoch": 3.45, - "learning_rate": 3.189953524762194e-05, - "loss": 0.3353, - "step": 5750500 - }, - { - "epoch": 3.45, - "learning_rate": 3.1897435282061375e-05, - "loss": 0.33, - "step": 5751000 - }, - { - "epoch": 3.45, - "learning_rate": 3.189533531650081e-05, - "loss": 0.3196, - "step": 5751500 - }, - { - "epoch": 3.45, - "learning_rate": 3.189323535094025e-05, - "loss": 0.3286, - "step": 5752000 - }, - { - "epoch": 3.45, - "learning_rate": 3.189113538537968e-05, - "loss": 0.3363, - "step": 5752500 - }, - { - "epoch": 3.45, - "learning_rate": 3.1889035419819115e-05, - "loss": 0.3397, - "step": 5753000 - }, - { - "epoch": 3.45, - "learning_rate": 3.188693545425855e-05, - "loss": 0.3395, - "step": 5753500 - }, - { - "epoch": 3.45, - "learning_rate": 3.188483548869798e-05, - "loss": 0.3249, - "step": 5754000 - }, - { - "epoch": 3.45, - "learning_rate": 3.1882735523137416e-05, - "loss": 0.3332, - "step": 5754500 - }, - { - "epoch": 3.45, - "learning_rate": 3.1880639757507976e-05, - "loss": 0.33, - "step": 5755000 - }, - { - "epoch": 3.45, - "learning_rate": 3.1878539791947416e-05, - "loss": 0.3306, - "step": 5755500 - }, - { - "epoch": 3.45, - "learning_rate": 3.187643982638684e-05, - "loss": 0.3267, - "step": 5756000 - }, - { - "epoch": 3.45, - "learning_rate": 3.187433986082628e-05, - "loss": 0.3228, - "step": 5756500 - }, - { - "epoch": 3.45, - "learning_rate": 3.187223989526572e-05, - "loss": 0.3303, - "step": 5757000 - }, - { - "epoch": 3.45, - "learning_rate": 3.187014412963628e-05, - "loss": 0.3268, - "step": 5757500 - }, - { - "epoch": 3.45, - "learning_rate": 3.1868044164075704e-05, - "loss": 0.3238, - "step": 5758000 - }, - { - "epoch": 3.45, - "learning_rate": 3.1865944198515144e-05, - "loss": 0.3288, - "step": 5758500 - }, - { - "epoch": 3.45, - "learning_rate": 3.186384423295458e-05, - "loss": 0.3331, - "step": 5759000 - }, - { - "epoch": 3.45, - "learning_rate": 3.186174846732514e-05, - "loss": 0.3257, - "step": 5759500 - }, - { - "epoch": 3.45, - "learning_rate": 3.185964850176457e-05, - "loss": 0.329, - "step": 5760000 - }, - { - "epoch": 3.45, - "learning_rate": 3.1857548536204005e-05, - "loss": 0.3278, - "step": 5760500 - }, - { - "epoch": 3.45, - "learning_rate": 3.185544857064344e-05, - "loss": 0.3288, - "step": 5761000 - }, - { - "epoch": 3.45, - "learning_rate": 3.185334860508287e-05, - "loss": 0.327, - "step": 5761500 - }, - { - "epoch": 3.45, - "learning_rate": 3.185125283945343e-05, - "loss": 0.3289, - "step": 5762000 - }, - { - "epoch": 3.45, - "learning_rate": 3.184915287389287e-05, - "loss": 0.3251, - "step": 5762500 - }, - { - "epoch": 3.46, - "learning_rate": 3.18470529083323e-05, - "loss": 0.3294, - "step": 5763000 - }, - { - "epoch": 3.46, - "learning_rate": 3.184495294277174e-05, - "loss": 0.3375, - "step": 5763500 - }, - { - "epoch": 3.46, - "learning_rate": 3.18428571771423e-05, - "loss": 0.3332, - "step": 5764000 - }, - { - "epoch": 3.46, - "learning_rate": 3.184075721158173e-05, - "loss": 0.3437, - "step": 5764500 - }, - { - "epoch": 3.46, - "learning_rate": 3.1838657246021166e-05, - "loss": 0.3279, - "step": 5765000 - }, - { - "epoch": 3.46, - "learning_rate": 3.18365572804606e-05, - "loss": 0.3296, - "step": 5765500 - }, - { - "epoch": 3.46, - "learning_rate": 3.183445731490003e-05, - "loss": 0.3365, - "step": 5766000 - }, - { - "epoch": 3.46, - "learning_rate": 3.183235734933947e-05, - "loss": 0.3367, - "step": 5766500 - }, - { - "epoch": 3.46, - "learning_rate": 3.183025738377891e-05, - "loss": 0.331, - "step": 5767000 - }, - { - "epoch": 3.46, - "learning_rate": 3.182815741821834e-05, - "loss": 0.3305, - "step": 5767500 - }, - { - "epoch": 3.46, - "learning_rate": 3.1826061652588894e-05, - "loss": 0.3277, - "step": 5768000 - }, - { - "epoch": 3.46, - "learning_rate": 3.182396168702833e-05, - "loss": 0.3247, - "step": 5768500 - }, - { - "epoch": 3.46, - "learning_rate": 3.182186172146777e-05, - "loss": 0.3275, - "step": 5769000 - }, - { - "epoch": 3.46, - "learning_rate": 3.18197617559072e-05, - "loss": 0.3243, - "step": 5769500 - }, - { - "epoch": 3.46, - "learning_rate": 3.1817661790346635e-05, - "loss": 0.3294, - "step": 5770000 - }, - { - "epoch": 3.46, - "learning_rate": 3.1815566024717195e-05, - "loss": 0.3359, - "step": 5770500 - }, - { - "epoch": 3.46, - "learning_rate": 3.181346605915663e-05, - "loss": 0.3264, - "step": 5771000 - }, - { - "epoch": 3.46, - "learning_rate": 3.181136609359606e-05, - "loss": 0.3274, - "step": 5771500 - }, - { - "epoch": 3.46, - "learning_rate": 3.18092661280355e-05, - "loss": 0.3254, - "step": 5772000 - }, - { - "epoch": 3.46, - "learning_rate": 3.1807170362406056e-05, - "loss": 0.3389, - "step": 5772500 - }, - { - "epoch": 3.46, - "learning_rate": 3.180507039684549e-05, - "loss": 0.3321, - "step": 5773000 - }, - { - "epoch": 3.46, - "learning_rate": 3.180297043128492e-05, - "loss": 0.3251, - "step": 5773500 - }, - { - "epoch": 3.46, - "learning_rate": 3.180087046572436e-05, - "loss": 0.3337, - "step": 5774000 - }, - { - "epoch": 3.46, - "learning_rate": 3.1798770500163796e-05, - "loss": 0.3362, - "step": 5774500 - }, - { - "epoch": 3.46, - "learning_rate": 3.179667473453435e-05, - "loss": 0.3335, - "step": 5775000 - }, - { - "epoch": 3.46, - "learning_rate": 3.1794574768973783e-05, - "loss": 0.3282, - "step": 5775500 - }, - { - "epoch": 3.46, - "learning_rate": 3.1792474803413224e-05, - "loss": 0.3324, - "step": 5776000 - }, - { - "epoch": 3.46, - "learning_rate": 3.179037483785266e-05, - "loss": 0.3303, - "step": 5776500 - }, - { - "epoch": 3.46, - "learning_rate": 3.178827907222321e-05, - "loss": 0.3312, - "step": 5777000 - }, - { - "epoch": 3.46, - "learning_rate": 3.178617910666265e-05, - "loss": 0.3358, - "step": 5777500 - }, - { - "epoch": 3.46, - "learning_rate": 3.1784079141102084e-05, - "loss": 0.3295, - "step": 5778000 - }, - { - "epoch": 3.46, - "learning_rate": 3.178197917554152e-05, - "loss": 0.3298, - "step": 5778500 - }, - { - "epoch": 3.46, - "learning_rate": 3.177987920998096e-05, - "loss": 0.3328, - "step": 5779000 - }, - { - "epoch": 3.47, - "learning_rate": 3.177777924442039e-05, - "loss": 0.3337, - "step": 5779500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1775679278859825e-05, - "loss": 0.3323, - "step": 5780000 - }, - { - "epoch": 3.47, - "learning_rate": 3.1773579313299265e-05, - "loss": 0.3313, - "step": 5780500 - }, - { - "epoch": 3.47, - "learning_rate": 3.177148774760094e-05, - "loss": 0.3268, - "step": 5781000 - }, - { - "epoch": 3.47, - "learning_rate": 3.176938778204038e-05, - "loss": 0.3276, - "step": 5781500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1767287816479806e-05, - "loss": 0.3321, - "step": 5782000 - }, - { - "epoch": 3.47, - "learning_rate": 3.176518785091924e-05, - "loss": 0.3304, - "step": 5782500 - }, - { - "epoch": 3.47, - "learning_rate": 3.176308788535868e-05, - "loss": 0.3343, - "step": 5783000 - }, - { - "epoch": 3.47, - "learning_rate": 3.176099211972924e-05, - "loss": 0.3339, - "step": 5783500 - }, - { - "epoch": 3.47, - "learning_rate": 3.175889215416867e-05, - "loss": 0.3311, - "step": 5784000 - }, - { - "epoch": 3.47, - "learning_rate": 3.175679218860811e-05, - "loss": 0.3382, - "step": 5784500 - }, - { - "epoch": 3.47, - "learning_rate": 3.175469222304754e-05, - "loss": 0.3167, - "step": 5785000 - }, - { - "epoch": 3.47, - "learning_rate": 3.1752592257486974e-05, - "loss": 0.3263, - "step": 5785500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1750496491857534e-05, - "loss": 0.3316, - "step": 5786000 - }, - { - "epoch": 3.47, - "learning_rate": 3.174839652629697e-05, - "loss": 0.3319, - "step": 5786500 - }, - { - "epoch": 3.47, - "learning_rate": 3.17462965607364e-05, - "loss": 0.336, - "step": 5787000 - }, - { - "epoch": 3.47, - "learning_rate": 3.1744196595175834e-05, - "loss": 0.3316, - "step": 5787500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1742096629615275e-05, - "loss": 0.3299, - "step": 5788000 - }, - { - "epoch": 3.47, - "learning_rate": 3.1740000863985835e-05, - "loss": 0.3308, - "step": 5788500 - }, - { - "epoch": 3.47, - "learning_rate": 3.173790089842526e-05, - "loss": 0.3315, - "step": 5789000 - }, - { - "epoch": 3.47, - "learning_rate": 3.1735800932864695e-05, - "loss": 0.325, - "step": 5789500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1733700967304135e-05, - "loss": 0.3268, - "step": 5790000 - }, - { - "epoch": 3.47, - "learning_rate": 3.173160100174357e-05, - "loss": 0.3315, - "step": 5790500 - }, - { - "epoch": 3.47, - "learning_rate": 3.172950523611413e-05, - "loss": 0.3269, - "step": 5791000 - }, - { - "epoch": 3.47, - "learning_rate": 3.172740527055356e-05, - "loss": 0.3316, - "step": 5791500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1725305304992996e-05, - "loss": 0.3297, - "step": 5792000 - }, - { - "epoch": 3.47, - "learning_rate": 3.172320533943243e-05, - "loss": 0.3227, - "step": 5792500 - }, - { - "epoch": 3.47, - "learning_rate": 3.172110537387187e-05, - "loss": 0.3242, - "step": 5793000 - }, - { - "epoch": 3.47, - "learning_rate": 3.17190054083113e-05, - "loss": 0.3337, - "step": 5793500 - }, - { - "epoch": 3.47, - "learning_rate": 3.171690544275074e-05, - "loss": 0.3371, - "step": 5794000 - }, - { - "epoch": 3.47, - "learning_rate": 3.171480547719018e-05, - "loss": 0.3276, - "step": 5794500 - }, - { - "epoch": 3.47, - "learning_rate": 3.171270971156073e-05, - "loss": 0.3327, - "step": 5795000 - }, - { - "epoch": 3.47, - "learning_rate": 3.171061394593129e-05, - "loss": 0.3446, - "step": 5795500 - }, - { - "epoch": 3.47, - "learning_rate": 3.1708513980370724e-05, - "loss": 0.3283, - "step": 5796000 - }, - { - "epoch": 3.48, - "learning_rate": 3.170641401481015e-05, - "loss": 0.3318, - "step": 5796500 - }, - { - "epoch": 3.48, - "learning_rate": 3.170431404924959e-05, - "loss": 0.3218, - "step": 5797000 - }, - { - "epoch": 3.48, - "learning_rate": 3.170221828362015e-05, - "loss": 0.3289, - "step": 5797500 - }, - { - "epoch": 3.48, - "learning_rate": 3.1700118318059585e-05, - "loss": 0.326, - "step": 5798000 - }, - { - "epoch": 3.48, - "learning_rate": 3.169801835249902e-05, - "loss": 0.3279, - "step": 5798500 - }, - { - "epoch": 3.48, - "learning_rate": 3.169591838693845e-05, - "loss": 0.3182, - "step": 5799000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1693818421377885e-05, - "loss": 0.331, - "step": 5799500 - }, - { - "epoch": 3.48, - "learning_rate": 3.1691722655748446e-05, - "loss": 0.3225, - "step": 5800000 - }, - { - "epoch": 3.48, - "eval_loss": 0.32082223892211914, - "eval_runtime": 1121.4189, - "eval_samples_per_second": 469.691, - "eval_steps_per_second": 78.282, - "step": 5800000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1689626890119e-05, - "loss": 0.3266, - "step": 5800500 - }, - { - "epoch": 3.48, - "learning_rate": 3.168752692455844e-05, - "loss": 0.3319, - "step": 5801000 - }, - { - "epoch": 3.48, - "learning_rate": 3.168542695899787e-05, - "loss": 0.3262, - "step": 5801500 - }, - { - "epoch": 3.48, - "learning_rate": 3.1683326993437307e-05, - "loss": 0.3249, - "step": 5802000 - }, - { - "epoch": 3.48, - "learning_rate": 3.168122702787675e-05, - "loss": 0.3226, - "step": 5802500 - }, - { - "epoch": 3.48, - "learning_rate": 3.16791312622473e-05, - "loss": 0.3374, - "step": 5803000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1677031296686734e-05, - "loss": 0.3277, - "step": 5803500 - }, - { - "epoch": 3.48, - "learning_rate": 3.167493133112617e-05, - "loss": 0.324, - "step": 5804000 - }, - { - "epoch": 3.48, - "learning_rate": 3.167283136556561e-05, - "loss": 0.3268, - "step": 5804500 - }, - { - "epoch": 3.48, - "learning_rate": 3.167073140000504e-05, - "loss": 0.3269, - "step": 5805000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1668631434444474e-05, - "loss": 0.3212, - "step": 5805500 - }, - { - "epoch": 3.48, - "learning_rate": 3.166653146888391e-05, - "loss": 0.3215, - "step": 5806000 - }, - { - "epoch": 3.48, - "learning_rate": 3.166443150332334e-05, - "loss": 0.3225, - "step": 5806500 - }, - { - "epoch": 3.48, - "learning_rate": 3.166233153776278e-05, - "loss": 0.3371, - "step": 5807000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1660231572202215e-05, - "loss": 0.3308, - "step": 5807500 - }, - { - "epoch": 3.48, - "learning_rate": 3.165813580657277e-05, - "loss": 0.3245, - "step": 5808000 - }, - { - "epoch": 3.48, - "learning_rate": 3.16560358410122e-05, - "loss": 0.3291, - "step": 5808500 - }, - { - "epoch": 3.48, - "learning_rate": 3.165393587545164e-05, - "loss": 0.3298, - "step": 5809000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1651835909891076e-05, - "loss": 0.3257, - "step": 5809500 - }, - { - "epoch": 3.48, - "learning_rate": 3.164973594433051e-05, - "loss": 0.3221, - "step": 5810000 - }, - { - "epoch": 3.48, - "learning_rate": 3.164763597876995e-05, - "loss": 0.333, - "step": 5810500 - }, - { - "epoch": 3.48, - "learning_rate": 3.164553601320938e-05, - "loss": 0.3329, - "step": 5811000 - }, - { - "epoch": 3.48, - "learning_rate": 3.1643440247579936e-05, - "loss": 0.3315, - "step": 5811500 - }, - { - "epoch": 3.48, - "learning_rate": 3.164134028201937e-05, - "loss": 0.3256, - "step": 5812000 - }, - { - "epoch": 3.48, - "learning_rate": 3.163924031645881e-05, - "loss": 0.3312, - "step": 5812500 - }, - { - "epoch": 3.49, - "learning_rate": 3.1637140350898244e-05, - "loss": 0.3435, - "step": 5813000 - }, - { - "epoch": 3.49, - "learning_rate": 3.163504038533768e-05, - "loss": 0.3223, - "step": 5813500 - }, - { - "epoch": 3.49, - "learning_rate": 3.163294041977712e-05, - "loss": 0.3311, - "step": 5814000 - }, - { - "epoch": 3.49, - "learning_rate": 3.163084045421655e-05, - "loss": 0.3244, - "step": 5814500 - }, - { - "epoch": 3.49, - "learning_rate": 3.162874048865599e-05, - "loss": 0.3293, - "step": 5815000 - }, - { - "epoch": 3.49, - "learning_rate": 3.1626644723026545e-05, - "loss": 0.3369, - "step": 5815500 - }, - { - "epoch": 3.49, - "learning_rate": 3.162454475746598e-05, - "loss": 0.3324, - "step": 5816000 - }, - { - "epoch": 3.49, - "learning_rate": 3.162244899183653e-05, - "loss": 0.3303, - "step": 5816500 - }, - { - "epoch": 3.49, - "learning_rate": 3.1620349026275965e-05, - "loss": 0.3284, - "step": 5817000 - }, - { - "epoch": 3.49, - "learning_rate": 3.1618249060715405e-05, - "loss": 0.3239, - "step": 5817500 - }, - { - "epoch": 3.49, - "learning_rate": 3.161614909515484e-05, - "loss": 0.3264, - "step": 5818000 - }, - { - "epoch": 3.49, - "learning_rate": 3.161404912959427e-05, - "loss": 0.3301, - "step": 5818500 - }, - { - "epoch": 3.49, - "learning_rate": 3.161194916403371e-05, - "loss": 0.3324, - "step": 5819000 - }, - { - "epoch": 3.49, - "learning_rate": 3.1609849198473146e-05, - "loss": 0.3307, - "step": 5819500 - }, - { - "epoch": 3.49, - "learning_rate": 3.16077534328437e-05, - "loss": 0.3246, - "step": 5820000 - }, - { - "epoch": 3.49, - "learning_rate": 3.160565346728314e-05, - "loss": 0.3262, - "step": 5820500 - }, - { - "epoch": 3.49, - "learning_rate": 3.160355350172257e-05, - "loss": 0.3327, - "step": 5821000 - }, - { - "epoch": 3.49, - "learning_rate": 3.160145353616201e-05, - "loss": 0.3268, - "step": 5821500 - }, - { - "epoch": 3.49, - "learning_rate": 3.159935357060145e-05, - "loss": 0.3391, - "step": 5822000 - }, - { - "epoch": 3.49, - "learning_rate": 3.159725360504088e-05, - "loss": 0.3266, - "step": 5822500 - }, - { - "epoch": 3.49, - "learning_rate": 3.159515363948031e-05, - "loss": 0.3286, - "step": 5823000 - }, - { - "epoch": 3.49, - "learning_rate": 3.159305367391975e-05, - "loss": 0.3354, - "step": 5823500 - }, - { - "epoch": 3.49, - "learning_rate": 3.159095790829031e-05, - "loss": 0.3251, - "step": 5824000 - }, - { - "epoch": 3.49, - "learning_rate": 3.158885794272974e-05, - "loss": 0.3258, - "step": 5824500 - }, - { - "epoch": 3.49, - "learning_rate": 3.1586762177100295e-05, - "loss": 0.3373, - "step": 5825000 - }, - { - "epoch": 3.49, - "learning_rate": 3.158466221153973e-05, - "loss": 0.3337, - "step": 5825500 - }, - { - "epoch": 3.49, - "learning_rate": 3.158256224597917e-05, - "loss": 0.3259, - "step": 5826000 - }, - { - "epoch": 3.49, - "learning_rate": 3.15804622804186e-05, - "loss": 0.3313, - "step": 5826500 - }, - { - "epoch": 3.49, - "learning_rate": 3.1578366514789155e-05, - "loss": 0.3281, - "step": 5827000 - }, - { - "epoch": 3.49, - "learning_rate": 3.1576266549228596e-05, - "loss": 0.3194, - "step": 5827500 - }, - { - "epoch": 3.49, - "learning_rate": 3.157416658366803e-05, - "loss": 0.3361, - "step": 5828000 - }, - { - "epoch": 3.49, - "learning_rate": 3.157206661810746e-05, - "loss": 0.3311, - "step": 5828500 - }, - { - "epoch": 3.49, - "learning_rate": 3.15699666525469e-05, - "loss": 0.3256, - "step": 5829000 - }, - { - "epoch": 3.5, - "learning_rate": 3.1567866686986336e-05, - "loss": 0.3268, - "step": 5829500 - }, - { - "epoch": 3.5, - "learning_rate": 3.156576672142577e-05, - "loss": 0.3279, - "step": 5830000 - }, - { - "epoch": 3.5, - "learning_rate": 3.15636667558652e-05, - "loss": 0.3346, - "step": 5830500 - }, - { - "epoch": 3.5, - "learning_rate": 3.156157519016688e-05, - "loss": 0.3348, - "step": 5831000 - }, - { - "epoch": 3.5, - "learning_rate": 3.155947522460632e-05, - "loss": 0.3285, - "step": 5831500 - }, - { - "epoch": 3.5, - "learning_rate": 3.155737525904575e-05, - "loss": 0.3241, - "step": 5832000 - }, - { - "epoch": 3.5, - "learning_rate": 3.1555275293485184e-05, - "loss": 0.3221, - "step": 5832500 - }, - { - "epoch": 3.5, - "learning_rate": 3.155317952785574e-05, - "loss": 0.3332, - "step": 5833000 - }, - { - "epoch": 3.5, - "learning_rate": 3.155107956229518e-05, - "loss": 0.3199, - "step": 5833500 - }, - { - "epoch": 3.5, - "learning_rate": 3.154897959673461e-05, - "loss": 0.3192, - "step": 5834000 - }, - { - "epoch": 3.5, - "learning_rate": 3.154687963117405e-05, - "loss": 0.322, - "step": 5834500 - }, - { - "epoch": 3.5, - "learning_rate": 3.1544779665613485e-05, - "loss": 0.3252, - "step": 5835000 - }, - { - "epoch": 3.5, - "learning_rate": 3.154268389998404e-05, - "loss": 0.3272, - "step": 5835500 - }, - { - "epoch": 3.5, - "learning_rate": 3.154058393442347e-05, - "loss": 0.3275, - "step": 5836000 - }, - { - "epoch": 3.5, - "learning_rate": 3.153848396886291e-05, - "loss": 0.3248, - "step": 5836500 - }, - { - "epoch": 3.5, - "learning_rate": 3.1536384003302346e-05, - "loss": 0.3247, - "step": 5837000 - }, - { - "epoch": 3.5, - "learning_rate": 3.153428403774178e-05, - "loss": 0.33, - "step": 5837500 - }, - { - "epoch": 3.5, - "learning_rate": 3.153218407218122e-05, - "loss": 0.3342, - "step": 5838000 - }, - { - "epoch": 3.5, - "learning_rate": 3.153008830655177e-05, - "loss": 0.324, - "step": 5838500 - }, - { - "epoch": 3.5, - "learning_rate": 3.1527988340991206e-05, - "loss": 0.3269, - "step": 5839000 - }, - { - "epoch": 3.5, - "learning_rate": 3.152588837543064e-05, - "loss": 0.3346, - "step": 5839500 - }, - { - "epoch": 3.5, - "learning_rate": 3.152378840987008e-05, - "loss": 0.3331, - "step": 5840000 - }, - { - "epoch": 3.5, - "learning_rate": 3.1521688444309514e-05, - "loss": 0.3276, - "step": 5840500 - }, - { - "epoch": 3.5, - "learning_rate": 3.151959267868007e-05, - "loss": 0.3369, - "step": 5841000 - }, - { - "epoch": 3.5, - "learning_rate": 3.151749271311951e-05, - "loss": 0.3304, - "step": 5841500 - }, - { - "epoch": 3.5, - "learning_rate": 3.151539274755894e-05, - "loss": 0.3233, - "step": 5842000 - }, - { - "epoch": 3.5, - "learning_rate": 3.1513292781998374e-05, - "loss": 0.3331, - "step": 5842500 - }, - { - "epoch": 3.5, - "learning_rate": 3.1511192816437815e-05, - "loss": 0.3218, - "step": 5843000 - }, - { - "epoch": 3.5, - "learning_rate": 3.150909285087725e-05, - "loss": 0.3246, - "step": 5843500 - }, - { - "epoch": 3.5, - "learning_rate": 3.150699288531668e-05, - "loss": 0.3243, - "step": 5844000 - }, - { - "epoch": 3.5, - "learning_rate": 3.1504892919756115e-05, - "loss": 0.3297, - "step": 5844500 - }, - { - "epoch": 3.5, - "learning_rate": 3.1502797154126675e-05, - "loss": 0.3302, - "step": 5845000 - }, - { - "epoch": 3.5, - "learning_rate": 3.150069718856611e-05, - "loss": 0.3267, - "step": 5845500 - }, - { - "epoch": 3.5, - "learning_rate": 3.149860142293666e-05, - "loss": 0.3268, - "step": 5846000 - }, - { - "epoch": 3.51, - "learning_rate": 3.1496501457376096e-05, - "loss": 0.3244, - "step": 5846500 - }, - { - "epoch": 3.51, - "learning_rate": 3.1494401491815536e-05, - "loss": 0.3319, - "step": 5847000 - }, - { - "epoch": 3.51, - "learning_rate": 3.149230152625497e-05, - "loss": 0.3288, - "step": 5847500 - }, - { - "epoch": 3.51, - "learning_rate": 3.149020576062552e-05, - "loss": 0.3299, - "step": 5848000 - }, - { - "epoch": 3.51, - "learning_rate": 3.148810579506496e-05, - "loss": 0.3285, - "step": 5848500 - }, - { - "epoch": 3.51, - "learning_rate": 3.14860058295044e-05, - "loss": 0.3361, - "step": 5849000 - }, - { - "epoch": 3.51, - "learning_rate": 3.148390586394383e-05, - "loss": 0.3269, - "step": 5849500 - }, - { - "epoch": 3.51, - "learning_rate": 3.148180589838327e-05, - "loss": 0.3434, - "step": 5850000 - }, - { - "epoch": 3.51, - "learning_rate": 3.1479710132753824e-05, - "loss": 0.3257, - "step": 5850500 - }, - { - "epoch": 3.51, - "learning_rate": 3.147761016719326e-05, - "loss": 0.3321, - "step": 5851000 - }, - { - "epoch": 3.51, - "learning_rate": 3.147551020163269e-05, - "loss": 0.3281, - "step": 5851500 - }, - { - "epoch": 3.51, - "learning_rate": 3.147341023607213e-05, - "loss": 0.324, - "step": 5852000 - }, - { - "epoch": 3.51, - "learning_rate": 3.1471314470442685e-05, - "loss": 0.3354, - "step": 5852500 - }, - { - "epoch": 3.51, - "learning_rate": 3.146921450488212e-05, - "loss": 0.3251, - "step": 5853000 - }, - { - "epoch": 3.51, - "learning_rate": 3.146711453932155e-05, - "loss": 0.3231, - "step": 5853500 - }, - { - "epoch": 3.51, - "learning_rate": 3.146501457376099e-05, - "loss": 0.3361, - "step": 5854000 - }, - { - "epoch": 3.51, - "learning_rate": 3.1462914608200425e-05, - "loss": 0.3335, - "step": 5854500 - }, - { - "epoch": 3.51, - "learning_rate": 3.146081464263986e-05, - "loss": 0.3276, - "step": 5855000 - }, - { - "epoch": 3.51, - "learning_rate": 3.14587146770793e-05, - "loss": 0.3268, - "step": 5855500 - }, - { - "epoch": 3.51, - "learning_rate": 3.145661471151873e-05, - "loss": 0.335, - "step": 5856000 - }, - { - "epoch": 3.51, - "learning_rate": 3.1454518945889286e-05, - "loss": 0.3276, - "step": 5856500 - }, - { - "epoch": 3.51, - "learning_rate": 3.1452418980328726e-05, - "loss": 0.3279, - "step": 5857000 - }, - { - "epoch": 3.51, - "learning_rate": 3.145031901476816e-05, - "loss": 0.3302, - "step": 5857500 - }, - { - "epoch": 3.51, - "learning_rate": 3.144821904920759e-05, - "loss": 0.3316, - "step": 5858000 - }, - { - "epoch": 3.51, - "learning_rate": 3.144612328357815e-05, - "loss": 0.3292, - "step": 5858500 - }, - { - "epoch": 3.51, - "learning_rate": 3.144402331801759e-05, - "loss": 0.3173, - "step": 5859000 - }, - { - "epoch": 3.51, - "learning_rate": 3.144192335245702e-05, - "loss": 0.3261, - "step": 5859500 - }, - { - "epoch": 3.51, - "learning_rate": 3.1439823386896454e-05, - "loss": 0.3364, - "step": 5860000 - }, - { - "epoch": 3.51, - "learning_rate": 3.143772762126701e-05, - "loss": 0.3236, - "step": 5860500 - }, - { - "epoch": 3.51, - "learning_rate": 3.143562765570645e-05, - "loss": 0.3314, - "step": 5861000 - }, - { - "epoch": 3.51, - "learning_rate": 3.143352769014588e-05, - "loss": 0.3256, - "step": 5861500 - }, - { - "epoch": 3.51, - "learning_rate": 3.1431427724585315e-05, - "loss": 0.3313, - "step": 5862000 - }, - { - "epoch": 3.51, - "learning_rate": 3.1429327759024755e-05, - "loss": 0.3295, - "step": 5862500 - }, - { - "epoch": 3.52, - "learning_rate": 3.142722779346419e-05, - "loss": 0.3278, - "step": 5863000 - }, - { - "epoch": 3.52, - "learning_rate": 3.142513202783474e-05, - "loss": 0.3247, - "step": 5863500 - }, - { - "epoch": 3.52, - "learning_rate": 3.142303206227418e-05, - "loss": 0.3267, - "step": 5864000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1420932096713616e-05, - "loss": 0.3259, - "step": 5864500 - }, - { - "epoch": 3.52, - "learning_rate": 3.141883213115305e-05, - "loss": 0.3285, - "step": 5865000 - }, - { - "epoch": 3.52, - "learning_rate": 3.14167363655236e-05, - "loss": 0.3414, - "step": 5865500 - }, - { - "epoch": 3.52, - "learning_rate": 3.141463639996304e-05, - "loss": 0.3302, - "step": 5866000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1412536434402476e-05, - "loss": 0.3253, - "step": 5866500 - }, - { - "epoch": 3.52, - "learning_rate": 3.141043646884191e-05, - "loss": 0.3262, - "step": 5867000 - }, - { - "epoch": 3.52, - "learning_rate": 3.140833650328135e-05, - "loss": 0.321, - "step": 5867500 - }, - { - "epoch": 3.52, - "learning_rate": 3.1406240737651904e-05, - "loss": 0.3275, - "step": 5868000 - }, - { - "epoch": 3.52, - "learning_rate": 3.140414077209134e-05, - "loss": 0.3319, - "step": 5868500 - }, - { - "epoch": 3.52, - "learning_rate": 3.140204080653077e-05, - "loss": 0.3385, - "step": 5869000 - }, - { - "epoch": 3.52, - "learning_rate": 3.139994084097021e-05, - "loss": 0.3192, - "step": 5869500 - }, - { - "epoch": 3.52, - "learning_rate": 3.1397840875409644e-05, - "loss": 0.3267, - "step": 5870000 - }, - { - "epoch": 3.52, - "learning_rate": 3.139574090984908e-05, - "loss": 0.3177, - "step": 5870500 - }, - { - "epoch": 3.52, - "learning_rate": 3.139364514421964e-05, - "loss": 0.3389, - "step": 5871000 - }, - { - "epoch": 3.52, - "learning_rate": 3.139154517865907e-05, - "loss": 0.3258, - "step": 5871500 - }, - { - "epoch": 3.52, - "learning_rate": 3.1389445213098505e-05, - "loss": 0.3234, - "step": 5872000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1387345247537945e-05, - "loss": 0.3331, - "step": 5872500 - }, - { - "epoch": 3.52, - "learning_rate": 3.138524528197738e-05, - "loss": 0.3364, - "step": 5873000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1383145316416805e-05, - "loss": 0.3389, - "step": 5873500 - }, - { - "epoch": 3.52, - "learning_rate": 3.1381045350856246e-05, - "loss": 0.3221, - "step": 5874000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1378949585226806e-05, - "loss": 0.3299, - "step": 5874500 - }, - { - "epoch": 3.52, - "learning_rate": 3.137684961966624e-05, - "loss": 0.3165, - "step": 5875000 - }, - { - "epoch": 3.52, - "learning_rate": 3.137475385403679e-05, - "loss": 0.3409, - "step": 5875500 - }, - { - "epoch": 3.52, - "learning_rate": 3.1372653888476226e-05, - "loss": 0.3268, - "step": 5876000 - }, - { - "epoch": 3.52, - "learning_rate": 3.137055392291567e-05, - "loss": 0.3218, - "step": 5876500 - }, - { - "epoch": 3.52, - "learning_rate": 3.13684539573551e-05, - "loss": 0.3307, - "step": 5877000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1366353991794534e-05, - "loss": 0.3298, - "step": 5877500 - }, - { - "epoch": 3.52, - "learning_rate": 3.136425402623397e-05, - "loss": 0.328, - "step": 5878000 - }, - { - "epoch": 3.52, - "learning_rate": 3.13621540606734e-05, - "loss": 0.3333, - "step": 5878500 - }, - { - "epoch": 3.52, - "learning_rate": 3.136005409511284e-05, - "loss": 0.3194, - "step": 5879000 - }, - { - "epoch": 3.52, - "learning_rate": 3.1357954129552274e-05, - "loss": 0.3246, - "step": 5879500 - }, - { - "epoch": 3.53, - "learning_rate": 3.1355858363922835e-05, - "loss": 0.323, - "step": 5880000 - }, - { - "epoch": 3.53, - "learning_rate": 3.135375839836226e-05, - "loss": 0.326, - "step": 5880500 - }, - { - "epoch": 3.53, - "learning_rate": 3.13516584328017e-05, - "loss": 0.3279, - "step": 5881000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1349558467241135e-05, - "loss": 0.3233, - "step": 5881500 - }, - { - "epoch": 3.53, - "learning_rate": 3.134745850168057e-05, - "loss": 0.3295, - "step": 5882000 - }, - { - "epoch": 3.53, - "learning_rate": 3.134535853612001e-05, - "loss": 0.3211, - "step": 5882500 - }, - { - "epoch": 3.53, - "learning_rate": 3.134326277049056e-05, - "loss": 0.3361, - "step": 5883000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1341162804929996e-05, - "loss": 0.3285, - "step": 5883500 - }, - { - "epoch": 3.53, - "learning_rate": 3.133906283936943e-05, - "loss": 0.3266, - "step": 5884000 - }, - { - "epoch": 3.53, - "learning_rate": 3.133696287380887e-05, - "loss": 0.3265, - "step": 5884500 - }, - { - "epoch": 3.53, - "learning_rate": 3.13348629082483e-05, - "loss": 0.33, - "step": 5885000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1332767142618856e-05, - "loss": 0.3285, - "step": 5885500 - }, - { - "epoch": 3.53, - "learning_rate": 3.1330667177058297e-05, - "loss": 0.3258, - "step": 5886000 - }, - { - "epoch": 3.53, - "learning_rate": 3.132856721149773e-05, - "loss": 0.3331, - "step": 5886500 - }, - { - "epoch": 3.53, - "learning_rate": 3.1326467245937164e-05, - "loss": 0.3304, - "step": 5887000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1324367280376604e-05, - "loss": 0.3322, - "step": 5887500 - }, - { - "epoch": 3.53, - "learning_rate": 3.132227151474716e-05, - "loss": 0.3258, - "step": 5888000 - }, - { - "epoch": 3.53, - "learning_rate": 3.132017154918659e-05, - "loss": 0.3328, - "step": 5888500 - }, - { - "epoch": 3.53, - "learning_rate": 3.1318071583626024e-05, - "loss": 0.322, - "step": 5889000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1315971618065464e-05, - "loss": 0.3276, - "step": 5889500 - }, - { - "epoch": 3.53, - "learning_rate": 3.131387585243602e-05, - "loss": 0.322, - "step": 5890000 - }, - { - "epoch": 3.53, - "learning_rate": 3.131177588687545e-05, - "loss": 0.337, - "step": 5890500 - }, - { - "epoch": 3.53, - "learning_rate": 3.1309675921314885e-05, - "loss": 0.3227, - "step": 5891000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1307575955754325e-05, - "loss": 0.3355, - "step": 5891500 - }, - { - "epoch": 3.53, - "learning_rate": 3.1305480190124886e-05, - "loss": 0.3237, - "step": 5892000 - }, - { - "epoch": 3.53, - "learning_rate": 3.130338022456431e-05, - "loss": 0.3284, - "step": 5892500 - }, - { - "epoch": 3.53, - "learning_rate": 3.130128025900375e-05, - "loss": 0.3209, - "step": 5893000 - }, - { - "epoch": 3.53, - "learning_rate": 3.1299180293443186e-05, - "loss": 0.3272, - "step": 5893500 - }, - { - "epoch": 3.53, - "learning_rate": 3.129708032788262e-05, - "loss": 0.3255, - "step": 5894000 - }, - { - "epoch": 3.53, - "learning_rate": 3.129498036232206e-05, - "loss": 0.3201, - "step": 5894500 - }, - { - "epoch": 3.53, - "learning_rate": 3.129288459669261e-05, - "loss": 0.3306, - "step": 5895000 - }, - { - "epoch": 3.53, - "learning_rate": 3.129078463113205e-05, - "loss": 0.3203, - "step": 5895500 - }, - { - "epoch": 3.53, - "learning_rate": 3.128868466557148e-05, - "loss": 0.3299, - "step": 5896000 - }, - { - "epoch": 3.54, - "learning_rate": 3.128658470001092e-05, - "loss": 0.3296, - "step": 5896500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1284488934381474e-05, - "loss": 0.3263, - "step": 5897000 - }, - { - "epoch": 3.54, - "learning_rate": 3.128238896882091e-05, - "loss": 0.3272, - "step": 5897500 - }, - { - "epoch": 3.54, - "learning_rate": 3.128028900326034e-05, - "loss": 0.3321, - "step": 5898000 - }, - { - "epoch": 3.54, - "learning_rate": 3.127818903769978e-05, - "loss": 0.3388, - "step": 5898500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1276089072139215e-05, - "loss": 0.3231, - "step": 5899000 - }, - { - "epoch": 3.54, - "learning_rate": 3.1273989106578655e-05, - "loss": 0.3265, - "step": 5899500 - }, - { - "epoch": 3.54, - "learning_rate": 3.127188914101809e-05, - "loss": 0.3428, - "step": 5900000 - }, - { - "epoch": 3.54, - "eval_loss": 0.32011526823043823, - "eval_runtime": 1121.1763, - "eval_samples_per_second": 469.792, - "eval_steps_per_second": 78.299, - "step": 5900000 - }, - { - "epoch": 3.54, - "learning_rate": 3.126979337538864e-05, - "loss": 0.3309, - "step": 5900500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1267693409828075e-05, - "loss": 0.3252, - "step": 5901000 - }, - { - "epoch": 3.54, - "learning_rate": 3.1265593444267516e-05, - "loss": 0.3157, - "step": 5901500 - }, - { - "epoch": 3.54, - "learning_rate": 3.126349347870695e-05, - "loss": 0.3332, - "step": 5902000 - }, - { - "epoch": 3.54, - "learning_rate": 3.126139351314638e-05, - "loss": 0.3272, - "step": 5902500 - }, - { - "epoch": 3.54, - "learning_rate": 3.125929354758582e-05, - "loss": 0.3124, - "step": 5903000 - }, - { - "epoch": 3.54, - "learning_rate": 3.1257193582025256e-05, - "loss": 0.3287, - "step": 5903500 - }, - { - "epoch": 3.54, - "learning_rate": 3.125509361646469e-05, - "loss": 0.3297, - "step": 5904000 - }, - { - "epoch": 3.54, - "learning_rate": 3.125299365090413e-05, - "loss": 0.335, - "step": 5904500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1250893685343557e-05, - "loss": 0.3339, - "step": 5905000 - }, - { - "epoch": 3.54, - "learning_rate": 3.124879371978299e-05, - "loss": 0.3283, - "step": 5905500 - }, - { - "epoch": 3.54, - "learning_rate": 3.124669375422243e-05, - "loss": 0.3197, - "step": 5906000 - }, - { - "epoch": 3.54, - "learning_rate": 3.124459798859299e-05, - "loss": 0.3319, - "step": 5906500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1242502222963544e-05, - "loss": 0.3292, - "step": 5907000 - }, - { - "epoch": 3.54, - "learning_rate": 3.124040225740298e-05, - "loss": 0.3375, - "step": 5907500 - }, - { - "epoch": 3.54, - "learning_rate": 3.123830229184242e-05, - "loss": 0.3321, - "step": 5908000 - }, - { - "epoch": 3.54, - "learning_rate": 3.123620232628185e-05, - "loss": 0.3206, - "step": 5908500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1234106560652405e-05, - "loss": 0.3228, - "step": 5909000 - }, - { - "epoch": 3.54, - "learning_rate": 3.123200659509184e-05, - "loss": 0.3286, - "step": 5909500 - }, - { - "epoch": 3.54, - "learning_rate": 3.122990662953128e-05, - "loss": 0.3264, - "step": 5910000 - }, - { - "epoch": 3.54, - "learning_rate": 3.122780666397071e-05, - "loss": 0.3307, - "step": 5910500 - }, - { - "epoch": 3.54, - "learning_rate": 3.1225706698410145e-05, - "loss": 0.3273, - "step": 5911000 - }, - { - "epoch": 3.54, - "learning_rate": 3.1223606732849586e-05, - "loss": 0.3351, - "step": 5911500 - }, - { - "epoch": 3.54, - "learning_rate": 3.122150676728901e-05, - "loss": 0.3258, - "step": 5912000 - }, - { - "epoch": 3.54, - "learning_rate": 3.1219406801728446e-05, - "loss": 0.3246, - "step": 5912500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1217311036099006e-05, - "loss": 0.3291, - "step": 5913000 - }, - { - "epoch": 3.55, - "learning_rate": 3.1215215270469567e-05, - "loss": 0.3186, - "step": 5913500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1213115304909e-05, - "loss": 0.3282, - "step": 5914000 - }, - { - "epoch": 3.55, - "learning_rate": 3.1211015339348433e-05, - "loss": 0.3314, - "step": 5914500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1208915373787874e-05, - "loss": 0.3267, - "step": 5915000 - }, - { - "epoch": 3.55, - "learning_rate": 3.120681540822731e-05, - "loss": 0.3209, - "step": 5915500 - }, - { - "epoch": 3.55, - "learning_rate": 3.120471544266674e-05, - "loss": 0.3296, - "step": 5916000 - }, - { - "epoch": 3.55, - "learning_rate": 3.1202619677037294e-05, - "loss": 0.3286, - "step": 5916500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1200519711476734e-05, - "loss": 0.3372, - "step": 5917000 - }, - { - "epoch": 3.55, - "learning_rate": 3.119841974591617e-05, - "loss": 0.3262, - "step": 5917500 - }, - { - "epoch": 3.55, - "learning_rate": 3.11963197803556e-05, - "loss": 0.3236, - "step": 5918000 - }, - { - "epoch": 3.55, - "learning_rate": 3.119421981479504e-05, - "loss": 0.3317, - "step": 5918500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1192124049165595e-05, - "loss": 0.3227, - "step": 5919000 - }, - { - "epoch": 3.55, - "learning_rate": 3.119002408360503e-05, - "loss": 0.3243, - "step": 5919500 - }, - { - "epoch": 3.55, - "learning_rate": 3.118792411804446e-05, - "loss": 0.332, - "step": 5920000 - }, - { - "epoch": 3.55, - "learning_rate": 3.11858241524839e-05, - "loss": 0.3364, - "step": 5920500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1183724186923336e-05, - "loss": 0.3392, - "step": 5921000 - }, - { - "epoch": 3.55, - "learning_rate": 3.118162422136277e-05, - "loss": 0.3237, - "step": 5921500 - }, - { - "epoch": 3.55, - "learning_rate": 3.11795242558022e-05, - "loss": 0.3256, - "step": 5922000 - }, - { - "epoch": 3.55, - "learning_rate": 3.117742849017276e-05, - "loss": 0.3359, - "step": 5922500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1175328524612196e-05, - "loss": 0.3214, - "step": 5923000 - }, - { - "epoch": 3.55, - "learning_rate": 3.117322855905164e-05, - "loss": 0.3269, - "step": 5923500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1171128593491063e-05, - "loss": 0.3328, - "step": 5924000 - }, - { - "epoch": 3.55, - "learning_rate": 3.11690286279305e-05, - "loss": 0.3276, - "step": 5924500 - }, - { - "epoch": 3.55, - "learning_rate": 3.116692866236994e-05, - "loss": 0.3226, - "step": 5925000 - }, - { - "epoch": 3.55, - "learning_rate": 3.116482869680937e-05, - "loss": 0.3355, - "step": 5925500 - }, - { - "epoch": 3.55, - "learning_rate": 3.116273293117993e-05, - "loss": 0.3367, - "step": 5926000 - }, - { - "epoch": 3.55, - "learning_rate": 3.116063296561936e-05, - "loss": 0.3222, - "step": 5926500 - }, - { - "epoch": 3.55, - "learning_rate": 3.11585330000588e-05, - "loss": 0.3215, - "step": 5927000 - }, - { - "epoch": 3.55, - "learning_rate": 3.115643303449823e-05, - "loss": 0.3252, - "step": 5927500 - }, - { - "epoch": 3.55, - "learning_rate": 3.1154333068937665e-05, - "loss": 0.3373, - "step": 5928000 - }, - { - "epoch": 3.55, - "learning_rate": 3.1152233103377105e-05, - "loss": 0.3188, - "step": 5928500 - }, - { - "epoch": 3.55, - "learning_rate": 3.115013733774766e-05, - "loss": 0.336, - "step": 5929000 - }, - { - "epoch": 3.55, - "learning_rate": 3.114803737218709e-05, - "loss": 0.3276, - "step": 5929500 - }, - { - "epoch": 3.56, - "learning_rate": 3.114593740662653e-05, - "loss": 0.3429, - "step": 5930000 - }, - { - "epoch": 3.56, - "learning_rate": 3.1143837441065966e-05, - "loss": 0.3253, - "step": 5930500 - }, - { - "epoch": 3.56, - "learning_rate": 3.11417374755054e-05, - "loss": 0.326, - "step": 5931000 - }, - { - "epoch": 3.56, - "learning_rate": 3.113963750994484e-05, - "loss": 0.3267, - "step": 5931500 - }, - { - "epoch": 3.56, - "learning_rate": 3.113753754438427e-05, - "loss": 0.3378, - "step": 5932000 - }, - { - "epoch": 3.56, - "learning_rate": 3.1135437578823706e-05, - "loss": 0.3283, - "step": 5932500 - }, - { - "epoch": 3.56, - "learning_rate": 3.113334181319426e-05, - "loss": 0.3238, - "step": 5933000 - }, - { - "epoch": 3.56, - "learning_rate": 3.11312418476337e-05, - "loss": 0.3239, - "step": 5933500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1129146082004254e-05, - "loss": 0.3284, - "step": 5934000 - }, - { - "epoch": 3.56, - "learning_rate": 3.112704611644369e-05, - "loss": 0.3288, - "step": 5934500 - }, - { - "epoch": 3.56, - "learning_rate": 3.112494615088312e-05, - "loss": 0.3318, - "step": 5935000 - }, - { - "epoch": 3.56, - "learning_rate": 3.112284618532256e-05, - "loss": 0.3232, - "step": 5935500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1120746219761994e-05, - "loss": 0.3291, - "step": 5936000 - }, - { - "epoch": 3.56, - "learning_rate": 3.111864625420143e-05, - "loss": 0.3305, - "step": 5936500 - }, - { - "epoch": 3.56, - "learning_rate": 3.111654628864087e-05, - "loss": 0.3253, - "step": 5937000 - }, - { - "epoch": 3.56, - "learning_rate": 3.11144463230803e-05, - "loss": 0.3285, - "step": 5937500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1112350557450855e-05, - "loss": 0.3273, - "step": 5938000 - }, - { - "epoch": 3.56, - "learning_rate": 3.1110250591890295e-05, - "loss": 0.3269, - "step": 5938500 - }, - { - "epoch": 3.56, - "learning_rate": 3.110815062632973e-05, - "loss": 0.3279, - "step": 5939000 - }, - { - "epoch": 3.56, - "learning_rate": 3.110605486070028e-05, - "loss": 0.3398, - "step": 5939500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1103954895139716e-05, - "loss": 0.3345, - "step": 5940000 - }, - { - "epoch": 3.56, - "learning_rate": 3.1101854929579156e-05, - "loss": 0.3313, - "step": 5940500 - }, - { - "epoch": 3.56, - "learning_rate": 3.109975496401859e-05, - "loss": 0.3386, - "step": 5941000 - }, - { - "epoch": 3.56, - "learning_rate": 3.109765499845802e-05, - "loss": 0.3205, - "step": 5941500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1095559232828577e-05, - "loss": 0.3209, - "step": 5942000 - }, - { - "epoch": 3.56, - "learning_rate": 3.109345926726802e-05, - "loss": 0.3287, - "step": 5942500 - }, - { - "epoch": 3.56, - "learning_rate": 3.109135930170745e-05, - "loss": 0.3215, - "step": 5943000 - }, - { - "epoch": 3.56, - "learning_rate": 3.1089259336146884e-05, - "loss": 0.331, - "step": 5943500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1087159370586324e-05, - "loss": 0.3203, - "step": 5944000 - }, - { - "epoch": 3.56, - "learning_rate": 3.108506360495688e-05, - "loss": 0.3297, - "step": 5944500 - }, - { - "epoch": 3.56, - "learning_rate": 3.108296363939631e-05, - "loss": 0.3408, - "step": 5945000 - }, - { - "epoch": 3.56, - "learning_rate": 3.108086367383575e-05, - "loss": 0.3205, - "step": 5945500 - }, - { - "epoch": 3.56, - "learning_rate": 3.1078763708275185e-05, - "loss": 0.3259, - "step": 5946000 - }, - { - "epoch": 3.57, - "learning_rate": 3.107666794264574e-05, - "loss": 0.336, - "step": 5946500 - }, - { - "epoch": 3.57, - "learning_rate": 3.107456797708517e-05, - "loss": 0.319, - "step": 5947000 - }, - { - "epoch": 3.57, - "learning_rate": 3.107246801152461e-05, - "loss": 0.3275, - "step": 5947500 - }, - { - "epoch": 3.57, - "learning_rate": 3.1070368045964045e-05, - "loss": 0.3372, - "step": 5948000 - }, - { - "epoch": 3.57, - "learning_rate": 3.10682722803346e-05, - "loss": 0.3273, - "step": 5948500 - }, - { - "epoch": 3.57, - "learning_rate": 3.106617231477403e-05, - "loss": 0.3308, - "step": 5949000 - }, - { - "epoch": 3.57, - "learning_rate": 3.106407234921347e-05, - "loss": 0.3238, - "step": 5949500 - }, - { - "epoch": 3.57, - "learning_rate": 3.1061972383652906e-05, - "loss": 0.3276, - "step": 5950000 - }, - { - "epoch": 3.57, - "learning_rate": 3.105987241809234e-05, - "loss": 0.3256, - "step": 5950500 - }, - { - "epoch": 3.57, - "learning_rate": 3.10577766524629e-05, - "loss": 0.3289, - "step": 5951000 - }, - { - "epoch": 3.57, - "learning_rate": 3.105567668690233e-05, - "loss": 0.3266, - "step": 5951500 - }, - { - "epoch": 3.57, - "learning_rate": 3.105357672134177e-05, - "loss": 0.3308, - "step": 5952000 - }, - { - "epoch": 3.57, - "learning_rate": 3.105147675578121e-05, - "loss": 0.3309, - "step": 5952500 - }, - { - "epoch": 3.57, - "learning_rate": 3.104937679022064e-05, - "loss": 0.3275, - "step": 5953000 - }, - { - "epoch": 3.57, - "learning_rate": 3.1047276824660074e-05, - "loss": 0.3262, - "step": 5953500 - }, - { - "epoch": 3.57, - "learning_rate": 3.1045176859099514e-05, - "loss": 0.3352, - "step": 5954000 - }, - { - "epoch": 3.57, - "learning_rate": 3.104307689353895e-05, - "loss": 0.3283, - "step": 5954500 - }, - { - "epoch": 3.57, - "learning_rate": 3.10409811279095e-05, - "loss": 0.3281, - "step": 5955000 - }, - { - "epoch": 3.57, - "learning_rate": 3.1038885362280055e-05, - "loss": 0.3359, - "step": 5955500 - }, - { - "epoch": 3.57, - "learning_rate": 3.103678539671949e-05, - "loss": 0.3218, - "step": 5956000 - }, - { - "epoch": 3.57, - "learning_rate": 3.103468543115893e-05, - "loss": 0.3229, - "step": 5956500 - }, - { - "epoch": 3.57, - "learning_rate": 3.103258546559836e-05, - "loss": 0.3317, - "step": 5957000 - }, - { - "epoch": 3.57, - "learning_rate": 3.1030485500037795e-05, - "loss": 0.3255, - "step": 5957500 - }, - { - "epoch": 3.57, - "learning_rate": 3.1028385534477236e-05, - "loss": 0.3277, - "step": 5958000 - }, - { - "epoch": 3.57, - "learning_rate": 3.102628976884779e-05, - "loss": 0.3234, - "step": 5958500 - }, - { - "epoch": 3.57, - "learning_rate": 3.102418980328722e-05, - "loss": 0.337, - "step": 5959000 - }, - { - "epoch": 3.57, - "learning_rate": 3.102208983772666e-05, - "loss": 0.3237, - "step": 5959500 - }, - { - "epoch": 3.57, - "learning_rate": 3.1019989872166096e-05, - "loss": 0.3245, - "step": 5960000 - }, - { - "epoch": 3.57, - "learning_rate": 3.101788990660553e-05, - "loss": 0.3317, - "step": 5960500 - }, - { - "epoch": 3.57, - "learning_rate": 3.101578994104497e-05, - "loss": 0.3268, - "step": 5961000 - }, - { - "epoch": 3.57, - "learning_rate": 3.1013689975484404e-05, - "loss": 0.3311, - "step": 5961500 - }, - { - "epoch": 3.57, - "learning_rate": 3.101159420985496e-05, - "loss": 0.3277, - "step": 5962000 - }, - { - "epoch": 3.57, - "learning_rate": 3.100949424429439e-05, - "loss": 0.3193, - "step": 5962500 - }, - { - "epoch": 3.58, - "learning_rate": 3.100739427873383e-05, - "loss": 0.3251, - "step": 5963000 - }, - { - "epoch": 3.58, - "learning_rate": 3.1005294313173264e-05, - "loss": 0.3252, - "step": 5963500 - }, - { - "epoch": 3.58, - "learning_rate": 3.10031943476127e-05, - "loss": 0.3263, - "step": 5964000 - }, - { - "epoch": 3.58, - "learning_rate": 3.100109858198326e-05, - "loss": 0.3302, - "step": 5964500 - }, - { - "epoch": 3.58, - "learning_rate": 3.099899861642269e-05, - "loss": 0.3329, - "step": 5965000 - }, - { - "epoch": 3.58, - "learning_rate": 3.0996898650862125e-05, - "loss": 0.3178, - "step": 5965500 - }, - { - "epoch": 3.58, - "learning_rate": 3.0994798685301565e-05, - "loss": 0.3326, - "step": 5966000 - }, - { - "epoch": 3.58, - "learning_rate": 3.0992698719741e-05, - "loss": 0.3329, - "step": 5966500 - }, - { - "epoch": 3.58, - "learning_rate": 3.099059875418043e-05, - "loss": 0.3279, - "step": 5967000 - }, - { - "epoch": 3.58, - "learning_rate": 3.098849878861987e-05, - "loss": 0.3297, - "step": 5967500 - }, - { - "epoch": 3.58, - "learning_rate": 3.09863988230593e-05, - "loss": 0.3263, - "step": 5968000 - }, - { - "epoch": 3.58, - "learning_rate": 3.098430305742986e-05, - "loss": 0.3284, - "step": 5968500 - }, - { - "epoch": 3.58, - "learning_rate": 3.098220309186929e-05, - "loss": 0.3308, - "step": 5969000 - }, - { - "epoch": 3.58, - "learning_rate": 3.098010312630873e-05, - "loss": 0.3288, - "step": 5969500 - }, - { - "epoch": 3.58, - "learning_rate": 3.097800316074816e-05, - "loss": 0.3302, - "step": 5970000 - }, - { - "epoch": 3.58, - "learning_rate": 3.097590319518759e-05, - "loss": 0.3252, - "step": 5970500 - }, - { - "epoch": 3.58, - "learning_rate": 3.0973807429558154e-05, - "loss": 0.3222, - "step": 5971000 - }, - { - "epoch": 3.58, - "learning_rate": 3.0971707463997594e-05, - "loss": 0.3219, - "step": 5971500 - }, - { - "epoch": 3.58, - "learning_rate": 3.096960749843703e-05, - "loss": 0.3354, - "step": 5972000 - }, - { - "epoch": 3.58, - "learning_rate": 3.096750753287646e-05, - "loss": 0.325, - "step": 5972500 - }, - { - "epoch": 3.58, - "learning_rate": 3.0965407567315894e-05, - "loss": 0.3254, - "step": 5973000 - }, - { - "epoch": 3.58, - "learning_rate": 3.0963311801686455e-05, - "loss": 0.3354, - "step": 5973500 - }, - { - "epoch": 3.58, - "learning_rate": 3.096121183612589e-05, - "loss": 0.3218, - "step": 5974000 - }, - { - "epoch": 3.58, - "learning_rate": 3.095911187056533e-05, - "loss": 0.3271, - "step": 5974500 - }, - { - "epoch": 3.58, - "learning_rate": 3.0957011905004755e-05, - "loss": 0.3244, - "step": 5975000 - }, - { - "epoch": 3.58, - "learning_rate": 3.095491193944419e-05, - "loss": 0.3193, - "step": 5975500 - }, - { - "epoch": 3.58, - "learning_rate": 3.095281197388363e-05, - "loss": 0.3252, - "step": 5976000 - }, - { - "epoch": 3.58, - "learning_rate": 3.095071620825419e-05, - "loss": 0.3271, - "step": 5976500 - }, - { - "epoch": 3.58, - "learning_rate": 3.094861624269362e-05, - "loss": 0.3337, - "step": 5977000 - }, - { - "epoch": 3.58, - "learning_rate": 3.094651627713305e-05, - "loss": 0.3272, - "step": 5977500 - }, - { - "epoch": 3.58, - "learning_rate": 3.094441631157249e-05, - "loss": 0.3282, - "step": 5978000 - }, - { - "epoch": 3.58, - "learning_rate": 3.094231634601192e-05, - "loss": 0.3197, - "step": 5978500 - }, - { - "epoch": 3.58, - "learning_rate": 3.0940216380451356e-05, - "loss": 0.323, - "step": 5979000 - }, - { - "epoch": 3.58, - "learning_rate": 3.093812061482192e-05, - "loss": 0.3213, - "step": 5979500 - }, - { - "epoch": 3.59, - "learning_rate": 3.093602064926135e-05, - "loss": 0.3264, - "step": 5980000 - }, - { - "epoch": 3.59, - "learning_rate": 3.0933920683700784e-05, - "loss": 0.3303, - "step": 5980500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0931820718140224e-05, - "loss": 0.3331, - "step": 5981000 - }, - { - "epoch": 3.59, - "learning_rate": 3.092972075257966e-05, - "loss": 0.3273, - "step": 5981500 - }, - { - "epoch": 3.59, - "learning_rate": 3.092762498695021e-05, - "loss": 0.3237, - "step": 5982000 - }, - { - "epoch": 3.59, - "learning_rate": 3.0925525021389644e-05, - "loss": 0.3249, - "step": 5982500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0923425055829085e-05, - "loss": 0.3208, - "step": 5983000 - }, - { - "epoch": 3.59, - "learning_rate": 3.092132509026852e-05, - "loss": 0.3248, - "step": 5983500 - }, - { - "epoch": 3.59, - "learning_rate": 3.091922512470795e-05, - "loss": 0.3253, - "step": 5984000 - }, - { - "epoch": 3.59, - "learning_rate": 3.091712515914739e-05, - "loss": 0.3322, - "step": 5984500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0915025193586825e-05, - "loss": 0.3253, - "step": 5985000 - }, - { - "epoch": 3.59, - "learning_rate": 3.091292522802626e-05, - "loss": 0.3295, - "step": 5985500 - }, - { - "epoch": 3.59, - "learning_rate": 3.091082946239681e-05, - "loss": 0.3283, - "step": 5986000 - }, - { - "epoch": 3.59, - "learning_rate": 3.090872949683625e-05, - "loss": 0.3259, - "step": 5986500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0906629531275686e-05, - "loss": 0.3215, - "step": 5987000 - }, - { - "epoch": 3.59, - "learning_rate": 3.090452956571512e-05, - "loss": 0.3295, - "step": 5987500 - }, - { - "epoch": 3.59, - "learning_rate": 3.090242960015456e-05, - "loss": 0.3183, - "step": 5988000 - }, - { - "epoch": 3.59, - "learning_rate": 3.090033383452511e-05, - "loss": 0.3286, - "step": 5988500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0898233868964547e-05, - "loss": 0.3252, - "step": 5989000 - }, - { - "epoch": 3.59, - "learning_rate": 3.089613390340399e-05, - "loss": 0.326, - "step": 5989500 - }, - { - "epoch": 3.59, - "learning_rate": 3.089403393784342e-05, - "loss": 0.3257, - "step": 5990000 - }, - { - "epoch": 3.59, - "learning_rate": 3.0891933972282854e-05, - "loss": 0.3299, - "step": 5990500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0889834006722294e-05, - "loss": 0.3347, - "step": 5991000 - }, - { - "epoch": 3.59, - "learning_rate": 3.088773824109285e-05, - "loss": 0.336, - "step": 5991500 - }, - { - "epoch": 3.59, - "learning_rate": 3.08856424754634e-05, - "loss": 0.3386, - "step": 5992000 - }, - { - "epoch": 3.59, - "learning_rate": 3.0883542509902835e-05, - "loss": 0.3289, - "step": 5992500 - }, - { - "epoch": 3.59, - "learning_rate": 3.088144254434227e-05, - "loss": 0.3211, - "step": 5993000 - }, - { - "epoch": 3.59, - "learning_rate": 3.087934257878171e-05, - "loss": 0.3267, - "step": 5993500 - }, - { - "epoch": 3.59, - "learning_rate": 3.087724261322114e-05, - "loss": 0.3226, - "step": 5994000 - }, - { - "epoch": 3.59, - "learning_rate": 3.0875142647660575e-05, - "loss": 0.3197, - "step": 5994500 - }, - { - "epoch": 3.59, - "learning_rate": 3.0873042682100015e-05, - "loss": 0.3237, - "step": 5995000 - }, - { - "epoch": 3.59, - "learning_rate": 3.087094271653945e-05, - "loss": 0.3253, - "step": 5995500 - }, - { - "epoch": 3.59, - "learning_rate": 3.086884275097888e-05, - "loss": 0.334, - "step": 5996000 - }, - { - "epoch": 3.6, - "learning_rate": 3.086674698534944e-05, - "loss": 0.3253, - "step": 5996500 - }, - { - "epoch": 3.6, - "learning_rate": 3.0864647019788876e-05, - "loss": 0.3308, - "step": 5997000 - }, - { - "epoch": 3.6, - "learning_rate": 3.086254705422831e-05, - "loss": 0.3314, - "step": 5997500 - }, - { - "epoch": 3.6, - "learning_rate": 3.086044708866775e-05, - "loss": 0.321, - "step": 5998000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0858351323038303e-05, - "loss": 0.3282, - "step": 5998500 - }, - { - "epoch": 3.6, - "learning_rate": 3.085625135747774e-05, - "loss": 0.3325, - "step": 5999000 - }, - { - "epoch": 3.6, - "learning_rate": 3.085415139191717e-05, - "loss": 0.3162, - "step": 5999500 - }, - { - "epoch": 3.6, - "learning_rate": 3.085205142635661e-05, - "loss": 0.3209, - "step": 6000000 - }, - { - "epoch": 3.6, - "eval_loss": 0.31902313232421875, - "eval_runtime": 1120.2992, - "eval_samples_per_second": 470.16, - "eval_steps_per_second": 78.36, - "step": 6000000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0849951460796044e-05, - "loss": 0.3256, - "step": 6000500 - }, - { - "epoch": 3.6, - "learning_rate": 3.084785149523548e-05, - "loss": 0.3238, - "step": 6001000 - }, - { - "epoch": 3.6, - "learning_rate": 3.084575152967492e-05, - "loss": 0.3316, - "step": 6001500 - }, - { - "epoch": 3.6, - "learning_rate": 3.084365576404547e-05, - "loss": 0.3317, - "step": 6002000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0841555798484905e-05, - "loss": 0.3341, - "step": 6002500 - }, - { - "epoch": 3.6, - "learning_rate": 3.083945583292434e-05, - "loss": 0.3278, - "step": 6003000 - }, - { - "epoch": 3.6, - "learning_rate": 3.083735586736378e-05, - "loss": 0.3286, - "step": 6003500 - }, - { - "epoch": 3.6, - "learning_rate": 3.083525590180321e-05, - "loss": 0.3363, - "step": 6004000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0833155936242645e-05, - "loss": 0.3257, - "step": 6004500 - }, - { - "epoch": 3.6, - "learning_rate": 3.083105597068208e-05, - "loss": 0.3214, - "step": 6005000 - }, - { - "epoch": 3.6, - "learning_rate": 3.082895600512151e-05, - "loss": 0.3261, - "step": 6005500 - }, - { - "epoch": 3.6, - "learning_rate": 3.082686023949207e-05, - "loss": 0.3263, - "step": 6006000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0824764473862626e-05, - "loss": 0.3263, - "step": 6006500 - }, - { - "epoch": 3.6, - "learning_rate": 3.0822664508302066e-05, - "loss": 0.325, - "step": 6007000 - }, - { - "epoch": 3.6, - "learning_rate": 3.08205645427415e-05, - "loss": 0.319, - "step": 6007500 - }, - { - "epoch": 3.6, - "learning_rate": 3.081846457718093e-05, - "loss": 0.3237, - "step": 6008000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0816364611620374e-05, - "loss": 0.3258, - "step": 6008500 - }, - { - "epoch": 3.6, - "learning_rate": 3.08142646460598e-05, - "loss": 0.3238, - "step": 6009000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0812164680499234e-05, - "loss": 0.322, - "step": 6009500 - }, - { - "epoch": 3.6, - "learning_rate": 3.0810068914869794e-05, - "loss": 0.3256, - "step": 6010000 - }, - { - "epoch": 3.6, - "learning_rate": 3.0807968949309234e-05, - "loss": 0.3341, - "step": 6010500 - }, - { - "epoch": 3.6, - "learning_rate": 3.080586898374867e-05, - "loss": 0.3253, - "step": 6011000 - }, - { - "epoch": 3.6, - "learning_rate": 3.08037690181881e-05, - "loss": 0.3199, - "step": 6011500 - }, - { - "epoch": 3.6, - "learning_rate": 3.0801669052627535e-05, - "loss": 0.3271, - "step": 6012000 - }, - { - "epoch": 3.6, - "learning_rate": 3.079956908706697e-05, - "loss": 0.327, - "step": 6012500 - }, - { - "epoch": 3.61, - "learning_rate": 3.079746912150641e-05, - "loss": 0.3307, - "step": 6013000 - }, - { - "epoch": 3.61, - "learning_rate": 3.079536915594584e-05, - "loss": 0.3304, - "step": 6013500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0793273390316395e-05, - "loss": 0.3238, - "step": 6014000 - }, - { - "epoch": 3.61, - "learning_rate": 3.0791177624686956e-05, - "loss": 0.322, - "step": 6014500 - }, - { - "epoch": 3.61, - "learning_rate": 3.078907765912639e-05, - "loss": 0.3308, - "step": 6015000 - }, - { - "epoch": 3.61, - "learning_rate": 3.078697769356583e-05, - "loss": 0.3302, - "step": 6015500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0784877728005256e-05, - "loss": 0.3236, - "step": 6016000 - }, - { - "epoch": 3.61, - "learning_rate": 3.078277776244469e-05, - "loss": 0.3156, - "step": 6016500 - }, - { - "epoch": 3.61, - "learning_rate": 3.078068199681526e-05, - "loss": 0.3277, - "step": 6017000 - }, - { - "epoch": 3.61, - "learning_rate": 3.077858203125469e-05, - "loss": 0.3307, - "step": 6017500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0776482065694124e-05, - "loss": 0.338, - "step": 6018000 - }, - { - "epoch": 3.61, - "learning_rate": 3.077438210013356e-05, - "loss": 0.3242, - "step": 6018500 - }, - { - "epoch": 3.61, - "learning_rate": 3.077228213457299e-05, - "loss": 0.3187, - "step": 6019000 - }, - { - "epoch": 3.61, - "learning_rate": 3.0770182169012424e-05, - "loss": 0.3256, - "step": 6019500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0768082203451864e-05, - "loss": 0.3314, - "step": 6020000 - }, - { - "epoch": 3.61, - "learning_rate": 3.07659822378913e-05, - "loss": 0.3226, - "step": 6020500 - }, - { - "epoch": 3.61, - "learning_rate": 3.076388647226185e-05, - "loss": 0.3236, - "step": 6021000 - }, - { - "epoch": 3.61, - "learning_rate": 3.0761786506701285e-05, - "loss": 0.3255, - "step": 6021500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0759690741071845e-05, - "loss": 0.3288, - "step": 6022000 - }, - { - "epoch": 3.61, - "learning_rate": 3.0757590775511285e-05, - "loss": 0.3225, - "step": 6022500 - }, - { - "epoch": 3.61, - "learning_rate": 3.075549080995072e-05, - "loss": 0.3329, - "step": 6023000 - }, - { - "epoch": 3.61, - "learning_rate": 3.075339084439015e-05, - "loss": 0.3279, - "step": 6023500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0751290878829586e-05, - "loss": 0.3349, - "step": 6024000 - }, - { - "epoch": 3.61, - "learning_rate": 3.0749195113200146e-05, - "loss": 0.3246, - "step": 6024500 - }, - { - "epoch": 3.61, - "learning_rate": 3.074709514763958e-05, - "loss": 0.3296, - "step": 6025000 - }, - { - "epoch": 3.61, - "learning_rate": 3.074499518207901e-05, - "loss": 0.3201, - "step": 6025500 - }, - { - "epoch": 3.61, - "learning_rate": 3.0742895216518446e-05, - "loss": 0.3216, - "step": 6026000 - }, - { - "epoch": 3.61, - "learning_rate": 3.074079945088901e-05, - "loss": 0.3245, - "step": 6026500 - }, - { - "epoch": 3.61, - "learning_rate": 3.073869948532844e-05, - "loss": 0.3324, - "step": 6027000 - }, - { - "epoch": 3.61, - "learning_rate": 3.073659951976788e-05, - "loss": 0.3293, - "step": 6027500 - }, - { - "epoch": 3.61, - "learning_rate": 3.073449955420731e-05, - "loss": 0.3277, - "step": 6028000 - }, - { - "epoch": 3.61, - "learning_rate": 3.073240378857787e-05, - "loss": 0.3408, - "step": 6028500 - }, - { - "epoch": 3.61, - "learning_rate": 3.07303038230173e-05, - "loss": 0.3248, - "step": 6029000 - }, - { - "epoch": 3.61, - "learning_rate": 3.072820385745674e-05, - "loss": 0.3251, - "step": 6029500 - }, - { - "epoch": 3.62, - "learning_rate": 3.0726103891896175e-05, - "loss": 0.3239, - "step": 6030000 - }, - { - "epoch": 3.62, - "learning_rate": 3.072400392633561e-05, - "loss": 0.3302, - "step": 6030500 - }, - { - "epoch": 3.62, - "learning_rate": 3.072190816070617e-05, - "loss": 0.3338, - "step": 6031000 - }, - { - "epoch": 3.62, - "learning_rate": 3.07198081951456e-05, - "loss": 0.3283, - "step": 6031500 - }, - { - "epoch": 3.62, - "learning_rate": 3.0717708229585035e-05, - "loss": 0.3256, - "step": 6032000 - }, - { - "epoch": 3.62, - "learning_rate": 3.0715608264024476e-05, - "loss": 0.3322, - "step": 6032500 - }, - { - "epoch": 3.62, - "learning_rate": 3.07135082984639e-05, - "loss": 0.3256, - "step": 6033000 - }, - { - "epoch": 3.62, - "learning_rate": 3.071141253283446e-05, - "loss": 0.3336, - "step": 6033500 - }, - { - "epoch": 3.62, - "learning_rate": 3.0709312567273896e-05, - "loss": 0.3311, - "step": 6034000 - }, - { - "epoch": 3.62, - "learning_rate": 3.0707212601713336e-05, - "loss": 0.3238, - "step": 6034500 - }, - { - "epoch": 3.62, - "learning_rate": 3.070511263615277e-05, - "loss": 0.3168, - "step": 6035000 - }, - { - "epoch": 3.62, - "learning_rate": 3.0703012670592197e-05, - "loss": 0.3276, - "step": 6035500 - }, - { - "epoch": 3.62, - "learning_rate": 3.070091270503164e-05, - "loss": 0.3351, - "step": 6036000 - }, - { - "epoch": 3.62, - "learning_rate": 3.069881273947107e-05, - "loss": 0.3267, - "step": 6036500 - }, - { - "epoch": 3.62, - "learning_rate": 3.0696712773910504e-05, - "loss": 0.3247, - "step": 6037000 - }, - { - "epoch": 3.62, - "learning_rate": 3.0694617008281064e-05, - "loss": 0.3261, - "step": 6037500 - }, - { - "epoch": 3.62, - "learning_rate": 3.06925170427205e-05, - "loss": 0.321, - "step": 6038000 - }, - { - "epoch": 3.62, - "learning_rate": 3.069041707715993e-05, - "loss": 0.3295, - "step": 6038500 - }, - { - "epoch": 3.62, - "learning_rate": 3.068831711159937e-05, - "loss": 0.335, - "step": 6039000 - }, - { - "epoch": 3.62, - "learning_rate": 3.068622134596993e-05, - "loss": 0.3329, - "step": 6039500 - }, - { - "epoch": 3.62, - "learning_rate": 3.068412138040936e-05, - "loss": 0.3196, - "step": 6040000 - }, - { - "epoch": 3.62, - "learning_rate": 3.068202141484879e-05, - "loss": 0.3253, - "step": 6040500 - }, - { - "epoch": 3.62, - "learning_rate": 3.067992144928823e-05, - "loss": 0.3241, - "step": 6041000 - }, - { - "epoch": 3.62, - "learning_rate": 3.0677821483727665e-05, - "loss": 0.3221, - "step": 6041500 - }, - { - "epoch": 3.62, - "learning_rate": 3.06757215181671e-05, - "loss": 0.3232, - "step": 6042000 - }, - { - "epoch": 3.62, - "learning_rate": 3.067362155260654e-05, - "loss": 0.3283, - "step": 6042500 - }, - { - "epoch": 3.62, - "learning_rate": 3.067152158704597e-05, - "loss": 0.3275, - "step": 6043000 - }, - { - "epoch": 3.62, - "learning_rate": 3.0669425821416526e-05, - "loss": 0.3304, - "step": 6043500 - }, - { - "epoch": 3.62, - "learning_rate": 3.0667330055787086e-05, - "loss": 0.337, - "step": 6044000 - }, - { - "epoch": 3.62, - "learning_rate": 3.066523009022652e-05, - "loss": 0.3281, - "step": 6044500 - }, - { - "epoch": 3.62, - "learning_rate": 3.066313012466595e-05, - "loss": 0.3281, - "step": 6045000 - }, - { - "epoch": 3.62, - "learning_rate": 3.066103015910539e-05, - "loss": 0.3272, - "step": 6045500 - }, - { - "epoch": 3.62, - "learning_rate": 3.065893019354483e-05, - "loss": 0.3198, - "step": 6046000 - }, - { - "epoch": 3.63, - "learning_rate": 3.065683442791539e-05, - "loss": 0.327, - "step": 6046500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0654734462354814e-05, - "loss": 0.3287, - "step": 6047000 - }, - { - "epoch": 3.63, - "learning_rate": 3.065263449679425e-05, - "loss": 0.326, - "step": 6047500 - }, - { - "epoch": 3.63, - "learning_rate": 3.065053453123369e-05, - "loss": 0.3311, - "step": 6048000 - }, - { - "epoch": 3.63, - "learning_rate": 3.064843456567312e-05, - "loss": 0.317, - "step": 6048500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0646334600112555e-05, - "loss": 0.3347, - "step": 6049000 - }, - { - "epoch": 3.63, - "learning_rate": 3.0644234634551995e-05, - "loss": 0.3209, - "step": 6049500 - }, - { - "epoch": 3.63, - "learning_rate": 3.064213466899143e-05, - "loss": 0.3303, - "step": 6050000 - }, - { - "epoch": 3.63, - "learning_rate": 3.064003890336198e-05, - "loss": 0.3318, - "step": 6050500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0637938937801415e-05, - "loss": 0.3261, - "step": 6051000 - }, - { - "epoch": 3.63, - "learning_rate": 3.0635838972240856e-05, - "loss": 0.3285, - "step": 6051500 - }, - { - "epoch": 3.63, - "learning_rate": 3.063374320661141e-05, - "loss": 0.3253, - "step": 6052000 - }, - { - "epoch": 3.63, - "learning_rate": 3.063164324105084e-05, - "loss": 0.3226, - "step": 6052500 - }, - { - "epoch": 3.63, - "learning_rate": 3.062954327549028e-05, - "loss": 0.3259, - "step": 6053000 - }, - { - "epoch": 3.63, - "learning_rate": 3.0627443309929716e-05, - "loss": 0.3272, - "step": 6053500 - }, - { - "epoch": 3.63, - "learning_rate": 3.062534334436915e-05, - "loss": 0.3249, - "step": 6054000 - }, - { - "epoch": 3.63, - "learning_rate": 3.062324337880859e-05, - "loss": 0.331, - "step": 6054500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0621147613179144e-05, - "loss": 0.3283, - "step": 6055000 - }, - { - "epoch": 3.63, - "learning_rate": 3.061904764761858e-05, - "loss": 0.3282, - "step": 6055500 - }, - { - "epoch": 3.63, - "learning_rate": 3.061694768205801e-05, - "loss": 0.3319, - "step": 6056000 - }, - { - "epoch": 3.63, - "learning_rate": 3.061484771649745e-05, - "loss": 0.3291, - "step": 6056500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0612747750936884e-05, - "loss": 0.3302, - "step": 6057000 - }, - { - "epoch": 3.63, - "learning_rate": 3.061065198530744e-05, - "loss": 0.3271, - "step": 6057500 - }, - { - "epoch": 3.63, - "learning_rate": 3.060855201974687e-05, - "loss": 0.3311, - "step": 6058000 - }, - { - "epoch": 3.63, - "learning_rate": 3.060645205418631e-05, - "loss": 0.3288, - "step": 6058500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0604352088625745e-05, - "loss": 0.3252, - "step": 6059000 - }, - { - "epoch": 3.63, - "learning_rate": 3.060225212306518e-05, - "loss": 0.3297, - "step": 6059500 - }, - { - "epoch": 3.63, - "learning_rate": 3.060015635743574e-05, - "loss": 0.3287, - "step": 6060000 - }, - { - "epoch": 3.63, - "learning_rate": 3.059805639187517e-05, - "loss": 0.3327, - "step": 6060500 - }, - { - "epoch": 3.63, - "learning_rate": 3.0595956426314606e-05, - "loss": 0.3203, - "step": 6061000 - }, - { - "epoch": 3.63, - "learning_rate": 3.0593856460754046e-05, - "loss": 0.3293, - "step": 6061500 - }, - { - "epoch": 3.63, - "learning_rate": 3.05917606951246e-05, - "loss": 0.3218, - "step": 6062000 - }, - { - "epoch": 3.63, - "learning_rate": 3.058966072956403e-05, - "loss": 0.3206, - "step": 6062500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0587560764003466e-05, - "loss": 0.3227, - "step": 6063000 - }, - { - "epoch": 3.64, - "learning_rate": 3.058546079844291e-05, - "loss": 0.3391, - "step": 6063500 - }, - { - "epoch": 3.64, - "learning_rate": 3.058336083288234e-05, - "loss": 0.3259, - "step": 6064000 - }, - { - "epoch": 3.64, - "learning_rate": 3.0581260867321774e-05, - "loss": 0.3248, - "step": 6064500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0579160901761214e-05, - "loss": 0.3337, - "step": 6065000 - }, - { - "epoch": 3.64, - "learning_rate": 3.057706093620065e-05, - "loss": 0.3169, - "step": 6065500 - }, - { - "epoch": 3.64, - "learning_rate": 3.05749651705712e-05, - "loss": 0.3265, - "step": 6066000 - }, - { - "epoch": 3.64, - "learning_rate": 3.0572865205010634e-05, - "loss": 0.3234, - "step": 6066500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0570765239450075e-05, - "loss": 0.3358, - "step": 6067000 - }, - { - "epoch": 3.64, - "learning_rate": 3.056866527388951e-05, - "loss": 0.3337, - "step": 6067500 - }, - { - "epoch": 3.64, - "learning_rate": 3.056656950826006e-05, - "loss": 0.3264, - "step": 6068000 - }, - { - "epoch": 3.64, - "learning_rate": 3.05644695426995e-05, - "loss": 0.3293, - "step": 6068500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0562373777070055e-05, - "loss": 0.3295, - "step": 6069000 - }, - { - "epoch": 3.64, - "learning_rate": 3.056027381150949e-05, - "loss": 0.3271, - "step": 6069500 - }, - { - "epoch": 3.64, - "learning_rate": 3.055817384594892e-05, - "loss": 0.3204, - "step": 6070000 - }, - { - "epoch": 3.64, - "learning_rate": 3.055607388038836e-05, - "loss": 0.3318, - "step": 6070500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0553973914827796e-05, - "loss": 0.3199, - "step": 6071000 - }, - { - "epoch": 3.64, - "learning_rate": 3.055187814919835e-05, - "loss": 0.3179, - "step": 6071500 - }, - { - "epoch": 3.64, - "learning_rate": 3.054977818363778e-05, - "loss": 0.3289, - "step": 6072000 - }, - { - "epoch": 3.64, - "learning_rate": 3.054767821807722e-05, - "loss": 0.322, - "step": 6072500 - }, - { - "epoch": 3.64, - "learning_rate": 3.054557825251666e-05, - "loss": 0.3224, - "step": 6073000 - }, - { - "epoch": 3.64, - "learning_rate": 3.054347828695609e-05, - "loss": 0.3215, - "step": 6073500 - }, - { - "epoch": 3.64, - "learning_rate": 3.054137832139553e-05, - "loss": 0.3268, - "step": 6074000 - }, - { - "epoch": 3.64, - "learning_rate": 3.0539282555766084e-05, - "loss": 0.3188, - "step": 6074500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0537186790136644e-05, - "loss": 0.3295, - "step": 6075000 - }, - { - "epoch": 3.64, - "learning_rate": 3.053508682457608e-05, - "loss": 0.326, - "step": 6075500 - }, - { - "epoch": 3.64, - "learning_rate": 3.053298685901551e-05, - "loss": 0.3143, - "step": 6076000 - }, - { - "epoch": 3.64, - "learning_rate": 3.0530886893454945e-05, - "loss": 0.3254, - "step": 6076500 - }, - { - "epoch": 3.64, - "learning_rate": 3.052878692789438e-05, - "loss": 0.3287, - "step": 6077000 - }, - { - "epoch": 3.64, - "learning_rate": 3.052668696233382e-05, - "loss": 0.3288, - "step": 6077500 - }, - { - "epoch": 3.64, - "learning_rate": 3.052458699677325e-05, - "loss": 0.3254, - "step": 6078000 - }, - { - "epoch": 3.64, - "learning_rate": 3.0522487031212685e-05, - "loss": 0.3322, - "step": 6078500 - }, - { - "epoch": 3.64, - "learning_rate": 3.0520387065652126e-05, - "loss": 0.332, - "step": 6079000 - }, - { - "epoch": 3.64, - "learning_rate": 3.051829130002268e-05, - "loss": 0.3222, - "step": 6079500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0516191334462113e-05, - "loss": 0.3175, - "step": 6080000 - }, - { - "epoch": 3.65, - "learning_rate": 3.051409136890155e-05, - "loss": 0.3299, - "step": 6080500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0511991403340986e-05, - "loss": 0.3301, - "step": 6081000 - }, - { - "epoch": 3.65, - "learning_rate": 3.050989563771154e-05, - "loss": 0.3286, - "step": 6081500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0507795672150977e-05, - "loss": 0.3325, - "step": 6082000 - }, - { - "epoch": 3.65, - "learning_rate": 3.050569570659041e-05, - "loss": 0.3308, - "step": 6082500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0503595741029847e-05, - "loss": 0.3221, - "step": 6083000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0501495775469284e-05, - "loss": 0.33, - "step": 6083500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0499395809908717e-05, - "loss": 0.3231, - "step": 6084000 - }, - { - "epoch": 3.65, - "learning_rate": 3.049730004427927e-05, - "loss": 0.3335, - "step": 6084500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0495200078718708e-05, - "loss": 0.3271, - "step": 6085000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0493100113158145e-05, - "loss": 0.328, - "step": 6085500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0491000147597578e-05, - "loss": 0.3248, - "step": 6086000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0488900182037015e-05, - "loss": 0.3287, - "step": 6086500 - }, - { - "epoch": 3.65, - "learning_rate": 3.048680441640757e-05, - "loss": 0.3291, - "step": 6087000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0484704450847005e-05, - "loss": 0.3347, - "step": 6087500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0482604485286442e-05, - "loss": 0.3226, - "step": 6088000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0480504519725876e-05, - "loss": 0.3258, - "step": 6088500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0478404554165313e-05, - "loss": 0.3294, - "step": 6089000 - }, - { - "epoch": 3.65, - "learning_rate": 3.047630458860475e-05, - "loss": 0.3289, - "step": 6089500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0474204623044183e-05, - "loss": 0.3281, - "step": 6090000 - }, - { - "epoch": 3.65, - "learning_rate": 3.047210885741474e-05, - "loss": 0.3207, - "step": 6090500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0470008891854173e-05, - "loss": 0.324, - "step": 6091000 - }, - { - "epoch": 3.65, - "learning_rate": 3.046790892629361e-05, - "loss": 0.3241, - "step": 6091500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0465808960733047e-05, - "loss": 0.3261, - "step": 6092000 - }, - { - "epoch": 3.65, - "learning_rate": 3.046370899517248e-05, - "loss": 0.3308, - "step": 6092500 - }, - { - "epoch": 3.65, - "learning_rate": 3.046160902961191e-05, - "loss": 0.3298, - "step": 6093000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0459509064051347e-05, - "loss": 0.3313, - "step": 6093500 - }, - { - "epoch": 3.65, - "learning_rate": 3.045740909849078e-05, - "loss": 0.319, - "step": 6094000 - }, - { - "epoch": 3.65, - "learning_rate": 3.045531333286134e-05, - "loss": 0.3339, - "step": 6094500 - }, - { - "epoch": 3.65, - "learning_rate": 3.0453213367300778e-05, - "loss": 0.33, - "step": 6095000 - }, - { - "epoch": 3.65, - "learning_rate": 3.0451113401740208e-05, - "loss": 0.3239, - "step": 6095500 - }, - { - "epoch": 3.65, - "learning_rate": 3.044901763611077e-05, - "loss": 0.3253, - "step": 6096000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0446917670550205e-05, - "loss": 0.3351, - "step": 6096500 - }, - { - "epoch": 3.66, - "learning_rate": 3.044481770498964e-05, - "loss": 0.3388, - "step": 6097000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0442717739429076e-05, - "loss": 0.3287, - "step": 6097500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0440617773868506e-05, - "loss": 0.3287, - "step": 6098000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0438517808307942e-05, - "loss": 0.3192, - "step": 6098500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0436422042678503e-05, - "loss": 0.3276, - "step": 6099000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0434322077117936e-05, - "loss": 0.3281, - "step": 6099500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0432222111557373e-05, - "loss": 0.3189, - "step": 6100000 - }, - { - "epoch": 3.66, - "eval_loss": 0.31894803047180176, - "eval_runtime": 1119.5797, - "eval_samples_per_second": 470.462, - "eval_steps_per_second": 78.411, - "step": 6100000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0430122145996803e-05, - "loss": 0.3295, - "step": 6100500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0428022180436237e-05, - "loss": 0.3204, - "step": 6101000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0425926414806797e-05, - "loss": 0.323, - "step": 6101500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0423826449246234e-05, - "loss": 0.3237, - "step": 6102000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0421726483685664e-05, - "loss": 0.3268, - "step": 6102500 - }, - { - "epoch": 3.66, - "learning_rate": 3.04196265181251e-05, - "loss": 0.3271, - "step": 6103000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0417526552564534e-05, - "loss": 0.3263, - "step": 6103500 - }, - { - "epoch": 3.66, - "learning_rate": 3.041542658700397e-05, - "loss": 0.3345, - "step": 6104000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0413326621443408e-05, - "loss": 0.3214, - "step": 6104500 - }, - { - "epoch": 3.66, - "learning_rate": 3.041123085581396e-05, - "loss": 0.3262, - "step": 6105000 - }, - { - "epoch": 3.66, - "learning_rate": 3.04091308902534e-05, - "loss": 0.3189, - "step": 6105500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0407030924692832e-05, - "loss": 0.3267, - "step": 6106000 - }, - { - "epoch": 3.66, - "learning_rate": 3.040493095913227e-05, - "loss": 0.326, - "step": 6106500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0402830993571706e-05, - "loss": 0.3286, - "step": 6107000 - }, - { - "epoch": 3.66, - "learning_rate": 3.040073522794226e-05, - "loss": 0.3325, - "step": 6107500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0398635262381693e-05, - "loss": 0.3269, - "step": 6108000 - }, - { - "epoch": 3.66, - "learning_rate": 3.039653529682113e-05, - "loss": 0.3308, - "step": 6108500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0394435331260566e-05, - "loss": 0.3257, - "step": 6109000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0392335365700003e-05, - "loss": 0.3247, - "step": 6109500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0390239600070557e-05, - "loss": 0.3277, - "step": 6110000 - }, - { - "epoch": 3.66, - "learning_rate": 3.038813963450999e-05, - "loss": 0.3278, - "step": 6110500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0386039668949427e-05, - "loss": 0.32, - "step": 6111000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0383939703388864e-05, - "loss": 0.3309, - "step": 6111500 - }, - { - "epoch": 3.66, - "learning_rate": 3.0381839737828297e-05, - "loss": 0.323, - "step": 6112000 - }, - { - "epoch": 3.66, - "learning_rate": 3.0379739772267734e-05, - "loss": 0.3352, - "step": 6112500 - }, - { - "epoch": 3.66, - "learning_rate": 3.037763980670717e-05, - "loss": 0.3263, - "step": 6113000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0375539841146604e-05, - "loss": 0.3295, - "step": 6113500 - }, - { - "epoch": 3.67, - "learning_rate": 3.037344407551716e-05, - "loss": 0.324, - "step": 6114000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0371344109956595e-05, - "loss": 0.3373, - "step": 6114500 - }, - { - "epoch": 3.67, - "learning_rate": 3.036924414439603e-05, - "loss": 0.3244, - "step": 6115000 - }, - { - "epoch": 3.67, - "learning_rate": 3.036714417883547e-05, - "loss": 0.3263, - "step": 6115500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0365048413206022e-05, - "loss": 0.3393, - "step": 6116000 - }, - { - "epoch": 3.67, - "learning_rate": 3.036294844764546e-05, - "loss": 0.3211, - "step": 6116500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0360848482084892e-05, - "loss": 0.3277, - "step": 6117000 - }, - { - "epoch": 3.67, - "learning_rate": 3.035874851652433e-05, - "loss": 0.3288, - "step": 6117500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0356648550963766e-05, - "loss": 0.3302, - "step": 6118000 - }, - { - "epoch": 3.67, - "learning_rate": 3.03545485854032e-05, - "loss": 0.3178, - "step": 6118500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0352452819773753e-05, - "loss": 0.3312, - "step": 6119000 - }, - { - "epoch": 3.67, - "learning_rate": 3.035035285421319e-05, - "loss": 0.3305, - "step": 6119500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0348252888652627e-05, - "loss": 0.3331, - "step": 6120000 - }, - { - "epoch": 3.67, - "learning_rate": 3.034615292309206e-05, - "loss": 0.3191, - "step": 6120500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0344052957531497e-05, - "loss": 0.3279, - "step": 6121000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0341952991970934e-05, - "loss": 0.3316, - "step": 6121500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0339857226341488e-05, - "loss": 0.3325, - "step": 6122000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0337757260780924e-05, - "loss": 0.3279, - "step": 6122500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0335657295220358e-05, - "loss": 0.3212, - "step": 6123000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0333557329659795e-05, - "loss": 0.3284, - "step": 6123500 - }, - { - "epoch": 3.67, - "learning_rate": 3.033145736409923e-05, - "loss": 0.3209, - "step": 6124000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0329361598469785e-05, - "loss": 0.3332, - "step": 6124500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0327261632909222e-05, - "loss": 0.3167, - "step": 6125000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0325161667348655e-05, - "loss": 0.3195, - "step": 6125500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0323061701788092e-05, - "loss": 0.3274, - "step": 6126000 - }, - { - "epoch": 3.67, - "learning_rate": 3.032096173622753e-05, - "loss": 0.3235, - "step": 6126500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0318865970598083e-05, - "loss": 0.3265, - "step": 6127000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0316766005037516e-05, - "loss": 0.3236, - "step": 6127500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0314666039476953e-05, - "loss": 0.3246, - "step": 6128000 - }, - { - "epoch": 3.67, - "learning_rate": 3.031256607391639e-05, - "loss": 0.3243, - "step": 6128500 - }, - { - "epoch": 3.67, - "learning_rate": 3.0310466108355827e-05, - "loss": 0.3119, - "step": 6129000 - }, - { - "epoch": 3.67, - "learning_rate": 3.0308366142795253e-05, - "loss": 0.3294, - "step": 6129500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0306270377165814e-05, - "loss": 0.333, - "step": 6130000 - }, - { - "epoch": 3.68, - "learning_rate": 3.030417041160525e-05, - "loss": 0.3232, - "step": 6130500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0302070446044687e-05, - "loss": 0.3277, - "step": 6131000 - }, - { - "epoch": 3.68, - "learning_rate": 3.029997048048412e-05, - "loss": 0.3206, - "step": 6131500 - }, - { - "epoch": 3.68, - "learning_rate": 3.029787051492355e-05, - "loss": 0.3239, - "step": 6132000 - }, - { - "epoch": 3.68, - "learning_rate": 3.029577474929411e-05, - "loss": 0.3337, - "step": 6132500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0293674783733548e-05, - "loss": 0.3279, - "step": 6133000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0291574818172985e-05, - "loss": 0.3271, - "step": 6133500 - }, - { - "epoch": 3.68, - "learning_rate": 3.028947485261242e-05, - "loss": 0.3272, - "step": 6134000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0287379086982975e-05, - "loss": 0.3195, - "step": 6134500 - }, - { - "epoch": 3.68, - "learning_rate": 3.028527912142241e-05, - "loss": 0.3189, - "step": 6135000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0283179155861846e-05, - "loss": 0.3268, - "step": 6135500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0281079190301283e-05, - "loss": 0.3235, - "step": 6136000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0278979224740716e-05, - "loss": 0.3261, - "step": 6136500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0276879259180146e-05, - "loss": 0.3223, - "step": 6137000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0274783493550706e-05, - "loss": 0.3258, - "step": 6137500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0272683527990143e-05, - "loss": 0.3312, - "step": 6138000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0270583562429577e-05, - "loss": 0.3178, - "step": 6138500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0268483596869007e-05, - "loss": 0.3185, - "step": 6139000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0266387831239567e-05, - "loss": 0.3206, - "step": 6139500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0264287865679004e-05, - "loss": 0.3182, - "step": 6140000 - }, - { - "epoch": 3.68, - "learning_rate": 3.026218790011844e-05, - "loss": 0.3273, - "step": 6140500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0260087934557874e-05, - "loss": 0.3284, - "step": 6141000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0257987968997304e-05, - "loss": 0.3234, - "step": 6141500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0255892203367865e-05, - "loss": 0.3341, - "step": 6142000 - }, - { - "epoch": 3.68, - "learning_rate": 3.02537922378073e-05, - "loss": 0.3279, - "step": 6142500 - }, - { - "epoch": 3.68, - "learning_rate": 3.025169227224674e-05, - "loss": 0.3228, - "step": 6143000 - }, - { - "epoch": 3.68, - "learning_rate": 3.0249592306686172e-05, - "loss": 0.3214, - "step": 6143500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0247492341125602e-05, - "loss": 0.3264, - "step": 6144000 - }, - { - "epoch": 3.68, - "learning_rate": 3.024539237556504e-05, - "loss": 0.3247, - "step": 6144500 - }, - { - "epoch": 3.68, - "learning_rate": 3.0243292410004472e-05, - "loss": 0.3229, - "step": 6145000 - }, - { - "epoch": 3.68, - "learning_rate": 3.024119244444391e-05, - "loss": 0.3263, - "step": 6145500 - }, - { - "epoch": 3.68, - "learning_rate": 3.023909667881447e-05, - "loss": 0.3271, - "step": 6146000 - }, - { - "epoch": 3.69, - "learning_rate": 3.02369967132539e-05, - "loss": 0.3208, - "step": 6146500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0234896747693336e-05, - "loss": 0.3249, - "step": 6147000 - }, - { - "epoch": 3.69, - "learning_rate": 3.023279678213277e-05, - "loss": 0.3243, - "step": 6147500 - }, - { - "epoch": 3.69, - "learning_rate": 3.023070101650333e-05, - "loss": 0.3308, - "step": 6148000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0228601050942767e-05, - "loss": 0.3253, - "step": 6148500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0226501085382197e-05, - "loss": 0.315, - "step": 6149000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0224401119821634e-05, - "loss": 0.3245, - "step": 6149500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0222305354192194e-05, - "loss": 0.3259, - "step": 6150000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0220205388631628e-05, - "loss": 0.3245, - "step": 6150500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0218105423071058e-05, - "loss": 0.336, - "step": 6151000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0216005457510495e-05, - "loss": 0.3236, - "step": 6151500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0213909691881055e-05, - "loss": 0.333, - "step": 6152000 - }, - { - "epoch": 3.69, - "learning_rate": 3.021180972632049e-05, - "loss": 0.3356, - "step": 6152500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0209709760759925e-05, - "loss": 0.3306, - "step": 6153000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0207609795199355e-05, - "loss": 0.3233, - "step": 6153500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0205509829638792e-05, - "loss": 0.3226, - "step": 6154000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0203414064009353e-05, - "loss": 0.3358, - "step": 6154500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0201314098448786e-05, - "loss": 0.3232, - "step": 6155000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0199214132888223e-05, - "loss": 0.3303, - "step": 6155500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0197114167327653e-05, - "loss": 0.3329, - "step": 6156000 - }, - { - "epoch": 3.69, - "learning_rate": 3.019501420176709e-05, - "loss": 0.3251, - "step": 6156500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0192914236206523e-05, - "loss": 0.3262, - "step": 6157000 - }, - { - "epoch": 3.69, - "learning_rate": 3.019081427064596e-05, - "loss": 0.3311, - "step": 6157500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0188714305085397e-05, - "loss": 0.3195, - "step": 6158000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0186622739387074e-05, - "loss": 0.3423, - "step": 6158500 - }, - { - "epoch": 3.69, - "learning_rate": 3.018452277382651e-05, - "loss": 0.3177, - "step": 6159000 - }, - { - "epoch": 3.69, - "learning_rate": 3.0182422808265944e-05, - "loss": 0.3297, - "step": 6159500 - }, - { - "epoch": 3.69, - "learning_rate": 3.018032284270538e-05, - "loss": 0.3217, - "step": 6160000 - }, - { - "epoch": 3.69, - "learning_rate": 3.017822287714481e-05, - "loss": 0.3234, - "step": 6160500 - }, - { - "epoch": 3.69, - "learning_rate": 3.017612711151537e-05, - "loss": 0.3235, - "step": 6161000 - }, - { - "epoch": 3.69, - "learning_rate": 3.017402714595481e-05, - "loss": 0.3321, - "step": 6161500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0171927180394242e-05, - "loss": 0.3312, - "step": 6162000 - }, - { - "epoch": 3.69, - "learning_rate": 3.016982721483368e-05, - "loss": 0.3281, - "step": 6162500 - }, - { - "epoch": 3.69, - "learning_rate": 3.0167731449204232e-05, - "loss": 0.3277, - "step": 6163000 - }, - { - "epoch": 3.7, - "learning_rate": 3.016563148364367e-05, - "loss": 0.3358, - "step": 6163500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0163531518083106e-05, - "loss": 0.3277, - "step": 6164000 - }, - { - "epoch": 3.7, - "learning_rate": 3.016143155252254e-05, - "loss": 0.3238, - "step": 6164500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0159331586961976e-05, - "loss": 0.3232, - "step": 6165000 - }, - { - "epoch": 3.7, - "learning_rate": 3.015723582133253e-05, - "loss": 0.3324, - "step": 6165500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0155135855771967e-05, - "loss": 0.3264, - "step": 6166000 - }, - { - "epoch": 3.7, - "learning_rate": 3.01530358902114e-05, - "loss": 0.3306, - "step": 6166500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0150935924650837e-05, - "loss": 0.3207, - "step": 6167000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0148835959090274e-05, - "loss": 0.3362, - "step": 6167500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0146735993529704e-05, - "loss": 0.3311, - "step": 6168000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0144640227900264e-05, - "loss": 0.3359, - "step": 6168500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0142540262339698e-05, - "loss": 0.3187, - "step": 6169000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0140440296779135e-05, - "loss": 0.3275, - "step": 6169500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0138340331218565e-05, - "loss": 0.328, - "step": 6170000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0136240365658e-05, - "loss": 0.3251, - "step": 6170500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0134140400097435e-05, - "loss": 0.3203, - "step": 6171000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0132040434536872e-05, - "loss": 0.3386, - "step": 6171500 - }, - { - "epoch": 3.7, - "learning_rate": 3.012994046897631e-05, - "loss": 0.3325, - "step": 6172000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0127844703346862e-05, - "loss": 0.3293, - "step": 6172500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0125748937717423e-05, - "loss": 0.3359, - "step": 6173000 - }, - { - "epoch": 3.7, - "learning_rate": 3.012364897215686e-05, - "loss": 0.3288, - "step": 6173500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0121549006596293e-05, - "loss": 0.3227, - "step": 6174000 - }, - { - "epoch": 3.7, - "learning_rate": 3.011944904103573e-05, - "loss": 0.3282, - "step": 6174500 - }, - { - "epoch": 3.7, - "learning_rate": 3.011734907547516e-05, - "loss": 0.333, - "step": 6175000 - }, - { - "epoch": 3.7, - "learning_rate": 3.011525330984572e-05, - "loss": 0.3331, - "step": 6175500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0113153344285154e-05, - "loss": 0.3194, - "step": 6176000 - }, - { - "epoch": 3.7, - "learning_rate": 3.011105337872459e-05, - "loss": 0.3342, - "step": 6176500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0108953413164027e-05, - "loss": 0.3196, - "step": 6177000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0106853447603458e-05, - "loss": 0.3274, - "step": 6177500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0104757681974018e-05, - "loss": 0.324, - "step": 6178000 - }, - { - "epoch": 3.7, - "learning_rate": 3.010265771641345e-05, - "loss": 0.3343, - "step": 6178500 - }, - { - "epoch": 3.7, - "learning_rate": 3.0100557750852888e-05, - "loss": 0.3239, - "step": 6179000 - }, - { - "epoch": 3.7, - "learning_rate": 3.0098457785292325e-05, - "loss": 0.3357, - "step": 6179500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0096357819731752e-05, - "loss": 0.3267, - "step": 6180000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0094262054102315e-05, - "loss": 0.3322, - "step": 6180500 - }, - { - "epoch": 3.71, - "learning_rate": 3.009216208854175e-05, - "loss": 0.3261, - "step": 6181000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0090062122981186e-05, - "loss": 0.3211, - "step": 6181500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0087962157420616e-05, - "loss": 0.3249, - "step": 6182000 - }, - { - "epoch": 3.71, - "learning_rate": 3.008586219186005e-05, - "loss": 0.3341, - "step": 6182500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0083762226299486e-05, - "loss": 0.3302, - "step": 6183000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0081662260738923e-05, - "loss": 0.3289, - "step": 6183500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0079562295178356e-05, - "loss": 0.3265, - "step": 6184000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0077466529548913e-05, - "loss": 0.3358, - "step": 6184500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0075366563988347e-05, - "loss": 0.329, - "step": 6185000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0073266598427784e-05, - "loss": 0.3254, - "step": 6185500 - }, - { - "epoch": 3.71, - "learning_rate": 3.007116663286722e-05, - "loss": 0.3325, - "step": 6186000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0069066667306654e-05, - "loss": 0.3187, - "step": 6186500 - }, - { - "epoch": 3.71, - "learning_rate": 3.006697090167721e-05, - "loss": 0.3233, - "step": 6187000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0064870936116644e-05, - "loss": 0.3184, - "step": 6187500 - }, - { - "epoch": 3.71, - "learning_rate": 3.006277097055608e-05, - "loss": 0.3293, - "step": 6188000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0060671004995518e-05, - "loss": 0.3313, - "step": 6188500 - }, - { - "epoch": 3.71, - "learning_rate": 3.005857103943495e-05, - "loss": 0.3237, - "step": 6189000 - }, - { - "epoch": 3.71, - "learning_rate": 3.005647107387439e-05, - "loss": 0.3256, - "step": 6189500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0054375308244942e-05, - "loss": 0.3276, - "step": 6190000 - }, - { - "epoch": 3.71, - "learning_rate": 3.005227534268438e-05, - "loss": 0.3191, - "step": 6190500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0050175377123812e-05, - "loss": 0.3209, - "step": 6191000 - }, - { - "epoch": 3.71, - "learning_rate": 3.004807541156325e-05, - "loss": 0.3259, - "step": 6191500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0045975446002686e-05, - "loss": 0.3192, - "step": 6192000 - }, - { - "epoch": 3.71, - "learning_rate": 3.004387968037324e-05, - "loss": 0.331, - "step": 6192500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0041779714812676e-05, - "loss": 0.3223, - "step": 6193000 - }, - { - "epoch": 3.71, - "learning_rate": 3.003967974925211e-05, - "loss": 0.322, - "step": 6193500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0037579783691547e-05, - "loss": 0.3299, - "step": 6194000 - }, - { - "epoch": 3.71, - "learning_rate": 3.00354840180621e-05, - "loss": 0.3237, - "step": 6194500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0033384052501537e-05, - "loss": 0.3243, - "step": 6195000 - }, - { - "epoch": 3.71, - "learning_rate": 3.0031284086940974e-05, - "loss": 0.3287, - "step": 6195500 - }, - { - "epoch": 3.71, - "learning_rate": 3.0029184121380407e-05, - "loss": 0.3392, - "step": 6196000 - }, - { - "epoch": 3.72, - "learning_rate": 3.0027084155819844e-05, - "loss": 0.3283, - "step": 6196500 - }, - { - "epoch": 3.72, - "learning_rate": 3.002498419025928e-05, - "loss": 0.3258, - "step": 6197000 - }, - { - "epoch": 3.72, - "learning_rate": 3.0022884224698715e-05, - "loss": 0.3168, - "step": 6197500 - }, - { - "epoch": 3.72, - "learning_rate": 3.002078425913815e-05, - "loss": 0.3325, - "step": 6198000 - }, - { - "epoch": 3.72, - "learning_rate": 3.001869269343983e-05, - "loss": 0.3295, - "step": 6198500 - }, - { - "epoch": 3.72, - "learning_rate": 3.001659272787926e-05, - "loss": 0.3224, - "step": 6199000 - }, - { - "epoch": 3.72, - "learning_rate": 3.0014492762318695e-05, - "loss": 0.3311, - "step": 6199500 - }, - { - "epoch": 3.72, - "learning_rate": 3.0012392796758132e-05, - "loss": 0.3276, - "step": 6200000 - }, - { - "epoch": 3.72, - "eval_loss": 0.31798064708709717, - "eval_runtime": 1116.1155, - "eval_samples_per_second": 471.922, - "eval_steps_per_second": 78.654, - "step": 6200000 - }, - { - "epoch": 3.72, - "learning_rate": 3.0010292831197566e-05, - "loss": 0.3232, - "step": 6200500 - }, - { - "epoch": 3.72, - "learning_rate": 3.0008192865637003e-05, - "loss": 0.3197, - "step": 6201000 - }, - { - "epoch": 3.72, - "learning_rate": 3.000609290007644e-05, - "loss": 0.3228, - "step": 6201500 - }, - { - "epoch": 3.72, - "learning_rate": 3.0003992934515873e-05, - "loss": 0.3239, - "step": 6202000 - }, - { - "epoch": 3.72, - "learning_rate": 3.000189716888643e-05, - "loss": 0.3245, - "step": 6202500 - }, - { - "epoch": 3.72, - "learning_rate": 2.999980140325699e-05, - "loss": 0.3373, - "step": 6203000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9997701437696417e-05, - "loss": 0.3265, - "step": 6203500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9995601472135854e-05, - "loss": 0.3222, - "step": 6204000 - }, - { - "epoch": 3.72, - "learning_rate": 2.999350150657529e-05, - "loss": 0.3209, - "step": 6204500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9991401541014724e-05, - "loss": 0.3308, - "step": 6205000 - }, - { - "epoch": 3.72, - "learning_rate": 2.998930157545416e-05, - "loss": 0.3285, - "step": 6205500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9987201609893598e-05, - "loss": 0.3234, - "step": 6206000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9985101644333035e-05, - "loss": 0.3241, - "step": 6206500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9983001678772468e-05, - "loss": 0.3282, - "step": 6207000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9980901713211905e-05, - "loss": 0.3279, - "step": 6207500 - }, - { - "epoch": 3.72, - "learning_rate": 2.997880594758246e-05, - "loss": 0.3153, - "step": 6208000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9976705982021895e-05, - "loss": 0.3321, - "step": 6208500 - }, - { - "epoch": 3.72, - "learning_rate": 2.997460601646133e-05, - "loss": 0.3258, - "step": 6209000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9972506050900766e-05, - "loss": 0.3216, - "step": 6209500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9970406085340202e-05, - "loss": 0.3305, - "step": 6210000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9968306119779636e-05, - "loss": 0.3266, - "step": 6210500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9966206154219073e-05, - "loss": 0.3292, - "step": 6211000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9964106188658503e-05, - "loss": 0.3252, - "step": 6211500 - }, - { - "epoch": 3.72, - "learning_rate": 2.996201462296018e-05, - "loss": 0.3228, - "step": 6212000 - }, - { - "epoch": 3.72, - "learning_rate": 2.9959914657399617e-05, - "loss": 0.3154, - "step": 6212500 - }, - { - "epoch": 3.72, - "learning_rate": 2.9957814691839054e-05, - "loss": 0.328, - "step": 6213000 - }, - { - "epoch": 3.73, - "learning_rate": 2.995571472627849e-05, - "loss": 0.3342, - "step": 6213500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9953614760717924e-05, - "loss": 0.3269, - "step": 6214000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9951518995088478e-05, - "loss": 0.3225, - "step": 6214500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9949419029527914e-05, - "loss": 0.3154, - "step": 6215000 - }, - { - "epoch": 3.73, - "learning_rate": 2.994731906396735e-05, - "loss": 0.3262, - "step": 6215500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9945219098406785e-05, - "loss": 0.3252, - "step": 6216000 - }, - { - "epoch": 3.73, - "learning_rate": 2.994311913284622e-05, - "loss": 0.3209, - "step": 6216500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9941023367216775e-05, - "loss": 0.346, - "step": 6217000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9938923401656212e-05, - "loss": 0.3388, - "step": 6217500 - }, - { - "epoch": 3.73, - "learning_rate": 2.993682343609565e-05, - "loss": 0.3217, - "step": 6218000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9934723470535082e-05, - "loss": 0.33, - "step": 6218500 - }, - { - "epoch": 3.73, - "learning_rate": 2.993262350497452e-05, - "loss": 0.3271, - "step": 6219000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9930523539413956e-05, - "loss": 0.3215, - "step": 6219500 - }, - { - "epoch": 3.73, - "learning_rate": 2.992842357385339e-05, - "loss": 0.3245, - "step": 6220000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9926327808223946e-05, - "loss": 0.3286, - "step": 6220500 - }, - { - "epoch": 3.73, - "learning_rate": 2.992422784266338e-05, - "loss": 0.3194, - "step": 6221000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9922127877102817e-05, - "loss": 0.3333, - "step": 6221500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9920027911542254e-05, - "loss": 0.3231, - "step": 6222000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9917932145912807e-05, - "loss": 0.3243, - "step": 6222500 - }, - { - "epoch": 3.73, - "learning_rate": 2.991583218035224e-05, - "loss": 0.3266, - "step": 6223000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9913732214791677e-05, - "loss": 0.3197, - "step": 6223500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9911632249231114e-05, - "loss": 0.3271, - "step": 6224000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9909532283670548e-05, - "loss": 0.3199, - "step": 6224500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9907432318109985e-05, - "loss": 0.3229, - "step": 6225000 - }, - { - "epoch": 3.73, - "learning_rate": 2.990533235254942e-05, - "loss": 0.3199, - "step": 6225500 - }, - { - "epoch": 3.73, - "learning_rate": 2.990323238698885e-05, - "loss": 0.3302, - "step": 6226000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9901136621359412e-05, - "loss": 0.3266, - "step": 6226500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9899036655798845e-05, - "loss": 0.3325, - "step": 6227000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9896936690238282e-05, - "loss": 0.3308, - "step": 6227500 - }, - { - "epoch": 3.73, - "learning_rate": 2.9894836724677712e-05, - "loss": 0.3123, - "step": 6228000 - }, - { - "epoch": 3.73, - "learning_rate": 2.989273675911715e-05, - "loss": 0.3275, - "step": 6228500 - }, - { - "epoch": 3.73, - "learning_rate": 2.989064099348771e-05, - "loss": 0.3206, - "step": 6229000 - }, - { - "epoch": 3.73, - "learning_rate": 2.9888541027927143e-05, - "loss": 0.3293, - "step": 6229500 - }, - { - "epoch": 3.74, - "learning_rate": 2.988644106236658e-05, - "loss": 0.3289, - "step": 6230000 - }, - { - "epoch": 3.74, - "learning_rate": 2.988434109680601e-05, - "loss": 0.3294, - "step": 6230500 - }, - { - "epoch": 3.74, - "learning_rate": 2.988224533117657e-05, - "loss": 0.3309, - "step": 6231000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9880145365616004e-05, - "loss": 0.3216, - "step": 6231500 - }, - { - "epoch": 3.74, - "learning_rate": 2.987804540005544e-05, - "loss": 0.329, - "step": 6232000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9875945434494877e-05, - "loss": 0.3145, - "step": 6232500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9873845468934307e-05, - "loss": 0.3305, - "step": 6233000 - }, - { - "epoch": 3.74, - "learning_rate": 2.987174550337374e-05, - "loss": 0.3263, - "step": 6233500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9869645537813178e-05, - "loss": 0.3309, - "step": 6234000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9867549772183738e-05, - "loss": 0.3289, - "step": 6234500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9865449806623175e-05, - "loss": 0.3267, - "step": 6235000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9863349841062605e-05, - "loss": 0.3256, - "step": 6235500 - }, - { - "epoch": 3.74, - "learning_rate": 2.986124987550204e-05, - "loss": 0.3274, - "step": 6236000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9859149909941475e-05, - "loss": 0.3253, - "step": 6236500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9857049944380912e-05, - "loss": 0.3274, - "step": 6237000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9854954178751466e-05, - "loss": 0.3321, - "step": 6237500 - }, - { - "epoch": 3.74, - "learning_rate": 2.98528542131909e-05, - "loss": 0.3222, - "step": 6238000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9850754247630336e-05, - "loss": 0.3221, - "step": 6238500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9848654282069773e-05, - "loss": 0.3269, - "step": 6239000 - }, - { - "epoch": 3.74, - "learning_rate": 2.984655431650921e-05, - "loss": 0.3249, - "step": 6239500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9844458550879763e-05, - "loss": 0.3266, - "step": 6240000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9842358585319197e-05, - "loss": 0.3249, - "step": 6240500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9840258619758634e-05, - "loss": 0.3301, - "step": 6241000 - }, - { - "epoch": 3.74, - "learning_rate": 2.983815865419807e-05, - "loss": 0.3313, - "step": 6241500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9836058688637504e-05, - "loss": 0.3183, - "step": 6242000 - }, - { - "epoch": 3.74, - "learning_rate": 2.983396292300806e-05, - "loss": 0.318, - "step": 6242500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9831862957447494e-05, - "loss": 0.3234, - "step": 6243000 - }, - { - "epoch": 3.74, - "learning_rate": 2.982976299188693e-05, - "loss": 0.3309, - "step": 6243500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9827663026326368e-05, - "loss": 0.323, - "step": 6244000 - }, - { - "epoch": 3.74, - "learning_rate": 2.98255630607658e-05, - "loss": 0.3311, - "step": 6244500 - }, - { - "epoch": 3.74, - "learning_rate": 2.9823467295136355e-05, - "loss": 0.33, - "step": 6245000 - }, - { - "epoch": 3.74, - "learning_rate": 2.9821367329575792e-05, - "loss": 0.3282, - "step": 6245500 - }, - { - "epoch": 3.74, - "learning_rate": 2.981926736401523e-05, - "loss": 0.3182, - "step": 6246000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9817167398454666e-05, - "loss": 0.3193, - "step": 6246500 - }, - { - "epoch": 3.75, - "learning_rate": 2.98150674328941e-05, - "loss": 0.3323, - "step": 6247000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9812967467333536e-05, - "loss": 0.3247, - "step": 6247500 - }, - { - "epoch": 3.75, - "learning_rate": 2.981087170170409e-05, - "loss": 0.3265, - "step": 6248000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9808771736143526e-05, - "loss": 0.3287, - "step": 6248500 - }, - { - "epoch": 3.75, - "learning_rate": 2.980667177058296e-05, - "loss": 0.3187, - "step": 6249000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9804571805022397e-05, - "loss": 0.3248, - "step": 6249500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9802471839461833e-05, - "loss": 0.3248, - "step": 6250000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9800371873901267e-05, - "loss": 0.3191, - "step": 6250500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9798271908340704e-05, - "loss": 0.3139, - "step": 6251000 - }, - { - "epoch": 3.75, - "learning_rate": 2.979617194278014e-05, - "loss": 0.3262, - "step": 6251500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9794076177150694e-05, - "loss": 0.3269, - "step": 6252000 - }, - { - "epoch": 3.75, - "learning_rate": 2.979197621159013e-05, - "loss": 0.3218, - "step": 6252500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9789880445960685e-05, - "loss": 0.3314, - "step": 6253000 - }, - { - "epoch": 3.75, - "learning_rate": 2.978778048040012e-05, - "loss": 0.3315, - "step": 6253500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9785680514839555e-05, - "loss": 0.3288, - "step": 6254000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9783580549278992e-05, - "loss": 0.3268, - "step": 6254500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9781484783649545e-05, - "loss": 0.3312, - "step": 6255000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9779384818088982e-05, - "loss": 0.3318, - "step": 6255500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9777284852528416e-05, - "loss": 0.3176, - "step": 6256000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9775184886967852e-05, - "loss": 0.3254, - "step": 6256500 - }, - { - "epoch": 3.75, - "learning_rate": 2.977308492140729e-05, - "loss": 0.3211, - "step": 6257000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9770984955846723e-05, - "loss": 0.3241, - "step": 6257500 - }, - { - "epoch": 3.75, - "learning_rate": 2.976888499028616e-05, - "loss": 0.3288, - "step": 6258000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9766785024725596e-05, - "loss": 0.3244, - "step": 6258500 - }, - { - "epoch": 3.75, - "learning_rate": 2.976468925909615e-05, - "loss": 0.3311, - "step": 6259000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9762589293535587e-05, - "loss": 0.3279, - "step": 6259500 - }, - { - "epoch": 3.75, - "learning_rate": 2.976048932797502e-05, - "loss": 0.3286, - "step": 6260000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9758393562345577e-05, - "loss": 0.3348, - "step": 6260500 - }, - { - "epoch": 3.75, - "learning_rate": 2.975629359678501e-05, - "loss": 0.3267, - "step": 6261000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9754193631224448e-05, - "loss": 0.3225, - "step": 6261500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9752093665663884e-05, - "loss": 0.3184, - "step": 6262000 - }, - { - "epoch": 3.75, - "learning_rate": 2.9749993700103318e-05, - "loss": 0.3341, - "step": 6262500 - }, - { - "epoch": 3.75, - "learning_rate": 2.9747893734542755e-05, - "loss": 0.3242, - "step": 6263000 - }, - { - "epoch": 3.76, - "learning_rate": 2.974579376898219e-05, - "loss": 0.3245, - "step": 6263500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9743693803421625e-05, - "loss": 0.3211, - "step": 6264000 - }, - { - "epoch": 3.76, - "learning_rate": 2.974159803779218e-05, - "loss": 0.3234, - "step": 6264500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9739498072231615e-05, - "loss": 0.3198, - "step": 6265000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9737398106671052e-05, - "loss": 0.3247, - "step": 6265500 - }, - { - "epoch": 3.76, - "learning_rate": 2.973529814111049e-05, - "loss": 0.3204, - "step": 6266000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9733198175549923e-05, - "loss": 0.3263, - "step": 6266500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9731098209989353e-05, - "loss": 0.3251, - "step": 6267000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9729002444359913e-05, - "loss": 0.3246, - "step": 6267500 - }, - { - "epoch": 3.76, - "learning_rate": 2.972690247879935e-05, - "loss": 0.3192, - "step": 6268000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9724802513238783e-05, - "loss": 0.327, - "step": 6268500 - }, - { - "epoch": 3.76, - "learning_rate": 2.972270254767822e-05, - "loss": 0.3262, - "step": 6269000 - }, - { - "epoch": 3.76, - "learning_rate": 2.972060258211765e-05, - "loss": 0.3186, - "step": 6269500 - }, - { - "epoch": 3.76, - "learning_rate": 2.971850681648821e-05, - "loss": 0.3351, - "step": 6270000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9716406850927647e-05, - "loss": 0.3245, - "step": 6270500 - }, - { - "epoch": 3.76, - "learning_rate": 2.971430688536708e-05, - "loss": 0.3217, - "step": 6271000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9712206919806518e-05, - "loss": 0.316, - "step": 6271500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9710106954245948e-05, - "loss": 0.3226, - "step": 6272000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9708006988685385e-05, - "loss": 0.3311, - "step": 6272500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9705907023124818e-05, - "loss": 0.318, - "step": 6273000 - }, - { - "epoch": 3.76, - "learning_rate": 2.970381125749538e-05, - "loss": 0.3255, - "step": 6273500 - }, - { - "epoch": 3.76, - "learning_rate": 2.970171129193481e-05, - "loss": 0.3243, - "step": 6274000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9699611326374245e-05, - "loss": 0.3363, - "step": 6274500 - }, - { - "epoch": 3.76, - "learning_rate": 2.969751136081368e-05, - "loss": 0.3284, - "step": 6275000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9695411395253116e-05, - "loss": 0.3297, - "step": 6275500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9693315629623676e-05, - "loss": 0.3274, - "step": 6276000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9691215664063106e-05, - "loss": 0.3231, - "step": 6276500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9689115698502543e-05, - "loss": 0.326, - "step": 6277000 - }, - { - "epoch": 3.76, - "learning_rate": 2.9687015732941976e-05, - "loss": 0.3304, - "step": 6277500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9684915767381413e-05, - "loss": 0.3335, - "step": 6278000 - }, - { - "epoch": 3.76, - "learning_rate": 2.968281580182085e-05, - "loss": 0.3292, - "step": 6278500 - }, - { - "epoch": 3.76, - "learning_rate": 2.9680720036191404e-05, - "loss": 0.3271, - "step": 6279000 - }, - { - "epoch": 3.76, - "learning_rate": 2.967862007063084e-05, - "loss": 0.3229, - "step": 6279500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9676520105070274e-05, - "loss": 0.3268, - "step": 6280000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9674424339440834e-05, - "loss": 0.338, - "step": 6280500 - }, - { - "epoch": 3.77, - "learning_rate": 2.967232437388027e-05, - "loss": 0.322, - "step": 6281000 - }, - { - "epoch": 3.77, - "learning_rate": 2.96702244083197e-05, - "loss": 0.3287, - "step": 6281500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9668124442759135e-05, - "loss": 0.3282, - "step": 6282000 - }, - { - "epoch": 3.77, - "learning_rate": 2.966602447719857e-05, - "loss": 0.3212, - "step": 6282500 - }, - { - "epoch": 3.77, - "learning_rate": 2.966392451163801e-05, - "loss": 0.3247, - "step": 6283000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9661824546077442e-05, - "loss": 0.3199, - "step": 6283500 - }, - { - "epoch": 3.77, - "learning_rate": 2.965972458051688e-05, - "loss": 0.3286, - "step": 6284000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9657624614956316e-05, - "loss": 0.3287, - "step": 6284500 - }, - { - "epoch": 3.77, - "learning_rate": 2.965552884932687e-05, - "loss": 0.3217, - "step": 6285000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9653428883766306e-05, - "loss": 0.324, - "step": 6285500 - }, - { - "epoch": 3.77, - "learning_rate": 2.965132891820574e-05, - "loss": 0.3257, - "step": 6286000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9649228952645176e-05, - "loss": 0.3276, - "step": 6286500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9647128987084613e-05, - "loss": 0.3308, - "step": 6287000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9645029021524047e-05, - "loss": 0.3172, - "step": 6287500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9642933255894604e-05, - "loss": 0.3231, - "step": 6288000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9640833290334037e-05, - "loss": 0.3218, - "step": 6288500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9638733324773474e-05, - "loss": 0.3227, - "step": 6289000 - }, - { - "epoch": 3.77, - "learning_rate": 2.963663335921291e-05, - "loss": 0.333, - "step": 6289500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9634537593583464e-05, - "loss": 0.3257, - "step": 6290000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9632437628022898e-05, - "loss": 0.3283, - "step": 6290500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9630337662462335e-05, - "loss": 0.3159, - "step": 6291000 - }, - { - "epoch": 3.77, - "learning_rate": 2.962823769690177e-05, - "loss": 0.328, - "step": 6291500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9626141931272325e-05, - "loss": 0.3372, - "step": 6292000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9624041965711762e-05, - "loss": 0.3266, - "step": 6292500 - }, - { - "epoch": 3.77, - "learning_rate": 2.9621942000151195e-05, - "loss": 0.3291, - "step": 6293000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9619842034590632e-05, - "loss": 0.3264, - "step": 6293500 - }, - { - "epoch": 3.77, - "learning_rate": 2.961774206903007e-05, - "loss": 0.3294, - "step": 6294000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9615646303400623e-05, - "loss": 0.3335, - "step": 6294500 - }, - { - "epoch": 3.77, - "learning_rate": 2.961354633784006e-05, - "loss": 0.3432, - "step": 6295000 - }, - { - "epoch": 3.77, - "learning_rate": 2.9611446372279493e-05, - "loss": 0.3196, - "step": 6295500 - }, - { - "epoch": 3.77, - "learning_rate": 2.960934640671893e-05, - "loss": 0.3208, - "step": 6296000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9607246441158367e-05, - "loss": 0.3327, - "step": 6296500 - }, - { - "epoch": 3.78, - "learning_rate": 2.960515067552892e-05, - "loss": 0.3255, - "step": 6297000 - }, - { - "epoch": 3.78, - "learning_rate": 2.960305490989948e-05, - "loss": 0.3372, - "step": 6297500 - }, - { - "epoch": 3.78, - "learning_rate": 2.960095494433891e-05, - "loss": 0.3223, - "step": 6298000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9598854978778344e-05, - "loss": 0.3279, - "step": 6298500 - }, - { - "epoch": 3.78, - "learning_rate": 2.959675501321778e-05, - "loss": 0.3296, - "step": 6299000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9594655047657218e-05, - "loss": 0.321, - "step": 6299500 - }, - { - "epoch": 3.78, - "learning_rate": 2.959255508209665e-05, - "loss": 0.3175, - "step": 6300000 - }, - { - "epoch": 3.78, - "eval_loss": 0.3166191577911377, - "eval_runtime": 1117.0284, - "eval_samples_per_second": 471.537, - "eval_steps_per_second": 78.59, - "step": 6300000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9590455116536088e-05, - "loss": 0.3221, - "step": 6300500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9588355150975525e-05, - "loss": 0.3261, - "step": 6301000 - }, - { - "epoch": 3.78, - "learning_rate": 2.958625938534608e-05, - "loss": 0.3218, - "step": 6301500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9584159419785515e-05, - "loss": 0.3312, - "step": 6302000 - }, - { - "epoch": 3.78, - "learning_rate": 2.958205945422495e-05, - "loss": 0.3334, - "step": 6302500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9579959488664386e-05, - "loss": 0.3266, - "step": 6303000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9577859523103823e-05, - "loss": 0.3304, - "step": 6303500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9575759557543256e-05, - "loss": 0.3229, - "step": 6304000 - }, - { - "epoch": 3.78, - "learning_rate": 2.957366379191381e-05, - "loss": 0.3247, - "step": 6304500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9571563826353246e-05, - "loss": 0.3286, - "step": 6305000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9569463860792683e-05, - "loss": 0.3196, - "step": 6305500 - }, - { - "epoch": 3.78, - "learning_rate": 2.956736389523212e-05, - "loss": 0.3315, - "step": 6306000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9565263929671554e-05, - "loss": 0.3316, - "step": 6306500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9563168164042107e-05, - "loss": 0.3245, - "step": 6307000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9561068198481544e-05, - "loss": 0.3229, - "step": 6307500 - }, - { - "epoch": 3.78, - "learning_rate": 2.955896823292098e-05, - "loss": 0.3277, - "step": 6308000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9556868267360414e-05, - "loss": 0.3224, - "step": 6308500 - }, - { - "epoch": 3.78, - "learning_rate": 2.955476830179985e-05, - "loss": 0.3222, - "step": 6309000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9552672536170405e-05, - "loss": 0.3317, - "step": 6309500 - }, - { - "epoch": 3.78, - "learning_rate": 2.955057257060984e-05, - "loss": 0.3252, - "step": 6310000 - }, - { - "epoch": 3.78, - "learning_rate": 2.954847260504928e-05, - "loss": 0.3249, - "step": 6310500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9546372639488712e-05, - "loss": 0.3129, - "step": 6311000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9544281073790392e-05, - "loss": 0.3408, - "step": 6311500 - }, - { - "epoch": 3.78, - "learning_rate": 2.954218110822983e-05, - "loss": 0.3261, - "step": 6312000 - }, - { - "epoch": 3.78, - "learning_rate": 2.9540081142669256e-05, - "loss": 0.3308, - "step": 6312500 - }, - { - "epoch": 3.78, - "learning_rate": 2.9537981177108693e-05, - "loss": 0.3288, - "step": 6313000 - }, - { - "epoch": 3.79, - "learning_rate": 2.953588121154813e-05, - "loss": 0.3286, - "step": 6313500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9533781245987563e-05, - "loss": 0.3264, - "step": 6314000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9531681280427e-05, - "loss": 0.3382, - "step": 6314500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9529581314866437e-05, - "loss": 0.3238, - "step": 6315000 - }, - { - "epoch": 3.79, - "learning_rate": 2.952748134930587e-05, - "loss": 0.323, - "step": 6315500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9525385583676427e-05, - "loss": 0.326, - "step": 6316000 - }, - { - "epoch": 3.79, - "learning_rate": 2.952328561811586e-05, - "loss": 0.3303, - "step": 6316500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9521185652555297e-05, - "loss": 0.3321, - "step": 6317000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9519085686994734e-05, - "loss": 0.3239, - "step": 6317500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9516989921365288e-05, - "loss": 0.3284, - "step": 6318000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9514889955804725e-05, - "loss": 0.3201, - "step": 6318500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9512789990244158e-05, - "loss": 0.3312, - "step": 6319000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9510690024683595e-05, - "loss": 0.3207, - "step": 6319500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9508590059123032e-05, - "loss": 0.3386, - "step": 6320000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9506490093562465e-05, - "loss": 0.3291, - "step": 6320500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9504390128001902e-05, - "loss": 0.3278, - "step": 6321000 - }, - { - "epoch": 3.79, - "learning_rate": 2.950229016244134e-05, - "loss": 0.3262, - "step": 6321500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9500190196880772e-05, - "loss": 0.3212, - "step": 6322000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9498094431251326e-05, - "loss": 0.3174, - "step": 6322500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9495994465690763e-05, - "loss": 0.3273, - "step": 6323000 - }, - { - "epoch": 3.79, - "learning_rate": 2.94938945001302e-05, - "loss": 0.328, - "step": 6323500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9491794534569633e-05, - "loss": 0.3249, - "step": 6324000 - }, - { - "epoch": 3.79, - "learning_rate": 2.948969876894019e-05, - "loss": 0.3292, - "step": 6324500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9487598803379624e-05, - "loss": 0.3272, - "step": 6325000 - }, - { - "epoch": 3.79, - "learning_rate": 2.948549883781906e-05, - "loss": 0.3251, - "step": 6325500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9483398872258497e-05, - "loss": 0.3212, - "step": 6326000 - }, - { - "epoch": 3.79, - "learning_rate": 2.948129890669793e-05, - "loss": 0.3188, - "step": 6326500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9479198941137368e-05, - "loss": 0.3276, - "step": 6327000 - }, - { - "epoch": 3.79, - "learning_rate": 2.947710317550792e-05, - "loss": 0.3262, - "step": 6327500 - }, - { - "epoch": 3.79, - "learning_rate": 2.9475003209947358e-05, - "loss": 0.3155, - "step": 6328000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9472903244386795e-05, - "loss": 0.3256, - "step": 6328500 - }, - { - "epoch": 3.79, - "learning_rate": 2.947080327882623e-05, - "loss": 0.3233, - "step": 6329000 - }, - { - "epoch": 3.79, - "learning_rate": 2.9468703313265665e-05, - "loss": 0.3257, - "step": 6329500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9466603347705095e-05, - "loss": 0.3301, - "step": 6330000 - }, - { - "epoch": 3.8, - "learning_rate": 2.946450338214453e-05, - "loss": 0.325, - "step": 6330500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9462403416583966e-05, - "loss": 0.3301, - "step": 6331000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9460307650954526e-05, - "loss": 0.3255, - "step": 6331500 - }, - { - "epoch": 3.8, - "learning_rate": 2.945821188532508e-05, - "loss": 0.3231, - "step": 6332000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9456111919764516e-05, - "loss": 0.3216, - "step": 6332500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9454011954203953e-05, - "loss": 0.3271, - "step": 6333000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9451911988643387e-05, - "loss": 0.3234, - "step": 6333500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9449816223013944e-05, - "loss": 0.3271, - "step": 6334000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9447716257453377e-05, - "loss": 0.3249, - "step": 6334500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9445616291892814e-05, - "loss": 0.3259, - "step": 6335000 - }, - { - "epoch": 3.8, - "learning_rate": 2.944351632633225e-05, - "loss": 0.3249, - "step": 6335500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9441416360771684e-05, - "loss": 0.3274, - "step": 6336000 - }, - { - "epoch": 3.8, - "learning_rate": 2.943931639521112e-05, - "loss": 0.3198, - "step": 6336500 - }, - { - "epoch": 3.8, - "learning_rate": 2.943721642965055e-05, - "loss": 0.334, - "step": 6337000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9435116464089985e-05, - "loss": 0.3266, - "step": 6337500 - }, - { - "epoch": 3.8, - "learning_rate": 2.943302069846055e-05, - "loss": 0.3263, - "step": 6338000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9430920732899982e-05, - "loss": 0.3336, - "step": 6338500 - }, - { - "epoch": 3.8, - "learning_rate": 2.942882076733942e-05, - "loss": 0.3197, - "step": 6339000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9426725001709972e-05, - "loss": 0.3297, - "step": 6339500 - }, - { - "epoch": 3.8, - "learning_rate": 2.942462503614941e-05, - "loss": 0.322, - "step": 6340000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9422525070588843e-05, - "loss": 0.3302, - "step": 6340500 - }, - { - "epoch": 3.8, - "learning_rate": 2.942042510502828e-05, - "loss": 0.3244, - "step": 6341000 - }, - { - "epoch": 3.8, - "learning_rate": 2.941832513946771e-05, - "loss": 0.3276, - "step": 6341500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9416225173907146e-05, - "loss": 0.3242, - "step": 6342000 - }, - { - "epoch": 3.8, - "learning_rate": 2.941412520834658e-05, - "loss": 0.3234, - "step": 6342500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9412025242786017e-05, - "loss": 0.3187, - "step": 6343000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9409929477156577e-05, - "loss": 0.3379, - "step": 6343500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9407829511596007e-05, - "loss": 0.3302, - "step": 6344000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9405733745966567e-05, - "loss": 0.333, - "step": 6344500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9403633780406004e-05, - "loss": 0.3322, - "step": 6345000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9401533814845438e-05, - "loss": 0.3296, - "step": 6345500 - }, - { - "epoch": 3.8, - "learning_rate": 2.9399433849284875e-05, - "loss": 0.3293, - "step": 6346000 - }, - { - "epoch": 3.8, - "learning_rate": 2.9397333883724305e-05, - "loss": 0.3308, - "step": 6346500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9395233918163738e-05, - "loss": 0.3244, - "step": 6347000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9393133952603175e-05, - "loss": 0.3256, - "step": 6347500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9391033987042612e-05, - "loss": 0.3255, - "step": 6348000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9388938221413172e-05, - "loss": 0.3256, - "step": 6348500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9386838255852602e-05, - "loss": 0.322, - "step": 6349000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9384742490223163e-05, - "loss": 0.3209, - "step": 6349500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9382642524662596e-05, - "loss": 0.3315, - "step": 6350000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9380542559102033e-05, - "loss": 0.3243, - "step": 6350500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9378442593541463e-05, - "loss": 0.3317, - "step": 6351000 - }, - { - "epoch": 3.81, - "learning_rate": 2.93763426279809e-05, - "loss": 0.3301, - "step": 6351500 - }, - { - "epoch": 3.81, - "learning_rate": 2.937424686235146e-05, - "loss": 0.3341, - "step": 6352000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9372146896790894e-05, - "loss": 0.3192, - "step": 6352500 - }, - { - "epoch": 3.81, - "learning_rate": 2.937004693123033e-05, - "loss": 0.3164, - "step": 6353000 - }, - { - "epoch": 3.81, - "learning_rate": 2.936794696566976e-05, - "loss": 0.34, - "step": 6353500 - }, - { - "epoch": 3.81, - "learning_rate": 2.936585120004032e-05, - "loss": 0.3192, - "step": 6354000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9363751234479754e-05, - "loss": 0.3341, - "step": 6354500 - }, - { - "epoch": 3.81, - "learning_rate": 2.936165126891919e-05, - "loss": 0.3217, - "step": 6355000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9359551303358628e-05, - "loss": 0.3321, - "step": 6355500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9357451337798058e-05, - "loss": 0.321, - "step": 6356000 - }, - { - "epoch": 3.81, - "learning_rate": 2.935535137223749e-05, - "loss": 0.3342, - "step": 6356500 - }, - { - "epoch": 3.81, - "learning_rate": 2.935325140667693e-05, - "loss": 0.3265, - "step": 6357000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9351151441116365e-05, - "loss": 0.3207, - "step": 6357500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9349055675486926e-05, - "loss": 0.3234, - "step": 6358000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9346955709926356e-05, - "loss": 0.3233, - "step": 6358500 - }, - { - "epoch": 3.81, - "learning_rate": 2.934485574436579e-05, - "loss": 0.3214, - "step": 6359000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9342755778805226e-05, - "loss": 0.313, - "step": 6359500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9340655813244663e-05, - "loss": 0.3199, - "step": 6360000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9338560047615223e-05, - "loss": 0.3269, - "step": 6360500 - }, - { - "epoch": 3.81, - "learning_rate": 2.933646008205465e-05, - "loss": 0.3294, - "step": 6361000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9334360116494087e-05, - "loss": 0.3326, - "step": 6361500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9332260150933523e-05, - "loss": 0.3297, - "step": 6362000 - }, - { - "epoch": 3.81, - "learning_rate": 2.9330160185372957e-05, - "loss": 0.3286, - "step": 6362500 - }, - { - "epoch": 3.81, - "learning_rate": 2.9328060219812394e-05, - "loss": 0.3293, - "step": 6363000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9325964454182947e-05, - "loss": 0.3326, - "step": 6363500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9323864488622384e-05, - "loss": 0.3397, - "step": 6364000 - }, - { - "epoch": 3.82, - "learning_rate": 2.932176452306182e-05, - "loss": 0.3243, - "step": 6364500 - }, - { - "epoch": 3.82, - "learning_rate": 2.931966875743238e-05, - "loss": 0.3155, - "step": 6365000 - }, - { - "epoch": 3.82, - "learning_rate": 2.931756879187181e-05, - "loss": 0.3364, - "step": 6365500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9315468826311245e-05, - "loss": 0.3348, - "step": 6366000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9313368860750682e-05, - "loss": 0.3181, - "step": 6366500 - }, - { - "epoch": 3.82, - "learning_rate": 2.931126889519012e-05, - "loss": 0.3173, - "step": 6367000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9309168929629552e-05, - "loss": 0.322, - "step": 6367500 - }, - { - "epoch": 3.82, - "learning_rate": 2.930706896406899e-05, - "loss": 0.3199, - "step": 6368000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9304968998508426e-05, - "loss": 0.3262, - "step": 6368500 - }, - { - "epoch": 3.82, - "learning_rate": 2.930286903294786e-05, - "loss": 0.3114, - "step": 6369000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9300773267318413e-05, - "loss": 0.3209, - "step": 6369500 - }, - { - "epoch": 3.82, - "learning_rate": 2.929867330175785e-05, - "loss": 0.3202, - "step": 6370000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9296573336197287e-05, - "loss": 0.3279, - "step": 6370500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9294473370636723e-05, - "loss": 0.328, - "step": 6371000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9292373405076157e-05, - "loss": 0.3277, - "step": 6371500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9290273439515594e-05, - "loss": 0.3296, - "step": 6372000 - }, - { - "epoch": 3.82, - "learning_rate": 2.928817347395503e-05, - "loss": 0.3268, - "step": 6372500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9286077708325584e-05, - "loss": 0.3236, - "step": 6373000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9283977742765018e-05, - "loss": 0.3251, - "step": 6373500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9281877777204454e-05, - "loss": 0.3148, - "step": 6374000 - }, - { - "epoch": 3.82, - "learning_rate": 2.927977781164389e-05, - "loss": 0.3322, - "step": 6374500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9277682046014445e-05, - "loss": 0.3303, - "step": 6375000 - }, - { - "epoch": 3.82, - "learning_rate": 2.927558208045388e-05, - "loss": 0.3296, - "step": 6375500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9273482114893315e-05, - "loss": 0.3279, - "step": 6376000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9271382149332752e-05, - "loss": 0.3234, - "step": 6376500 - }, - { - "epoch": 3.82, - "learning_rate": 2.926928218377219e-05, - "loss": 0.3163, - "step": 6377000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9267182218211622e-05, - "loss": 0.326, - "step": 6377500 - }, - { - "epoch": 3.82, - "learning_rate": 2.9265082252651052e-05, - "loss": 0.3205, - "step": 6378000 - }, - { - "epoch": 3.82, - "learning_rate": 2.926298228709049e-05, - "loss": 0.3284, - "step": 6378500 - }, - { - "epoch": 3.82, - "learning_rate": 2.926088652146105e-05, - "loss": 0.3131, - "step": 6379000 - }, - { - "epoch": 3.82, - "learning_rate": 2.9258790755831603e-05, - "loss": 0.318, - "step": 6379500 - }, - { - "epoch": 3.83, - "learning_rate": 2.925669079027104e-05, - "loss": 0.3218, - "step": 6380000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9254590824710473e-05, - "loss": 0.3199, - "step": 6380500 - }, - { - "epoch": 3.83, - "learning_rate": 2.925249085914991e-05, - "loss": 0.3195, - "step": 6381000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9250390893589347e-05, - "loss": 0.3294, - "step": 6381500 - }, - { - "epoch": 3.83, - "learning_rate": 2.924829092802878e-05, - "loss": 0.3226, - "step": 6382000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9246190962468217e-05, - "loss": 0.3248, - "step": 6382500 - }, - { - "epoch": 3.83, - "learning_rate": 2.924409519683877e-05, - "loss": 0.3224, - "step": 6383000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9241995231278208e-05, - "loss": 0.325, - "step": 6383500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9239895265717645e-05, - "loss": 0.3336, - "step": 6384000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9237795300157078e-05, - "loss": 0.3212, - "step": 6384500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9235695334596515e-05, - "loss": 0.3241, - "step": 6385000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9233595369035945e-05, - "loss": 0.3304, - "step": 6385500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9231495403475382e-05, - "loss": 0.3284, - "step": 6386000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9229399637845942e-05, - "loss": 0.3282, - "step": 6386500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9227299672285376e-05, - "loss": 0.326, - "step": 6387000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9225199706724813e-05, - "loss": 0.3209, - "step": 6387500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9223099741164243e-05, - "loss": 0.3142, - "step": 6388000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9220999775603676e-05, - "loss": 0.3282, - "step": 6388500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9218899810043113e-05, - "loss": 0.3158, - "step": 6389000 - }, - { - "epoch": 3.83, - "learning_rate": 2.921679984448255e-05, - "loss": 0.3205, - "step": 6389500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9214699878921983e-05, - "loss": 0.3291, - "step": 6390000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9212608313223664e-05, - "loss": 0.3238, - "step": 6390500 - }, - { - "epoch": 3.83, - "learning_rate": 2.92105083476631e-05, - "loss": 0.3257, - "step": 6391000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9208408382102534e-05, - "loss": 0.3242, - "step": 6391500 - }, - { - "epoch": 3.83, - "learning_rate": 2.920630841654197e-05, - "loss": 0.3276, - "step": 6392000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9204212650912524e-05, - "loss": 0.3126, - "step": 6392500 - }, - { - "epoch": 3.83, - "learning_rate": 2.920211268535196e-05, - "loss": 0.3321, - "step": 6393000 - }, - { - "epoch": 3.83, - "learning_rate": 2.9200012719791398e-05, - "loss": 0.3163, - "step": 6393500 - }, - { - "epoch": 3.83, - "learning_rate": 2.919791275423083e-05, - "loss": 0.3277, - "step": 6394000 - }, - { - "epoch": 3.83, - "learning_rate": 2.919581278867027e-05, - "loss": 0.3242, - "step": 6394500 - }, - { - "epoch": 3.83, - "learning_rate": 2.91937128231097e-05, - "loss": 0.3259, - "step": 6395000 - }, - { - "epoch": 3.83, - "learning_rate": 2.919161705748026e-05, - "loss": 0.321, - "step": 6395500 - }, - { - "epoch": 3.83, - "learning_rate": 2.9189517091919692e-05, - "loss": 0.3252, - "step": 6396000 - }, - { - "epoch": 3.83, - "learning_rate": 2.918741712635913e-05, - "loss": 0.3325, - "step": 6396500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9185317160798566e-05, - "loss": 0.3307, - "step": 6397000 - }, - { - "epoch": 3.84, - "learning_rate": 2.918322139516912e-05, - "loss": 0.3237, - "step": 6397500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9181121429608556e-05, - "loss": 0.3268, - "step": 6398000 - }, - { - "epoch": 3.84, - "learning_rate": 2.917902566397911e-05, - "loss": 0.3136, - "step": 6398500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9176925698418547e-05, - "loss": 0.324, - "step": 6399000 - }, - { - "epoch": 3.84, - "learning_rate": 2.917482573285798e-05, - "loss": 0.3228, - "step": 6399500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9172725767297417e-05, - "loss": 0.3183, - "step": 6400000 - }, - { - "epoch": 3.84, - "eval_loss": 0.3165391683578491, - "eval_runtime": 1122.9364, - "eval_samples_per_second": 469.056, - "eval_steps_per_second": 78.176, - "step": 6400000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9170625801736854e-05, - "loss": 0.3264, - "step": 6400500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9168525836176287e-05, - "loss": 0.3122, - "step": 6401000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9166425870615724e-05, - "loss": 0.3184, - "step": 6401500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9164325905055154e-05, - "loss": 0.3271, - "step": 6402000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9162225939494588e-05, - "loss": 0.3242, - "step": 6402500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9160125973934025e-05, - "loss": 0.3272, - "step": 6403000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9158030208304585e-05, - "loss": 0.3277, - "step": 6403500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9155930242744022e-05, - "loss": 0.3318, - "step": 6404000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9153830277183452e-05, - "loss": 0.3323, - "step": 6404500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9151730311622885e-05, - "loss": 0.3317, - "step": 6405000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9149630346062322e-05, - "loss": 0.3171, - "step": 6405500 - }, - { - "epoch": 3.84, - "learning_rate": 2.914753038050176e-05, - "loss": 0.3126, - "step": 6406000 - }, - { - "epoch": 3.84, - "learning_rate": 2.914543461487232e-05, - "loss": 0.3245, - "step": 6406500 - }, - { - "epoch": 3.84, - "learning_rate": 2.914333464931175e-05, - "loss": 0.3297, - "step": 6407000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9141234683751183e-05, - "loss": 0.316, - "step": 6407500 - }, - { - "epoch": 3.84, - "learning_rate": 2.913913471819062e-05, - "loss": 0.321, - "step": 6408000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9137043152492297e-05, - "loss": 0.3225, - "step": 6408500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9134943186931734e-05, - "loss": 0.3269, - "step": 6409000 - }, - { - "epoch": 3.84, - "learning_rate": 2.913284322137117e-05, - "loss": 0.3322, - "step": 6409500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9130743255810607e-05, - "loss": 0.3259, - "step": 6410000 - }, - { - "epoch": 3.84, - "learning_rate": 2.912864329025004e-05, - "loss": 0.3262, - "step": 6410500 - }, - { - "epoch": 3.84, - "learning_rate": 2.9126543324689478e-05, - "loss": 0.3264, - "step": 6411000 - }, - { - "epoch": 3.84, - "learning_rate": 2.9124443359128908e-05, - "loss": 0.3211, - "step": 6411500 - }, - { - "epoch": 3.84, - "learning_rate": 2.912234339356834e-05, - "loss": 0.3213, - "step": 6412000 - }, - { - "epoch": 3.84, - "learning_rate": 2.91202476279389e-05, - "loss": 0.3267, - "step": 6412500 - }, - { - "epoch": 3.84, - "learning_rate": 2.911814766237834e-05, - "loss": 0.3232, - "step": 6413000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9116047696817775e-05, - "loss": 0.3198, - "step": 6413500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9113947731257205e-05, - "loss": 0.315, - "step": 6414000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9111851965627766e-05, - "loss": 0.3244, - "step": 6414500 - }, - { - "epoch": 3.85, - "learning_rate": 2.91097520000672e-05, - "loss": 0.3315, - "step": 6415000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9107652034506636e-05, - "loss": 0.3259, - "step": 6415500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9105552068946073e-05, - "loss": 0.3278, - "step": 6416000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9103452103385503e-05, - "loss": 0.3193, - "step": 6416500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9101352137824936e-05, - "loss": 0.3213, - "step": 6417000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9099252172264373e-05, - "loss": 0.3215, - "step": 6417500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9097156406634934e-05, - "loss": 0.3149, - "step": 6418000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9095056441074364e-05, - "loss": 0.3259, - "step": 6418500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9092956475513797e-05, - "loss": 0.3307, - "step": 6419000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9090856509953234e-05, - "loss": 0.3202, - "step": 6419500 - }, - { - "epoch": 3.85, - "learning_rate": 2.908875654439267e-05, - "loss": 0.3224, - "step": 6420000 - }, - { - "epoch": 3.85, - "learning_rate": 2.908666077876323e-05, - "loss": 0.3136, - "step": 6420500 - }, - { - "epoch": 3.85, - "learning_rate": 2.908456081320266e-05, - "loss": 0.3213, - "step": 6421000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9082460847642095e-05, - "loss": 0.329, - "step": 6421500 - }, - { - "epoch": 3.85, - "learning_rate": 2.908036088208153e-05, - "loss": 0.3215, - "step": 6422000 - }, - { - "epoch": 3.85, - "learning_rate": 2.907826091652097e-05, - "loss": 0.3179, - "step": 6422500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9076160950960402e-05, - "loss": 0.3229, - "step": 6423000 - }, - { - "epoch": 3.85, - "learning_rate": 2.907406098539984e-05, - "loss": 0.3265, - "step": 6423500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9071961019839276e-05, - "loss": 0.3218, - "step": 6424000 - }, - { - "epoch": 3.85, - "learning_rate": 2.906986525420983e-05, - "loss": 0.3284, - "step": 6424500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9067765288649266e-05, - "loss": 0.3228, - "step": 6425000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9065669523019826e-05, - "loss": 0.3165, - "step": 6425500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9063569557459253e-05, - "loss": 0.3292, - "step": 6426000 - }, - { - "epoch": 3.85, - "learning_rate": 2.906146959189869e-05, - "loss": 0.3168, - "step": 6426500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9059369626338127e-05, - "loss": 0.3181, - "step": 6427000 - }, - { - "epoch": 3.85, - "learning_rate": 2.905726966077756e-05, - "loss": 0.3171, - "step": 6427500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9055169695216997e-05, - "loss": 0.3193, - "step": 6428000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9053069729656434e-05, - "loss": 0.3224, - "step": 6428500 - }, - { - "epoch": 3.85, - "learning_rate": 2.9050969764095867e-05, - "loss": 0.3206, - "step": 6429000 - }, - { - "epoch": 3.85, - "learning_rate": 2.9048878198397548e-05, - "loss": 0.3225, - "step": 6429500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9046778232836985e-05, - "loss": 0.3221, - "step": 6430000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9044678267276415e-05, - "loss": 0.3273, - "step": 6430500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9042578301715848e-05, - "loss": 0.3212, - "step": 6431000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9040478336155285e-05, - "loss": 0.3263, - "step": 6431500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9038378370594722e-05, - "loss": 0.3249, - "step": 6432000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9036278405034155e-05, - "loss": 0.3254, - "step": 6432500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9034178439473592e-05, - "loss": 0.3247, - "step": 6433000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9032082673844146e-05, - "loss": 0.3301, - "step": 6433500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9029982708283583e-05, - "loss": 0.3256, - "step": 6434000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9027882742723016e-05, - "loss": 0.3182, - "step": 6434500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9025782777162453e-05, - "loss": 0.3334, - "step": 6435000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9023687011533007e-05, - "loss": 0.3312, - "step": 6435500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9021587045972443e-05, - "loss": 0.3346, - "step": 6436000 - }, - { - "epoch": 3.86, - "learning_rate": 2.901948708041188e-05, - "loss": 0.3214, - "step": 6436500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9017387114851314e-05, - "loss": 0.3237, - "step": 6437000 - }, - { - "epoch": 3.86, - "learning_rate": 2.901528714929075e-05, - "loss": 0.3268, - "step": 6437500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9013191383661304e-05, - "loss": 0.3316, - "step": 6438000 - }, - { - "epoch": 3.86, - "learning_rate": 2.901109141810074e-05, - "loss": 0.3284, - "step": 6438500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9008991452540178e-05, - "loss": 0.3264, - "step": 6439000 - }, - { - "epoch": 3.86, - "learning_rate": 2.900689148697961e-05, - "loss": 0.3134, - "step": 6439500 - }, - { - "epoch": 3.86, - "learning_rate": 2.9004791521419048e-05, - "loss": 0.3262, - "step": 6440000 - }, - { - "epoch": 3.86, - "learning_rate": 2.9002691555858485e-05, - "loss": 0.3219, - "step": 6440500 - }, - { - "epoch": 3.86, - "learning_rate": 2.900059159029792e-05, - "loss": 0.318, - "step": 6441000 - }, - { - "epoch": 3.86, - "learning_rate": 2.8998491624737355e-05, - "loss": 0.3308, - "step": 6441500 - }, - { - "epoch": 3.86, - "learning_rate": 2.899639585910791e-05, - "loss": 0.3222, - "step": 6442000 - }, - { - "epoch": 3.86, - "learning_rate": 2.8994295893547346e-05, - "loss": 0.3338, - "step": 6442500 - }, - { - "epoch": 3.86, - "learning_rate": 2.8992195927986783e-05, - "loss": 0.3321, - "step": 6443000 - }, - { - "epoch": 3.86, - "learning_rate": 2.8990095962426216e-05, - "loss": 0.3343, - "step": 6443500 - }, - { - "epoch": 3.86, - "learning_rate": 2.8987995996865653e-05, - "loss": 0.3337, - "step": 6444000 - }, - { - "epoch": 3.86, - "learning_rate": 2.8985900231236206e-05, - "loss": 0.3218, - "step": 6444500 - }, - { - "epoch": 3.86, - "learning_rate": 2.8983800265675643e-05, - "loss": 0.3244, - "step": 6445000 - }, - { - "epoch": 3.86, - "learning_rate": 2.8981700300115077e-05, - "loss": 0.3212, - "step": 6445500 - }, - { - "epoch": 3.86, - "learning_rate": 2.8979600334554514e-05, - "loss": 0.3283, - "step": 6446000 - }, - { - "epoch": 3.86, - "learning_rate": 2.897750036899395e-05, - "loss": 0.3189, - "step": 6446500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8975404603364504e-05, - "loss": 0.328, - "step": 6447000 - }, - { - "epoch": 3.87, - "learning_rate": 2.897330463780394e-05, - "loss": 0.331, - "step": 6447500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8971204672243374e-05, - "loss": 0.3169, - "step": 6448000 - }, - { - "epoch": 3.87, - "learning_rate": 2.896910470668281e-05, - "loss": 0.3244, - "step": 6448500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8967004741122248e-05, - "loss": 0.3222, - "step": 6449000 - }, - { - "epoch": 3.87, - "learning_rate": 2.896490477556168e-05, - "loss": 0.3185, - "step": 6449500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8962804810001118e-05, - "loss": 0.3134, - "step": 6450000 - }, - { - "epoch": 3.87, - "learning_rate": 2.896070484444055e-05, - "loss": 0.3186, - "step": 6450500 - }, - { - "epoch": 3.87, - "learning_rate": 2.895860907881111e-05, - "loss": 0.3201, - "step": 6451000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8956513313181662e-05, - "loss": 0.3261, - "step": 6451500 - }, - { - "epoch": 3.87, - "learning_rate": 2.89544133476211e-05, - "loss": 0.3322, - "step": 6452000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8952313382060533e-05, - "loss": 0.331, - "step": 6452500 - }, - { - "epoch": 3.87, - "learning_rate": 2.895021341649997e-05, - "loss": 0.323, - "step": 6453000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8948117650870523e-05, - "loss": 0.3219, - "step": 6453500 - }, - { - "epoch": 3.87, - "learning_rate": 2.894601768530996e-05, - "loss": 0.3241, - "step": 6454000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8943917719749397e-05, - "loss": 0.3194, - "step": 6454500 - }, - { - "epoch": 3.87, - "learning_rate": 2.894181775418883e-05, - "loss": 0.3312, - "step": 6455000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8939717788628267e-05, - "loss": 0.3249, - "step": 6455500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8937617823067704e-05, - "loss": 0.3273, - "step": 6456000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8935522057438257e-05, - "loss": 0.3284, - "step": 6456500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8933422091877694e-05, - "loss": 0.3264, - "step": 6457000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8931322126317128e-05, - "loss": 0.3172, - "step": 6457500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8929222160756565e-05, - "loss": 0.3197, - "step": 6458000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8927122195196e-05, - "loss": 0.3177, - "step": 6458500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8925022229635435e-05, - "loss": 0.3298, - "step": 6459000 - }, - { - "epoch": 3.87, - "learning_rate": 2.892292646400599e-05, - "loss": 0.335, - "step": 6459500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8920826498445425e-05, - "loss": 0.3203, - "step": 6460000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8918726532884862e-05, - "loss": 0.3309, - "step": 6460500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8916626567324296e-05, - "loss": 0.3194, - "step": 6461000 - }, - { - "epoch": 3.87, - "learning_rate": 2.891453500162597e-05, - "loss": 0.3163, - "step": 6461500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8912435036065406e-05, - "loss": 0.3157, - "step": 6462000 - }, - { - "epoch": 3.87, - "learning_rate": 2.8910335070504843e-05, - "loss": 0.3237, - "step": 6462500 - }, - { - "epoch": 3.87, - "learning_rate": 2.8908235104944276e-05, - "loss": 0.336, - "step": 6463000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8906135139383713e-05, - "loss": 0.3269, - "step": 6463500 - }, - { - "epoch": 3.88, - "learning_rate": 2.890403517382315e-05, - "loss": 0.3276, - "step": 6464000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8901935208262584e-05, - "loss": 0.3235, - "step": 6464500 - }, - { - "epoch": 3.88, - "learning_rate": 2.889983524270202e-05, - "loss": 0.3282, - "step": 6465000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8897739477072574e-05, - "loss": 0.3296, - "step": 6465500 - }, - { - "epoch": 3.88, - "learning_rate": 2.889563951151201e-05, - "loss": 0.3292, - "step": 6466000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8893539545951444e-05, - "loss": 0.3284, - "step": 6466500 - }, - { - "epoch": 3.88, - "learning_rate": 2.889143958039088e-05, - "loss": 0.3228, - "step": 6467000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8889339614830318e-05, - "loss": 0.3084, - "step": 6467500 - }, - { - "epoch": 3.88, - "learning_rate": 2.888724384920087e-05, - "loss": 0.3258, - "step": 6468000 - }, - { - "epoch": 3.88, - "learning_rate": 2.888514388364031e-05, - "loss": 0.3206, - "step": 6468500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8883043918079742e-05, - "loss": 0.3196, - "step": 6469000 - }, - { - "epoch": 3.88, - "learning_rate": 2.888094395251918e-05, - "loss": 0.3154, - "step": 6469500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8878843986958616e-05, - "loss": 0.3244, - "step": 6470000 - }, - { - "epoch": 3.88, - "learning_rate": 2.887674822132917e-05, - "loss": 0.3188, - "step": 6470500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8874648255768606e-05, - "loss": 0.3205, - "step": 6471000 - }, - { - "epoch": 3.88, - "learning_rate": 2.887254829020804e-05, - "loss": 0.3238, - "step": 6471500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8870448324647476e-05, - "loss": 0.3231, - "step": 6472000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8868348359086913e-05, - "loss": 0.3282, - "step": 6472500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8866252593457467e-05, - "loss": 0.3221, - "step": 6473000 - }, - { - "epoch": 3.88, - "learning_rate": 2.88641526278969e-05, - "loss": 0.3165, - "step": 6473500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8862052662336337e-05, - "loss": 0.3299, - "step": 6474000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8859952696775774e-05, - "loss": 0.3192, - "step": 6474500 - }, - { - "epoch": 3.88, - "learning_rate": 2.885785273121521e-05, - "loss": 0.3296, - "step": 6475000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8855756965585764e-05, - "loss": 0.3208, - "step": 6475500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8853661199956318e-05, - "loss": 0.3187, - "step": 6476000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8851561234395755e-05, - "loss": 0.327, - "step": 6476500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8849461268835188e-05, - "loss": 0.3304, - "step": 6477000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8847361303274625e-05, - "loss": 0.3168, - "step": 6477500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8845261337714062e-05, - "loss": 0.3205, - "step": 6478000 - }, - { - "epoch": 3.88, - "learning_rate": 2.8843161372153495e-05, - "loss": 0.3244, - "step": 6478500 - }, - { - "epoch": 3.88, - "learning_rate": 2.8841061406592932e-05, - "loss": 0.3191, - "step": 6479000 - }, - { - "epoch": 3.88, - "learning_rate": 2.883896144103237e-05, - "loss": 0.3232, - "step": 6479500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8836865675402923e-05, - "loss": 0.3261, - "step": 6480000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8834765709842356e-05, - "loss": 0.3269, - "step": 6480500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8832665744281793e-05, - "loss": 0.3165, - "step": 6481000 - }, - { - "epoch": 3.89, - "learning_rate": 2.883056577872123e-05, - "loss": 0.3177, - "step": 6481500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8828465813160667e-05, - "loss": 0.3147, - "step": 6482000 - }, - { - "epoch": 3.89, - "learning_rate": 2.88263658476001e-05, - "loss": 0.3337, - "step": 6482500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8824265882039537e-05, - "loss": 0.3163, - "step": 6483000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8822165916478974e-05, - "loss": 0.3221, - "step": 6483500 - }, - { - "epoch": 3.89, - "learning_rate": 2.88200659509184e-05, - "loss": 0.3217, - "step": 6484000 - }, - { - "epoch": 3.89, - "learning_rate": 2.881797018528896e-05, - "loss": 0.3299, - "step": 6484500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8815870219728398e-05, - "loss": 0.3188, - "step": 6485000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8813770254167835e-05, - "loss": 0.3311, - "step": 6485500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8811670288607265e-05, - "loss": 0.3209, - "step": 6486000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8809570323046698e-05, - "loss": 0.3174, - "step": 6486500 - }, - { - "epoch": 3.89, - "learning_rate": 2.880747455741726e-05, - "loss": 0.3226, - "step": 6487000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8805374591856695e-05, - "loss": 0.3249, - "step": 6487500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8803274626296132e-05, - "loss": 0.3243, - "step": 6488000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8801174660735562e-05, - "loss": 0.3228, - "step": 6488500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8799074695174996e-05, - "loss": 0.3273, - "step": 6489000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8796978929545556e-05, - "loss": 0.3182, - "step": 6489500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8794878963984993e-05, - "loss": 0.3188, - "step": 6490000 - }, - { - "epoch": 3.89, - "learning_rate": 2.879277899842443e-05, - "loss": 0.3242, - "step": 6490500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8790679032863856e-05, - "loss": 0.3271, - "step": 6491000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8788579067303293e-05, - "loss": 0.3314, - "step": 6491500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8786483301673854e-05, - "loss": 0.3198, - "step": 6492000 - }, - { - "epoch": 3.89, - "learning_rate": 2.878438333611329e-05, - "loss": 0.3195, - "step": 6492500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8782283370552724e-05, - "loss": 0.3226, - "step": 6493000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8780183404992154e-05, - "loss": 0.3235, - "step": 6493500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8778087639362714e-05, - "loss": 0.3254, - "step": 6494000 - }, - { - "epoch": 3.89, - "learning_rate": 2.877598767380215e-05, - "loss": 0.3246, - "step": 6494500 - }, - { - "epoch": 3.89, - "learning_rate": 2.8773887708241588e-05, - "loss": 0.3352, - "step": 6495000 - }, - { - "epoch": 3.89, - "learning_rate": 2.877178774268102e-05, - "loss": 0.3221, - "step": 6495500 - }, - { - "epoch": 3.89, - "learning_rate": 2.876969197705158e-05, - "loss": 0.3224, - "step": 6496000 - }, - { - "epoch": 3.89, - "learning_rate": 2.8767592011491012e-05, - "loss": 0.3263, - "step": 6496500 - }, - { - "epoch": 3.9, - "learning_rate": 2.876549204593045e-05, - "loss": 0.321, - "step": 6497000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8763392080369886e-05, - "loss": 0.3254, - "step": 6497500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8761292114809312e-05, - "loss": 0.3189, - "step": 6498000 - }, - { - "epoch": 3.9, - "learning_rate": 2.875919214924875e-05, - "loss": 0.3169, - "step": 6498500 - }, - { - "epoch": 3.9, - "learning_rate": 2.875709638361931e-05, - "loss": 0.3306, - "step": 6499000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8754996418058746e-05, - "loss": 0.3219, - "step": 6499500 - }, - { - "epoch": 3.9, - "learning_rate": 2.875289645249818e-05, - "loss": 0.3247, - "step": 6500000 - }, - { - "epoch": 3.9, - "eval_loss": 0.3145334720611572, - "eval_runtime": 1125.3621, - "eval_samples_per_second": 468.045, - "eval_steps_per_second": 78.008, - "step": 6500000 - }, - { - "epoch": 3.9, - "learning_rate": 2.875079648693761e-05, - "loss": 0.3224, - "step": 6500500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8748696521377047e-05, - "loss": 0.3209, - "step": 6501000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8746600755747607e-05, - "loss": 0.3309, - "step": 6501500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8744500790187044e-05, - "loss": 0.3261, - "step": 6502000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8742400824626477e-05, - "loss": 0.3179, - "step": 6502500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8740300859065907e-05, - "loss": 0.3222, - "step": 6503000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8738205093436468e-05, - "loss": 0.3269, - "step": 6503500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8736105127875905e-05, - "loss": 0.3279, - "step": 6504000 - }, - { - "epoch": 3.9, - "learning_rate": 2.873400516231534e-05, - "loss": 0.3289, - "step": 6504500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8731909396685895e-05, - "loss": 0.3219, - "step": 6505000 - }, - { - "epoch": 3.9, - "learning_rate": 2.872980943112533e-05, - "loss": 0.3268, - "step": 6505500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8727709465564765e-05, - "loss": 0.3273, - "step": 6506000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8725609500004202e-05, - "loss": 0.3221, - "step": 6506500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8723509534443636e-05, - "loss": 0.3186, - "step": 6507000 - }, - { - "epoch": 3.9, - "learning_rate": 2.8721409568883066e-05, - "loss": 0.3178, - "step": 6507500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8719309603322503e-05, - "loss": 0.313, - "step": 6508000 - }, - { - "epoch": 3.9, - "learning_rate": 2.871720963776194e-05, - "loss": 0.3217, - "step": 6508500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8715109672201373e-05, - "loss": 0.3251, - "step": 6509000 - }, - { - "epoch": 3.9, - "learning_rate": 2.871300970664081e-05, - "loss": 0.3185, - "step": 6509500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8710909741080247e-05, - "loss": 0.3271, - "step": 6510000 - }, - { - "epoch": 3.9, - "learning_rate": 2.87088139754508e-05, - "loss": 0.3198, - "step": 6510500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8706714009890237e-05, - "loss": 0.3195, - "step": 6511000 - }, - { - "epoch": 3.9, - "learning_rate": 2.870461404432967e-05, - "loss": 0.3284, - "step": 6511500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8702514078769107e-05, - "loss": 0.3245, - "step": 6512000 - }, - { - "epoch": 3.9, - "learning_rate": 2.870041831313966e-05, - "loss": 0.3267, - "step": 6512500 - }, - { - "epoch": 3.9, - "learning_rate": 2.8698318347579098e-05, - "loss": 0.3214, - "step": 6513000 - }, - { - "epoch": 3.91, - "learning_rate": 2.869621838201853e-05, - "loss": 0.3164, - "step": 6513500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8694118416457968e-05, - "loss": 0.33, - "step": 6514000 - }, - { - "epoch": 3.91, - "learning_rate": 2.869202265082853e-05, - "loss": 0.3276, - "step": 6514500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8689926885199082e-05, - "loss": 0.3207, - "step": 6515000 - }, - { - "epoch": 3.91, - "learning_rate": 2.868782691963852e-05, - "loss": 0.3225, - "step": 6515500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8685726954077956e-05, - "loss": 0.3269, - "step": 6516000 - }, - { - "epoch": 3.91, - "learning_rate": 2.868362698851739e-05, - "loss": 0.3298, - "step": 6516500 - }, - { - "epoch": 3.91, - "learning_rate": 2.868152702295682e-05, - "loss": 0.3148, - "step": 6517000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8679427057396256e-05, - "loss": 0.3371, - "step": 6517500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8677327091835693e-05, - "loss": 0.317, - "step": 6518000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8675227126275126e-05, - "loss": 0.3214, - "step": 6518500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8673127160714563e-05, - "loss": 0.3243, - "step": 6519000 - }, - { - "epoch": 3.91, - "learning_rate": 2.867103559501624e-05, - "loss": 0.3274, - "step": 6519500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8668935629455677e-05, - "loss": 0.3252, - "step": 6520000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8666835663895114e-05, - "loss": 0.3226, - "step": 6520500 - }, - { - "epoch": 3.91, - "learning_rate": 2.866473569833455e-05, - "loss": 0.3316, - "step": 6521000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8662635732773984e-05, - "loss": 0.3243, - "step": 6521500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8660535767213414e-05, - "loss": 0.3278, - "step": 6522000 - }, - { - "epoch": 3.91, - "learning_rate": 2.865843580165285e-05, - "loss": 0.317, - "step": 6522500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8656335836092285e-05, - "loss": 0.3169, - "step": 6523000 - }, - { - "epoch": 3.91, - "learning_rate": 2.865423587053172e-05, - "loss": 0.3272, - "step": 6523500 - }, - { - "epoch": 3.91, - "learning_rate": 2.8652140104902282e-05, - "loss": 0.3373, - "step": 6524000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8650040139341712e-05, - "loss": 0.3194, - "step": 6524500 - }, - { - "epoch": 3.91, - "learning_rate": 2.864794017378115e-05, - "loss": 0.3197, - "step": 6525000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8645840208220582e-05, - "loss": 0.3218, - "step": 6525500 - }, - { - "epoch": 3.91, - "learning_rate": 2.864374024266002e-05, - "loss": 0.3152, - "step": 6526000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8641640277099456e-05, - "loss": 0.3245, - "step": 6526500 - }, - { - "epoch": 3.91, - "learning_rate": 2.863954451147001e-05, - "loss": 0.3337, - "step": 6527000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8637444545909446e-05, - "loss": 0.3098, - "step": 6527500 - }, - { - "epoch": 3.91, - "learning_rate": 2.863534458034888e-05, - "loss": 0.3251, - "step": 6528000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8633244614788317e-05, - "loss": 0.3246, - "step": 6528500 - }, - { - "epoch": 3.91, - "learning_rate": 2.863114884915887e-05, - "loss": 0.3233, - "step": 6529000 - }, - { - "epoch": 3.91, - "learning_rate": 2.8629048883598307e-05, - "loss": 0.3239, - "step": 6529500 - }, - { - "epoch": 3.91, - "learning_rate": 2.862694891803774e-05, - "loss": 0.3294, - "step": 6530000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8624848952477177e-05, - "loss": 0.3261, - "step": 6530500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8622748986916614e-05, - "loss": 0.3361, - "step": 6531000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8620653221287168e-05, - "loss": 0.331, - "step": 6531500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8618553255726605e-05, - "loss": 0.3122, - "step": 6532000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8616453290166038e-05, - "loss": 0.3179, - "step": 6532500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8614353324605475e-05, - "loss": 0.325, - "step": 6533000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8612257558976035e-05, - "loss": 0.3185, - "step": 6533500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8610157593415465e-05, - "loss": 0.3292, - "step": 6534000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8608057627854902e-05, - "loss": 0.3283, - "step": 6534500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8605957662294336e-05, - "loss": 0.3258, - "step": 6535000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8603857696733772e-05, - "loss": 0.3269, - "step": 6535500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8601761931104333e-05, - "loss": 0.3185, - "step": 6536000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8599661965543763e-05, - "loss": 0.3174, - "step": 6536500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8597566199914323e-05, - "loss": 0.3276, - "step": 6537000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8595466234353757e-05, - "loss": 0.3208, - "step": 6537500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8593366268793194e-05, - "loss": 0.3187, - "step": 6538000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8591266303232624e-05, - "loss": 0.3285, - "step": 6538500 - }, - { - "epoch": 3.92, - "learning_rate": 2.858916633767206e-05, - "loss": 0.3301, - "step": 6539000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8587066372111494e-05, - "loss": 0.3239, - "step": 6539500 - }, - { - "epoch": 3.92, - "learning_rate": 2.858496640655093e-05, - "loss": 0.3226, - "step": 6540000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8582866440990368e-05, - "loss": 0.3235, - "step": 6540500 - }, - { - "epoch": 3.92, - "learning_rate": 2.85807664754298e-05, - "loss": 0.322, - "step": 6541000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8578666509869238e-05, - "loss": 0.3212, - "step": 6541500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8576566544308675e-05, - "loss": 0.3178, - "step": 6542000 - }, - { - "epoch": 3.92, - "learning_rate": 2.857447077867923e-05, - "loss": 0.3242, - "step": 6542500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8572370813118665e-05, - "loss": 0.3306, - "step": 6543000 - }, - { - "epoch": 3.92, - "learning_rate": 2.85702708475581e-05, - "loss": 0.317, - "step": 6543500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8568170881997536e-05, - "loss": 0.3186, - "step": 6544000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8566070916436972e-05, - "loss": 0.3208, - "step": 6544500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8563970950876406e-05, - "loss": 0.3277, - "step": 6545000 - }, - { - "epoch": 3.92, - "learning_rate": 2.856187518524696e-05, - "loss": 0.3153, - "step": 6545500 - }, - { - "epoch": 3.92, - "learning_rate": 2.8559775219686396e-05, - "loss": 0.3237, - "step": 6546000 - }, - { - "epoch": 3.92, - "learning_rate": 2.8557675254125833e-05, - "loss": 0.3271, - "step": 6546500 - }, - { - "epoch": 3.93, - "learning_rate": 2.855557528856527e-05, - "loss": 0.3301, - "step": 6547000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8553475323004703e-05, - "loss": 0.3343, - "step": 6547500 - }, - { - "epoch": 3.93, - "learning_rate": 2.855137535744414e-05, - "loss": 0.3235, - "step": 6548000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8549279591814694e-05, - "loss": 0.3305, - "step": 6548500 - }, - { - "epoch": 3.93, - "learning_rate": 2.854717962625413e-05, - "loss": 0.318, - "step": 6549000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8545079660693564e-05, - "loss": 0.3294, - "step": 6549500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8542979695133e-05, - "loss": 0.3149, - "step": 6550000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8540879729572438e-05, - "loss": 0.3164, - "step": 6550500 - }, - { - "epoch": 3.93, - "learning_rate": 2.853877976401187e-05, - "loss": 0.3272, - "step": 6551000 - }, - { - "epoch": 3.93, - "learning_rate": 2.85366797984513e-05, - "loss": 0.3233, - "step": 6551500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8534584032821862e-05, - "loss": 0.3239, - "step": 6552000 - }, - { - "epoch": 3.93, - "learning_rate": 2.85324840672613e-05, - "loss": 0.3224, - "step": 6552500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8530384101700735e-05, - "loss": 0.3229, - "step": 6553000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8528284136140165e-05, - "loss": 0.3183, - "step": 6553500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8526188370510726e-05, - "loss": 0.3281, - "step": 6554000 - }, - { - "epoch": 3.93, - "learning_rate": 2.852408840495016e-05, - "loss": 0.3244, - "step": 6554500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8521988439389596e-05, - "loss": 0.3213, - "step": 6555000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8519888473829033e-05, - "loss": 0.323, - "step": 6555500 - }, - { - "epoch": 3.93, - "learning_rate": 2.851778850826846e-05, - "loss": 0.3156, - "step": 6556000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8515688542707896e-05, - "loss": 0.3209, - "step": 6556500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8513588577147333e-05, - "loss": 0.3186, - "step": 6557000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8511488611586767e-05, - "loss": 0.3207, - "step": 6557500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8509388646026204e-05, - "loss": 0.322, - "step": 6558000 - }, - { - "epoch": 3.93, - "learning_rate": 2.850728868046564e-05, - "loss": 0.3241, - "step": 6558500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8505188714905074e-05, - "loss": 0.3192, - "step": 6559000 - }, - { - "epoch": 3.93, - "learning_rate": 2.850308874934451e-05, - "loss": 0.3225, - "step": 6559500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8500992983715064e-05, - "loss": 0.3212, - "step": 6560000 - }, - { - "epoch": 3.93, - "learning_rate": 2.84988930181545e-05, - "loss": 0.3263, - "step": 6560500 - }, - { - "epoch": 3.93, - "learning_rate": 2.8496793052593938e-05, - "loss": 0.3196, - "step": 6561000 - }, - { - "epoch": 3.93, - "learning_rate": 2.849469728696449e-05, - "loss": 0.3201, - "step": 6561500 - }, - { - "epoch": 3.93, - "learning_rate": 2.849259732140393e-05, - "loss": 0.3202, - "step": 6562000 - }, - { - "epoch": 3.93, - "learning_rate": 2.8490497355843362e-05, - "loss": 0.3314, - "step": 6562500 - }, - { - "epoch": 3.93, - "learning_rate": 2.84883973902828e-05, - "loss": 0.3203, - "step": 6563000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8486301624653352e-05, - "loss": 0.3294, - "step": 6563500 - }, - { - "epoch": 3.94, - "learning_rate": 2.848420165909279e-05, - "loss": 0.3231, - "step": 6564000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8482101693532223e-05, - "loss": 0.3182, - "step": 6564500 - }, - { - "epoch": 3.94, - "learning_rate": 2.848000172797166e-05, - "loss": 0.319, - "step": 6565000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8477905962342213e-05, - "loss": 0.3192, - "step": 6565500 - }, - { - "epoch": 3.94, - "learning_rate": 2.847580599678165e-05, - "loss": 0.3229, - "step": 6566000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8473706031221087e-05, - "loss": 0.3215, - "step": 6566500 - }, - { - "epoch": 3.94, - "learning_rate": 2.847160606566052e-05, - "loss": 0.3243, - "step": 6567000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8469506100099957e-05, - "loss": 0.3207, - "step": 6567500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8467406134539394e-05, - "loss": 0.3214, - "step": 6568000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8465306168978827e-05, - "loss": 0.3235, - "step": 6568500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8463206203418264e-05, - "loss": 0.3271, - "step": 6569000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8461110437788818e-05, - "loss": 0.3289, - "step": 6569500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8459010472228255e-05, - "loss": 0.3252, - "step": 6570000 - }, - { - "epoch": 3.94, - "learning_rate": 2.845691050666769e-05, - "loss": 0.3196, - "step": 6570500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8454810541107125e-05, - "loss": 0.3207, - "step": 6571000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8452710575546562e-05, - "loss": 0.3186, - "step": 6571500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8450610609986e-05, - "loss": 0.3331, - "step": 6572000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8448514844356552e-05, - "loss": 0.3322, - "step": 6572500 - }, - { - "epoch": 3.94, - "learning_rate": 2.844641487879599e-05, - "loss": 0.3262, - "step": 6573000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8444314913235423e-05, - "loss": 0.3143, - "step": 6573500 - }, - { - "epoch": 3.94, - "learning_rate": 2.844221494767486e-05, - "loss": 0.3179, - "step": 6574000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8440114982114296e-05, - "loss": 0.3263, - "step": 6574500 - }, - { - "epoch": 3.94, - "learning_rate": 2.843801921648485e-05, - "loss": 0.336, - "step": 6575000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8435919250924283e-05, - "loss": 0.3178, - "step": 6575500 - }, - { - "epoch": 3.94, - "learning_rate": 2.843381928536372e-05, - "loss": 0.3174, - "step": 6576000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8431719319803157e-05, - "loss": 0.3281, - "step": 6576500 - }, - { - "epoch": 3.94, - "learning_rate": 2.842961935424259e-05, - "loss": 0.3267, - "step": 6577000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8427519388682027e-05, - "loss": 0.3249, - "step": 6577500 - }, - { - "epoch": 3.94, - "learning_rate": 2.842542362305258e-05, - "loss": 0.3215, - "step": 6578000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8423323657492018e-05, - "loss": 0.3224, - "step": 6578500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8421223691931455e-05, - "loss": 0.3174, - "step": 6579000 - }, - { - "epoch": 3.94, - "learning_rate": 2.8419123726370888e-05, - "loss": 0.3208, - "step": 6579500 - }, - { - "epoch": 3.94, - "learning_rate": 2.8417027960741445e-05, - "loss": 0.3195, - "step": 6580000 - }, - { - "epoch": 3.95, - "learning_rate": 2.841492799518088e-05, - "loss": 0.3281, - "step": 6580500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8412828029620315e-05, - "loss": 0.3222, - "step": 6581000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8410728064059752e-05, - "loss": 0.3174, - "step": 6581500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8408628098499186e-05, - "loss": 0.3268, - "step": 6582000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8406528132938622e-05, - "loss": 0.3191, - "step": 6582500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8404428167378053e-05, - "loss": 0.3236, - "step": 6583000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8402328201817486e-05, - "loss": 0.3284, - "step": 6583500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8400228236256923e-05, - "loss": 0.3155, - "step": 6584000 - }, - { - "epoch": 3.95, - "learning_rate": 2.839812827069636e-05, - "loss": 0.3314, - "step": 6584500 - }, - { - "epoch": 3.95, - "learning_rate": 2.839603250506692e-05, - "loss": 0.3232, - "step": 6585000 - }, - { - "epoch": 3.95, - "learning_rate": 2.839393253950635e-05, - "loss": 0.3173, - "step": 6585500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8391832573945784e-05, - "loss": 0.3223, - "step": 6586000 - }, - { - "epoch": 3.95, - "learning_rate": 2.838973260838522e-05, - "loss": 0.3169, - "step": 6586500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8387632642824657e-05, - "loss": 0.3248, - "step": 6587000 - }, - { - "epoch": 3.95, - "learning_rate": 2.838553267726409e-05, - "loss": 0.314, - "step": 6587500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8383432711703528e-05, - "loss": 0.3195, - "step": 6588000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8381332746142964e-05, - "loss": 0.3207, - "step": 6588500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8379236980513518e-05, - "loss": 0.3288, - "step": 6589000 - }, - { - "epoch": 3.95, - "learning_rate": 2.837714121488408e-05, - "loss": 0.3197, - "step": 6589500 - }, - { - "epoch": 3.95, - "learning_rate": 2.837504124932351e-05, - "loss": 0.3186, - "step": 6590000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8372941283762942e-05, - "loss": 0.3172, - "step": 6590500 - }, - { - "epoch": 3.95, - "learning_rate": 2.837084131820238e-05, - "loss": 0.3205, - "step": 6591000 - }, - { - "epoch": 3.95, - "learning_rate": 2.836874555257294e-05, - "loss": 0.3185, - "step": 6591500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8366645587012376e-05, - "loss": 0.3153, - "step": 6592000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8364545621451806e-05, - "loss": 0.323, - "step": 6592500 - }, - { - "epoch": 3.95, - "learning_rate": 2.836244565589124e-05, - "loss": 0.3237, - "step": 6593000 - }, - { - "epoch": 3.95, - "learning_rate": 2.83603498902618e-05, - "loss": 0.3279, - "step": 6593500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8358249924701237e-05, - "loss": 0.3248, - "step": 6594000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8356149959140673e-05, - "loss": 0.3224, - "step": 6594500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8354049993580104e-05, - "loss": 0.3273, - "step": 6595000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8351950028019537e-05, - "loss": 0.323, - "step": 6595500 - }, - { - "epoch": 3.95, - "learning_rate": 2.8349854262390097e-05, - "loss": 0.3336, - "step": 6596000 - }, - { - "epoch": 3.95, - "learning_rate": 2.8347754296829534e-05, - "loss": 0.3156, - "step": 6596500 - }, - { - "epoch": 3.96, - "learning_rate": 2.834565433126897e-05, - "loss": 0.3118, - "step": 6597000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8343554365708398e-05, - "loss": 0.3248, - "step": 6597500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8341454400147835e-05, - "loss": 0.3165, - "step": 6598000 - }, - { - "epoch": 3.96, - "learning_rate": 2.833935443458727e-05, - "loss": 0.3235, - "step": 6598500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8337254469026705e-05, - "loss": 0.325, - "step": 6599000 - }, - { - "epoch": 3.96, - "learning_rate": 2.833515870339727e-05, - "loss": 0.3155, - "step": 6599500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8333058737836695e-05, - "loss": 0.3181, - "step": 6600000 - }, - { - "epoch": 3.96, - "eval_loss": 0.31399548053741455, - "eval_runtime": 1119.5492, - "eval_samples_per_second": 470.475, - "eval_steps_per_second": 78.413, - "step": 6600000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8330958772276132e-05, - "loss": 0.3168, - "step": 6600500 - }, - { - "epoch": 3.96, - "learning_rate": 2.832885880671557e-05, - "loss": 0.328, - "step": 6601000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8326758841155002e-05, - "loss": 0.323, - "step": 6601500 - }, - { - "epoch": 3.96, - "learning_rate": 2.832465887559444e-05, - "loss": 0.32, - "step": 6602000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8322558910033876e-05, - "loss": 0.3144, - "step": 6602500 - }, - { - "epoch": 3.96, - "learning_rate": 2.832045894447331e-05, - "loss": 0.314, - "step": 6603000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8318363178843867e-05, - "loss": 0.3381, - "step": 6603500 - }, - { - "epoch": 3.96, - "learning_rate": 2.83162632132833e-05, - "loss": 0.3171, - "step": 6604000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8314163247722737e-05, - "loss": 0.3162, - "step": 6604500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8312063282162174e-05, - "loss": 0.3255, - "step": 6605000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8309963316601607e-05, - "loss": 0.3112, - "step": 6605500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8307863351041044e-05, - "loss": 0.3267, - "step": 6606000 - }, - { - "epoch": 3.96, - "learning_rate": 2.830576338548048e-05, - "loss": 0.3268, - "step": 6606500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8303667619851034e-05, - "loss": 0.3202, - "step": 6607000 - }, - { - "epoch": 3.96, - "learning_rate": 2.830156765429047e-05, - "loss": 0.3143, - "step": 6607500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8299467688729905e-05, - "loss": 0.3307, - "step": 6608000 - }, - { - "epoch": 3.96, - "learning_rate": 2.829736772316934e-05, - "loss": 0.3174, - "step": 6608500 - }, - { - "epoch": 3.96, - "learning_rate": 2.829526775760878e-05, - "loss": 0.3262, - "step": 6609000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8293167792048212e-05, - "loss": 0.3232, - "step": 6609500 - }, - { - "epoch": 3.96, - "learning_rate": 2.8291067826487642e-05, - "loss": 0.3149, - "step": 6610000 - }, - { - "epoch": 3.96, - "learning_rate": 2.828896786092708e-05, - "loss": 0.3183, - "step": 6610500 - }, - { - "epoch": 3.96, - "learning_rate": 2.828687209529764e-05, - "loss": 0.322, - "step": 6611000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8284776329668193e-05, - "loss": 0.3332, - "step": 6611500 - }, - { - "epoch": 3.96, - "learning_rate": 2.828267636410763e-05, - "loss": 0.3213, - "step": 6612000 - }, - { - "epoch": 3.96, - "learning_rate": 2.8280576398547063e-05, - "loss": 0.3327, - "step": 6612500 - }, - { - "epoch": 3.96, - "learning_rate": 2.82784764329865e-05, - "loss": 0.3257, - "step": 6613000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8276376467425937e-05, - "loss": 0.3355, - "step": 6613500 - }, - { - "epoch": 3.97, - "learning_rate": 2.827427650186537e-05, - "loss": 0.3246, - "step": 6614000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8272180736235927e-05, - "loss": 0.3286, - "step": 6614500 - }, - { - "epoch": 3.97, - "learning_rate": 2.827008077067536e-05, - "loss": 0.3164, - "step": 6615000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8267980805114797e-05, - "loss": 0.3227, - "step": 6615500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8265880839554234e-05, - "loss": 0.332, - "step": 6616000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8263780873993668e-05, - "loss": 0.3228, - "step": 6616500 - }, - { - "epoch": 3.97, - "learning_rate": 2.826168510836422e-05, - "loss": 0.3196, - "step": 6617000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8259585142803658e-05, - "loss": 0.3254, - "step": 6617500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8257485177243095e-05, - "loss": 0.3201, - "step": 6618000 - }, - { - "epoch": 3.97, - "learning_rate": 2.825538521168253e-05, - "loss": 0.3252, - "step": 6618500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8253289446053085e-05, - "loss": 0.3261, - "step": 6619000 - }, - { - "epoch": 3.97, - "learning_rate": 2.825118948049252e-05, - "loss": 0.3235, - "step": 6619500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8249089514931956e-05, - "loss": 0.326, - "step": 6620000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8246989549371393e-05, - "loss": 0.3288, - "step": 6620500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8244889583810826e-05, - "loss": 0.3229, - "step": 6621000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8242789618250263e-05, - "loss": 0.3182, - "step": 6621500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8240693852620817e-05, - "loss": 0.3219, - "step": 6622000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8238593887060253e-05, - "loss": 0.324, - "step": 6622500 - }, - { - "epoch": 3.97, - "learning_rate": 2.823649392149969e-05, - "loss": 0.3119, - "step": 6623000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8234393955939124e-05, - "loss": 0.321, - "step": 6623500 - }, - { - "epoch": 3.97, - "learning_rate": 2.823229399037856e-05, - "loss": 0.3215, - "step": 6624000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8230198224749114e-05, - "loss": 0.3206, - "step": 6624500 - }, - { - "epoch": 3.97, - "learning_rate": 2.822809825918855e-05, - "loss": 0.3338, - "step": 6625000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8225998293627988e-05, - "loss": 0.3242, - "step": 6625500 - }, - { - "epoch": 3.97, - "learning_rate": 2.822389832806742e-05, - "loss": 0.3238, - "step": 6626000 - }, - { - "epoch": 3.97, - "learning_rate": 2.8221802562437975e-05, - "loss": 0.3195, - "step": 6626500 - }, - { - "epoch": 3.97, - "learning_rate": 2.821970259687741e-05, - "loss": 0.3226, - "step": 6627000 - }, - { - "epoch": 3.97, - "learning_rate": 2.821760263131685e-05, - "loss": 0.3264, - "step": 6627500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8215502665756282e-05, - "loss": 0.3266, - "step": 6628000 - }, - { - "epoch": 3.97, - "learning_rate": 2.821340270019572e-05, - "loss": 0.3149, - "step": 6628500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8211306934566272e-05, - "loss": 0.3159, - "step": 6629000 - }, - { - "epoch": 3.97, - "learning_rate": 2.820920696900571e-05, - "loss": 0.328, - "step": 6629500 - }, - { - "epoch": 3.97, - "learning_rate": 2.8207107003445146e-05, - "loss": 0.3286, - "step": 6630000 - }, - { - "epoch": 3.98, - "learning_rate": 2.820500703788458e-05, - "loss": 0.3197, - "step": 6630500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8202907072324016e-05, - "loss": 0.3208, - "step": 6631000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8200807106763446e-05, - "loss": 0.3205, - "step": 6631500 - }, - { - "epoch": 3.98, - "learning_rate": 2.819870714120288e-05, - "loss": 0.3087, - "step": 6632000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8196607175642317e-05, - "loss": 0.3199, - "step": 6632500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8194511410012877e-05, - "loss": 0.3247, - "step": 6633000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8192411444452314e-05, - "loss": 0.3291, - "step": 6633500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8190311478891744e-05, - "loss": 0.3175, - "step": 6634000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8188211513331177e-05, - "loss": 0.328, - "step": 6634500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8186115747701738e-05, - "loss": 0.3141, - "step": 6635000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8184019982072295e-05, - "loss": 0.324, - "step": 6635500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8181920016511728e-05, - "loss": 0.3386, - "step": 6636000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8179820050951165e-05, - "loss": 0.3227, - "step": 6636500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8177720085390602e-05, - "loss": 0.3199, - "step": 6637000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8175620119830035e-05, - "loss": 0.3198, - "step": 6637500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8173520154269472e-05, - "loss": 0.3118, - "step": 6638000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8171424388640026e-05, - "loss": 0.3237, - "step": 6638500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8169324423079463e-05, - "loss": 0.3258, - "step": 6639000 - }, - { - "epoch": 3.98, - "learning_rate": 2.81672244575189e-05, - "loss": 0.3245, - "step": 6639500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8165124491958333e-05, - "loss": 0.3266, - "step": 6640000 - }, - { - "epoch": 3.98, - "learning_rate": 2.816302452639777e-05, - "loss": 0.3271, - "step": 6640500 - }, - { - "epoch": 3.98, - "learning_rate": 2.81609245608372e-05, - "loss": 0.3254, - "step": 6641000 - }, - { - "epoch": 3.98, - "learning_rate": 2.815882879520776e-05, - "loss": 0.3184, - "step": 6641500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8156728829647194e-05, - "loss": 0.3194, - "step": 6642000 - }, - { - "epoch": 3.98, - "learning_rate": 2.815463306401775e-05, - "loss": 0.327, - "step": 6642500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8152533098457184e-05, - "loss": 0.3209, - "step": 6643000 - }, - { - "epoch": 3.98, - "learning_rate": 2.815043313289662e-05, - "loss": 0.3261, - "step": 6643500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8148333167336058e-05, - "loss": 0.3163, - "step": 6644000 - }, - { - "epoch": 3.98, - "learning_rate": 2.814623320177549e-05, - "loss": 0.3221, - "step": 6644500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8144133236214928e-05, - "loss": 0.3253, - "step": 6645000 - }, - { - "epoch": 3.98, - "learning_rate": 2.8142033270654365e-05, - "loss": 0.3233, - "step": 6645500 - }, - { - "epoch": 3.98, - "learning_rate": 2.8139933305093795e-05, - "loss": 0.323, - "step": 6646000 - }, - { - "epoch": 3.98, - "learning_rate": 2.813783333953323e-05, - "loss": 0.3293, - "step": 6646500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8135733373972665e-05, - "loss": 0.32, - "step": 6647000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8133637608343226e-05, - "loss": 0.3217, - "step": 6647500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8131537642782656e-05, - "loss": 0.3196, - "step": 6648000 - }, - { - "epoch": 3.99, - "learning_rate": 2.812943767722209e-05, - "loss": 0.3205, - "step": 6648500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8127337711661526e-05, - "loss": 0.3253, - "step": 6649000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8125237746100963e-05, - "loss": 0.3238, - "step": 6649500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8123137780540396e-05, - "loss": 0.3273, - "step": 6650000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8121037814979833e-05, - "loss": 0.332, - "step": 6650500 - }, - { - "epoch": 3.99, - "learning_rate": 2.811893784941927e-05, - "loss": 0.3171, - "step": 6651000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8116842083789824e-05, - "loss": 0.3237, - "step": 6651500 - }, - { - "epoch": 3.99, - "learning_rate": 2.811474211822926e-05, - "loss": 0.319, - "step": 6652000 - }, - { - "epoch": 3.99, - "learning_rate": 2.811264635259982e-05, - "loss": 0.3178, - "step": 6652500 - }, - { - "epoch": 3.99, - "learning_rate": 2.811054638703925e-05, - "loss": 0.3209, - "step": 6653000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8108446421478684e-05, - "loss": 0.3219, - "step": 6653500 - }, - { - "epoch": 3.99, - "learning_rate": 2.810634645591812e-05, - "loss": 0.3173, - "step": 6654000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8104246490357558e-05, - "loss": 0.3214, - "step": 6654500 - }, - { - "epoch": 3.99, - "learning_rate": 2.810214652479699e-05, - "loss": 0.3256, - "step": 6655000 - }, - { - "epoch": 3.99, - "learning_rate": 2.810004655923643e-05, - "loss": 0.3307, - "step": 6655500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8097950793606982e-05, - "loss": 0.3216, - "step": 6656000 - }, - { - "epoch": 3.99, - "learning_rate": 2.809585082804642e-05, - "loss": 0.3254, - "step": 6656500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8093750862485852e-05, - "loss": 0.3181, - "step": 6657000 - }, - { - "epoch": 3.99, - "learning_rate": 2.809165509685641e-05, - "loss": 0.3293, - "step": 6657500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8089555131295843e-05, - "loss": 0.3204, - "step": 6658000 - }, - { - "epoch": 3.99, - "learning_rate": 2.808745516573528e-05, - "loss": 0.3264, - "step": 6658500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8085355200174716e-05, - "loss": 0.3218, - "step": 6659000 - }, - { - "epoch": 3.99, - "learning_rate": 2.808325523461415e-05, - "loss": 0.3235, - "step": 6659500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8081155269053587e-05, - "loss": 0.3153, - "step": 6660000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8079055303493024e-05, - "loss": 0.324, - "step": 6660500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8076955337932457e-05, - "loss": 0.3292, - "step": 6661000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8074855372371894e-05, - "loss": 0.3186, - "step": 6661500 - }, - { - "epoch": 3.99, - "learning_rate": 2.8072759606742447e-05, - "loss": 0.3326, - "step": 6662000 - }, - { - "epoch": 3.99, - "learning_rate": 2.8070659641181884e-05, - "loss": 0.3213, - "step": 6662500 - }, - { - "epoch": 3.99, - "learning_rate": 2.806855967562132e-05, - "loss": 0.3289, - "step": 6663000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8066459710060755e-05, - "loss": 0.3189, - "step": 6663500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8064363944431308e-05, - "loss": 0.3291, - "step": 6664000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8062263978870745e-05, - "loss": 0.3134, - "step": 6664500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8060164013310182e-05, - "loss": 0.3191, - "step": 6665000 - }, - { - "epoch": 4.0, - "learning_rate": 2.805806404774962e-05, - "loss": 0.3207, - "step": 6665500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8055964082189052e-05, - "loss": 0.319, - "step": 6666000 - }, - { - "epoch": 4.0, - "learning_rate": 2.805386411662849e-05, - "loss": 0.3175, - "step": 6666500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8051764151067926e-05, - "loss": 0.3261, - "step": 6667000 - }, - { - "epoch": 4.0, - "learning_rate": 2.804966418550736e-05, - "loss": 0.3266, - "step": 6667500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8047568419877913e-05, - "loss": 0.321, - "step": 6668000 - }, - { - "epoch": 4.0, - "learning_rate": 2.804547265424847e-05, - "loss": 0.3186, - "step": 6668500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8043372688687903e-05, - "loss": 0.3245, - "step": 6669000 - }, - { - "epoch": 4.0, - "learning_rate": 2.804127272312734e-05, - "loss": 0.3264, - "step": 6669500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8039172757566777e-05, - "loss": 0.3144, - "step": 6670000 - }, - { - "epoch": 4.0, - "learning_rate": 2.803707279200621e-05, - "loss": 0.3249, - "step": 6670500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8034972826445647e-05, - "loss": 0.324, - "step": 6671000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8032872860885084e-05, - "loss": 0.3276, - "step": 6671500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8030777095255638e-05, - "loss": 0.3202, - "step": 6672000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8028677129695075e-05, - "loss": 0.3074, - "step": 6672500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8026577164134508e-05, - "loss": 0.3187, - "step": 6673000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8024477198573945e-05, - "loss": 0.3096, - "step": 6673500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8022377233013382e-05, - "loss": 0.2962, - "step": 6674000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8020277267452815e-05, - "loss": 0.3069, - "step": 6674500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8018177301892245e-05, - "loss": 0.308, - "step": 6675000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8016077336331682e-05, - "loss": 0.3094, - "step": 6675500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8013981570702242e-05, - "loss": 0.3082, - "step": 6676000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8011881605141676e-05, - "loss": 0.3166, - "step": 6676500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8009781639581113e-05, - "loss": 0.3155, - "step": 6677000 - }, - { - "epoch": 4.0, - "learning_rate": 2.8007685873951666e-05, - "loss": 0.3106, - "step": 6677500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8005585908391103e-05, - "loss": 0.3053, - "step": 6678000 - }, - { - "epoch": 4.0, - "learning_rate": 2.800348594283054e-05, - "loss": 0.3088, - "step": 6678500 - }, - { - "epoch": 4.0, - "learning_rate": 2.8001385977269973e-05, - "loss": 0.3149, - "step": 6679000 - }, - { - "epoch": 4.0, - "learning_rate": 2.7999286011709407e-05, - "loss": 0.3036, - "step": 6679500 - }, - { - "epoch": 4.0, - "learning_rate": 2.7997186046148844e-05, - "loss": 0.3146, - "step": 6680000 - }, - { - "epoch": 4.01, - "learning_rate": 2.79950902805194e-05, - "loss": 0.3182, - "step": 6680500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7992990314958834e-05, - "loss": 0.3098, - "step": 6681000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7990890349398268e-05, - "loss": 0.317, - "step": 6681500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7988790383837705e-05, - "loss": 0.318, - "step": 6682000 - }, - { - "epoch": 4.01, - "learning_rate": 2.798669041827714e-05, - "loss": 0.3253, - "step": 6682500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7984590452716575e-05, - "loss": 0.313, - "step": 6683000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7982494687087132e-05, - "loss": 0.299, - "step": 6683500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7980394721526565e-05, - "loss": 0.316, - "step": 6684000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7978294755966002e-05, - "loss": 0.3121, - "step": 6684500 - }, - { - "epoch": 4.01, - "learning_rate": 2.797619479040544e-05, - "loss": 0.3031, - "step": 6685000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7974099024775993e-05, - "loss": 0.3076, - "step": 6685500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7971999059215426e-05, - "loss": 0.3103, - "step": 6686000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7969899093654863e-05, - "loss": 0.3101, - "step": 6686500 - }, - { - "epoch": 4.01, - "learning_rate": 2.79677991280943e-05, - "loss": 0.3209, - "step": 6687000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7965703362464857e-05, - "loss": 0.3154, - "step": 6687500 - }, - { - "epoch": 4.01, - "learning_rate": 2.796360339690429e-05, - "loss": 0.3162, - "step": 6688000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7961503431343724e-05, - "loss": 0.3099, - "step": 6688500 - }, - { - "epoch": 4.01, - "learning_rate": 2.795940346578316e-05, - "loss": 0.3082, - "step": 6689000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7957307700153717e-05, - "loss": 0.3137, - "step": 6689500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7955207734593154e-05, - "loss": 0.3107, - "step": 6690000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7953107769032588e-05, - "loss": 0.3116, - "step": 6690500 - }, - { - "epoch": 4.01, - "learning_rate": 2.795100780347202e-05, - "loss": 0.311, - "step": 6691000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7948907837911458e-05, - "loss": 0.3088, - "step": 6691500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7946807872350895e-05, - "loss": 0.3024, - "step": 6692000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7944707906790328e-05, - "loss": 0.3025, - "step": 6692500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7942607941229762e-05, - "loss": 0.3122, - "step": 6693000 - }, - { - "epoch": 4.01, - "learning_rate": 2.794051217560032e-05, - "loss": 0.3126, - "step": 6693500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7938412210039756e-05, - "loss": 0.3054, - "step": 6694000 - }, - { - "epoch": 4.01, - "learning_rate": 2.7936312244479192e-05, - "loss": 0.3047, - "step": 6694500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7934212278918626e-05, - "loss": 0.3146, - "step": 6695000 - }, - { - "epoch": 4.01, - "learning_rate": 2.793211231335806e-05, - "loss": 0.3167, - "step": 6695500 - }, - { - "epoch": 4.01, - "learning_rate": 2.7930012347797496e-05, - "loss": 0.3147, - "step": 6696000 - }, - { - "epoch": 4.01, - "learning_rate": 2.792791238223693e-05, - "loss": 0.3097, - "step": 6696500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7925812416676366e-05, - "loss": 0.3096, - "step": 6697000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7923716651046923e-05, - "loss": 0.3048, - "step": 6697500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7921616685486357e-05, - "loss": 0.3108, - "step": 6698000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7919516719925794e-05, - "loss": 0.3069, - "step": 6698500 - }, - { - "epoch": 4.02, - "learning_rate": 2.791742095429635e-05, - "loss": 0.3083, - "step": 6699000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7915320988735784e-05, - "loss": 0.314, - "step": 6699500 - }, - { - "epoch": 4.02, - "learning_rate": 2.791322102317522e-05, - "loss": 0.3049, - "step": 6700000 - }, - { - "epoch": 4.02, - "eval_loss": 0.3142998516559601, - "eval_runtime": 1121.2362, - "eval_samples_per_second": 469.767, - "eval_steps_per_second": 78.295, - "step": 6700000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7911121057614654e-05, - "loss": 0.3054, - "step": 6700500 - }, - { - "epoch": 4.02, - "learning_rate": 2.790902529198521e-05, - "loss": 0.3209, - "step": 6701000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7906925326424648e-05, - "loss": 0.3167, - "step": 6701500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7904825360864082e-05, - "loss": 0.318, - "step": 6702000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7902725395303515e-05, - "loss": 0.3213, - "step": 6702500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7900625429742952e-05, - "loss": 0.3023, - "step": 6703000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7898525464182386e-05, - "loss": 0.303, - "step": 6703500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7896425498621822e-05, - "loss": 0.3184, - "step": 6704000 - }, - { - "epoch": 4.02, - "learning_rate": 2.789432553306126e-05, - "loss": 0.3174, - "step": 6704500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7892229767431813e-05, - "loss": 0.2968, - "step": 6705000 - }, - { - "epoch": 4.02, - "learning_rate": 2.789013400180237e-05, - "loss": 0.3093, - "step": 6705500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7888034036241807e-05, - "loss": 0.321, - "step": 6706000 - }, - { - "epoch": 4.02, - "learning_rate": 2.788593407068124e-05, - "loss": 0.3065, - "step": 6706500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7883834105120677e-05, - "loss": 0.3102, - "step": 6707000 - }, - { - "epoch": 4.02, - "learning_rate": 2.788173413956011e-05, - "loss": 0.3062, - "step": 6707500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7879638373930667e-05, - "loss": 0.3112, - "step": 6708000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7877538408370104e-05, - "loss": 0.3076, - "step": 6708500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7875438442809538e-05, - "loss": 0.324, - "step": 6709000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7873338477248974e-05, - "loss": 0.3004, - "step": 6709500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7871238511688408e-05, - "loss": 0.3176, - "step": 6710000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7869142746058965e-05, - "loss": 0.3044, - "step": 6710500 - }, - { - "epoch": 4.02, - "learning_rate": 2.78670427804984e-05, - "loss": 0.306, - "step": 6711000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7864942814937835e-05, - "loss": 0.3068, - "step": 6711500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7862842849377272e-05, - "loss": 0.302, - "step": 6712000 - }, - { - "epoch": 4.02, - "learning_rate": 2.7860747083747826e-05, - "loss": 0.3104, - "step": 6712500 - }, - { - "epoch": 4.02, - "learning_rate": 2.7858647118187262e-05, - "loss": 0.3126, - "step": 6713000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7856547152626696e-05, - "loss": 0.3097, - "step": 6713500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7854447187066133e-05, - "loss": 0.3178, - "step": 6714000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7852347221505566e-05, - "loss": 0.3134, - "step": 6714500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7850247255945e-05, - "loss": 0.3124, - "step": 6715000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7848147290384437e-05, - "loss": 0.3182, - "step": 6715500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7846051524754994e-05, - "loss": 0.31, - "step": 6716000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7843955759125547e-05, - "loss": 0.3212, - "step": 6716500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7841855793564984e-05, - "loss": 0.3077, - "step": 6717000 - }, - { - "epoch": 4.03, - "learning_rate": 2.783975582800442e-05, - "loss": 0.3068, - "step": 6717500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7837655862443854e-05, - "loss": 0.3132, - "step": 6718000 - }, - { - "epoch": 4.03, - "learning_rate": 2.783555589688329e-05, - "loss": 0.3059, - "step": 6718500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7833455931322728e-05, - "loss": 0.3158, - "step": 6719000 - }, - { - "epoch": 4.03, - "learning_rate": 2.783135596576216e-05, - "loss": 0.3079, - "step": 6719500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7829256000201595e-05, - "loss": 0.3198, - "step": 6720000 - }, - { - "epoch": 4.03, - "learning_rate": 2.782715603464103e-05, - "loss": 0.3184, - "step": 6720500 - }, - { - "epoch": 4.03, - "learning_rate": 2.782506026901159e-05, - "loss": 0.308, - "step": 6721000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7822960303451025e-05, - "loss": 0.318, - "step": 6721500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7820860337890456e-05, - "loss": 0.3096, - "step": 6722000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7818760372329892e-05, - "loss": 0.3106, - "step": 6722500 - }, - { - "epoch": 4.03, - "learning_rate": 2.781666460670045e-05, - "loss": 0.3114, - "step": 6723000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7814564641139886e-05, - "loss": 0.311, - "step": 6723500 - }, - { - "epoch": 4.03, - "learning_rate": 2.781246467557932e-05, - "loss": 0.3131, - "step": 6724000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7810364710018753e-05, - "loss": 0.3059, - "step": 6724500 - }, - { - "epoch": 4.03, - "learning_rate": 2.780826474445819e-05, - "loss": 0.3043, - "step": 6725000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7806164778897627e-05, - "loss": 0.306, - "step": 6725500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7804069013268184e-05, - "loss": 0.3146, - "step": 6726000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7801969047707617e-05, - "loss": 0.3198, - "step": 6726500 - }, - { - "epoch": 4.03, - "learning_rate": 2.779986908214705e-05, - "loss": 0.3089, - "step": 6727000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7797769116586488e-05, - "loss": 0.3031, - "step": 6727500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7795669151025924e-05, - "loss": 0.3142, - "step": 6728000 - }, - { - "epoch": 4.03, - "learning_rate": 2.779357338539648e-05, - "loss": 0.3136, - "step": 6728500 - }, - { - "epoch": 4.03, - "learning_rate": 2.779147341983591e-05, - "loss": 0.3118, - "step": 6729000 - }, - { - "epoch": 4.03, - "learning_rate": 2.7789373454275348e-05, - "loss": 0.3124, - "step": 6729500 - }, - { - "epoch": 4.03, - "learning_rate": 2.7787273488714785e-05, - "loss": 0.3181, - "step": 6730000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7785177723085342e-05, - "loss": 0.3226, - "step": 6730500 - }, - { - "epoch": 4.04, - "learning_rate": 2.778307775752478e-05, - "loss": 0.3089, - "step": 6731000 - }, - { - "epoch": 4.04, - "learning_rate": 2.778097779196421e-05, - "loss": 0.3088, - "step": 6731500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7778877826403646e-05, - "loss": 0.3071, - "step": 6732000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7776777860843083e-05, - "loss": 0.3139, - "step": 6732500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7774677895282516e-05, - "loss": 0.3048, - "step": 6733000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7772577929721953e-05, - "loss": 0.3079, - "step": 6733500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7770477964161386e-05, - "loss": 0.3144, - "step": 6734000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7768382198531943e-05, - "loss": 0.3149, - "step": 6734500 - }, - { - "epoch": 4.04, - "learning_rate": 2.776628223297138e-05, - "loss": 0.3177, - "step": 6735000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7764182267410814e-05, - "loss": 0.317, - "step": 6735500 - }, - { - "epoch": 4.04, - "learning_rate": 2.776208230185025e-05, - "loss": 0.3256, - "step": 6736000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7759982336289684e-05, - "loss": 0.305, - "step": 6736500 - }, - { - "epoch": 4.04, - "learning_rate": 2.775788657066024e-05, - "loss": 0.3143, - "step": 6737000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7755786605099678e-05, - "loss": 0.3098, - "step": 6737500 - }, - { - "epoch": 4.04, - "learning_rate": 2.775368663953911e-05, - "loss": 0.3215, - "step": 6738000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7751586673978548e-05, - "loss": 0.3093, - "step": 6738500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7749490908349102e-05, - "loss": 0.3057, - "step": 6739000 - }, - { - "epoch": 4.04, - "learning_rate": 2.774739094278854e-05, - "loss": 0.3046, - "step": 6739500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7745290977227972e-05, - "loss": 0.3138, - "step": 6740000 - }, - { - "epoch": 4.04, - "learning_rate": 2.774319101166741e-05, - "loss": 0.3114, - "step": 6740500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7741091046106846e-05, - "loss": 0.2996, - "step": 6741000 - }, - { - "epoch": 4.04, - "learning_rate": 2.77389952804774e-05, - "loss": 0.3068, - "step": 6741500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7736895314916836e-05, - "loss": 0.3132, - "step": 6742000 - }, - { - "epoch": 4.04, - "learning_rate": 2.773479534935627e-05, - "loss": 0.3079, - "step": 6742500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7732695383795706e-05, - "loss": 0.3235, - "step": 6743000 - }, - { - "epoch": 4.04, - "learning_rate": 2.773059541823514e-05, - "loss": 0.3151, - "step": 6743500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7728495452674573e-05, - "loss": 0.3104, - "step": 6744000 - }, - { - "epoch": 4.04, - "learning_rate": 2.772639548711401e-05, - "loss": 0.3154, - "step": 6744500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7724295521553447e-05, - "loss": 0.3036, - "step": 6745000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7722199755924004e-05, - "loss": 0.3117, - "step": 6745500 - }, - { - "epoch": 4.04, - "learning_rate": 2.7720099790363438e-05, - "loss": 0.3122, - "step": 6746000 - }, - { - "epoch": 4.04, - "learning_rate": 2.7718004024733994e-05, - "loss": 0.3044, - "step": 6746500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7715904059173428e-05, - "loss": 0.3094, - "step": 6747000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7713804093612865e-05, - "loss": 0.3253, - "step": 6747500 - }, - { - "epoch": 4.05, - "learning_rate": 2.77117041280523e-05, - "loss": 0.3248, - "step": 6748000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7709604162491735e-05, - "loss": 0.3062, - "step": 6748500 - }, - { - "epoch": 4.05, - "learning_rate": 2.770750419693117e-05, - "loss": 0.3061, - "step": 6749000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7705404231370605e-05, - "loss": 0.3124, - "step": 6749500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7703304265810042e-05, - "loss": 0.3049, - "step": 6750000 - }, - { - "epoch": 4.05, - "learning_rate": 2.77012085001806e-05, - "loss": 0.3087, - "step": 6750500 - }, - { - "epoch": 4.05, - "learning_rate": 2.769910853462003e-05, - "loss": 0.3111, - "step": 6751000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7697008569059466e-05, - "loss": 0.3038, - "step": 6751500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7694908603498903e-05, - "loss": 0.3056, - "step": 6752000 - }, - { - "epoch": 4.05, - "learning_rate": 2.769281283786946e-05, - "loss": 0.3035, - "step": 6752500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7690717072240014e-05, - "loss": 0.3099, - "step": 6753000 - }, - { - "epoch": 4.05, - "learning_rate": 2.768861710667945e-05, - "loss": 0.3236, - "step": 6753500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7686517141118884e-05, - "loss": 0.3086, - "step": 6754000 - }, - { - "epoch": 4.05, - "learning_rate": 2.768441717555832e-05, - "loss": 0.3216, - "step": 6754500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7682321409928874e-05, - "loss": 0.313, - "step": 6755000 - }, - { - "epoch": 4.05, - "learning_rate": 2.768022144436831e-05, - "loss": 0.3091, - "step": 6755500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7678121478807748e-05, - "loss": 0.3252, - "step": 6756000 - }, - { - "epoch": 4.05, - "learning_rate": 2.767602151324718e-05, - "loss": 0.3131, - "step": 6756500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7673921547686618e-05, - "loss": 0.3147, - "step": 6757000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7671821582126055e-05, - "loss": 0.3171, - "step": 6757500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7669721616565485e-05, - "loss": 0.3117, - "step": 6758000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7667625850936046e-05, - "loss": 0.3115, - "step": 6758500 - }, - { - "epoch": 4.05, - "learning_rate": 2.766552588537548e-05, - "loss": 0.3132, - "step": 6759000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7663425919814916e-05, - "loss": 0.3022, - "step": 6759500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7661325954254353e-05, - "loss": 0.3207, - "step": 6760000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7659230188624906e-05, - "loss": 0.3125, - "step": 6760500 - }, - { - "epoch": 4.05, - "learning_rate": 2.765713022306434e-05, - "loss": 0.3109, - "step": 6761000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7655030257503777e-05, - "loss": 0.3106, - "step": 6761500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7652930291943213e-05, - "loss": 0.3218, - "step": 6762000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7650834526313767e-05, - "loss": 0.3223, - "step": 6762500 - }, - { - "epoch": 4.05, - "learning_rate": 2.7648734560753204e-05, - "loss": 0.3058, - "step": 6763000 - }, - { - "epoch": 4.05, - "learning_rate": 2.7646634595192637e-05, - "loss": 0.3113, - "step": 6763500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7644534629632074e-05, - "loss": 0.3157, - "step": 6764000 - }, - { - "epoch": 4.06, - "learning_rate": 2.764243466407151e-05, - "loss": 0.3066, - "step": 6764500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7640338898442065e-05, - "loss": 0.3161, - "step": 6765000 - }, - { - "epoch": 4.06, - "learning_rate": 2.76382389328815e-05, - "loss": 0.3089, - "step": 6765500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7636138967320935e-05, - "loss": 0.3141, - "step": 6766000 - }, - { - "epoch": 4.06, - "learning_rate": 2.763403900176037e-05, - "loss": 0.3116, - "step": 6766500 - }, - { - "epoch": 4.06, - "learning_rate": 2.763193903619981e-05, - "loss": 0.3072, - "step": 6767000 - }, - { - "epoch": 4.06, - "learning_rate": 2.762983907063924e-05, - "loss": 0.3077, - "step": 6767500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7627739105078675e-05, - "loss": 0.3097, - "step": 6768000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7625639139518112e-05, - "loss": 0.3123, - "step": 6768500 - }, - { - "epoch": 4.06, - "learning_rate": 2.762354337388867e-05, - "loss": 0.3239, - "step": 6769000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7621447608259223e-05, - "loss": 0.314, - "step": 6769500 - }, - { - "epoch": 4.06, - "learning_rate": 2.761934764269866e-05, - "loss": 0.3159, - "step": 6770000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7617247677138093e-05, - "loss": 0.3059, - "step": 6770500 - }, - { - "epoch": 4.06, - "learning_rate": 2.761514771157753e-05, - "loss": 0.3131, - "step": 6771000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7613051945948087e-05, - "loss": 0.3155, - "step": 6771500 - }, - { - "epoch": 4.06, - "learning_rate": 2.761095198038752e-05, - "loss": 0.3085, - "step": 6772000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7608852014826957e-05, - "loss": 0.309, - "step": 6772500 - }, - { - "epoch": 4.06, - "learning_rate": 2.760675204926639e-05, - "loss": 0.3101, - "step": 6773000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7604652083705828e-05, - "loss": 0.3101, - "step": 6773500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7602552118145264e-05, - "loss": 0.3063, - "step": 6774000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7600456352515818e-05, - "loss": 0.3118, - "step": 6774500 - }, - { - "epoch": 4.06, - "learning_rate": 2.759835638695525e-05, - "loss": 0.3147, - "step": 6775000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7596256421394688e-05, - "loss": 0.3107, - "step": 6775500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7594156455834125e-05, - "loss": 0.3155, - "step": 6776000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7592056490273562e-05, - "loss": 0.3091, - "step": 6776500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7589960724644116e-05, - "loss": 0.3089, - "step": 6777000 - }, - { - "epoch": 4.06, - "learning_rate": 2.758786075908355e-05, - "loss": 0.315, - "step": 6777500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7585760793522986e-05, - "loss": 0.3127, - "step": 6778000 - }, - { - "epoch": 4.06, - "learning_rate": 2.7583660827962423e-05, - "loss": 0.3129, - "step": 6778500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7581560862401856e-05, - "loss": 0.3154, - "step": 6779000 - }, - { - "epoch": 4.06, - "learning_rate": 2.757946089684129e-05, - "loss": 0.2998, - "step": 6779500 - }, - { - "epoch": 4.06, - "learning_rate": 2.7577360931280726e-05, - "loss": 0.305, - "step": 6780000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7575260965720163e-05, - "loss": 0.3162, - "step": 6780500 - }, - { - "epoch": 4.07, - "learning_rate": 2.757316520009072e-05, - "loss": 0.3223, - "step": 6781000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7571069434461274e-05, - "loss": 0.3079, - "step": 6781500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7568969468900707e-05, - "loss": 0.3128, - "step": 6782000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7566869503340144e-05, - "loss": 0.3155, - "step": 6782500 - }, - { - "epoch": 4.07, - "learning_rate": 2.756476953777958e-05, - "loss": 0.3125, - "step": 6783000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7562669572219018e-05, - "loss": 0.3059, - "step": 6783500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7560569606658448e-05, - "loss": 0.3183, - "step": 6784000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7558473841029005e-05, - "loss": 0.3126, - "step": 6784500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7556373875468442e-05, - "loss": 0.326, - "step": 6785000 - }, - { - "epoch": 4.07, - "learning_rate": 2.755427390990788e-05, - "loss": 0.3068, - "step": 6785500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7552173944347312e-05, - "loss": 0.3026, - "step": 6786000 - }, - { - "epoch": 4.07, - "learning_rate": 2.755007817871787e-05, - "loss": 0.3117, - "step": 6786500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7547978213157302e-05, - "loss": 0.3097, - "step": 6787000 - }, - { - "epoch": 4.07, - "learning_rate": 2.754587824759674e-05, - "loss": 0.3101, - "step": 6787500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7543778282036176e-05, - "loss": 0.3105, - "step": 6788000 - }, - { - "epoch": 4.07, - "learning_rate": 2.754167831647561e-05, - "loss": 0.3058, - "step": 6788500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7539582550846167e-05, - "loss": 0.3181, - "step": 6789000 - }, - { - "epoch": 4.07, - "learning_rate": 2.75374825852856e-05, - "loss": 0.3131, - "step": 6789500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7535382619725037e-05, - "loss": 0.3153, - "step": 6790000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7533282654164474e-05, - "loss": 0.3014, - "step": 6790500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7531182688603907e-05, - "loss": 0.3071, - "step": 6791000 - }, - { - "epoch": 4.07, - "learning_rate": 2.752908272304334e-05, - "loss": 0.3102, - "step": 6791500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7526986957413898e-05, - "loss": 0.3156, - "step": 6792000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7524886991853334e-05, - "loss": 0.3152, - "step": 6792500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7522787026292768e-05, - "loss": 0.3071, - "step": 6793000 - }, - { - "epoch": 4.07, - "learning_rate": 2.75206870607322e-05, - "loss": 0.328, - "step": 6793500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7518587095171638e-05, - "loss": 0.3143, - "step": 6794000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7516491329542195e-05, - "loss": 0.3257, - "step": 6794500 - }, - { - "epoch": 4.07, - "learning_rate": 2.7514391363981632e-05, - "loss": 0.3171, - "step": 6795000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7512291398421066e-05, - "loss": 0.313, - "step": 6795500 - }, - { - "epoch": 4.07, - "learning_rate": 2.75101914328605e-05, - "loss": 0.3075, - "step": 6796000 - }, - { - "epoch": 4.07, - "learning_rate": 2.7508091467299936e-05, - "loss": 0.3093, - "step": 6796500 - }, - { - "epoch": 4.08, - "learning_rate": 2.750599150173937e-05, - "loss": 0.3151, - "step": 6797000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7503891536178806e-05, - "loss": 0.3225, - "step": 6797500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7501795770549363e-05, - "loss": 0.3086, - "step": 6798000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7499695804988797e-05, - "loss": 0.3136, - "step": 6798500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7497595839428233e-05, - "loss": 0.3046, - "step": 6799000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7495495873867667e-05, - "loss": 0.3095, - "step": 6799500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7493395908307104e-05, - "loss": 0.3069, - "step": 6800000 - }, - { - "epoch": 4.08, - "eval_loss": 0.3127097189426422, - "eval_runtime": 1122.1159, - "eval_samples_per_second": 469.399, - "eval_steps_per_second": 78.233, - "step": 6800000 - }, - { - "epoch": 4.08, - "learning_rate": 2.749130014267766e-05, - "loss": 0.3145, - "step": 6800500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7489200177117094e-05, - "loss": 0.3134, - "step": 6801000 - }, - { - "epoch": 4.08, - "learning_rate": 2.748710021155653e-05, - "loss": 0.3195, - "step": 6801500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7485000245995964e-05, - "loss": 0.3055, - "step": 6802000 - }, - { - "epoch": 4.08, - "learning_rate": 2.74829002804354e-05, - "loss": 0.3135, - "step": 6802500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7480800314874838e-05, - "loss": 0.3139, - "step": 6803000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7478700349314268e-05, - "loss": 0.3083, - "step": 6803500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7476604583684825e-05, - "loss": 0.3102, - "step": 6804000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7474504618124262e-05, - "loss": 0.3072, - "step": 6804500 - }, - { - "epoch": 4.08, - "learning_rate": 2.74724046525637e-05, - "loss": 0.3128, - "step": 6805000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7470304687003136e-05, - "loss": 0.3087, - "step": 6805500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7468204721442566e-05, - "loss": 0.3062, - "step": 6806000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7466104755882003e-05, - "loss": 0.3114, - "step": 6806500 - }, - { - "epoch": 4.08, - "learning_rate": 2.746400479032144e-05, - "loss": 0.3, - "step": 6807000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7461909024691996e-05, - "loss": 0.3078, - "step": 6807500 - }, - { - "epoch": 4.08, - "learning_rate": 2.745980905913143e-05, - "loss": 0.3079, - "step": 6808000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7457709093570863e-05, - "loss": 0.3075, - "step": 6808500 - }, - { - "epoch": 4.08, - "learning_rate": 2.74556091280103e-05, - "loss": 0.3127, - "step": 6809000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7453513362380857e-05, - "loss": 0.3129, - "step": 6809500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7451413396820294e-05, - "loss": 0.3058, - "step": 6810000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7449313431259727e-05, - "loss": 0.3153, - "step": 6810500 - }, - { - "epoch": 4.08, - "learning_rate": 2.744721346569916e-05, - "loss": 0.314, - "step": 6811000 - }, - { - "epoch": 4.08, - "learning_rate": 2.7445113500138598e-05, - "loss": 0.3103, - "step": 6811500 - }, - { - "epoch": 4.08, - "learning_rate": 2.7443017734509155e-05, - "loss": 0.3195, - "step": 6812000 - }, - { - "epoch": 4.08, - "learning_rate": 2.744091776894859e-05, - "loss": 0.3065, - "step": 6812500 - }, - { - "epoch": 4.08, - "learning_rate": 2.743881780338802e-05, - "loss": 0.3128, - "step": 6813000 - }, - { - "epoch": 4.08, - "learning_rate": 2.743671783782746e-05, - "loss": 0.3082, - "step": 6813500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7434617872266895e-05, - "loss": 0.3145, - "step": 6814000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7432522106637452e-05, - "loss": 0.3239, - "step": 6814500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7430422141076886e-05, - "loss": 0.3119, - "step": 6815000 - }, - { - "epoch": 4.09, - "learning_rate": 2.742832217551632e-05, - "loss": 0.3127, - "step": 6815500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7426222209955756e-05, - "loss": 0.316, - "step": 6816000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7424126444326313e-05, - "loss": 0.3155, - "step": 6816500 - }, - { - "epoch": 4.09, - "learning_rate": 2.742202647876575e-05, - "loss": 0.3114, - "step": 6817000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7419926513205183e-05, - "loss": 0.3091, - "step": 6817500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7417826547644617e-05, - "loss": 0.3115, - "step": 6818000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7415726582084054e-05, - "loss": 0.32, - "step": 6818500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7413626616523487e-05, - "loss": 0.31, - "step": 6819000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7411530850894047e-05, - "loss": 0.3112, - "step": 6819500 - }, - { - "epoch": 4.09, - "learning_rate": 2.740943088533348e-05, - "loss": 0.3157, - "step": 6820000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7407330919772914e-05, - "loss": 0.3207, - "step": 6820500 - }, - { - "epoch": 4.09, - "learning_rate": 2.740523095421235e-05, - "loss": 0.3207, - "step": 6821000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7403130988651785e-05, - "loss": 0.3229, - "step": 6821500 - }, - { - "epoch": 4.09, - "learning_rate": 2.740103102309122e-05, - "loss": 0.3044, - "step": 6822000 - }, - { - "epoch": 4.09, - "learning_rate": 2.739893105753066e-05, - "loss": 0.3151, - "step": 6822500 - }, - { - "epoch": 4.09, - "learning_rate": 2.739683109197009e-05, - "loss": 0.3058, - "step": 6823000 - }, - { - "epoch": 4.09, - "learning_rate": 2.739473532634065e-05, - "loss": 0.3238, - "step": 6823500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7392635360780082e-05, - "loss": 0.3111, - "step": 6824000 - }, - { - "epoch": 4.09, - "learning_rate": 2.739053539521952e-05, - "loss": 0.3147, - "step": 6824500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7388435429658956e-05, - "loss": 0.3117, - "step": 6825000 - }, - { - "epoch": 4.09, - "learning_rate": 2.738633966402951e-05, - "loss": 0.3197, - "step": 6825500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7384239698468943e-05, - "loss": 0.314, - "step": 6826000 - }, - { - "epoch": 4.09, - "learning_rate": 2.738213973290838e-05, - "loss": 0.3177, - "step": 6826500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7380039767347817e-05, - "loss": 0.3081, - "step": 6827000 - }, - { - "epoch": 4.09, - "learning_rate": 2.737793980178725e-05, - "loss": 0.3141, - "step": 6827500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7375839836226684e-05, - "loss": 0.3108, - "step": 6828000 - }, - { - "epoch": 4.09, - "learning_rate": 2.737374407059724e-05, - "loss": 0.3257, - "step": 6828500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7371644105036677e-05, - "loss": 0.3093, - "step": 6829000 - }, - { - "epoch": 4.09, - "learning_rate": 2.7369544139476114e-05, - "loss": 0.3138, - "step": 6829500 - }, - { - "epoch": 4.09, - "learning_rate": 2.7367448373846668e-05, - "loss": 0.3242, - "step": 6830000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7365348408286105e-05, - "loss": 0.3097, - "step": 6830500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7363248442725538e-05, - "loss": 0.3131, - "step": 6831000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7361148477164975e-05, - "loss": 0.3113, - "step": 6831500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7359048511604412e-05, - "loss": 0.2993, - "step": 6832000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7356948546043842e-05, - "loss": 0.3155, - "step": 6832500 - }, - { - "epoch": 4.1, - "learning_rate": 2.735484858048328e-05, - "loss": 0.308, - "step": 6833000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7352748614922716e-05, - "loss": 0.3033, - "step": 6833500 - }, - { - "epoch": 4.1, - "learning_rate": 2.735064864936215e-05, - "loss": 0.3092, - "step": 6834000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7348552883732706e-05, - "loss": 0.3182, - "step": 6834500 - }, - { - "epoch": 4.1, - "learning_rate": 2.734645291817214e-05, - "loss": 0.3083, - "step": 6835000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7344352952611576e-05, - "loss": 0.3118, - "step": 6835500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7342252987051013e-05, - "loss": 0.3124, - "step": 6836000 - }, - { - "epoch": 4.1, - "learning_rate": 2.734015722142157e-05, - "loss": 0.3163, - "step": 6836500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7338057255861004e-05, - "loss": 0.3124, - "step": 6837000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7335957290300437e-05, - "loss": 0.3155, - "step": 6837500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7333857324739874e-05, - "loss": 0.3131, - "step": 6838000 - }, - { - "epoch": 4.1, - "learning_rate": 2.733175735917931e-05, - "loss": 0.3213, - "step": 6838500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7329657393618744e-05, - "loss": 0.313, - "step": 6839000 - }, - { - "epoch": 4.1, - "learning_rate": 2.732755742805818e-05, - "loss": 0.3065, - "step": 6839500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7325457462497614e-05, - "loss": 0.3094, - "step": 6840000 - }, - { - "epoch": 4.1, - "learning_rate": 2.732336169686817e-05, - "loss": 0.3123, - "step": 6840500 - }, - { - "epoch": 4.1, - "learning_rate": 2.732126593123873e-05, - "loss": 0.3204, - "step": 6841000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7319165965678165e-05, - "loss": 0.3029, - "step": 6841500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7317066000117595e-05, - "loss": 0.3156, - "step": 6842000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7314966034557032e-05, - "loss": 0.3078, - "step": 6842500 - }, - { - "epoch": 4.1, - "learning_rate": 2.731287026892759e-05, - "loss": 0.3095, - "step": 6843000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7310770303367026e-05, - "loss": 0.3112, - "step": 6843500 - }, - { - "epoch": 4.1, - "learning_rate": 2.730867033780646e-05, - "loss": 0.3182, - "step": 6844000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7306570372245893e-05, - "loss": 0.3068, - "step": 6844500 - }, - { - "epoch": 4.1, - "learning_rate": 2.730447460661645e-05, - "loss": 0.3145, - "step": 6845000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7302374641055887e-05, - "loss": 0.3173, - "step": 6845500 - }, - { - "epoch": 4.1, - "learning_rate": 2.7300274675495324e-05, - "loss": 0.3144, - "step": 6846000 - }, - { - "epoch": 4.1, - "learning_rate": 2.7298174709934757e-05, - "loss": 0.3139, - "step": 6846500 - }, - { - "epoch": 4.11, - "learning_rate": 2.729607894430531e-05, - "loss": 0.3045, - "step": 6847000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7293978978744747e-05, - "loss": 0.3292, - "step": 6847500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7291879013184184e-05, - "loss": 0.3139, - "step": 6848000 - }, - { - "epoch": 4.11, - "learning_rate": 2.728977904762362e-05, - "loss": 0.3233, - "step": 6848500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7287679082063055e-05, - "loss": 0.3084, - "step": 6849000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7285579116502488e-05, - "loss": 0.3053, - "step": 6849500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7283483350873045e-05, - "loss": 0.3167, - "step": 6850000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7281383385312482e-05, - "loss": 0.3096, - "step": 6850500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7279283419751915e-05, - "loss": 0.3133, - "step": 6851000 - }, - { - "epoch": 4.11, - "learning_rate": 2.727718345419135e-05, - "loss": 0.3049, - "step": 6851500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7275087688561906e-05, - "loss": 0.3095, - "step": 6852000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7272987723001343e-05, - "loss": 0.3175, - "step": 6852500 - }, - { - "epoch": 4.11, - "learning_rate": 2.727088775744078e-05, - "loss": 0.3143, - "step": 6853000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7268787791880213e-05, - "loss": 0.302, - "step": 6853500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7266687826319646e-05, - "loss": 0.3099, - "step": 6854000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7264587860759083e-05, - "loss": 0.3067, - "step": 6854500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7262487895198517e-05, - "loss": 0.3134, - "step": 6855000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7260387929637954e-05, - "loss": 0.3117, - "step": 6855500 - }, - { - "epoch": 4.11, - "learning_rate": 2.725829216400851e-05, - "loss": 0.3279, - "step": 6856000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7256192198447944e-05, - "loss": 0.3233, - "step": 6856500 - }, - { - "epoch": 4.11, - "learning_rate": 2.725409223288738e-05, - "loss": 0.3167, - "step": 6857000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7251992267326814e-05, - "loss": 0.308, - "step": 6857500 - }, - { - "epoch": 4.11, - "learning_rate": 2.724989650169737e-05, - "loss": 0.3092, - "step": 6858000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7247796536136808e-05, - "loss": 0.3137, - "step": 6858500 - }, - { - "epoch": 4.11, - "learning_rate": 2.724569657057624e-05, - "loss": 0.3098, - "step": 6859000 - }, - { - "epoch": 4.11, - "learning_rate": 2.724359660501568e-05, - "loss": 0.3157, - "step": 6859500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7241496639455112e-05, - "loss": 0.307, - "step": 6860000 - }, - { - "epoch": 4.11, - "learning_rate": 2.723939667389455e-05, - "loss": 0.311, - "step": 6860500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7237296708333986e-05, - "loss": 0.315, - "step": 6861000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7235196742773416e-05, - "loss": 0.3123, - "step": 6861500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7233100977143973e-05, - "loss": 0.3134, - "step": 6862000 - }, - { - "epoch": 4.11, - "learning_rate": 2.7231005211514533e-05, - "loss": 0.3104, - "step": 6862500 - }, - { - "epoch": 4.11, - "learning_rate": 2.7228905245953966e-05, - "loss": 0.311, - "step": 6863000 - }, - { - "epoch": 4.11, - "learning_rate": 2.72268052803934e-05, - "loss": 0.3087, - "step": 6863500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7224705314832837e-05, - "loss": 0.3119, - "step": 6864000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7222609549203394e-05, - "loss": 0.3181, - "step": 6864500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7220509583642827e-05, - "loss": 0.3119, - "step": 6865000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7218409618082264e-05, - "loss": 0.3129, - "step": 6865500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7216309652521697e-05, - "loss": 0.3134, - "step": 6866000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7214213886892254e-05, - "loss": 0.3214, - "step": 6866500 - }, - { - "epoch": 4.12, - "learning_rate": 2.721211392133169e-05, - "loss": 0.3129, - "step": 6867000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7210013955771125e-05, - "loss": 0.3146, - "step": 6867500 - }, - { - "epoch": 4.12, - "learning_rate": 2.720791399021056e-05, - "loss": 0.3125, - "step": 6868000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7205814024649995e-05, - "loss": 0.3076, - "step": 6868500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7203718259020552e-05, - "loss": 0.3111, - "step": 6869000 - }, - { - "epoch": 4.12, - "learning_rate": 2.720161829345999e-05, - "loss": 0.3082, - "step": 6869500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7199518327899422e-05, - "loss": 0.3098, - "step": 6870000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7197418362338856e-05, - "loss": 0.3095, - "step": 6870500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7195318396778293e-05, - "loss": 0.3032, - "step": 6871000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7193218431217726e-05, - "loss": 0.3193, - "step": 6871500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7191122665588283e-05, - "loss": 0.3041, - "step": 6872000 - }, - { - "epoch": 4.12, - "learning_rate": 2.718902270002772e-05, - "loss": 0.3204, - "step": 6872500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7186922734467153e-05, - "loss": 0.3115, - "step": 6873000 - }, - { - "epoch": 4.12, - "learning_rate": 2.718482276890659e-05, - "loss": 0.3094, - "step": 6873500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7182727003277147e-05, - "loss": 0.3214, - "step": 6874000 - }, - { - "epoch": 4.12, - "learning_rate": 2.718062703771658e-05, - "loss": 0.32, - "step": 6874500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7178527072156017e-05, - "loss": 0.308, - "step": 6875000 - }, - { - "epoch": 4.12, - "learning_rate": 2.717642710659545e-05, - "loss": 0.3094, - "step": 6875500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7174327141034884e-05, - "loss": 0.3136, - "step": 6876000 - }, - { - "epoch": 4.12, - "learning_rate": 2.717222717547432e-05, - "loss": 0.3044, - "step": 6876500 - }, - { - "epoch": 4.12, - "learning_rate": 2.7170131409844878e-05, - "loss": 0.3064, - "step": 6877000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7168031444284315e-05, - "loss": 0.3124, - "step": 6877500 - }, - { - "epoch": 4.12, - "learning_rate": 2.716593147872375e-05, - "loss": 0.3143, - "step": 6878000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7163831513163182e-05, - "loss": 0.3177, - "step": 6878500 - }, - { - "epoch": 4.12, - "learning_rate": 2.716173154760262e-05, - "loss": 0.3119, - "step": 6879000 - }, - { - "epoch": 4.12, - "learning_rate": 2.7159631582042056e-05, - "loss": 0.3158, - "step": 6879500 - }, - { - "epoch": 4.12, - "learning_rate": 2.715753161648149e-05, - "loss": 0.3201, - "step": 6880000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7155431650920922e-05, - "loss": 0.3168, - "step": 6880500 - }, - { - "epoch": 4.13, - "learning_rate": 2.715333588529148e-05, - "loss": 0.3042, - "step": 6881000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7151235919730916e-05, - "loss": 0.3026, - "step": 6881500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7149140154101473e-05, - "loss": 0.3174, - "step": 6882000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7147040188540907e-05, - "loss": 0.3096, - "step": 6882500 - }, - { - "epoch": 4.13, - "learning_rate": 2.714494022298034e-05, - "loss": 0.3055, - "step": 6883000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7142840257419777e-05, - "loss": 0.314, - "step": 6883500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7140744491790334e-05, - "loss": 0.3076, - "step": 6884000 - }, - { - "epoch": 4.13, - "learning_rate": 2.713864452622977e-05, - "loss": 0.3086, - "step": 6884500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7136544560669204e-05, - "loss": 0.3194, - "step": 6885000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7134444595108638e-05, - "loss": 0.3133, - "step": 6885500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7132344629548075e-05, - "loss": 0.3149, - "step": 6886000 - }, - { - "epoch": 4.13, - "learning_rate": 2.713024466398751e-05, - "loss": 0.3141, - "step": 6886500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7128144698426945e-05, - "loss": 0.3106, - "step": 6887000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7126048932797502e-05, - "loss": 0.3169, - "step": 6887500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7123948967236935e-05, - "loss": 0.3169, - "step": 6888000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7121849001676372e-05, - "loss": 0.3103, - "step": 6888500 - }, - { - "epoch": 4.13, - "learning_rate": 2.711974903611581e-05, - "loss": 0.3152, - "step": 6889000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7117649070555242e-05, - "loss": 0.3071, - "step": 6889500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7115549104994676e-05, - "loss": 0.3116, - "step": 6890000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7113449139434113e-05, - "loss": 0.3083, - "step": 6890500 - }, - { - "epoch": 4.13, - "learning_rate": 2.7111349173873546e-05, - "loss": 0.3046, - "step": 6891000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7109253408244107e-05, - "loss": 0.3134, - "step": 6891500 - }, - { - "epoch": 4.13, - "learning_rate": 2.710715344268354e-05, - "loss": 0.3158, - "step": 6892000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7105053477122974e-05, - "loss": 0.3195, - "step": 6892500 - }, - { - "epoch": 4.13, - "learning_rate": 2.710295351156241e-05, - "loss": 0.3109, - "step": 6893000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7100853546001844e-05, - "loss": 0.3083, - "step": 6893500 - }, - { - "epoch": 4.13, - "learning_rate": 2.709875358044128e-05, - "loss": 0.3118, - "step": 6894000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7096653614880718e-05, - "loss": 0.3196, - "step": 6894500 - }, - { - "epoch": 4.13, - "learning_rate": 2.709455784925127e-05, - "loss": 0.3317, - "step": 6895000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7092457883690708e-05, - "loss": 0.3149, - "step": 6895500 - }, - { - "epoch": 4.13, - "learning_rate": 2.709035791813014e-05, - "loss": 0.3071, - "step": 6896000 - }, - { - "epoch": 4.13, - "learning_rate": 2.7088257952569578e-05, - "loss": 0.3128, - "step": 6896500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7086157987009015e-05, - "loss": 0.3147, - "step": 6897000 - }, - { - "epoch": 4.14, - "learning_rate": 2.708406222137957e-05, - "loss": 0.3096, - "step": 6897500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7081962255819002e-05, - "loss": 0.3097, - "step": 6898000 - }, - { - "epoch": 4.14, - "learning_rate": 2.707986229025844e-05, - "loss": 0.3108, - "step": 6898500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7077762324697876e-05, - "loss": 0.3057, - "step": 6899000 - }, - { - "epoch": 4.14, - "learning_rate": 2.707566235913731e-05, - "loss": 0.3159, - "step": 6899500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7073562393576743e-05, - "loss": 0.3113, - "step": 6900000 - }, - { - "epoch": 4.14, - "eval_loss": 0.31120121479034424, - "eval_runtime": 1123.8834, - "eval_samples_per_second": 468.661, - "eval_steps_per_second": 78.11, - "step": 6900000 - }, - { - "epoch": 4.14, - "learning_rate": 2.70714666279473e-05, - "loss": 0.316, - "step": 6900500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7069366662386737e-05, - "loss": 0.3162, - "step": 6901000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7067266696826173e-05, - "loss": 0.3107, - "step": 6901500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7065166731265607e-05, - "loss": 0.3142, - "step": 6902000 - }, - { - "epoch": 4.14, - "learning_rate": 2.706306676570504e-05, - "loss": 0.3198, - "step": 6902500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7060966800144477e-05, - "loss": 0.3175, - "step": 6903000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7058871034515034e-05, - "loss": 0.3101, - "step": 6903500 - }, - { - "epoch": 4.14, - "learning_rate": 2.705677106895447e-05, - "loss": 0.3182, - "step": 6904000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7054671103393904e-05, - "loss": 0.3079, - "step": 6904500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7052571137833338e-05, - "loss": 0.3103, - "step": 6905000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7050471172272775e-05, - "loss": 0.3054, - "step": 6905500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7048375406643332e-05, - "loss": 0.3148, - "step": 6906000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7046275441082765e-05, - "loss": 0.3171, - "step": 6906500 - }, - { - "epoch": 4.14, - "learning_rate": 2.70441754755222e-05, - "loss": 0.3094, - "step": 6907000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7042075509961635e-05, - "loss": 0.3155, - "step": 6907500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7039975544401072e-05, - "loss": 0.3078, - "step": 6908000 - }, - { - "epoch": 4.14, - "learning_rate": 2.7037875578840506e-05, - "loss": 0.3175, - "step": 6908500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7035775613279943e-05, - "loss": 0.3067, - "step": 6909000 - }, - { - "epoch": 4.14, - "learning_rate": 2.703367564771938e-05, - "loss": 0.3083, - "step": 6909500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7031584082021053e-05, - "loss": 0.3219, - "step": 6910000 - }, - { - "epoch": 4.14, - "learning_rate": 2.702948411646049e-05, - "loss": 0.3136, - "step": 6910500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7027384150899927e-05, - "loss": 0.3226, - "step": 6911000 - }, - { - "epoch": 4.14, - "learning_rate": 2.702528418533936e-05, - "loss": 0.3244, - "step": 6911500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7023184219778794e-05, - "loss": 0.3133, - "step": 6912000 - }, - { - "epoch": 4.14, - "learning_rate": 2.702108425421823e-05, - "loss": 0.3178, - "step": 6912500 - }, - { - "epoch": 4.14, - "learning_rate": 2.7018984288657664e-05, - "loss": 0.3068, - "step": 6913000 - }, - { - "epoch": 4.14, - "learning_rate": 2.70168843230971e-05, - "loss": 0.3074, - "step": 6913500 - }, - { - "epoch": 4.15, - "learning_rate": 2.7014788557467658e-05, - "loss": 0.3165, - "step": 6914000 - }, - { - "epoch": 4.15, - "learning_rate": 2.701268859190709e-05, - "loss": 0.3121, - "step": 6914500 - }, - { - "epoch": 4.15, - "learning_rate": 2.7010588626346528e-05, - "loss": 0.3168, - "step": 6915000 - }, - { - "epoch": 4.15, - "learning_rate": 2.700848866078596e-05, - "loss": 0.3139, - "step": 6915500 - }, - { - "epoch": 4.15, - "learning_rate": 2.70063886952254e-05, - "loss": 0.3099, - "step": 6916000 - }, - { - "epoch": 4.15, - "learning_rate": 2.7004288729664835e-05, - "loss": 0.3152, - "step": 6916500 - }, - { - "epoch": 4.15, - "learning_rate": 2.700219296403539e-05, - "loss": 0.3136, - "step": 6917000 - }, - { - "epoch": 4.15, - "learning_rate": 2.7000092998474826e-05, - "loss": 0.2994, - "step": 6917500 - }, - { - "epoch": 4.15, - "learning_rate": 2.699799303291426e-05, - "loss": 0.3093, - "step": 6918000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6995893067353696e-05, - "loss": 0.3206, - "step": 6918500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6993793101793133e-05, - "loss": 0.3069, - "step": 6919000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6991693136232563e-05, - "loss": 0.3178, - "step": 6919500 - }, - { - "epoch": 4.15, - "learning_rate": 2.698959737060312e-05, - "loss": 0.3075, - "step": 6920000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6987497405042557e-05, - "loss": 0.3232, - "step": 6920500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6985397439481994e-05, - "loss": 0.3073, - "step": 6921000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6983297473921427e-05, - "loss": 0.3138, - "step": 6921500 - }, - { - "epoch": 4.15, - "learning_rate": 2.698119750836086e-05, - "loss": 0.3201, - "step": 6922000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6979097542800297e-05, - "loss": 0.3108, - "step": 6922500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6977001777170854e-05, - "loss": 0.313, - "step": 6923000 - }, - { - "epoch": 4.15, - "learning_rate": 2.697490181161029e-05, - "loss": 0.3091, - "step": 6923500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6972801846049725e-05, - "loss": 0.3088, - "step": 6924000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6970701880489158e-05, - "loss": 0.3121, - "step": 6924500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6968606114859715e-05, - "loss": 0.3156, - "step": 6925000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6966506149299152e-05, - "loss": 0.3092, - "step": 6925500 - }, - { - "epoch": 4.15, - "learning_rate": 2.696440618373859e-05, - "loss": 0.3129, - "step": 6926000 - }, - { - "epoch": 4.15, - "learning_rate": 2.696230621817802e-05, - "loss": 0.3132, - "step": 6926500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6960206252617456e-05, - "loss": 0.3143, - "step": 6927000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6958110486988013e-05, - "loss": 0.3237, - "step": 6927500 - }, - { - "epoch": 4.15, - "learning_rate": 2.695601052142745e-05, - "loss": 0.3125, - "step": 6928000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6953910555866883e-05, - "loss": 0.3164, - "step": 6928500 - }, - { - "epoch": 4.15, - "learning_rate": 2.6951810590306316e-05, - "loss": 0.3093, - "step": 6929000 - }, - { - "epoch": 4.15, - "learning_rate": 2.6949710624745753e-05, - "loss": 0.3135, - "step": 6929500 - }, - { - "epoch": 4.15, - "learning_rate": 2.694761485911631e-05, - "loss": 0.3119, - "step": 6930000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6945514893555747e-05, - "loss": 0.3072, - "step": 6930500 - }, - { - "epoch": 4.16, - "learning_rate": 2.694341492799518e-05, - "loss": 0.3034, - "step": 6931000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6941314962434614e-05, - "loss": 0.3092, - "step": 6931500 - }, - { - "epoch": 4.16, - "learning_rate": 2.693921499687405e-05, - "loss": 0.3096, - "step": 6932000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6937119231244608e-05, - "loss": 0.3158, - "step": 6932500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6935019265684045e-05, - "loss": 0.3184, - "step": 6933000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6932923500054598e-05, - "loss": 0.3166, - "step": 6933500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6930823534494032e-05, - "loss": 0.3128, - "step": 6934000 - }, - { - "epoch": 4.16, - "learning_rate": 2.692872356893347e-05, - "loss": 0.3057, - "step": 6934500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6926623603372905e-05, - "loss": 0.3111, - "step": 6935000 - }, - { - "epoch": 4.16, - "learning_rate": 2.692452363781234e-05, - "loss": 0.3015, - "step": 6935500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6922423672251772e-05, - "loss": 0.3078, - "step": 6936000 - }, - { - "epoch": 4.16, - "learning_rate": 2.692032370669121e-05, - "loss": 0.3161, - "step": 6936500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6918223741130646e-05, - "loss": 0.3181, - "step": 6937000 - }, - { - "epoch": 4.16, - "learning_rate": 2.691612377557008e-05, - "loss": 0.3162, - "step": 6937500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6914023810009516e-05, - "loss": 0.3272, - "step": 6938000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6911923844448953e-05, - "loss": 0.3137, - "step": 6938500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6909823878888383e-05, - "loss": 0.3105, - "step": 6939000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6907728113258944e-05, - "loss": 0.3165, - "step": 6939500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6905628147698377e-05, - "loss": 0.3067, - "step": 6940000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6903528182137814e-05, - "loss": 0.3185, - "step": 6940500 - }, - { - "epoch": 4.16, - "learning_rate": 2.690142821657725e-05, - "loss": 0.3068, - "step": 6941000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6899332450947804e-05, - "loss": 0.3106, - "step": 6941500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6897232485387238e-05, - "loss": 0.3161, - "step": 6942000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6895132519826675e-05, - "loss": 0.3107, - "step": 6942500 - }, - { - "epoch": 4.16, - "learning_rate": 2.689303255426611e-05, - "loss": 0.3067, - "step": 6943000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6890932588705545e-05, - "loss": 0.3067, - "step": 6943500 - }, - { - "epoch": 4.16, - "learning_rate": 2.688883262314498e-05, - "loss": 0.3165, - "step": 6944000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6886736857515535e-05, - "loss": 0.3091, - "step": 6944500 - }, - { - "epoch": 4.16, - "learning_rate": 2.6884636891954972e-05, - "loss": 0.3174, - "step": 6945000 - }, - { - "epoch": 4.16, - "learning_rate": 2.688253692639441e-05, - "loss": 0.3053, - "step": 6945500 - }, - { - "epoch": 4.16, - "learning_rate": 2.688043696083384e-05, - "loss": 0.318, - "step": 6946000 - }, - { - "epoch": 4.16, - "learning_rate": 2.6878336995273276e-05, - "loss": 0.3107, - "step": 6946500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6876241229643833e-05, - "loss": 0.3107, - "step": 6947000 - }, - { - "epoch": 4.17, - "learning_rate": 2.687414126408327e-05, - "loss": 0.3129, - "step": 6947500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6872041298522707e-05, - "loss": 0.3119, - "step": 6948000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6869941332962137e-05, - "loss": 0.3129, - "step": 6948500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6867841367401574e-05, - "loss": 0.3186, - "step": 6949000 - }, - { - "epoch": 4.17, - "learning_rate": 2.686574560177213e-05, - "loss": 0.3047, - "step": 6949500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6863645636211567e-05, - "loss": 0.3145, - "step": 6950000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6861545670651e-05, - "loss": 0.322, - "step": 6950500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6859445705090434e-05, - "loss": 0.3178, - "step": 6951000 - }, - { - "epoch": 4.17, - "learning_rate": 2.685734573952987e-05, - "loss": 0.3114, - "step": 6951500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6855249973900428e-05, - "loss": 0.3135, - "step": 6952000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6853150008339865e-05, - "loss": 0.3133, - "step": 6952500 - }, - { - "epoch": 4.17, - "learning_rate": 2.68510500427793e-05, - "loss": 0.3198, - "step": 6953000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6848950077218732e-05, - "loss": 0.3054, - "step": 6953500 - }, - { - "epoch": 4.17, - "learning_rate": 2.684685011165817e-05, - "loss": 0.3092, - "step": 6954000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6844750146097602e-05, - "loss": 0.3117, - "step": 6954500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6842654380468163e-05, - "loss": 0.3135, - "step": 6955000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6840554414907593e-05, - "loss": 0.3114, - "step": 6955500 - }, - { - "epoch": 4.17, - "learning_rate": 2.683845444934703e-05, - "loss": 0.3192, - "step": 6956000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6836354483786466e-05, - "loss": 0.312, - "step": 6956500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6834258718157023e-05, - "loss": 0.3151, - "step": 6957000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6832158752596457e-05, - "loss": 0.3114, - "step": 6957500 - }, - { - "epoch": 4.17, - "learning_rate": 2.683005878703589e-05, - "loss": 0.3199, - "step": 6958000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6827958821475327e-05, - "loss": 0.311, - "step": 6958500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6825858855914764e-05, - "loss": 0.3226, - "step": 6959000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6823758890354197e-05, - "loss": 0.3058, - "step": 6959500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6821663124724754e-05, - "loss": 0.317, - "step": 6960000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6819563159164188e-05, - "loss": 0.3136, - "step": 6960500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6817463193603625e-05, - "loss": 0.3194, - "step": 6961000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6815363228043058e-05, - "loss": 0.319, - "step": 6961500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6813263262482495e-05, - "loss": 0.3153, - "step": 6962000 - }, - { - "epoch": 4.17, - "learning_rate": 2.6811163296921932e-05, - "loss": 0.312, - "step": 6962500 - }, - { - "epoch": 4.17, - "learning_rate": 2.6809063331361365e-05, - "loss": 0.3189, - "step": 6963000 - }, - { - "epoch": 4.17, - "learning_rate": 2.68069633658008e-05, - "loss": 0.305, - "step": 6963500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6804867600171356e-05, - "loss": 0.3095, - "step": 6964000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6802767634610792e-05, - "loss": 0.3096, - "step": 6964500 - }, - { - "epoch": 4.18, - "learning_rate": 2.680066766905023e-05, - "loss": 0.3225, - "step": 6965000 - }, - { - "epoch": 4.18, - "learning_rate": 2.679856770348966e-05, - "loss": 0.312, - "step": 6965500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6796467737929096e-05, - "loss": 0.3095, - "step": 6966000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6794367772368533e-05, - "loss": 0.3077, - "step": 6966500 - }, - { - "epoch": 4.18, - "learning_rate": 2.679227200673909e-05, - "loss": 0.3155, - "step": 6967000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6790172041178527e-05, - "loss": 0.3123, - "step": 6967500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6788072075617957e-05, - "loss": 0.3178, - "step": 6968000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6785972110057394e-05, - "loss": 0.3024, - "step": 6968500 - }, - { - "epoch": 4.18, - "learning_rate": 2.678387634442795e-05, - "loss": 0.3219, - "step": 6969000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6781776378867388e-05, - "loss": 0.3252, - "step": 6969500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6779676413306824e-05, - "loss": 0.3056, - "step": 6970000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6777576447746255e-05, - "loss": 0.3202, - "step": 6970500 - }, - { - "epoch": 4.18, - "learning_rate": 2.677547648218569e-05, - "loss": 0.3099, - "step": 6971000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6773376516625128e-05, - "loss": 0.3152, - "step": 6971500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6771280750995685e-05, - "loss": 0.309, - "step": 6972000 - }, - { - "epoch": 4.18, - "learning_rate": 2.676918078543512e-05, - "loss": 0.3203, - "step": 6972500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6767080819874552e-05, - "loss": 0.305, - "step": 6973000 - }, - { - "epoch": 4.18, - "learning_rate": 2.676498085431399e-05, - "loss": 0.3086, - "step": 6973500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6762880888753426e-05, - "loss": 0.31, - "step": 6974000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6760785123123983e-05, - "loss": 0.3155, - "step": 6974500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6758685157563413e-05, - "loss": 0.3103, - "step": 6975000 - }, - { - "epoch": 4.18, - "learning_rate": 2.675658519200285e-05, - "loss": 0.3134, - "step": 6975500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6754485226442287e-05, - "loss": 0.3127, - "step": 6976000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6752389460812843e-05, - "loss": 0.3067, - "step": 6976500 - }, - { - "epoch": 4.18, - "learning_rate": 2.675028949525228e-05, - "loss": 0.3096, - "step": 6977000 - }, - { - "epoch": 4.18, - "learning_rate": 2.674818952969171e-05, - "loss": 0.3102, - "step": 6977500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6746089564131147e-05, - "loss": 0.32, - "step": 6978000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6743989598570584e-05, - "loss": 0.3052, - "step": 6978500 - }, - { - "epoch": 4.18, - "learning_rate": 2.674189383294114e-05, - "loss": 0.3161, - "step": 6979000 - }, - { - "epoch": 4.18, - "learning_rate": 2.6739793867380575e-05, - "loss": 0.3084, - "step": 6979500 - }, - { - "epoch": 4.18, - "learning_rate": 2.6737693901820008e-05, - "loss": 0.3205, - "step": 6980000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6735593936259445e-05, - "loss": 0.3189, - "step": 6980500 - }, - { - "epoch": 4.19, - "learning_rate": 2.673349397069888e-05, - "loss": 0.3188, - "step": 6981000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6731394005138315e-05, - "loss": 0.3084, - "step": 6981500 - }, - { - "epoch": 4.19, - "learning_rate": 2.672929823950887e-05, - "loss": 0.3108, - "step": 6982000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6727198273948306e-05, - "loss": 0.3215, - "step": 6982500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6725098308387742e-05, - "loss": 0.3125, - "step": 6983000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6722998342827176e-05, - "loss": 0.3119, - "step": 6983500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6720898377266613e-05, - "loss": 0.3044, - "step": 6984000 - }, - { - "epoch": 4.19, - "learning_rate": 2.671879841170605e-05, - "loss": 0.3156, - "step": 6984500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6716702646076603e-05, - "loss": 0.3268, - "step": 6985000 - }, - { - "epoch": 4.19, - "learning_rate": 2.671460268051604e-05, - "loss": 0.3136, - "step": 6985500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6712506914886597e-05, - "loss": 0.312, - "step": 6986000 - }, - { - "epoch": 4.19, - "learning_rate": 2.671040694932603e-05, - "loss": 0.3176, - "step": 6986500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6708306983765464e-05, - "loss": 0.3148, - "step": 6987000 - }, - { - "epoch": 4.19, - "learning_rate": 2.67062070182049e-05, - "loss": 0.3101, - "step": 6987500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6704107052644338e-05, - "loss": 0.3156, - "step": 6988000 - }, - { - "epoch": 4.19, - "learning_rate": 2.670200708708377e-05, - "loss": 0.3157, - "step": 6988500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6699907121523208e-05, - "loss": 0.3158, - "step": 6989000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6697807155962645e-05, - "loss": 0.3177, - "step": 6989500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6695707190402075e-05, - "loss": 0.3076, - "step": 6990000 - }, - { - "epoch": 4.19, - "learning_rate": 2.669360722484151e-05, - "loss": 0.3075, - "step": 6990500 - }, - { - "epoch": 4.19, - "learning_rate": 2.669151145921207e-05, - "loss": 0.3171, - "step": 6991000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6689411493651505e-05, - "loss": 0.3133, - "step": 6991500 - }, - { - "epoch": 4.19, - "learning_rate": 2.668731152809094e-05, - "loss": 0.3157, - "step": 6992000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6685211562530372e-05, - "loss": 0.3064, - "step": 6992500 - }, - { - "epoch": 4.19, - "learning_rate": 2.668311159696981e-05, - "loss": 0.3056, - "step": 6993000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6681011631409246e-05, - "loss": 0.3079, - "step": 6993500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6678915865779803e-05, - "loss": 0.3181, - "step": 6994000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6676820100150357e-05, - "loss": 0.3045, - "step": 6994500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6674720134589793e-05, - "loss": 0.3052, - "step": 6995000 - }, - { - "epoch": 4.19, - "learning_rate": 2.6672620169029227e-05, - "loss": 0.3127, - "step": 6995500 - }, - { - "epoch": 4.19, - "learning_rate": 2.6670520203468664e-05, - "loss": 0.3154, - "step": 6996000 - }, - { - "epoch": 4.19, - "learning_rate": 2.66684202379081e-05, - "loss": 0.3191, - "step": 6996500 - }, - { - "epoch": 4.19, - "learning_rate": 2.666632027234753e-05, - "loss": 0.3089, - "step": 6997000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6664220306786967e-05, - "loss": 0.3109, - "step": 6997500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6662120341226404e-05, - "loss": 0.3116, - "step": 6998000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6660020375665838e-05, - "loss": 0.3208, - "step": 6998500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6657920410105275e-05, - "loss": 0.3213, - "step": 6999000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6655824644475828e-05, - "loss": 0.3089, - "step": 6999500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6653724678915265e-05, - "loss": 0.3125, - "step": 7000000 - }, - { - "epoch": 4.2, - "eval_loss": 0.31092390418052673, - "eval_runtime": 1121.2076, - "eval_samples_per_second": 469.779, - "eval_steps_per_second": 78.297, - "step": 7000000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6651624713354702e-05, - "loss": 0.307, - "step": 7000500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6649524747794135e-05, - "loss": 0.3215, - "step": 7001000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6647424782233572e-05, - "loss": 0.3121, - "step": 7001500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6645324816673006e-05, - "loss": 0.3098, - "step": 7002000 - }, - { - "epoch": 4.2, - "learning_rate": 2.664322485111244e-05, - "loss": 0.3054, - "step": 7002500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6641124885551876e-05, - "loss": 0.3159, - "step": 7003000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6639033319853556e-05, - "loss": 0.3025, - "step": 7003500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6636933354292987e-05, - "loss": 0.3035, - "step": 7004000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6634833388732423e-05, - "loss": 0.3081, - "step": 7004500 - }, - { - "epoch": 4.2, - "learning_rate": 2.663273342317186e-05, - "loss": 0.3117, - "step": 7005000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6630633457611294e-05, - "loss": 0.3099, - "step": 7005500 - }, - { - "epoch": 4.2, - "learning_rate": 2.662853349205073e-05, - "loss": 0.3142, - "step": 7006000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6626437726421284e-05, - "loss": 0.3209, - "step": 7006500 - }, - { - "epoch": 4.2, - "learning_rate": 2.662433776086072e-05, - "loss": 0.3215, - "step": 7007000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6622237795300158e-05, - "loss": 0.3104, - "step": 7007500 - }, - { - "epoch": 4.2, - "learning_rate": 2.662013782973959e-05, - "loss": 0.3125, - "step": 7008000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6618037864179028e-05, - "loss": 0.3087, - "step": 7008500 - }, - { - "epoch": 4.2, - "learning_rate": 2.661594209854958e-05, - "loss": 0.316, - "step": 7009000 - }, - { - "epoch": 4.2, - "learning_rate": 2.661384213298902e-05, - "loss": 0.3148, - "step": 7009500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6611742167428455e-05, - "loss": 0.3165, - "step": 7010000 - }, - { - "epoch": 4.2, - "learning_rate": 2.660964220186789e-05, - "loss": 0.3206, - "step": 7010500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6607542236307326e-05, - "loss": 0.317, - "step": 7011000 - }, - { - "epoch": 4.2, - "learning_rate": 2.660544647067788e-05, - "loss": 0.3151, - "step": 7011500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6603346505117316e-05, - "loss": 0.3112, - "step": 7012000 - }, - { - "epoch": 4.2, - "learning_rate": 2.660124653955675e-05, - "loss": 0.312, - "step": 7012500 - }, - { - "epoch": 4.2, - "learning_rate": 2.6599146573996186e-05, - "loss": 0.3151, - "step": 7013000 - }, - { - "epoch": 4.2, - "learning_rate": 2.6597046608435623e-05, - "loss": 0.3219, - "step": 7013500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6594950842806177e-05, - "loss": 0.3085, - "step": 7014000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6592850877245614e-05, - "loss": 0.3063, - "step": 7014500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6590750911685047e-05, - "loss": 0.318, - "step": 7015000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6588650946124484e-05, - "loss": 0.314, - "step": 7015500 - }, - { - "epoch": 4.21, - "learning_rate": 2.658655098056392e-05, - "loss": 0.3106, - "step": 7016000 - }, - { - "epoch": 4.21, - "learning_rate": 2.658445101500335e-05, - "loss": 0.3125, - "step": 7016500 - }, - { - "epoch": 4.21, - "learning_rate": 2.658235524937391e-05, - "loss": 0.3221, - "step": 7017000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6580255283813345e-05, - "loss": 0.3114, - "step": 7017500 - }, - { - "epoch": 4.21, - "learning_rate": 2.657815531825278e-05, - "loss": 0.3128, - "step": 7018000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6576055352692215e-05, - "loss": 0.312, - "step": 7018500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6573959587062772e-05, - "loss": 0.3176, - "step": 7019000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6571859621502205e-05, - "loss": 0.3044, - "step": 7019500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6569759655941642e-05, - "loss": 0.3225, - "step": 7020000 - }, - { - "epoch": 4.21, - "learning_rate": 2.656765969038108e-05, - "loss": 0.3113, - "step": 7020500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6565559724820513e-05, - "loss": 0.3121, - "step": 7021000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6563459759259946e-05, - "loss": 0.3132, - "step": 7021500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6561359793699383e-05, - "loss": 0.3165, - "step": 7022000 - }, - { - "epoch": 4.21, - "learning_rate": 2.655926402806994e-05, - "loss": 0.313, - "step": 7022500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6557164062509377e-05, - "loss": 0.3018, - "step": 7023000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6555064096948807e-05, - "loss": 0.3176, - "step": 7023500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6552964131388244e-05, - "loss": 0.3143, - "step": 7024000 - }, - { - "epoch": 4.21, - "learning_rate": 2.655086416582768e-05, - "loss": 0.3013, - "step": 7024500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6548764200267117e-05, - "loss": 0.3104, - "step": 7025000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6546668434637674e-05, - "loss": 0.3198, - "step": 7025500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6544568469077104e-05, - "loss": 0.3102, - "step": 7026000 - }, - { - "epoch": 4.21, - "learning_rate": 2.654246850351654e-05, - "loss": 0.3096, - "step": 7026500 - }, - { - "epoch": 4.21, - "learning_rate": 2.6540368537955978e-05, - "loss": 0.3112, - "step": 7027000 - }, - { - "epoch": 4.21, - "learning_rate": 2.653826857239541e-05, - "loss": 0.3153, - "step": 7027500 - }, - { - "epoch": 4.21, - "learning_rate": 2.653616860683485e-05, - "loss": 0.3138, - "step": 7028000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6534072841205402e-05, - "loss": 0.3117, - "step": 7028500 - }, - { - "epoch": 4.21, - "learning_rate": 2.653197287564484e-05, - "loss": 0.3107, - "step": 7029000 - }, - { - "epoch": 4.21, - "learning_rate": 2.6529872910084276e-05, - "loss": 0.3167, - "step": 7029500 - }, - { - "epoch": 4.21, - "learning_rate": 2.652777294452371e-05, - "loss": 0.3225, - "step": 7030000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6525672978963146e-05, - "loss": 0.3131, - "step": 7030500 - }, - { - "epoch": 4.22, - "learning_rate": 2.65235772133337e-05, - "loss": 0.3144, - "step": 7031000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6521477247773136e-05, - "loss": 0.31, - "step": 7031500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6519377282212573e-05, - "loss": 0.3061, - "step": 7032000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6517277316652007e-05, - "loss": 0.3086, - "step": 7032500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6515177351091444e-05, - "loss": 0.3102, - "step": 7033000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6513077385530877e-05, - "loss": 0.3193, - "step": 7033500 - }, - { - "epoch": 4.22, - "learning_rate": 2.651097741997031e-05, - "loss": 0.3241, - "step": 7034000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6508881654340867e-05, - "loss": 0.3138, - "step": 7034500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6506781688780304e-05, - "loss": 0.3236, - "step": 7035000 - }, - { - "epoch": 4.22, - "learning_rate": 2.650468172321974e-05, - "loss": 0.3069, - "step": 7035500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6502581757659175e-05, - "loss": 0.3146, - "step": 7036000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6500481792098608e-05, - "loss": 0.3124, - "step": 7036500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6498381826538045e-05, - "loss": 0.299, - "step": 7037000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6496286060908602e-05, - "loss": 0.3127, - "step": 7037500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6494186095348035e-05, - "loss": 0.3141, - "step": 7038000 - }, - { - "epoch": 4.22, - "learning_rate": 2.649208612978747e-05, - "loss": 0.3158, - "step": 7038500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6489986164226906e-05, - "loss": 0.3154, - "step": 7039000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6487890398597463e-05, - "loss": 0.3138, - "step": 7039500 - }, - { - "epoch": 4.22, - "learning_rate": 2.64857904330369e-05, - "loss": 0.3054, - "step": 7040000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6483690467476333e-05, - "loss": 0.3208, - "step": 7040500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6481590501915766e-05, - "loss": 0.3151, - "step": 7041000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6479490536355203e-05, - "loss": 0.3143, - "step": 7041500 - }, - { - "epoch": 4.22, - "learning_rate": 2.647739057079464e-05, - "loss": 0.3116, - "step": 7042000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6475294805165197e-05, - "loss": 0.32, - "step": 7042500 - }, - { - "epoch": 4.22, - "learning_rate": 2.647319483960463e-05, - "loss": 0.315, - "step": 7043000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6471094874044064e-05, - "loss": 0.3155, - "step": 7043500 - }, - { - "epoch": 4.22, - "learning_rate": 2.64689949084835e-05, - "loss": 0.3111, - "step": 7044000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6466894942922938e-05, - "loss": 0.314, - "step": 7044500 - }, - { - "epoch": 4.22, - "learning_rate": 2.646479497736237e-05, - "loss": 0.3094, - "step": 7045000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6462695011801804e-05, - "loss": 0.3142, - "step": 7045500 - }, - { - "epoch": 4.22, - "learning_rate": 2.646059924617236e-05, - "loss": 0.3228, - "step": 7046000 - }, - { - "epoch": 4.22, - "learning_rate": 2.6458499280611798e-05, - "loss": 0.309, - "step": 7046500 - }, - { - "epoch": 4.22, - "learning_rate": 2.6456399315051232e-05, - "loss": 0.3095, - "step": 7047000 - }, - { - "epoch": 4.23, - "learning_rate": 2.645429934949067e-05, - "loss": 0.3223, - "step": 7047500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6452203583861222e-05, - "loss": 0.3141, - "step": 7048000 - }, - { - "epoch": 4.23, - "learning_rate": 2.645010361830066e-05, - "loss": 0.3082, - "step": 7048500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6448003652740096e-05, - "loss": 0.3136, - "step": 7049000 - }, - { - "epoch": 4.23, - "learning_rate": 2.644590368717953e-05, - "loss": 0.3111, - "step": 7049500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6443803721618966e-05, - "loss": 0.3141, - "step": 7050000 - }, - { - "epoch": 4.23, - "learning_rate": 2.644170795598952e-05, - "loss": 0.3162, - "step": 7050500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6439607990428957e-05, - "loss": 0.3155, - "step": 7051000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6437508024868393e-05, - "loss": 0.3098, - "step": 7051500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6435408059307827e-05, - "loss": 0.3133, - "step": 7052000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6433308093747264e-05, - "loss": 0.3116, - "step": 7052500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6431208128186697e-05, - "loss": 0.3084, - "step": 7053000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6429112362557254e-05, - "loss": 0.3198, - "step": 7053500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6427012396996688e-05, - "loss": 0.3142, - "step": 7054000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6424912431436124e-05, - "loss": 0.318, - "step": 7054500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6422812465875558e-05, - "loss": 0.3124, - "step": 7055000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6420712500314995e-05, - "loss": 0.3231, - "step": 7055500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6418612534754428e-05, - "loss": 0.3082, - "step": 7056000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6416512569193865e-05, - "loss": 0.31, - "step": 7056500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6414412603633302e-05, - "loss": 0.3134, - "step": 7057000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6412321037934976e-05, - "loss": 0.3226, - "step": 7057500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6410221072374412e-05, - "loss": 0.309, - "step": 7058000 - }, - { - "epoch": 4.23, - "learning_rate": 2.640812110681385e-05, - "loss": 0.3109, - "step": 7058500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6406021141253283e-05, - "loss": 0.3075, - "step": 7059000 - }, - { - "epoch": 4.23, - "learning_rate": 2.6403925375623836e-05, - "loss": 0.3118, - "step": 7059500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6401825410063273e-05, - "loss": 0.319, - "step": 7060000 - }, - { - "epoch": 4.23, - "learning_rate": 2.639972544450271e-05, - "loss": 0.3149, - "step": 7060500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6397625478942147e-05, - "loss": 0.3127, - "step": 7061000 - }, - { - "epoch": 4.23, - "learning_rate": 2.639552551338158e-05, - "loss": 0.3106, - "step": 7061500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6393425547821017e-05, - "loss": 0.3126, - "step": 7062000 - }, - { - "epoch": 4.23, - "learning_rate": 2.639132558226045e-05, - "loss": 0.3164, - "step": 7062500 - }, - { - "epoch": 4.23, - "learning_rate": 2.6389225616699884e-05, - "loss": 0.3084, - "step": 7063000 - }, - { - "epoch": 4.23, - "learning_rate": 2.638712985107044e-05, - "loss": 0.3127, - "step": 7063500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6385029885509878e-05, - "loss": 0.3173, - "step": 7064000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6382929919949315e-05, - "loss": 0.3105, - "step": 7064500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6380829954388748e-05, - "loss": 0.3101, - "step": 7065000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6378729988828182e-05, - "loss": 0.3139, - "step": 7065500 - }, - { - "epoch": 4.24, - "learning_rate": 2.637663002326762e-05, - "loss": 0.3136, - "step": 7066000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6374530057707055e-05, - "loss": 0.3108, - "step": 7066500 - }, - { - "epoch": 4.24, - "learning_rate": 2.637243429207761e-05, - "loss": 0.3176, - "step": 7067000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6370334326517042e-05, - "loss": 0.3264, - "step": 7067500 - }, - { - "epoch": 4.24, - "learning_rate": 2.636823436095648e-05, - "loss": 0.3063, - "step": 7068000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6366134395395916e-05, - "loss": 0.3142, - "step": 7068500 - }, - { - "epoch": 4.24, - "learning_rate": 2.636403442983535e-05, - "loss": 0.3114, - "step": 7069000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6361934464274786e-05, - "loss": 0.3125, - "step": 7069500 - }, - { - "epoch": 4.24, - "learning_rate": 2.635983449871422e-05, - "loss": 0.3144, - "step": 7070000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6357734533153657e-05, - "loss": 0.3074, - "step": 7070500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6355638767524214e-05, - "loss": 0.314, - "step": 7071000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6353538801963647e-05, - "loss": 0.3147, - "step": 7071500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6351443036334204e-05, - "loss": 0.3189, - "step": 7072000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6349343070773638e-05, - "loss": 0.3166, - "step": 7072500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6347243105213074e-05, - "loss": 0.3171, - "step": 7073000 - }, - { - "epoch": 4.24, - "learning_rate": 2.634514313965251e-05, - "loss": 0.3113, - "step": 7073500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6343043174091945e-05, - "loss": 0.3192, - "step": 7074000 - }, - { - "epoch": 4.24, - "learning_rate": 2.63409474084625e-05, - "loss": 0.3133, - "step": 7074500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6338847442901935e-05, - "loss": 0.3156, - "step": 7075000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6336747477341372e-05, - "loss": 0.308, - "step": 7075500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6334647511780805e-05, - "loss": 0.3189, - "step": 7076000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6332547546220242e-05, - "loss": 0.3106, - "step": 7076500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6330447580659676e-05, - "loss": 0.3024, - "step": 7077000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6328347615099113e-05, - "loss": 0.3079, - "step": 7077500 - }, - { - "epoch": 4.24, - "learning_rate": 2.6326247649538546e-05, - "loss": 0.3144, - "step": 7078000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6324151883909103e-05, - "loss": 0.3188, - "step": 7078500 - }, - { - "epoch": 4.24, - "learning_rate": 2.632205611827966e-05, - "loss": 0.3217, - "step": 7079000 - }, - { - "epoch": 4.24, - "learning_rate": 2.6319956152719093e-05, - "loss": 0.3169, - "step": 7079500 - }, - { - "epoch": 4.24, - "learning_rate": 2.631785618715853e-05, - "loss": 0.3133, - "step": 7080000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6315756221597967e-05, - "loss": 0.312, - "step": 7080500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6313660455968524e-05, - "loss": 0.312, - "step": 7081000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6311560490407954e-05, - "loss": 0.3151, - "step": 7081500 - }, - { - "epoch": 4.25, - "learning_rate": 2.630946052484739e-05, - "loss": 0.3106, - "step": 7082000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6307360559286828e-05, - "loss": 0.3139, - "step": 7082500 - }, - { - "epoch": 4.25, - "learning_rate": 2.630526059372626e-05, - "loss": 0.304, - "step": 7083000 - }, - { - "epoch": 4.25, - "learning_rate": 2.630316482809682e-05, - "loss": 0.3148, - "step": 7083500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6301064862536252e-05, - "loss": 0.3101, - "step": 7084000 - }, - { - "epoch": 4.25, - "learning_rate": 2.629896489697569e-05, - "loss": 0.3147, - "step": 7084500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6296864931415125e-05, - "loss": 0.3224, - "step": 7085000 - }, - { - "epoch": 4.25, - "learning_rate": 2.629476496585456e-05, - "loss": 0.3148, - "step": 7085500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6292669200225116e-05, - "loss": 0.3027, - "step": 7086000 - }, - { - "epoch": 4.25, - "learning_rate": 2.629056923466455e-05, - "loss": 0.3112, - "step": 7086500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6288469269103986e-05, - "loss": 0.3171, - "step": 7087000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6286369303543423e-05, - "loss": 0.3234, - "step": 7087500 - }, - { - "epoch": 4.25, - "learning_rate": 2.628427353791398e-05, - "loss": 0.3141, - "step": 7088000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6282177772284534e-05, - "loss": 0.3112, - "step": 7088500 - }, - { - "epoch": 4.25, - "learning_rate": 2.628007780672397e-05, - "loss": 0.3137, - "step": 7089000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6277977841163404e-05, - "loss": 0.3141, - "step": 7089500 - }, - { - "epoch": 4.25, - "learning_rate": 2.627587787560284e-05, - "loss": 0.31, - "step": 7090000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6273777910042278e-05, - "loss": 0.3134, - "step": 7090500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6271677944481708e-05, - "loss": 0.3195, - "step": 7091000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6269577978921144e-05, - "loss": 0.3045, - "step": 7091500 - }, - { - "epoch": 4.25, - "learning_rate": 2.626747801336058e-05, - "loss": 0.3085, - "step": 7092000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6265382247731138e-05, - "loss": 0.3074, - "step": 7092500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6263282282170572e-05, - "loss": 0.3051, - "step": 7093000 - }, - { - "epoch": 4.25, - "learning_rate": 2.6261182316610005e-05, - "loss": 0.3261, - "step": 7093500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6259082351049442e-05, - "loss": 0.3142, - "step": 7094000 - }, - { - "epoch": 4.25, - "learning_rate": 2.625698238548888e-05, - "loss": 0.3127, - "step": 7094500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6254882419928312e-05, - "loss": 0.3036, - "step": 7095000 - }, - { - "epoch": 4.25, - "learning_rate": 2.625278245436775e-05, - "loss": 0.3044, - "step": 7095500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6250686688738303e-05, - "loss": 0.3113, - "step": 7096000 - }, - { - "epoch": 4.25, - "learning_rate": 2.624858672317774e-05, - "loss": 0.3191, - "step": 7096500 - }, - { - "epoch": 4.25, - "learning_rate": 2.6246486757617176e-05, - "loss": 0.3235, - "step": 7097000 - }, - { - "epoch": 4.26, - "learning_rate": 2.624438679205661e-05, - "loss": 0.3117, - "step": 7097500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6242286826496047e-05, - "loss": 0.316, - "step": 7098000 - }, - { - "epoch": 4.26, - "learning_rate": 2.624018686093548e-05, - "loss": 0.3156, - "step": 7098500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6238086895374914e-05, - "loss": 0.3157, - "step": 7099000 - }, - { - "epoch": 4.26, - "learning_rate": 2.623599112974547e-05, - "loss": 0.3099, - "step": 7099500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6233891164184908e-05, - "loss": 0.3081, - "step": 7100000 - }, - { - "epoch": 4.26, - "eval_loss": 0.30983468890190125, - "eval_runtime": 1121.5578, - "eval_samples_per_second": 469.632, - "eval_steps_per_second": 78.272, - "step": 7100000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6231791198624344e-05, - "loss": 0.3221, - "step": 7100500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6229691233063778e-05, - "loss": 0.3121, - "step": 7101000 - }, - { - "epoch": 4.26, - "learning_rate": 2.622759126750321e-05, - "loss": 0.3133, - "step": 7101500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6225491301942648e-05, - "loss": 0.3157, - "step": 7102000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6223391336382085e-05, - "loss": 0.3139, - "step": 7102500 - }, - { - "epoch": 4.26, - "learning_rate": 2.622129137082152e-05, - "loss": 0.318, - "step": 7103000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6219195605192072e-05, - "loss": 0.3134, - "step": 7103500 - }, - { - "epoch": 4.26, - "learning_rate": 2.621709563963151e-05, - "loss": 0.3118, - "step": 7104000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6214995674070946e-05, - "loss": 0.3286, - "step": 7104500 - }, - { - "epoch": 4.26, - "learning_rate": 2.621289570851038e-05, - "loss": 0.3036, - "step": 7105000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6210799942880936e-05, - "loss": 0.3131, - "step": 7105500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6208704177251493e-05, - "loss": 0.3302, - "step": 7106000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6206604211690927e-05, - "loss": 0.3205, - "step": 7106500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6204504246130363e-05, - "loss": 0.3168, - "step": 7107000 - }, - { - "epoch": 4.26, - "learning_rate": 2.62024042805698e-05, - "loss": 0.3141, - "step": 7107500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6200304315009234e-05, - "loss": 0.3099, - "step": 7108000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6198204349448667e-05, - "loss": 0.3053, - "step": 7108500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6196108583819224e-05, - "loss": 0.3216, - "step": 7109000 - }, - { - "epoch": 4.26, - "learning_rate": 2.619400861825866e-05, - "loss": 0.3091, - "step": 7109500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6191908652698098e-05, - "loss": 0.3069, - "step": 7110000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6189808687137528e-05, - "loss": 0.3097, - "step": 7110500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6187708721576965e-05, - "loss": 0.3131, - "step": 7111000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6185612955947522e-05, - "loss": 0.3177, - "step": 7111500 - }, - { - "epoch": 4.26, - "learning_rate": 2.618351299038696e-05, - "loss": 0.3024, - "step": 7112000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6181413024826395e-05, - "loss": 0.3166, - "step": 7112500 - }, - { - "epoch": 4.26, - "learning_rate": 2.6179313059265825e-05, - "loss": 0.3103, - "step": 7113000 - }, - { - "epoch": 4.26, - "learning_rate": 2.6177213093705262e-05, - "loss": 0.314, - "step": 7113500 - }, - { - "epoch": 4.27, - "learning_rate": 2.61751131281447e-05, - "loss": 0.3148, - "step": 7114000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6173013162584133e-05, - "loss": 0.3151, - "step": 7114500 - }, - { - "epoch": 4.27, - "learning_rate": 2.617091319702357e-05, - "loss": 0.307, - "step": 7115000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6168817431394123e-05, - "loss": 0.3118, - "step": 7115500 - }, - { - "epoch": 4.27, - "learning_rate": 2.616671746583356e-05, - "loss": 0.3066, - "step": 7116000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6164617500272997e-05, - "loss": 0.3161, - "step": 7116500 - }, - { - "epoch": 4.27, - "learning_rate": 2.616251753471243e-05, - "loss": 0.3242, - "step": 7117000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6160417569151867e-05, - "loss": 0.3127, - "step": 7117500 - }, - { - "epoch": 4.27, - "learning_rate": 2.615832180352242e-05, - "loss": 0.3095, - "step": 7118000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6156221837961857e-05, - "loss": 0.3085, - "step": 7118500 - }, - { - "epoch": 4.27, - "learning_rate": 2.615412187240129e-05, - "loss": 0.3134, - "step": 7119000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6152021906840728e-05, - "loss": 0.3204, - "step": 7119500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6149921941280165e-05, - "loss": 0.3126, - "step": 7120000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6147826175650718e-05, - "loss": 0.3139, - "step": 7120500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6145726210090155e-05, - "loss": 0.304, - "step": 7121000 - }, - { - "epoch": 4.27, - "learning_rate": 2.614362624452959e-05, - "loss": 0.3174, - "step": 7121500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6141526278969025e-05, - "loss": 0.317, - "step": 7122000 - }, - { - "epoch": 4.27, - "learning_rate": 2.613943051333958e-05, - "loss": 0.3118, - "step": 7122500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6137330547779016e-05, - "loss": 0.3118, - "step": 7123000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6135230582218453e-05, - "loss": 0.3152, - "step": 7123500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6133130616657886e-05, - "loss": 0.3094, - "step": 7124000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6131030651097323e-05, - "loss": 0.3167, - "step": 7124500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6128934885467876e-05, - "loss": 0.3124, - "step": 7125000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6126834919907313e-05, - "loss": 0.3182, - "step": 7125500 - }, - { - "epoch": 4.27, - "learning_rate": 2.6124734954346747e-05, - "loss": 0.3153, - "step": 7126000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6122634988786184e-05, - "loss": 0.3164, - "step": 7126500 - }, - { - "epoch": 4.27, - "learning_rate": 2.612053502322562e-05, - "loss": 0.3166, - "step": 7127000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6118439257596174e-05, - "loss": 0.3135, - "step": 7127500 - }, - { - "epoch": 4.27, - "learning_rate": 2.611633929203561e-05, - "loss": 0.3241, - "step": 7128000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6114239326475044e-05, - "loss": 0.3093, - "step": 7128500 - }, - { - "epoch": 4.27, - "learning_rate": 2.611213936091448e-05, - "loss": 0.3143, - "step": 7129000 - }, - { - "epoch": 4.27, - "learning_rate": 2.6110039395353918e-05, - "loss": 0.3135, - "step": 7129500 - }, - { - "epoch": 4.27, - "learning_rate": 2.610793942979335e-05, - "loss": 0.3118, - "step": 7130000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6105839464232785e-05, - "loss": 0.3126, - "step": 7130500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6103739498672222e-05, - "loss": 0.3123, - "step": 7131000 - }, - { - "epoch": 4.28, - "learning_rate": 2.610164373304278e-05, - "loss": 0.3208, - "step": 7131500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6099547967413332e-05, - "loss": 0.3132, - "step": 7132000 - }, - { - "epoch": 4.28, - "learning_rate": 2.609745220178389e-05, - "loss": 0.3059, - "step": 7132500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6095352236223326e-05, - "loss": 0.3132, - "step": 7133000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6093252270662763e-05, - "loss": 0.3183, - "step": 7133500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6091152305102196e-05, - "loss": 0.3106, - "step": 7134000 - }, - { - "epoch": 4.28, - "learning_rate": 2.608905233954163e-05, - "loss": 0.3184, - "step": 7134500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6086952373981067e-05, - "loss": 0.3231, - "step": 7135000 - }, - { - "epoch": 4.28, - "learning_rate": 2.60848524084205e-05, - "loss": 0.3124, - "step": 7135500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6082752442859937e-05, - "loss": 0.3182, - "step": 7136000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6080652477299374e-05, - "loss": 0.3144, - "step": 7136500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6078552511738807e-05, - "loss": 0.305, - "step": 7137000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6076456746109364e-05, - "loss": 0.3153, - "step": 7137500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6074356780548798e-05, - "loss": 0.3088, - "step": 7138000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6072256814988235e-05, - "loss": 0.3229, - "step": 7138500 - }, - { - "epoch": 4.28, - "learning_rate": 2.607015684942767e-05, - "loss": 0.3194, - "step": 7139000 - }, - { - "epoch": 4.28, - "learning_rate": 2.60680568838671e-05, - "loss": 0.3144, - "step": 7139500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6065961118237662e-05, - "loss": 0.3157, - "step": 7140000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6063861152677095e-05, - "loss": 0.3144, - "step": 7140500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6061761187116532e-05, - "loss": 0.3084, - "step": 7141000 - }, - { - "epoch": 4.28, - "learning_rate": 2.605966122155597e-05, - "loss": 0.3099, - "step": 7141500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6057565455926523e-05, - "loss": 0.3161, - "step": 7142000 - }, - { - "epoch": 4.28, - "learning_rate": 2.6055465490365956e-05, - "loss": 0.3197, - "step": 7142500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6053365524805393e-05, - "loss": 0.3106, - "step": 7143000 - }, - { - "epoch": 4.28, - "learning_rate": 2.605126555924483e-05, - "loss": 0.3062, - "step": 7143500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6049169793615383e-05, - "loss": 0.3137, - "step": 7144000 - }, - { - "epoch": 4.28, - "learning_rate": 2.604706982805482e-05, - "loss": 0.3162, - "step": 7144500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6044969862494254e-05, - "loss": 0.312, - "step": 7145000 - }, - { - "epoch": 4.28, - "learning_rate": 2.604286989693369e-05, - "loss": 0.3135, - "step": 7145500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6040774131304244e-05, - "loss": 0.3165, - "step": 7146000 - }, - { - "epoch": 4.28, - "learning_rate": 2.603867416574368e-05, - "loss": 0.3166, - "step": 7146500 - }, - { - "epoch": 4.28, - "learning_rate": 2.6036574200183118e-05, - "loss": 0.3042, - "step": 7147000 - }, - { - "epoch": 4.29, - "learning_rate": 2.603447423462255e-05, - "loss": 0.3133, - "step": 7147500 - }, - { - "epoch": 4.29, - "learning_rate": 2.6032374269061988e-05, - "loss": 0.3179, - "step": 7148000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6030274303501425e-05, - "loss": 0.3124, - "step": 7148500 - }, - { - "epoch": 4.29, - "learning_rate": 2.6028174337940855e-05, - "loss": 0.3116, - "step": 7149000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6026074372380292e-05, - "loss": 0.3168, - "step": 7149500 - }, - { - "epoch": 4.29, - "learning_rate": 2.602397440681973e-05, - "loss": 0.3108, - "step": 7150000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6021874441259162e-05, - "loss": 0.3125, - "step": 7150500 - }, - { - "epoch": 4.29, - "learning_rate": 2.601977867562972e-05, - "loss": 0.3192, - "step": 7151000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6017678710069153e-05, - "loss": 0.3146, - "step": 7151500 - }, - { - "epoch": 4.29, - "learning_rate": 2.601557874450859e-05, - "loss": 0.317, - "step": 7152000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6013478778948026e-05, - "loss": 0.3088, - "step": 7152500 - }, - { - "epoch": 4.29, - "learning_rate": 2.601137881338746e-05, - "loss": 0.311, - "step": 7153000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6009278847826897e-05, - "loss": 0.3108, - "step": 7153500 - }, - { - "epoch": 4.29, - "learning_rate": 2.600718308219745e-05, - "loss": 0.3171, - "step": 7154000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6005083116636887e-05, - "loss": 0.3152, - "step": 7154500 - }, - { - "epoch": 4.29, - "learning_rate": 2.600298315107632e-05, - "loss": 0.3106, - "step": 7155000 - }, - { - "epoch": 4.29, - "learning_rate": 2.6000883185515757e-05, - "loss": 0.311, - "step": 7155500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5998783219955194e-05, - "loss": 0.3132, - "step": 7156000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5996687454325748e-05, - "loss": 0.3179, - "step": 7156500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5994587488765185e-05, - "loss": 0.3136, - "step": 7157000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5992487523204618e-05, - "loss": 0.3161, - "step": 7157500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5990387557644055e-05, - "loss": 0.3115, - "step": 7158000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5988287592083492e-05, - "loss": 0.3085, - "step": 7158500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5986187626522922e-05, - "loss": 0.306, - "step": 7159000 - }, - { - "epoch": 4.29, - "learning_rate": 2.598408766096236e-05, - "loss": 0.3152, - "step": 7159500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5981987695401796e-05, - "loss": 0.3087, - "step": 7160000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5979891929772352e-05, - "loss": 0.3025, - "step": 7160500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5977791964211786e-05, - "loss": 0.3086, - "step": 7161000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5975696198582343e-05, - "loss": 0.3121, - "step": 7161500 - }, - { - "epoch": 4.29, - "learning_rate": 2.5973596233021776e-05, - "loss": 0.3153, - "step": 7162000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5971496267461213e-05, - "loss": 0.3114, - "step": 7162500 - }, - { - "epoch": 4.29, - "learning_rate": 2.596939630190065e-05, - "loss": 0.312, - "step": 7163000 - }, - { - "epoch": 4.29, - "learning_rate": 2.5967300536271204e-05, - "loss": 0.317, - "step": 7163500 - }, - { - "epoch": 4.3, - "learning_rate": 2.596520057071064e-05, - "loss": 0.3134, - "step": 7164000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5963100605150074e-05, - "loss": 0.3286, - "step": 7164500 - }, - { - "epoch": 4.3, - "learning_rate": 2.596100063958951e-05, - "loss": 0.3126, - "step": 7165000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5958900674028948e-05, - "loss": 0.3124, - "step": 7165500 - }, - { - "epoch": 4.3, - "learning_rate": 2.595680070846838e-05, - "loss": 0.3084, - "step": 7166000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5954700742907815e-05, - "loss": 0.3174, - "step": 7166500 - }, - { - "epoch": 4.3, - "learning_rate": 2.595260077734725e-05, - "loss": 0.3088, - "step": 7167000 - }, - { - "epoch": 4.3, - "learning_rate": 2.595050501171781e-05, - "loss": 0.3181, - "step": 7167500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5948405046157245e-05, - "loss": 0.3046, - "step": 7168000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5946305080596675e-05, - "loss": 0.3096, - "step": 7168500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5944205115036112e-05, - "loss": 0.3207, - "step": 7169000 - }, - { - "epoch": 4.3, - "learning_rate": 2.594210934940667e-05, - "loss": 0.3242, - "step": 7169500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5940009383846106e-05, - "loss": 0.3119, - "step": 7170000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5937909418285543e-05, - "loss": 0.3029, - "step": 7170500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5935809452724973e-05, - "loss": 0.314, - "step": 7171000 - }, - { - "epoch": 4.3, - "learning_rate": 2.593370948716441e-05, - "loss": 0.3114, - "step": 7171500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5931609521603847e-05, - "loss": 0.3148, - "step": 7172000 - }, - { - "epoch": 4.3, - "learning_rate": 2.592950955604328e-05, - "loss": 0.3041, - "step": 7172500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5927413790413837e-05, - "loss": 0.308, - "step": 7173000 - }, - { - "epoch": 4.3, - "learning_rate": 2.592531382485327e-05, - "loss": 0.3106, - "step": 7173500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5923213859292707e-05, - "loss": 0.3118, - "step": 7174000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5921113893732144e-05, - "loss": 0.3237, - "step": 7174500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5919013928171578e-05, - "loss": 0.3126, - "step": 7175000 - }, - { - "epoch": 4.3, - "learning_rate": 2.591691816254213e-05, - "loss": 0.3194, - "step": 7175500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5914818196981568e-05, - "loss": 0.3111, - "step": 7176000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5912718231421005e-05, - "loss": 0.3109, - "step": 7176500 - }, - { - "epoch": 4.3, - "learning_rate": 2.591061826586044e-05, - "loss": 0.3077, - "step": 7177000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5908518300299875e-05, - "loss": 0.3124, - "step": 7177500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5906418334739312e-05, - "loss": 0.3082, - "step": 7178000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5904322569109866e-05, - "loss": 0.3062, - "step": 7178500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5902222603549302e-05, - "loss": 0.3079, - "step": 7179000 - }, - { - "epoch": 4.3, - "learning_rate": 2.5900122637988736e-05, - "loss": 0.32, - "step": 7179500 - }, - { - "epoch": 4.3, - "learning_rate": 2.5898022672428173e-05, - "loss": 0.3087, - "step": 7180000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5895922706867606e-05, - "loss": 0.3181, - "step": 7180500 - }, - { - "epoch": 4.31, - "learning_rate": 2.589382274130704e-05, - "loss": 0.3123, - "step": 7181000 - }, - { - "epoch": 4.31, - "learning_rate": 2.58917269756776e-05, - "loss": 0.3206, - "step": 7181500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5889627010117033e-05, - "loss": 0.3007, - "step": 7182000 - }, - { - "epoch": 4.31, - "learning_rate": 2.588752704455647e-05, - "loss": 0.311, - "step": 7182500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5885427078995904e-05, - "loss": 0.3141, - "step": 7183000 - }, - { - "epoch": 4.31, - "learning_rate": 2.588333131336646e-05, - "loss": 0.3122, - "step": 7183500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5881231347805894e-05, - "loss": 0.3098, - "step": 7184000 - }, - { - "epoch": 4.31, - "learning_rate": 2.587913138224533e-05, - "loss": 0.3128, - "step": 7184500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5877031416684768e-05, - "loss": 0.3075, - "step": 7185000 - }, - { - "epoch": 4.31, - "learning_rate": 2.58749314511242e-05, - "loss": 0.3103, - "step": 7185500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5872831485563635e-05, - "loss": 0.3167, - "step": 7186000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5870735719934192e-05, - "loss": 0.3123, - "step": 7186500 - }, - { - "epoch": 4.31, - "learning_rate": 2.586863575437363e-05, - "loss": 0.312, - "step": 7187000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5866535788813065e-05, - "loss": 0.3092, - "step": 7187500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5864435823252496e-05, - "loss": 0.3156, - "step": 7188000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5862340057623056e-05, - "loss": 0.3219, - "step": 7188500 - }, - { - "epoch": 4.31, - "learning_rate": 2.586024009206249e-05, - "loss": 0.3121, - "step": 7189000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5858140126501926e-05, - "loss": 0.3141, - "step": 7189500 - }, - { - "epoch": 4.31, - "learning_rate": 2.585604016094136e-05, - "loss": 0.3132, - "step": 7190000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5853940195380793e-05, - "loss": 0.3141, - "step": 7190500 - }, - { - "epoch": 4.31, - "learning_rate": 2.585184442975135e-05, - "loss": 0.3138, - "step": 7191000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5849744464190787e-05, - "loss": 0.3076, - "step": 7191500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5847644498630224e-05, - "loss": 0.3112, - "step": 7192000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5845544533069657e-05, - "loss": 0.3119, - "step": 7192500 - }, - { - "epoch": 4.31, - "learning_rate": 2.584344456750909e-05, - "loss": 0.3072, - "step": 7193000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5841348801879648e-05, - "loss": 0.3259, - "step": 7193500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5839248836319085e-05, - "loss": 0.3116, - "step": 7194000 - }, - { - "epoch": 4.31, - "learning_rate": 2.583714887075852e-05, - "loss": 0.3189, - "step": 7194500 - }, - { - "epoch": 4.31, - "learning_rate": 2.583504890519795e-05, - "loss": 0.3127, - "step": 7195000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5832948939637388e-05, - "loss": 0.3092, - "step": 7195500 - }, - { - "epoch": 4.31, - "learning_rate": 2.5830848974076825e-05, - "loss": 0.3201, - "step": 7196000 - }, - { - "epoch": 4.31, - "learning_rate": 2.5828753208447382e-05, - "loss": 0.3111, - "step": 7196500 - }, - { - "epoch": 4.31, - "learning_rate": 2.582665324288682e-05, - "loss": 0.3081, - "step": 7197000 - }, - { - "epoch": 4.32, - "learning_rate": 2.582455327732625e-05, - "loss": 0.3105, - "step": 7197500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5822453311765686e-05, - "loss": 0.3062, - "step": 7198000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5820353346205123e-05, - "loss": 0.3098, - "step": 7198500 - }, - { - "epoch": 4.32, - "learning_rate": 2.581825758057568e-05, - "loss": 0.3113, - "step": 7199000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5816157615015117e-05, - "loss": 0.3197, - "step": 7199500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5814057649454547e-05, - "loss": 0.3087, - "step": 7200000 - }, - { - "epoch": 4.32, - "eval_loss": 0.30957677960395813, - "eval_runtime": 1120.3548, - "eval_samples_per_second": 470.137, - "eval_steps_per_second": 78.356, - "step": 7200000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5811957683893983e-05, - "loss": 0.3068, - "step": 7200500 - }, - { - "epoch": 4.32, - "learning_rate": 2.580985771833342e-05, - "loss": 0.3101, - "step": 7201000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5807757752772854e-05, - "loss": 0.316, - "step": 7201500 - }, - { - "epoch": 4.32, - "learning_rate": 2.580565778721229e-05, - "loss": 0.308, - "step": 7202000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5803562021582844e-05, - "loss": 0.3203, - "step": 7202500 - }, - { - "epoch": 4.32, - "learning_rate": 2.580146205602228e-05, - "loss": 0.3194, - "step": 7203000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5799362090461718e-05, - "loss": 0.3124, - "step": 7203500 - }, - { - "epoch": 4.32, - "learning_rate": 2.579726212490115e-05, - "loss": 0.3043, - "step": 7204000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5795162159340588e-05, - "loss": 0.3061, - "step": 7204500 - }, - { - "epoch": 4.32, - "learning_rate": 2.579306219378002e-05, - "loss": 0.3121, - "step": 7205000 - }, - { - "epoch": 4.32, - "learning_rate": 2.579096642815058e-05, - "loss": 0.3209, - "step": 7205500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5788866462590012e-05, - "loss": 0.3054, - "step": 7206000 - }, - { - "epoch": 4.32, - "learning_rate": 2.578676649702945e-05, - "loss": 0.3017, - "step": 7206500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5784666531468886e-05, - "loss": 0.3078, - "step": 7207000 - }, - { - "epoch": 4.32, - "learning_rate": 2.578256656590832e-05, - "loss": 0.3042, - "step": 7207500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5780466600347753e-05, - "loss": 0.3162, - "step": 7208000 - }, - { - "epoch": 4.32, - "learning_rate": 2.577837083471831e-05, - "loss": 0.3102, - "step": 7208500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5776270869157746e-05, - "loss": 0.3105, - "step": 7209000 - }, - { - "epoch": 4.32, - "learning_rate": 2.577417090359718e-05, - "loss": 0.3124, - "step": 7209500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5772070938036613e-05, - "loss": 0.3051, - "step": 7210000 - }, - { - "epoch": 4.32, - "learning_rate": 2.576997097247605e-05, - "loss": 0.3099, - "step": 7210500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5767875206846607e-05, - "loss": 0.3215, - "step": 7211000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5765775241286044e-05, - "loss": 0.309, - "step": 7211500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5763675275725477e-05, - "loss": 0.3134, - "step": 7212000 - }, - { - "epoch": 4.32, - "learning_rate": 2.576157531016491e-05, - "loss": 0.3208, - "step": 7212500 - }, - { - "epoch": 4.32, - "learning_rate": 2.5759479544535468e-05, - "loss": 0.3097, - "step": 7213000 - }, - { - "epoch": 4.32, - "learning_rate": 2.5757379578974905e-05, - "loss": 0.3151, - "step": 7213500 - }, - { - "epoch": 4.33, - "learning_rate": 2.575527961341434e-05, - "loss": 0.3095, - "step": 7214000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5753179647853775e-05, - "loss": 0.3152, - "step": 7214500 - }, - { - "epoch": 4.33, - "learning_rate": 2.575107968229321e-05, - "loss": 0.314, - "step": 7215000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5748979716732645e-05, - "loss": 0.3091, - "step": 7215500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5746879751172082e-05, - "loss": 0.3094, - "step": 7216000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5744779785611516e-05, - "loss": 0.3082, - "step": 7216500 - }, - { - "epoch": 4.33, - "learning_rate": 2.574267982005095e-05, - "loss": 0.3167, - "step": 7217000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5740584054421506e-05, - "loss": 0.314, - "step": 7217500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5738484088860943e-05, - "loss": 0.3128, - "step": 7218000 - }, - { - "epoch": 4.33, - "learning_rate": 2.573638412330038e-05, - "loss": 0.3157, - "step": 7218500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5734284157739813e-05, - "loss": 0.326, - "step": 7219000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5732184192179247e-05, - "loss": 0.3117, - "step": 7219500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5730088426549804e-05, - "loss": 0.2995, - "step": 7220000 - }, - { - "epoch": 4.33, - "learning_rate": 2.572798846098924e-05, - "loss": 0.3141, - "step": 7220500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5725888495428674e-05, - "loss": 0.3105, - "step": 7221000 - }, - { - "epoch": 4.33, - "learning_rate": 2.572378852986811e-05, - "loss": 0.3118, - "step": 7221500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5721692764238664e-05, - "loss": 0.3188, - "step": 7222000 - }, - { - "epoch": 4.33, - "learning_rate": 2.57195927986781e-05, - "loss": 0.3075, - "step": 7222500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5717492833117538e-05, - "loss": 0.3194, - "step": 7223000 - }, - { - "epoch": 4.33, - "learning_rate": 2.571539286755697e-05, - "loss": 0.3145, - "step": 7223500 - }, - { - "epoch": 4.33, - "learning_rate": 2.571329290199641e-05, - "loss": 0.3131, - "step": 7224000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5711192936435842e-05, - "loss": 0.3153, - "step": 7224500 - }, - { - "epoch": 4.33, - "learning_rate": 2.57090971708064e-05, - "loss": 0.3235, - "step": 7225000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5706997205245836e-05, - "loss": 0.3126, - "step": 7225500 - }, - { - "epoch": 4.33, - "learning_rate": 2.570489723968527e-05, - "loss": 0.3182, - "step": 7226000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5702801474055823e-05, - "loss": 0.3209, - "step": 7226500 - }, - { - "epoch": 4.33, - "learning_rate": 2.570070150849526e-05, - "loss": 0.3216, - "step": 7227000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5698601542934696e-05, - "loss": 0.3089, - "step": 7227500 - }, - { - "epoch": 4.33, - "learning_rate": 2.569650157737413e-05, - "loss": 0.3101, - "step": 7228000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5694401611813567e-05, - "loss": 0.3157, - "step": 7228500 - }, - { - "epoch": 4.33, - "learning_rate": 2.5692301646253e-05, - "loss": 0.3099, - "step": 7229000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5690201680692437e-05, - "loss": 0.3088, - "step": 7229500 - }, - { - "epoch": 4.33, - "learning_rate": 2.568810171513187e-05, - "loss": 0.3251, - "step": 7230000 - }, - { - "epoch": 4.33, - "learning_rate": 2.5686005949502427e-05, - "loss": 0.3101, - "step": 7230500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5683905983941864e-05, - "loss": 0.3151, - "step": 7231000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5681806018381298e-05, - "loss": 0.3119, - "step": 7231500 - }, - { - "epoch": 4.34, - "learning_rate": 2.567970605282073e-05, - "loss": 0.3059, - "step": 7232000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5677606087260168e-05, - "loss": 0.3143, - "step": 7232500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5675506121699605e-05, - "loss": 0.3039, - "step": 7233000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5673410356070162e-05, - "loss": 0.3135, - "step": 7233500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5671310390509595e-05, - "loss": 0.3079, - "step": 7234000 - }, - { - "epoch": 4.34, - "learning_rate": 2.566921042494903e-05, - "loss": 0.3086, - "step": 7234500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5667110459388466e-05, - "loss": 0.3104, - "step": 7235000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5665010493827902e-05, - "loss": 0.3137, - "step": 7235500 - }, - { - "epoch": 4.34, - "learning_rate": 2.566291472819846e-05, - "loss": 0.306, - "step": 7236000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5660814762637893e-05, - "loss": 0.309, - "step": 7236500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5658714797077326e-05, - "loss": 0.3102, - "step": 7237000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5656614831516763e-05, - "loss": 0.3127, - "step": 7237500 - }, - { - "epoch": 4.34, - "learning_rate": 2.565451906588732e-05, - "loss": 0.3113, - "step": 7238000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5652419100326754e-05, - "loss": 0.3109, - "step": 7238500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5650319134766187e-05, - "loss": 0.3078, - "step": 7239000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5648219169205624e-05, - "loss": 0.305, - "step": 7239500 - }, - { - "epoch": 4.34, - "learning_rate": 2.564611920364506e-05, - "loss": 0.3146, - "step": 7240000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5644019238084494e-05, - "loss": 0.3053, - "step": 7240500 - }, - { - "epoch": 4.34, - "learning_rate": 2.564192347245505e-05, - "loss": 0.3039, - "step": 7241000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5639823506894485e-05, - "loss": 0.3178, - "step": 7241500 - }, - { - "epoch": 4.34, - "learning_rate": 2.563772354133392e-05, - "loss": 0.3222, - "step": 7242000 - }, - { - "epoch": 4.34, - "learning_rate": 2.563562357577336e-05, - "loss": 0.3165, - "step": 7242500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5633523610212792e-05, - "loss": 0.3091, - "step": 7243000 - }, - { - "epoch": 4.34, - "learning_rate": 2.563142784458335e-05, - "loss": 0.3077, - "step": 7243500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5629327879022782e-05, - "loss": 0.3101, - "step": 7244000 - }, - { - "epoch": 4.34, - "learning_rate": 2.562722791346222e-05, - "loss": 0.3076, - "step": 7244500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5625127947901656e-05, - "loss": 0.3131, - "step": 7245000 - }, - { - "epoch": 4.34, - "learning_rate": 2.5623032182272213e-05, - "loss": 0.3021, - "step": 7245500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5620932216711643e-05, - "loss": 0.3121, - "step": 7246000 - }, - { - "epoch": 4.34, - "learning_rate": 2.561883225115108e-05, - "loss": 0.3113, - "step": 7246500 - }, - { - "epoch": 4.34, - "learning_rate": 2.5616732285590517e-05, - "loss": 0.3119, - "step": 7247000 - }, - { - "epoch": 4.35, - "learning_rate": 2.561463232002995e-05, - "loss": 0.3121, - "step": 7247500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5612532354469387e-05, - "loss": 0.3053, - "step": 7248000 - }, - { - "epoch": 4.35, - "learning_rate": 2.561043238890882e-05, - "loss": 0.3114, - "step": 7248500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5608332423348257e-05, - "loss": 0.3093, - "step": 7249000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5606236657718814e-05, - "loss": 0.314, - "step": 7249500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5604136692158248e-05, - "loss": 0.3103, - "step": 7250000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5602036726597685e-05, - "loss": 0.3142, - "step": 7250500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5599936761037118e-05, - "loss": 0.3097, - "step": 7251000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5597836795476555e-05, - "loss": 0.3106, - "step": 7251500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5595741029847112e-05, - "loss": 0.3094, - "step": 7252000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5593641064286545e-05, - "loss": 0.3174, - "step": 7252500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5591541098725982e-05, - "loss": 0.3167, - "step": 7253000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5589441133165416e-05, - "loss": 0.3104, - "step": 7253500 - }, - { - "epoch": 4.35, - "learning_rate": 2.558734116760485e-05, - "loss": 0.3144, - "step": 7254000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5585241202044286e-05, - "loss": 0.3163, - "step": 7254500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5583141236483723e-05, - "loss": 0.3119, - "step": 7255000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5581041270923156e-05, - "loss": 0.3099, - "step": 7255500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5578945505293713e-05, - "loss": 0.3053, - "step": 7256000 - }, - { - "epoch": 4.35, - "learning_rate": 2.557684973966427e-05, - "loss": 0.3072, - "step": 7256500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5574749774103704e-05, - "loss": 0.3107, - "step": 7257000 - }, - { - "epoch": 4.35, - "learning_rate": 2.557264980854314e-05, - "loss": 0.3096, - "step": 7257500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5570549842982574e-05, - "loss": 0.3195, - "step": 7258000 - }, - { - "epoch": 4.35, - "learning_rate": 2.556844987742201e-05, - "loss": 0.3083, - "step": 7258500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5566354111792568e-05, - "loss": 0.3122, - "step": 7259000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5564254146232e-05, - "loss": 0.3091, - "step": 7259500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5562154180671438e-05, - "loss": 0.3144, - "step": 7260000 - }, - { - "epoch": 4.35, - "learning_rate": 2.556005421511087e-05, - "loss": 0.3042, - "step": 7260500 - }, - { - "epoch": 4.35, - "learning_rate": 2.5557954249550305e-05, - "loss": 0.3137, - "step": 7261000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5555858483920865e-05, - "loss": 0.3091, - "step": 7261500 - }, - { - "epoch": 4.35, - "learning_rate": 2.55537585183603e-05, - "loss": 0.3125, - "step": 7262000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5551658552799736e-05, - "loss": 0.3174, - "step": 7262500 - }, - { - "epoch": 4.35, - "learning_rate": 2.554955858723917e-05, - "loss": 0.3111, - "step": 7263000 - }, - { - "epoch": 4.35, - "learning_rate": 2.5547458621678602e-05, - "loss": 0.3114, - "step": 7263500 - }, - { - "epoch": 4.36, - "learning_rate": 2.554535865611804e-05, - "loss": 0.3068, - "step": 7264000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5543258690557476e-05, - "loss": 0.306, - "step": 7264500 - }, - { - "epoch": 4.36, - "learning_rate": 2.554116292492803e-05, - "loss": 0.3132, - "step": 7265000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5539062959367467e-05, - "loss": 0.3112, - "step": 7265500 - }, - { - "epoch": 4.36, - "learning_rate": 2.55369629938069e-05, - "loss": 0.3149, - "step": 7266000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5534863028246337e-05, - "loss": 0.3039, - "step": 7266500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5532763062685774e-05, - "loss": 0.3234, - "step": 7267000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5530667297056327e-05, - "loss": 0.3244, - "step": 7267500 - }, - { - "epoch": 4.36, - "learning_rate": 2.552856733149576e-05, - "loss": 0.3152, - "step": 7268000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5526467365935198e-05, - "loss": 0.3137, - "step": 7268500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5524367400374634e-05, - "loss": 0.3107, - "step": 7269000 - }, - { - "epoch": 4.36, - "learning_rate": 2.552227163474519e-05, - "loss": 0.316, - "step": 7269500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5520171669184625e-05, - "loss": 0.3125, - "step": 7270000 - }, - { - "epoch": 4.36, - "learning_rate": 2.551807170362406e-05, - "loss": 0.3153, - "step": 7270500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5515971738063495e-05, - "loss": 0.3113, - "step": 7271000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5513871772502932e-05, - "loss": 0.3109, - "step": 7271500 - }, - { - "epoch": 4.36, - "learning_rate": 2.551177600687349e-05, - "loss": 0.3124, - "step": 7272000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5509676041312922e-05, - "loss": 0.3219, - "step": 7272500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5507576075752356e-05, - "loss": 0.3169, - "step": 7273000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5505476110191793e-05, - "loss": 0.312, - "step": 7273500 - }, - { - "epoch": 4.36, - "learning_rate": 2.550337614463123e-05, - "loss": 0.3052, - "step": 7274000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5501280379001787e-05, - "loss": 0.3135, - "step": 7274500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5499180413441217e-05, - "loss": 0.3057, - "step": 7275000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5497080447880654e-05, - "loss": 0.3113, - "step": 7275500 - }, - { - "epoch": 4.36, - "learning_rate": 2.549498048232009e-05, - "loss": 0.3071, - "step": 7276000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5492880516759524e-05, - "loss": 0.3255, - "step": 7276500 - }, - { - "epoch": 4.36, - "learning_rate": 2.549078475113008e-05, - "loss": 0.3067, - "step": 7277000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5488684785569514e-05, - "loss": 0.3067, - "step": 7277500 - }, - { - "epoch": 4.36, - "learning_rate": 2.548658482000895e-05, - "loss": 0.3118, - "step": 7278000 - }, - { - "epoch": 4.36, - "learning_rate": 2.5484484854448388e-05, - "loss": 0.3179, - "step": 7278500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5482389088818945e-05, - "loss": 0.2992, - "step": 7279000 - }, - { - "epoch": 4.36, - "learning_rate": 2.548028912325838e-05, - "loss": 0.315, - "step": 7279500 - }, - { - "epoch": 4.36, - "learning_rate": 2.5478189157697812e-05, - "loss": 0.3158, - "step": 7280000 - }, - { - "epoch": 4.36, - "learning_rate": 2.547608919213725e-05, - "loss": 0.3193, - "step": 7280500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5473993426507806e-05, - "loss": 0.3109, - "step": 7281000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5471893460947242e-05, - "loss": 0.3142, - "step": 7281500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5469793495386673e-05, - "loss": 0.3138, - "step": 7282000 - }, - { - "epoch": 4.37, - "learning_rate": 2.546769352982611e-05, - "loss": 0.3202, - "step": 7282500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5465597764196666e-05, - "loss": 0.3169, - "step": 7283000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5463497798636103e-05, - "loss": 0.3142, - "step": 7283500 - }, - { - "epoch": 4.37, - "learning_rate": 2.546139783307554e-05, - "loss": 0.3133, - "step": 7284000 - }, - { - "epoch": 4.37, - "learning_rate": 2.545929786751497e-05, - "loss": 0.3128, - "step": 7284500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5457197901954407e-05, - "loss": 0.3048, - "step": 7285000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5455097936393844e-05, - "loss": 0.3181, - "step": 7285500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5452997970833277e-05, - "loss": 0.3099, - "step": 7286000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5450898005272714e-05, - "loss": 0.3148, - "step": 7286500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5448802239643268e-05, - "loss": 0.3087, - "step": 7287000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5446702274082705e-05, - "loss": 0.3168, - "step": 7287500 - }, - { - "epoch": 4.37, - "learning_rate": 2.544460650845326e-05, - "loss": 0.3159, - "step": 7288000 - }, - { - "epoch": 4.37, - "learning_rate": 2.54425065428927e-05, - "loss": 0.3066, - "step": 7288500 - }, - { - "epoch": 4.37, - "learning_rate": 2.544040657733213e-05, - "loss": 0.3108, - "step": 7289000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5438306611771565e-05, - "loss": 0.3078, - "step": 7289500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5436206646211002e-05, - "loss": 0.3112, - "step": 7290000 - }, - { - "epoch": 4.37, - "learning_rate": 2.543410668065044e-05, - "loss": 0.3102, - "step": 7290500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5432010915020996e-05, - "loss": 0.3119, - "step": 7291000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5429910949460426e-05, - "loss": 0.3091, - "step": 7291500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5427810983899863e-05, - "loss": 0.3103, - "step": 7292000 - }, - { - "epoch": 4.37, - "learning_rate": 2.54257110183393e-05, - "loss": 0.3078, - "step": 7292500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5423615252709857e-05, - "loss": 0.3057, - "step": 7293000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5421515287149293e-05, - "loss": 0.3127, - "step": 7293500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5419415321588724e-05, - "loss": 0.3257, - "step": 7294000 - }, - { - "epoch": 4.37, - "learning_rate": 2.541731535602816e-05, - "loss": 0.3184, - "step": 7294500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5415215390467597e-05, - "loss": 0.3055, - "step": 7295000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5413119624838154e-05, - "loss": 0.3169, - "step": 7295500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5411019659277584e-05, - "loss": 0.3095, - "step": 7296000 - }, - { - "epoch": 4.37, - "learning_rate": 2.5408923893648145e-05, - "loss": 0.3117, - "step": 7296500 - }, - { - "epoch": 4.37, - "learning_rate": 2.5406823928087578e-05, - "loss": 0.3085, - "step": 7297000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5404723962527015e-05, - "loss": 0.3067, - "step": 7297500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5402623996966452e-05, - "loss": 0.3118, - "step": 7298000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5400524031405882e-05, - "loss": 0.3148, - "step": 7298500 - }, - { - "epoch": 4.38, - "learning_rate": 2.539842406584532e-05, - "loss": 0.2993, - "step": 7299000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5396324100284756e-05, - "loss": 0.3137, - "step": 7299500 - }, - { - "epoch": 4.38, - "learning_rate": 2.539422413472419e-05, - "loss": 0.3115, - "step": 7300000 - }, - { - "epoch": 4.38, - "eval_loss": 0.3086244761943817, - "eval_runtime": 1124.4663, - "eval_samples_per_second": 468.418, - "eval_steps_per_second": 78.07, - "step": 7300000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5392124169163626e-05, - "loss": 0.3085, - "step": 7300500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5390024203603063e-05, - "loss": 0.3183, - "step": 7301000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5387924238042496e-05, - "loss": 0.311, - "step": 7301500 - }, - { - "epoch": 4.38, - "learning_rate": 2.538582427248193e-05, - "loss": 0.3137, - "step": 7302000 - }, - { - "epoch": 4.38, - "learning_rate": 2.538373270678361e-05, - "loss": 0.3144, - "step": 7302500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5381632741223044e-05, - "loss": 0.3105, - "step": 7303000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5379532775662477e-05, - "loss": 0.3154, - "step": 7303500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5377432810101914e-05, - "loss": 0.3156, - "step": 7304000 - }, - { - "epoch": 4.38, - "learning_rate": 2.537533284454135e-05, - "loss": 0.3173, - "step": 7304500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5373232878980784e-05, - "loss": 0.318, - "step": 7305000 - }, - { - "epoch": 4.38, - "learning_rate": 2.537113291342022e-05, - "loss": 0.3082, - "step": 7305500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5369037147790775e-05, - "loss": 0.3094, - "step": 7306000 - }, - { - "epoch": 4.38, - "learning_rate": 2.536693718223021e-05, - "loss": 0.3131, - "step": 7306500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5364837216669645e-05, - "loss": 0.3092, - "step": 7307000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5362737251109082e-05, - "loss": 0.3214, - "step": 7307500 - }, - { - "epoch": 4.38, - "learning_rate": 2.536063728554852e-05, - "loss": 0.3176, - "step": 7308000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5358537319987952e-05, - "loss": 0.3157, - "step": 7308500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5356437354427386e-05, - "loss": 0.3198, - "step": 7309000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5354341588797942e-05, - "loss": 0.3117, - "step": 7309500 - }, - { - "epoch": 4.38, - "learning_rate": 2.535224162323738e-05, - "loss": 0.3119, - "step": 7310000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5350141657676816e-05, - "loss": 0.3146, - "step": 7310500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5348041692116246e-05, - "loss": 0.3152, - "step": 7311000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5345941726555683e-05, - "loss": 0.3061, - "step": 7311500 - }, - { - "epoch": 4.38, - "learning_rate": 2.534384596092624e-05, - "loss": 0.3163, - "step": 7312000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5341745995365677e-05, - "loss": 0.3036, - "step": 7312500 - }, - { - "epoch": 4.38, - "learning_rate": 2.5339646029805114e-05, - "loss": 0.3192, - "step": 7313000 - }, - { - "epoch": 4.38, - "learning_rate": 2.5337546064244544e-05, - "loss": 0.3089, - "step": 7313500 - }, - { - "epoch": 4.39, - "learning_rate": 2.53354502986151e-05, - "loss": 0.3158, - "step": 7314000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5333350333054538e-05, - "loss": 0.3043, - "step": 7314500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5331250367493974e-05, - "loss": 0.3153, - "step": 7315000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5329150401933408e-05, - "loss": 0.3106, - "step": 7315500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5327054636303965e-05, - "loss": 0.3106, - "step": 7316000 - }, - { - "epoch": 4.39, - "learning_rate": 2.53249546707434e-05, - "loss": 0.3092, - "step": 7316500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5322854705182835e-05, - "loss": 0.3131, - "step": 7317000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5320754739622272e-05, - "loss": 0.3086, - "step": 7317500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5318654774061702e-05, - "loss": 0.3098, - "step": 7318000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5316559008432262e-05, - "loss": 0.3125, - "step": 7318500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5314459042871696e-05, - "loss": 0.3167, - "step": 7319000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5312359077311133e-05, - "loss": 0.3197, - "step": 7319500 - }, - { - "epoch": 4.39, - "learning_rate": 2.531025911175057e-05, - "loss": 0.3132, - "step": 7320000 - }, - { - "epoch": 4.39, - "learning_rate": 2.530815914619e-05, - "loss": 0.3163, - "step": 7320500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5306059180629437e-05, - "loss": 0.318, - "step": 7321000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5303959215068873e-05, - "loss": 0.318, - "step": 7321500 - }, - { - "epoch": 4.39, - "learning_rate": 2.530186344943943e-05, - "loss": 0.3112, - "step": 7322000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5299763483878864e-05, - "loss": 0.3084, - "step": 7322500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5297663518318297e-05, - "loss": 0.3191, - "step": 7323000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5295563552757734e-05, - "loss": 0.311, - "step": 7323500 - }, - { - "epoch": 4.39, - "learning_rate": 2.529346358719717e-05, - "loss": 0.3085, - "step": 7324000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5291367821567728e-05, - "loss": 0.3029, - "step": 7324500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5289267856007158e-05, - "loss": 0.31, - "step": 7325000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5287167890446595e-05, - "loss": 0.3165, - "step": 7325500 - }, - { - "epoch": 4.39, - "learning_rate": 2.528506792488603e-05, - "loss": 0.3158, - "step": 7326000 - }, - { - "epoch": 4.39, - "learning_rate": 2.528296795932547e-05, - "loss": 0.3109, - "step": 7326500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5280872193696025e-05, - "loss": 0.3091, - "step": 7327000 - }, - { - "epoch": 4.39, - "learning_rate": 2.5278772228135456e-05, - "loss": 0.3069, - "step": 7327500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5276672262574892e-05, - "loss": 0.3098, - "step": 7328000 - }, - { - "epoch": 4.39, - "learning_rate": 2.527457229701433e-05, - "loss": 0.312, - "step": 7328500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5272472331453763e-05, - "loss": 0.3082, - "step": 7329000 - }, - { - "epoch": 4.39, - "learning_rate": 2.52703723658932e-05, - "loss": 0.3106, - "step": 7329500 - }, - { - "epoch": 4.39, - "learning_rate": 2.5268276600263753e-05, - "loss": 0.3229, - "step": 7330000 - }, - { - "epoch": 4.39, - "learning_rate": 2.526617663470319e-05, - "loss": 0.3087, - "step": 7330500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5264076669142627e-05, - "loss": 0.309, - "step": 7331000 - }, - { - "epoch": 4.4, - "learning_rate": 2.526197670358206e-05, - "loss": 0.3106, - "step": 7331500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5259876738021497e-05, - "loss": 0.3113, - "step": 7332000 - }, - { - "epoch": 4.4, - "learning_rate": 2.525777677246093e-05, - "loss": 0.3159, - "step": 7332500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5255676806900364e-05, - "loss": 0.31, - "step": 7333000 - }, - { - "epoch": 4.4, - "learning_rate": 2.52535768413398e-05, - "loss": 0.3146, - "step": 7333500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5251481075710358e-05, - "loss": 0.3159, - "step": 7334000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5249381110149795e-05, - "loss": 0.3118, - "step": 7334500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5247281144589228e-05, - "loss": 0.3157, - "step": 7335000 - }, - { - "epoch": 4.4, - "learning_rate": 2.524518117902866e-05, - "loss": 0.3137, - "step": 7335500 - }, - { - "epoch": 4.4, - "learning_rate": 2.52430812134681e-05, - "loss": 0.3229, - "step": 7336000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5240985447838655e-05, - "loss": 0.3144, - "step": 7336500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5238885482278092e-05, - "loss": 0.3107, - "step": 7337000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5236785516717526e-05, - "loss": 0.3154, - "step": 7337500 - }, - { - "epoch": 4.4, - "learning_rate": 2.523468555115696e-05, - "loss": 0.3087, - "step": 7338000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5232585585596396e-05, - "loss": 0.3094, - "step": 7338500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5230485620035833e-05, - "loss": 0.3141, - "step": 7339000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5228385654475266e-05, - "loss": 0.3162, - "step": 7339500 - }, - { - "epoch": 4.4, - "learning_rate": 2.52262856889147e-05, - "loss": 0.3128, - "step": 7340000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5224189923285257e-05, - "loss": 0.3147, - "step": 7340500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5222089957724694e-05, - "loss": 0.3167, - "step": 7341000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5219989992164127e-05, - "loss": 0.3131, - "step": 7341500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5217890026603564e-05, - "loss": 0.3058, - "step": 7342000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5215790061042997e-05, - "loss": 0.3092, - "step": 7342500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5213690095482434e-05, - "loss": 0.3158, - "step": 7343000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5211590129921868e-05, - "loss": 0.3084, - "step": 7343500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5209490164361305e-05, - "loss": 0.3088, - "step": 7344000 - }, - { - "epoch": 4.4, - "learning_rate": 2.520739439873186e-05, - "loss": 0.3266, - "step": 7344500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5205294433171295e-05, - "loss": 0.314, - "step": 7345000 - }, - { - "epoch": 4.4, - "learning_rate": 2.520319446761073e-05, - "loss": 0.3127, - "step": 7345500 - }, - { - "epoch": 4.4, - "learning_rate": 2.5201094502050165e-05, - "loss": 0.3148, - "step": 7346000 - }, - { - "epoch": 4.4, - "learning_rate": 2.5198994536489602e-05, - "loss": 0.3152, - "step": 7346500 - }, - { - "epoch": 4.4, - "learning_rate": 2.519689877086016e-05, - "loss": 0.3076, - "step": 7347000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5194798805299593e-05, - "loss": 0.3183, - "step": 7347500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5192698839739026e-05, - "loss": 0.3093, - "step": 7348000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5190598874178463e-05, - "loss": 0.3098, - "step": 7348500 - }, - { - "epoch": 4.41, - "learning_rate": 2.51884989086179e-05, - "loss": 0.3218, - "step": 7349000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5186403142988457e-05, - "loss": 0.3137, - "step": 7349500 - }, - { - "epoch": 4.41, - "learning_rate": 2.518430317742789e-05, - "loss": 0.3039, - "step": 7350000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5182203211867324e-05, - "loss": 0.3148, - "step": 7350500 - }, - { - "epoch": 4.41, - "learning_rate": 2.518010324630676e-05, - "loss": 0.3099, - "step": 7351000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5178003280746197e-05, - "loss": 0.3152, - "step": 7351500 - }, - { - "epoch": 4.41, - "learning_rate": 2.517590751511675e-05, - "loss": 0.3163, - "step": 7352000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5173807549556188e-05, - "loss": 0.3089, - "step": 7352500 - }, - { - "epoch": 4.41, - "learning_rate": 2.517170758399562e-05, - "loss": 0.3124, - "step": 7353000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5169607618435058e-05, - "loss": 0.3026, - "step": 7353500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5167511852805615e-05, - "loss": 0.3206, - "step": 7354000 - }, - { - "epoch": 4.41, - "learning_rate": 2.516541188724505e-05, - "loss": 0.308, - "step": 7354500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5163311921684482e-05, - "loss": 0.3161, - "step": 7355000 - }, - { - "epoch": 4.41, - "learning_rate": 2.516121195612392e-05, - "loss": 0.3123, - "step": 7355500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5159111990563356e-05, - "loss": 0.3104, - "step": 7356000 - }, - { - "epoch": 4.41, - "learning_rate": 2.515701202500279e-05, - "loss": 0.3112, - "step": 7356500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5154912059442226e-05, - "loss": 0.3217, - "step": 7357000 - }, - { - "epoch": 4.41, - "learning_rate": 2.515281209388166e-05, - "loss": 0.3063, - "step": 7357500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5150716328252216e-05, - "loss": 0.3115, - "step": 7358000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5148616362691653e-05, - "loss": 0.3093, - "step": 7358500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5146516397131087e-05, - "loss": 0.3106, - "step": 7359000 - }, - { - "epoch": 4.41, - "learning_rate": 2.514441643157052e-05, - "loss": 0.3122, - "step": 7359500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5142316466009957e-05, - "loss": 0.3164, - "step": 7360000 - }, - { - "epoch": 4.41, - "learning_rate": 2.514021650044939e-05, - "loss": 0.3182, - "step": 7360500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5138116534888827e-05, - "loss": 0.3113, - "step": 7361000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5136020769259384e-05, - "loss": 0.315, - "step": 7361500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5133920803698818e-05, - "loss": 0.3057, - "step": 7362000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5131820838138254e-05, - "loss": 0.3137, - "step": 7362500 - }, - { - "epoch": 4.41, - "learning_rate": 2.5129720872577688e-05, - "loss": 0.3125, - "step": 7363000 - }, - { - "epoch": 4.41, - "learning_rate": 2.5127620907017125e-05, - "loss": 0.3147, - "step": 7363500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5125525141387682e-05, - "loss": 0.3127, - "step": 7364000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5123425175827115e-05, - "loss": 0.3138, - "step": 7364500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5121325210266552e-05, - "loss": 0.3162, - "step": 7365000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5119225244705986e-05, - "loss": 0.3115, - "step": 7365500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5117125279145422e-05, - "loss": 0.3185, - "step": 7366000 - }, - { - "epoch": 4.42, - "learning_rate": 2.511502531358486e-05, - "loss": 0.317, - "step": 7366500 - }, - { - "epoch": 4.42, - "learning_rate": 2.511292534802429e-05, - "loss": 0.3093, - "step": 7367000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5110825382463726e-05, - "loss": 0.3132, - "step": 7367500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5108729616834283e-05, - "loss": 0.3101, - "step": 7368000 - }, - { - "epoch": 4.42, - "learning_rate": 2.510662965127372e-05, - "loss": 0.3063, - "step": 7368500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5104533885644274e-05, - "loss": 0.3307, - "step": 7369000 - }, - { - "epoch": 4.42, - "learning_rate": 2.510243392008371e-05, - "loss": 0.3178, - "step": 7369500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5100333954523144e-05, - "loss": 0.3236, - "step": 7370000 - }, - { - "epoch": 4.42, - "learning_rate": 2.509823398896258e-05, - "loss": 0.3094, - "step": 7370500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5096134023402018e-05, - "loss": 0.3061, - "step": 7371000 - }, - { - "epoch": 4.42, - "learning_rate": 2.509403825777257e-05, - "loss": 0.3138, - "step": 7371500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5091938292212008e-05, - "loss": 0.3083, - "step": 7372000 - }, - { - "epoch": 4.42, - "learning_rate": 2.508983832665144e-05, - "loss": 0.3171, - "step": 7372500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5087738361090878e-05, - "loss": 0.3163, - "step": 7373000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5085638395530315e-05, - "loss": 0.3108, - "step": 7373500 - }, - { - "epoch": 4.42, - "learning_rate": 2.508353842996975e-05, - "loss": 0.3093, - "step": 7374000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5081438464409182e-05, - "loss": 0.3096, - "step": 7374500 - }, - { - "epoch": 4.42, - "learning_rate": 2.507933849884862e-05, - "loss": 0.318, - "step": 7375000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5077242733219176e-05, - "loss": 0.3103, - "step": 7375500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5075146967589733e-05, - "loss": 0.3148, - "step": 7376000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5073047002029166e-05, - "loss": 0.3077, - "step": 7376500 - }, - { - "epoch": 4.42, - "learning_rate": 2.50709470364686e-05, - "loss": 0.3158, - "step": 7377000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5068847070908037e-05, - "loss": 0.3137, - "step": 7377500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5066751305278594e-05, - "loss": 0.3128, - "step": 7378000 - }, - { - "epoch": 4.42, - "learning_rate": 2.506465133971803e-05, - "loss": 0.3031, - "step": 7378500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5062551374157464e-05, - "loss": 0.3201, - "step": 7379000 - }, - { - "epoch": 4.42, - "learning_rate": 2.5060451408596897e-05, - "loss": 0.315, - "step": 7379500 - }, - { - "epoch": 4.42, - "learning_rate": 2.5058351443036334e-05, - "loss": 0.3087, - "step": 7380000 - }, - { - "epoch": 4.42, - "learning_rate": 2.505625567740689e-05, - "loss": 0.3176, - "step": 7380500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5054155711846325e-05, - "loss": 0.3195, - "step": 7381000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5052055746285758e-05, - "loss": 0.3131, - "step": 7381500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5049955780725195e-05, - "loss": 0.3188, - "step": 7382000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5047855815164632e-05, - "loss": 0.315, - "step": 7382500 - }, - { - "epoch": 4.43, - "learning_rate": 2.504576004953519e-05, - "loss": 0.3064, - "step": 7383000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5043660083974622e-05, - "loss": 0.3157, - "step": 7383500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5041560118414056e-05, - "loss": 0.3195, - "step": 7384000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5039460152853492e-05, - "loss": 0.3165, - "step": 7384500 - }, - { - "epoch": 4.43, - "learning_rate": 2.503736438722405e-05, - "loss": 0.3063, - "step": 7385000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5035264421663486e-05, - "loss": 0.3122, - "step": 7385500 - }, - { - "epoch": 4.43, - "learning_rate": 2.503316445610292e-05, - "loss": 0.3134, - "step": 7386000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5031064490542353e-05, - "loss": 0.3124, - "step": 7386500 - }, - { - "epoch": 4.43, - "learning_rate": 2.502896452498179e-05, - "loss": 0.3273, - "step": 7387000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5026868759352347e-05, - "loss": 0.3078, - "step": 7387500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5024768793791784e-05, - "loss": 0.3163, - "step": 7388000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5022668828231217e-05, - "loss": 0.304, - "step": 7388500 - }, - { - "epoch": 4.43, - "learning_rate": 2.502056886267065e-05, - "loss": 0.3183, - "step": 7389000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5018473097041208e-05, - "loss": 0.312, - "step": 7389500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5016373131480645e-05, - "loss": 0.3048, - "step": 7390000 - }, - { - "epoch": 4.43, - "learning_rate": 2.5014273165920078e-05, - "loss": 0.3181, - "step": 7390500 - }, - { - "epoch": 4.43, - "learning_rate": 2.501217320035951e-05, - "loss": 0.3023, - "step": 7391000 - }, - { - "epoch": 4.43, - "learning_rate": 2.501007323479895e-05, - "loss": 0.3146, - "step": 7391500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5007973269238385e-05, - "loss": 0.3157, - "step": 7392000 - }, - { - "epoch": 4.43, - "learning_rate": 2.500587330367782e-05, - "loss": 0.3127, - "step": 7392500 - }, - { - "epoch": 4.43, - "learning_rate": 2.5003773338117255e-05, - "loss": 0.3065, - "step": 7393000 - }, - { - "epoch": 4.43, - "learning_rate": 2.500167757248781e-05, - "loss": 0.3028, - "step": 7393500 - }, - { - "epoch": 4.43, - "learning_rate": 2.4999577606927246e-05, - "loss": 0.3007, - "step": 7394000 - }, - { - "epoch": 4.43, - "learning_rate": 2.4997477641366683e-05, - "loss": 0.3083, - "step": 7394500 - }, - { - "epoch": 4.43, - "learning_rate": 2.4995377675806116e-05, - "loss": 0.3116, - "step": 7395000 - }, - { - "epoch": 4.43, - "learning_rate": 2.4993281910176673e-05, - "loss": 0.315, - "step": 7395500 - }, - { - "epoch": 4.43, - "learning_rate": 2.4991181944616107e-05, - "loss": 0.3167, - "step": 7396000 - }, - { - "epoch": 4.43, - "learning_rate": 2.4989086178986664e-05, - "loss": 0.3067, - "step": 7396500 - }, - { - "epoch": 4.43, - "learning_rate": 2.49869862134261e-05, - "loss": 0.3133, - "step": 7397000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4984886247865537e-05, - "loss": 0.311, - "step": 7397500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4982786282304967e-05, - "loss": 0.3117, - "step": 7398000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4980686316744404e-05, - "loss": 0.3078, - "step": 7398500 - }, - { - "epoch": 4.44, - "learning_rate": 2.497859055111496e-05, - "loss": 0.3115, - "step": 7399000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4976490585554398e-05, - "loss": 0.3124, - "step": 7399500 - }, - { - "epoch": 4.44, - "learning_rate": 2.497439061999383e-05, - "loss": 0.3148, - "step": 7400000 - }, - { - "epoch": 4.44, - "eval_loss": 0.3081914484500885, - "eval_runtime": 1115.8311, - "eval_samples_per_second": 472.043, - "eval_steps_per_second": 78.674, - "step": 7400000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4972290654433265e-05, - "loss": 0.3158, - "step": 7400500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4970190688872702e-05, - "loss": 0.3014, - "step": 7401000 - }, - { - "epoch": 4.44, - "learning_rate": 2.496809492324326e-05, - "loss": 0.3116, - "step": 7401500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4965994957682696e-05, - "loss": 0.3274, - "step": 7402000 - }, - { - "epoch": 4.44, - "learning_rate": 2.496389499212213e-05, - "loss": 0.3174, - "step": 7402500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4961795026561562e-05, - "loss": 0.3131, - "step": 7403000 - }, - { - "epoch": 4.44, - "learning_rate": 2.495969926093212e-05, - "loss": 0.3131, - "step": 7403500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4957599295371556e-05, - "loss": 0.3124, - "step": 7404000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4955499329810993e-05, - "loss": 0.3127, - "step": 7404500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4953399364250423e-05, - "loss": 0.3135, - "step": 7405000 - }, - { - "epoch": 4.44, - "learning_rate": 2.495129939868986e-05, - "loss": 0.3088, - "step": 7405500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4949199433129297e-05, - "loss": 0.3101, - "step": 7406000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4947103667499854e-05, - "loss": 0.3095, - "step": 7406500 - }, - { - "epoch": 4.44, - "learning_rate": 2.494500370193929e-05, - "loss": 0.3088, - "step": 7407000 - }, - { - "epoch": 4.44, - "learning_rate": 2.494290373637872e-05, - "loss": 0.3059, - "step": 7407500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4940803770818158e-05, - "loss": 0.3104, - "step": 7408000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4938708005188715e-05, - "loss": 0.3113, - "step": 7408500 - }, - { - "epoch": 4.44, - "learning_rate": 2.493660803962815e-05, - "loss": 0.3127, - "step": 7409000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4934508074067585e-05, - "loss": 0.3089, - "step": 7409500 - }, - { - "epoch": 4.44, - "learning_rate": 2.493240810850702e-05, - "loss": 0.319, - "step": 7410000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4930308142946455e-05, - "loss": 0.3111, - "step": 7410500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4928212377317012e-05, - "loss": 0.3062, - "step": 7411000 - }, - { - "epoch": 4.44, - "learning_rate": 2.492611241175645e-05, - "loss": 0.3085, - "step": 7411500 - }, - { - "epoch": 4.44, - "learning_rate": 2.492401244619588e-05, - "loss": 0.3101, - "step": 7412000 - }, - { - "epoch": 4.44, - "learning_rate": 2.4921912480635316e-05, - "loss": 0.3103, - "step": 7412500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4919816715005873e-05, - "loss": 0.317, - "step": 7413000 - }, - { - "epoch": 4.44, - "learning_rate": 2.491771674944531e-05, - "loss": 0.3121, - "step": 7413500 - }, - { - "epoch": 4.44, - "learning_rate": 2.4915616783884747e-05, - "loss": 0.319, - "step": 7414000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4913516818324177e-05, - "loss": 0.3146, - "step": 7414500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4911416852763614e-05, - "loss": 0.3102, - "step": 7415000 - }, - { - "epoch": 4.45, - "learning_rate": 2.490932108713417e-05, - "loss": 0.3253, - "step": 7415500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4907221121573607e-05, - "loss": 0.314, - "step": 7416000 - }, - { - "epoch": 4.45, - "learning_rate": 2.490512115601304e-05, - "loss": 0.3045, - "step": 7416500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4903021190452474e-05, - "loss": 0.3085, - "step": 7417000 - }, - { - "epoch": 4.45, - "learning_rate": 2.490092542482303e-05, - "loss": 0.3051, - "step": 7417500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4898825459262468e-05, - "loss": 0.3217, - "step": 7418000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4896725493701905e-05, - "loss": 0.3062, - "step": 7418500 - }, - { - "epoch": 4.45, - "learning_rate": 2.489462972807246e-05, - "loss": 0.3049, - "step": 7419000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4892529762511895e-05, - "loss": 0.3115, - "step": 7419500 - }, - { - "epoch": 4.45, - "learning_rate": 2.489042979695133e-05, - "loss": 0.3144, - "step": 7420000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4888329831390766e-05, - "loss": 0.3158, - "step": 7420500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4886229865830202e-05, - "loss": 0.3156, - "step": 7421000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4884129900269633e-05, - "loss": 0.3071, - "step": 7421500 - }, - { - "epoch": 4.45, - "learning_rate": 2.488202993470907e-05, - "loss": 0.3092, - "step": 7422000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4879929969148506e-05, - "loss": 0.3175, - "step": 7422500 - }, - { - "epoch": 4.45, - "learning_rate": 2.487783000358794e-05, - "loss": 0.3107, - "step": 7423000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4875734237958497e-05, - "loss": 0.302, - "step": 7423500 - }, - { - "epoch": 4.45, - "learning_rate": 2.487363427239793e-05, - "loss": 0.3095, - "step": 7424000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4871534306837367e-05, - "loss": 0.3128, - "step": 7424500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4869434341276804e-05, - "loss": 0.3046, - "step": 7425000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4867334375716237e-05, - "loss": 0.3134, - "step": 7425500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4865238610086794e-05, - "loss": 0.3177, - "step": 7426000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4863138644526228e-05, - "loss": 0.3168, - "step": 7426500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4861038678965665e-05, - "loss": 0.3084, - "step": 7427000 - }, - { - "epoch": 4.45, - "learning_rate": 2.48589387134051e-05, - "loss": 0.3042, - "step": 7427500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4856838747844535e-05, - "loss": 0.306, - "step": 7428000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4854738782283972e-05, - "loss": 0.3119, - "step": 7428500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4852643016654525e-05, - "loss": 0.3096, - "step": 7429000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4850543051093962e-05, - "loss": 0.3189, - "step": 7429500 - }, - { - "epoch": 4.45, - "learning_rate": 2.4848443085533396e-05, - "loss": 0.3117, - "step": 7430000 - }, - { - "epoch": 4.45, - "learning_rate": 2.4846343119972832e-05, - "loss": 0.3061, - "step": 7430500 - }, - { - "epoch": 4.46, - "learning_rate": 2.484424315441227e-05, - "loss": 0.3131, - "step": 7431000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4842147388782823e-05, - "loss": 0.3118, - "step": 7431500 - }, - { - "epoch": 4.46, - "learning_rate": 2.484004742322226e-05, - "loss": 0.3101, - "step": 7432000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4837947457661693e-05, - "loss": 0.3138, - "step": 7432500 - }, - { - "epoch": 4.46, - "learning_rate": 2.483584749210113e-05, - "loss": 0.313, - "step": 7433000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4833747526540567e-05, - "loss": 0.3117, - "step": 7433500 - }, - { - "epoch": 4.46, - "learning_rate": 2.483165176091112e-05, - "loss": 0.3105, - "step": 7434000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4829551795350557e-05, - "loss": 0.3089, - "step": 7434500 - }, - { - "epoch": 4.46, - "learning_rate": 2.482745182978999e-05, - "loss": 0.3076, - "step": 7435000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4825351864229428e-05, - "loss": 0.33, - "step": 7435500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4823251898668864e-05, - "loss": 0.3072, - "step": 7436000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4821156133039418e-05, - "loss": 0.3196, - "step": 7436500 - }, - { - "epoch": 4.46, - "learning_rate": 2.481905616747885e-05, - "loss": 0.3083, - "step": 7437000 - }, - { - "epoch": 4.46, - "learning_rate": 2.481695620191829e-05, - "loss": 0.3137, - "step": 7437500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4814856236357725e-05, - "loss": 0.3025, - "step": 7438000 - }, - { - "epoch": 4.46, - "learning_rate": 2.481275627079716e-05, - "loss": 0.3018, - "step": 7438500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4810656305236592e-05, - "loss": 0.3033, - "step": 7439000 - }, - { - "epoch": 4.46, - "learning_rate": 2.480856053960715e-05, - "loss": 0.3118, - "step": 7439500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4806460574046586e-05, - "loss": 0.3135, - "step": 7440000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4804360608486023e-05, - "loss": 0.3156, - "step": 7440500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4802260642925453e-05, - "loss": 0.3079, - "step": 7441000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4800164877296013e-05, - "loss": 0.3122, - "step": 7441500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4798064911735447e-05, - "loss": 0.3135, - "step": 7442000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4795964946174883e-05, - "loss": 0.3023, - "step": 7442500 - }, - { - "epoch": 4.46, - "learning_rate": 2.479386498061432e-05, - "loss": 0.3127, - "step": 7443000 - }, - { - "epoch": 4.46, - "learning_rate": 2.479176501505375e-05, - "loss": 0.3162, - "step": 7443500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4789669249424307e-05, - "loss": 0.3072, - "step": 7444000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4787569283863744e-05, - "loss": 0.3121, - "step": 7444500 - }, - { - "epoch": 4.46, - "learning_rate": 2.478546931830318e-05, - "loss": 0.3093, - "step": 7445000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4783369352742614e-05, - "loss": 0.3068, - "step": 7445500 - }, - { - "epoch": 4.46, - "learning_rate": 2.478127358711317e-05, - "loss": 0.3186, - "step": 7446000 - }, - { - "epoch": 4.46, - "learning_rate": 2.4779173621552605e-05, - "loss": 0.3105, - "step": 7446500 - }, - { - "epoch": 4.46, - "learning_rate": 2.4777073655992042e-05, - "loss": 0.3039, - "step": 7447000 - }, - { - "epoch": 4.47, - "learning_rate": 2.477497369043148e-05, - "loss": 0.3115, - "step": 7447500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4772873724870912e-05, - "loss": 0.3122, - "step": 7448000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4770773759310346e-05, - "loss": 0.307, - "step": 7448500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4768677993680902e-05, - "loss": 0.3196, - "step": 7449000 - }, - { - "epoch": 4.47, - "learning_rate": 2.476657802812034e-05, - "loss": 0.3113, - "step": 7449500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4764478062559776e-05, - "loss": 0.3105, - "step": 7450000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4762378096999206e-05, - "loss": 0.3117, - "step": 7450500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4760282331369763e-05, - "loss": 0.3067, - "step": 7451000 - }, - { - "epoch": 4.47, - "learning_rate": 2.47581823658092e-05, - "loss": 0.315, - "step": 7451500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4756082400248637e-05, - "loss": 0.3154, - "step": 7452000 - }, - { - "epoch": 4.47, - "learning_rate": 2.475398243468807e-05, - "loss": 0.3103, - "step": 7452500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4751882469127504e-05, - "loss": 0.3145, - "step": 7453000 - }, - { - "epoch": 4.47, - "learning_rate": 2.474978670349806e-05, - "loss": 0.3155, - "step": 7453500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4747686737937498e-05, - "loss": 0.3178, - "step": 7454000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4745586772376934e-05, - "loss": 0.3168, - "step": 7454500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4743486806816368e-05, - "loss": 0.3096, - "step": 7455000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4741391041186925e-05, - "loss": 0.3169, - "step": 7455500 - }, - { - "epoch": 4.47, - "learning_rate": 2.473929107562636e-05, - "loss": 0.3134, - "step": 7456000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4737191110065795e-05, - "loss": 0.3169, - "step": 7456500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4735091144505232e-05, - "loss": 0.3114, - "step": 7457000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4732991178944666e-05, - "loss": 0.3176, - "step": 7457500 - }, - { - "epoch": 4.47, - "learning_rate": 2.47308912133841e-05, - "loss": 0.3065, - "step": 7458000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4728795447754656e-05, - "loss": 0.3158, - "step": 7458500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4726695482194093e-05, - "loss": 0.3061, - "step": 7459000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4724595516633526e-05, - "loss": 0.3105, - "step": 7459500 - }, - { - "epoch": 4.47, - "learning_rate": 2.472249555107296e-05, - "loss": 0.3091, - "step": 7460000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4720395585512397e-05, - "loss": 0.3102, - "step": 7460500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4718295619951833e-05, - "loss": 0.3045, - "step": 7461000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4716195654391267e-05, - "loss": 0.3155, - "step": 7461500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4714099888761824e-05, - "loss": 0.3078, - "step": 7462000 - }, - { - "epoch": 4.47, - "learning_rate": 2.4711999923201257e-05, - "loss": 0.3074, - "step": 7462500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4709899957640694e-05, - "loss": 0.3063, - "step": 7463000 - }, - { - "epoch": 4.47, - "learning_rate": 2.470779999208013e-05, - "loss": 0.3152, - "step": 7463500 - }, - { - "epoch": 4.47, - "learning_rate": 2.4705700026519564e-05, - "loss": 0.3122, - "step": 7464000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4703600060959e-05, - "loss": 0.3162, - "step": 7464500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4701500095398438e-05, - "loss": 0.3139, - "step": 7465000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4699400129837868e-05, - "loss": 0.3066, - "step": 7465500 - }, - { - "epoch": 4.48, - "learning_rate": 2.469730856413955e-05, - "loss": 0.3204, - "step": 7466000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4695208598578986e-05, - "loss": 0.3128, - "step": 7466500 - }, - { - "epoch": 4.48, - "learning_rate": 2.469310863301842e-05, - "loss": 0.3097, - "step": 7467000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4691008667457852e-05, - "loss": 0.3021, - "step": 7467500 - }, - { - "epoch": 4.48, - "learning_rate": 2.468890870189729e-05, - "loss": 0.3099, - "step": 7468000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4686808736336723e-05, - "loss": 0.3058, - "step": 7468500 - }, - { - "epoch": 4.48, - "learning_rate": 2.468470877077616e-05, - "loss": 0.3139, - "step": 7469000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4682608805215596e-05, - "loss": 0.3079, - "step": 7469500 - }, - { - "epoch": 4.48, - "learning_rate": 2.468051303958615e-05, - "loss": 0.3162, - "step": 7470000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4678413074025587e-05, - "loss": 0.3133, - "step": 7470500 - }, - { - "epoch": 4.48, - "learning_rate": 2.467631310846502e-05, - "loss": 0.3082, - "step": 7471000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4674213142904457e-05, - "loss": 0.3044, - "step": 7471500 - }, - { - "epoch": 4.48, - "learning_rate": 2.467211737727501e-05, - "loss": 0.3127, - "step": 7472000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4670017411714448e-05, - "loss": 0.3088, - "step": 7472500 - }, - { - "epoch": 4.48, - "learning_rate": 2.466791744615388e-05, - "loss": 0.3083, - "step": 7473000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4665817480593318e-05, - "loss": 0.307, - "step": 7473500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4663721714963875e-05, - "loss": 0.3127, - "step": 7474000 - }, - { - "epoch": 4.48, - "learning_rate": 2.466162174940331e-05, - "loss": 0.3148, - "step": 7474500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4659521783842745e-05, - "loss": 0.3024, - "step": 7475000 - }, - { - "epoch": 4.48, - "learning_rate": 2.465742181828218e-05, - "loss": 0.3133, - "step": 7475500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4655321852721615e-05, - "loss": 0.3081, - "step": 7476000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4653221887161052e-05, - "loss": 0.3035, - "step": 7476500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4651121921600482e-05, - "loss": 0.3113, - "step": 7477000 - }, - { - "epoch": 4.48, - "learning_rate": 2.464902195603992e-05, - "loss": 0.3154, - "step": 7477500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4646926190410476e-05, - "loss": 0.3037, - "step": 7478000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4644826224849913e-05, - "loss": 0.3085, - "step": 7478500 - }, - { - "epoch": 4.48, - "learning_rate": 2.464272625928935e-05, - "loss": 0.3194, - "step": 7479000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4640630493659903e-05, - "loss": 0.302, - "step": 7479500 - }, - { - "epoch": 4.48, - "learning_rate": 2.4638530528099337e-05, - "loss": 0.3011, - "step": 7480000 - }, - { - "epoch": 4.48, - "learning_rate": 2.4636430562538774e-05, - "loss": 0.3119, - "step": 7480500 - }, - { - "epoch": 4.49, - "learning_rate": 2.463433059697821e-05, - "loss": 0.3161, - "step": 7481000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4632230631417644e-05, - "loss": 0.3108, - "step": 7481500 - }, - { - "epoch": 4.49, - "learning_rate": 2.46301348657882e-05, - "loss": 0.3101, - "step": 7482000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4628034900227634e-05, - "loss": 0.3127, - "step": 7482500 - }, - { - "epoch": 4.49, - "learning_rate": 2.462593493466707e-05, - "loss": 0.3051, - "step": 7483000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4623834969106508e-05, - "loss": 0.3157, - "step": 7483500 - }, - { - "epoch": 4.49, - "learning_rate": 2.462173500354594e-05, - "loss": 0.3107, - "step": 7484000 - }, - { - "epoch": 4.49, - "learning_rate": 2.46196392379165e-05, - "loss": 0.3162, - "step": 7484500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4617543472287056e-05, - "loss": 0.3187, - "step": 7485000 - }, - { - "epoch": 4.49, - "learning_rate": 2.461544350672649e-05, - "loss": 0.3184, - "step": 7485500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4613343541165926e-05, - "loss": 0.3038, - "step": 7486000 - }, - { - "epoch": 4.49, - "learning_rate": 2.461124357560536e-05, - "loss": 0.3129, - "step": 7486500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4609143610044793e-05, - "loss": 0.3202, - "step": 7487000 - }, - { - "epoch": 4.49, - "learning_rate": 2.460704364448423e-05, - "loss": 0.3121, - "step": 7487500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4604943678923666e-05, - "loss": 0.3149, - "step": 7488000 - }, - { - "epoch": 4.49, - "learning_rate": 2.46028437133631e-05, - "loss": 0.3058, - "step": 7488500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4600743747802533e-05, - "loss": 0.3175, - "step": 7489000 - }, - { - "epoch": 4.49, - "learning_rate": 2.459864378224197e-05, - "loss": 0.3083, - "step": 7489500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4596543816681407e-05, - "loss": 0.3018, - "step": 7490000 - }, - { - "epoch": 4.49, - "learning_rate": 2.459444385112084e-05, - "loss": 0.311, - "step": 7490500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4592348085491398e-05, - "loss": 0.3177, - "step": 7491000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4590252319861954e-05, - "loss": 0.3148, - "step": 7491500 - }, - { - "epoch": 4.49, - "learning_rate": 2.458815655423251e-05, - "loss": 0.3251, - "step": 7492000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4586056588671945e-05, - "loss": 0.3118, - "step": 7492500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4583956623111382e-05, - "loss": 0.2981, - "step": 7493000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4581856657550815e-05, - "loss": 0.3188, - "step": 7493500 - }, - { - "epoch": 4.49, - "learning_rate": 2.457975669199025e-05, - "loss": 0.318, - "step": 7494000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4577656726429686e-05, - "loss": 0.307, - "step": 7494500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4575556760869122e-05, - "loss": 0.315, - "step": 7495000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4573456795308556e-05, - "loss": 0.312, - "step": 7495500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4571356829747993e-05, - "loss": 0.3012, - "step": 7496000 - }, - { - "epoch": 4.49, - "learning_rate": 2.4569256864187426e-05, - "loss": 0.3051, - "step": 7496500 - }, - { - "epoch": 4.49, - "learning_rate": 2.4567156898626863e-05, - "loss": 0.308, - "step": 7497000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4565056933066296e-05, - "loss": 0.3116, - "step": 7497500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4562961167436853e-05, - "loss": 0.3145, - "step": 7498000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4560861201876287e-05, - "loss": 0.3078, - "step": 7498500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4558765436246844e-05, - "loss": 0.3069, - "step": 7499000 - }, - { - "epoch": 4.5, - "learning_rate": 2.455666547068628e-05, - "loss": 0.3163, - "step": 7499500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4554565505125718e-05, - "loss": 0.3179, - "step": 7500000 - }, - { - "epoch": 4.5, - "eval_loss": 0.3063606917858124, - "eval_runtime": 1125.0954, - "eval_samples_per_second": 468.156, - "eval_steps_per_second": 78.026, - "step": 7500000 - }, - { - "epoch": 4.5, - "learning_rate": 2.455246553956515e-05, - "loss": 0.3062, - "step": 7500500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4550365574004584e-05, - "loss": 0.3095, - "step": 7501000 - }, - { - "epoch": 4.5, - "learning_rate": 2.454826560844402e-05, - "loss": 0.3007, - "step": 7501500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4546165642883455e-05, - "loss": 0.314, - "step": 7502000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4544069877254015e-05, - "loss": 0.3227, - "step": 7502500 - }, - { - "epoch": 4.5, - "learning_rate": 2.454196991169345e-05, - "loss": 0.2997, - "step": 7503000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4539869946132882e-05, - "loss": 0.3036, - "step": 7503500 - }, - { - "epoch": 4.5, - "learning_rate": 2.453776998057232e-05, - "loss": 0.3105, - "step": 7504000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4535670015011752e-05, - "loss": 0.3142, - "step": 7504500 - }, - { - "epoch": 4.5, - "learning_rate": 2.453357004945119e-05, - "loss": 0.3036, - "step": 7505000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4531470083890626e-05, - "loss": 0.3081, - "step": 7505500 - }, - { - "epoch": 4.5, - "learning_rate": 2.452937431826118e-05, - "loss": 0.3199, - "step": 7506000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4527274352700616e-05, - "loss": 0.3048, - "step": 7506500 - }, - { - "epoch": 4.5, - "learning_rate": 2.452517438714005e-05, - "loss": 0.31, - "step": 7507000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4523074421579487e-05, - "loss": 0.304, - "step": 7507500 - }, - { - "epoch": 4.5, - "learning_rate": 2.452097865595004e-05, - "loss": 0.3179, - "step": 7508000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4518878690389477e-05, - "loss": 0.3104, - "step": 7508500 - }, - { - "epoch": 4.5, - "learning_rate": 2.451677872482891e-05, - "loss": 0.3106, - "step": 7509000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4514678759268347e-05, - "loss": 0.3086, - "step": 7509500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4512582993638904e-05, - "loss": 0.312, - "step": 7510000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4510483028078338e-05, - "loss": 0.3054, - "step": 7510500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4508383062517775e-05, - "loss": 0.3244, - "step": 7511000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4506283096957208e-05, - "loss": 0.3137, - "step": 7511500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4504183131396645e-05, - "loss": 0.3178, - "step": 7512000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4502087365767202e-05, - "loss": 0.3003, - "step": 7512500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4499987400206635e-05, - "loss": 0.3066, - "step": 7513000 - }, - { - "epoch": 4.5, - "learning_rate": 2.4497887434646072e-05, - "loss": 0.314, - "step": 7513500 - }, - { - "epoch": 4.5, - "learning_rate": 2.4495787469085506e-05, - "loss": 0.3232, - "step": 7514000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4493687503524943e-05, - "loss": 0.3104, - "step": 7514500 - }, - { - "epoch": 4.51, - "learning_rate": 2.449158753796438e-05, - "loss": 0.3065, - "step": 7515000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4489491772334933e-05, - "loss": 0.3078, - "step": 7515500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4487391806774367e-05, - "loss": 0.3123, - "step": 7516000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4485291841213803e-05, - "loss": 0.3168, - "step": 7516500 - }, - { - "epoch": 4.51, - "learning_rate": 2.448319187565324e-05, - "loss": 0.3115, - "step": 7517000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4481091910092674e-05, - "loss": 0.3217, - "step": 7517500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4478991944532107e-05, - "loss": 0.3118, - "step": 7518000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4476896178902664e-05, - "loss": 0.3053, - "step": 7518500 - }, - { - "epoch": 4.51, - "learning_rate": 2.44747962133421e-05, - "loss": 0.3136, - "step": 7519000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4472696247781538e-05, - "loss": 0.3212, - "step": 7519500 - }, - { - "epoch": 4.51, - "learning_rate": 2.447059628222097e-05, - "loss": 0.3141, - "step": 7520000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4468500516591528e-05, - "loss": 0.3167, - "step": 7520500 - }, - { - "epoch": 4.51, - "learning_rate": 2.446640055103096e-05, - "loss": 0.3185, - "step": 7521000 - }, - { - "epoch": 4.51, - "learning_rate": 2.446430478540152e-05, - "loss": 0.3027, - "step": 7521500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4462204819840955e-05, - "loss": 0.3125, - "step": 7522000 - }, - { - "epoch": 4.51, - "learning_rate": 2.446010485428039e-05, - "loss": 0.3099, - "step": 7522500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4458004888719822e-05, - "loss": 0.3064, - "step": 7523000 - }, - { - "epoch": 4.51, - "learning_rate": 2.445590492315926e-05, - "loss": 0.3165, - "step": 7523500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4453804957598696e-05, - "loss": 0.3093, - "step": 7524000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4451709191969253e-05, - "loss": 0.3156, - "step": 7524500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4449609226408686e-05, - "loss": 0.3062, - "step": 7525000 - }, - { - "epoch": 4.51, - "learning_rate": 2.444750926084812e-05, - "loss": 0.3052, - "step": 7525500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4445409295287557e-05, - "loss": 0.3112, - "step": 7526000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4443309329726994e-05, - "loss": 0.3136, - "step": 7526500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4441209364166427e-05, - "loss": 0.312, - "step": 7527000 - }, - { - "epoch": 4.51, - "learning_rate": 2.443910939860586e-05, - "loss": 0.3113, - "step": 7527500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4437009433045297e-05, - "loss": 0.3134, - "step": 7528000 - }, - { - "epoch": 4.51, - "learning_rate": 2.443490946748473e-05, - "loss": 0.3102, - "step": 7528500 - }, - { - "epoch": 4.51, - "learning_rate": 2.443281370185529e-05, - "loss": 0.3143, - "step": 7529000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4430713736294725e-05, - "loss": 0.3096, - "step": 7529500 - }, - { - "epoch": 4.51, - "learning_rate": 2.4428613770734158e-05, - "loss": 0.3098, - "step": 7530000 - }, - { - "epoch": 4.51, - "learning_rate": 2.4426513805173595e-05, - "loss": 0.3086, - "step": 7530500 - }, - { - "epoch": 4.52, - "learning_rate": 2.442441383961303e-05, - "loss": 0.3138, - "step": 7531000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4422313874052465e-05, - "loss": 0.3121, - "step": 7531500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4420218108423022e-05, - "loss": 0.3046, - "step": 7532000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4418118142862456e-05, - "loss": 0.3173, - "step": 7532500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4416018177301893e-05, - "loss": 0.3077, - "step": 7533000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4413918211741326e-05, - "loss": 0.3073, - "step": 7533500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4411818246180763e-05, - "loss": 0.3101, - "step": 7534000 - }, - { - "epoch": 4.52, - "learning_rate": 2.44097182806202e-05, - "loss": 0.3165, - "step": 7534500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4407622514990753e-05, - "loss": 0.3111, - "step": 7535000 - }, - { - "epoch": 4.52, - "learning_rate": 2.440552254943019e-05, - "loss": 0.3098, - "step": 7535500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4403422583869624e-05, - "loss": 0.3186, - "step": 7536000 - }, - { - "epoch": 4.52, - "learning_rate": 2.440132261830906e-05, - "loss": 0.3028, - "step": 7536500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4399226852679614e-05, - "loss": 0.3092, - "step": 7537000 - }, - { - "epoch": 4.52, - "learning_rate": 2.439712688711905e-05, - "loss": 0.3125, - "step": 7537500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4395026921558484e-05, - "loss": 0.3112, - "step": 7538000 - }, - { - "epoch": 4.52, - "learning_rate": 2.439292695599792e-05, - "loss": 0.3066, - "step": 7538500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4390826990437358e-05, - "loss": 0.3145, - "step": 7539000 - }, - { - "epoch": 4.52, - "learning_rate": 2.438872702487679e-05, - "loss": 0.2965, - "step": 7539500 - }, - { - "epoch": 4.52, - "learning_rate": 2.438663125924735e-05, - "loss": 0.3119, - "step": 7540000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4384531293686782e-05, - "loss": 0.316, - "step": 7540500 - }, - { - "epoch": 4.52, - "learning_rate": 2.438243132812622e-05, - "loss": 0.3114, - "step": 7541000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4380331362565656e-05, - "loss": 0.299, - "step": 7541500 - }, - { - "epoch": 4.52, - "learning_rate": 2.437823139700509e-05, - "loss": 0.2988, - "step": 7542000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4376131431444523e-05, - "loss": 0.2977, - "step": 7542500 - }, - { - "epoch": 4.52, - "learning_rate": 2.437403566581508e-05, - "loss": 0.3103, - "step": 7543000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4371935700254516e-05, - "loss": 0.3056, - "step": 7543500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4369835734693953e-05, - "loss": 0.3025, - "step": 7544000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4367739969064507e-05, - "loss": 0.3177, - "step": 7544500 - }, - { - "epoch": 4.52, - "learning_rate": 2.436564000350394e-05, - "loss": 0.3181, - "step": 7545000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4363540037943377e-05, - "loss": 0.3116, - "step": 7545500 - }, - { - "epoch": 4.52, - "learning_rate": 2.4361440072382814e-05, - "loss": 0.3054, - "step": 7546000 - }, - { - "epoch": 4.52, - "learning_rate": 2.4359340106822247e-05, - "loss": 0.316, - "step": 7546500 - }, - { - "epoch": 4.52, - "learning_rate": 2.435724014126168e-05, - "loss": 0.3122, - "step": 7547000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4355140175701118e-05, - "loss": 0.3077, - "step": 7547500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4353040210140555e-05, - "loss": 0.3119, - "step": 7548000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4350940244579988e-05, - "loss": 0.3003, - "step": 7548500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4348844478950545e-05, - "loss": 0.3124, - "step": 7549000 - }, - { - "epoch": 4.53, - "learning_rate": 2.434674451338998e-05, - "loss": 0.2993, - "step": 7549500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4344644547829415e-05, - "loss": 0.3102, - "step": 7550000 - }, - { - "epoch": 4.53, - "learning_rate": 2.434254458226885e-05, - "loss": 0.3066, - "step": 7550500 - }, - { - "epoch": 4.53, - "learning_rate": 2.434044881663941e-05, - "loss": 0.3034, - "step": 7551000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4338348851078843e-05, - "loss": 0.3093, - "step": 7551500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4336248885518276e-05, - "loss": 0.3137, - "step": 7552000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4334148919957713e-05, - "loss": 0.318, - "step": 7552500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4332048954397146e-05, - "loss": 0.3095, - "step": 7553000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4329953188767703e-05, - "loss": 0.3085, - "step": 7553500 - }, - { - "epoch": 4.53, - "learning_rate": 2.432785322320714e-05, - "loss": 0.3155, - "step": 7554000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4325753257646574e-05, - "loss": 0.3048, - "step": 7554500 - }, - { - "epoch": 4.53, - "learning_rate": 2.432365329208601e-05, - "loss": 0.311, - "step": 7555000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4321553326525444e-05, - "loss": 0.3022, - "step": 7555500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4319457560896e-05, - "loss": 0.3158, - "step": 7556000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4317357595335434e-05, - "loss": 0.309, - "step": 7556500 - }, - { - "epoch": 4.53, - "learning_rate": 2.431525762977487e-05, - "loss": 0.3146, - "step": 7557000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4313157664214305e-05, - "loss": 0.3135, - "step": 7557500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4311061898584865e-05, - "loss": 0.3073, - "step": 7558000 - }, - { - "epoch": 4.53, - "learning_rate": 2.43089619330243e-05, - "loss": 0.3034, - "step": 7558500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4306861967463732e-05, - "loss": 0.3005, - "step": 7559000 - }, - { - "epoch": 4.53, - "learning_rate": 2.430476200190317e-05, - "loss": 0.3135, - "step": 7559500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4302666236273726e-05, - "loss": 0.3143, - "step": 7560000 - }, - { - "epoch": 4.53, - "learning_rate": 2.430056627071316e-05, - "loss": 0.3119, - "step": 7560500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4298470505083716e-05, - "loss": 0.307, - "step": 7561000 - }, - { - "epoch": 4.53, - "learning_rate": 2.429637053952315e-05, - "loss": 0.3149, - "step": 7561500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4294270573962586e-05, - "loss": 0.3054, - "step": 7562000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4292170608402023e-05, - "loss": 0.3102, - "step": 7562500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4290070642841457e-05, - "loss": 0.3084, - "step": 7563000 - }, - { - "epoch": 4.53, - "learning_rate": 2.4287970677280894e-05, - "loss": 0.3096, - "step": 7563500 - }, - { - "epoch": 4.53, - "learning_rate": 2.4285870711720327e-05, - "loss": 0.31, - "step": 7564000 - }, - { - "epoch": 4.54, - "learning_rate": 2.428377074615976e-05, - "loss": 0.312, - "step": 7564500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4281670780599197e-05, - "loss": 0.3198, - "step": 7565000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4279570815038634e-05, - "loss": 0.3196, - "step": 7565500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4277475049409188e-05, - "loss": 0.3088, - "step": 7566000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4275375083848625e-05, - "loss": 0.32, - "step": 7566500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4273275118288058e-05, - "loss": 0.3141, - "step": 7567000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4271175152727495e-05, - "loss": 0.3143, - "step": 7567500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4269075187166932e-05, - "loss": 0.3119, - "step": 7568000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4266979421537485e-05, - "loss": 0.311, - "step": 7568500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4264879455976922e-05, - "loss": 0.307, - "step": 7569000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4262779490416356e-05, - "loss": 0.312, - "step": 7569500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4260679524855792e-05, - "loss": 0.3273, - "step": 7570000 - }, - { - "epoch": 4.54, - "learning_rate": 2.425857955929523e-05, - "loss": 0.307, - "step": 7570500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4256479593734663e-05, - "loss": 0.3169, - "step": 7571000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4254379628174096e-05, - "loss": 0.309, - "step": 7571500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4252283862544653e-05, - "loss": 0.3148, - "step": 7572000 - }, - { - "epoch": 4.54, - "learning_rate": 2.425018389698409e-05, - "loss": 0.3076, - "step": 7572500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4248083931423527e-05, - "loss": 0.3089, - "step": 7573000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4245983965862957e-05, - "loss": 0.3043, - "step": 7573500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4243884000302394e-05, - "loss": 0.3107, - "step": 7574000 - }, - { - "epoch": 4.54, - "learning_rate": 2.424178403474183e-05, - "loss": 0.3082, - "step": 7574500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4239684069181264e-05, - "loss": 0.3124, - "step": 7575000 - }, - { - "epoch": 4.54, - "learning_rate": 2.42375841036207e-05, - "loss": 0.3127, - "step": 7575500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4235488337991255e-05, - "loss": 0.317, - "step": 7576000 - }, - { - "epoch": 4.54, - "learning_rate": 2.423338837243069e-05, - "loss": 0.3183, - "step": 7576500 - }, - { - "epoch": 4.54, - "learning_rate": 2.4231288406870128e-05, - "loss": 0.3196, - "step": 7577000 - }, - { - "epoch": 4.54, - "learning_rate": 2.422918844130956e-05, - "loss": 0.3083, - "step": 7577500 - }, - { - "epoch": 4.54, - "learning_rate": 2.422709267568012e-05, - "loss": 0.3126, - "step": 7578000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4224992710119552e-05, - "loss": 0.3133, - "step": 7578500 - }, - { - "epoch": 4.54, - "learning_rate": 2.422289274455899e-05, - "loss": 0.3159, - "step": 7579000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4220792778998422e-05, - "loss": 0.3087, - "step": 7579500 - }, - { - "epoch": 4.54, - "learning_rate": 2.421869281343786e-05, - "loss": 0.3197, - "step": 7580000 - }, - { - "epoch": 4.54, - "learning_rate": 2.4216592847877296e-05, - "loss": 0.3121, - "step": 7580500 - }, - { - "epoch": 4.55, - "learning_rate": 2.421449288231673e-05, - "loss": 0.3168, - "step": 7581000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4212397116687287e-05, - "loss": 0.3102, - "step": 7581500 - }, - { - "epoch": 4.55, - "learning_rate": 2.421029715112672e-05, - "loss": 0.304, - "step": 7582000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4208197185566157e-05, - "loss": 0.3105, - "step": 7582500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4206097220005594e-05, - "loss": 0.3031, - "step": 7583000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4204001454376147e-05, - "loss": 0.3122, - "step": 7583500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4201901488815584e-05, - "loss": 0.3095, - "step": 7584000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4199801523255018e-05, - "loss": 0.311, - "step": 7584500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4197701557694454e-05, - "loss": 0.3087, - "step": 7585000 - }, - { - "epoch": 4.55, - "learning_rate": 2.419560159213389e-05, - "loss": 0.3015, - "step": 7585500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4193505826504445e-05, - "loss": 0.3142, - "step": 7586000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4191405860943878e-05, - "loss": 0.316, - "step": 7586500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4189305895383315e-05, - "loss": 0.3161, - "step": 7587000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4187205929822752e-05, - "loss": 0.3024, - "step": 7587500 - }, - { - "epoch": 4.55, - "learning_rate": 2.418510596426219e-05, - "loss": 0.3045, - "step": 7588000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4183010198632742e-05, - "loss": 0.3103, - "step": 7588500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4180910233072176e-05, - "loss": 0.3089, - "step": 7589000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4178810267511613e-05, - "loss": 0.3029, - "step": 7589500 - }, - { - "epoch": 4.55, - "learning_rate": 2.417671030195105e-05, - "loss": 0.3035, - "step": 7590000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4174610336390483e-05, - "loss": 0.3093, - "step": 7590500 - }, - { - "epoch": 4.55, - "learning_rate": 2.417251457076104e-05, - "loss": 0.3192, - "step": 7591000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4170414605200473e-05, - "loss": 0.3136, - "step": 7591500 - }, - { - "epoch": 4.55, - "learning_rate": 2.416831463963991e-05, - "loss": 0.3123, - "step": 7592000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4166214674079347e-05, - "loss": 0.3157, - "step": 7592500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4164114708518777e-05, - "loss": 0.315, - "step": 7593000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4162014742958214e-05, - "loss": 0.3062, - "step": 7593500 - }, - { - "epoch": 4.55, - "learning_rate": 2.415991477739765e-05, - "loss": 0.3061, - "step": 7594000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4157814811837084e-05, - "loss": 0.3062, - "step": 7594500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4155719046207645e-05, - "loss": 0.3133, - "step": 7595000 - }, - { - "epoch": 4.55, - "learning_rate": 2.4153619080647075e-05, - "loss": 0.3026, - "step": 7595500 - }, - { - "epoch": 4.55, - "learning_rate": 2.415151911508651e-05, - "loss": 0.3116, - "step": 7596000 - }, - { - "epoch": 4.55, - "learning_rate": 2.414941914952595e-05, - "loss": 0.3143, - "step": 7596500 - }, - { - "epoch": 4.55, - "learning_rate": 2.4147319183965382e-05, - "loss": 0.3114, - "step": 7597000 - }, - { - "epoch": 4.56, - "learning_rate": 2.414521921840482e-05, - "loss": 0.3135, - "step": 7597500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4143123452775372e-05, - "loss": 0.3097, - "step": 7598000 - }, - { - "epoch": 4.56, - "learning_rate": 2.414102348721481e-05, - "loss": 0.3116, - "step": 7598500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4138923521654246e-05, - "loss": 0.3159, - "step": 7599000 - }, - { - "epoch": 4.56, - "learning_rate": 2.413682355609368e-05, - "loss": 0.3165, - "step": 7599500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4134723590533116e-05, - "loss": 0.3019, - "step": 7600000 - }, - { - "epoch": 4.56, - "eval_loss": 0.30695515871047974, - "eval_runtime": 1116.4663, - "eval_samples_per_second": 471.774, - "eval_steps_per_second": 78.629, - "step": 7600000 - }, - { - "epoch": 4.56, - "learning_rate": 2.413263202483479e-05, - "loss": 0.3118, - "step": 7600500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4130532059274227e-05, - "loss": 0.3126, - "step": 7601000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4128432093713664e-05, - "loss": 0.3063, - "step": 7601500 - }, - { - "epoch": 4.56, - "learning_rate": 2.41263321281531e-05, - "loss": 0.3039, - "step": 7602000 - }, - { - "epoch": 4.56, - "learning_rate": 2.412423216259253e-05, - "loss": 0.306, - "step": 7602500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4122132197031967e-05, - "loss": 0.3142, - "step": 7603000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4120032231471404e-05, - "loss": 0.3107, - "step": 7603500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4117932265910838e-05, - "loss": 0.3058, - "step": 7604000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4115836500281395e-05, - "loss": 0.3133, - "step": 7604500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4113736534720828e-05, - "loss": 0.3047, - "step": 7605000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4111636569160265e-05, - "loss": 0.3199, - "step": 7605500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4109536603599702e-05, - "loss": 0.318, - "step": 7606000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4107436638039135e-05, - "loss": 0.2992, - "step": 7606500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4105336672478572e-05, - "loss": 0.3158, - "step": 7607000 - }, - { - "epoch": 4.56, - "learning_rate": 2.410323670691801e-05, - "loss": 0.3102, - "step": 7607500 - }, - { - "epoch": 4.56, - "learning_rate": 2.410113674135744e-05, - "loss": 0.3198, - "step": 7608000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4099040975727996e-05, - "loss": 0.3132, - "step": 7608500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4096941010167433e-05, - "loss": 0.31, - "step": 7609000 - }, - { - "epoch": 4.56, - "learning_rate": 2.409484104460687e-05, - "loss": 0.3184, - "step": 7609500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4092741079046303e-05, - "loss": 0.3143, - "step": 7610000 - }, - { - "epoch": 4.56, - "learning_rate": 2.409064531341686e-05, - "loss": 0.3171, - "step": 7610500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4088545347856294e-05, - "loss": 0.3136, - "step": 7611000 - }, - { - "epoch": 4.56, - "learning_rate": 2.408644538229573e-05, - "loss": 0.3051, - "step": 7611500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4084345416735167e-05, - "loss": 0.3109, - "step": 7612000 - }, - { - "epoch": 4.56, - "learning_rate": 2.4082245451174597e-05, - "loss": 0.3107, - "step": 7612500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4080149685545158e-05, - "loss": 0.3055, - "step": 7613000 - }, - { - "epoch": 4.56, - "learning_rate": 2.407804971998459e-05, - "loss": 0.3055, - "step": 7613500 - }, - { - "epoch": 4.56, - "learning_rate": 2.4075949754424028e-05, - "loss": 0.3111, - "step": 7614000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4073849788863465e-05, - "loss": 0.3082, - "step": 7614500 - }, - { - "epoch": 4.57, - "learning_rate": 2.4071749823302895e-05, - "loss": 0.3091, - "step": 7615000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4069649857742332e-05, - "loss": 0.3138, - "step": 7615500 - }, - { - "epoch": 4.57, - "learning_rate": 2.406755409211289e-05, - "loss": 0.3175, - "step": 7616000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4065454126552326e-05, - "loss": 0.308, - "step": 7616500 - }, - { - "epoch": 4.57, - "learning_rate": 2.406335416099176e-05, - "loss": 0.3069, - "step": 7617000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4061254195431193e-05, - "loss": 0.3112, - "step": 7617500 - }, - { - "epoch": 4.57, - "learning_rate": 2.405915842980175e-05, - "loss": 0.314, - "step": 7618000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4057058464241186e-05, - "loss": 0.3167, - "step": 7618500 - }, - { - "epoch": 4.57, - "learning_rate": 2.4054958498680623e-05, - "loss": 0.3083, - "step": 7619000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4052858533120053e-05, - "loss": 0.3149, - "step": 7619500 - }, - { - "epoch": 4.57, - "learning_rate": 2.405075856755949e-05, - "loss": 0.3014, - "step": 7620000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4048658601998927e-05, - "loss": 0.31, - "step": 7620500 - }, - { - "epoch": 4.57, - "learning_rate": 2.4046562836369484e-05, - "loss": 0.3144, - "step": 7621000 - }, - { - "epoch": 4.57, - "learning_rate": 2.404446287080892e-05, - "loss": 0.312, - "step": 7621500 - }, - { - "epoch": 4.57, - "learning_rate": 2.404236290524835e-05, - "loss": 0.3064, - "step": 7622000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4040262939687788e-05, - "loss": 0.3113, - "step": 7622500 - }, - { - "epoch": 4.57, - "learning_rate": 2.4038162974127225e-05, - "loss": 0.3144, - "step": 7623000 - }, - { - "epoch": 4.57, - "learning_rate": 2.403606720849778e-05, - "loss": 0.3167, - "step": 7623500 - }, - { - "epoch": 4.57, - "learning_rate": 2.403396724293722e-05, - "loss": 0.3067, - "step": 7624000 - }, - { - "epoch": 4.57, - "learning_rate": 2.403186727737665e-05, - "loss": 0.3111, - "step": 7624500 - }, - { - "epoch": 4.57, - "learning_rate": 2.4029767311816085e-05, - "loss": 0.3152, - "step": 7625000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4027667346255522e-05, - "loss": 0.3159, - "step": 7625500 - }, - { - "epoch": 4.57, - "learning_rate": 2.402557158062608e-05, - "loss": 0.3121, - "step": 7626000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4023471615065513e-05, - "loss": 0.3098, - "step": 7626500 - }, - { - "epoch": 4.57, - "learning_rate": 2.4021371649504946e-05, - "loss": 0.3099, - "step": 7627000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4019271683944383e-05, - "loss": 0.3121, - "step": 7627500 - }, - { - "epoch": 4.57, - "learning_rate": 2.401717171838382e-05, - "loss": 0.31, - "step": 7628000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4015075952754377e-05, - "loss": 0.314, - "step": 7628500 - }, - { - "epoch": 4.57, - "learning_rate": 2.401297598719381e-05, - "loss": 0.3086, - "step": 7629000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4010876021633244e-05, - "loss": 0.3054, - "step": 7629500 - }, - { - "epoch": 4.57, - "learning_rate": 2.400877605607268e-05, - "loss": 0.3165, - "step": 7630000 - }, - { - "epoch": 4.57, - "learning_rate": 2.4006676090512114e-05, - "loss": 0.311, - "step": 7630500 - }, - { - "epoch": 4.58, - "learning_rate": 2.4004580324882674e-05, - "loss": 0.3011, - "step": 7631000 - }, - { - "epoch": 4.58, - "learning_rate": 2.4002480359322104e-05, - "loss": 0.3104, - "step": 7631500 - }, - { - "epoch": 4.58, - "learning_rate": 2.400038039376154e-05, - "loss": 0.3007, - "step": 7632000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3998280428200978e-05, - "loss": 0.3055, - "step": 7632500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3996184662571535e-05, - "loss": 0.3054, - "step": 7633000 - }, - { - "epoch": 4.58, - "learning_rate": 2.399408469701097e-05, - "loss": 0.3026, - "step": 7633500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3991984731450402e-05, - "loss": 0.3081, - "step": 7634000 - }, - { - "epoch": 4.58, - "learning_rate": 2.398988476588984e-05, - "loss": 0.3107, - "step": 7634500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3987784800329276e-05, - "loss": 0.3052, - "step": 7635000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3985689034699833e-05, - "loss": 0.3182, - "step": 7635500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3983589069139266e-05, - "loss": 0.3072, - "step": 7636000 - }, - { - "epoch": 4.58, - "learning_rate": 2.39814891035787e-05, - "loss": 0.3089, - "step": 7636500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3979389138018136e-05, - "loss": 0.3185, - "step": 7637000 - }, - { - "epoch": 4.58, - "learning_rate": 2.397728917245757e-05, - "loss": 0.3112, - "step": 7637500 - }, - { - "epoch": 4.58, - "learning_rate": 2.397519340682813e-05, - "loss": 0.328, - "step": 7638000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3973093441267564e-05, - "loss": 0.306, - "step": 7638500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3970993475706997e-05, - "loss": 0.3214, - "step": 7639000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3968893510146434e-05, - "loss": 0.3057, - "step": 7639500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3966793544585867e-05, - "loss": 0.3094, - "step": 7640000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3964693579025304e-05, - "loss": 0.3119, - "step": 7640500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3962597813395858e-05, - "loss": 0.3142, - "step": 7641000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3960497847835295e-05, - "loss": 0.311, - "step": 7641500 - }, - { - "epoch": 4.58, - "learning_rate": 2.395840208220585e-05, - "loss": 0.3196, - "step": 7642000 - }, - { - "epoch": 4.58, - "learning_rate": 2.395630211664529e-05, - "loss": 0.3091, - "step": 7642500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3954202151084722e-05, - "loss": 0.3095, - "step": 7643000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3952102185524155e-05, - "loss": 0.309, - "step": 7643500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3950002219963592e-05, - "loss": 0.3127, - "step": 7644000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3947902254403026e-05, - "loss": 0.3108, - "step": 7644500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3945802288842463e-05, - "loss": 0.3088, - "step": 7645000 - }, - { - "epoch": 4.58, - "learning_rate": 2.39437023232819e-05, - "loss": 0.3096, - "step": 7645500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3941602357721333e-05, - "loss": 0.3077, - "step": 7646000 - }, - { - "epoch": 4.58, - "learning_rate": 2.3939502392160766e-05, - "loss": 0.3045, - "step": 7646500 - }, - { - "epoch": 4.58, - "learning_rate": 2.3937402426600203e-05, - "loss": 0.3045, - "step": 7647000 - }, - { - "epoch": 4.58, - "learning_rate": 2.393530246103964e-05, - "loss": 0.3086, - "step": 7647500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3933206695410197e-05, - "loss": 0.3077, - "step": 7648000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3931106729849627e-05, - "loss": 0.3068, - "step": 7648500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3929006764289064e-05, - "loss": 0.3085, - "step": 7649000 - }, - { - "epoch": 4.59, - "learning_rate": 2.39269067987285e-05, - "loss": 0.31, - "step": 7649500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3924811033099058e-05, - "loss": 0.319, - "step": 7650000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3922711067538495e-05, - "loss": 0.3064, - "step": 7650500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3920611101977925e-05, - "loss": 0.3142, - "step": 7651000 - }, - { - "epoch": 4.59, - "learning_rate": 2.391851113641736e-05, - "loss": 0.305, - "step": 7651500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3916411170856798e-05, - "loss": 0.3065, - "step": 7652000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3914311205296232e-05, - "loss": 0.3, - "step": 7652500 - }, - { - "epoch": 4.59, - "learning_rate": 2.391221543966679e-05, - "loss": 0.3122, - "step": 7653000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3910115474106222e-05, - "loss": 0.3107, - "step": 7653500 - }, - { - "epoch": 4.59, - "learning_rate": 2.390801550854566e-05, - "loss": 0.3123, - "step": 7654000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3905915542985096e-05, - "loss": 0.3131, - "step": 7654500 - }, - { - "epoch": 4.59, - "learning_rate": 2.390381557742453e-05, - "loss": 0.3223, - "step": 7655000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3901715611863966e-05, - "loss": 0.313, - "step": 7655500 - }, - { - "epoch": 4.59, - "learning_rate": 2.389961984623452e-05, - "loss": 0.3129, - "step": 7656000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3897519880673957e-05, - "loss": 0.3053, - "step": 7656500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3895419915113393e-05, - "loss": 0.3063, - "step": 7657000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3893319949552827e-05, - "loss": 0.3118, - "step": 7657500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3891219983992264e-05, - "loss": 0.3071, - "step": 7658000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3889120018431697e-05, - "loss": 0.3118, - "step": 7658500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3887024252802254e-05, - "loss": 0.3094, - "step": 7659000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3884924287241688e-05, - "loss": 0.3082, - "step": 7659500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3882824321681124e-05, - "loss": 0.3033, - "step": 7660000 - }, - { - "epoch": 4.59, - "learning_rate": 2.388072435612056e-05, - "loss": 0.3049, - "step": 7660500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3878628590491115e-05, - "loss": 0.3101, - "step": 7661000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3876528624930552e-05, - "loss": 0.3124, - "step": 7661500 - }, - { - "epoch": 4.59, - "learning_rate": 2.3874428659369985e-05, - "loss": 0.3162, - "step": 7662000 - }, - { - "epoch": 4.59, - "learning_rate": 2.3872328693809422e-05, - "loss": 0.3021, - "step": 7662500 - }, - { - "epoch": 4.59, - "learning_rate": 2.387022872824886e-05, - "loss": 0.307, - "step": 7663000 - }, - { - "epoch": 4.59, - "learning_rate": 2.386812876268829e-05, - "loss": 0.3144, - "step": 7663500 - }, - { - "epoch": 4.59, - "learning_rate": 2.386603299705885e-05, - "loss": 0.3111, - "step": 7664000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3863933031498283e-05, - "loss": 0.3072, - "step": 7664500 - }, - { - "epoch": 4.6, - "learning_rate": 2.386183306593772e-05, - "loss": 0.3096, - "step": 7665000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3859733100377156e-05, - "loss": 0.3013, - "step": 7665500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3857633134816587e-05, - "loss": 0.314, - "step": 7666000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3855533169256023e-05, - "loss": 0.3042, - "step": 7666500 - }, - { - "epoch": 4.6, - "learning_rate": 2.385343320369546e-05, - "loss": 0.3123, - "step": 7667000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3851337438066017e-05, - "loss": 0.3046, - "step": 7667500 - }, - { - "epoch": 4.6, - "learning_rate": 2.384923747250545e-05, - "loss": 0.312, - "step": 7668000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3847137506944884e-05, - "loss": 0.309, - "step": 7668500 - }, - { - "epoch": 4.6, - "learning_rate": 2.384503754138432e-05, - "loss": 0.3051, - "step": 7669000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3842937575823758e-05, - "loss": 0.3187, - "step": 7669500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3840841810194315e-05, - "loss": 0.3126, - "step": 7670000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3838741844633745e-05, - "loss": 0.3039, - "step": 7670500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3836641879073182e-05, - "loss": 0.3063, - "step": 7671000 - }, - { - "epoch": 4.6, - "learning_rate": 2.383454191351262e-05, - "loss": 0.3151, - "step": 7671500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3832441947952052e-05, - "loss": 0.3022, - "step": 7672000 - }, - { - "epoch": 4.6, - "learning_rate": 2.383034198239149e-05, - "loss": 0.3097, - "step": 7672500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3828246216762042e-05, - "loss": 0.3131, - "step": 7673000 - }, - { - "epoch": 4.6, - "learning_rate": 2.382614625120148e-05, - "loss": 0.3135, - "step": 7673500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3824046285640916e-05, - "loss": 0.3065, - "step": 7674000 - }, - { - "epoch": 4.6, - "learning_rate": 2.382194632008035e-05, - "loss": 0.3121, - "step": 7674500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3819846354519786e-05, - "loss": 0.3023, - "step": 7675000 - }, - { - "epoch": 4.6, - "learning_rate": 2.381775058889034e-05, - "loss": 0.3103, - "step": 7675500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3815650623329777e-05, - "loss": 0.3129, - "step": 7676000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3813550657769214e-05, - "loss": 0.3082, - "step": 7676500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3811450692208647e-05, - "loss": 0.304, - "step": 7677000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3809350726648084e-05, - "loss": 0.3025, - "step": 7677500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3807250761087517e-05, - "loss": 0.3116, - "step": 7678000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3805154995458074e-05, - "loss": 0.3044, - "step": 7678500 - }, - { - "epoch": 4.6, - "learning_rate": 2.3803055029897508e-05, - "loss": 0.3133, - "step": 7679000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3800955064336945e-05, - "loss": 0.301, - "step": 7679500 - }, - { - "epoch": 4.6, - "learning_rate": 2.379885509877638e-05, - "loss": 0.3062, - "step": 7680000 - }, - { - "epoch": 4.6, - "learning_rate": 2.3796755133215815e-05, - "loss": 0.3145, - "step": 7680500 - }, - { - "epoch": 4.61, - "learning_rate": 2.379465516765525e-05, - "loss": 0.3011, - "step": 7681000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3792559402025805e-05, - "loss": 0.3095, - "step": 7681500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3790459436465242e-05, - "loss": 0.3024, - "step": 7682000 - }, - { - "epoch": 4.61, - "learning_rate": 2.378835947090468e-05, - "loss": 0.3084, - "step": 7682500 - }, - { - "epoch": 4.61, - "learning_rate": 2.378625950534411e-05, - "loss": 0.3069, - "step": 7683000 - }, - { - "epoch": 4.61, - "learning_rate": 2.378416373971467e-05, - "loss": 0.3061, - "step": 7683500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3782063774154103e-05, - "loss": 0.3082, - "step": 7684000 - }, - { - "epoch": 4.61, - "learning_rate": 2.377996380859354e-05, - "loss": 0.3065, - "step": 7684500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3777863843032973e-05, - "loss": 0.298, - "step": 7685000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3775763877472407e-05, - "loss": 0.3129, - "step": 7685500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3773668111842964e-05, - "loss": 0.3065, - "step": 7686000 - }, - { - "epoch": 4.61, - "learning_rate": 2.37715681462824e-05, - "loss": 0.303, - "step": 7686500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3769468180721837e-05, - "loss": 0.3072, - "step": 7687000 - }, - { - "epoch": 4.61, - "learning_rate": 2.376736821516127e-05, - "loss": 0.3101, - "step": 7687500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3765272449531828e-05, - "loss": 0.3036, - "step": 7688000 - }, - { - "epoch": 4.61, - "learning_rate": 2.376317248397126e-05, - "loss": 0.3197, - "step": 7688500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3761072518410698e-05, - "loss": 0.3176, - "step": 7689000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3758972552850135e-05, - "loss": 0.3028, - "step": 7689500 - }, - { - "epoch": 4.61, - "learning_rate": 2.375687258728957e-05, - "loss": 0.3124, - "step": 7690000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3754772621729002e-05, - "loss": 0.3131, - "step": 7690500 - }, - { - "epoch": 4.61, - "learning_rate": 2.375267265616844e-05, - "loss": 0.311, - "step": 7691000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3750572690607876e-05, - "loss": 0.3141, - "step": 7691500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3748476924978433e-05, - "loss": 0.3133, - "step": 7692000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3746376959417863e-05, - "loss": 0.3044, - "step": 7692500 - }, - { - "epoch": 4.61, - "learning_rate": 2.37442769938573e-05, - "loss": 0.3039, - "step": 7693000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3742177028296736e-05, - "loss": 0.3113, - "step": 7693500 - }, - { - "epoch": 4.61, - "learning_rate": 2.374007706273617e-05, - "loss": 0.3133, - "step": 7694000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3737977097175607e-05, - "loss": 0.3069, - "step": 7694500 - }, - { - "epoch": 4.61, - "learning_rate": 2.373588133154616e-05, - "loss": 0.3134, - "step": 7695000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3733781365985597e-05, - "loss": 0.3039, - "step": 7695500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3731681400425034e-05, - "loss": 0.3052, - "step": 7696000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3729581434864467e-05, - "loss": 0.2971, - "step": 7696500 - }, - { - "epoch": 4.61, - "learning_rate": 2.3727481469303904e-05, - "loss": 0.3151, - "step": 7697000 - }, - { - "epoch": 4.61, - "learning_rate": 2.3725381503743338e-05, - "loss": 0.3108, - "step": 7697500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3723285738113895e-05, - "loss": 0.306, - "step": 7698000 - }, - { - "epoch": 4.62, - "learning_rate": 2.372118577255333e-05, - "loss": 0.3159, - "step": 7698500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3719085806992765e-05, - "loss": 0.3119, - "step": 7699000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3716985841432202e-05, - "loss": 0.3161, - "step": 7699500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3714885875871635e-05, - "loss": 0.3165, - "step": 7700000 - }, - { - "epoch": 4.62, - "eval_loss": 0.30487242341041565, - "eval_runtime": 1115.4735, - "eval_samples_per_second": 472.194, - "eval_steps_per_second": 78.699, - "step": 7700000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3712790110242192e-05, - "loss": 0.3088, - "step": 7700500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3710690144681626e-05, - "loss": 0.3085, - "step": 7701000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3708590179121063e-05, - "loss": 0.3034, - "step": 7701500 - }, - { - "epoch": 4.62, - "learning_rate": 2.37064902135605e-05, - "loss": 0.3104, - "step": 7702000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3704394447931053e-05, - "loss": 0.3077, - "step": 7702500 - }, - { - "epoch": 4.62, - "learning_rate": 2.370229448237049e-05, - "loss": 0.3105, - "step": 7703000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3700194516809923e-05, - "loss": 0.3081, - "step": 7703500 - }, - { - "epoch": 4.62, - "learning_rate": 2.369809455124936e-05, - "loss": 0.3099, - "step": 7704000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3695994585688794e-05, - "loss": 0.3074, - "step": 7704500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3693894620128227e-05, - "loss": 0.3159, - "step": 7705000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3691798854498787e-05, - "loss": 0.3082, - "step": 7705500 - }, - { - "epoch": 4.62, - "learning_rate": 2.368969888893822e-05, - "loss": 0.3085, - "step": 7706000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3687598923377658e-05, - "loss": 0.3056, - "step": 7706500 - }, - { - "epoch": 4.62, - "learning_rate": 2.368549895781709e-05, - "loss": 0.3002, - "step": 7707000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3683398992256525e-05, - "loss": 0.3098, - "step": 7707500 - }, - { - "epoch": 4.62, - "learning_rate": 2.368129902669596e-05, - "loss": 0.3114, - "step": 7708000 - }, - { - "epoch": 4.62, - "learning_rate": 2.36791990611354e-05, - "loss": 0.3102, - "step": 7708500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3677103295505955e-05, - "loss": 0.321, - "step": 7709000 - }, - { - "epoch": 4.62, - "learning_rate": 2.367500332994539e-05, - "loss": 0.3137, - "step": 7709500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3672903364384822e-05, - "loss": 0.3117, - "step": 7710000 - }, - { - "epoch": 4.62, - "learning_rate": 2.367080759875538e-05, - "loss": 0.3157, - "step": 7710500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3668707633194816e-05, - "loss": 0.3049, - "step": 7711000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3666607667634253e-05, - "loss": 0.3099, - "step": 7711500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3664507702073683e-05, - "loss": 0.3131, - "step": 7712000 - }, - { - "epoch": 4.62, - "learning_rate": 2.366240773651312e-05, - "loss": 0.3161, - "step": 7712500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3660307770952557e-05, - "loss": 0.3138, - "step": 7713000 - }, - { - "epoch": 4.62, - "learning_rate": 2.3658207805391993e-05, - "loss": 0.3102, - "step": 7713500 - }, - { - "epoch": 4.62, - "learning_rate": 2.3656107839831427e-05, - "loss": 0.3094, - "step": 7714000 - }, - { - "epoch": 4.63, - "learning_rate": 2.365400787427086e-05, - "loss": 0.3099, - "step": 7714500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3651912108641417e-05, - "loss": 0.3062, - "step": 7715000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3649812143080854e-05, - "loss": 0.3064, - "step": 7715500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3647712177520288e-05, - "loss": 0.3135, - "step": 7716000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3645612211959725e-05, - "loss": 0.3064, - "step": 7716500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3643512246399158e-05, - "loss": 0.3036, - "step": 7717000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3641416480769715e-05, - "loss": 0.3036, - "step": 7717500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3639316515209152e-05, - "loss": 0.3192, - "step": 7718000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3637216549648585e-05, - "loss": 0.2973, - "step": 7718500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3635116584088022e-05, - "loss": 0.3077, - "step": 7719000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3633016618527456e-05, - "loss": 0.3039, - "step": 7719500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3630920852898012e-05, - "loss": 0.305, - "step": 7720000 - }, - { - "epoch": 4.63, - "learning_rate": 2.362882088733745e-05, - "loss": 0.3063, - "step": 7720500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3626720921776883e-05, - "loss": 0.3086, - "step": 7721000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3624620956216316e-05, - "loss": 0.3043, - "step": 7721500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3622520990655753e-05, - "loss": 0.3129, - "step": 7722000 - }, - { - "epoch": 4.63, - "learning_rate": 2.362042522502631e-05, - "loss": 0.3104, - "step": 7722500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3618325259465744e-05, - "loss": 0.314, - "step": 7723000 - }, - { - "epoch": 4.63, - "learning_rate": 2.361622529390518e-05, - "loss": 0.3147, - "step": 7723500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3614125328344614e-05, - "loss": 0.3119, - "step": 7724000 - }, - { - "epoch": 4.63, - "learning_rate": 2.361202536278405e-05, - "loss": 0.31, - "step": 7724500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3609929597154608e-05, - "loss": 0.308, - "step": 7725000 - }, - { - "epoch": 4.63, - "learning_rate": 2.360782963159404e-05, - "loss": 0.3154, - "step": 7725500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3605729666033478e-05, - "loss": 0.3136, - "step": 7726000 - }, - { - "epoch": 4.63, - "learning_rate": 2.360362970047291e-05, - "loss": 0.3107, - "step": 7726500 - }, - { - "epoch": 4.63, - "learning_rate": 2.3601529734912345e-05, - "loss": 0.3066, - "step": 7727000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3599433969282905e-05, - "loss": 0.3113, - "step": 7727500 - }, - { - "epoch": 4.63, - "learning_rate": 2.359733400372234e-05, - "loss": 0.3055, - "step": 7728000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3595234038161776e-05, - "loss": 0.3036, - "step": 7728500 - }, - { - "epoch": 4.63, - "learning_rate": 2.359313407260121e-05, - "loss": 0.3092, - "step": 7729000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3591034107040642e-05, - "loss": 0.295, - "step": 7729500 - }, - { - "epoch": 4.63, - "learning_rate": 2.35889383414112e-05, - "loss": 0.3093, - "step": 7730000 - }, - { - "epoch": 4.63, - "learning_rate": 2.3586838375850636e-05, - "loss": 0.3116, - "step": 7730500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3584738410290073e-05, - "loss": 0.3127, - "step": 7731000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3582638444729507e-05, - "loss": 0.3102, - "step": 7731500 - }, - { - "epoch": 4.64, - "learning_rate": 2.358053847916894e-05, - "loss": 0.3069, - "step": 7732000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3578442713539497e-05, - "loss": 0.3204, - "step": 7732500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3576342747978934e-05, - "loss": 0.3085, - "step": 7733000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3574242782418367e-05, - "loss": 0.3061, - "step": 7733500 - }, - { - "epoch": 4.64, - "learning_rate": 2.35721428168578e-05, - "loss": 0.3134, - "step": 7734000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3570042851297238e-05, - "loss": 0.3143, - "step": 7734500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3567942885736674e-05, - "loss": 0.3067, - "step": 7735000 - }, - { - "epoch": 4.64, - "learning_rate": 2.356584712010723e-05, - "loss": 0.3011, - "step": 7735500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3563747154546665e-05, - "loss": 0.3072, - "step": 7736000 - }, - { - "epoch": 4.64, - "learning_rate": 2.35616471889861e-05, - "loss": 0.3136, - "step": 7736500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3559547223425535e-05, - "loss": 0.3071, - "step": 7737000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3557451457796092e-05, - "loss": 0.3134, - "step": 7737500 - }, - { - "epoch": 4.64, - "learning_rate": 2.355535149223553e-05, - "loss": 0.3158, - "step": 7738000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3553251526674962e-05, - "loss": 0.3112, - "step": 7738500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3551151561114396e-05, - "loss": 0.3129, - "step": 7739000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3549051595553833e-05, - "loss": 0.3165, - "step": 7739500 - }, - { - "epoch": 4.64, - "learning_rate": 2.354695162999327e-05, - "loss": 0.3046, - "step": 7740000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3544851664432703e-05, - "loss": 0.3061, - "step": 7740500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3542751698872137e-05, - "loss": 0.315, - "step": 7741000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3540655933242693e-05, - "loss": 0.3157, - "step": 7741500 - }, - { - "epoch": 4.64, - "learning_rate": 2.353855596768213e-05, - "loss": 0.3156, - "step": 7742000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3536456002121567e-05, - "loss": 0.2993, - "step": 7742500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3534356036561e-05, - "loss": 0.3091, - "step": 7743000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3532256071000434e-05, - "loss": 0.3118, - "step": 7743500 - }, - { - "epoch": 4.64, - "learning_rate": 2.353015610543987e-05, - "loss": 0.2982, - "step": 7744000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3528056139879304e-05, - "loss": 0.3076, - "step": 7744500 - }, - { - "epoch": 4.64, - "learning_rate": 2.352595617431874e-05, - "loss": 0.315, - "step": 7745000 - }, - { - "epoch": 4.64, - "learning_rate": 2.3523860408689298e-05, - "loss": 0.3062, - "step": 7745500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3521764643059852e-05, - "loss": 0.3028, - "step": 7746000 - }, - { - "epoch": 4.64, - "learning_rate": 2.351966467749929e-05, - "loss": 0.3124, - "step": 7746500 - }, - { - "epoch": 4.64, - "learning_rate": 2.3517564711938725e-05, - "loss": 0.3022, - "step": 7747000 - }, - { - "epoch": 4.64, - "learning_rate": 2.351546474637816e-05, - "loss": 0.3062, - "step": 7747500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3513364780817596e-05, - "loss": 0.3072, - "step": 7748000 - }, - { - "epoch": 4.65, - "learning_rate": 2.351126481525703e-05, - "loss": 0.3121, - "step": 7748500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3509164849696463e-05, - "loss": 0.3053, - "step": 7749000 - }, - { - "epoch": 4.65, - "learning_rate": 2.35070648841359e-05, - "loss": 0.3073, - "step": 7749500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3504969118506457e-05, - "loss": 0.3065, - "step": 7750000 - }, - { - "epoch": 4.65, - "learning_rate": 2.350286915294589e-05, - "loss": 0.3155, - "step": 7750500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3500769187385327e-05, - "loss": 0.312, - "step": 7751000 - }, - { - "epoch": 4.65, - "learning_rate": 2.3498673421755884e-05, - "loss": 0.3173, - "step": 7751500 - }, - { - "epoch": 4.65, - "learning_rate": 2.349657765612644e-05, - "loss": 0.3118, - "step": 7752000 - }, - { - "epoch": 4.65, - "learning_rate": 2.3494477690565874e-05, - "loss": 0.3082, - "step": 7752500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3492377725005308e-05, - "loss": 0.3109, - "step": 7753000 - }, - { - "epoch": 4.65, - "learning_rate": 2.3490277759444745e-05, - "loss": 0.2992, - "step": 7753500 - }, - { - "epoch": 4.65, - "learning_rate": 2.348817779388418e-05, - "loss": 0.3134, - "step": 7754000 - }, - { - "epoch": 4.65, - "learning_rate": 2.3486077828323615e-05, - "loss": 0.3059, - "step": 7754500 - }, - { - "epoch": 4.65, - "learning_rate": 2.348397786276305e-05, - "loss": 0.3128, - "step": 7755000 - }, - { - "epoch": 4.65, - "learning_rate": 2.3481877897202485e-05, - "loss": 0.3073, - "step": 7755500 - }, - { - "epoch": 4.65, - "learning_rate": 2.347977793164192e-05, - "loss": 0.31, - "step": 7756000 - }, - { - "epoch": 4.65, - "learning_rate": 2.347768216601248e-05, - "loss": 0.3092, - "step": 7756500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3475582200451912e-05, - "loss": 0.3071, - "step": 7757000 - }, - { - "epoch": 4.65, - "learning_rate": 2.347348223489135e-05, - "loss": 0.3086, - "step": 7757500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3471382269330783e-05, - "loss": 0.3152, - "step": 7758000 - }, - { - "epoch": 4.65, - "learning_rate": 2.346928650370134e-05, - "loss": 0.3095, - "step": 7758500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3467186538140773e-05, - "loss": 0.3055, - "step": 7759000 - }, - { - "epoch": 4.65, - "learning_rate": 2.346508657258021e-05, - "loss": 0.2987, - "step": 7759500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3462986607019643e-05, - "loss": 0.3056, - "step": 7760000 - }, - { - "epoch": 4.65, - "learning_rate": 2.34608908413902e-05, - "loss": 0.3059, - "step": 7760500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3458790875829637e-05, - "loss": 0.3128, - "step": 7761000 - }, - { - "epoch": 4.65, - "learning_rate": 2.345669091026907e-05, - "loss": 0.3222, - "step": 7761500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3454590944708508e-05, - "loss": 0.3077, - "step": 7762000 - }, - { - "epoch": 4.65, - "learning_rate": 2.345249517907906e-05, - "loss": 0.3204, - "step": 7762500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3450395213518498e-05, - "loss": 0.3055, - "step": 7763000 - }, - { - "epoch": 4.65, - "learning_rate": 2.3448295247957935e-05, - "loss": 0.3137, - "step": 7763500 - }, - { - "epoch": 4.65, - "learning_rate": 2.3446195282397368e-05, - "loss": 0.3069, - "step": 7764000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3444095316836805e-05, - "loss": 0.312, - "step": 7764500 - }, - { - "epoch": 4.66, - "learning_rate": 2.344199955120736e-05, - "loss": 0.3072, - "step": 7765000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3439899585646796e-05, - "loss": 0.3211, - "step": 7765500 - }, - { - "epoch": 4.66, - "learning_rate": 2.343779962008623e-05, - "loss": 0.3157, - "step": 7766000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3435699654525666e-05, - "loss": 0.3123, - "step": 7766500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3433599688965103e-05, - "loss": 0.3063, - "step": 7767000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3431508123266776e-05, - "loss": 0.3119, - "step": 7767500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3429408157706213e-05, - "loss": 0.3013, - "step": 7768000 - }, - { - "epoch": 4.66, - "learning_rate": 2.342730819214565e-05, - "loss": 0.3121, - "step": 7768500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3425208226585084e-05, - "loss": 0.3104, - "step": 7769000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3423108261024517e-05, - "loss": 0.3177, - "step": 7769500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3421008295463954e-05, - "loss": 0.3081, - "step": 7770000 - }, - { - "epoch": 4.66, - "learning_rate": 2.341890832990339e-05, - "loss": 0.311, - "step": 7770500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3416808364342824e-05, - "loss": 0.3072, - "step": 7771000 - }, - { - "epoch": 4.66, - "learning_rate": 2.341470839878226e-05, - "loss": 0.3026, - "step": 7771500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3412612633152815e-05, - "loss": 0.305, - "step": 7772000 - }, - { - "epoch": 4.66, - "learning_rate": 2.341051266759225e-05, - "loss": 0.3144, - "step": 7772500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3408412702031685e-05, - "loss": 0.3068, - "step": 7773000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3406312736471122e-05, - "loss": 0.3104, - "step": 7773500 - }, - { - "epoch": 4.66, - "learning_rate": 2.340421277091056e-05, - "loss": 0.3035, - "step": 7774000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3402117005281112e-05, - "loss": 0.3232, - "step": 7774500 - }, - { - "epoch": 4.66, - "learning_rate": 2.340001703972055e-05, - "loss": 0.3096, - "step": 7775000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3397917074159982e-05, - "loss": 0.3118, - "step": 7775500 - }, - { - "epoch": 4.66, - "learning_rate": 2.339581710859942e-05, - "loss": 0.3086, - "step": 7776000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3393717143038856e-05, - "loss": 0.3085, - "step": 7776500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3391617177478286e-05, - "loss": 0.3025, - "step": 7777000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3389521411848847e-05, - "loss": 0.3111, - "step": 7777500 - }, - { - "epoch": 4.66, - "learning_rate": 2.338742144628828e-05, - "loss": 0.3101, - "step": 7778000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3385321480727717e-05, - "loss": 0.3112, - "step": 7778500 - }, - { - "epoch": 4.66, - "learning_rate": 2.3383221515167154e-05, - "loss": 0.3064, - "step": 7779000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3381125749537707e-05, - "loss": 0.3018, - "step": 7779500 - }, - { - "epoch": 4.66, - "learning_rate": 2.337902578397714e-05, - "loss": 0.3127, - "step": 7780000 - }, - { - "epoch": 4.66, - "learning_rate": 2.3376925818416578e-05, - "loss": 0.3023, - "step": 7780500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3374825852856014e-05, - "loss": 0.3073, - "step": 7781000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3372730087226568e-05, - "loss": 0.3248, - "step": 7781500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3370630121666005e-05, - "loss": 0.311, - "step": 7782000 - }, - { - "epoch": 4.67, - "learning_rate": 2.336853015610544e-05, - "loss": 0.3113, - "step": 7782500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3366430190544875e-05, - "loss": 0.3059, - "step": 7783000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3364330224984312e-05, - "loss": 0.3052, - "step": 7783500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3362230259423742e-05, - "loss": 0.3042, - "step": 7784000 - }, - { - "epoch": 4.67, - "learning_rate": 2.336013029386318e-05, - "loss": 0.3087, - "step": 7784500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3358030328302616e-05, - "loss": 0.3148, - "step": 7785000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3355930362742053e-05, - "loss": 0.3027, - "step": 7785500 - }, - { - "epoch": 4.67, - "learning_rate": 2.335383459711261e-05, - "loss": 0.3159, - "step": 7786000 - }, - { - "epoch": 4.67, - "learning_rate": 2.335173463155204e-05, - "loss": 0.3135, - "step": 7786500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3349634665991477e-05, - "loss": 0.3068, - "step": 7787000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3347534700430913e-05, - "loss": 0.3161, - "step": 7787500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3345434734870347e-05, - "loss": 0.3133, - "step": 7788000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3343334769309784e-05, - "loss": 0.2987, - "step": 7788500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3341239003680337e-05, - "loss": 0.2984, - "step": 7789000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3339139038119774e-05, - "loss": 0.306, - "step": 7789500 - }, - { - "epoch": 4.67, - "learning_rate": 2.333703907255921e-05, - "loss": 0.3101, - "step": 7790000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3334939106998644e-05, - "loss": 0.3122, - "step": 7790500 - }, - { - "epoch": 4.67, - "learning_rate": 2.333283914143808e-05, - "loss": 0.3101, - "step": 7791000 - }, - { - "epoch": 4.67, - "learning_rate": 2.3330739175877515e-05, - "loss": 0.313, - "step": 7791500 - }, - { - "epoch": 4.67, - "learning_rate": 2.332864341024807e-05, - "loss": 0.3114, - "step": 7792000 - }, - { - "epoch": 4.67, - "learning_rate": 2.332654344468751e-05, - "loss": 0.313, - "step": 7792500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3324443479126942e-05, - "loss": 0.3082, - "step": 7793000 - }, - { - "epoch": 4.67, - "learning_rate": 2.332234351356638e-05, - "loss": 0.3162, - "step": 7793500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3320247747936932e-05, - "loss": 0.3116, - "step": 7794000 - }, - { - "epoch": 4.67, - "learning_rate": 2.331814778237637e-05, - "loss": 0.3085, - "step": 7794500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3316047816815803e-05, - "loss": 0.3071, - "step": 7795000 - }, - { - "epoch": 4.67, - "learning_rate": 2.331394785125524e-05, - "loss": 0.3132, - "step": 7795500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3311847885694676e-05, - "loss": 0.3068, - "step": 7796000 - }, - { - "epoch": 4.67, - "learning_rate": 2.330975212006523e-05, - "loss": 0.3049, - "step": 7796500 - }, - { - "epoch": 4.67, - "learning_rate": 2.3307652154504667e-05, - "loss": 0.3076, - "step": 7797000 - }, - { - "epoch": 4.67, - "learning_rate": 2.33055521889441e-05, - "loss": 0.3063, - "step": 7797500 - }, - { - "epoch": 4.68, - "learning_rate": 2.3303452223383537e-05, - "loss": 0.303, - "step": 7798000 - }, - { - "epoch": 4.68, - "learning_rate": 2.330135645775409e-05, - "loss": 0.313, - "step": 7798500 - }, - { - "epoch": 4.68, - "learning_rate": 2.3299256492193528e-05, - "loss": 0.3188, - "step": 7799000 - }, - { - "epoch": 4.68, - "learning_rate": 2.3297156526632964e-05, - "loss": 0.3166, - "step": 7799500 - }, - { - "epoch": 4.68, - "learning_rate": 2.3295056561072398e-05, - "loss": 0.3185, - "step": 7800000 - }, - { - "epoch": 4.68, - "eval_loss": 0.30413320660591125, - "eval_runtime": 1114.0591, - "eval_samples_per_second": 472.794, - "eval_steps_per_second": 78.799, - "step": 7800000 } ], "max_steps": 13343552, "num_train_epochs": 8, - "total_flos": 1.5920735259482726e+18, + "total_flos": 8.775359130063053e+17, "trial_name": null, "trial_params": null }