diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,6736 +1,1336 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.6055880638592613, - "global_step": 560000, + "epoch": 0.9983934941049403, + "global_step": 110000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 4.999098827285924e-05, - "loss": 5.3192, + "learning_rate": 4.9924364129234474e-05, + "loss": 4.7552, "step": 500 }, { - "epoch": 0.0, - "learning_rate": 4.998197654571848e-05, - "loss": 5.4021, + "epoch": 0.01, + "learning_rate": 4.984872825846895e-05, + "loss": 4.6634, "step": 1000 }, { - "epoch": 0.0, - "learning_rate": 4.9972964818577715e-05, - "loss": 5.4284, + "epoch": 0.01, + "learning_rate": 4.9773092387703424e-05, + "loss": 4.583, "step": 1500 }, { - "epoch": 0.0, - "learning_rate": 4.996395309143695e-05, - "loss": 5.4341, + "epoch": 0.02, + "learning_rate": 4.96974565169379e-05, + "loss": 4.5872, "step": 2000 }, { - "epoch": 0.0, - "learning_rate": 4.995494136429619e-05, - "loss": 5.4215, + "epoch": 0.02, + "learning_rate": 4.962182064617237e-05, + "loss": 4.5253, "step": 2500 }, { - "epoch": 0.0, - "learning_rate": 4.994592963715543e-05, - "loss": 5.3564, + "epoch": 0.03, + "learning_rate": 4.9546184775406845e-05, + "loss": 4.4986, "step": 3000 }, { - "epoch": 0.0, - "learning_rate": 4.9936917910014664e-05, - "loss": 5.3319, + "epoch": 0.03, + "learning_rate": 4.947054890464132e-05, + "loss": 4.5247, "step": 3500 }, { - "epoch": 0.0, - "learning_rate": 4.99279061828739e-05, - "loss": 5.3326, + "epoch": 0.04, + "learning_rate": 4.9394913033875794e-05, + "loss": 4.4397, "step": 4000 }, { - "epoch": 0.0, - "learning_rate": 4.991889445573314e-05, - "loss": 5.3575, + "epoch": 0.04, + "learning_rate": 4.931927716311027e-05, + "loss": 4.5077, "step": 4500 }, { - "epoch": 0.01, - "learning_rate": 4.990988272859237e-05, - "loss": 5.3404, + "epoch": 0.05, + "learning_rate": 4.9243641292344744e-05, + "loss": 4.4674, "step": 5000 }, { - "epoch": 0.01, - "learning_rate": 4.990087100145161e-05, - "loss": 5.339, + "epoch": 0.05, + "learning_rate": 4.9168005421579216e-05, + "loss": 4.4637, "step": 5500 }, { - "epoch": 0.01, - "learning_rate": 4.989185927431085e-05, - "loss": 5.2714, + "epoch": 0.05, + "learning_rate": 4.9092369550813694e-05, + "loss": 4.4311, "step": 6000 }, { - "epoch": 0.01, - "learning_rate": 4.988284754717009e-05, - "loss": 5.2691, + "epoch": 0.06, + "learning_rate": 4.9016733680048165e-05, + "loss": 4.4189, "step": 6500 }, { - "epoch": 0.01, - "learning_rate": 4.9873835820029326e-05, - "loss": 5.2559, + "epoch": 0.06, + "learning_rate": 4.8941097809282644e-05, + "loss": 4.3811, "step": 7000 }, { - "epoch": 0.01, - "learning_rate": 4.9864824092888563e-05, - "loss": 5.209, + "epoch": 0.07, + "learning_rate": 4.8865461938517115e-05, + "loss": 4.4015, "step": 7500 }, { - "epoch": 0.01, - "learning_rate": 4.9855812365747794e-05, - "loss": 5.26, + "epoch": 0.07, + "learning_rate": 4.8789826067751587e-05, + "loss": 4.3717, "step": 8000 }, { - "epoch": 0.01, - "learning_rate": 4.984680063860703e-05, - "loss": 5.1878, + "epoch": 0.08, + "learning_rate": 4.8714190196986065e-05, + "loss": 4.3931, "step": 8500 }, { - "epoch": 0.01, - "learning_rate": 4.983778891146627e-05, - "loss": 5.212, + "epoch": 0.08, + "learning_rate": 4.8638554326220536e-05, + "loss": 4.3651, "step": 9000 }, { - "epoch": 0.01, - "learning_rate": 4.9828777184325506e-05, - "loss": 5.2063, + "epoch": 0.09, + "learning_rate": 4.8562918455455014e-05, + "loss": 4.3974, "step": 9500 }, { - "epoch": 0.01, - "learning_rate": 4.981976545718475e-05, - "loss": 5.2132, + "epoch": 0.09, + "learning_rate": 4.8487282584689486e-05, + "loss": 4.3297, "step": 10000 }, { - "epoch": 0.01, - "learning_rate": 4.981075373004399e-05, - "loss": 5.221, + "epoch": 0.1, + "learning_rate": 4.8411646713923964e-05, + "loss": 4.3414, "step": 10500 }, { - "epoch": 0.01, - "learning_rate": 4.980174200290322e-05, - "loss": 5.1786, + "epoch": 0.1, + "learning_rate": 4.8336010843158436e-05, + "loss": 4.3682, "step": 11000 }, { - "epoch": 0.01, - "learning_rate": 4.9792730275762456e-05, - "loss": 5.1868, + "epoch": 0.1, + "learning_rate": 4.826037497239291e-05, + "loss": 4.3614, "step": 11500 }, { - "epoch": 0.01, - "learning_rate": 4.978371854862169e-05, - "loss": 5.1585, + "epoch": 0.11, + "learning_rate": 4.8184739101627385e-05, + "loss": 4.368, "step": 12000 }, { - "epoch": 0.01, - "learning_rate": 4.977470682148093e-05, - "loss": 5.2465, + "epoch": 0.11, + "learning_rate": 4.810910323086186e-05, + "loss": 4.3026, "step": 12500 }, { - "epoch": 0.01, - "learning_rate": 4.976569509434017e-05, - "loss": 5.1645, + "epoch": 0.12, + "learning_rate": 4.8033467360096335e-05, + "loss": 4.3342, "step": 13000 }, { - "epoch": 0.01, - "learning_rate": 4.9756683367199405e-05, - "loss": 5.1462, + "epoch": 0.12, + "learning_rate": 4.7957831489330807e-05, + "loss": 4.3359, "step": 13500 }, { - "epoch": 0.02, - "learning_rate": 4.974767164005864e-05, - "loss": 5.1588, + "epoch": 0.13, + "learning_rate": 4.788219561856528e-05, + "loss": 4.3332, "step": 14000 }, { - "epoch": 0.02, - "learning_rate": 4.973865991291788e-05, - "loss": 5.145, + "epoch": 0.13, + "learning_rate": 4.7806559747799756e-05, + "loss": 4.2581, "step": 14500 }, { - "epoch": 0.02, - "learning_rate": 4.972964818577712e-05, - "loss": 5.1256, + "epoch": 0.14, + "learning_rate": 4.773092387703423e-05, + "loss": 4.3509, "step": 15000 }, { - "epoch": 0.02, - "learning_rate": 4.9720636458636355e-05, - "loss": 5.1227, + "epoch": 0.14, + "learning_rate": 4.7655288006268706e-05, + "loss": 4.2819, "step": 15500 }, { - "epoch": 0.02, - "learning_rate": 4.971162473149559e-05, - "loss": 5.096, + "epoch": 0.15, + "learning_rate": 4.757965213550318e-05, + "loss": 4.2966, "step": 16000 }, { - "epoch": 0.02, - "learning_rate": 4.970261300435483e-05, - "loss": 5.1427, + "epoch": 0.15, + "learning_rate": 4.750401626473765e-05, + "loss": 4.2967, "step": 16500 }, { - "epoch": 0.02, - "learning_rate": 4.969360127721407e-05, - "loss": 5.121, + "epoch": 0.15, + "learning_rate": 4.742838039397213e-05, + "loss": 4.2915, "step": 17000 }, { - "epoch": 0.02, - "learning_rate": 4.9684589550073305e-05, - "loss": 5.1324, + "epoch": 0.16, + "learning_rate": 4.73527445232066e-05, + "loss": 4.2473, "step": 17500 }, { - "epoch": 0.02, - "learning_rate": 4.967557782293254e-05, - "loss": 5.1476, + "epoch": 0.16, + "learning_rate": 4.727710865244108e-05, + "loss": 4.2357, "step": 18000 }, { - "epoch": 0.02, - "learning_rate": 4.966656609579178e-05, - "loss": 5.0538, + "epoch": 0.17, + "learning_rate": 4.720147278167555e-05, + "loss": 4.2754, "step": 18500 }, { - "epoch": 0.02, - "learning_rate": 4.965755436865102e-05, - "loss": 5.0635, + "epoch": 0.17, + "learning_rate": 4.712583691091002e-05, + "loss": 4.2699, "step": 19000 }, { - "epoch": 0.02, - "learning_rate": 4.9648542641510254e-05, - "loss": 5.0309, + "epoch": 0.18, + "learning_rate": 4.70502010401445e-05, + "loss": 4.2866, "step": 19500 }, { - "epoch": 0.02, - "learning_rate": 4.963953091436949e-05, - "loss": 5.0623, + "epoch": 0.18, + "learning_rate": 4.697456516937897e-05, + "loss": 4.2778, "step": 20000 }, { - "epoch": 0.02, - "learning_rate": 4.963051918722872e-05, - "loss": 5.0624, + "epoch": 0.19, + "learning_rate": 4.689892929861345e-05, + "loss": 4.2414, "step": 20500 }, { - "epoch": 0.02, - "learning_rate": 4.962150746008796e-05, - "loss": 5.0844, + "epoch": 0.19, + "learning_rate": 4.682329342784792e-05, + "loss": 4.2963, "step": 21000 }, { - "epoch": 0.02, - "learning_rate": 4.9612495732947204e-05, - "loss": 5.0667, + "epoch": 0.2, + "learning_rate": 4.674765755708239e-05, + "loss": 4.3053, "step": 21500 }, { - "epoch": 0.02, - "learning_rate": 4.960348400580644e-05, - "loss": 5.0536, + "epoch": 0.2, + "learning_rate": 4.667202168631687e-05, + "loss": 4.2914, "step": 22000 }, { - "epoch": 0.02, - "learning_rate": 4.959447227866568e-05, - "loss": 5.0783, + "epoch": 0.2, + "learning_rate": 4.659638581555134e-05, + "loss": 4.2527, "step": 22500 }, { - "epoch": 0.02, - "learning_rate": 4.9585460551524916e-05, - "loss": 5.0335, + "epoch": 0.21, + "learning_rate": 4.652074994478582e-05, + "loss": 4.251, "step": 23000 }, { - "epoch": 0.03, - "learning_rate": 4.957644882438415e-05, - "loss": 5.0321, + "epoch": 0.21, + "learning_rate": 4.644511407402029e-05, + "loss": 4.2694, "step": 23500 }, { - "epoch": 0.03, - "learning_rate": 4.9567437097243384e-05, - "loss": 5.037, + "epoch": 0.22, + "learning_rate": 4.636947820325476e-05, + "loss": 4.2306, "step": 24000 }, { - "epoch": 0.03, - "learning_rate": 4.955842537010262e-05, - "loss": 5.0187, + "epoch": 0.22, + "learning_rate": 4.629384233248924e-05, + "loss": 4.2451, "step": 24500 }, { - "epoch": 0.03, - "learning_rate": 4.954941364296186e-05, - "loss": 5.0357, + "epoch": 0.23, + "learning_rate": 4.621820646172371e-05, + "loss": 4.232, "step": 25000 }, { - "epoch": 0.03, - "learning_rate": 4.95404019158211e-05, - "loss": 5.0128, + "epoch": 0.23, + "learning_rate": 4.614257059095819e-05, + "loss": 4.2506, "step": 25500 }, { - "epoch": 0.03, - "learning_rate": 4.953139018868034e-05, - "loss": 5.0553, + "epoch": 0.24, + "learning_rate": 4.606693472019266e-05, + "loss": 4.2425, "step": 26000 }, { - "epoch": 0.03, - "learning_rate": 4.952237846153957e-05, - "loss": 5.0024, + "epoch": 0.24, + "learning_rate": 4.599129884942713e-05, + "loss": 4.2495, "step": 26500 }, { - "epoch": 0.03, - "learning_rate": 4.951336673439881e-05, - "loss": 5.0177, + "epoch": 0.25, + "learning_rate": 4.591566297866161e-05, + "loss": 4.2475, "step": 27000 }, { - "epoch": 0.03, - "learning_rate": 4.9504355007258046e-05, - "loss": 5.0174, + "epoch": 0.25, + "learning_rate": 4.584002710789608e-05, + "loss": 4.2516, "step": 27500 }, { - "epoch": 0.03, - "learning_rate": 4.949534328011728e-05, - "loss": 5.0167, + "epoch": 0.25, + "learning_rate": 4.576439123713056e-05, + "loss": 4.2478, "step": 28000 }, { - "epoch": 0.03, - "learning_rate": 4.948633155297652e-05, - "loss": 4.9896, + "epoch": 0.26, + "learning_rate": 4.568875536636503e-05, + "loss": 4.2214, "step": 28500 }, { - "epoch": 0.03, - "learning_rate": 4.947731982583576e-05, - "loss": 5.0355, + "epoch": 0.26, + "learning_rate": 4.56131194955995e-05, + "loss": 4.244, "step": 29000 }, { - "epoch": 0.03, - "learning_rate": 4.9468308098694995e-05, - "loss": 4.9929, + "epoch": 0.27, + "learning_rate": 4.553748362483398e-05, + "loss": 4.2125, "step": 29500 }, { - "epoch": 0.03, - "learning_rate": 4.945929637155423e-05, - "loss": 4.9702, + "epoch": 0.27, + "learning_rate": 4.546184775406845e-05, + "loss": 4.251, "step": 30000 }, { - "epoch": 0.03, - "learning_rate": 4.945028464441347e-05, - "loss": 4.944, + "epoch": 0.28, + "learning_rate": 4.538621188330293e-05, + "loss": 4.1668, "step": 30500 }, { - "epoch": 0.03, - "learning_rate": 4.944127291727271e-05, - "loss": 4.9957, + "epoch": 0.28, + "learning_rate": 4.53105760125374e-05, + "loss": 4.2469, "step": 31000 }, { - "epoch": 0.03, - "learning_rate": 4.9432261190131945e-05, - "loss": 4.9908, + "epoch": 0.29, + "learning_rate": 4.523494014177188e-05, + "loss": 4.2189, "step": 31500 }, { - "epoch": 0.03, - "learning_rate": 4.942324946299118e-05, - "loss": 4.9816, + "epoch": 0.29, + "learning_rate": 4.515930427100635e-05, + "loss": 4.2727, "step": 32000 }, { - "epoch": 0.04, - "learning_rate": 4.941423773585042e-05, - "loss": 4.9649, + "epoch": 0.29, + "learning_rate": 4.5083668400240824e-05, + "loss": 4.2318, "step": 32500 }, { - "epoch": 0.04, - "learning_rate": 4.940522600870966e-05, - "loss": 4.9434, + "epoch": 0.3, + "learning_rate": 4.50080325294753e-05, + "loss": 4.2083, "step": 33000 }, { - "epoch": 0.04, - "learning_rate": 4.9396214281568895e-05, - "loss": 5.0387, + "epoch": 0.3, + "learning_rate": 4.4932396658709774e-05, + "loss": 4.1744, "step": 33500 }, { - "epoch": 0.04, - "learning_rate": 4.938720255442813e-05, - "loss": 4.9799, + "epoch": 0.31, + "learning_rate": 4.485676078794425e-05, + "loss": 4.1938, "step": 34000 }, { - "epoch": 0.04, - "learning_rate": 4.937819082728737e-05, - "loss": 4.9648, + "epoch": 0.31, + "learning_rate": 4.478112491717872e-05, + "loss": 4.1745, "step": 34500 }, { - "epoch": 0.04, - "learning_rate": 4.936917910014661e-05, - "loss": 4.9593, + "epoch": 0.32, + "learning_rate": 4.4705489046413195e-05, + "loss": 4.1725, "step": 35000 }, { - "epoch": 0.04, - "learning_rate": 4.9360167373005844e-05, - "loss": 4.9687, + "epoch": 0.32, + "learning_rate": 4.462985317564767e-05, + "loss": 4.2081, "step": 35500 }, { - "epoch": 0.04, - "learning_rate": 4.9351155645865075e-05, - "loss": 4.9474, + "epoch": 0.33, + "learning_rate": 4.4554217304882145e-05, + "loss": 4.1804, "step": 36000 }, { - "epoch": 0.04, - "learning_rate": 4.934214391872431e-05, - "loss": 4.9344, + "epoch": 0.33, + "learning_rate": 4.447858143411662e-05, + "loss": 4.1977, "step": 36500 }, { - "epoch": 0.04, - "learning_rate": 4.9333132191583556e-05, - "loss": 4.932, + "epoch": 0.34, + "learning_rate": 4.4402945563351094e-05, + "loss": 4.1939, "step": 37000 }, { - "epoch": 0.04, - "learning_rate": 4.9324120464442794e-05, - "loss": 5.0116, + "epoch": 0.34, + "learning_rate": 4.4327309692585566e-05, + "loss": 4.2137, "step": 37500 }, { - "epoch": 0.04, - "learning_rate": 4.931510873730203e-05, - "loss": 4.9311, + "epoch": 0.34, + "learning_rate": 4.4251673821820044e-05, + "loss": 4.1899, "step": 38000 }, { - "epoch": 0.04, - "learning_rate": 4.930609701016127e-05, - "loss": 4.9114, + "epoch": 0.35, + "learning_rate": 4.4176037951054515e-05, + "loss": 4.1409, "step": 38500 }, { - "epoch": 0.04, - "learning_rate": 4.92970852830205e-05, - "loss": 4.9517, + "epoch": 0.35, + "learning_rate": 4.4100402080288994e-05, + "loss": 4.155, "step": 39000 }, { - "epoch": 0.04, - "learning_rate": 4.928807355587974e-05, - "loss": 4.9541, + "epoch": 0.36, + "learning_rate": 4.4024766209523465e-05, + "loss": 4.2214, "step": 39500 }, { - "epoch": 0.04, - "learning_rate": 4.9279061828738974e-05, - "loss": 4.9637, + "epoch": 0.36, + "learning_rate": 4.3949130338757937e-05, + "loss": 4.1553, "step": 40000 }, { - "epoch": 0.04, - "learning_rate": 4.927005010159821e-05, - "loss": 4.9498, + "epoch": 0.37, + "learning_rate": 4.3873494467992415e-05, + "loss": 4.1859, "step": 40500 }, { - "epoch": 0.04, - "learning_rate": 4.926103837445745e-05, - "loss": 4.8924, + "epoch": 0.37, + "learning_rate": 4.3797858597226886e-05, + "loss": 4.2127, "step": 41000 }, { - "epoch": 0.04, - "learning_rate": 4.925202664731669e-05, - "loss": 4.9596, + "epoch": 0.38, + "learning_rate": 4.3722222726461365e-05, + "loss": 4.1758, "step": 41500 }, { - "epoch": 0.05, - "learning_rate": 4.924301492017593e-05, - "loss": 4.9264, + "epoch": 0.38, + "learning_rate": 4.3646586855695836e-05, + "loss": 4.2035, "step": 42000 }, { - "epoch": 0.05, - "learning_rate": 4.923400319303516e-05, - "loss": 4.9179, + "epoch": 0.39, + "learning_rate": 4.357095098493031e-05, + "loss": 4.1486, "step": 42500 }, { - "epoch": 0.05, - "learning_rate": 4.92249914658944e-05, - "loss": 4.9151, + "epoch": 0.39, + "learning_rate": 4.3495315114164786e-05, + "loss": 4.2252, "step": 43000 }, { - "epoch": 0.05, - "learning_rate": 4.9215979738753636e-05, - "loss": 4.9101, + "epoch": 0.39, + "learning_rate": 4.341967924339926e-05, + "loss": 4.1585, "step": 43500 }, { - "epoch": 0.05, - "learning_rate": 4.920696801161287e-05, - "loss": 4.9541, + "epoch": 0.4, + "learning_rate": 4.3344043372633735e-05, + "loss": 4.1804, "step": 44000 }, { - "epoch": 0.05, - "learning_rate": 4.919795628447211e-05, - "loss": 4.9423, + "epoch": 0.4, + "learning_rate": 4.326840750186821e-05, + "loss": 4.2005, "step": 44500 }, { - "epoch": 0.05, - "learning_rate": 4.918894455733135e-05, - "loss": 4.8763, + "epoch": 0.41, + "learning_rate": 4.319277163110268e-05, + "loss": 4.1257, "step": 45000 }, { - "epoch": 0.05, - "learning_rate": 4.9179932830190586e-05, - "loss": 4.9015, + "epoch": 0.41, + "learning_rate": 4.3117135760337157e-05, + "loss": 4.1687, "step": 45500 }, { - "epoch": 0.05, - "learning_rate": 4.917092110304982e-05, - "loss": 4.9179, + "epoch": 0.42, + "learning_rate": 4.304149988957163e-05, + "loss": 4.1845, "step": 46000 }, { - "epoch": 0.05, - "learning_rate": 4.916190937590906e-05, - "loss": 4.8837, + "epoch": 0.42, + "learning_rate": 4.2965864018806106e-05, + "loss": 4.1383, "step": 46500 }, { - "epoch": 0.05, - "learning_rate": 4.91528976487683e-05, - "loss": 4.9141, + "epoch": 0.43, + "learning_rate": 4.289022814804058e-05, + "loss": 4.1346, "step": 47000 }, { - "epoch": 0.05, - "learning_rate": 4.9143885921627535e-05, - "loss": 4.8766, + "epoch": 0.43, + "learning_rate": 4.281459227727505e-05, + "loss": 4.1611, "step": 47500 }, { - "epoch": 0.05, - "learning_rate": 4.913487419448677e-05, - "loss": 4.9088, + "epoch": 0.44, + "learning_rate": 4.273895640650953e-05, + "loss": 4.1542, "step": 48000 }, { - "epoch": 0.05, - "learning_rate": 4.912586246734601e-05, - "loss": 4.9137, + "epoch": 0.44, + "learning_rate": 4.2663320535744e-05, + "loss": 4.2022, "step": 48500 }, { - "epoch": 0.05, - "learning_rate": 4.911685074020525e-05, - "loss": 4.8692, + "epoch": 0.44, + "learning_rate": 4.258768466497848e-05, + "loss": 4.1973, "step": 49000 }, { - "epoch": 0.05, - "learning_rate": 4.9107839013064485e-05, - "loss": 4.8607, + "epoch": 0.45, + "learning_rate": 4.251204879421295e-05, + "loss": 4.1386, "step": 49500 }, { - "epoch": 0.05, - "learning_rate": 4.909882728592372e-05, - "loss": 4.8573, + "epoch": 0.45, + "learning_rate": 4.243641292344743e-05, + "loss": 4.1542, "step": 50000 }, { - "epoch": 0.05, - "learning_rate": 4.908981555878296e-05, - "loss": 4.9472, + "epoch": 0.46, + "learning_rate": 4.23607770526819e-05, + "loss": 4.1301, "step": 50500 }, { - "epoch": 0.06, - "learning_rate": 4.90808038316422e-05, - "loss": 4.9144, + "epoch": 0.46, + "learning_rate": 4.228514118191637e-05, + "loss": 4.1239, "step": 51000 }, { - "epoch": 0.06, - "learning_rate": 4.907179210450143e-05, - "loss": 4.973, + "epoch": 0.47, + "learning_rate": 4.220950531115085e-05, + "loss": 4.1453, "step": 51500 }, { - "epoch": 0.06, - "learning_rate": 4.9062780377360665e-05, - "loss": 4.9413, + "epoch": 0.47, + "learning_rate": 4.213386944038532e-05, + "loss": 4.1403, "step": 52000 }, { - "epoch": 0.06, - "learning_rate": 4.90537686502199e-05, - "loss": 4.972, + "epoch": 0.48, + "learning_rate": 4.20582335696198e-05, + "loss": 4.1665, "step": 52500 }, { - "epoch": 0.06, - "learning_rate": 4.9044756923079147e-05, - "loss": 4.9722, + "epoch": 0.48, + "learning_rate": 4.198259769885427e-05, + "loss": 4.1306, "step": 53000 }, { - "epoch": 0.06, - "learning_rate": 4.9035745195938384e-05, - "loss": 4.9126, + "epoch": 0.49, + "learning_rate": 4.190696182808874e-05, + "loss": 4.1622, "step": 53500 }, { - "epoch": 0.06, - "learning_rate": 4.902673346879762e-05, - "loss": 4.9117, + "epoch": 0.49, + "learning_rate": 4.183132595732322e-05, + "loss": 4.1524, "step": 54000 }, { - "epoch": 0.06, - "learning_rate": 4.901772174165686e-05, - "loss": 4.9233, + "epoch": 0.49, + "learning_rate": 4.175569008655769e-05, + "loss": 4.1656, "step": 54500 }, { - "epoch": 0.06, - "learning_rate": 4.900871001451609e-05, - "loss": 4.9693, + "epoch": 0.5, + "learning_rate": 4.168005421579217e-05, + "loss": 4.092, "step": 55000 }, { - "epoch": 0.06, - "learning_rate": 4.899969828737533e-05, - "loss": 4.9875, + "epoch": 0.5, + "learning_rate": 4.160441834502664e-05, + "loss": 4.1483, "step": 55500 }, { - "epoch": 0.06, - "learning_rate": 4.8990686560234564e-05, - "loss": 4.9741, + "epoch": 0.51, + "learning_rate": 4.152878247426111e-05, + "loss": 4.1474, "step": 56000 }, { - "epoch": 0.06, - "learning_rate": 4.89816748330938e-05, - "loss": 4.9411, + "epoch": 0.51, + "learning_rate": 4.145314660349559e-05, + "loss": 4.1685, "step": 56500 }, { - "epoch": 0.06, - "learning_rate": 4.8972663105953046e-05, - "loss": 4.9281, + "epoch": 0.52, + "learning_rate": 4.137751073273006e-05, + "loss": 4.1404, "step": 57000 }, { - "epoch": 0.06, - "learning_rate": 4.896365137881228e-05, - "loss": 4.9392, + "epoch": 0.52, + "learning_rate": 4.130187486196454e-05, + "loss": 4.0895, "step": 57500 }, { - "epoch": 0.06, - "learning_rate": 4.8954639651671514e-05, - "loss": 4.9473, + "epoch": 0.53, + "learning_rate": 4.122623899119901e-05, + "loss": 4.1132, "step": 58000 }, { - "epoch": 0.06, - "learning_rate": 4.894562792453075e-05, - "loss": 4.9333, + "epoch": 0.53, + "learning_rate": 4.115060312043348e-05, + "loss": 4.1438, "step": 58500 }, { - "epoch": 0.06, - "learning_rate": 4.893661619738999e-05, - "loss": 4.9547, + "epoch": 0.54, + "learning_rate": 4.107496724966796e-05, + "loss": 4.1197, "step": 59000 }, { - "epoch": 0.06, - "learning_rate": 4.8927604470249226e-05, - "loss": 4.9422, + "epoch": 0.54, + "learning_rate": 4.099933137890243e-05, + "loss": 4.1568, "step": 59500 }, { - "epoch": 0.06, - "learning_rate": 4.891859274310846e-05, - "loss": 4.9182, + "epoch": 0.54, + "learning_rate": 4.092369550813691e-05, + "loss": 4.1546, "step": 60000 }, { - "epoch": 0.07, - "learning_rate": 4.89095810159677e-05, - "loss": 4.9282, + "epoch": 0.55, + "learning_rate": 4.084805963737138e-05, + "loss": 4.1391, "step": 60500 }, { - "epoch": 0.07, - "learning_rate": 4.890056928882694e-05, - "loss": 4.943, + "epoch": 0.55, + "learning_rate": 4.077242376660585e-05, + "loss": 4.1197, "step": 61000 }, { - "epoch": 0.07, - "learning_rate": 4.8891557561686176e-05, - "loss": 4.9436, + "epoch": 0.56, + "learning_rate": 4.069678789584033e-05, + "loss": 4.1323, "step": 61500 }, { - "epoch": 0.07, - "learning_rate": 4.888254583454541e-05, - "loss": 4.9253, + "epoch": 0.56, + "learning_rate": 4.06211520250748e-05, + "loss": 4.1305, "step": 62000 }, { - "epoch": 0.07, - "learning_rate": 4.887353410740465e-05, - "loss": 4.9442, + "epoch": 0.57, + "learning_rate": 4.054551615430928e-05, + "loss": 4.0884, "step": 62500 }, { - "epoch": 0.07, - "learning_rate": 4.886452238026389e-05, - "loss": 4.8888, + "epoch": 0.57, + "learning_rate": 4.046988028354375e-05, + "loss": 4.1144, "step": 63000 }, { - "epoch": 0.07, - "learning_rate": 4.8855510653123125e-05, - "loss": 4.9155, + "epoch": 0.58, + "learning_rate": 4.0394244412778224e-05, + "loss": 4.1014, "step": 63500 }, { - "epoch": 0.07, - "learning_rate": 4.8846498925982356e-05, - "loss": 4.9519, + "epoch": 0.58, + "learning_rate": 4.03186085420127e-05, + "loss": 4.0915, "step": 64000 }, { - "epoch": 0.07, - "learning_rate": 4.88374871988416e-05, - "loss": 4.9563, + "epoch": 0.59, + "learning_rate": 4.0242972671247174e-05, + "loss": 4.158, "step": 64500 }, { - "epoch": 0.07, - "learning_rate": 4.882847547170084e-05, - "loss": 4.9553, + "epoch": 0.59, + "learning_rate": 4.016733680048165e-05, + "loss": 4.13, "step": 65000 }, { - "epoch": 0.07, - "learning_rate": 4.8819463744560075e-05, - "loss": 4.8865, + "epoch": 0.59, + "learning_rate": 4.0091700929716124e-05, + "loss": 4.1329, "step": 65500 }, { - "epoch": 0.07, - "learning_rate": 4.881045201741931e-05, - "loss": 4.8987, + "epoch": 0.6, + "learning_rate": 4.0016065058950595e-05, + "loss": 4.0738, "step": 66000 }, { - "epoch": 0.07, - "learning_rate": 4.880144029027855e-05, - "loss": 4.9129, + "epoch": 0.6, + "learning_rate": 3.994042918818507e-05, + "loss": 4.1312, "step": 66500 }, { - "epoch": 0.07, - "learning_rate": 4.879242856313779e-05, - "loss": 4.9565, + "epoch": 0.61, + "learning_rate": 3.9864793317419545e-05, + "loss": 4.1058, "step": 67000 }, { - "epoch": 0.07, - "learning_rate": 4.878341683599702e-05, - "loss": 4.8831, + "epoch": 0.61, + "learning_rate": 3.978915744665402e-05, + "loss": 4.0855, "step": 67500 }, { - "epoch": 0.07, - "learning_rate": 4.8774405108856255e-05, - "loss": 4.9007, + "epoch": 0.62, + "learning_rate": 3.9713521575888495e-05, + "loss": 4.1531, "step": 68000 }, { - "epoch": 0.07, - "learning_rate": 4.87653933817155e-05, - "loss": 4.9337, + "epoch": 0.62, + "learning_rate": 3.9637885705122966e-05, + "loss": 4.1676, "step": 68500 }, { - "epoch": 0.07, - "learning_rate": 4.8756381654574737e-05, - "loss": 4.8446, + "epoch": 0.63, + "learning_rate": 3.9562249834357444e-05, + "loss": 4.0996, "step": 69000 }, { - "epoch": 0.08, - "learning_rate": 4.8747369927433974e-05, - "loss": 4.9388, + "epoch": 0.63, + "learning_rate": 3.9486613963591916e-05, + "loss": 4.0781, "step": 69500 }, { - "epoch": 0.08, - "learning_rate": 4.873835820029321e-05, - "loss": 4.9655, + "epoch": 0.64, + "learning_rate": 3.9410978092826394e-05, + "loss": 4.0646, "step": 70000 }, { - "epoch": 0.08, - "learning_rate": 4.872934647315244e-05, - "loss": 4.9309, + "epoch": 0.64, + "learning_rate": 3.9335342222060865e-05, + "loss": 4.0895, "step": 70500 }, { - "epoch": 0.08, - "learning_rate": 4.872033474601168e-05, - "loss": 4.9102, + "epoch": 0.64, + "learning_rate": 3.9259706351295344e-05, + "loss": 4.1257, "step": 71000 }, { - "epoch": 0.08, - "learning_rate": 4.871132301887092e-05, - "loss": 4.8491, + "epoch": 0.65, + "learning_rate": 3.9184070480529815e-05, + "loss": 4.0724, "step": 71500 }, { - "epoch": 0.08, - "learning_rate": 4.8702311291730154e-05, - "loss": 4.895, + "epoch": 0.65, + "learning_rate": 3.910843460976429e-05, + "loss": 4.082, "step": 72000 }, { - "epoch": 0.08, - "learning_rate": 4.86932995645894e-05, - "loss": 4.9222, + "epoch": 0.66, + "learning_rate": 3.9032798738998765e-05, + "loss": 4.1544, "step": 72500 }, { - "epoch": 0.08, - "learning_rate": 4.8684287837448636e-05, - "loss": 4.8966, + "epoch": 0.66, + "learning_rate": 3.8957162868233236e-05, + "loss": 4.124, "step": 73000 }, { - "epoch": 0.08, - "learning_rate": 4.8675276110307866e-05, - "loss": 4.8669, + "epoch": 0.67, + "learning_rate": 3.8881526997467715e-05, + "loss": 4.0763, "step": 73500 }, { - "epoch": 0.08, - "learning_rate": 4.8666264383167104e-05, - "loss": 4.8332, + "epoch": 0.67, + "learning_rate": 3.8805891126702186e-05, + "loss": 4.1284, "step": 74000 }, { - "epoch": 0.08, - "learning_rate": 4.865725265602634e-05, - "loss": 4.9127, + "epoch": 0.68, + "learning_rate": 3.873025525593666e-05, + "loss": 4.0564, "step": 74500 }, { - "epoch": 0.08, - "learning_rate": 4.864824092888558e-05, - "loss": 4.9251, + "epoch": 0.68, + "learning_rate": 3.8654619385171136e-05, + "loss": 4.1, "step": 75000 }, { - "epoch": 0.08, - "learning_rate": 4.8639229201744816e-05, - "loss": 4.9379, + "epoch": 0.69, + "learning_rate": 3.857898351440561e-05, + "loss": 4.1089, "step": 75500 }, { - "epoch": 0.08, - "learning_rate": 4.8630217474604053e-05, - "loss": 4.8682, + "epoch": 0.69, + "learning_rate": 3.8503347643640085e-05, + "loss": 4.092, "step": 76000 }, { - "epoch": 0.08, - "learning_rate": 4.862120574746329e-05, - "loss": 4.8762, + "epoch": 0.69, + "learning_rate": 3.842771177287456e-05, + "loss": 4.127, "step": 76500 }, { - "epoch": 0.08, - "learning_rate": 4.861219402032253e-05, - "loss": 4.8544, + "epoch": 0.7, + "learning_rate": 3.835207590210903e-05, + "loss": 4.1307, "step": 77000 }, { - "epoch": 0.08, - "learning_rate": 4.8603182293181766e-05, - "loss": 4.8835, + "epoch": 0.7, + "learning_rate": 3.827644003134351e-05, + "loss": 4.0732, "step": 77500 }, { - "epoch": 0.08, - "learning_rate": 4.8594170566041e-05, - "loss": 4.8346, + "epoch": 0.71, + "learning_rate": 3.820080416057798e-05, + "loss": 4.0956, "step": 78000 }, { - "epoch": 0.08, - "learning_rate": 4.858515883890024e-05, - "loss": 4.9229, + "epoch": 0.71, + "learning_rate": 3.8125168289812456e-05, + "loss": 4.0683, "step": 78500 }, { - "epoch": 0.09, - "learning_rate": 4.857614711175948e-05, - "loss": 4.9159, + "epoch": 0.72, + "learning_rate": 3.804953241904693e-05, + "loss": 4.0822, "step": 79000 }, { - "epoch": 0.09, - "learning_rate": 4.856713538461871e-05, - "loss": 4.8566, + "epoch": 0.72, + "learning_rate": 3.79738965482814e-05, + "loss": 4.1471, "step": 79500 }, { - "epoch": 0.09, - "learning_rate": 4.855812365747795e-05, - "loss": 4.8155, + "epoch": 0.73, + "learning_rate": 3.789826067751588e-05, + "loss": 4.127, "step": 80000 }, { - "epoch": 0.09, - "learning_rate": 4.854911193033719e-05, - "loss": 4.9222, + "epoch": 0.73, + "learning_rate": 3.782262480675035e-05, + "loss": 4.1113, "step": 80500 }, { - "epoch": 0.09, - "learning_rate": 4.854010020319643e-05, - "loss": 4.8545, + "epoch": 0.74, + "learning_rate": 3.774698893598483e-05, + "loss": 4.0932, "step": 81000 }, { - "epoch": 0.09, - "learning_rate": 4.8531088476055665e-05, - "loss": 4.8295, + "epoch": 0.74, + "learning_rate": 3.76713530652193e-05, + "loss": 4.1176, "step": 81500 }, { - "epoch": 0.09, - "learning_rate": 4.85220767489149e-05, - "loss": 4.8223, + "epoch": 0.74, + "learning_rate": 3.759571719445377e-05, + "loss": 4.1078, "step": 82000 }, { - "epoch": 0.09, - "learning_rate": 4.851306502177414e-05, - "loss": 4.8637, + "epoch": 0.75, + "learning_rate": 3.752008132368825e-05, + "loss": 4.0988, "step": 82500 }, { - "epoch": 0.09, - "learning_rate": 4.850405329463337e-05, - "loss": 4.878, + "epoch": 0.75, + "learning_rate": 3.744444545292272e-05, + "loss": 4.1075, "step": 83000 }, { - "epoch": 0.09, - "learning_rate": 4.849504156749261e-05, - "loss": 4.8677, + "epoch": 0.76, + "learning_rate": 3.73688095821572e-05, + "loss": 4.0456, "step": 83500 }, { - "epoch": 0.09, - "learning_rate": 4.848602984035185e-05, - "loss": 4.8636, + "epoch": 0.76, + "learning_rate": 3.729317371139167e-05, + "loss": 4.1451, "step": 84000 }, { - "epoch": 0.09, - "learning_rate": 4.847701811321109e-05, - "loss": 4.8708, + "epoch": 0.77, + "learning_rate": 3.721753784062614e-05, + "loss": 4.116, "step": 84500 }, { - "epoch": 0.09, - "learning_rate": 4.846800638607033e-05, - "loss": 4.8608, + "epoch": 0.77, + "learning_rate": 3.714190196986062e-05, + "loss": 4.0827, "step": 85000 }, { - "epoch": 0.09, - "learning_rate": 4.8458994658929564e-05, - "loss": 4.8347, + "epoch": 0.78, + "learning_rate": 3.706626609909509e-05, + "loss": 4.1093, "step": 85500 }, { - "epoch": 0.09, - "learning_rate": 4.8449982931788795e-05, - "loss": 4.8385, + "epoch": 0.78, + "learning_rate": 3.699063022832957e-05, + "loss": 4.0878, "step": 86000 }, { - "epoch": 0.09, - "learning_rate": 4.844097120464803e-05, - "loss": 4.8565, + "epoch": 0.79, + "learning_rate": 3.691499435756404e-05, + "loss": 4.1075, "step": 86500 }, { - "epoch": 0.09, - "learning_rate": 4.843195947750727e-05, - "loss": 4.867, + "epoch": 0.79, + "learning_rate": 3.683935848679851e-05, + "loss": 4.0942, "step": 87000 }, { - "epoch": 0.09, - "learning_rate": 4.842294775036651e-05, - "loss": 4.8456, + "epoch": 0.79, + "learning_rate": 3.676372261603299e-05, + "loss": 4.1082, "step": 87500 }, { - "epoch": 0.1, - "learning_rate": 4.841393602322575e-05, - "loss": 4.8739, + "epoch": 0.8, + "learning_rate": 3.668808674526746e-05, + "loss": 4.0797, "step": 88000 }, { - "epoch": 0.1, - "learning_rate": 4.840492429608499e-05, - "loss": 4.8473, + "epoch": 0.8, + "learning_rate": 3.661245087450194e-05, + "loss": 4.0613, "step": 88500 }, { - "epoch": 0.1, - "learning_rate": 4.839591256894422e-05, - "loss": 4.8496, + "epoch": 0.81, + "learning_rate": 3.653681500373641e-05, + "loss": 4.1392, "step": 89000 }, { - "epoch": 0.1, - "learning_rate": 4.8386900841803456e-05, - "loss": 4.8579, + "epoch": 0.81, + "learning_rate": 3.646117913297089e-05, + "loss": 4.064, "step": 89500 }, { - "epoch": 0.1, - "learning_rate": 4.8377889114662694e-05, - "loss": 4.8543, + "epoch": 0.82, + "learning_rate": 3.638554326220536e-05, + "loss": 4.0528, "step": 90000 }, { - "epoch": 0.1, - "learning_rate": 4.836887738752193e-05, - "loss": 4.8855, + "epoch": 0.82, + "learning_rate": 3.630990739143983e-05, + "loss": 4.0703, "step": 90500 }, { - "epoch": 0.1, - "learning_rate": 4.835986566038117e-05, - "loss": 4.8511, + "epoch": 0.83, + "learning_rate": 3.623427152067431e-05, + "loss": 4.0334, "step": 91000 }, { - "epoch": 0.1, - "learning_rate": 4.8350853933240406e-05, - "loss": 4.8682, + "epoch": 0.83, + "learning_rate": 3.615863564990878e-05, + "loss": 4.0341, "step": 91500 }, { - "epoch": 0.1, - "learning_rate": 4.8341842206099643e-05, - "loss": 4.8074, + "epoch": 0.84, + "learning_rate": 3.608299977914326e-05, + "loss": 4.079, "step": 92000 }, { - "epoch": 0.1, - "learning_rate": 4.833283047895888e-05, - "loss": 4.8034, + "epoch": 0.84, + "learning_rate": 3.600736390837773e-05, + "loss": 4.0658, "step": 92500 }, { - "epoch": 0.1, - "learning_rate": 4.832381875181812e-05, - "loss": 4.842, + "epoch": 0.84, + "learning_rate": 3.5931728037612203e-05, + "loss": 4.0723, "step": 93000 }, { - "epoch": 0.1, - "learning_rate": 4.8314807024677356e-05, - "loss": 4.8146, + "epoch": 0.85, + "learning_rate": 3.585609216684668e-05, + "loss": 4.0875, "step": 93500 }, { - "epoch": 0.1, - "learning_rate": 4.830579529753659e-05, - "loss": 4.8353, + "epoch": 0.85, + "learning_rate": 3.578045629608115e-05, + "loss": 4.0256, "step": 94000 }, { - "epoch": 0.1, - "learning_rate": 4.829678357039583e-05, - "loss": 4.8151, + "epoch": 0.86, + "learning_rate": 3.570482042531563e-05, + "loss": 4.1114, "step": 94500 }, { - "epoch": 0.1, - "learning_rate": 4.828777184325507e-05, - "loss": 4.8127, + "epoch": 0.86, + "learning_rate": 3.56291845545501e-05, + "loss": 4.0955, "step": 95000 }, { - "epoch": 0.1, - "learning_rate": 4.8278760116114305e-05, - "loss": 4.833, + "epoch": 0.87, + "learning_rate": 3.5553548683784574e-05, + "loss": 4.1109, "step": 95500 }, { - "epoch": 0.1, - "learning_rate": 4.826974838897354e-05, - "loss": 4.8383, + "epoch": 0.87, + "learning_rate": 3.547791281301905e-05, + "loss": 4.0938, "step": 96000 }, { - "epoch": 0.1, - "learning_rate": 4.826073666183278e-05, - "loss": 4.8441, + "epoch": 0.88, + "learning_rate": 3.5402276942253524e-05, + "loss": 4.0839, "step": 96500 }, { - "epoch": 0.1, - "learning_rate": 4.825172493469202e-05, - "loss": 4.8794, + "epoch": 0.88, + "learning_rate": 3.5326641071488e-05, + "loss": 4.0671, "step": 97000 }, { - "epoch": 0.11, - "learning_rate": 4.8242713207551255e-05, - "loss": 4.828, + "epoch": 0.88, + "learning_rate": 3.5251005200722474e-05, + "loss": 4.0446, "step": 97500 }, { - "epoch": 0.11, - "learning_rate": 4.823370148041049e-05, - "loss": 4.7572, + "epoch": 0.89, + "learning_rate": 3.5175369329956945e-05, + "loss": 4.0323, "step": 98000 }, { - "epoch": 0.11, - "learning_rate": 4.822468975326972e-05, - "loss": 4.7658, + "epoch": 0.89, + "learning_rate": 3.5099733459191423e-05, + "loss": 4.0442, "step": 98500 }, { - "epoch": 0.11, - "learning_rate": 4.821567802612896e-05, - "loss": 4.8123, + "epoch": 0.9, + "learning_rate": 3.5024097588425895e-05, + "loss": 4.0989, "step": 99000 }, { - "epoch": 0.11, - "learning_rate": 4.8206666298988204e-05, - "loss": 4.8082, + "epoch": 0.9, + "learning_rate": 3.494846171766037e-05, + "loss": 4.0851, "step": 99500 }, { - "epoch": 0.11, - "learning_rate": 4.819765457184744e-05, - "loss": 4.7542, + "epoch": 0.91, + "learning_rate": 3.4872825846894845e-05, + "loss": 4.0733, "step": 100000 }, { - "epoch": 0.11, - "learning_rate": 4.818864284470668e-05, - "loss": 4.8264, + "epoch": 0.91, + "learning_rate": 3.4797189976129316e-05, + "loss": 4.0693, "step": 100500 }, { - "epoch": 0.11, - "learning_rate": 4.817963111756592e-05, - "loss": 4.7541, + "epoch": 0.92, + "learning_rate": 3.4721554105363794e-05, + "loss": 4.0285, "step": 101000 }, { - "epoch": 0.11, - "learning_rate": 4.817061939042515e-05, - "loss": 4.7992, + "epoch": 0.92, + "learning_rate": 3.4645918234598266e-05, + "loss": 4.1077, "step": 101500 }, { - "epoch": 0.11, - "learning_rate": 4.8161607663284385e-05, - "loss": 4.8078, + "epoch": 0.93, + "learning_rate": 3.4570282363832744e-05, + "loss": 4.0844, "step": 102000 }, { - "epoch": 0.11, - "learning_rate": 4.815259593614362e-05, - "loss": 4.8453, + "epoch": 0.93, + "learning_rate": 3.4494646493067215e-05, + "loss": 4.0826, "step": 102500 }, { - "epoch": 0.11, - "learning_rate": 4.814358420900286e-05, - "loss": 4.8276, + "epoch": 0.93, + "learning_rate": 3.441901062230169e-05, + "loss": 4.0967, "step": 103000 }, { - "epoch": 0.11, - "learning_rate": 4.81345724818621e-05, - "loss": 4.7253, + "epoch": 0.94, + "learning_rate": 3.4343374751536165e-05, + "loss": 4.0536, "step": 103500 }, { - "epoch": 0.11, - "learning_rate": 4.812556075472134e-05, - "loss": 4.8102, + "epoch": 0.94, + "learning_rate": 3.426773888077064e-05, + "loss": 4.1287, "step": 104000 }, { - "epoch": 0.11, - "learning_rate": 4.811654902758057e-05, - "loss": 4.8006, + "epoch": 0.95, + "learning_rate": 3.4192103010005115e-05, + "loss": 4.0883, "step": 104500 }, { - "epoch": 0.11, - "learning_rate": 4.810753730043981e-05, - "loss": 4.7603, + "epoch": 0.95, + "learning_rate": 3.4116467139239586e-05, + "loss": 4.0862, "step": 105000 }, { - "epoch": 0.11, - "learning_rate": 4.8098525573299047e-05, - "loss": 4.7124, + "epoch": 0.96, + "learning_rate": 3.404083126847406e-05, + "loss": 4.0683, "step": 105500 }, { - "epoch": 0.11, - "learning_rate": 4.8089513846158284e-05, - "loss": 4.7757, + "epoch": 0.96, + "learning_rate": 3.3965195397708536e-05, + "loss": 4.0465, "step": 106000 }, { - "epoch": 0.12, - "learning_rate": 4.808050211901752e-05, - "loss": 4.7593, + "epoch": 0.97, + "learning_rate": 3.388955952694301e-05, + "loss": 4.0712, "step": 106500 }, { - "epoch": 0.12, - "learning_rate": 4.807149039187676e-05, - "loss": 4.8501, + "epoch": 0.97, + "learning_rate": 3.3813923656177486e-05, + "loss": 4.0737, "step": 107000 }, { - "epoch": 0.12, - "learning_rate": 4.8062478664735996e-05, - "loss": 4.8105, + "epoch": 0.98, + "learning_rate": 3.373828778541196e-05, + "loss": 4.0399, "step": 107500 }, { - "epoch": 0.12, - "learning_rate": 4.8053466937595234e-05, - "loss": 4.7692, + "epoch": 0.98, + "learning_rate": 3.366265191464643e-05, + "loss": 4.0716, "step": 108000 }, { - "epoch": 0.12, - "learning_rate": 4.804445521045447e-05, - "loss": 4.7855, + "epoch": 0.98, + "learning_rate": 3.358701604388091e-05, + "loss": 4.0129, "step": 108500 }, { - "epoch": 0.12, - "learning_rate": 4.803544348331371e-05, - "loss": 4.8032, + "epoch": 0.99, + "learning_rate": 3.351138017311538e-05, + "loss": 4.0711, "step": 109000 }, { - "epoch": 0.12, - "learning_rate": 4.8026431756172946e-05, - "loss": 4.7814, + "epoch": 0.99, + "learning_rate": 3.343574430234986e-05, + "loss": 4.0583, "step": 109500 }, { - "epoch": 0.12, - "learning_rate": 4.801742002903218e-05, - "loss": 4.8473, + "epoch": 1.0, + "learning_rate": 3.336010843158433e-05, + "loss": 4.0821, "step": 110000 - }, - { - "epoch": 0.12, - "learning_rate": 4.800840830189142e-05, - "loss": 4.8047, - "step": 110500 - }, - { - "epoch": 0.12, - "learning_rate": 4.799939657475066e-05, - "loss": 4.8326, - "step": 111000 - }, - { - "epoch": 0.12, - "learning_rate": 4.7990384847609895e-05, - "loss": 4.7993, - "step": 111500 - }, - { - "epoch": 0.12, - "learning_rate": 4.798137312046913e-05, - "loss": 4.7892, - "step": 112000 - }, - { - "epoch": 0.12, - "learning_rate": 4.797236139332837e-05, - "loss": 4.751, - "step": 112500 - }, - { - "epoch": 0.12, - "learning_rate": 4.796334966618761e-05, - "loss": 4.7795, - "step": 113000 - }, - { - "epoch": 0.12, - "learning_rate": 4.7954337939046845e-05, - "loss": 4.7684, - "step": 113500 - }, - { - "epoch": 0.12, - "learning_rate": 4.7945326211906076e-05, - "loss": 4.7673, - "step": 114000 - }, - { - "epoch": 0.12, - "learning_rate": 4.793631448476531e-05, - "loss": 4.7614, - "step": 114500 - }, - { - "epoch": 0.12, - "learning_rate": 4.792730275762455e-05, - "loss": 4.7529, - "step": 115000 - }, - { - "epoch": 0.12, - "learning_rate": 4.7918291030483795e-05, - "loss": 4.8215, - "step": 115500 - }, - { - "epoch": 0.13, - "learning_rate": 4.790927930334303e-05, - "loss": 4.7351, - "step": 116000 - }, - { - "epoch": 0.13, - "learning_rate": 4.790026757620227e-05, - "loss": 4.7878, - "step": 116500 - }, - { - "epoch": 0.13, - "learning_rate": 4.78912558490615e-05, - "loss": 4.7618, - "step": 117000 - }, - { - "epoch": 0.13, - "learning_rate": 4.788224412192074e-05, - "loss": 4.846, - "step": 117500 - }, - { - "epoch": 0.13, - "learning_rate": 4.7873232394779975e-05, - "loss": 4.8027, - "step": 118000 - }, - { - "epoch": 0.13, - "learning_rate": 4.786422066763921e-05, - "loss": 4.7415, - "step": 118500 - }, - { - "epoch": 0.13, - "learning_rate": 4.785520894049845e-05, - "loss": 4.7554, - "step": 119000 - }, - { - "epoch": 0.13, - "learning_rate": 4.7846197213357694e-05, - "loss": 4.7287, - "step": 119500 - }, - { - "epoch": 0.13, - "learning_rate": 4.783718548621693e-05, - "loss": 4.81, - "step": 120000 - }, - { - "epoch": 0.13, - "learning_rate": 4.782817375907616e-05, - "loss": 4.7374, - "step": 120500 - }, - { - "epoch": 0.13, - "learning_rate": 4.78191620319354e-05, - "loss": 4.7541, - "step": 121000 - }, - { - "epoch": 0.13, - "learning_rate": 4.7810150304794637e-05, - "loss": 4.7704, - "step": 121500 - }, - { - "epoch": 0.13, - "learning_rate": 4.7801138577653874e-05, - "loss": 4.7743, - "step": 122000 - }, - { - "epoch": 0.13, - "learning_rate": 4.779212685051311e-05, - "loss": 4.7569, - "step": 122500 - }, - { - "epoch": 0.13, - "learning_rate": 4.778311512337235e-05, - "loss": 4.69, - "step": 123000 - }, - { - "epoch": 0.13, - "learning_rate": 4.7774103396231586e-05, - "loss": 4.8213, - "step": 123500 - }, - { - "epoch": 0.13, - "learning_rate": 4.7765091669090824e-05, - "loss": 4.7616, - "step": 124000 - }, - { - "epoch": 0.13, - "learning_rate": 4.775607994195006e-05, - "loss": 4.7587, - "step": 124500 - }, - { - "epoch": 0.14, - "learning_rate": 4.77470682148093e-05, - "loss": 4.7599, - "step": 125000 - }, - { - "epoch": 0.14, - "learning_rate": 4.7738056487668536e-05, - "loss": 4.692, - "step": 125500 - }, - { - "epoch": 0.14, - "learning_rate": 4.772904476052777e-05, - "loss": 4.8163, - "step": 126000 - }, - { - "epoch": 0.14, - "learning_rate": 4.7720033033387004e-05, - "loss": 4.7533, - "step": 126500 - }, - { - "epoch": 0.14, - "learning_rate": 4.771102130624625e-05, - "loss": 4.7933, - "step": 127000 - }, - { - "epoch": 0.14, - "learning_rate": 4.7702009579105485e-05, - "loss": 4.7659, - "step": 127500 - }, - { - "epoch": 0.14, - "learning_rate": 4.769299785196472e-05, - "loss": 4.7502, - "step": 128000 - }, - { - "epoch": 0.14, - "learning_rate": 4.768398612482396e-05, - "loss": 4.7412, - "step": 128500 - }, - { - "epoch": 0.14, - "learning_rate": 4.76749743976832e-05, - "loss": 4.7917, - "step": 129000 - }, - { - "epoch": 0.14, - "learning_rate": 4.766596267054243e-05, - "loss": 4.7984, - "step": 129500 - }, - { - "epoch": 0.14, - "learning_rate": 4.7656950943401666e-05, - "loss": 4.7151, - "step": 130000 - }, - { - "epoch": 0.14, - "learning_rate": 4.76479392162609e-05, - "loss": 4.7101, - "step": 130500 - }, - { - "epoch": 0.14, - "learning_rate": 4.763892748912015e-05, - "loss": 4.7416, - "step": 131000 - }, - { - "epoch": 0.14, - "learning_rate": 4.7629915761979385e-05, - "loss": 4.7401, - "step": 131500 - }, - { - "epoch": 0.14, - "learning_rate": 4.762090403483862e-05, - "loss": 4.7234, - "step": 132000 - }, - { - "epoch": 0.14, - "learning_rate": 4.761189230769785e-05, - "loss": 4.7334, - "step": 132500 - }, - { - "epoch": 0.14, - "learning_rate": 4.760288058055709e-05, - "loss": 4.7305, - "step": 133000 - }, - { - "epoch": 0.14, - "learning_rate": 4.759386885341633e-05, - "loss": 4.7889, - "step": 133500 - }, - { - "epoch": 0.14, - "learning_rate": 4.7584857126275565e-05, - "loss": 4.7615, - "step": 134000 - }, - { - "epoch": 0.15, - "learning_rate": 4.75758453991348e-05, - "loss": 4.6827, - "step": 134500 - }, - { - "epoch": 0.15, - "learning_rate": 4.7566833671994046e-05, - "loss": 4.7555, - "step": 135000 - }, - { - "epoch": 0.15, - "learning_rate": 4.7557821944853284e-05, - "loss": 4.7644, - "step": 135500 - }, - { - "epoch": 0.15, - "learning_rate": 4.7548810217712514e-05, - "loss": 4.7292, - "step": 136000 - }, - { - "epoch": 0.15, - "learning_rate": 4.753979849057175e-05, - "loss": 4.7221, - "step": 136500 - }, - { - "epoch": 0.15, - "learning_rate": 4.753078676343099e-05, - "loss": 4.7045, - "step": 137000 - }, - { - "epoch": 0.15, - "learning_rate": 4.752177503629023e-05, - "loss": 4.6832, - "step": 137500 - }, - { - "epoch": 0.15, - "learning_rate": 4.7512763309149464e-05, - "loss": 4.7221, - "step": 138000 - }, - { - "epoch": 0.15, - "learning_rate": 4.75037515820087e-05, - "loss": 4.6595, - "step": 138500 - }, - { - "epoch": 0.15, - "learning_rate": 4.749473985486794e-05, - "loss": 4.7322, - "step": 139000 - }, - { - "epoch": 0.15, - "learning_rate": 4.7485728127727176e-05, - "loss": 4.7332, - "step": 139500 - }, - { - "epoch": 0.15, - "learning_rate": 4.7476716400586414e-05, - "loss": 4.7665, - "step": 140000 - }, - { - "epoch": 0.15, - "learning_rate": 4.746770467344565e-05, - "loss": 4.6936, - "step": 140500 - }, - { - "epoch": 0.15, - "learning_rate": 4.745869294630489e-05, - "loss": 4.7322, - "step": 141000 - }, - { - "epoch": 0.15, - "learning_rate": 4.7449681219164126e-05, - "loss": 4.7406, - "step": 141500 - }, - { - "epoch": 0.15, - "learning_rate": 4.7440669492023356e-05, - "loss": 4.757, - "step": 142000 - }, - { - "epoch": 0.15, - "learning_rate": 4.74316577648826e-05, - "loss": 4.7518, - "step": 142500 - }, - { - "epoch": 0.15, - "learning_rate": 4.742264603774184e-05, - "loss": 4.6843, - "step": 143000 - }, - { - "epoch": 0.16, - "learning_rate": 4.7413634310601075e-05, - "loss": 4.6937, - "step": 143500 - }, - { - "epoch": 0.16, - "learning_rate": 4.740462258346031e-05, - "loss": 4.7167, - "step": 144000 - }, - { - "epoch": 0.16, - "learning_rate": 4.739561085631955e-05, - "loss": 4.7101, - "step": 144500 - }, - { - "epoch": 0.16, - "learning_rate": 4.738659912917878e-05, - "loss": 4.7401, - "step": 145000 - }, - { - "epoch": 0.16, - "learning_rate": 4.737758740203802e-05, - "loss": 4.7357, - "step": 145500 - }, - { - "epoch": 0.16, - "learning_rate": 4.7368575674897256e-05, - "loss": 4.7034, - "step": 146000 - }, - { - "epoch": 0.16, - "learning_rate": 4.73595639477565e-05, - "loss": 4.6982, - "step": 146500 - }, - { - "epoch": 0.16, - "learning_rate": 4.735055222061574e-05, - "loss": 4.729, - "step": 147000 - }, - { - "epoch": 0.16, - "learning_rate": 4.7341540493474975e-05, - "loss": 4.7402, - "step": 147500 - }, - { - "epoch": 0.16, - "learning_rate": 4.733252876633421e-05, - "loss": 4.7249, - "step": 148000 - }, - { - "epoch": 0.16, - "learning_rate": 4.732351703919344e-05, - "loss": 4.6795, - "step": 148500 - }, - { - "epoch": 0.16, - "learning_rate": 4.731450531205268e-05, - "loss": 4.7496, - "step": 149000 - }, - { - "epoch": 0.16, - "learning_rate": 4.730549358491192e-05, - "loss": 4.7258, - "step": 149500 - }, - { - "epoch": 0.16, - "learning_rate": 4.7296481857771155e-05, - "loss": 4.7273, - "step": 150000 - }, - { - "epoch": 0.16, - "learning_rate": 4.72874701306304e-05, - "loss": 4.6983, - "step": 150500 - }, - { - "epoch": 0.16, - "learning_rate": 4.7278458403489636e-05, - "loss": 4.6593, - "step": 151000 - }, - { - "epoch": 0.16, - "learning_rate": 4.726944667634887e-05, - "loss": 4.6931, - "step": 151500 - }, - { - "epoch": 0.16, - "learning_rate": 4.7260434949208104e-05, - "loss": 4.6728, - "step": 152000 - }, - { - "epoch": 0.16, - "learning_rate": 4.725142322206734e-05, - "loss": 4.6942, - "step": 152500 - }, - { - "epoch": 0.17, - "learning_rate": 4.724241149492658e-05, - "loss": 4.655, - "step": 153000 - }, - { - "epoch": 0.17, - "learning_rate": 4.723339976778582e-05, - "loss": 4.6958, - "step": 153500 - }, - { - "epoch": 0.17, - "learning_rate": 4.7224388040645054e-05, - "loss": 4.727, - "step": 154000 - }, - { - "epoch": 0.17, - "learning_rate": 4.721537631350429e-05, - "loss": 4.7039, - "step": 154500 - }, - { - "epoch": 0.17, - "learning_rate": 4.720636458636353e-05, - "loss": 4.6621, - "step": 155000 - }, - { - "epoch": 0.17, - "learning_rate": 4.7197352859222766e-05, - "loss": 4.7307, - "step": 155500 - }, - { - "epoch": 0.17, - "learning_rate": 4.7188341132082004e-05, - "loss": 4.6781, - "step": 156000 - }, - { - "epoch": 0.17, - "learning_rate": 4.717932940494124e-05, - "loss": 4.6862, - "step": 156500 - }, - { - "epoch": 0.17, - "learning_rate": 4.717031767780048e-05, - "loss": 4.6321, - "step": 157000 - }, - { - "epoch": 0.17, - "learning_rate": 4.716130595065971e-05, - "loss": 4.6918, - "step": 157500 - }, - { - "epoch": 0.17, - "learning_rate": 4.715229422351895e-05, - "loss": 4.7254, - "step": 158000 - }, - { - "epoch": 0.17, - "learning_rate": 4.714328249637819e-05, - "loss": 4.6808, - "step": 158500 - }, - { - "epoch": 0.17, - "learning_rate": 4.713427076923743e-05, - "loss": 4.6929, - "step": 159000 - }, - { - "epoch": 0.17, - "learning_rate": 4.7125259042096665e-05, - "loss": 4.6183, - "step": 159500 - }, - { - "epoch": 0.17, - "learning_rate": 4.71162473149559e-05, - "loss": 4.6005, - "step": 160000 - }, - { - "epoch": 0.17, - "learning_rate": 4.710723558781514e-05, - "loss": 4.7159, - "step": 160500 - }, - { - "epoch": 0.17, - "learning_rate": 4.709822386067437e-05, - "loss": 4.6412, - "step": 161000 - }, - { - "epoch": 0.17, - "learning_rate": 4.708921213353361e-05, - "loss": 4.6927, - "step": 161500 - }, - { - "epoch": 0.18, - "learning_rate": 4.708020040639285e-05, - "loss": 4.7037, - "step": 162000 - }, - { - "epoch": 0.18, - "learning_rate": 4.707118867925209e-05, - "loss": 4.7063, - "step": 162500 - }, - { - "epoch": 0.18, - "learning_rate": 4.706217695211133e-05, - "loss": 4.739, - "step": 163000 - }, - { - "epoch": 0.18, - "learning_rate": 4.7053165224970565e-05, - "loss": 4.6985, - "step": 163500 - }, - { - "epoch": 0.18, - "learning_rate": 4.7044153497829795e-05, - "loss": 4.6828, - "step": 164000 - }, - { - "epoch": 0.18, - "learning_rate": 4.703514177068903e-05, - "loss": 4.7187, - "step": 164500 - }, - { - "epoch": 0.18, - "learning_rate": 4.702613004354827e-05, - "loss": 4.7055, - "step": 165000 - }, - { - "epoch": 0.18, - "learning_rate": 4.701711831640751e-05, - "loss": 4.6414, - "step": 165500 - }, - { - "epoch": 0.18, - "learning_rate": 4.7008106589266745e-05, - "loss": 4.6793, - "step": 166000 - }, - { - "epoch": 0.18, - "learning_rate": 4.699909486212599e-05, - "loss": 4.7155, - "step": 166500 - }, - { - "epoch": 0.18, - "learning_rate": 4.699008313498522e-05, - "loss": 4.6599, - "step": 167000 - }, - { - "epoch": 0.18, - "learning_rate": 4.698107140784446e-05, - "loss": 4.6949, - "step": 167500 - }, - { - "epoch": 0.18, - "learning_rate": 4.6972059680703695e-05, - "loss": 4.6781, - "step": 168000 - }, - { - "epoch": 0.18, - "learning_rate": 4.696304795356293e-05, - "loss": 4.6621, - "step": 168500 - }, - { - "epoch": 0.18, - "learning_rate": 4.695403622642217e-05, - "loss": 4.675, - "step": 169000 - }, - { - "epoch": 0.18, - "learning_rate": 4.694502449928141e-05, - "loss": 4.6254, - "step": 169500 - }, - { - "epoch": 0.18, - "learning_rate": 4.6936012772140644e-05, - "loss": 4.7044, - "step": 170000 - }, - { - "epoch": 0.18, - "learning_rate": 4.692700104499988e-05, - "loss": 4.6353, - "step": 170500 - }, - { - "epoch": 0.18, - "learning_rate": 4.691798931785912e-05, - "loss": 4.6393, - "step": 171000 - }, - { - "epoch": 0.19, - "learning_rate": 4.6908977590718356e-05, - "loss": 4.6692, - "step": 171500 - }, - { - "epoch": 0.19, - "learning_rate": 4.6899965863577594e-05, - "loss": 4.6501, - "step": 172000 - }, - { - "epoch": 0.19, - "learning_rate": 4.689095413643683e-05, - "loss": 4.6289, - "step": 172500 - }, - { - "epoch": 0.19, - "learning_rate": 4.688194240929607e-05, - "loss": 4.6656, - "step": 173000 - }, - { - "epoch": 0.19, - "learning_rate": 4.6872930682155306e-05, - "loss": 4.6542, - "step": 173500 - }, - { - "epoch": 0.19, - "learning_rate": 4.686391895501454e-05, - "loss": 4.678, - "step": 174000 - }, - { - "epoch": 0.19, - "learning_rate": 4.685490722787378e-05, - "loss": 4.648, - "step": 174500 - }, - { - "epoch": 0.19, - "learning_rate": 4.684589550073302e-05, - "loss": 4.6518, - "step": 175000 - }, - { - "epoch": 0.19, - "learning_rate": 4.6836883773592256e-05, - "loss": 4.7169, - "step": 175500 - }, - { - "epoch": 0.19, - "learning_rate": 4.682787204645149e-05, - "loss": 4.6243, - "step": 176000 - }, - { - "epoch": 0.19, - "learning_rate": 4.6818860319310724e-05, - "loss": 4.6988, - "step": 176500 - }, - { - "epoch": 0.19, - "learning_rate": 4.680984859216996e-05, - "loss": 4.5944, - "step": 177000 - }, - { - "epoch": 0.19, - "learning_rate": 4.68008368650292e-05, - "loss": 4.7104, - "step": 177500 - }, - { - "epoch": 0.19, - "learning_rate": 4.679182513788844e-05, - "loss": 4.6633, - "step": 178000 - }, - { - "epoch": 0.19, - "learning_rate": 4.678281341074768e-05, - "loss": 4.6841, - "step": 178500 - }, - { - "epoch": 0.19, - "learning_rate": 4.677380168360692e-05, - "loss": 4.6535, - "step": 179000 - }, - { - "epoch": 0.19, - "learning_rate": 4.676478995646615e-05, - "loss": 4.7139, - "step": 179500 - }, - { - "epoch": 0.19, - "learning_rate": 4.6755778229325385e-05, - "loss": 4.6433, - "step": 180000 - }, - { - "epoch": 0.2, - "learning_rate": 4.674676650218462e-05, - "loss": 4.7148, - "step": 180500 - }, - { - "epoch": 0.2, - "learning_rate": 4.673775477504386e-05, - "loss": 4.6483, - "step": 181000 - }, - { - "epoch": 0.2, - "learning_rate": 4.67287430479031e-05, - "loss": 4.6044, - "step": 181500 - }, - { - "epoch": 0.2, - "learning_rate": 4.671973132076234e-05, - "loss": 4.6271, - "step": 182000 - }, - { - "epoch": 0.2, - "learning_rate": 4.671071959362157e-05, - "loss": 4.6416, - "step": 182500 - }, - { - "epoch": 0.2, - "learning_rate": 4.670170786648081e-05, - "loss": 4.6732, - "step": 183000 - }, - { - "epoch": 0.2, - "learning_rate": 4.669269613934005e-05, - "loss": 4.6461, - "step": 183500 - }, - { - "epoch": 0.2, - "learning_rate": 4.6683684412199285e-05, - "loss": 4.6583, - "step": 184000 - }, - { - "epoch": 0.2, - "learning_rate": 4.667467268505852e-05, - "loss": 4.6572, - "step": 184500 - }, - { - "epoch": 0.2, - "learning_rate": 4.666566095791776e-05, - "loss": 4.6394, - "step": 185000 - }, - { - "epoch": 0.2, - "learning_rate": 4.6656649230777e-05, - "loss": 4.676, - "step": 185500 - }, - { - "epoch": 0.2, - "learning_rate": 4.6647637503636234e-05, - "loss": 4.6573, - "step": 186000 - }, - { - "epoch": 0.2, - "learning_rate": 4.663862577649547e-05, - "loss": 4.6528, - "step": 186500 - }, - { - "epoch": 0.2, - "learning_rate": 4.662961404935471e-05, - "loss": 4.658, - "step": 187000 - }, - { - "epoch": 0.2, - "learning_rate": 4.6620602322213946e-05, - "loss": 4.6363, - "step": 187500 - }, - { - "epoch": 0.2, - "learning_rate": 4.6611590595073184e-05, - "loss": 4.6629, - "step": 188000 - }, - { - "epoch": 0.2, - "learning_rate": 4.660257886793242e-05, - "loss": 4.6319, - "step": 188500 - }, - { - "epoch": 0.2, - "learning_rate": 4.659356714079166e-05, - "loss": 4.6833, - "step": 189000 - }, - { - "epoch": 0.2, - "learning_rate": 4.6584555413650896e-05, - "loss": 4.586, - "step": 189500 - }, - { - "epoch": 0.21, - "learning_rate": 4.657554368651013e-05, - "loss": 4.6757, - "step": 190000 - }, - { - "epoch": 0.21, - "learning_rate": 4.656653195936937e-05, - "loss": 4.6509, - "step": 190500 - }, - { - "epoch": 0.21, - "learning_rate": 4.655752023222861e-05, - "loss": 4.6792, - "step": 191000 - }, - { - "epoch": 0.21, - "learning_rate": 4.6548508505087846e-05, - "loss": 4.6738, - "step": 191500 - }, - { - "epoch": 0.21, - "learning_rate": 4.6539496777947076e-05, - "loss": 4.6407, - "step": 192000 - }, - { - "epoch": 0.21, - "learning_rate": 4.6530485050806314e-05, - "loss": 4.6581, - "step": 192500 - }, - { - "epoch": 0.21, - "learning_rate": 4.652147332366555e-05, - "loss": 4.688, - "step": 193000 - }, - { - "epoch": 0.21, - "learning_rate": 4.6512461596524795e-05, - "loss": 4.6858, - "step": 193500 - }, - { - "epoch": 0.21, - "learning_rate": 4.650344986938403e-05, - "loss": 4.6618, - "step": 194000 - }, - { - "epoch": 0.21, - "learning_rate": 4.649443814224327e-05, - "loss": 4.6565, - "step": 194500 - }, - { - "epoch": 0.21, - "learning_rate": 4.64854264151025e-05, - "loss": 4.6477, - "step": 195000 - }, - { - "epoch": 0.21, - "learning_rate": 4.647641468796174e-05, - "loss": 4.6347, - "step": 195500 - }, - { - "epoch": 0.21, - "learning_rate": 4.6467402960820975e-05, - "loss": 4.6384, - "step": 196000 - }, - { - "epoch": 0.21, - "learning_rate": 4.645839123368021e-05, - "loss": 4.6041, - "step": 196500 - }, - { - "epoch": 0.21, - "learning_rate": 4.644937950653945e-05, - "loss": 4.6302, - "step": 197000 - }, - { - "epoch": 0.21, - "learning_rate": 4.6440367779398694e-05, - "loss": 4.582, - "step": 197500 - }, - { - "epoch": 0.21, - "learning_rate": 4.6431356052257925e-05, - "loss": 4.6465, - "step": 198000 - }, - { - "epoch": 0.21, - "learning_rate": 4.642234432511716e-05, - "loss": 4.6427, - "step": 198500 - }, - { - "epoch": 0.22, - "learning_rate": 4.64133325979764e-05, - "loss": 4.6421, - "step": 199000 - }, - { - "epoch": 0.22, - "learning_rate": 4.640432087083564e-05, - "loss": 4.6108, - "step": 199500 - }, - { - "epoch": 0.22, - "learning_rate": 4.6395309143694875e-05, - "loss": 4.6228, - "step": 200000 - }, - { - "epoch": 0.22, - "learning_rate": 4.638629741655411e-05, - "loss": 4.5645, - "step": 200500 - }, - { - "epoch": 0.22, - "learning_rate": 4.637728568941335e-05, - "loss": 4.5875, - "step": 201000 - }, - { - "epoch": 0.22, - "learning_rate": 4.636827396227259e-05, - "loss": 4.6283, - "step": 201500 - }, - { - "epoch": 0.22, - "learning_rate": 4.6359262235131824e-05, - "loss": 4.6218, - "step": 202000 - }, - { - "epoch": 0.22, - "learning_rate": 4.635025050799106e-05, - "loss": 4.6801, - "step": 202500 - }, - { - "epoch": 0.22, - "learning_rate": 4.63412387808503e-05, - "loss": 4.6695, - "step": 203000 - }, - { - "epoch": 0.22, - "learning_rate": 4.6332227053709536e-05, - "loss": 4.684, - "step": 203500 - }, - { - "epoch": 0.22, - "learning_rate": 4.6323215326568774e-05, - "loss": 4.5908, - "step": 204000 - }, - { - "epoch": 0.22, - "learning_rate": 4.6314203599428004e-05, - "loss": 4.6085, - "step": 204500 - }, - { - "epoch": 0.22, - "learning_rate": 4.630519187228725e-05, - "loss": 4.6316, - "step": 205000 - }, - { - "epoch": 0.22, - "learning_rate": 4.6296180145146486e-05, - "loss": 4.6607, - "step": 205500 - }, - { - "epoch": 0.22, - "learning_rate": 4.6287168418005723e-05, - "loss": 4.6351, - "step": 206000 - }, - { - "epoch": 0.22, - "learning_rate": 4.627815669086496e-05, - "loss": 4.6443, - "step": 206500 - }, - { - "epoch": 0.22, - "learning_rate": 4.62691449637242e-05, - "loss": 4.6842, - "step": 207000 - }, - { - "epoch": 0.22, - "learning_rate": 4.626013323658343e-05, - "loss": 4.6173, - "step": 207500 - }, - { - "epoch": 0.22, - "learning_rate": 4.6251121509442666e-05, - "loss": 4.593, - "step": 208000 - }, - { - "epoch": 0.23, - "learning_rate": 4.6242109782301904e-05, - "loss": 4.606, - "step": 208500 - }, - { - "epoch": 0.23, - "learning_rate": 4.623309805516115e-05, - "loss": 4.6188, - "step": 209000 - }, - { - "epoch": 0.23, - "learning_rate": 4.6224086328020385e-05, - "loss": 4.6307, - "step": 209500 - }, - { - "epoch": 0.23, - "learning_rate": 4.621507460087962e-05, - "loss": 4.5919, - "step": 210000 - }, - { - "epoch": 0.23, - "learning_rate": 4.620606287373885e-05, - "loss": 4.6507, - "step": 210500 - }, - { - "epoch": 0.23, - "learning_rate": 4.619705114659809e-05, - "loss": 4.6382, - "step": 211000 - }, - { - "epoch": 0.23, - "learning_rate": 4.618803941945733e-05, - "loss": 4.5784, - "step": 211500 - }, - { - "epoch": 0.23, - "learning_rate": 4.6179027692316565e-05, - "loss": 4.5813, - "step": 212000 - }, - { - "epoch": 0.23, - "learning_rate": 4.61700159651758e-05, - "loss": 4.6059, - "step": 212500 - }, - { - "epoch": 0.23, - "learning_rate": 4.616100423803505e-05, - "loss": 4.5996, - "step": 213000 - }, - { - "epoch": 0.23, - "learning_rate": 4.6151992510894284e-05, - "loss": 4.6524, - "step": 213500 - }, - { - "epoch": 0.23, - "learning_rate": 4.6142980783753515e-05, - "loss": 4.6452, - "step": 214000 - }, - { - "epoch": 0.23, - "learning_rate": 4.613396905661275e-05, - "loss": 4.6752, - "step": 214500 - }, - { - "epoch": 0.23, - "learning_rate": 4.612495732947199e-05, - "loss": 4.5912, - "step": 215000 - }, - { - "epoch": 0.23, - "learning_rate": 4.611594560233123e-05, - "loss": 4.6646, - "step": 215500 - }, - { - "epoch": 0.23, - "learning_rate": 4.6106933875190465e-05, - "loss": 4.6234, - "step": 216000 - }, - { - "epoch": 0.23, - "learning_rate": 4.60979221480497e-05, - "loss": 4.6457, - "step": 216500 - }, - { - "epoch": 0.23, - "learning_rate": 4.608891042090894e-05, - "loss": 4.6285, - "step": 217000 - }, - { - "epoch": 0.24, - "learning_rate": 4.607989869376818e-05, - "loss": 4.6047, - "step": 217500 - }, - { - "epoch": 0.24, - "learning_rate": 4.6070886966627414e-05, - "loss": 4.5877, - "step": 218000 - }, - { - "epoch": 0.24, - "learning_rate": 4.606187523948665e-05, - "loss": 4.6101, - "step": 218500 - }, - { - "epoch": 0.24, - "learning_rate": 4.605286351234589e-05, - "loss": 4.6867, - "step": 219000 - }, - { - "epoch": 0.24, - "learning_rate": 4.6043851785205126e-05, - "loss": 4.6508, - "step": 219500 - }, - { - "epoch": 0.24, - "learning_rate": 4.603484005806436e-05, - "loss": 4.6099, - "step": 220000 - }, - { - "epoch": 0.24, - "learning_rate": 4.60258283309236e-05, - "loss": 4.6508, - "step": 220500 - }, - { - "epoch": 0.24, - "learning_rate": 4.601681660378284e-05, - "loss": 4.6105, - "step": 221000 - }, - { - "epoch": 0.24, - "learning_rate": 4.6007804876642076e-05, - "loss": 4.6001, - "step": 221500 - }, - { - "epoch": 0.24, - "learning_rate": 4.5998793149501313e-05, - "loss": 4.6344, - "step": 222000 - }, - { - "epoch": 0.24, - "learning_rate": 4.598978142236055e-05, - "loss": 4.585, - "step": 222500 - }, - { - "epoch": 0.24, - "learning_rate": 4.598076969521978e-05, - "loss": 4.5558, - "step": 223000 - }, - { - "epoch": 0.24, - "learning_rate": 4.597175796807902e-05, - "loss": 4.5825, - "step": 223500 - }, - { - "epoch": 0.24, - "learning_rate": 4.5962746240938256e-05, - "loss": 4.5569, - "step": 224000 - }, - { - "epoch": 0.24, - "learning_rate": 4.59537345137975e-05, - "loss": 4.5647, - "step": 224500 - }, - { - "epoch": 0.24, - "learning_rate": 4.594472278665674e-05, - "loss": 4.5887, - "step": 225000 - }, - { - "epoch": 0.24, - "learning_rate": 4.5935711059515975e-05, - "loss": 4.5825, - "step": 225500 - }, - { - "epoch": 0.24, - "learning_rate": 4.5926699332375206e-05, - "loss": 4.5739, - "step": 226000 - }, - { - "epoch": 0.24, - "learning_rate": 4.591768760523444e-05, - "loss": 4.5726, - "step": 226500 - }, - { - "epoch": 0.25, - "learning_rate": 4.590867587809368e-05, - "loss": 4.6447, - "step": 227000 - }, - { - "epoch": 0.25, - "learning_rate": 4.589966415095292e-05, - "loss": 4.5851, - "step": 227500 - }, - { - "epoch": 0.25, - "learning_rate": 4.5890652423812155e-05, - "loss": 4.5571, - "step": 228000 - }, - { - "epoch": 0.25, - "learning_rate": 4.58816406966714e-05, - "loss": 4.5877, - "step": 228500 - }, - { - "epoch": 0.25, - "learning_rate": 4.587262896953064e-05, - "loss": 4.5896, - "step": 229000 - }, - { - "epoch": 0.25, - "learning_rate": 4.586361724238987e-05, - "loss": 4.591, - "step": 229500 - }, - { - "epoch": 0.25, - "learning_rate": 4.5854605515249105e-05, - "loss": 4.5587, - "step": 230000 - }, - { - "epoch": 0.25, - "learning_rate": 4.584559378810834e-05, - "loss": 4.5871, - "step": 230500 - }, - { - "epoch": 0.25, - "learning_rate": 4.583658206096758e-05, - "loss": 4.6129, - "step": 231000 - }, - { - "epoch": 0.25, - "learning_rate": 4.582757033382682e-05, - "loss": 4.5838, - "step": 231500 - }, - { - "epoch": 0.25, - "learning_rate": 4.5818558606686055e-05, - "loss": 4.6555, - "step": 232000 - }, - { - "epoch": 0.25, - "learning_rate": 4.580954687954529e-05, - "loss": 4.5784, - "step": 232500 - }, - { - "epoch": 0.25, - "learning_rate": 4.580053515240453e-05, - "loss": 4.5853, - "step": 233000 - }, - { - "epoch": 0.25, - "learning_rate": 4.579152342526377e-05, - "loss": 4.5536, - "step": 233500 - }, - { - "epoch": 0.25, - "learning_rate": 4.5782511698123004e-05, - "loss": 4.6067, - "step": 234000 - }, - { - "epoch": 0.25, - "learning_rate": 4.577349997098224e-05, - "loss": 4.6091, - "step": 234500 - }, - { - "epoch": 0.25, - "learning_rate": 4.576448824384148e-05, - "loss": 4.5912, - "step": 235000 - }, - { - "epoch": 0.25, - "learning_rate": 4.575547651670071e-05, - "loss": 4.5887, - "step": 235500 - }, - { - "epoch": 0.26, - "learning_rate": 4.5746464789559954e-05, - "loss": 4.5748, - "step": 236000 - }, - { - "epoch": 0.26, - "learning_rate": 4.573745306241919e-05, - "loss": 4.537, - "step": 236500 - }, - { - "epoch": 0.26, - "learning_rate": 4.572844133527843e-05, - "loss": 4.518, - "step": 237000 - }, - { - "epoch": 0.26, - "learning_rate": 4.5719429608137666e-05, - "loss": 4.5982, - "step": 237500 - }, - { - "epoch": 0.26, - "learning_rate": 4.5710417880996904e-05, - "loss": 4.5996, - "step": 238000 - }, - { - "epoch": 0.26, - "learning_rate": 4.5701406153856134e-05, - "loss": 4.6103, - "step": 238500 - }, - { - "epoch": 0.26, - "learning_rate": 4.569239442671537e-05, - "loss": 4.5725, - "step": 239000 - }, - { - "epoch": 0.26, - "learning_rate": 4.568338269957461e-05, - "loss": 4.6039, - "step": 239500 - }, - { - "epoch": 0.26, - "learning_rate": 4.567437097243385e-05, - "loss": 4.5271, - "step": 240000 - }, - { - "epoch": 0.26, - "learning_rate": 4.566535924529309e-05, - "loss": 4.6387, - "step": 240500 - }, - { - "epoch": 0.26, - "learning_rate": 4.565634751815233e-05, - "loss": 4.5238, - "step": 241000 - }, - { - "epoch": 0.26, - "learning_rate": 4.5647335791011565e-05, - "loss": 4.5608, - "step": 241500 - }, - { - "epoch": 0.26, - "learning_rate": 4.5638324063870796e-05, - "loss": 4.582, - "step": 242000 - }, - { - "epoch": 0.26, - "learning_rate": 4.562931233673003e-05, - "loss": 4.5491, - "step": 242500 - }, - { - "epoch": 0.26, - "learning_rate": 4.562030060958927e-05, - "loss": 4.5778, - "step": 243000 - }, - { - "epoch": 0.26, - "learning_rate": 4.561128888244851e-05, - "loss": 4.6373, - "step": 243500 - }, - { - "epoch": 0.26, - "learning_rate": 4.5602277155307746e-05, - "loss": 4.6209, - "step": 244000 - }, - { - "epoch": 0.26, - "learning_rate": 4.559326542816699e-05, - "loss": 4.5673, - "step": 244500 - }, - { - "epoch": 0.26, - "learning_rate": 4.558425370102622e-05, - "loss": 4.5685, - "step": 245000 - }, - { - "epoch": 0.27, - "learning_rate": 4.557524197388546e-05, - "loss": 4.5357, - "step": 245500 - }, - { - "epoch": 0.27, - "learning_rate": 4.5566230246744695e-05, - "loss": 4.576, - "step": 246000 - }, - { - "epoch": 0.27, - "learning_rate": 4.555721851960393e-05, - "loss": 4.602, - "step": 246500 - }, - { - "epoch": 0.27, - "learning_rate": 4.554820679246317e-05, - "loss": 4.4973, - "step": 247000 - }, - { - "epoch": 0.27, - "learning_rate": 4.553919506532241e-05, - "loss": 4.5782, - "step": 247500 - }, - { - "epoch": 0.27, - "learning_rate": 4.5530183338181645e-05, - "loss": 4.5825, - "step": 248000 - }, - { - "epoch": 0.27, - "learning_rate": 4.552117161104088e-05, - "loss": 4.5145, - "step": 248500 - }, - { - "epoch": 0.27, - "learning_rate": 4.551215988390012e-05, - "loss": 4.5698, - "step": 249000 - }, - { - "epoch": 0.27, - "learning_rate": 4.550314815675936e-05, - "loss": 4.5806, - "step": 249500 - }, - { - "epoch": 0.27, - "learning_rate": 4.5494136429618594e-05, - "loss": 4.5052, - "step": 250000 - }, - { - "epoch": 0.27, - "learning_rate": 4.548512470247783e-05, - "loss": 4.5976, - "step": 250500 - }, - { - "epoch": 0.27, - "learning_rate": 4.547611297533706e-05, - "loss": 4.5667, - "step": 251000 - }, - { - "epoch": 0.27, - "learning_rate": 4.5467101248196307e-05, - "loss": 4.5431, - "step": 251500 - }, - { - "epoch": 0.27, - "learning_rate": 4.5458089521055544e-05, - "loss": 4.5659, - "step": 252000 - }, - { - "epoch": 0.27, - "learning_rate": 4.544907779391478e-05, - "loss": 4.5484, - "step": 252500 - }, - { - "epoch": 0.27, - "learning_rate": 4.544006606677402e-05, - "loss": 4.5668, - "step": 253000 - }, - { - "epoch": 0.27, - "learning_rate": 4.5431054339633256e-05, - "loss": 4.5605, - "step": 253500 - }, - { - "epoch": 0.27, - "learning_rate": 4.5422042612492494e-05, - "loss": 4.5686, - "step": 254000 - }, - { - "epoch": 0.28, - "learning_rate": 4.5413030885351724e-05, - "loss": 4.5157, - "step": 254500 - }, - { - "epoch": 0.28, - "learning_rate": 4.540401915821096e-05, - "loss": 4.5193, - "step": 255000 - }, - { - "epoch": 0.28, - "learning_rate": 4.53950074310702e-05, - "loss": 4.5781, - "step": 255500 - }, - { - "epoch": 0.28, - "learning_rate": 4.538599570392944e-05, - "loss": 4.5719, - "step": 256000 - }, - { - "epoch": 0.28, - "learning_rate": 4.537698397678868e-05, - "loss": 4.5509, - "step": 256500 - }, - { - "epoch": 0.28, - "learning_rate": 4.536797224964792e-05, - "loss": 4.5617, - "step": 257000 - }, - { - "epoch": 0.28, - "learning_rate": 4.535896052250715e-05, - "loss": 4.5149, - "step": 257500 - }, - { - "epoch": 0.28, - "learning_rate": 4.5349948795366386e-05, - "loss": 4.5628, - "step": 258000 - }, - { - "epoch": 0.28, - "learning_rate": 4.534093706822562e-05, - "loss": 4.5809, - "step": 258500 - }, - { - "epoch": 0.28, - "learning_rate": 4.533192534108486e-05, - "loss": 4.525, - "step": 259000 - }, - { - "epoch": 0.28, - "learning_rate": 4.53229136139441e-05, - "loss": 4.5036, - "step": 259500 - }, - { - "epoch": 0.28, - "learning_rate": 4.531390188680334e-05, - "loss": 4.5399, - "step": 260000 - }, - { - "epoch": 0.28, - "learning_rate": 4.530489015966257e-05, - "loss": 4.5939, - "step": 260500 - }, - { - "epoch": 0.28, - "learning_rate": 4.529587843252181e-05, - "loss": 4.605, - "step": 261000 - }, - { - "epoch": 0.28, - "learning_rate": 4.528686670538105e-05, - "loss": 4.4758, - "step": 261500 - }, - { - "epoch": 0.28, - "learning_rate": 4.5277854978240285e-05, - "loss": 4.5757, - "step": 262000 - }, - { - "epoch": 0.28, - "learning_rate": 4.526884325109952e-05, - "loss": 4.5944, - "step": 262500 - }, - { - "epoch": 0.28, - "learning_rate": 4.525983152395876e-05, - "loss": 4.5485, - "step": 263000 - }, - { - "epoch": 0.28, - "learning_rate": 4.5250819796818e-05, - "loss": 4.6034, - "step": 263500 - }, - { - "epoch": 0.29, - "learning_rate": 4.5241808069677235e-05, - "loss": 4.5887, - "step": 264000 - }, - { - "epoch": 0.29, - "learning_rate": 4.523279634253647e-05, - "loss": 4.5265, - "step": 264500 - }, - { - "epoch": 0.29, - "learning_rate": 4.522378461539571e-05, - "loss": 4.5177, - "step": 265000 - }, - { - "epoch": 0.29, - "learning_rate": 4.521477288825495e-05, - "loss": 4.6046, - "step": 265500 - }, - { - "epoch": 0.29, - "learning_rate": 4.5205761161114184e-05, - "loss": 4.5481, - "step": 266000 - }, - { - "epoch": 0.29, - "learning_rate": 4.519674943397342e-05, - "loss": 4.5171, - "step": 266500 - }, - { - "epoch": 0.29, - "learning_rate": 4.518773770683265e-05, - "loss": 4.522, - "step": 267000 - }, - { - "epoch": 0.29, - "learning_rate": 4.5178725979691897e-05, - "loss": 4.5399, - "step": 267500 - }, - { - "epoch": 0.29, - "learning_rate": 4.5169714252551134e-05, - "loss": 4.5763, - "step": 268000 - }, - { - "epoch": 0.29, - "learning_rate": 4.516070252541037e-05, - "loss": 4.5583, - "step": 268500 - }, - { - "epoch": 0.29, - "learning_rate": 4.515169079826961e-05, - "loss": 4.569, - "step": 269000 - }, - { - "epoch": 0.29, - "learning_rate": 4.5142679071128846e-05, - "loss": 4.5812, - "step": 269500 - }, - { - "epoch": 0.29, - "learning_rate": 4.513366734398808e-05, - "loss": 4.5136, - "step": 270000 - }, - { - "epoch": 0.29, - "learning_rate": 4.5124655616847314e-05, - "loss": 4.525, - "step": 270500 - }, - { - "epoch": 0.29, - "learning_rate": 4.511564388970655e-05, - "loss": 4.4892, - "step": 271000 - }, - { - "epoch": 0.29, - "learning_rate": 4.5106632162565796e-05, - "loss": 4.4955, - "step": 271500 - }, - { - "epoch": 0.29, - "learning_rate": 4.509762043542503e-05, - "loss": 4.5783, - "step": 272000 - }, - { - "epoch": 0.29, - "learning_rate": 4.508860870828427e-05, - "loss": 4.5685, - "step": 272500 - }, - { - "epoch": 0.3, - "learning_rate": 4.50795969811435e-05, - "loss": 4.5577, - "step": 273000 - }, - { - "epoch": 0.3, - "learning_rate": 4.507058525400274e-05, - "loss": 4.6029, - "step": 273500 - }, - { - "epoch": 0.3, - "learning_rate": 4.5061573526861976e-05, - "loss": 4.5451, - "step": 274000 - }, - { - "epoch": 0.3, - "learning_rate": 4.5052561799721213e-05, - "loss": 4.5816, - "step": 274500 - }, - { - "epoch": 0.3, - "learning_rate": 4.504355007258045e-05, - "loss": 4.4644, - "step": 275000 - }, - { - "epoch": 0.3, - "learning_rate": 4.5034538345439695e-05, - "loss": 4.5545, - "step": 275500 - }, - { - "epoch": 0.3, - "learning_rate": 4.5025526618298926e-05, - "loss": 4.5686, - "step": 276000 - }, - { - "epoch": 0.3, - "learning_rate": 4.501651489115816e-05, - "loss": 4.598, - "step": 276500 - }, - { - "epoch": 0.3, - "learning_rate": 4.50075031640174e-05, - "loss": 4.5273, - "step": 277000 - }, - { - "epoch": 0.3, - "learning_rate": 4.499849143687664e-05, - "loss": 4.498, - "step": 277500 - }, - { - "epoch": 0.3, - "learning_rate": 4.4989479709735875e-05, - "loss": 4.5226, - "step": 278000 - }, - { - "epoch": 0.3, - "learning_rate": 4.498046798259511e-05, - "loss": 4.5453, - "step": 278500 - }, - { - "epoch": 0.3, - "learning_rate": 4.497145625545435e-05, - "loss": 4.5878, - "step": 279000 - }, - { - "epoch": 0.3, - "learning_rate": 4.496244452831359e-05, - "loss": 4.4889, - "step": 279500 - }, - { - "epoch": 0.3, - "learning_rate": 4.4953432801172825e-05, - "loss": 4.531, - "step": 280000 - }, - { - "epoch": 0.3, - "learning_rate": 4.494442107403206e-05, - "loss": 4.5446, - "step": 280500 - }, - { - "epoch": 0.3, - "learning_rate": 4.49354093468913e-05, - "loss": 4.5806, - "step": 281000 - }, - { - "epoch": 0.3, - "learning_rate": 4.492639761975054e-05, - "loss": 4.5838, - "step": 281500 - }, - { - "epoch": 0.3, - "learning_rate": 4.4917385892609774e-05, - "loss": 4.5335, - "step": 282000 - }, - { - "epoch": 0.31, - "learning_rate": 4.4908374165469005e-05, - "loss": 4.4475, - "step": 282500 - }, - { - "epoch": 0.31, - "learning_rate": 4.489936243832825e-05, - "loss": 4.4819, - "step": 283000 - }, - { - "epoch": 0.31, - "learning_rate": 4.489035071118749e-05, - "loss": 4.4793, - "step": 283500 - }, - { - "epoch": 0.31, - "learning_rate": 4.4881338984046724e-05, - "loss": 4.5127, - "step": 284000 - }, - { - "epoch": 0.31, - "learning_rate": 4.487232725690596e-05, - "loss": 4.5424, - "step": 284500 - }, - { - "epoch": 0.31, - "learning_rate": 4.48633155297652e-05, - "loss": 4.4961, - "step": 285000 - }, - { - "epoch": 0.31, - "learning_rate": 4.485430380262443e-05, - "loss": 4.5096, - "step": 285500 - }, - { - "epoch": 0.31, - "learning_rate": 4.484529207548367e-05, - "loss": 4.5307, - "step": 286000 - }, - { - "epoch": 0.31, - "learning_rate": 4.4836280348342904e-05, - "loss": 4.5627, - "step": 286500 - }, - { - "epoch": 0.31, - "learning_rate": 4.482726862120215e-05, - "loss": 4.526, - "step": 287000 - }, - { - "epoch": 0.31, - "learning_rate": 4.4818256894061386e-05, - "loss": 4.5265, - "step": 287500 - }, - { - "epoch": 0.31, - "learning_rate": 4.480924516692062e-05, - "loss": 4.5695, - "step": 288000 - }, - { - "epoch": 0.31, - "learning_rate": 4.4800233439779854e-05, - "loss": 4.5962, - "step": 288500 - }, - { - "epoch": 0.31, - "learning_rate": 4.479122171263909e-05, - "loss": 4.5317, - "step": 289000 - }, - { - "epoch": 0.31, - "learning_rate": 4.478220998549833e-05, - "loss": 4.5309, - "step": 289500 - }, - { - "epoch": 0.31, - "learning_rate": 4.4773198258357566e-05, - "loss": 4.5631, - "step": 290000 - }, - { - "epoch": 0.31, - "learning_rate": 4.4764186531216803e-05, - "loss": 4.5116, - "step": 290500 - }, - { - "epoch": 0.31, - "learning_rate": 4.475517480407605e-05, - "loss": 4.5155, - "step": 291000 - }, - { - "epoch": 0.32, - "learning_rate": 4.474616307693528e-05, - "loss": 4.5214, - "step": 291500 - }, - { - "epoch": 0.32, - "learning_rate": 4.4737151349794516e-05, - "loss": 4.5764, - "step": 292000 - }, - { - "epoch": 0.32, - "learning_rate": 4.472813962265375e-05, - "loss": 4.5458, - "step": 292500 - }, - { - "epoch": 0.32, - "learning_rate": 4.471912789551299e-05, - "loss": 4.5426, - "step": 293000 - }, - { - "epoch": 0.32, - "learning_rate": 4.471011616837223e-05, - "loss": 4.5371, - "step": 293500 - }, - { - "epoch": 0.32, - "learning_rate": 4.4701104441231465e-05, - "loss": 4.5577, - "step": 294000 - }, - { - "epoch": 0.32, - "learning_rate": 4.46920927140907e-05, - "loss": 4.4803, - "step": 294500 - }, - { - "epoch": 0.32, - "learning_rate": 4.468308098694994e-05, - "loss": 4.5597, - "step": 295000 - }, - { - "epoch": 0.32, - "learning_rate": 4.467406925980918e-05, - "loss": 4.5193, - "step": 295500 - }, - { - "epoch": 0.32, - "learning_rate": 4.4665057532668415e-05, - "loss": 4.4773, - "step": 296000 - }, - { - "epoch": 0.32, - "learning_rate": 4.465604580552765e-05, - "loss": 4.5625, - "step": 296500 - }, - { - "epoch": 0.32, - "learning_rate": 4.464703407838689e-05, - "loss": 4.5206, - "step": 297000 - }, - { - "epoch": 0.32, - "learning_rate": 4.463802235124613e-05, - "loss": 4.49, - "step": 297500 - }, - { - "epoch": 0.32, - "learning_rate": 4.462901062410536e-05, - "loss": 4.511, - "step": 298000 - }, - { - "epoch": 0.32, - "learning_rate": 4.46199988969646e-05, - "loss": 4.4423, - "step": 298500 - }, - { - "epoch": 0.32, - "learning_rate": 4.461098716982384e-05, - "loss": 4.5147, - "step": 299000 - }, - { - "epoch": 0.32, - "learning_rate": 4.460197544268308e-05, - "loss": 4.5474, - "step": 299500 - }, - { - "epoch": 0.32, - "learning_rate": 4.4592963715542314e-05, - "loss": 4.5493, - "step": 300000 - }, - { - "epoch": 0.32, - "learning_rate": 4.458395198840155e-05, - "loss": 4.49, - "step": 300500 - }, - { - "epoch": 0.33, - "learning_rate": 4.457494026126078e-05, - "loss": 4.5583, - "step": 301000 - }, - { - "epoch": 0.33, - "learning_rate": 4.456592853412002e-05, - "loss": 4.5288, - "step": 301500 - }, - { - "epoch": 0.33, - "learning_rate": 4.455691680697926e-05, - "loss": 4.5368, - "step": 302000 - }, - { - "epoch": 0.33, - "learning_rate": 4.45479050798385e-05, - "loss": 4.5171, - "step": 302500 - }, - { - "epoch": 0.33, - "learning_rate": 4.453889335269774e-05, - "loss": 4.4643, - "step": 303000 - }, - { - "epoch": 0.33, - "learning_rate": 4.4529881625556976e-05, - "loss": 4.4714, - "step": 303500 - }, - { - "epoch": 0.33, - "learning_rate": 4.4520869898416207e-05, - "loss": 4.5301, - "step": 304000 - }, - { - "epoch": 0.33, - "learning_rate": 4.4511858171275444e-05, - "loss": 4.4925, - "step": 304500 - }, - { - "epoch": 0.33, - "learning_rate": 4.450284644413468e-05, - "loss": 4.534, - "step": 305000 - }, - { - "epoch": 0.33, - "learning_rate": 4.449383471699392e-05, - "loss": 4.5594, - "step": 305500 - }, - { - "epoch": 0.33, - "learning_rate": 4.4484822989853156e-05, - "loss": 4.5079, - "step": 306000 - }, - { - "epoch": 0.33, - "learning_rate": 4.4475811262712394e-05, - "loss": 4.5475, - "step": 306500 - }, - { - "epoch": 0.33, - "learning_rate": 4.446679953557164e-05, - "loss": 4.4948, - "step": 307000 - }, - { - "epoch": 0.33, - "learning_rate": 4.445778780843087e-05, - "loss": 4.5257, - "step": 307500 - }, - { - "epoch": 0.33, - "learning_rate": 4.4448776081290106e-05, - "loss": 4.4913, - "step": 308000 - }, - { - "epoch": 0.33, - "learning_rate": 4.443976435414934e-05, - "loss": 4.5662, - "step": 308500 - }, - { - "epoch": 0.33, - "learning_rate": 4.443075262700858e-05, - "loss": 4.5267, - "step": 309000 - }, - { - "epoch": 0.33, - "learning_rate": 4.442174089986782e-05, - "loss": 4.4999, - "step": 309500 - }, - { - "epoch": 0.34, - "learning_rate": 4.4412729172727055e-05, - "loss": 4.5111, - "step": 310000 - }, - { - "epoch": 0.34, - "learning_rate": 4.440371744558629e-05, - "loss": 4.5107, - "step": 310500 - }, - { - "epoch": 0.34, - "learning_rate": 4.439470571844553e-05, - "loss": 4.5682, - "step": 311000 - }, - { - "epoch": 0.34, - "learning_rate": 4.438569399130477e-05, - "loss": 4.5016, - "step": 311500 - }, - { - "epoch": 0.34, - "learning_rate": 4.4376682264164005e-05, - "loss": 4.4973, - "step": 312000 - }, - { - "epoch": 0.34, - "learning_rate": 4.436767053702324e-05, - "loss": 4.5022, - "step": 312500 - }, - { - "epoch": 0.34, - "learning_rate": 4.435865880988248e-05, - "loss": 4.5441, - "step": 313000 - }, - { - "epoch": 0.34, - "learning_rate": 4.434964708274171e-05, - "loss": 4.5459, - "step": 313500 - }, - { - "epoch": 0.34, - "learning_rate": 4.4340635355600955e-05, - "loss": 4.5141, - "step": 314000 - }, - { - "epoch": 0.34, - "learning_rate": 4.433162362846019e-05, - "loss": 4.5329, - "step": 314500 - }, - { - "epoch": 0.34, - "learning_rate": 4.432261190131943e-05, - "loss": 4.5172, - "step": 315000 - }, - { - "epoch": 0.34, - "learning_rate": 4.431360017417867e-05, - "loss": 4.5264, - "step": 315500 - }, - { - "epoch": 0.34, - "learning_rate": 4.4304588447037904e-05, - "loss": 4.5352, - "step": 316000 - }, - { - "epoch": 0.34, - "learning_rate": 4.4295576719897135e-05, - "loss": 4.5287, - "step": 316500 - }, - { - "epoch": 0.34, - "learning_rate": 4.428656499275637e-05, - "loss": 4.5314, - "step": 317000 - }, - { - "epoch": 0.34, - "learning_rate": 4.427755326561561e-05, - "loss": 4.5219, - "step": 317500 - }, - { - "epoch": 0.34, - "learning_rate": 4.426854153847485e-05, - "loss": 4.5136, - "step": 318000 - }, - { - "epoch": 0.34, - "learning_rate": 4.425952981133409e-05, - "loss": 4.483, - "step": 318500 - }, - { - "epoch": 0.34, - "learning_rate": 4.425051808419333e-05, - "loss": 4.4569, - "step": 319000 - }, - { - "epoch": 0.35, - "learning_rate": 4.4241506357052566e-05, - "loss": 4.522, - "step": 319500 - }, - { - "epoch": 0.35, - "learning_rate": 4.4232494629911797e-05, - "loss": 4.4795, - "step": 320000 - }, - { - "epoch": 0.35, - "learning_rate": 4.4223482902771034e-05, - "loss": 4.5522, - "step": 320500 - }, - { - "epoch": 0.35, - "learning_rate": 4.421447117563027e-05, - "loss": 4.49, - "step": 321000 - }, - { - "epoch": 0.35, - "learning_rate": 4.420545944848951e-05, - "loss": 4.5291, - "step": 321500 - }, - { - "epoch": 0.35, - "learning_rate": 4.4196447721348746e-05, - "loss": 4.5003, - "step": 322000 - }, - { - "epoch": 0.35, - "learning_rate": 4.418743599420799e-05, - "loss": 4.4768, - "step": 322500 - }, - { - "epoch": 0.35, - "learning_rate": 4.417842426706722e-05, - "loss": 4.498, - "step": 323000 - }, - { - "epoch": 0.35, - "learning_rate": 4.416941253992646e-05, - "loss": 4.4942, - "step": 323500 - }, - { - "epoch": 0.35, - "learning_rate": 4.4160400812785696e-05, - "loss": 4.5224, - "step": 324000 - }, - { - "epoch": 0.35, - "learning_rate": 4.415138908564493e-05, - "loss": 4.4901, - "step": 324500 - }, - { - "epoch": 0.35, - "learning_rate": 4.414237735850417e-05, - "loss": 4.4871, - "step": 325000 - }, - { - "epoch": 0.35, - "learning_rate": 4.413336563136341e-05, - "loss": 4.4594, - "step": 325500 - }, - { - "epoch": 0.35, - "learning_rate": 4.4124353904222645e-05, - "loss": 4.515, - "step": 326000 - }, - { - "epoch": 0.35, - "learning_rate": 4.411534217708188e-05, - "loss": 4.4832, - "step": 326500 - }, - { - "epoch": 0.35, - "learning_rate": 4.410633044994112e-05, - "loss": 4.5075, - "step": 327000 - }, - { - "epoch": 0.35, - "learning_rate": 4.409731872280036e-05, - "loss": 4.4815, - "step": 327500 - }, - { - "epoch": 0.35, - "learning_rate": 4.4088306995659595e-05, - "loss": 4.5843, - "step": 328000 - }, - { - "epoch": 0.36, - "learning_rate": 4.407929526851883e-05, - "loss": 4.4808, - "step": 328500 - }, - { - "epoch": 0.36, - "learning_rate": 4.407028354137806e-05, - "loss": 4.5299, - "step": 329000 - }, - { - "epoch": 0.36, - "learning_rate": 4.40612718142373e-05, - "loss": 4.4625, - "step": 329500 - }, - { - "epoch": 0.36, - "learning_rate": 4.4052260087096545e-05, - "loss": 4.5026, - "step": 330000 - }, - { - "epoch": 0.36, - "learning_rate": 4.404324835995578e-05, - "loss": 4.4895, - "step": 330500 - }, - { - "epoch": 0.36, - "learning_rate": 4.403423663281502e-05, - "loss": 4.5502, - "step": 331000 - }, - { - "epoch": 0.36, - "learning_rate": 4.402522490567426e-05, - "loss": 4.4647, - "step": 331500 - }, - { - "epoch": 0.36, - "learning_rate": 4.401621317853349e-05, - "loss": 4.4907, - "step": 332000 - }, - { - "epoch": 0.36, - "learning_rate": 4.4007201451392725e-05, - "loss": 4.4623, - "step": 332500 - }, - { - "epoch": 0.36, - "learning_rate": 4.399818972425196e-05, - "loss": 4.5451, - "step": 333000 - }, - { - "epoch": 0.36, - "learning_rate": 4.39891779971112e-05, - "loss": 4.5007, - "step": 333500 - }, - { - "epoch": 0.36, - "learning_rate": 4.3980166269970444e-05, - "loss": 4.507, - "step": 334000 - }, - { - "epoch": 0.36, - "learning_rate": 4.397115454282968e-05, - "loss": 4.4595, - "step": 334500 - }, - { - "epoch": 0.36, - "learning_rate": 4.396214281568892e-05, - "loss": 4.5234, - "step": 335000 - }, - { - "epoch": 0.36, - "learning_rate": 4.395313108854815e-05, - "loss": 4.4674, - "step": 335500 - }, - { - "epoch": 0.36, - "learning_rate": 4.394411936140739e-05, - "loss": 4.4806, - "step": 336000 - }, - { - "epoch": 0.36, - "learning_rate": 4.3935107634266624e-05, - "loss": 4.4845, - "step": 336500 - }, - { - "epoch": 0.36, - "learning_rate": 4.392609590712586e-05, - "loss": 4.5202, - "step": 337000 - }, - { - "epoch": 0.36, - "learning_rate": 4.39170841799851e-05, - "loss": 4.5301, - "step": 337500 - }, - { - "epoch": 0.37, - "learning_rate": 4.390807245284434e-05, - "loss": 4.5139, - "step": 338000 - }, - { - "epoch": 0.37, - "learning_rate": 4.3899060725703574e-05, - "loss": 4.4715, - "step": 338500 - }, - { - "epoch": 0.37, - "learning_rate": 4.389004899856281e-05, - "loss": 4.4752, - "step": 339000 - }, - { - "epoch": 0.37, - "learning_rate": 4.388103727142205e-05, - "loss": 4.4945, - "step": 339500 - }, - { - "epoch": 0.37, - "learning_rate": 4.3872025544281286e-05, - "loss": 4.4648, - "step": 340000 - }, - { - "epoch": 0.37, - "learning_rate": 4.386301381714052e-05, - "loss": 4.5011, - "step": 340500 - }, - { - "epoch": 0.37, - "learning_rate": 4.385400208999976e-05, - "loss": 4.527, - "step": 341000 - }, - { - "epoch": 0.37, - "learning_rate": 4.3844990362859e-05, - "loss": 4.5182, - "step": 341500 - }, - { - "epoch": 0.37, - "learning_rate": 4.3835978635718235e-05, - "loss": 4.4455, - "step": 342000 - }, - { - "epoch": 0.37, - "learning_rate": 4.382696690857747e-05, - "loss": 4.4408, - "step": 342500 - }, - { - "epoch": 0.37, - "learning_rate": 4.381795518143671e-05, - "loss": 4.4726, - "step": 343000 - }, - { - "epoch": 0.37, - "learning_rate": 4.380894345429595e-05, - "loss": 4.4912, - "step": 343500 - }, - { - "epoch": 0.37, - "learning_rate": 4.3799931727155185e-05, - "loss": 4.4715, - "step": 344000 - }, - { - "epoch": 0.37, - "learning_rate": 4.3790920000014416e-05, - "loss": 4.4808, - "step": 344500 - }, - { - "epoch": 0.37, - "learning_rate": 4.378190827287365e-05, - "loss": 4.469, - "step": 345000 - }, - { - "epoch": 0.37, - "learning_rate": 4.37728965457329e-05, - "loss": 4.5089, - "step": 345500 - }, - { - "epoch": 0.37, - "learning_rate": 4.3763884818592135e-05, - "loss": 4.4609, - "step": 346000 - }, - { - "epoch": 0.37, - "learning_rate": 4.375487309145137e-05, - "loss": 4.5222, - "step": 346500 - }, - { - "epoch": 0.38, - "learning_rate": 4.374586136431061e-05, - "loss": 4.4459, - "step": 347000 - }, - { - "epoch": 0.38, - "learning_rate": 4.373684963716985e-05, - "loss": 4.5242, - "step": 347500 - }, - { - "epoch": 0.38, - "learning_rate": 4.372783791002908e-05, - "loss": 4.4754, - "step": 348000 - }, - { - "epoch": 0.38, - "learning_rate": 4.3718826182888315e-05, - "loss": 4.4723, - "step": 348500 - }, - { - "epoch": 0.38, - "learning_rate": 4.370981445574755e-05, - "loss": 4.4723, - "step": 349000 - }, - { - "epoch": 0.38, - "learning_rate": 4.3700802728606796e-05, - "loss": 4.489, - "step": 349500 - }, - { - "epoch": 0.38, - "learning_rate": 4.3691791001466034e-05, - "loss": 4.4956, - "step": 350000 - }, - { - "epoch": 0.38, - "learning_rate": 4.368277927432527e-05, - "loss": 4.5123, - "step": 350500 - }, - { - "epoch": 0.38, - "learning_rate": 4.36737675471845e-05, - "loss": 4.5453, - "step": 351000 - }, - { - "epoch": 0.38, - "learning_rate": 4.366475582004374e-05, - "loss": 4.4737, - "step": 351500 - }, - { - "epoch": 0.38, - "learning_rate": 4.365574409290298e-05, - "loss": 4.4692, - "step": 352000 - }, - { - "epoch": 0.38, - "learning_rate": 4.3646732365762214e-05, - "loss": 4.4465, - "step": 352500 - }, - { - "epoch": 0.38, - "learning_rate": 4.363772063862145e-05, - "loss": 4.4285, - "step": 353000 - }, - { - "epoch": 0.38, - "learning_rate": 4.3628708911480696e-05, - "loss": 4.5237, - "step": 353500 - }, - { - "epoch": 0.38, - "learning_rate": 4.3619697184339926e-05, - "loss": 4.5195, - "step": 354000 - }, - { - "epoch": 0.38, - "learning_rate": 4.3610685457199164e-05, - "loss": 4.492, - "step": 354500 - }, - { - "epoch": 0.38, - "learning_rate": 4.36016737300584e-05, - "loss": 4.485, - "step": 355000 - }, - { - "epoch": 0.38, - "learning_rate": 4.359266200291764e-05, - "loss": 4.4856, - "step": 355500 - }, - { - "epoch": 0.38, - "learning_rate": 4.3583650275776876e-05, - "loss": 4.5072, - "step": 356000 - }, - { - "epoch": 0.39, - "learning_rate": 4.357463854863611e-05, - "loss": 4.4757, - "step": 356500 - }, - { - "epoch": 0.39, - "learning_rate": 4.356562682149535e-05, - "loss": 4.4942, - "step": 357000 - }, - { - "epoch": 0.39, - "learning_rate": 4.355661509435459e-05, - "loss": 4.4687, - "step": 357500 - }, - { - "epoch": 0.39, - "learning_rate": 4.3547603367213825e-05, - "loss": 4.4391, - "step": 358000 - }, - { - "epoch": 0.39, - "learning_rate": 4.353859164007306e-05, - "loss": 4.4766, - "step": 358500 - }, - { - "epoch": 0.39, - "learning_rate": 4.35295799129323e-05, - "loss": 4.5417, - "step": 359000 - }, - { - "epoch": 0.39, - "learning_rate": 4.352056818579154e-05, - "loss": 4.4657, - "step": 359500 - }, - { - "epoch": 0.39, - "learning_rate": 4.3511556458650775e-05, - "loss": 4.5017, - "step": 360000 - }, - { - "epoch": 0.39, - "learning_rate": 4.3502544731510006e-05, - "loss": 4.4788, - "step": 360500 - }, - { - "epoch": 0.39, - "learning_rate": 4.349353300436925e-05, - "loss": 4.4984, - "step": 361000 - }, - { - "epoch": 0.39, - "learning_rate": 4.348452127722849e-05, - "loss": 4.543, - "step": 361500 - }, - { - "epoch": 0.39, - "learning_rate": 4.3475509550087725e-05, - "loss": 4.4832, - "step": 362000 - }, - { - "epoch": 0.39, - "learning_rate": 4.346649782294696e-05, - "loss": 4.5137, - "step": 362500 - }, - { - "epoch": 0.39, - "learning_rate": 4.34574860958062e-05, - "loss": 4.4928, - "step": 363000 - }, - { - "epoch": 0.39, - "learning_rate": 4.344847436866543e-05, - "loss": 4.4666, - "step": 363500 - }, - { - "epoch": 0.39, - "learning_rate": 4.343946264152467e-05, - "loss": 4.4647, - "step": 364000 - }, - { - "epoch": 0.39, - "learning_rate": 4.3430450914383905e-05, - "loss": 4.4378, - "step": 364500 - }, - { - "epoch": 0.39, - "learning_rate": 4.342143918724315e-05, - "loss": 4.4485, - "step": 365000 - }, - { - "epoch": 0.4, - "learning_rate": 4.3412427460102386e-05, - "loss": 4.4648, - "step": 365500 - }, - { - "epoch": 0.4, - "learning_rate": 4.3403415732961624e-05, - "loss": 4.4384, - "step": 366000 - }, - { - "epoch": 0.4, - "learning_rate": 4.3394404005820855e-05, - "loss": 4.4319, - "step": 366500 - }, - { - "epoch": 0.4, - "learning_rate": 4.338539227868009e-05, - "loss": 4.4791, - "step": 367000 - }, - { - "epoch": 0.4, - "learning_rate": 4.337638055153933e-05, - "loss": 4.5161, - "step": 367500 - }, - { - "epoch": 0.4, - "learning_rate": 4.336736882439857e-05, - "loss": 4.4157, - "step": 368000 - }, - { - "epoch": 0.4, - "learning_rate": 4.3358357097257804e-05, - "loss": 4.4717, - "step": 368500 - }, - { - "epoch": 0.4, - "learning_rate": 4.334934537011704e-05, - "loss": 4.479, - "step": 369000 - }, - { - "epoch": 0.4, - "learning_rate": 4.334033364297628e-05, - "loss": 4.4731, - "step": 369500 - }, - { - "epoch": 0.4, - "learning_rate": 4.3331321915835516e-05, - "loss": 4.4899, - "step": 370000 - }, - { - "epoch": 0.4, - "learning_rate": 4.3322310188694754e-05, - "loss": 4.502, - "step": 370500 - }, - { - "epoch": 0.4, - "learning_rate": 4.331329846155399e-05, - "loss": 4.4384, - "step": 371000 - }, - { - "epoch": 0.4, - "learning_rate": 4.330428673441323e-05, - "loss": 4.4643, - "step": 371500 - }, - { - "epoch": 0.4, - "learning_rate": 4.3295275007272466e-05, - "loss": 4.4491, - "step": 372000 - }, - { - "epoch": 0.4, - "learning_rate": 4.32862632801317e-05, - "loss": 4.409, - "step": 372500 - }, - { - "epoch": 0.4, - "learning_rate": 4.327725155299094e-05, - "loss": 4.4673, - "step": 373000 - }, - { - "epoch": 0.4, - "learning_rate": 4.326823982585018e-05, - "loss": 4.4603, - "step": 373500 - }, - { - "epoch": 0.4, - "learning_rate": 4.3259228098709416e-05, - "loss": 4.4743, - "step": 374000 - }, - { - "epoch": 0.4, - "learning_rate": 4.325021637156865e-05, - "loss": 4.4634, - "step": 374500 - }, - { - "epoch": 0.41, - "learning_rate": 4.324120464442789e-05, - "loss": 4.4576, - "step": 375000 - }, - { - "epoch": 0.41, - "learning_rate": 4.323219291728713e-05, - "loss": 4.484, - "step": 375500 - }, - { - "epoch": 0.41, - "learning_rate": 4.322318119014636e-05, - "loss": 4.431, - "step": 376000 - }, - { - "epoch": 0.41, - "learning_rate": 4.32141694630056e-05, - "loss": 4.5062, - "step": 376500 - }, - { - "epoch": 0.41, - "learning_rate": 4.320515773586484e-05, - "loss": 4.4464, - "step": 377000 - }, - { - "epoch": 0.41, - "learning_rate": 4.319614600872408e-05, - "loss": 4.4734, - "step": 377500 - }, - { - "epoch": 0.41, - "learning_rate": 4.3187134281583315e-05, - "loss": 4.4615, - "step": 378000 - }, - { - "epoch": 0.41, - "learning_rate": 4.317812255444255e-05, - "loss": 4.4707, - "step": 378500 - }, - { - "epoch": 0.41, - "learning_rate": 4.316911082730178e-05, - "loss": 4.5084, - "step": 379000 - }, - { - "epoch": 0.41, - "learning_rate": 4.316009910016102e-05, - "loss": 4.4335, - "step": 379500 - }, - { - "epoch": 0.41, - "learning_rate": 4.315108737302026e-05, - "loss": 4.5115, - "step": 380000 - }, - { - "epoch": 0.41, - "learning_rate": 4.31420756458795e-05, - "loss": 4.495, - "step": 380500 - }, - { - "epoch": 0.41, - "learning_rate": 4.313306391873874e-05, - "loss": 4.5261, - "step": 381000 - }, - { - "epoch": 0.41, - "learning_rate": 4.3124052191597977e-05, - "loss": 4.5579, - "step": 381500 - }, - { - "epoch": 0.41, - "learning_rate": 4.311504046445721e-05, - "loss": 4.4861, - "step": 382000 - }, - { - "epoch": 0.41, - "learning_rate": 4.3106028737316445e-05, - "loss": 4.4294, - "step": 382500 - }, - { - "epoch": 0.41, - "learning_rate": 4.309701701017568e-05, - "loss": 4.4975, - "step": 383000 - }, - { - "epoch": 0.41, - "learning_rate": 4.308800528303492e-05, - "loss": 4.4527, - "step": 383500 - }, - { - "epoch": 0.42, - "learning_rate": 4.307899355589416e-05, - "loss": 4.4521, - "step": 384000 - }, - { - "epoch": 0.42, - "learning_rate": 4.3069981828753394e-05, - "loss": 4.5024, - "step": 384500 - }, - { - "epoch": 0.42, - "learning_rate": 4.306097010161263e-05, - "loss": 4.4181, - "step": 385000 - }, - { - "epoch": 0.42, - "learning_rate": 4.305195837447187e-05, - "loss": 4.4786, - "step": 385500 - }, - { - "epoch": 0.42, - "learning_rate": 4.3042946647331106e-05, - "loss": 4.4245, - "step": 386000 - }, - { - "epoch": 0.42, - "learning_rate": 4.3033934920190344e-05, - "loss": 4.4878, - "step": 386500 - }, - { - "epoch": 0.42, - "learning_rate": 4.302492319304958e-05, - "loss": 4.4427, - "step": 387000 - }, - { - "epoch": 0.42, - "learning_rate": 4.301591146590882e-05, - "loss": 4.3975, - "step": 387500 - }, - { - "epoch": 0.42, - "learning_rate": 4.3006899738768056e-05, - "loss": 4.4566, - "step": 388000 - }, - { - "epoch": 0.42, - "learning_rate": 4.299788801162729e-05, - "loss": 4.4441, - "step": 388500 - }, - { - "epoch": 0.42, - "learning_rate": 4.298887628448653e-05, - "loss": 4.4606, - "step": 389000 - }, - { - "epoch": 0.42, - "learning_rate": 4.297986455734577e-05, - "loss": 4.5232, - "step": 389500 - }, - { - "epoch": 0.42, - "learning_rate": 4.2970852830205006e-05, - "loss": 4.464, - "step": 390000 - }, - { - "epoch": 0.42, - "learning_rate": 4.296184110306424e-05, - "loss": 4.3564, - "step": 390500 - }, - { - "epoch": 0.42, - "learning_rate": 4.295282937592348e-05, - "loss": 4.4523, - "step": 391000 - }, - { - "epoch": 0.42, - "learning_rate": 4.294381764878271e-05, - "loss": 4.4288, - "step": 391500 - }, - { - "epoch": 0.42, - "learning_rate": 4.2934805921641955e-05, - "loss": 4.3775, - "step": 392000 - }, - { - "epoch": 0.42, - "learning_rate": 4.292579419450119e-05, - "loss": 4.4579, - "step": 392500 - }, - { - "epoch": 0.42, - "learning_rate": 4.291678246736043e-05, - "loss": 4.4408, - "step": 393000 - }, - { - "epoch": 0.43, - "learning_rate": 4.290777074021967e-05, - "loss": 4.4396, - "step": 393500 - }, - { - "epoch": 0.43, - "learning_rate": 4.2898759013078905e-05, - "loss": 4.4438, - "step": 394000 - }, - { - "epoch": 0.43, - "learning_rate": 4.2889747285938135e-05, - "loss": 4.46, - "step": 394500 - }, - { - "epoch": 0.43, - "learning_rate": 4.288073555879737e-05, - "loss": 4.522, - "step": 395000 - }, - { - "epoch": 0.43, - "learning_rate": 4.287172383165661e-05, - "loss": 4.471, - "step": 395500 - }, - { - "epoch": 0.43, - "learning_rate": 4.286271210451585e-05, - "loss": 4.3984, - "step": 396000 - }, - { - "epoch": 0.43, - "learning_rate": 4.285370037737509e-05, - "loss": 4.4826, - "step": 396500 - }, - { - "epoch": 0.43, - "learning_rate": 4.284468865023433e-05, - "loss": 4.4343, - "step": 397000 - }, - { - "epoch": 0.43, - "learning_rate": 4.283567692309356e-05, - "loss": 4.4479, - "step": 397500 - }, - { - "epoch": 0.43, - "learning_rate": 4.28266651959528e-05, - "loss": 4.4768, - "step": 398000 - }, - { - "epoch": 0.43, - "learning_rate": 4.2817653468812035e-05, - "loss": 4.4508, - "step": 398500 - }, - { - "epoch": 0.43, - "learning_rate": 4.280864174167127e-05, - "loss": 4.4707, - "step": 399000 - }, - { - "epoch": 0.43, - "learning_rate": 4.279963001453051e-05, - "loss": 4.4679, - "step": 399500 - }, - { - "epoch": 0.43, - "learning_rate": 4.279061828738975e-05, - "loss": 4.3981, - "step": 400000 - }, - { - "epoch": 0.43, - "learning_rate": 4.278160656024899e-05, - "loss": 4.4731, - "step": 400500 - }, - { - "epoch": 0.43, - "learning_rate": 4.277259483310822e-05, - "loss": 4.4104, - "step": 401000 - }, - { - "epoch": 0.43, - "learning_rate": 4.276358310596746e-05, - "loss": 4.4197, - "step": 401500 - }, - { - "epoch": 0.43, - "learning_rate": 4.2754571378826696e-05, - "loss": 4.4555, - "step": 402000 - }, - { - "epoch": 0.44, - "learning_rate": 4.2745559651685934e-05, - "loss": 4.4885, - "step": 402500 - }, - { - "epoch": 0.44, - "learning_rate": 4.273654792454517e-05, - "loss": 4.3961, - "step": 403000 - }, - { - "epoch": 0.44, - "learning_rate": 4.272753619740441e-05, - "loss": 4.4982, - "step": 403500 - }, - { - "epoch": 0.44, - "learning_rate": 4.2718524470263646e-05, - "loss": 4.4241, - "step": 404000 - }, - { - "epoch": 0.44, - "learning_rate": 4.2709512743122883e-05, - "loss": 4.4929, - "step": 404500 - }, - { - "epoch": 0.44, - "learning_rate": 4.270050101598212e-05, - "loss": 4.438, - "step": 405000 - }, - { - "epoch": 0.44, - "learning_rate": 4.269148928884136e-05, - "loss": 4.4581, - "step": 405500 - }, - { - "epoch": 0.44, - "learning_rate": 4.2682477561700596e-05, - "loss": 4.4261, - "step": 406000 - }, - { - "epoch": 0.44, - "learning_rate": 4.267346583455983e-05, - "loss": 4.4314, - "step": 406500 - }, - { - "epoch": 0.44, - "learning_rate": 4.2664454107419064e-05, - "loss": 4.5199, - "step": 407000 - }, - { - "epoch": 0.44, - "learning_rate": 4.26554423802783e-05, - "loss": 4.3874, - "step": 407500 - }, - { - "epoch": 0.44, - "learning_rate": 4.2646430653137545e-05, - "loss": 4.4064, - "step": 408000 - }, - { - "epoch": 0.44, - "learning_rate": 4.263741892599678e-05, - "loss": 4.4102, - "step": 408500 - }, - { - "epoch": 0.44, - "learning_rate": 4.262840719885602e-05, - "loss": 4.4532, - "step": 409000 - }, - { - "epoch": 0.44, - "learning_rate": 4.261939547171526e-05, - "loss": 4.4605, - "step": 409500 - }, - { - "epoch": 0.44, - "learning_rate": 4.261038374457449e-05, - "loss": 4.4169, - "step": 410000 - }, - { - "epoch": 0.44, - "learning_rate": 4.2601372017433725e-05, - "loss": 4.4427, - "step": 410500 - }, - { - "epoch": 0.44, - "learning_rate": 4.259236029029296e-05, - "loss": 4.4733, - "step": 411000 - }, - { - "epoch": 0.44, - "learning_rate": 4.25833485631522e-05, - "loss": 4.5038, - "step": 411500 - }, - { - "epoch": 0.45, - "learning_rate": 4.2574336836011444e-05, - "loss": 4.4452, - "step": 412000 - }, - { - "epoch": 0.45, - "learning_rate": 4.256532510887068e-05, - "loss": 4.4282, - "step": 412500 - }, - { - "epoch": 0.45, - "learning_rate": 4.255631338172992e-05, - "loss": 4.4557, - "step": 413000 - }, - { - "epoch": 0.45, - "learning_rate": 4.254730165458915e-05, - "loss": 4.4631, - "step": 413500 - }, - { - "epoch": 0.45, - "learning_rate": 4.253828992744839e-05, - "loss": 4.4623, - "step": 414000 - }, - { - "epoch": 0.45, - "learning_rate": 4.2529278200307625e-05, - "loss": 4.4419, - "step": 414500 - }, - { - "epoch": 0.45, - "learning_rate": 4.252026647316686e-05, - "loss": 4.4337, - "step": 415000 - }, - { - "epoch": 0.45, - "learning_rate": 4.25112547460261e-05, - "loss": 4.4549, - "step": 415500 - }, - { - "epoch": 0.45, - "learning_rate": 4.2502243018885344e-05, - "loss": 4.4857, - "step": 416000 - }, - { - "epoch": 0.45, - "learning_rate": 4.2493231291744574e-05, - "loss": 4.4788, - "step": 416500 - }, - { - "epoch": 0.45, - "learning_rate": 4.248421956460381e-05, - "loss": 4.4158, - "step": 417000 - }, - { - "epoch": 0.45, - "learning_rate": 4.247520783746305e-05, - "loss": 4.4255, - "step": 417500 - }, - { - "epoch": 0.45, - "learning_rate": 4.2466196110322286e-05, - "loss": 4.5044, - "step": 418000 - }, - { - "epoch": 0.45, - "learning_rate": 4.2457184383181524e-05, - "loss": 4.352, - "step": 418500 - }, - { - "epoch": 0.45, - "learning_rate": 4.244817265604076e-05, - "loss": 4.4666, - "step": 419000 - }, - { - "epoch": 0.45, - "learning_rate": 4.24391609289e-05, - "loss": 4.4477, - "step": 419500 - }, - { - "epoch": 0.45, - "learning_rate": 4.2430149201759236e-05, - "loss": 4.4425, - "step": 420000 - }, - { - "epoch": 0.45, - "learning_rate": 4.2421137474618473e-05, - "loss": 4.5022, - "step": 420500 - }, - { - "epoch": 0.46, - "learning_rate": 4.241212574747771e-05, - "loss": 4.3832, - "step": 421000 - }, - { - "epoch": 0.46, - "learning_rate": 4.240311402033695e-05, - "loss": 4.4686, - "step": 421500 - }, - { - "epoch": 0.46, - "learning_rate": 4.2394102293196186e-05, - "loss": 4.4394, - "step": 422000 - }, - { - "epoch": 0.46, - "learning_rate": 4.2385090566055416e-05, - "loss": 4.4662, - "step": 422500 - }, - { - "epoch": 0.46, - "learning_rate": 4.2376078838914654e-05, - "loss": 4.4482, - "step": 423000 - }, - { - "epoch": 0.46, - "learning_rate": 4.23670671117739e-05, - "loss": 4.4238, - "step": 423500 - }, - { - "epoch": 0.46, - "learning_rate": 4.2358055384633135e-05, - "loss": 4.399, - "step": 424000 - }, - { - "epoch": 0.46, - "learning_rate": 4.234904365749237e-05, - "loss": 4.4646, - "step": 424500 - }, - { - "epoch": 0.46, - "learning_rate": 4.234003193035161e-05, - "loss": 4.4333, - "step": 425000 - }, - { - "epoch": 0.46, - "learning_rate": 4.233102020321084e-05, - "loss": 4.4222, - "step": 425500 - }, - { - "epoch": 0.46, - "learning_rate": 4.232200847607008e-05, - "loss": 4.4807, - "step": 426000 - }, - { - "epoch": 0.46, - "learning_rate": 4.2312996748929315e-05, - "loss": 4.4585, - "step": 426500 - }, - { - "epoch": 0.46, - "learning_rate": 4.230398502178855e-05, - "loss": 4.4629, - "step": 427000 - }, - { - "epoch": 0.46, - "learning_rate": 4.22949732946478e-05, - "loss": 4.3969, - "step": 427500 - }, - { - "epoch": 0.46, - "learning_rate": 4.2285961567507034e-05, - "loss": 4.4375, - "step": 428000 - }, - { - "epoch": 0.46, - "learning_rate": 4.227694984036627e-05, - "loss": 4.4462, - "step": 428500 - }, - { - "epoch": 0.46, - "learning_rate": 4.22679381132255e-05, - "loss": 4.3994, - "step": 429000 - }, - { - "epoch": 0.46, - "learning_rate": 4.225892638608474e-05, - "loss": 4.441, - "step": 429500 - }, - { - "epoch": 0.47, - "learning_rate": 4.224991465894398e-05, - "loss": 4.4581, - "step": 430000 - }, - { - "epoch": 0.47, - "learning_rate": 4.2240902931803215e-05, - "loss": 4.4271, - "step": 430500 - }, - { - "epoch": 0.47, - "learning_rate": 4.223189120466245e-05, - "loss": 4.4268, - "step": 431000 - }, - { - "epoch": 0.47, - "learning_rate": 4.2222879477521696e-05, - "loss": 4.4299, - "step": 431500 - }, - { - "epoch": 0.47, - "learning_rate": 4.221386775038093e-05, - "loss": 4.4034, - "step": 432000 - }, - { - "epoch": 0.47, - "learning_rate": 4.2204856023240164e-05, - "loss": 4.5251, - "step": 432500 - }, - { - "epoch": 0.47, - "learning_rate": 4.21958442960994e-05, - "loss": 4.4132, - "step": 433000 - }, - { - "epoch": 0.47, - "learning_rate": 4.218683256895864e-05, - "loss": 4.4342, - "step": 433500 - }, - { - "epoch": 0.47, - "learning_rate": 4.2177820841817877e-05, - "loss": 4.4125, - "step": 434000 - }, - { - "epoch": 0.47, - "learning_rate": 4.2168809114677114e-05, - "loss": 4.4599, - "step": 434500 - }, - { - "epoch": 0.47, - "learning_rate": 4.215979738753635e-05, - "loss": 4.3972, - "step": 435000 - }, - { - "epoch": 0.47, - "learning_rate": 4.215078566039559e-05, - "loss": 4.5031, - "step": 435500 - }, - { - "epoch": 0.47, - "learning_rate": 4.2141773933254826e-05, - "loss": 4.4313, - "step": 436000 - }, - { - "epoch": 0.47, - "learning_rate": 4.2132762206114064e-05, - "loss": 4.4108, - "step": 436500 - }, - { - "epoch": 0.47, - "learning_rate": 4.21237504789733e-05, - "loss": 4.4509, - "step": 437000 - }, - { - "epoch": 0.47, - "learning_rate": 4.211473875183254e-05, - "loss": 4.4684, - "step": 437500 - }, - { - "epoch": 0.47, - "learning_rate": 4.210572702469177e-05, - "loss": 4.4394, - "step": 438000 - }, - { - "epoch": 0.47, - "learning_rate": 4.2096715297551006e-05, - "loss": 4.3804, - "step": 438500 - }, - { - "epoch": 0.47, - "learning_rate": 4.208770357041025e-05, - "loss": 4.4641, - "step": 439000 - }, - { - "epoch": 0.48, - "learning_rate": 4.207869184326949e-05, - "loss": 4.3934, - "step": 439500 - }, - { - "epoch": 0.48, - "learning_rate": 4.2069680116128725e-05, - "loss": 4.3989, - "step": 440000 - }, - { - "epoch": 0.48, - "learning_rate": 4.206066838898796e-05, - "loss": 4.447, - "step": 440500 - }, - { - "epoch": 0.48, - "learning_rate": 4.20516566618472e-05, - "loss": 4.4046, - "step": 441000 - }, - { - "epoch": 0.48, - "learning_rate": 4.204264493470643e-05, - "loss": 4.4264, - "step": 441500 - }, - { - "epoch": 0.48, - "learning_rate": 4.203363320756567e-05, - "loss": 4.3891, - "step": 442000 - }, - { - "epoch": 0.48, - "learning_rate": 4.2024621480424906e-05, - "loss": 4.4143, - "step": 442500 - }, - { - "epoch": 0.48, - "learning_rate": 4.201560975328415e-05, - "loss": 4.4362, - "step": 443000 - }, - { - "epoch": 0.48, - "learning_rate": 4.200659802614339e-05, - "loss": 4.4681, - "step": 443500 - }, - { - "epoch": 0.48, - "learning_rate": 4.1997586299002625e-05, - "loss": 4.4628, - "step": 444000 - }, - { - "epoch": 0.48, - "learning_rate": 4.1988574571861855e-05, - "loss": 4.4444, - "step": 444500 - }, - { - "epoch": 0.48, - "learning_rate": 4.197956284472109e-05, - "loss": 4.3894, - "step": 445000 - }, - { - "epoch": 0.48, - "learning_rate": 4.197055111758033e-05, - "loss": 4.4775, - "step": 445500 - }, - { - "epoch": 0.48, - "learning_rate": 4.196153939043957e-05, - "loss": 4.3898, - "step": 446000 - }, - { - "epoch": 0.48, - "learning_rate": 4.1952527663298805e-05, - "loss": 4.4591, - "step": 446500 - }, - { - "epoch": 0.48, - "learning_rate": 4.194351593615804e-05, - "loss": 4.4336, - "step": 447000 - }, - { - "epoch": 0.48, - "learning_rate": 4.193450420901728e-05, - "loss": 4.4063, - "step": 447500 - }, - { - "epoch": 0.48, - "learning_rate": 4.192549248187652e-05, - "loss": 4.4326, - "step": 448000 - }, - { - "epoch": 0.49, - "learning_rate": 4.1916480754735754e-05, - "loss": 4.4418, - "step": 448500 - }, - { - "epoch": 0.49, - "learning_rate": 4.190746902759499e-05, - "loss": 4.4141, - "step": 449000 - }, - { - "epoch": 0.49, - "learning_rate": 4.189845730045423e-05, - "loss": 4.3698, - "step": 449500 - }, - { - "epoch": 0.49, - "learning_rate": 4.1889445573313467e-05, - "loss": 4.4296, - "step": 450000 - }, - { - "epoch": 0.49, - "learning_rate": 4.1880433846172704e-05, - "loss": 4.4399, - "step": 450500 - }, - { - "epoch": 0.49, - "learning_rate": 4.187142211903194e-05, - "loss": 4.4123, - "step": 451000 - }, - { - "epoch": 0.49, - "learning_rate": 4.186241039189118e-05, - "loss": 4.3735, - "step": 451500 - }, - { - "epoch": 0.49, - "learning_rate": 4.1853398664750416e-05, - "loss": 4.3984, - "step": 452000 - }, - { - "epoch": 0.49, - "learning_rate": 4.1844386937609654e-05, - "loss": 4.4167, - "step": 452500 - }, - { - "epoch": 0.49, - "learning_rate": 4.183537521046889e-05, - "loss": 4.3666, - "step": 453000 - }, - { - "epoch": 0.49, - "learning_rate": 4.182636348332813e-05, - "loss": 4.4422, - "step": 453500 - }, - { - "epoch": 0.49, - "learning_rate": 4.181735175618736e-05, - "loss": 4.3986, - "step": 454000 - }, - { - "epoch": 0.49, - "learning_rate": 4.18083400290466e-05, - "loss": 4.4333, - "step": 454500 - }, - { - "epoch": 0.49, - "learning_rate": 4.179932830190584e-05, - "loss": 4.4112, - "step": 455000 - }, - { - "epoch": 0.49, - "learning_rate": 4.179031657476508e-05, - "loss": 4.42, - "step": 455500 - }, - { - "epoch": 0.49, - "learning_rate": 4.1781304847624315e-05, - "loss": 4.4114, - "step": 456000 - }, - { - "epoch": 0.49, - "learning_rate": 4.177229312048355e-05, - "loss": 4.3842, - "step": 456500 - }, - { - "epoch": 0.49, - "learning_rate": 4.1763281393342783e-05, - "loss": 4.4399, - "step": 457000 - }, - { - "epoch": 0.49, - "learning_rate": 4.175426966620202e-05, - "loss": 4.4283, - "step": 457500 - }, - { - "epoch": 0.5, - "learning_rate": 4.174525793906126e-05, - "loss": 4.4357, - "step": 458000 - }, - { - "epoch": 0.5, - "learning_rate": 4.1736246211920496e-05, - "loss": 4.4383, - "step": 458500 - }, - { - "epoch": 0.5, - "learning_rate": 4.172723448477974e-05, - "loss": 4.4069, - "step": 459000 - }, - { - "epoch": 0.5, - "learning_rate": 4.171822275763898e-05, - "loss": 4.4093, - "step": 459500 - }, - { - "epoch": 0.5, - "learning_rate": 4.170921103049821e-05, - "loss": 4.4003, - "step": 460000 - }, - { - "epoch": 0.5, - "learning_rate": 4.1700199303357445e-05, - "loss": 4.3554, - "step": 460500 - }, - { - "epoch": 0.5, - "learning_rate": 4.169118757621668e-05, - "loss": 4.4233, - "step": 461000 - }, - { - "epoch": 0.5, - "learning_rate": 4.168217584907592e-05, - "loss": 4.4297, - "step": 461500 - }, - { - "epoch": 0.5, - "learning_rate": 4.167316412193516e-05, - "loss": 4.41, - "step": 462000 - }, - { - "epoch": 0.5, - "learning_rate": 4.1664152394794395e-05, - "loss": 4.4319, - "step": 462500 - }, - { - "epoch": 0.5, - "learning_rate": 4.165514066765363e-05, - "loss": 4.4113, - "step": 463000 - }, - { - "epoch": 0.5, - "learning_rate": 4.164612894051287e-05, - "loss": 4.4162, - "step": 463500 - }, - { - "epoch": 0.5, - "learning_rate": 4.163711721337211e-05, - "loss": 4.437, - "step": 464000 - }, - { - "epoch": 0.5, - "learning_rate": 4.1628105486231344e-05, - "loss": 4.4412, - "step": 464500 - }, - { - "epoch": 0.5, - "learning_rate": 4.161909375909058e-05, - "loss": 4.4154, - "step": 465000 - }, - { - "epoch": 0.5, - "learning_rate": 4.161008203194982e-05, - "loss": 4.4167, - "step": 465500 - }, - { - "epoch": 0.5, - "learning_rate": 4.1601070304809057e-05, - "loss": 4.4659, - "step": 466000 - }, - { - "epoch": 0.5, - "learning_rate": 4.1592058577668294e-05, - "loss": 4.4041, - "step": 466500 - }, - { - "epoch": 0.51, - "learning_rate": 4.158304685052753e-05, - "loss": 4.4115, - "step": 467000 - }, - { - "epoch": 0.51, - "learning_rate": 4.157403512338677e-05, - "loss": 4.4393, - "step": 467500 - }, - { - "epoch": 0.51, - "learning_rate": 4.1565023396246006e-05, - "loss": 4.3725, - "step": 468000 - }, - { - "epoch": 0.51, - "learning_rate": 4.1556011669105244e-05, - "loss": 4.4011, - "step": 468500 - }, - { - "epoch": 0.51, - "learning_rate": 4.154699994196448e-05, - "loss": 4.353, - "step": 469000 - }, - { - "epoch": 0.51, - "learning_rate": 4.153798821482371e-05, - "loss": 4.3823, - "step": 469500 - }, - { - "epoch": 0.51, - "learning_rate": 4.152897648768295e-05, - "loss": 4.4488, - "step": 470000 - }, - { - "epoch": 0.51, - "learning_rate": 4.151996476054219e-05, - "loss": 4.4014, - "step": 470500 - }, - { - "epoch": 0.51, - "learning_rate": 4.151095303340143e-05, - "loss": 4.3857, - "step": 471000 - }, - { - "epoch": 0.51, - "learning_rate": 4.150194130626067e-05, - "loss": 4.4427, - "step": 471500 - }, - { - "epoch": 0.51, - "learning_rate": 4.1492929579119905e-05, - "loss": 4.4374, - "step": 472000 - }, - { - "epoch": 0.51, - "learning_rate": 4.1483917851979136e-05, - "loss": 4.3678, - "step": 472500 - }, - { - "epoch": 0.51, - "learning_rate": 4.1474906124838373e-05, - "loss": 4.3926, - "step": 473000 - }, - { - "epoch": 0.51, - "learning_rate": 4.146589439769761e-05, - "loss": 4.426, - "step": 473500 - }, - { - "epoch": 0.51, - "learning_rate": 4.145688267055685e-05, - "loss": 4.4384, - "step": 474000 - }, - { - "epoch": 0.51, - "learning_rate": 4.144787094341609e-05, - "loss": 4.4226, - "step": 474500 - }, - { - "epoch": 0.51, - "learning_rate": 4.143885921627533e-05, - "loss": 4.3985, - "step": 475000 - }, - { - "epoch": 0.51, - "learning_rate": 4.142984748913456e-05, - "loss": 4.3802, - "step": 475500 - }, - { - "epoch": 0.51, - "learning_rate": 4.14208357619938e-05, - "loss": 4.4457, - "step": 476000 - }, - { - "epoch": 0.52, - "learning_rate": 4.1411824034853035e-05, - "loss": 4.4333, - "step": 476500 - }, - { - "epoch": 0.52, - "learning_rate": 4.140281230771227e-05, - "loss": 4.3906, - "step": 477000 - }, - { - "epoch": 0.52, - "learning_rate": 4.139380058057151e-05, - "loss": 4.3618, - "step": 477500 - }, - { - "epoch": 0.52, - "learning_rate": 4.138478885343075e-05, - "loss": 4.4389, - "step": 478000 - }, - { - "epoch": 0.52, - "learning_rate": 4.1375777126289985e-05, - "loss": 4.354, - "step": 478500 - }, - { - "epoch": 0.52, - "learning_rate": 4.136676539914922e-05, - "loss": 4.4335, - "step": 479000 - }, - { - "epoch": 0.52, - "learning_rate": 4.135775367200846e-05, - "loss": 4.405, - "step": 479500 - }, - { - "epoch": 0.52, - "learning_rate": 4.13487419448677e-05, - "loss": 4.4223, - "step": 480000 - }, - { - "epoch": 0.52, - "learning_rate": 4.1339730217726934e-05, - "loss": 4.4074, - "step": 480500 - }, - { - "epoch": 0.52, - "learning_rate": 4.133071849058617e-05, - "loss": 4.3557, - "step": 481000 - }, - { - "epoch": 0.52, - "learning_rate": 4.132170676344541e-05, - "loss": 4.4342, - "step": 481500 - }, - { - "epoch": 0.52, - "learning_rate": 4.131269503630465e-05, - "loss": 4.3986, - "step": 482000 - }, - { - "epoch": 0.52, - "learning_rate": 4.1303683309163884e-05, - "loss": 4.4292, - "step": 482500 - }, - { - "epoch": 0.52, - "learning_rate": 4.129467158202312e-05, - "loss": 4.4526, - "step": 483000 - }, - { - "epoch": 0.52, - "learning_rate": 4.128565985488236e-05, - "loss": 4.4217, - "step": 483500 - }, - { - "epoch": 0.52, - "learning_rate": 4.1276648127741596e-05, - "loss": 4.3949, - "step": 484000 - }, - { - "epoch": 0.52, - "learning_rate": 4.1267636400600834e-05, - "loss": 4.4406, - "step": 484500 - }, - { - "epoch": 0.52, - "learning_rate": 4.1258624673460064e-05, - "loss": 4.4383, - "step": 485000 - }, - { - "epoch": 0.53, - "learning_rate": 4.12496129463193e-05, - "loss": 4.4106, - "step": 485500 - }, - { - "epoch": 0.53, - "learning_rate": 4.1240601219178546e-05, - "loss": 4.382, - "step": 486000 - }, - { - "epoch": 0.53, - "learning_rate": 4.123158949203778e-05, - "loss": 4.3267, - "step": 486500 - }, - { - "epoch": 0.53, - "learning_rate": 4.122257776489702e-05, - "loss": 4.4221, - "step": 487000 - }, - { - "epoch": 0.53, - "learning_rate": 4.121356603775626e-05, - "loss": 4.3966, - "step": 487500 - }, - { - "epoch": 0.53, - "learning_rate": 4.120455431061549e-05, - "loss": 4.3824, - "step": 488000 - }, - { - "epoch": 0.53, - "learning_rate": 4.1195542583474726e-05, - "loss": 4.4346, - "step": 488500 - }, - { - "epoch": 0.53, - "learning_rate": 4.1186530856333963e-05, - "loss": 4.3681, - "step": 489000 - }, - { - "epoch": 0.53, - "learning_rate": 4.11775191291932e-05, - "loss": 4.405, - "step": 489500 - }, - { - "epoch": 0.53, - "learning_rate": 4.1168507402052445e-05, - "loss": 4.4267, - "step": 490000 - }, - { - "epoch": 0.53, - "learning_rate": 4.115949567491168e-05, - "loss": 4.4356, - "step": 490500 - }, - { - "epoch": 0.53, - "learning_rate": 4.115048394777091e-05, - "loss": 4.3915, - "step": 491000 - }, - { - "epoch": 0.53, - "learning_rate": 4.114147222063015e-05, - "loss": 4.4071, - "step": 491500 - }, - { - "epoch": 0.53, - "learning_rate": 4.113246049348939e-05, - "loss": 4.4558, - "step": 492000 - }, - { - "epoch": 0.53, - "learning_rate": 4.1123448766348625e-05, - "loss": 4.4161, - "step": 492500 - }, - { - "epoch": 0.53, - "learning_rate": 4.111443703920786e-05, - "loss": 4.4322, - "step": 493000 - }, - { - "epoch": 0.53, - "learning_rate": 4.11054253120671e-05, - "loss": 4.4165, - "step": 493500 - }, - { - "epoch": 0.53, - "learning_rate": 4.1096413584926344e-05, - "loss": 4.3936, - "step": 494000 - }, - { - "epoch": 0.53, - "learning_rate": 4.1087401857785575e-05, - "loss": 4.464, - "step": 494500 - }, - { - "epoch": 0.54, - "learning_rate": 4.107839013064481e-05, - "loss": 4.4453, - "step": 495000 - }, - { - "epoch": 0.54, - "learning_rate": 4.106937840350405e-05, - "loss": 4.4496, - "step": 495500 - }, - { - "epoch": 0.54, - "learning_rate": 4.106036667636329e-05, - "loss": 4.4243, - "step": 496000 - }, - { - "epoch": 0.54, - "learning_rate": 4.1051354949222524e-05, - "loss": 4.4202, - "step": 496500 - }, - { - "epoch": 0.54, - "learning_rate": 4.104234322208176e-05, - "loss": 4.4393, - "step": 497000 - }, - { - "epoch": 0.54, - "learning_rate": 4.1033331494941e-05, - "loss": 4.3986, - "step": 497500 - }, - { - "epoch": 0.54, - "learning_rate": 4.102431976780024e-05, - "loss": 4.3453, - "step": 498000 - }, - { - "epoch": 0.54, - "learning_rate": 4.1015308040659474e-05, - "loss": 4.4282, - "step": 498500 - }, - { - "epoch": 0.54, - "learning_rate": 4.100629631351871e-05, - "loss": 4.4063, - "step": 499000 - }, - { - "epoch": 0.54, - "learning_rate": 4.099728458637795e-05, - "loss": 4.4041, - "step": 499500 - }, - { - "epoch": 0.54, - "learning_rate": 4.0988272859237186e-05, - "loss": 4.4121, - "step": 500000 - }, - { - "epoch": 0.54, - "learning_rate": 4.097926113209642e-05, - "loss": 4.3535, - "step": 500500 - }, - { - "epoch": 0.54, - "learning_rate": 4.0970249404955654e-05, - "loss": 4.4579, - "step": 501000 - }, - { - "epoch": 0.54, - "learning_rate": 4.09612376778149e-05, - "loss": 4.4047, - "step": 501500 - }, - { - "epoch": 0.54, - "learning_rate": 4.0952225950674136e-05, - "loss": 4.3953, - "step": 502000 - }, - { - "epoch": 0.54, - "learning_rate": 4.094321422353337e-05, - "loss": 4.3709, - "step": 502500 - }, - { - "epoch": 0.54, - "learning_rate": 4.093420249639261e-05, - "loss": 4.4017, - "step": 503000 - }, - { - "epoch": 0.54, - "learning_rate": 4.092519076925184e-05, - "loss": 4.3861, - "step": 503500 - }, - { - "epoch": 0.55, - "learning_rate": 4.091617904211108e-05, - "loss": 4.4664, - "step": 504000 - }, - { - "epoch": 0.55, - "learning_rate": 4.0907167314970316e-05, - "loss": 4.4029, - "step": 504500 - }, - { - "epoch": 0.55, - "learning_rate": 4.0898155587829554e-05, - "loss": 4.386, - "step": 505000 - }, - { - "epoch": 0.55, - "learning_rate": 4.08891438606888e-05, - "loss": 4.3983, - "step": 505500 - }, - { - "epoch": 0.55, - "learning_rate": 4.0880132133548035e-05, - "loss": 4.3899, - "step": 506000 - }, - { - "epoch": 0.55, - "learning_rate": 4.087112040640727e-05, - "loss": 4.3988, - "step": 506500 - }, - { - "epoch": 0.55, - "learning_rate": 4.08621086792665e-05, - "loss": 4.3771, - "step": 507000 - }, - { - "epoch": 0.55, - "learning_rate": 4.085309695212574e-05, - "loss": 4.3443, - "step": 507500 - }, - { - "epoch": 0.55, - "learning_rate": 4.084408522498498e-05, - "loss": 4.3714, - "step": 508000 - }, - { - "epoch": 0.55, - "learning_rate": 4.0835073497844215e-05, - "loss": 4.3909, - "step": 508500 - }, - { - "epoch": 0.55, - "learning_rate": 4.082606177070345e-05, - "loss": 4.4214, - "step": 509000 - }, - { - "epoch": 0.55, - "learning_rate": 4.081705004356269e-05, - "loss": 4.4305, - "step": 509500 - }, - { - "epoch": 0.55, - "learning_rate": 4.080803831642193e-05, - "loss": 4.3784, - "step": 510000 - }, - { - "epoch": 0.55, - "learning_rate": 4.0799026589281165e-05, - "loss": 4.4198, - "step": 510500 - }, - { - "epoch": 0.55, - "learning_rate": 4.07900148621404e-05, - "loss": 4.3954, - "step": 511000 - }, - { - "epoch": 0.55, - "learning_rate": 4.078100313499964e-05, - "loss": 4.4075, - "step": 511500 - }, - { - "epoch": 0.55, - "learning_rate": 4.077199140785888e-05, - "loss": 4.4065, - "step": 512000 - }, - { - "epoch": 0.55, - "learning_rate": 4.0762979680718115e-05, - "loss": 4.4122, - "step": 512500 - }, - { - "epoch": 0.55, - "learning_rate": 4.075396795357735e-05, - "loss": 4.4, - "step": 513000 - }, - { - "epoch": 0.56, - "learning_rate": 4.074495622643659e-05, - "loss": 4.3722, - "step": 513500 - }, - { - "epoch": 0.56, - "learning_rate": 4.073594449929583e-05, - "loss": 4.3375, - "step": 514000 - }, - { - "epoch": 0.56, - "learning_rate": 4.0726932772155064e-05, - "loss": 4.3655, - "step": 514500 - }, - { - "epoch": 0.56, - "learning_rate": 4.07179210450143e-05, - "loss": 4.3714, - "step": 515000 - }, - { - "epoch": 0.56, - "learning_rate": 4.070890931787354e-05, - "loss": 4.4154, - "step": 515500 - }, - { - "epoch": 0.56, - "learning_rate": 4.069989759073277e-05, - "loss": 4.4121, - "step": 516000 - }, - { - "epoch": 0.56, - "learning_rate": 4.069088586359201e-05, - "loss": 4.4102, - "step": 516500 - }, - { - "epoch": 0.56, - "learning_rate": 4.068187413645125e-05, - "loss": 4.3882, - "step": 517000 - }, - { - "epoch": 0.56, - "learning_rate": 4.067286240931049e-05, - "loss": 4.4476, - "step": 517500 - }, - { - "epoch": 0.56, - "learning_rate": 4.0663850682169726e-05, - "loss": 4.3978, - "step": 518000 - }, - { - "epoch": 0.56, - "learning_rate": 4.065483895502896e-05, - "loss": 4.4405, - "step": 518500 - }, - { - "epoch": 0.56, - "learning_rate": 4.06458272278882e-05, - "loss": 4.3647, - "step": 519000 - }, - { - "epoch": 0.56, - "learning_rate": 4.063681550074743e-05, - "loss": 4.3729, - "step": 519500 - }, - { - "epoch": 0.56, - "learning_rate": 4.062780377360667e-05, - "loss": 4.4138, - "step": 520000 - }, - { - "epoch": 0.56, - "learning_rate": 4.0618792046465906e-05, - "loss": 4.3248, - "step": 520500 - }, - { - "epoch": 0.56, - "learning_rate": 4.0609780319325144e-05, - "loss": 4.422, - "step": 521000 - }, - { - "epoch": 0.56, - "learning_rate": 4.060076859218439e-05, - "loss": 4.3538, - "step": 521500 - }, - { - "epoch": 0.56, - "learning_rate": 4.0591756865043625e-05, - "loss": 4.4099, - "step": 522000 - }, - { - "epoch": 0.57, - "learning_rate": 4.0582745137902856e-05, - "loss": 4.4193, - "step": 522500 - }, - { - "epoch": 0.57, - "learning_rate": 4.057373341076209e-05, - "loss": 4.3988, - "step": 523000 - }, - { - "epoch": 0.57, - "learning_rate": 4.056472168362133e-05, - "loss": 4.4022, - "step": 523500 - }, - { - "epoch": 0.57, - "learning_rate": 4.055570995648057e-05, - "loss": 4.3413, - "step": 524000 - }, - { - "epoch": 0.57, - "learning_rate": 4.0546698229339805e-05, - "loss": 4.434, - "step": 524500 - }, - { - "epoch": 0.57, - "learning_rate": 4.053768650219904e-05, - "loss": 4.3744, - "step": 525000 - }, - { - "epoch": 0.57, - "learning_rate": 4.052867477505828e-05, - "loss": 4.418, - "step": 525500 - }, - { - "epoch": 0.57, - "learning_rate": 4.051966304791752e-05, - "loss": 4.3814, - "step": 526000 - }, - { - "epoch": 0.57, - "learning_rate": 4.0510651320776755e-05, - "loss": 4.3454, - "step": 526500 - }, - { - "epoch": 0.57, - "learning_rate": 4.050163959363599e-05, - "loss": 4.3251, - "step": 527000 - }, - { - "epoch": 0.57, - "learning_rate": 4.049262786649523e-05, - "loss": 4.4182, - "step": 527500 - }, - { - "epoch": 0.57, - "learning_rate": 4.048361613935447e-05, - "loss": 4.3319, - "step": 528000 - }, - { - "epoch": 0.57, - "learning_rate": 4.0474604412213705e-05, - "loss": 4.3861, - "step": 528500 - }, - { - "epoch": 0.57, - "learning_rate": 4.046559268507294e-05, - "loss": 4.4092, - "step": 529000 - }, - { - "epoch": 0.57, - "learning_rate": 4.045658095793218e-05, - "loss": 4.397, - "step": 529500 - }, - { - "epoch": 0.57, - "learning_rate": 4.044756923079142e-05, - "loss": 4.3839, - "step": 530000 - }, - { - "epoch": 0.57, - "learning_rate": 4.0438557503650654e-05, - "loss": 4.4383, - "step": 530500 - }, - { - "epoch": 0.57, - "learning_rate": 4.042954577650989e-05, - "loss": 4.4198, - "step": 531000 - }, - { - "epoch": 0.57, - "learning_rate": 4.042053404936912e-05, - "loss": 4.3632, - "step": 531500 - }, - { - "epoch": 0.58, - "learning_rate": 4.041152232222836e-05, - "loss": 4.3722, - "step": 532000 - }, - { - "epoch": 0.58, - "learning_rate": 4.04025105950876e-05, - "loss": 4.3664, - "step": 532500 - }, - { - "epoch": 0.58, - "learning_rate": 4.039349886794684e-05, - "loss": 4.357, - "step": 533000 - }, - { - "epoch": 0.58, - "learning_rate": 4.038448714080608e-05, - "loss": 4.3484, - "step": 533500 - }, - { - "epoch": 0.58, - "learning_rate": 4.0375475413665316e-05, - "loss": 4.4506, - "step": 534000 - }, - { - "epoch": 0.58, - "learning_rate": 4.0366463686524553e-05, - "loss": 4.37, - "step": 534500 - }, - { - "epoch": 0.58, - "learning_rate": 4.0357451959383784e-05, - "loss": 4.3452, - "step": 535000 - }, - { - "epoch": 0.58, - "learning_rate": 4.034844023224302e-05, - "loss": 4.4018, - "step": 535500 - }, - { - "epoch": 0.58, - "learning_rate": 4.033942850510226e-05, - "loss": 4.4079, - "step": 536000 - }, - { - "epoch": 0.58, - "learning_rate": 4.0330416777961496e-05, - "loss": 4.3569, - "step": 536500 - }, - { - "epoch": 0.58, - "learning_rate": 4.032140505082074e-05, - "loss": 4.3495, - "step": 537000 - }, - { - "epoch": 0.58, - "learning_rate": 4.031239332367998e-05, - "loss": 4.3752, - "step": 537500 - }, - { - "epoch": 0.58, - "learning_rate": 4.030338159653921e-05, - "loss": 4.3821, - "step": 538000 - }, - { - "epoch": 0.58, - "learning_rate": 4.0294369869398446e-05, - "loss": 4.431, - "step": 538500 - }, - { - "epoch": 0.58, - "learning_rate": 4.028535814225768e-05, - "loss": 4.3057, - "step": 539000 - }, - { - "epoch": 0.58, - "learning_rate": 4.027634641511692e-05, - "loss": 4.3249, - "step": 539500 - }, - { - "epoch": 0.58, - "learning_rate": 4.026733468797616e-05, - "loss": 4.3181, - "step": 540000 - }, - { - "epoch": 0.58, - "learning_rate": 4.0258322960835395e-05, - "loss": 4.3905, - "step": 540500 - }, - { - "epoch": 0.59, - "learning_rate": 4.024931123369463e-05, - "loss": 4.3406, - "step": 541000 - }, - { - "epoch": 0.59, - "learning_rate": 4.024029950655387e-05, - "loss": 4.3545, - "step": 541500 - }, - { - "epoch": 0.59, - "learning_rate": 4.023128777941311e-05, - "loss": 4.3554, - "step": 542000 - }, - { - "epoch": 0.59, - "learning_rate": 4.0222276052272345e-05, - "loss": 4.4182, - "step": 542500 - }, - { - "epoch": 0.59, - "learning_rate": 4.021326432513158e-05, - "loss": 4.4599, - "step": 543000 - }, - { - "epoch": 0.59, - "learning_rate": 4.020425259799082e-05, - "loss": 4.326, - "step": 543500 - }, - { - "epoch": 0.59, - "learning_rate": 4.019524087085006e-05, - "loss": 4.3247, - "step": 544000 - }, - { - "epoch": 0.59, - "learning_rate": 4.0186229143709295e-05, - "loss": 4.4027, - "step": 544500 - }, - { - "epoch": 0.59, - "learning_rate": 4.017721741656853e-05, - "loss": 4.315, - "step": 545000 - }, - { - "epoch": 0.59, - "learning_rate": 4.016820568942777e-05, - "loss": 4.3967, - "step": 545500 - }, - { - "epoch": 0.59, - "learning_rate": 4.015919396228701e-05, - "loss": 4.3808, - "step": 546000 - }, - { - "epoch": 0.59, - "learning_rate": 4.0150182235146244e-05, - "loss": 4.3609, - "step": 546500 - }, - { - "epoch": 0.59, - "learning_rate": 4.014117050800548e-05, - "loss": 4.3969, - "step": 547000 - }, - { - "epoch": 0.59, - "learning_rate": 4.013215878086471e-05, - "loss": 4.3735, - "step": 547500 - }, - { - "epoch": 0.59, - "learning_rate": 4.012314705372395e-05, - "loss": 4.3567, - "step": 548000 - }, - { - "epoch": 0.59, - "learning_rate": 4.0114135326583194e-05, - "loss": 4.3614, - "step": 548500 - }, - { - "epoch": 0.59, - "learning_rate": 4.010512359944243e-05, - "loss": 4.3611, - "step": 549000 - }, - { - "epoch": 0.59, - "learning_rate": 4.009611187230167e-05, - "loss": 4.376, - "step": 549500 - }, - { - "epoch": 0.59, - "learning_rate": 4.0087100145160906e-05, - "loss": 4.3128, - "step": 550000 - }, - { - "epoch": 0.6, - "learning_rate": 4.007808841802014e-05, - "loss": 4.3885, - "step": 550500 - }, - { - "epoch": 0.6, - "learning_rate": 4.0069076690879374e-05, - "loss": 4.3767, - "step": 551000 - }, - { - "epoch": 0.6, - "learning_rate": 4.006006496373861e-05, - "loss": 4.3457, - "step": 551500 - }, - { - "epoch": 0.6, - "learning_rate": 4.005105323659785e-05, - "loss": 4.462, - "step": 552000 - }, - { - "epoch": 0.6, - "learning_rate": 4.004204150945709e-05, - "loss": 4.3849, - "step": 552500 - }, - { - "epoch": 0.6, - "learning_rate": 4.003302978231633e-05, - "loss": 4.3644, - "step": 553000 - }, - { - "epoch": 0.6, - "learning_rate": 4.002401805517556e-05, - "loss": 4.3445, - "step": 553500 - }, - { - "epoch": 0.6, - "learning_rate": 4.00150063280348e-05, - "loss": 4.3573, - "step": 554000 - }, - { - "epoch": 0.6, - "learning_rate": 4.0005994600894036e-05, - "loss": 4.3702, - "step": 554500 - }, - { - "epoch": 0.6, - "learning_rate": 3.999698287375327e-05, - "loss": 4.335, - "step": 555000 - }, - { - "epoch": 0.6, - "learning_rate": 3.998797114661251e-05, - "loss": 4.3592, - "step": 555500 - }, - { - "epoch": 0.6, - "learning_rate": 3.997895941947175e-05, - "loss": 4.3702, - "step": 556000 - }, - { - "epoch": 0.6, - "learning_rate": 3.9969947692330985e-05, - "loss": 4.3976, - "step": 556500 - }, - { - "epoch": 0.6, - "learning_rate": 3.996093596519022e-05, - "loss": 4.3542, - "step": 557000 - }, - { - "epoch": 0.6, - "learning_rate": 3.995192423804946e-05, - "loss": 4.3243, - "step": 557500 - }, - { - "epoch": 0.6, - "learning_rate": 3.99429125109087e-05, - "loss": 4.3865, - "step": 558000 - }, - { - "epoch": 0.6, - "learning_rate": 3.9933900783767935e-05, - "loss": 4.3937, - "step": 558500 - }, - { - "epoch": 0.6, - "learning_rate": 3.992488905662717e-05, - "loss": 4.4588, - "step": 559000 - }, - { - "epoch": 0.61, - "learning_rate": 3.991587732948641e-05, - "loss": 4.3763, - "step": 559500 - }, - { - "epoch": 0.61, - "learning_rate": 3.990686560234565e-05, - "loss": 4.3972, - "step": 560000 } ], - "max_steps": 2774163, + "max_steps": 330531, "num_train_epochs": 3, - "total_flos": 3.658088448e+16, + "total_flos": 7185530880000000.0, "trial_name": null, "trial_params": null }