diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9541 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 793047, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9968476017184356e-05, + "loss": 2.2908, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993695203436871e-05, + "loss": 1.9499, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.990542805155306e-05, + "loss": 1.8515, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9873904068737416e-05, + "loss": 1.7764, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.984238008592177e-05, + "loss": 1.7312, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981085610310612e-05, + "loss": 1.7093, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9779332120290476e-05, + "loss": 1.6757, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 4.974780813747483e-05, + "loss": 1.6496, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.971628415465918e-05, + "loss": 1.6336, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.968476017184354e-05, + "loss": 1.6159, + "step": 5000 + }, + { + "epoch": 0.02, + "learning_rate": 4.965323618902789e-05, + "loss": 1.6152, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9621712206212244e-05, + "loss": 1.5905, + "step": 6000 + }, + { + "epoch": 0.02, + "learning_rate": 4.95901882233966e-05, + "loss": 1.5704, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 4.955866424058096e-05, + "loss": 1.5667, + "step": 7000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9527140257765304e-05, + "loss": 1.5538, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 4.949561627494966e-05, + "loss": 1.5581, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 4.946409229213401e-05, + "loss": 1.5497, + "step": 8500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9432568309318364e-05, + "loss": 1.5373, + "step": 9000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9401044326502724e-05, + "loss": 1.5261, + "step": 9500 + }, + { + "epoch": 0.04, + "learning_rate": 4.936952034368707e-05, + "loss": 1.5222, + "step": 10000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9337996360871425e-05, + "loss": 1.5082, + "step": 10500 + }, + { + "epoch": 0.04, + "learning_rate": 4.930647237805578e-05, + "loss": 1.5114, + "step": 11000 + }, + { + "epoch": 0.04, + "learning_rate": 4.927494839524013e-05, + "loss": 1.5152, + "step": 11500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9243424412424485e-05, + "loss": 1.5012, + "step": 12000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9211900429608845e-05, + "loss": 1.4883, + "step": 12500 + }, + { + "epoch": 0.05, + "learning_rate": 4.918037644679319e-05, + "loss": 1.5023, + "step": 13000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9148852463977545e-05, + "loss": 1.4903, + "step": 13500 + }, + { + "epoch": 0.05, + "learning_rate": 4.91173284811619e-05, + "loss": 1.4818, + "step": 14000 + }, + { + "epoch": 0.05, + "learning_rate": 4.908580449834625e-05, + "loss": 1.4761, + "step": 14500 + }, + { + "epoch": 0.06, + "learning_rate": 4.905428051553061e-05, + "loss": 1.4685, + "step": 15000 + }, + { + "epoch": 0.06, + "learning_rate": 4.902275653271496e-05, + "loss": 1.4781, + "step": 15500 + }, + { + "epoch": 0.06, + "learning_rate": 4.899123254989931e-05, + "loss": 1.474, + "step": 16000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8959708567083666e-05, + "loss": 1.4616, + "step": 16500 + }, + { + "epoch": 0.06, + "learning_rate": 4.892818458426802e-05, + "loss": 1.4584, + "step": 17000 + }, + { + "epoch": 0.07, + "learning_rate": 4.889666060145238e-05, + "loss": 1.4573, + "step": 17500 + }, + { + "epoch": 0.07, + "learning_rate": 4.886513661863673e-05, + "loss": 1.4563, + "step": 18000 + }, + { + "epoch": 0.07, + "learning_rate": 4.883361263582108e-05, + "loss": 1.4433, + "step": 18500 + }, + { + "epoch": 0.07, + "learning_rate": 4.880208865300543e-05, + "loss": 1.4416, + "step": 19000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8770564670189786e-05, + "loss": 1.4432, + "step": 19500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8739040687374147e-05, + "loss": 1.4404, + "step": 20000 + }, + { + "epoch": 0.08, + "learning_rate": 4.87075167045585e-05, + "loss": 1.4457, + "step": 20500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8675992721742853e-05, + "loss": 1.4489, + "step": 21000 + }, + { + "epoch": 0.08, + "learning_rate": 4.86444687389272e-05, + "loss": 1.4411, + "step": 21500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8612944756111554e-05, + "loss": 1.4365, + "step": 22000 + }, + { + "epoch": 0.09, + "learning_rate": 4.858142077329591e-05, + "loss": 1.4291, + "step": 22500 + }, + { + "epoch": 0.09, + "learning_rate": 4.854989679048027e-05, + "loss": 1.4225, + "step": 23000 + }, + { + "epoch": 0.09, + "learning_rate": 4.851837280766462e-05, + "loss": 1.4206, + "step": 23500 + }, + { + "epoch": 0.09, + "learning_rate": 4.848684882484897e-05, + "loss": 1.4259, + "step": 24000 + }, + { + "epoch": 0.09, + "learning_rate": 4.845532484203332e-05, + "loss": 1.4254, + "step": 24500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8423800859217674e-05, + "loss": 1.4368, + "step": 25000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8392276876402034e-05, + "loss": 1.4213, + "step": 25500 + }, + { + "epoch": 0.1, + "learning_rate": 4.836075289358639e-05, + "loss": 1.4223, + "step": 26000 + }, + { + "epoch": 0.1, + "learning_rate": 4.832922891077074e-05, + "loss": 1.4233, + "step": 26500 + }, + { + "epoch": 0.1, + "learning_rate": 4.829770492795509e-05, + "loss": 1.4174, + "step": 27000 + }, + { + "epoch": 0.1, + "learning_rate": 4.826618094513944e-05, + "loss": 1.4244, + "step": 27500 + }, + { + "epoch": 0.11, + "learning_rate": 4.82346569623238e-05, + "loss": 1.4063, + "step": 28000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8203132979508155e-05, + "loss": 1.4058, + "step": 28500 + }, + { + "epoch": 0.11, + "learning_rate": 4.817160899669251e-05, + "loss": 1.4124, + "step": 29000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814008501387686e-05, + "loss": 1.3993, + "step": 29500 + }, + { + "epoch": 0.11, + "learning_rate": 4.810856103106121e-05, + "loss": 1.4086, + "step": 30000 + }, + { + "epoch": 0.12, + "learning_rate": 4.807703704824557e-05, + "loss": 1.4062, + "step": 30500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804551306542992e-05, + "loss": 1.3945, + "step": 31000 + }, + { + "epoch": 0.12, + "learning_rate": 4.8013989082614276e-05, + "loss": 1.4016, + "step": 31500 + }, + { + "epoch": 0.12, + "learning_rate": 4.798246509979863e-05, + "loss": 1.4041, + "step": 32000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7950941116982976e-05, + "loss": 1.4049, + "step": 32500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7919417134167336e-05, + "loss": 1.4018, + "step": 33000 + }, + { + "epoch": 0.13, + "learning_rate": 4.788789315135169e-05, + "loss": 1.3937, + "step": 33500 + }, + { + "epoch": 0.13, + "learning_rate": 4.785636916853604e-05, + "loss": 1.3996, + "step": 34000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7824845185720396e-05, + "loss": 1.3855, + "step": 34500 + }, + { + "epoch": 0.13, + "learning_rate": 4.779332120290475e-05, + "loss": 1.392, + "step": 35000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7761797220089096e-05, + "loss": 1.3924, + "step": 35500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7730273237273457e-05, + "loss": 1.3935, + "step": 36000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769874925445781e-05, + "loss": 1.3912, + "step": 36500 + }, + { + "epoch": 0.14, + "learning_rate": 4.7667225271642163e-05, + "loss": 1.3808, + "step": 37000 + }, + { + "epoch": 0.14, + "learning_rate": 4.763570128882652e-05, + "loss": 1.3806, + "step": 37500 + }, + { + "epoch": 0.14, + "learning_rate": 4.760417730601087e-05, + "loss": 1.3829, + "step": 38000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7572653323195224e-05, + "loss": 1.3838, + "step": 38500 + }, + { + "epoch": 0.15, + "learning_rate": 4.754112934037958e-05, + "loss": 1.3819, + "step": 39000 + }, + { + "epoch": 0.15, + "learning_rate": 4.750960535756393e-05, + "loss": 1.3824, + "step": 39500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7478081374748284e-05, + "loss": 1.3787, + "step": 40000 + }, + { + "epoch": 0.15, + "learning_rate": 4.744655739193264e-05, + "loss": 1.3675, + "step": 40500 + }, + { + "epoch": 0.16, + "learning_rate": 4.741503340911699e-05, + "loss": 1.3719, + "step": 41000 + }, + { + "epoch": 0.16, + "learning_rate": 4.7383509426301344e-05, + "loss": 1.3732, + "step": 41500 + }, + { + "epoch": 0.16, + "learning_rate": 4.73519854434857e-05, + "loss": 1.3658, + "step": 42000 + }, + { + "epoch": 0.16, + "learning_rate": 4.732046146067005e-05, + "loss": 1.3841, + "step": 42500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7288937477854405e-05, + "loss": 1.3722, + "step": 43000 + }, + { + "epoch": 0.16, + "learning_rate": 4.725741349503876e-05, + "loss": 1.3592, + "step": 43500 + }, + { + "epoch": 0.17, + "learning_rate": 4.722588951222311e-05, + "loss": 1.3792, + "step": 44000 + }, + { + "epoch": 0.17, + "learning_rate": 4.7194365529407465e-05, + "loss": 1.3712, + "step": 44500 + }, + { + "epoch": 0.17, + "learning_rate": 4.716284154659182e-05, + "loss": 1.374, + "step": 45000 + }, + { + "epoch": 0.17, + "learning_rate": 4.713131756377617e-05, + "loss": 1.3716, + "step": 45500 + }, + { + "epoch": 0.17, + "learning_rate": 4.7099793580960525e-05, + "loss": 1.367, + "step": 46000 + }, + { + "epoch": 0.18, + "learning_rate": 4.706826959814488e-05, + "loss": 1.364, + "step": 46500 + }, + { + "epoch": 0.18, + "learning_rate": 4.703674561532923e-05, + "loss": 1.3562, + "step": 47000 + }, + { + "epoch": 0.18, + "learning_rate": 4.7005221632513586e-05, + "loss": 1.3589, + "step": 47500 + }, + { + "epoch": 0.18, + "learning_rate": 4.697369764969794e-05, + "loss": 1.3608, + "step": 48000 + }, + { + "epoch": 0.18, + "learning_rate": 4.694217366688229e-05, + "loss": 1.3528, + "step": 48500 + }, + { + "epoch": 0.19, + "learning_rate": 4.6910649684066646e-05, + "loss": 1.3687, + "step": 49000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6879125701251e-05, + "loss": 1.3676, + "step": 49500 + }, + { + "epoch": 0.19, + "learning_rate": 4.684760171843535e-05, + "loss": 1.3578, + "step": 50000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6816077735619706e-05, + "loss": 1.3606, + "step": 50500 + }, + { + "epoch": 0.19, + "learning_rate": 4.678455375280406e-05, + "loss": 1.3575, + "step": 51000 + }, + { + "epoch": 0.19, + "learning_rate": 4.675302976998841e-05, + "loss": 1.3589, + "step": 51500 + }, + { + "epoch": 0.2, + "learning_rate": 4.672150578717277e-05, + "loss": 1.3624, + "step": 52000 + }, + { + "epoch": 0.2, + "learning_rate": 4.668998180435712e-05, + "loss": 1.353, + "step": 52500 + }, + { + "epoch": 0.2, + "learning_rate": 4.665845782154147e-05, + "loss": 1.3658, + "step": 53000 + }, + { + "epoch": 0.2, + "learning_rate": 4.662693383872583e-05, + "loss": 1.3542, + "step": 53500 + }, + { + "epoch": 0.2, + "learning_rate": 4.659540985591018e-05, + "loss": 1.3519, + "step": 54000 + }, + { + "epoch": 0.21, + "learning_rate": 4.656388587309454e-05, + "loss": 1.3473, + "step": 54500 + }, + { + "epoch": 0.21, + "learning_rate": 4.653236189027889e-05, + "loss": 1.3512, + "step": 55000 + }, + { + "epoch": 0.21, + "learning_rate": 4.650083790746324e-05, + "loss": 1.3439, + "step": 55500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6469313924647594e-05, + "loss": 1.3446, + "step": 56000 + }, + { + "epoch": 0.21, + "learning_rate": 4.643778994183195e-05, + "loss": 1.3454, + "step": 56500 + }, + { + "epoch": 0.22, + "learning_rate": 4.64062659590163e-05, + "loss": 1.35, + "step": 57000 + }, + { + "epoch": 0.22, + "learning_rate": 4.637474197620066e-05, + "loss": 1.3424, + "step": 57500 + }, + { + "epoch": 0.22, + "learning_rate": 4.634321799338501e-05, + "loss": 1.3411, + "step": 58000 + }, + { + "epoch": 0.22, + "learning_rate": 4.631169401056936e-05, + "loss": 1.3423, + "step": 58500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6280170027753715e-05, + "loss": 1.337, + "step": 59000 + }, + { + "epoch": 0.23, + "learning_rate": 4.624864604493807e-05, + "loss": 1.3488, + "step": 59500 + }, + { + "epoch": 0.23, + "learning_rate": 4.621712206212243e-05, + "loss": 1.3368, + "step": 60000 + }, + { + "epoch": 0.23, + "learning_rate": 4.618559807930678e-05, + "loss": 1.3392, + "step": 60500 + }, + { + "epoch": 0.23, + "learning_rate": 4.615407409649113e-05, + "loss": 1.3441, + "step": 61000 + }, + { + "epoch": 0.23, + "learning_rate": 4.612255011367548e-05, + "loss": 1.3338, + "step": 61500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6091026130859835e-05, + "loss": 1.3369, + "step": 62000 + }, + { + "epoch": 0.24, + "learning_rate": 4.6059502148044195e-05, + "loss": 1.3319, + "step": 62500 + }, + { + "epoch": 0.24, + "learning_rate": 4.602797816522855e-05, + "loss": 1.3293, + "step": 63000 + }, + { + "epoch": 0.24, + "learning_rate": 4.5996454182412896e-05, + "loss": 1.3421, + "step": 63500 + }, + { + "epoch": 0.24, + "learning_rate": 4.596493019959725e-05, + "loss": 1.3262, + "step": 64000 + }, + { + "epoch": 0.24, + "learning_rate": 4.59334062167816e-05, + "loss": 1.3422, + "step": 64500 + }, + { + "epoch": 0.25, + "learning_rate": 4.590188223396596e-05, + "loss": 1.3312, + "step": 65000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5870358251150316e-05, + "loss": 1.3497, + "step": 65500 + }, + { + "epoch": 0.25, + "learning_rate": 4.583883426833467e-05, + "loss": 1.3355, + "step": 66000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5807310285519016e-05, + "loss": 1.3249, + "step": 66500 + }, + { + "epoch": 0.25, + "learning_rate": 4.577578630270337e-05, + "loss": 1.3319, + "step": 67000 + }, + { + "epoch": 0.26, + "learning_rate": 4.574426231988772e-05, + "loss": 1.326, + "step": 67500 + }, + { + "epoch": 0.26, + "learning_rate": 4.571273833707208e-05, + "loss": 1.3303, + "step": 68000 + }, + { + "epoch": 0.26, + "learning_rate": 4.568121435425644e-05, + "loss": 1.3296, + "step": 68500 + }, + { + "epoch": 0.26, + "learning_rate": 4.564969037144078e-05, + "loss": 1.3301, + "step": 69000 + }, + { + "epoch": 0.26, + "learning_rate": 4.561816638862514e-05, + "loss": 1.3399, + "step": 69500 + }, + { + "epoch": 0.26, + "learning_rate": 4.558664240580949e-05, + "loss": 1.3273, + "step": 70000 + }, + { + "epoch": 0.27, + "learning_rate": 4.555511842299385e-05, + "loss": 1.3289, + "step": 70500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5523594440178204e-05, + "loss": 1.3256, + "step": 71000 + }, + { + "epoch": 0.27, + "learning_rate": 4.549207045736256e-05, + "loss": 1.3309, + "step": 71500 + }, + { + "epoch": 0.27, + "learning_rate": 4.5460546474546904e-05, + "loss": 1.3286, + "step": 72000 + }, + { + "epoch": 0.27, + "learning_rate": 4.542902249173126e-05, + "loss": 1.3211, + "step": 72500 + }, + { + "epoch": 0.28, + "learning_rate": 4.539749850891562e-05, + "loss": 1.3264, + "step": 73000 + }, + { + "epoch": 0.28, + "learning_rate": 4.536597452609997e-05, + "loss": 1.3232, + "step": 73500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5334450543284324e-05, + "loss": 1.3264, + "step": 74000 + }, + { + "epoch": 0.28, + "learning_rate": 4.530292656046868e-05, + "loss": 1.3244, + "step": 74500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5271402577653025e-05, + "loss": 1.3139, + "step": 75000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5239878594837385e-05, + "loss": 1.3191, + "step": 75500 + }, + { + "epoch": 0.29, + "learning_rate": 4.520835461202174e-05, + "loss": 1.3205, + "step": 76000 + }, + { + "epoch": 0.29, + "learning_rate": 4.517683062920609e-05, + "loss": 1.3061, + "step": 76500 + }, + { + "epoch": 0.29, + "learning_rate": 4.5145306646390445e-05, + "loss": 1.3169, + "step": 77000 + }, + { + "epoch": 0.29, + "learning_rate": 4.511378266357479e-05, + "loss": 1.3283, + "step": 77500 + }, + { + "epoch": 0.3, + "learning_rate": 4.5082258680759145e-05, + "loss": 1.3166, + "step": 78000 + }, + { + "epoch": 0.3, + "learning_rate": 4.5050734697943505e-05, + "loss": 1.3215, + "step": 78500 + }, + { + "epoch": 0.3, + "learning_rate": 4.501921071512786e-05, + "loss": 1.327, + "step": 79000 + }, + { + "epoch": 0.3, + "learning_rate": 4.498768673231221e-05, + "loss": 1.3162, + "step": 79500 + }, + { + "epoch": 0.3, + "learning_rate": 4.4956162749496566e-05, + "loss": 1.3175, + "step": 80000 + }, + { + "epoch": 0.3, + "learning_rate": 4.492463876668091e-05, + "loss": 1.3113, + "step": 80500 + }, + { + "epoch": 0.31, + "learning_rate": 4.489311478386527e-05, + "loss": 1.3117, + "step": 81000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4861590801049626e-05, + "loss": 1.3138, + "step": 81500 + }, + { + "epoch": 0.31, + "learning_rate": 4.483006681823398e-05, + "loss": 1.3192, + "step": 82000 + }, + { + "epoch": 0.31, + "learning_rate": 4.479854283541833e-05, + "loss": 1.3126, + "step": 82500 + }, + { + "epoch": 0.31, + "learning_rate": 4.4767018852602686e-05, + "loss": 1.3262, + "step": 83000 + }, + { + "epoch": 0.32, + "learning_rate": 4.473549486978704e-05, + "loss": 1.313, + "step": 83500 + }, + { + "epoch": 0.32, + "learning_rate": 4.470397088697139e-05, + "loss": 1.3137, + "step": 84000 + }, + { + "epoch": 0.32, + "learning_rate": 4.467244690415575e-05, + "loss": 1.3055, + "step": 84500 + }, + { + "epoch": 0.32, + "learning_rate": 4.46409229213401e-05, + "loss": 1.3179, + "step": 85000 + }, + { + "epoch": 0.32, + "learning_rate": 4.4609398938524453e-05, + "loss": 1.3109, + "step": 85500 + }, + { + "epoch": 0.33, + "learning_rate": 4.457787495570881e-05, + "loss": 1.3089, + "step": 86000 + }, + { + "epoch": 0.33, + "learning_rate": 4.454635097289316e-05, + "loss": 1.318, + "step": 86500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4514826990077514e-05, + "loss": 1.3125, + "step": 87000 + }, + { + "epoch": 0.33, + "learning_rate": 4.448330300726187e-05, + "loss": 1.3045, + "step": 87500 + }, + { + "epoch": 0.33, + "learning_rate": 4.445177902444622e-05, + "loss": 1.3032, + "step": 88000 + }, + { + "epoch": 0.33, + "learning_rate": 4.4420255041630574e-05, + "loss": 1.3108, + "step": 88500 + }, + { + "epoch": 0.34, + "learning_rate": 4.438873105881493e-05, + "loss": 1.3126, + "step": 89000 + }, + { + "epoch": 0.34, + "learning_rate": 4.435720707599928e-05, + "loss": 1.3027, + "step": 89500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4325683093183634e-05, + "loss": 1.3019, + "step": 90000 + }, + { + "epoch": 0.34, + "learning_rate": 4.429415911036799e-05, + "loss": 1.3121, + "step": 90500 + }, + { + "epoch": 0.34, + "learning_rate": 4.426263512755234e-05, + "loss": 1.3049, + "step": 91000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4231111144736695e-05, + "loss": 1.3035, + "step": 91500 + }, + { + "epoch": 0.35, + "learning_rate": 4.419958716192105e-05, + "loss": 1.3013, + "step": 92000 + }, + { + "epoch": 0.35, + "learning_rate": 4.41680631791054e-05, + "loss": 1.3063, + "step": 92500 + }, + { + "epoch": 0.35, + "learning_rate": 4.4136539196289755e-05, + "loss": 1.3069, + "step": 93000 + }, + { + "epoch": 0.35, + "learning_rate": 4.410501521347411e-05, + "loss": 1.2973, + "step": 93500 + }, + { + "epoch": 0.36, + "learning_rate": 4.407349123065846e-05, + "loss": 1.3056, + "step": 94000 + }, + { + "epoch": 0.36, + "learning_rate": 4.4041967247842815e-05, + "loss": 1.3061, + "step": 94500 + }, + { + "epoch": 0.36, + "learning_rate": 4.401044326502717e-05, + "loss": 1.3061, + "step": 95000 + }, + { + "epoch": 0.36, + "learning_rate": 4.397891928221152e-05, + "loss": 1.2963, + "step": 95500 + }, + { + "epoch": 0.36, + "learning_rate": 4.3947395299395876e-05, + "loss": 1.2948, + "step": 96000 + }, + { + "epoch": 0.37, + "learning_rate": 4.391587131658023e-05, + "loss": 1.3008, + "step": 96500 + }, + { + "epoch": 0.37, + "learning_rate": 4.388434733376459e-05, + "loss": 1.2946, + "step": 97000 + }, + { + "epoch": 0.37, + "learning_rate": 4.3852823350948936e-05, + "loss": 1.2885, + "step": 97500 + }, + { + "epoch": 0.37, + "learning_rate": 4.382129936813329e-05, + "loss": 1.2894, + "step": 98000 + }, + { + "epoch": 0.37, + "learning_rate": 4.378977538531764e-05, + "loss": 1.2977, + "step": 98500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3758251402501996e-05, + "loss": 1.2929, + "step": 99000 + }, + { + "epoch": 0.38, + "learning_rate": 4.372672741968635e-05, + "loss": 1.2893, + "step": 99500 + }, + { + "epoch": 0.38, + "learning_rate": 4.36952034368707e-05, + "loss": 1.2903, + "step": 100000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3663679454055057e-05, + "loss": 1.294, + "step": 100500 + }, + { + "epoch": 0.38, + "learning_rate": 4.363215547123941e-05, + "loss": 1.296, + "step": 101000 + }, + { + "epoch": 0.38, + "learning_rate": 4.3600631488423763e-05, + "loss": 1.2844, + "step": 101500 + }, + { + "epoch": 0.39, + "learning_rate": 4.356910750560812e-05, + "loss": 1.2975, + "step": 102000 + }, + { + "epoch": 0.39, + "learning_rate": 4.353758352279248e-05, + "loss": 1.2952, + "step": 102500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3506059539976824e-05, + "loss": 1.2894, + "step": 103000 + }, + { + "epoch": 0.39, + "learning_rate": 4.347453555716118e-05, + "loss": 1.2931, + "step": 103500 + }, + { + "epoch": 0.39, + "learning_rate": 4.344301157434553e-05, + "loss": 1.2907, + "step": 104000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3411487591529884e-05, + "loss": 1.3006, + "step": 104500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3379963608714244e-05, + "loss": 1.2961, + "step": 105000 + }, + { + "epoch": 0.4, + "learning_rate": 4.33484396258986e-05, + "loss": 1.2926, + "step": 105500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3316915643082944e-05, + "loss": 1.2992, + "step": 106000 + }, + { + "epoch": 0.4, + "learning_rate": 4.32853916602673e-05, + "loss": 1.2856, + "step": 106500 + }, + { + "epoch": 0.4, + "learning_rate": 4.325386767745165e-05, + "loss": 1.2947, + "step": 107000 + }, + { + "epoch": 0.41, + "learning_rate": 4.322234369463601e-05, + "loss": 1.2989, + "step": 107500 + }, + { + "epoch": 0.41, + "learning_rate": 4.3190819711820365e-05, + "loss": 1.2981, + "step": 108000 + }, + { + "epoch": 0.41, + "learning_rate": 4.315929572900471e-05, + "loss": 1.284, + "step": 108500 + }, + { + "epoch": 0.41, + "learning_rate": 4.3127771746189065e-05, + "loss": 1.282, + "step": 109000 + }, + { + "epoch": 0.41, + "learning_rate": 4.309624776337342e-05, + "loss": 1.281, + "step": 109500 + }, + { + "epoch": 0.42, + "learning_rate": 4.306472378055777e-05, + "loss": 1.2902, + "step": 110000 + }, + { + "epoch": 0.42, + "learning_rate": 4.303319979774213e-05, + "loss": 1.2881, + "step": 110500 + }, + { + "epoch": 0.42, + "learning_rate": 4.3001675814926486e-05, + "loss": 1.2849, + "step": 111000 + }, + { + "epoch": 0.42, + "learning_rate": 4.297015183211083e-05, + "loss": 1.2867, + "step": 111500 + }, + { + "epoch": 0.42, + "learning_rate": 4.2938627849295186e-05, + "loss": 1.2795, + "step": 112000 + }, + { + "epoch": 0.43, + "learning_rate": 4.290710386647954e-05, + "loss": 1.2809, + "step": 112500 + }, + { + "epoch": 0.43, + "learning_rate": 4.28755798836639e-05, + "loss": 1.2786, + "step": 113000 + }, + { + "epoch": 0.43, + "learning_rate": 4.284405590084825e-05, + "loss": 1.2806, + "step": 113500 + }, + { + "epoch": 0.43, + "learning_rate": 4.2812531918032606e-05, + "loss": 1.2939, + "step": 114000 + }, + { + "epoch": 0.43, + "learning_rate": 4.278100793521695e-05, + "loss": 1.2797, + "step": 114500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2749483952401306e-05, + "loss": 1.2836, + "step": 115000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2717959969585666e-05, + "loss": 1.274, + "step": 115500 + }, + { + "epoch": 0.44, + "learning_rate": 4.268643598677002e-05, + "loss": 1.2819, + "step": 116000 + }, + { + "epoch": 0.44, + "learning_rate": 4.265491200395437e-05, + "loss": 1.2758, + "step": 116500 + }, + { + "epoch": 0.44, + "learning_rate": 4.262338802113872e-05, + "loss": 1.2819, + "step": 117000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2591864038323073e-05, + "loss": 1.2888, + "step": 117500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2560340055507434e-05, + "loss": 1.2723, + "step": 118000 + }, + { + "epoch": 0.45, + "learning_rate": 4.252881607269179e-05, + "loss": 1.2848, + "step": 118500 + }, + { + "epoch": 0.45, + "learning_rate": 4.249729208987614e-05, + "loss": 1.2675, + "step": 119000 + }, + { + "epoch": 0.45, + "learning_rate": 4.2465768107060494e-05, + "loss": 1.2842, + "step": 119500 + }, + { + "epoch": 0.45, + "learning_rate": 4.243424412424484e-05, + "loss": 1.2751, + "step": 120000 + }, + { + "epoch": 0.46, + "learning_rate": 4.24027201414292e-05, + "loss": 1.278, + "step": 120500 + }, + { + "epoch": 0.46, + "learning_rate": 4.2371196158613554e-05, + "loss": 1.2786, + "step": 121000 + }, + { + "epoch": 0.46, + "learning_rate": 4.233967217579791e-05, + "loss": 1.2816, + "step": 121500 + }, + { + "epoch": 0.46, + "learning_rate": 4.230814819298226e-05, + "loss": 1.2707, + "step": 122000 + }, + { + "epoch": 0.46, + "learning_rate": 4.227662421016661e-05, + "loss": 1.2715, + "step": 122500 + }, + { + "epoch": 0.47, + "learning_rate": 4.224510022735096e-05, + "loss": 1.2801, + "step": 123000 + }, + { + "epoch": 0.47, + "learning_rate": 4.221357624453532e-05, + "loss": 1.2818, + "step": 123500 + }, + { + "epoch": 0.47, + "learning_rate": 4.2182052261719675e-05, + "loss": 1.2813, + "step": 124000 + }, + { + "epoch": 0.47, + "learning_rate": 4.215052827890403e-05, + "loss": 1.2805, + "step": 124500 + }, + { + "epoch": 0.47, + "learning_rate": 4.211900429608838e-05, + "loss": 1.2766, + "step": 125000 + }, + { + "epoch": 0.47, + "learning_rate": 4.208748031327273e-05, + "loss": 1.2736, + "step": 125500 + }, + { + "epoch": 0.48, + "learning_rate": 4.205595633045709e-05, + "loss": 1.2794, + "step": 126000 + }, + { + "epoch": 0.48, + "learning_rate": 4.202443234764144e-05, + "loss": 1.2829, + "step": 126500 + }, + { + "epoch": 0.48, + "learning_rate": 4.1992908364825795e-05, + "loss": 1.274, + "step": 127000 + }, + { + "epoch": 0.48, + "learning_rate": 4.196138438201015e-05, + "loss": 1.2724, + "step": 127500 + }, + { + "epoch": 0.48, + "learning_rate": 4.19298603991945e-05, + "loss": 1.2762, + "step": 128000 + }, + { + "epoch": 0.49, + "learning_rate": 4.1898336416378856e-05, + "loss": 1.2685, + "step": 128500 + }, + { + "epoch": 0.49, + "learning_rate": 4.186681243356321e-05, + "loss": 1.2634, + "step": 129000 + }, + { + "epoch": 0.49, + "learning_rate": 4.183528845074756e-05, + "loss": 1.2662, + "step": 129500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1803764467931916e-05, + "loss": 1.2812, + "step": 130000 + }, + { + "epoch": 0.49, + "learning_rate": 4.177224048511627e-05, + "loss": 1.2663, + "step": 130500 + }, + { + "epoch": 0.5, + "learning_rate": 4.174071650230062e-05, + "loss": 1.279, + "step": 131000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1709192519484976e-05, + "loss": 1.2679, + "step": 131500 + }, + { + "epoch": 0.5, + "learning_rate": 4.167766853666933e-05, + "loss": 1.2692, + "step": 132000 + }, + { + "epoch": 0.5, + "learning_rate": 4.164614455385368e-05, + "loss": 1.2751, + "step": 132500 + }, + { + "epoch": 0.5, + "learning_rate": 4.161462057103804e-05, + "loss": 1.272, + "step": 133000 + }, + { + "epoch": 0.51, + "learning_rate": 4.158309658822239e-05, + "loss": 1.2676, + "step": 133500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1551572605406744e-05, + "loss": 1.2581, + "step": 134000 + }, + { + "epoch": 0.51, + "learning_rate": 4.15200486225911e-05, + "loss": 1.2685, + "step": 134500 + }, + { + "epoch": 0.51, + "learning_rate": 4.148852463977545e-05, + "loss": 1.2698, + "step": 135000 + }, + { + "epoch": 0.51, + "learning_rate": 4.1457000656959804e-05, + "loss": 1.2633, + "step": 135500 + }, + { + "epoch": 0.51, + "learning_rate": 4.142547667414416e-05, + "loss": 1.267, + "step": 136000 + }, + { + "epoch": 0.52, + "learning_rate": 4.139395269132851e-05, + "loss": 1.2745, + "step": 136500 + }, + { + "epoch": 0.52, + "learning_rate": 4.1362428708512864e-05, + "loss": 1.2688, + "step": 137000 + }, + { + "epoch": 0.52, + "learning_rate": 4.133090472569722e-05, + "loss": 1.2691, + "step": 137500 + }, + { + "epoch": 0.52, + "learning_rate": 4.129938074288157e-05, + "loss": 1.2614, + "step": 138000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1267856760065924e-05, + "loss": 1.2656, + "step": 138500 + }, + { + "epoch": 0.53, + "learning_rate": 4.123633277725028e-05, + "loss": 1.2741, + "step": 139000 + }, + { + "epoch": 0.53, + "learning_rate": 4.120480879443463e-05, + "loss": 1.2673, + "step": 139500 + }, + { + "epoch": 0.53, + "learning_rate": 4.1173284811618985e-05, + "loss": 1.2641, + "step": 140000 + }, + { + "epoch": 0.53, + "learning_rate": 4.114176082880334e-05, + "loss": 1.2654, + "step": 140500 + }, + { + "epoch": 0.53, + "learning_rate": 4.111023684598769e-05, + "loss": 1.2634, + "step": 141000 + }, + { + "epoch": 0.54, + "learning_rate": 4.1078712863172045e-05, + "loss": 1.2698, + "step": 141500 + }, + { + "epoch": 0.54, + "learning_rate": 4.1047188880356405e-05, + "loss": 1.2551, + "step": 142000 + }, + { + "epoch": 0.54, + "learning_rate": 4.101566489754075e-05, + "loss": 1.2571, + "step": 142500 + }, + { + "epoch": 0.54, + "learning_rate": 4.0984140914725105e-05, + "loss": 1.2685, + "step": 143000 + }, + { + "epoch": 0.54, + "learning_rate": 4.095261693190946e-05, + "loss": 1.2608, + "step": 143500 + }, + { + "epoch": 0.54, + "learning_rate": 4.092109294909381e-05, + "loss": 1.2628, + "step": 144000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0889568966278166e-05, + "loss": 1.2666, + "step": 144500 + }, + { + "epoch": 0.55, + "learning_rate": 4.085804498346252e-05, + "loss": 1.2568, + "step": 145000 + }, + { + "epoch": 0.55, + "learning_rate": 4.082652100064687e-05, + "loss": 1.2561, + "step": 145500 + }, + { + "epoch": 0.55, + "learning_rate": 4.0794997017831226e-05, + "loss": 1.2623, + "step": 146000 + }, + { + "epoch": 0.55, + "learning_rate": 4.076347303501558e-05, + "loss": 1.2662, + "step": 146500 + }, + { + "epoch": 0.56, + "learning_rate": 4.073194905219993e-05, + "loss": 1.2621, + "step": 147000 + }, + { + "epoch": 0.56, + "learning_rate": 4.070042506938429e-05, + "loss": 1.2644, + "step": 147500 + }, + { + "epoch": 0.56, + "learning_rate": 4.066890108656864e-05, + "loss": 1.2604, + "step": 148000 + }, + { + "epoch": 0.56, + "learning_rate": 4.063737710375299e-05, + "loss": 1.2581, + "step": 148500 + }, + { + "epoch": 0.56, + "learning_rate": 4.060585312093735e-05, + "loss": 1.2607, + "step": 149000 + }, + { + "epoch": 0.57, + "learning_rate": 4.05743291381217e-05, + "loss": 1.2517, + "step": 149500 + }, + { + "epoch": 0.57, + "learning_rate": 4.054280515530606e-05, + "loss": 1.2597, + "step": 150000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0511281172490414e-05, + "loss": 1.2582, + "step": 150500 + }, + { + "epoch": 0.57, + "learning_rate": 4.047975718967476e-05, + "loss": 1.249, + "step": 151000 + }, + { + "epoch": 0.57, + "learning_rate": 4.0448233206859114e-05, + "loss": 1.2691, + "step": 151500 + }, + { + "epoch": 0.57, + "learning_rate": 4.041670922404347e-05, + "loss": 1.2587, + "step": 152000 + }, + { + "epoch": 0.58, + "learning_rate": 4.038518524122783e-05, + "loss": 1.2517, + "step": 152500 + }, + { + "epoch": 0.58, + "learning_rate": 4.035366125841218e-05, + "loss": 1.2535, + "step": 153000 + }, + { + "epoch": 0.58, + "learning_rate": 4.032213727559653e-05, + "loss": 1.2532, + "step": 153500 + }, + { + "epoch": 0.58, + "learning_rate": 4.029061329278088e-05, + "loss": 1.2524, + "step": 154000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0259089309965234e-05, + "loss": 1.2587, + "step": 154500 + }, + { + "epoch": 0.59, + "learning_rate": 4.022756532714959e-05, + "loss": 1.2537, + "step": 155000 + }, + { + "epoch": 0.59, + "learning_rate": 4.019604134433395e-05, + "loss": 1.2535, + "step": 155500 + }, + { + "epoch": 0.59, + "learning_rate": 4.01645173615183e-05, + "loss": 1.2587, + "step": 156000 + }, + { + "epoch": 0.59, + "learning_rate": 4.013299337870265e-05, + "loss": 1.2454, + "step": 156500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0101469395887e-05, + "loss": 1.245, + "step": 157000 + }, + { + "epoch": 0.6, + "learning_rate": 4.0069945413071355e-05, + "loss": 1.2485, + "step": 157500 + }, + { + "epoch": 0.6, + "learning_rate": 4.0038421430255715e-05, + "loss": 1.2613, + "step": 158000 + }, + { + "epoch": 0.6, + "learning_rate": 4.000689744744007e-05, + "loss": 1.2438, + "step": 158500 + }, + { + "epoch": 0.6, + "learning_rate": 3.997537346462442e-05, + "loss": 1.2495, + "step": 159000 + }, + { + "epoch": 0.6, + "learning_rate": 3.994384948180877e-05, + "loss": 1.2535, + "step": 159500 + }, + { + "epoch": 0.61, + "learning_rate": 3.991232549899312e-05, + "loss": 1.2556, + "step": 160000 + }, + { + "epoch": 0.61, + "learning_rate": 3.988080151617748e-05, + "loss": 1.2549, + "step": 160500 + }, + { + "epoch": 0.61, + "learning_rate": 3.9849277533361836e-05, + "loss": 1.2447, + "step": 161000 + }, + { + "epoch": 0.61, + "learning_rate": 3.981775355054619e-05, + "loss": 1.2486, + "step": 161500 + }, + { + "epoch": 0.61, + "learning_rate": 3.9786229567730536e-05, + "loss": 1.253, + "step": 162000 + }, + { + "epoch": 0.61, + "learning_rate": 3.975470558491489e-05, + "loss": 1.2464, + "step": 162500 + }, + { + "epoch": 0.62, + "learning_rate": 3.972318160209925e-05, + "loss": 1.2555, + "step": 163000 + }, + { + "epoch": 0.62, + "learning_rate": 3.96916576192836e-05, + "loss": 1.2475, + "step": 163500 + }, + { + "epoch": 0.62, + "learning_rate": 3.9660133636467956e-05, + "loss": 1.2646, + "step": 164000 + }, + { + "epoch": 0.62, + "learning_rate": 3.962860965365231e-05, + "loss": 1.2571, + "step": 164500 + }, + { + "epoch": 0.62, + "learning_rate": 3.9597085670836657e-05, + "loss": 1.2415, + "step": 165000 + }, + { + "epoch": 0.63, + "learning_rate": 3.956556168802101e-05, + "loss": 1.2467, + "step": 165500 + }, + { + "epoch": 0.63, + "learning_rate": 3.953403770520537e-05, + "loss": 1.2416, + "step": 166000 + }, + { + "epoch": 0.63, + "learning_rate": 3.9502513722389724e-05, + "loss": 1.249, + "step": 166500 + }, + { + "epoch": 0.63, + "learning_rate": 3.947098973957408e-05, + "loss": 1.2416, + "step": 167000 + }, + { + "epoch": 0.63, + "learning_rate": 3.9439465756758424e-05, + "loss": 1.2555, + "step": 167500 + }, + { + "epoch": 0.64, + "learning_rate": 3.940794177394278e-05, + "loss": 1.2515, + "step": 168000 + }, + { + "epoch": 0.64, + "learning_rate": 3.937641779112714e-05, + "loss": 1.2445, + "step": 168500 + }, + { + "epoch": 0.64, + "learning_rate": 3.934489380831149e-05, + "loss": 1.2498, + "step": 169000 + }, + { + "epoch": 0.64, + "learning_rate": 3.9313369825495844e-05, + "loss": 1.2459, + "step": 169500 + }, + { + "epoch": 0.64, + "learning_rate": 3.92818458426802e-05, + "loss": 1.2501, + "step": 170000 + }, + { + "epoch": 0.64, + "learning_rate": 3.9250321859864544e-05, + "loss": 1.2514, + "step": 170500 + }, + { + "epoch": 0.65, + "learning_rate": 3.9218797877048905e-05, + "loss": 1.2483, + "step": 171000 + }, + { + "epoch": 0.65, + "learning_rate": 3.918727389423326e-05, + "loss": 1.2369, + "step": 171500 + }, + { + "epoch": 0.65, + "learning_rate": 3.915574991141761e-05, + "loss": 1.2338, + "step": 172000 + }, + { + "epoch": 0.65, + "learning_rate": 3.9124225928601965e-05, + "loss": 1.2439, + "step": 172500 + }, + { + "epoch": 0.65, + "learning_rate": 3.909270194578632e-05, + "loss": 1.2419, + "step": 173000 + }, + { + "epoch": 0.66, + "learning_rate": 3.906117796297067e-05, + "loss": 1.2442, + "step": 173500 + }, + { + "epoch": 0.66, + "learning_rate": 3.9029653980155025e-05, + "loss": 1.2567, + "step": 174000 + }, + { + "epoch": 0.66, + "learning_rate": 3.899812999733938e-05, + "loss": 1.2435, + "step": 174500 + }, + { + "epoch": 0.66, + "learning_rate": 3.896660601452373e-05, + "loss": 1.2391, + "step": 175000 + }, + { + "epoch": 0.66, + "learning_rate": 3.8935082031708086e-05, + "loss": 1.2427, + "step": 175500 + }, + { + "epoch": 0.67, + "learning_rate": 3.890355804889244e-05, + "loss": 1.2344, + "step": 176000 + }, + { + "epoch": 0.67, + "learning_rate": 3.887203406607679e-05, + "loss": 1.237, + "step": 176500 + }, + { + "epoch": 0.67, + "learning_rate": 3.8840510083261146e-05, + "loss": 1.2406, + "step": 177000 + }, + { + "epoch": 0.67, + "learning_rate": 3.88089861004455e-05, + "loss": 1.2416, + "step": 177500 + }, + { + "epoch": 0.67, + "learning_rate": 3.877746211762985e-05, + "loss": 1.2482, + "step": 178000 + }, + { + "epoch": 0.68, + "learning_rate": 3.8745938134814206e-05, + "loss": 1.2516, + "step": 178500 + }, + { + "epoch": 0.68, + "learning_rate": 3.871441415199856e-05, + "loss": 1.2459, + "step": 179000 + }, + { + "epoch": 0.68, + "learning_rate": 3.868289016918291e-05, + "loss": 1.2477, + "step": 179500 + }, + { + "epoch": 0.68, + "learning_rate": 3.8651366186367266e-05, + "loss": 1.2469, + "step": 180000 + }, + { + "epoch": 0.68, + "learning_rate": 3.861984220355162e-05, + "loss": 1.2431, + "step": 180500 + }, + { + "epoch": 0.68, + "learning_rate": 3.858831822073597e-05, + "loss": 1.2371, + "step": 181000 + }, + { + "epoch": 0.69, + "learning_rate": 3.855679423792033e-05, + "loss": 1.2475, + "step": 181500 + }, + { + "epoch": 0.69, + "learning_rate": 3.852527025510468e-05, + "loss": 1.2437, + "step": 182000 + }, + { + "epoch": 0.69, + "learning_rate": 3.8493746272289034e-05, + "loss": 1.246, + "step": 182500 + }, + { + "epoch": 0.69, + "learning_rate": 3.846222228947339e-05, + "loss": 1.2453, + "step": 183000 + }, + { + "epoch": 0.69, + "learning_rate": 3.843069830665774e-05, + "loss": 1.2343, + "step": 183500 + }, + { + "epoch": 0.7, + "learning_rate": 3.8399174323842094e-05, + "loss": 1.2378, + "step": 184000 + }, + { + "epoch": 0.7, + "learning_rate": 3.836765034102645e-05, + "loss": 1.2409, + "step": 184500 + }, + { + "epoch": 0.7, + "learning_rate": 3.83361263582108e-05, + "loss": 1.2416, + "step": 185000 + }, + { + "epoch": 0.7, + "learning_rate": 3.8304602375395154e-05, + "loss": 1.237, + "step": 185500 + }, + { + "epoch": 0.7, + "learning_rate": 3.827307839257951e-05, + "loss": 1.2444, + "step": 186000 + }, + { + "epoch": 0.71, + "learning_rate": 3.824155440976386e-05, + "loss": 1.2306, + "step": 186500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8210030426948215e-05, + "loss": 1.2276, + "step": 187000 + }, + { + "epoch": 0.71, + "learning_rate": 3.817850644413257e-05, + "loss": 1.2383, + "step": 187500 + }, + { + "epoch": 0.71, + "learning_rate": 3.814698246131692e-05, + "loss": 1.2423, + "step": 188000 + }, + { + "epoch": 0.71, + "learning_rate": 3.8115458478501275e-05, + "loss": 1.2351, + "step": 188500 + }, + { + "epoch": 0.71, + "learning_rate": 3.808393449568563e-05, + "loss": 1.2293, + "step": 189000 + }, + { + "epoch": 0.72, + "learning_rate": 3.805241051286998e-05, + "loss": 1.2369, + "step": 189500 + }, + { + "epoch": 0.72, + "learning_rate": 3.802088653005434e-05, + "loss": 1.2348, + "step": 190000 + }, + { + "epoch": 0.72, + "learning_rate": 3.798936254723869e-05, + "loss": 1.2385, + "step": 190500 + }, + { + "epoch": 0.72, + "learning_rate": 3.795783856442304e-05, + "loss": 1.2393, + "step": 191000 + }, + { + "epoch": 0.72, + "learning_rate": 3.7926314581607395e-05, + "loss": 1.2429, + "step": 191500 + }, + { + "epoch": 0.73, + "learning_rate": 3.789479059879175e-05, + "loss": 1.23, + "step": 192000 + }, + { + "epoch": 0.73, + "learning_rate": 3.786326661597611e-05, + "loss": 1.2318, + "step": 192500 + }, + { + "epoch": 0.73, + "learning_rate": 3.7831742633160456e-05, + "loss": 1.2271, + "step": 193000 + }, + { + "epoch": 0.73, + "learning_rate": 3.780021865034481e-05, + "loss": 1.2345, + "step": 193500 + }, + { + "epoch": 0.73, + "learning_rate": 3.776869466752916e-05, + "loss": 1.2294, + "step": 194000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7737170684713516e-05, + "loss": 1.2528, + "step": 194500 + }, + { + "epoch": 0.74, + "learning_rate": 3.7705646701897876e-05, + "loss": 1.23, + "step": 195000 + }, + { + "epoch": 0.74, + "learning_rate": 3.767412271908223e-05, + "loss": 1.2373, + "step": 195500 + }, + { + "epoch": 0.74, + "learning_rate": 3.7642598736266576e-05, + "loss": 1.2306, + "step": 196000 + }, + { + "epoch": 0.74, + "learning_rate": 3.761107475345093e-05, + "loss": 1.2324, + "step": 196500 + }, + { + "epoch": 0.75, + "learning_rate": 3.757955077063528e-05, + "loss": 1.2337, + "step": 197000 + }, + { + "epoch": 0.75, + "learning_rate": 3.7548026787819643e-05, + "loss": 1.2266, + "step": 197500 + }, + { + "epoch": 0.75, + "learning_rate": 3.7516502805004e-05, + "loss": 1.2244, + "step": 198000 + }, + { + "epoch": 0.75, + "learning_rate": 3.7484978822188344e-05, + "loss": 1.2234, + "step": 198500 + }, + { + "epoch": 0.75, + "learning_rate": 3.74534548393727e-05, + "loss": 1.2287, + "step": 199000 + }, + { + "epoch": 0.75, + "learning_rate": 3.742193085655705e-05, + "loss": 1.2306, + "step": 199500 + }, + { + "epoch": 0.76, + "learning_rate": 3.7390406873741404e-05, + "loss": 1.2294, + "step": 200000 + }, + { + "epoch": 0.76, + "learning_rate": 3.7358882890925764e-05, + "loss": 1.2289, + "step": 200500 + }, + { + "epoch": 0.76, + "learning_rate": 3.732735890811012e-05, + "loss": 1.2345, + "step": 201000 + }, + { + "epoch": 0.76, + "learning_rate": 3.7295834925294464e-05, + "loss": 1.2345, + "step": 201500 + }, + { + "epoch": 0.76, + "learning_rate": 3.726431094247882e-05, + "loss": 1.2319, + "step": 202000 + }, + { + "epoch": 0.77, + "learning_rate": 3.723278695966317e-05, + "loss": 1.22, + "step": 202500 + }, + { + "epoch": 0.77, + "learning_rate": 3.720126297684753e-05, + "loss": 1.2248, + "step": 203000 + }, + { + "epoch": 0.77, + "learning_rate": 3.7169738994031885e-05, + "loss": 1.2157, + "step": 203500 + }, + { + "epoch": 0.77, + "learning_rate": 3.713821501121624e-05, + "loss": 1.2358, + "step": 204000 + }, + { + "epoch": 0.77, + "learning_rate": 3.7106691028400585e-05, + "loss": 1.2328, + "step": 204500 + }, + { + "epoch": 0.78, + "learning_rate": 3.707516704558494e-05, + "loss": 1.2153, + "step": 205000 + }, + { + "epoch": 0.78, + "learning_rate": 3.70436430627693e-05, + "loss": 1.2313, + "step": 205500 + }, + { + "epoch": 0.78, + "learning_rate": 3.701211907995365e-05, + "loss": 1.2219, + "step": 206000 + }, + { + "epoch": 0.78, + "learning_rate": 3.6980595097138005e-05, + "loss": 1.2297, + "step": 206500 + }, + { + "epoch": 0.78, + "learning_rate": 3.694907111432235e-05, + "loss": 1.2327, + "step": 207000 + }, + { + "epoch": 0.78, + "learning_rate": 3.6917547131506705e-05, + "loss": 1.2318, + "step": 207500 + }, + { + "epoch": 0.79, + "learning_rate": 3.6886023148691066e-05, + "loss": 1.2226, + "step": 208000 + }, + { + "epoch": 0.79, + "learning_rate": 3.685449916587542e-05, + "loss": 1.2297, + "step": 208500 + }, + { + "epoch": 0.79, + "learning_rate": 3.682297518305977e-05, + "loss": 1.224, + "step": 209000 + }, + { + "epoch": 0.79, + "learning_rate": 3.6791451200244126e-05, + "loss": 1.2257, + "step": 209500 + }, + { + "epoch": 0.79, + "learning_rate": 3.675992721742847e-05, + "loss": 1.2258, + "step": 210000 + }, + { + "epoch": 0.8, + "learning_rate": 3.6728403234612826e-05, + "loss": 1.2274, + "step": 210500 + }, + { + "epoch": 0.8, + "learning_rate": 3.6696879251797186e-05, + "loss": 1.2217, + "step": 211000 + }, + { + "epoch": 0.8, + "learning_rate": 3.666535526898154e-05, + "loss": 1.2228, + "step": 211500 + }, + { + "epoch": 0.8, + "learning_rate": 3.663383128616589e-05, + "loss": 1.2263, + "step": 212000 + }, + { + "epoch": 0.8, + "learning_rate": 3.6602307303350247e-05, + "loss": 1.215, + "step": 212500 + }, + { + "epoch": 0.81, + "learning_rate": 3.657078332053459e-05, + "loss": 1.2297, + "step": 213000 + }, + { + "epoch": 0.81, + "learning_rate": 3.6539259337718953e-05, + "loss": 1.2213, + "step": 213500 + }, + { + "epoch": 0.81, + "learning_rate": 3.650773535490331e-05, + "loss": 1.2106, + "step": 214000 + }, + { + "epoch": 0.81, + "learning_rate": 3.647621137208766e-05, + "loss": 1.2199, + "step": 214500 + }, + { + "epoch": 0.81, + "learning_rate": 3.6444687389272014e-05, + "loss": 1.2183, + "step": 215000 + }, + { + "epoch": 0.82, + "learning_rate": 3.641316340645636e-05, + "loss": 1.2175, + "step": 215500 + }, + { + "epoch": 0.82, + "learning_rate": 3.638163942364072e-05, + "loss": 1.2245, + "step": 216000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6350115440825074e-05, + "loss": 1.2266, + "step": 216500 + }, + { + "epoch": 0.82, + "learning_rate": 3.631859145800943e-05, + "loss": 1.2147, + "step": 217000 + }, + { + "epoch": 0.82, + "learning_rate": 3.628706747519378e-05, + "loss": 1.2244, + "step": 217500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6255543492378134e-05, + "loss": 1.2199, + "step": 218000 + }, + { + "epoch": 0.83, + "learning_rate": 3.622401950956249e-05, + "loss": 1.2264, + "step": 218500 + }, + { + "epoch": 0.83, + "learning_rate": 3.619249552674684e-05, + "loss": 1.2274, + "step": 219000 + }, + { + "epoch": 0.83, + "learning_rate": 3.6160971543931195e-05, + "loss": 1.2145, + "step": 219500 + }, + { + "epoch": 0.83, + "learning_rate": 3.612944756111555e-05, + "loss": 1.2281, + "step": 220000 + }, + { + "epoch": 0.83, + "learning_rate": 3.60979235782999e-05, + "loss": 1.225, + "step": 220500 + }, + { + "epoch": 0.84, + "learning_rate": 3.606639959548425e-05, + "loss": 1.2272, + "step": 221000 + }, + { + "epoch": 0.84, + "learning_rate": 3.603487561266861e-05, + "loss": 1.2137, + "step": 221500 + }, + { + "epoch": 0.84, + "learning_rate": 3.600335162985296e-05, + "loss": 1.2131, + "step": 222000 + }, + { + "epoch": 0.84, + "learning_rate": 3.5971827647037315e-05, + "loss": 1.2113, + "step": 222500 + }, + { + "epoch": 0.84, + "learning_rate": 3.594030366422167e-05, + "loss": 1.2021, + "step": 223000 + }, + { + "epoch": 0.85, + "learning_rate": 3.590877968140602e-05, + "loss": 1.2248, + "step": 223500 + }, + { + "epoch": 0.85, + "learning_rate": 3.5877255698590376e-05, + "loss": 1.2194, + "step": 224000 + }, + { + "epoch": 0.85, + "learning_rate": 3.584573171577473e-05, + "loss": 1.2115, + "step": 224500 + }, + { + "epoch": 0.85, + "learning_rate": 3.581420773295908e-05, + "loss": 1.2223, + "step": 225000 + }, + { + "epoch": 0.85, + "learning_rate": 3.5782683750143436e-05, + "loss": 1.2088, + "step": 225500 + }, + { + "epoch": 0.85, + "learning_rate": 3.575115976732779e-05, + "loss": 1.218, + "step": 226000 + }, + { + "epoch": 0.86, + "learning_rate": 3.571963578451214e-05, + "loss": 1.2144, + "step": 226500 + }, + { + "epoch": 0.86, + "learning_rate": 3.5688111801696496e-05, + "loss": 1.2152, + "step": 227000 + }, + { + "epoch": 0.86, + "learning_rate": 3.565658781888085e-05, + "loss": 1.2081, + "step": 227500 + }, + { + "epoch": 0.86, + "learning_rate": 3.56250638360652e-05, + "loss": 1.2153, + "step": 228000 + }, + { + "epoch": 0.86, + "learning_rate": 3.5593539853249557e-05, + "loss": 1.2123, + "step": 228500 + }, + { + "epoch": 0.87, + "learning_rate": 3.556201587043391e-05, + "loss": 1.2165, + "step": 229000 + }, + { + "epoch": 0.87, + "learning_rate": 3.553049188761826e-05, + "loss": 1.2143, + "step": 229500 + }, + { + "epoch": 0.87, + "learning_rate": 3.549896790480262e-05, + "loss": 1.2039, + "step": 230000 + }, + { + "epoch": 0.87, + "learning_rate": 3.546744392198697e-05, + "loss": 1.2169, + "step": 230500 + }, + { + "epoch": 0.87, + "learning_rate": 3.5435919939171324e-05, + "loss": 1.2179, + "step": 231000 + }, + { + "epoch": 0.88, + "learning_rate": 3.540439595635568e-05, + "loss": 1.2178, + "step": 231500 + }, + { + "epoch": 0.88, + "learning_rate": 3.537287197354003e-05, + "loss": 1.2184, + "step": 232000 + }, + { + "epoch": 0.88, + "learning_rate": 3.5341347990724384e-05, + "loss": 1.2141, + "step": 232500 + }, + { + "epoch": 0.88, + "learning_rate": 3.530982400790874e-05, + "loss": 1.2104, + "step": 233000 + }, + { + "epoch": 0.88, + "learning_rate": 3.527830002509309e-05, + "loss": 1.2114, + "step": 233500 + }, + { + "epoch": 0.89, + "learning_rate": 3.5246776042277444e-05, + "loss": 1.2092, + "step": 234000 + }, + { + "epoch": 0.89, + "learning_rate": 3.52152520594618e-05, + "loss": 1.2129, + "step": 234500 + }, + { + "epoch": 0.89, + "learning_rate": 3.518372807664616e-05, + "loss": 1.2038, + "step": 235000 + }, + { + "epoch": 0.89, + "learning_rate": 3.5152204093830505e-05, + "loss": 1.213, + "step": 235500 + }, + { + "epoch": 0.89, + "learning_rate": 3.512068011101486e-05, + "loss": 1.2105, + "step": 236000 + }, + { + "epoch": 0.89, + "learning_rate": 3.508915612819921e-05, + "loss": 1.2051, + "step": 236500 + }, + { + "epoch": 0.9, + "learning_rate": 3.5057632145383565e-05, + "loss": 1.2105, + "step": 237000 + }, + { + "epoch": 0.9, + "learning_rate": 3.5026108162567925e-05, + "loss": 1.2066, + "step": 237500 + }, + { + "epoch": 0.9, + "learning_rate": 3.499458417975227e-05, + "loss": 1.2162, + "step": 238000 + }, + { + "epoch": 0.9, + "learning_rate": 3.4963060196936625e-05, + "loss": 1.2113, + "step": 238500 + }, + { + "epoch": 0.9, + "learning_rate": 3.493153621412098e-05, + "loss": 1.2056, + "step": 239000 + }, + { + "epoch": 0.91, + "learning_rate": 3.490001223130533e-05, + "loss": 1.2125, + "step": 239500 + }, + { + "epoch": 0.91, + "learning_rate": 3.486848824848969e-05, + "loss": 1.2104, + "step": 240000 + }, + { + "epoch": 0.91, + "learning_rate": 3.4836964265674046e-05, + "loss": 1.2174, + "step": 240500 + }, + { + "epoch": 0.91, + "learning_rate": 3.480544028285839e-05, + "loss": 1.2045, + "step": 241000 + }, + { + "epoch": 0.91, + "learning_rate": 3.4773916300042746e-05, + "loss": 1.2054, + "step": 241500 + }, + { + "epoch": 0.92, + "learning_rate": 3.47423923172271e-05, + "loss": 1.2001, + "step": 242000 + }, + { + "epoch": 0.92, + "learning_rate": 3.471086833441145e-05, + "loss": 1.1995, + "step": 242500 + }, + { + "epoch": 0.92, + "learning_rate": 3.467934435159581e-05, + "loss": 1.2107, + "step": 243000 + }, + { + "epoch": 0.92, + "learning_rate": 3.464782036878016e-05, + "loss": 1.2138, + "step": 243500 + }, + { + "epoch": 0.92, + "learning_rate": 3.461629638596451e-05, + "loss": 1.2026, + "step": 244000 + }, + { + "epoch": 0.92, + "learning_rate": 3.4584772403148866e-05, + "loss": 1.2116, + "step": 244500 + }, + { + "epoch": 0.93, + "learning_rate": 3.455324842033322e-05, + "loss": 1.2067, + "step": 245000 + }, + { + "epoch": 0.93, + "learning_rate": 3.452172443751758e-05, + "loss": 1.2112, + "step": 245500 + }, + { + "epoch": 0.93, + "learning_rate": 3.4490200454701934e-05, + "loss": 1.1996, + "step": 246000 + }, + { + "epoch": 0.93, + "learning_rate": 3.445867647188628e-05, + "loss": 1.1978, + "step": 246500 + }, + { + "epoch": 0.93, + "learning_rate": 3.4427152489070634e-05, + "loss": 1.2188, + "step": 247000 + }, + { + "epoch": 0.94, + "learning_rate": 3.439562850625499e-05, + "loss": 1.2098, + "step": 247500 + }, + { + "epoch": 0.94, + "learning_rate": 3.436410452343935e-05, + "loss": 1.2038, + "step": 248000 + }, + { + "epoch": 0.94, + "learning_rate": 3.43325805406237e-05, + "loss": 1.1974, + "step": 248500 + }, + { + "epoch": 0.94, + "learning_rate": 3.4301056557808054e-05, + "loss": 1.1995, + "step": 249000 + }, + { + "epoch": 0.94, + "learning_rate": 3.42695325749924e-05, + "loss": 1.2148, + "step": 249500 + }, + { + "epoch": 0.95, + "learning_rate": 3.4238008592176754e-05, + "loss": 1.2018, + "step": 250000 + }, + { + "epoch": 0.95, + "learning_rate": 3.4206484609361114e-05, + "loss": 1.2058, + "step": 250500 + }, + { + "epoch": 0.95, + "learning_rate": 3.417496062654547e-05, + "loss": 1.2057, + "step": 251000 + }, + { + "epoch": 0.95, + "learning_rate": 3.414343664372982e-05, + "loss": 1.2059, + "step": 251500 + }, + { + "epoch": 0.95, + "learning_rate": 3.411191266091417e-05, + "loss": 1.2017, + "step": 252000 + }, + { + "epoch": 0.96, + "learning_rate": 3.408038867809852e-05, + "loss": 1.2028, + "step": 252500 + }, + { + "epoch": 0.96, + "learning_rate": 3.4048864695282875e-05, + "loss": 1.2041, + "step": 253000 + }, + { + "epoch": 0.96, + "learning_rate": 3.4017340712467235e-05, + "loss": 1.1936, + "step": 253500 + }, + { + "epoch": 0.96, + "learning_rate": 3.398581672965159e-05, + "loss": 1.2032, + "step": 254000 + }, + { + "epoch": 0.96, + "learning_rate": 3.395429274683594e-05, + "loss": 1.2067, + "step": 254500 + }, + { + "epoch": 0.96, + "learning_rate": 3.392276876402029e-05, + "loss": 1.1945, + "step": 255000 + }, + { + "epoch": 0.97, + "learning_rate": 3.389124478120464e-05, + "loss": 1.2049, + "step": 255500 + }, + { + "epoch": 0.97, + "learning_rate": 3.3859720798389e-05, + "loss": 1.1975, + "step": 256000 + }, + { + "epoch": 0.97, + "learning_rate": 3.3828196815573356e-05, + "loss": 1.1973, + "step": 256500 + }, + { + "epoch": 0.97, + "learning_rate": 3.379667283275771e-05, + "loss": 1.2014, + "step": 257000 + }, + { + "epoch": 0.97, + "learning_rate": 3.376514884994206e-05, + "loss": 1.1997, + "step": 257500 + }, + { + "epoch": 0.98, + "learning_rate": 3.373362486712641e-05, + "loss": 1.2044, + "step": 258000 + }, + { + "epoch": 0.98, + "learning_rate": 3.370210088431077e-05, + "loss": 1.2105, + "step": 258500 + }, + { + "epoch": 0.98, + "learning_rate": 3.367057690149512e-05, + "loss": 1.1934, + "step": 259000 + }, + { + "epoch": 0.98, + "learning_rate": 3.3639052918679476e-05, + "loss": 1.1967, + "step": 259500 + }, + { + "epoch": 0.98, + "learning_rate": 3.360752893586383e-05, + "loss": 1.201, + "step": 260000 + }, + { + "epoch": 0.99, + "learning_rate": 3.3576004953048176e-05, + "loss": 1.2017, + "step": 260500 + }, + { + "epoch": 0.99, + "learning_rate": 3.354448097023254e-05, + "loss": 1.2053, + "step": 261000 + }, + { + "epoch": 0.99, + "learning_rate": 3.351295698741689e-05, + "loss": 1.1973, + "step": 261500 + }, + { + "epoch": 0.99, + "learning_rate": 3.3481433004601243e-05, + "loss": 1.1972, + "step": 262000 + }, + { + "epoch": 0.99, + "learning_rate": 3.34499090217856e-05, + "loss": 1.1902, + "step": 262500 + }, + { + "epoch": 0.99, + "learning_rate": 3.341838503896995e-05, + "loss": 1.2027, + "step": 263000 + }, + { + "epoch": 1.0, + "learning_rate": 3.3386861056154304e-05, + "loss": 1.1981, + "step": 263500 + }, + { + "epoch": 1.0, + "learning_rate": 3.335533707333866e-05, + "loss": 1.201, + "step": 264000 + }, + { + "epoch": 1.0, + "learning_rate": 3.332381309052301e-05, + "loss": 1.1885, + "step": 264500 + }, + { + "epoch": 1.0, + "learning_rate": 3.3292289107707364e-05, + "loss": 1.1934, + "step": 265000 + }, + { + "epoch": 1.0, + "learning_rate": 3.326076512489172e-05, + "loss": 1.1933, + "step": 265500 + }, + { + "epoch": 1.01, + "learning_rate": 3.322924114207607e-05, + "loss": 1.1872, + "step": 266000 + }, + { + "epoch": 1.01, + "learning_rate": 3.3197717159260424e-05, + "loss": 1.1967, + "step": 266500 + }, + { + "epoch": 1.01, + "learning_rate": 3.316619317644478e-05, + "loss": 1.201, + "step": 267000 + }, + { + "epoch": 1.01, + "learning_rate": 3.313466919362913e-05, + "loss": 1.1889, + "step": 267500 + }, + { + "epoch": 1.01, + "learning_rate": 3.3103145210813485e-05, + "loss": 1.1834, + "step": 268000 + }, + { + "epoch": 1.02, + "learning_rate": 3.307162122799784e-05, + "loss": 1.1921, + "step": 268500 + }, + { + "epoch": 1.02, + "learning_rate": 3.304009724518219e-05, + "loss": 1.1926, + "step": 269000 + }, + { + "epoch": 1.02, + "learning_rate": 3.3008573262366545e-05, + "loss": 1.191, + "step": 269500 + }, + { + "epoch": 1.02, + "learning_rate": 3.29770492795509e-05, + "loss": 1.1933, + "step": 270000 + }, + { + "epoch": 1.02, + "learning_rate": 3.294552529673525e-05, + "loss": 1.1884, + "step": 270500 + }, + { + "epoch": 1.03, + "learning_rate": 3.2914001313919605e-05, + "loss": 1.1909, + "step": 271000 + }, + { + "epoch": 1.03, + "learning_rate": 3.288247733110396e-05, + "loss": 1.1941, + "step": 271500 + }, + { + "epoch": 1.03, + "learning_rate": 3.285095334828831e-05, + "loss": 1.194, + "step": 272000 + }, + { + "epoch": 1.03, + "learning_rate": 3.2819429365472666e-05, + "loss": 1.1948, + "step": 272500 + }, + { + "epoch": 1.03, + "learning_rate": 3.278790538265702e-05, + "loss": 1.1893, + "step": 273000 + }, + { + "epoch": 1.03, + "learning_rate": 3.275638139984137e-05, + "loss": 1.1865, + "step": 273500 + }, + { + "epoch": 1.04, + "learning_rate": 3.2724857417025726e-05, + "loss": 1.1893, + "step": 274000 + }, + { + "epoch": 1.04, + "learning_rate": 3.269333343421008e-05, + "loss": 1.1983, + "step": 274500 + }, + { + "epoch": 1.04, + "learning_rate": 3.266180945139443e-05, + "loss": 1.191, + "step": 275000 + }, + { + "epoch": 1.04, + "learning_rate": 3.2630285468578786e-05, + "loss": 1.1934, + "step": 275500 + }, + { + "epoch": 1.04, + "learning_rate": 3.259876148576314e-05, + "loss": 1.1913, + "step": 276000 + }, + { + "epoch": 1.05, + "learning_rate": 3.256723750294749e-05, + "loss": 1.1924, + "step": 276500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2535713520131847e-05, + "loss": 1.188, + "step": 277000 + }, + { + "epoch": 1.05, + "learning_rate": 3.25041895373162e-05, + "loss": 1.1844, + "step": 277500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2472665554500553e-05, + "loss": 1.196, + "step": 278000 + }, + { + "epoch": 1.05, + "learning_rate": 3.244114157168491e-05, + "loss": 1.1861, + "step": 278500 + }, + { + "epoch": 1.06, + "learning_rate": 3.240961758886926e-05, + "loss": 1.1922, + "step": 279000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2378093606053614e-05, + "loss": 1.1899, + "step": 279500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2346569623237974e-05, + "loss": 1.1898, + "step": 280000 + }, + { + "epoch": 1.06, + "learning_rate": 3.231504564042232e-05, + "loss": 1.1885, + "step": 280500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2283521657606674e-05, + "loss": 1.1911, + "step": 281000 + }, + { + "epoch": 1.06, + "learning_rate": 3.225199767479103e-05, + "loss": 1.1901, + "step": 281500 + }, + { + "epoch": 1.07, + "learning_rate": 3.222047369197538e-05, + "loss": 1.192, + "step": 282000 + }, + { + "epoch": 1.07, + "learning_rate": 3.218894970915974e-05, + "loss": 1.1859, + "step": 282500 + }, + { + "epoch": 1.07, + "learning_rate": 3.215742572634409e-05, + "loss": 1.1943, + "step": 283000 + }, + { + "epoch": 1.07, + "learning_rate": 3.212590174352844e-05, + "loss": 1.1932, + "step": 283500 + }, + { + "epoch": 1.07, + "learning_rate": 3.2094377760712795e-05, + "loss": 1.1895, + "step": 284000 + }, + { + "epoch": 1.08, + "learning_rate": 3.206285377789715e-05, + "loss": 1.1864, + "step": 284500 + }, + { + "epoch": 1.08, + "learning_rate": 3.203132979508151e-05, + "loss": 1.1811, + "step": 285000 + }, + { + "epoch": 1.08, + "learning_rate": 3.199980581226586e-05, + "loss": 1.188, + "step": 285500 + }, + { + "epoch": 1.08, + "learning_rate": 3.196828182945021e-05, + "loss": 1.1901, + "step": 286000 + }, + { + "epoch": 1.08, + "learning_rate": 3.193675784663456e-05, + "loss": 1.1814, + "step": 286500 + }, + { + "epoch": 1.09, + "learning_rate": 3.1905233863818915e-05, + "loss": 1.1816, + "step": 287000 + }, + { + "epoch": 1.09, + "learning_rate": 3.187370988100327e-05, + "loss": 1.1975, + "step": 287500 + }, + { + "epoch": 1.09, + "learning_rate": 3.184218589818763e-05, + "loss": 1.1889, + "step": 288000 + }, + { + "epoch": 1.09, + "learning_rate": 3.181066191537198e-05, + "loss": 1.1843, + "step": 288500 + }, + { + "epoch": 1.09, + "learning_rate": 3.177913793255633e-05, + "loss": 1.1848, + "step": 289000 + }, + { + "epoch": 1.1, + "learning_rate": 3.174761394974068e-05, + "loss": 1.1829, + "step": 289500 + }, + { + "epoch": 1.1, + "learning_rate": 3.1716089966925036e-05, + "loss": 1.1828, + "step": 290000 + }, + { + "epoch": 1.1, + "learning_rate": 3.1684565984109396e-05, + "loss": 1.1915, + "step": 290500 + }, + { + "epoch": 1.1, + "learning_rate": 3.165304200129375e-05, + "loss": 1.1803, + "step": 291000 + }, + { + "epoch": 1.1, + "learning_rate": 3.1621518018478096e-05, + "loss": 1.1891, + "step": 291500 + }, + { + "epoch": 1.1, + "learning_rate": 3.158999403566245e-05, + "loss": 1.18, + "step": 292000 + }, + { + "epoch": 1.11, + "learning_rate": 3.15584700528468e-05, + "loss": 1.1817, + "step": 292500 + }, + { + "epoch": 1.11, + "learning_rate": 3.152694607003116e-05, + "loss": 1.1788, + "step": 293000 + }, + { + "epoch": 1.11, + "learning_rate": 3.149542208721552e-05, + "loss": 1.1777, + "step": 293500 + }, + { + "epoch": 1.11, + "learning_rate": 3.146389810439987e-05, + "loss": 1.172, + "step": 294000 + }, + { + "epoch": 1.11, + "learning_rate": 3.143237412158422e-05, + "loss": 1.1754, + "step": 294500 + }, + { + "epoch": 1.12, + "learning_rate": 3.140085013876857e-05, + "loss": 1.1897, + "step": 295000 + }, + { + "epoch": 1.12, + "learning_rate": 3.136932615595293e-05, + "loss": 1.178, + "step": 295500 + }, + { + "epoch": 1.12, + "learning_rate": 3.1337802173137284e-05, + "loss": 1.1772, + "step": 296000 + }, + { + "epoch": 1.12, + "learning_rate": 3.130627819032164e-05, + "loss": 1.1848, + "step": 296500 + }, + { + "epoch": 1.12, + "learning_rate": 3.1274754207505984e-05, + "loss": 1.1804, + "step": 297000 + }, + { + "epoch": 1.13, + "learning_rate": 3.124323022469034e-05, + "loss": 1.179, + "step": 297500 + }, + { + "epoch": 1.13, + "learning_rate": 3.121170624187469e-05, + "loss": 1.1881, + "step": 298000 + }, + { + "epoch": 1.13, + "learning_rate": 3.118018225905905e-05, + "loss": 1.1759, + "step": 298500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1148658276243405e-05, + "loss": 1.181, + "step": 299000 + }, + { + "epoch": 1.13, + "learning_rate": 3.111713429342776e-05, + "loss": 1.1858, + "step": 299500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1085610310612105e-05, + "loss": 1.1794, + "step": 300000 + }, + { + "epoch": 1.14, + "learning_rate": 3.105408632779646e-05, + "loss": 1.1764, + "step": 300500 + }, + { + "epoch": 1.14, + "learning_rate": 3.102256234498082e-05, + "loss": 1.1845, + "step": 301000 + }, + { + "epoch": 1.14, + "learning_rate": 3.099103836216517e-05, + "loss": 1.1801, + "step": 301500 + }, + { + "epoch": 1.14, + "learning_rate": 3.0959514379349525e-05, + "loss": 1.1797, + "step": 302000 + }, + { + "epoch": 1.14, + "learning_rate": 3.092799039653388e-05, + "loss": 1.183, + "step": 302500 + }, + { + "epoch": 1.15, + "learning_rate": 3.0896466413718225e-05, + "loss": 1.1838, + "step": 303000 + }, + { + "epoch": 1.15, + "learning_rate": 3.0864942430902585e-05, + "loss": 1.1831, + "step": 303500 + }, + { + "epoch": 1.15, + "learning_rate": 3.083341844808694e-05, + "loss": 1.1847, + "step": 304000 + }, + { + "epoch": 1.15, + "learning_rate": 3.080189446527129e-05, + "loss": 1.1718, + "step": 304500 + }, + { + "epoch": 1.15, + "learning_rate": 3.0770370482455646e-05, + "loss": 1.1844, + "step": 305000 + }, + { + "epoch": 1.16, + "learning_rate": 3.073884649963999e-05, + "loss": 1.1784, + "step": 305500 + }, + { + "epoch": 1.16, + "learning_rate": 3.070732251682435e-05, + "loss": 1.1768, + "step": 306000 + }, + { + "epoch": 1.16, + "learning_rate": 3.0675798534008706e-05, + "loss": 1.1803, + "step": 306500 + }, + { + "epoch": 1.16, + "learning_rate": 3.064427455119306e-05, + "loss": 1.1822, + "step": 307000 + }, + { + "epoch": 1.16, + "learning_rate": 3.061275056837741e-05, + "loss": 1.169, + "step": 307500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0581226585561766e-05, + "loss": 1.176, + "step": 308000 + }, + { + "epoch": 1.17, + "learning_rate": 3.054970260274611e-05, + "loss": 1.1807, + "step": 308500 + }, + { + "epoch": 1.17, + "learning_rate": 3.051817861993047e-05, + "loss": 1.1791, + "step": 309000 + }, + { + "epoch": 1.17, + "learning_rate": 3.0486654637114827e-05, + "loss": 1.1832, + "step": 309500 + }, + { + "epoch": 1.17, + "learning_rate": 3.045513065429918e-05, + "loss": 1.1734, + "step": 310000 + }, + { + "epoch": 1.17, + "learning_rate": 3.0423606671483534e-05, + "loss": 1.1731, + "step": 310500 + }, + { + "epoch": 1.18, + "learning_rate": 3.039208268866789e-05, + "loss": 1.1725, + "step": 311000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0360558705852237e-05, + "loss": 1.1691, + "step": 311500 + }, + { + "epoch": 1.18, + "learning_rate": 3.0329034723036594e-05, + "loss": 1.1698, + "step": 312000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0297510740220947e-05, + "loss": 1.1782, + "step": 312500 + }, + { + "epoch": 1.18, + "learning_rate": 3.02659867574053e-05, + "loss": 1.1785, + "step": 313000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0234462774589654e-05, + "loss": 1.1708, + "step": 313500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0202938791774004e-05, + "loss": 1.1863, + "step": 314000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0171414808958358e-05, + "loss": 1.1716, + "step": 314500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0139890826142714e-05, + "loss": 1.1781, + "step": 315000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0108366843327068e-05, + "loss": 1.1697, + "step": 315500 + }, + { + "epoch": 1.2, + "learning_rate": 3.007684286051142e-05, + "loss": 1.177, + "step": 316000 + }, + { + "epoch": 1.2, + "learning_rate": 3.0045318877695778e-05, + "loss": 1.171, + "step": 316500 + }, + { + "epoch": 1.2, + "learning_rate": 3.0013794894880125e-05, + "loss": 1.1679, + "step": 317000 + }, + { + "epoch": 1.2, + "learning_rate": 2.998227091206448e-05, + "loss": 1.173, + "step": 317500 + }, + { + "epoch": 1.2, + "learning_rate": 2.9950746929248835e-05, + "loss": 1.1747, + "step": 318000 + }, + { + "epoch": 1.2, + "learning_rate": 2.991922294643319e-05, + "loss": 1.1665, + "step": 318500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9887698963617545e-05, + "loss": 1.1717, + "step": 319000 + }, + { + "epoch": 1.21, + "learning_rate": 2.9856174980801892e-05, + "loss": 1.1799, + "step": 319500 + }, + { + "epoch": 1.21, + "learning_rate": 2.982465099798625e-05, + "loss": 1.1683, + "step": 320000 + }, + { + "epoch": 1.21, + "learning_rate": 2.9793127015170602e-05, + "loss": 1.1765, + "step": 320500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9761603032354956e-05, + "loss": 1.1657, + "step": 321000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9730079049539313e-05, + "loss": 1.1745, + "step": 321500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9698555066723666e-05, + "loss": 1.1861, + "step": 322000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9667031083908016e-05, + "loss": 1.1799, + "step": 322500 + }, + { + "epoch": 1.22, + "learning_rate": 2.963550710109237e-05, + "loss": 1.168, + "step": 323000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9603983118276723e-05, + "loss": 1.1693, + "step": 323500 + }, + { + "epoch": 1.23, + "learning_rate": 2.957245913546108e-05, + "loss": 1.1737, + "step": 324000 + }, + { + "epoch": 1.23, + "learning_rate": 2.9540935152645433e-05, + "loss": 1.1775, + "step": 324500 + }, + { + "epoch": 1.23, + "learning_rate": 2.9509411169829787e-05, + "loss": 1.1732, + "step": 325000 + }, + { + "epoch": 1.23, + "learning_rate": 2.9477887187014137e-05, + "loss": 1.1633, + "step": 325500 + }, + { + "epoch": 1.23, + "learning_rate": 2.944636320419849e-05, + "loss": 1.1749, + "step": 326000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9414839221382844e-05, + "loss": 1.1698, + "step": 326500 + }, + { + "epoch": 1.24, + "learning_rate": 2.93833152385672e-05, + "loss": 1.1632, + "step": 327000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9351791255751554e-05, + "loss": 1.1621, + "step": 327500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9320267272935904e-05, + "loss": 1.1646, + "step": 328000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9288743290120257e-05, + "loss": 1.1695, + "step": 328500 + }, + { + "epoch": 1.24, + "learning_rate": 2.925721930730461e-05, + "loss": 1.1661, + "step": 329000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9225695324488968e-05, + "loss": 1.1669, + "step": 329500 + }, + { + "epoch": 1.25, + "learning_rate": 2.919417134167332e-05, + "loss": 1.1718, + "step": 330000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9162647358857674e-05, + "loss": 1.1702, + "step": 330500 + }, + { + "epoch": 1.25, + "learning_rate": 2.9131123376042024e-05, + "loss": 1.1697, + "step": 331000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9099599393226378e-05, + "loss": 1.1607, + "step": 331500 + }, + { + "epoch": 1.26, + "learning_rate": 2.9068075410410735e-05, + "loss": 1.168, + "step": 332000 + }, + { + "epoch": 1.26, + "learning_rate": 2.9036551427595088e-05, + "loss": 1.1717, + "step": 332500 + }, + { + "epoch": 1.26, + "learning_rate": 2.900502744477944e-05, + "loss": 1.1638, + "step": 333000 + }, + { + "epoch": 1.26, + "learning_rate": 2.89735034619638e-05, + "loss": 1.1763, + "step": 333500 + }, + { + "epoch": 1.26, + "learning_rate": 2.8941979479148145e-05, + "loss": 1.1716, + "step": 334000 + }, + { + "epoch": 1.27, + "learning_rate": 2.8910455496332502e-05, + "loss": 1.1693, + "step": 334500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8878931513516855e-05, + "loss": 1.1638, + "step": 335000 + }, + { + "epoch": 1.27, + "learning_rate": 2.884740753070121e-05, + "loss": 1.1697, + "step": 335500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8815883547885562e-05, + "loss": 1.1701, + "step": 336000 + }, + { + "epoch": 1.27, + "learning_rate": 2.8784359565069912e-05, + "loss": 1.163, + "step": 336500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8752835582254266e-05, + "loss": 1.1738, + "step": 337000 + }, + { + "epoch": 1.28, + "learning_rate": 2.8721311599438622e-05, + "loss": 1.1666, + "step": 337500 + }, + { + "epoch": 1.28, + "learning_rate": 2.8689787616622976e-05, + "loss": 1.167, + "step": 338000 + }, + { + "epoch": 1.28, + "learning_rate": 2.865826363380733e-05, + "loss": 1.1655, + "step": 338500 + }, + { + "epoch": 1.28, + "learning_rate": 2.8626739650991686e-05, + "loss": 1.1576, + "step": 339000 + }, + { + "epoch": 1.28, + "learning_rate": 2.8595215668176033e-05, + "loss": 1.1653, + "step": 339500 + }, + { + "epoch": 1.29, + "learning_rate": 2.856369168536039e-05, + "loss": 1.1618, + "step": 340000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8532167702544743e-05, + "loss": 1.1714, + "step": 340500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8500643719729097e-05, + "loss": 1.1704, + "step": 341000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8469119736913453e-05, + "loss": 1.1672, + "step": 341500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8437595754097807e-05, + "loss": 1.1655, + "step": 342000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8406071771282157e-05, + "loss": 1.1657, + "step": 342500 + }, + { + "epoch": 1.3, + "learning_rate": 2.837454778846651e-05, + "loss": 1.1631, + "step": 343000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8343023805650864e-05, + "loss": 1.1749, + "step": 343500 + }, + { + "epoch": 1.3, + "learning_rate": 2.831149982283522e-05, + "loss": 1.1672, + "step": 344000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8279975840019574e-05, + "loss": 1.154, + "step": 344500 + }, + { + "epoch": 1.31, + "learning_rate": 2.8248451857203924e-05, + "loss": 1.1668, + "step": 345000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8216927874388277e-05, + "loss": 1.1682, + "step": 345500 + }, + { + "epoch": 1.31, + "learning_rate": 2.818540389157263e-05, + "loss": 1.1652, + "step": 346000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8153879908756984e-05, + "loss": 1.1582, + "step": 346500 + }, + { + "epoch": 1.31, + "learning_rate": 2.812235592594134e-05, + "loss": 1.1633, + "step": 347000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8090831943125695e-05, + "loss": 1.1594, + "step": 347500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8059307960310045e-05, + "loss": 1.1639, + "step": 348000 + }, + { + "epoch": 1.32, + "learning_rate": 2.8027783977494398e-05, + "loss": 1.1659, + "step": 348500 + }, + { + "epoch": 1.32, + "learning_rate": 2.799625999467875e-05, + "loss": 1.1676, + "step": 349000 + }, + { + "epoch": 1.32, + "learning_rate": 2.796473601186311e-05, + "loss": 1.1583, + "step": 349500 + }, + { + "epoch": 1.32, + "learning_rate": 2.7933212029047462e-05, + "loss": 1.1621, + "step": 350000 + }, + { + "epoch": 1.33, + "learning_rate": 2.7901688046231812e-05, + "loss": 1.1594, + "step": 350500 + }, + { + "epoch": 1.33, + "learning_rate": 2.7870164063416165e-05, + "loss": 1.161, + "step": 351000 + }, + { + "epoch": 1.33, + "learning_rate": 2.783864008060052e-05, + "loss": 1.1571, + "step": 351500 + }, + { + "epoch": 1.33, + "learning_rate": 2.7807116097784876e-05, + "loss": 1.1655, + "step": 352000 + }, + { + "epoch": 1.33, + "learning_rate": 2.777559211496923e-05, + "loss": 1.1636, + "step": 352500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7744068132153582e-05, + "loss": 1.1568, + "step": 353000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7712544149337932e-05, + "loss": 1.161, + "step": 353500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7681020166522286e-05, + "loss": 1.1611, + "step": 354000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7649496183706643e-05, + "loss": 1.156, + "step": 354500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7617972200890996e-05, + "loss": 1.1614, + "step": 355000 + }, + { + "epoch": 1.34, + "learning_rate": 2.758644821807535e-05, + "loss": 1.1648, + "step": 355500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7554924235259706e-05, + "loss": 1.1559, + "step": 356000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7523400252444053e-05, + "loss": 1.1606, + "step": 356500 + }, + { + "epoch": 1.35, + "learning_rate": 2.749187626962841e-05, + "loss": 1.1581, + "step": 357000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7460352286812763e-05, + "loss": 1.1583, + "step": 357500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7428828303997117e-05, + "loss": 1.1593, + "step": 358000 + }, + { + "epoch": 1.36, + "learning_rate": 2.739730432118147e-05, + "loss": 1.1539, + "step": 358500 + }, + { + "epoch": 1.36, + "learning_rate": 2.736578033836582e-05, + "loss": 1.1579, + "step": 359000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7334256355550174e-05, + "loss": 1.1578, + "step": 359500 + }, + { + "epoch": 1.36, + "learning_rate": 2.730273237273453e-05, + "loss": 1.16, + "step": 360000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7271208389918884e-05, + "loss": 1.1641, + "step": 360500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7239684407103237e-05, + "loss": 1.1561, + "step": 361000 + }, + { + "epoch": 1.37, + "learning_rate": 2.7208160424287594e-05, + "loss": 1.164, + "step": 361500 + }, + { + "epoch": 1.37, + "learning_rate": 2.717663644147194e-05, + "loss": 1.153, + "step": 362000 + }, + { + "epoch": 1.37, + "learning_rate": 2.7145112458656298e-05, + "loss": 1.1539, + "step": 362500 + }, + { + "epoch": 1.37, + "learning_rate": 2.711358847584065e-05, + "loss": 1.1532, + "step": 363000 + }, + { + "epoch": 1.38, + "learning_rate": 2.7082064493025005e-05, + "loss": 1.1611, + "step": 363500 + }, + { + "epoch": 1.38, + "learning_rate": 2.705054051020936e-05, + "loss": 1.1558, + "step": 364000 + }, + { + "epoch": 1.38, + "learning_rate": 2.7019016527393715e-05, + "loss": 1.1565, + "step": 364500 + }, + { + "epoch": 1.38, + "learning_rate": 2.6987492544578065e-05, + "loss": 1.1502, + "step": 365000 + }, + { + "epoch": 1.38, + "learning_rate": 2.6955968561762418e-05, + "loss": 1.1487, + "step": 365500 + }, + { + "epoch": 1.38, + "learning_rate": 2.6924444578946772e-05, + "loss": 1.1519, + "step": 366000 + }, + { + "epoch": 1.39, + "learning_rate": 2.689292059613113e-05, + "loss": 1.1592, + "step": 366500 + }, + { + "epoch": 1.39, + "learning_rate": 2.6861396613315482e-05, + "loss": 1.1537, + "step": 367000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6829872630499832e-05, + "loss": 1.1574, + "step": 367500 + }, + { + "epoch": 1.39, + "learning_rate": 2.6798348647684185e-05, + "loss": 1.153, + "step": 368000 + }, + { + "epoch": 1.39, + "learning_rate": 2.676682466486854e-05, + "loss": 1.1462, + "step": 368500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6735300682052892e-05, + "loss": 1.1511, + "step": 369000 + }, + { + "epoch": 1.4, + "learning_rate": 2.670377669923725e-05, + "loss": 1.1518, + "step": 369500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6672252716421603e-05, + "loss": 1.1539, + "step": 370000 + }, + { + "epoch": 1.4, + "learning_rate": 2.6640728733605953e-05, + "loss": 1.1613, + "step": 370500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6609204750790306e-05, + "loss": 1.1626, + "step": 371000 + }, + { + "epoch": 1.41, + "learning_rate": 2.657768076797466e-05, + "loss": 1.1557, + "step": 371500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6546156785159016e-05, + "loss": 1.1559, + "step": 372000 + }, + { + "epoch": 1.41, + "learning_rate": 2.651463280234337e-05, + "loss": 1.1476, + "step": 372500 + }, + { + "epoch": 1.41, + "learning_rate": 2.648310881952772e-05, + "loss": 1.146, + "step": 373000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6451584836712073e-05, + "loss": 1.1551, + "step": 373500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6420060853896427e-05, + "loss": 1.1516, + "step": 374000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6388536871080784e-05, + "loss": 1.1531, + "step": 374500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6357012888265137e-05, + "loss": 1.1548, + "step": 375000 + }, + { + "epoch": 1.42, + "learning_rate": 2.632548890544949e-05, + "loss": 1.1588, + "step": 375500 + }, + { + "epoch": 1.42, + "learning_rate": 2.629396492263384e-05, + "loss": 1.1569, + "step": 376000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6262440939818194e-05, + "loss": 1.1547, + "step": 376500 + }, + { + "epoch": 1.43, + "learning_rate": 2.623091695700255e-05, + "loss": 1.1444, + "step": 377000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6199392974186904e-05, + "loss": 1.1588, + "step": 377500 + }, + { + "epoch": 1.43, + "learning_rate": 2.6167868991371258e-05, + "loss": 1.1513, + "step": 378000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6136345008555614e-05, + "loss": 1.1513, + "step": 378500 + }, + { + "epoch": 1.43, + "learning_rate": 2.610482102573996e-05, + "loss": 1.1551, + "step": 379000 + }, + { + "epoch": 1.44, + "learning_rate": 2.6073297042924314e-05, + "loss": 1.1521, + "step": 379500 + }, + { + "epoch": 1.44, + "learning_rate": 2.604177306010867e-05, + "loss": 1.154, + "step": 380000 + }, + { + "epoch": 1.44, + "learning_rate": 2.6010249077293025e-05, + "loss": 1.157, + "step": 380500 + }, + { + "epoch": 1.44, + "learning_rate": 2.5978725094477378e-05, + "loss": 1.1499, + "step": 381000 + }, + { + "epoch": 1.44, + "learning_rate": 2.5947201111661728e-05, + "loss": 1.1456, + "step": 381500 + }, + { + "epoch": 1.45, + "learning_rate": 2.591567712884608e-05, + "loss": 1.1486, + "step": 382000 + }, + { + "epoch": 1.45, + "learning_rate": 2.588415314603044e-05, + "loss": 1.1507, + "step": 382500 + }, + { + "epoch": 1.45, + "learning_rate": 2.5852629163214792e-05, + "loss": 1.1544, + "step": 383000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5821105180399145e-05, + "loss": 1.1478, + "step": 383500 + }, + { + "epoch": 1.45, + "learning_rate": 2.5789581197583502e-05, + "loss": 1.1473, + "step": 384000 + }, + { + "epoch": 1.45, + "learning_rate": 2.575805721476785e-05, + "loss": 1.1528, + "step": 384500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5726533231952206e-05, + "loss": 1.1443, + "step": 385000 + }, + { + "epoch": 1.46, + "learning_rate": 2.569500924913656e-05, + "loss": 1.1494, + "step": 385500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5663485266320913e-05, + "loss": 1.148, + "step": 386000 + }, + { + "epoch": 1.46, + "learning_rate": 2.563196128350527e-05, + "loss": 1.1475, + "step": 386500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5600437300689623e-05, + "loss": 1.1492, + "step": 387000 + }, + { + "epoch": 1.47, + "learning_rate": 2.5568913317873973e-05, + "loss": 1.1483, + "step": 387500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5537389335058326e-05, + "loss": 1.1499, + "step": 388000 + }, + { + "epoch": 1.47, + "learning_rate": 2.550586535224268e-05, + "loss": 1.1446, + "step": 388500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5474341369427037e-05, + "loss": 1.142, + "step": 389000 + }, + { + "epoch": 1.47, + "learning_rate": 2.544281738661139e-05, + "loss": 1.1489, + "step": 389500 + }, + { + "epoch": 1.48, + "learning_rate": 2.541129340379574e-05, + "loss": 1.1574, + "step": 390000 + }, + { + "epoch": 1.48, + "learning_rate": 2.5379769420980093e-05, + "loss": 1.1388, + "step": 390500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5348245438164447e-05, + "loss": 1.1489, + "step": 391000 + }, + { + "epoch": 1.48, + "learning_rate": 2.53167214553488e-05, + "loss": 1.1436, + "step": 391500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5285197472533157e-05, + "loss": 1.1476, + "step": 392000 + }, + { + "epoch": 1.48, + "learning_rate": 2.525367348971751e-05, + "loss": 1.1431, + "step": 392500 + }, + { + "epoch": 1.49, + "learning_rate": 2.522214950690186e-05, + "loss": 1.1512, + "step": 393000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5190625524086214e-05, + "loss": 1.1446, + "step": 393500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5159101541270568e-05, + "loss": 1.1529, + "step": 394000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5127577558454924e-05, + "loss": 1.1408, + "step": 394500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5096053575639278e-05, + "loss": 1.1436, + "step": 395000 + }, + { + "epoch": 1.5, + "learning_rate": 2.506452959282363e-05, + "loss": 1.1522, + "step": 395500 + }, + { + "epoch": 1.5, + "learning_rate": 2.503300561000798e-05, + "loss": 1.1495, + "step": 396000 + }, + { + "epoch": 1.5, + "learning_rate": 2.5001481627192335e-05, + "loss": 1.1469, + "step": 396500 + }, + { + "epoch": 1.5, + "learning_rate": 2.496995764437669e-05, + "loss": 1.1473, + "step": 397000 + }, + { + "epoch": 1.5, + "learning_rate": 2.4938433661561045e-05, + "loss": 1.1339, + "step": 397500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4906909678745395e-05, + "loss": 1.1437, + "step": 398000 + }, + { + "epoch": 1.51, + "learning_rate": 2.4875385695929752e-05, + "loss": 1.1392, + "step": 398500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4843861713114105e-05, + "loss": 1.1421, + "step": 399000 + }, + { + "epoch": 1.51, + "learning_rate": 2.481233773029846e-05, + "loss": 1.1483, + "step": 399500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4780813747482812e-05, + "loss": 1.137, + "step": 400000 + }, + { + "epoch": 1.52, + "learning_rate": 2.4749289764667162e-05, + "loss": 1.1475, + "step": 400500 + }, + { + "epoch": 1.52, + "learning_rate": 2.471776578185152e-05, + "loss": 1.1365, + "step": 401000 + }, + { + "epoch": 1.52, + "learning_rate": 2.4686241799035872e-05, + "loss": 1.1503, + "step": 401500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4654717816220222e-05, + "loss": 1.149, + "step": 402000 + }, + { + "epoch": 1.52, + "learning_rate": 2.462319383340458e-05, + "loss": 1.147, + "step": 402500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4591669850588933e-05, + "loss": 1.1412, + "step": 403000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4560145867773286e-05, + "loss": 1.1497, + "step": 403500 + }, + { + "epoch": 1.53, + "learning_rate": 2.452862188495764e-05, + "loss": 1.1409, + "step": 404000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4497097902141993e-05, + "loss": 1.1371, + "step": 404500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4465573919326347e-05, + "loss": 1.1587, + "step": 405000 + }, + { + "epoch": 1.53, + "learning_rate": 2.44340499365107e-05, + "loss": 1.144, + "step": 405500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4402525953695053e-05, + "loss": 1.1456, + "step": 406000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4371001970879407e-05, + "loss": 1.1413, + "step": 406500 + }, + { + "epoch": 1.54, + "learning_rate": 2.433947798806376e-05, + "loss": 1.1506, + "step": 407000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4307954005248114e-05, + "loss": 1.1397, + "step": 407500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4276430022432467e-05, + "loss": 1.1385, + "step": 408000 + }, + { + "epoch": 1.55, + "learning_rate": 2.424490603961682e-05, + "loss": 1.1399, + "step": 408500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4213382056801174e-05, + "loss": 1.1373, + "step": 409000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4181858073985527e-05, + "loss": 1.1437, + "step": 409500 + }, + { + "epoch": 1.55, + "learning_rate": 2.415033409116988e-05, + "loss": 1.1429, + "step": 410000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4118810108354234e-05, + "loss": 1.1454, + "step": 410500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4087286125538588e-05, + "loss": 1.1479, + "step": 411000 + }, + { + "epoch": 1.56, + "learning_rate": 2.4055762142722945e-05, + "loss": 1.1334, + "step": 411500 + }, + { + "epoch": 1.56, + "learning_rate": 2.4024238159907295e-05, + "loss": 1.1325, + "step": 412000 + }, + { + "epoch": 1.56, + "learning_rate": 2.3992714177091648e-05, + "loss": 1.1382, + "step": 412500 + }, + { + "epoch": 1.56, + "learning_rate": 2.3961190194276005e-05, + "loss": 1.1407, + "step": 413000 + }, + { + "epoch": 1.56, + "learning_rate": 2.3929666211460355e-05, + "loss": 1.1441, + "step": 413500 + }, + { + "epoch": 1.57, + "learning_rate": 2.389814222864471e-05, + "loss": 1.1395, + "step": 414000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3866618245829065e-05, + "loss": 1.1428, + "step": 414500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3835094263013415e-05, + "loss": 1.1471, + "step": 415000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3803570280197772e-05, + "loss": 1.1455, + "step": 415500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3772046297382122e-05, + "loss": 1.1428, + "step": 416000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3740522314566476e-05, + "loss": 1.1418, + "step": 416500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3708998331750832e-05, + "loss": 1.1344, + "step": 417000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3677474348935182e-05, + "loss": 1.1401, + "step": 417500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3645950366119536e-05, + "loss": 1.1418, + "step": 418000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3614426383303893e-05, + "loss": 1.137, + "step": 418500 + }, + { + "epoch": 1.59, + "learning_rate": 2.3582902400488243e-05, + "loss": 1.1241, + "step": 419000 + }, + { + "epoch": 1.59, + "learning_rate": 2.35513784176726e-05, + "loss": 1.1377, + "step": 419500 + }, + { + "epoch": 1.59, + "learning_rate": 2.3519854434856953e-05, + "loss": 1.1325, + "step": 420000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3488330452041303e-05, + "loss": 1.147, + "step": 420500 + }, + { + "epoch": 1.59, + "learning_rate": 2.345680646922566e-05, + "loss": 1.135, + "step": 421000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3425282486410013e-05, + "loss": 1.1351, + "step": 421500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3393758503594367e-05, + "loss": 1.1491, + "step": 422000 + }, + { + "epoch": 1.6, + "learning_rate": 2.336223452077872e-05, + "loss": 1.1423, + "step": 422500 + }, + { + "epoch": 1.6, + "learning_rate": 2.333071053796307e-05, + "loss": 1.1393, + "step": 423000 + }, + { + "epoch": 1.6, + "learning_rate": 2.3299186555147427e-05, + "loss": 1.1271, + "step": 423500 + }, + { + "epoch": 1.6, + "learning_rate": 2.326766257233178e-05, + "loss": 1.1357, + "step": 424000 + }, + { + "epoch": 1.61, + "learning_rate": 2.323613858951613e-05, + "loss": 1.1344, + "step": 424500 + }, + { + "epoch": 1.61, + "learning_rate": 2.3204614606700487e-05, + "loss": 1.1291, + "step": 425000 + }, + { + "epoch": 1.61, + "learning_rate": 2.317309062388484e-05, + "loss": 1.1355, + "step": 425500 + }, + { + "epoch": 1.61, + "learning_rate": 2.3141566641069194e-05, + "loss": 1.1287, + "step": 426000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3110042658253548e-05, + "loss": 1.135, + "step": 426500 + }, + { + "epoch": 1.62, + "learning_rate": 2.30785186754379e-05, + "loss": 1.1446, + "step": 427000 + }, + { + "epoch": 1.62, + "learning_rate": 2.3046994692622255e-05, + "loss": 1.1432, + "step": 427500 + }, + { + "epoch": 1.62, + "learning_rate": 2.3015470709806608e-05, + "loss": 1.1303, + "step": 428000 + }, + { + "epoch": 1.62, + "learning_rate": 2.298394672699096e-05, + "loss": 1.1442, + "step": 428500 + }, + { + "epoch": 1.62, + "learning_rate": 2.2952422744175315e-05, + "loss": 1.1309, + "step": 429000 + }, + { + "epoch": 1.62, + "learning_rate": 2.2920898761359668e-05, + "loss": 1.1268, + "step": 429500 + }, + { + "epoch": 1.63, + "learning_rate": 2.288937477854402e-05, + "loss": 1.1306, + "step": 430000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2857850795728375e-05, + "loss": 1.1389, + "step": 430500 + }, + { + "epoch": 1.63, + "learning_rate": 2.282632681291273e-05, + "loss": 1.1343, + "step": 431000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2794802830097082e-05, + "loss": 1.1333, + "step": 431500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2763278847281435e-05, + "loss": 1.1383, + "step": 432000 + }, + { + "epoch": 1.64, + "learning_rate": 2.273175486446579e-05, + "loss": 1.1341, + "step": 432500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2700230881650142e-05, + "loss": 1.1343, + "step": 433000 + }, + { + "epoch": 1.64, + "learning_rate": 2.2668706898834496e-05, + "loss": 1.1287, + "step": 433500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2637182916018853e-05, + "loss": 1.1397, + "step": 434000 + }, + { + "epoch": 1.64, + "learning_rate": 2.2605658933203203e-05, + "loss": 1.136, + "step": 434500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2574134950387556e-05, + "loss": 1.1247, + "step": 435000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2542610967571913e-05, + "loss": 1.1283, + "step": 435500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2511086984756263e-05, + "loss": 1.1349, + "step": 436000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2479563001940616e-05, + "loss": 1.1261, + "step": 436500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2448039019124973e-05, + "loss": 1.1287, + "step": 437000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2416515036309323e-05, + "loss": 1.1357, + "step": 437500 + }, + { + "epoch": 1.66, + "learning_rate": 2.238499105349368e-05, + "loss": 1.1362, + "step": 438000 + }, + { + "epoch": 1.66, + "learning_rate": 2.235346707067803e-05, + "loss": 1.1276, + "step": 438500 + }, + { + "epoch": 1.66, + "learning_rate": 2.2321943087862384e-05, + "loss": 1.1325, + "step": 439000 + }, + { + "epoch": 1.66, + "learning_rate": 2.229041910504674e-05, + "loss": 1.1226, + "step": 439500 + }, + { + "epoch": 1.66, + "learning_rate": 2.225889512223109e-05, + "loss": 1.1207, + "step": 440000 + }, + { + "epoch": 1.67, + "learning_rate": 2.2227371139415444e-05, + "loss": 1.1301, + "step": 440500 + }, + { + "epoch": 1.67, + "learning_rate": 2.21958471565998e-05, + "loss": 1.1343, + "step": 441000 + }, + { + "epoch": 1.67, + "learning_rate": 2.216432317378415e-05, + "loss": 1.1279, + "step": 441500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2132799190968508e-05, + "loss": 1.124, + "step": 442000 + }, + { + "epoch": 1.67, + "learning_rate": 2.210127520815286e-05, + "loss": 1.1229, + "step": 442500 + }, + { + "epoch": 1.68, + "learning_rate": 2.206975122533721e-05, + "loss": 1.1377, + "step": 443000 + }, + { + "epoch": 1.68, + "learning_rate": 2.2038227242521568e-05, + "loss": 1.1277, + "step": 443500 + }, + { + "epoch": 1.68, + "learning_rate": 2.200670325970592e-05, + "loss": 1.1284, + "step": 444000 + }, + { + "epoch": 1.68, + "learning_rate": 2.1975179276890275e-05, + "loss": 1.1266, + "step": 444500 + }, + { + "epoch": 1.68, + "learning_rate": 2.1943655294074628e-05, + "loss": 1.1242, + "step": 445000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1912131311258978e-05, + "loss": 1.119, + "step": 445500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1880607328443335e-05, + "loss": 1.1285, + "step": 446000 + }, + { + "epoch": 1.69, + "learning_rate": 2.184908334562769e-05, + "loss": 1.133, + "step": 446500 + }, + { + "epoch": 1.69, + "learning_rate": 2.181755936281204e-05, + "loss": 1.1283, + "step": 447000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1786035379996395e-05, + "loss": 1.1312, + "step": 447500 + }, + { + "epoch": 1.69, + "learning_rate": 2.175451139718075e-05, + "loss": 1.1295, + "step": 448000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1722987414365102e-05, + "loss": 1.1331, + "step": 448500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1691463431549456e-05, + "loss": 1.1275, + "step": 449000 + }, + { + "epoch": 1.7, + "learning_rate": 2.165993944873381e-05, + "loss": 1.1229, + "step": 449500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1628415465918163e-05, + "loss": 1.1197, + "step": 450000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1596891483102516e-05, + "loss": 1.1211, + "step": 450500 + }, + { + "epoch": 1.71, + "learning_rate": 2.156536750028687e-05, + "loss": 1.1299, + "step": 451000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1533843517471223e-05, + "loss": 1.1399, + "step": 451500 + }, + { + "epoch": 1.71, + "learning_rate": 2.1502319534655576e-05, + "loss": 1.1205, + "step": 452000 + }, + { + "epoch": 1.71, + "learning_rate": 2.147079555183993e-05, + "loss": 1.1271, + "step": 452500 + }, + { + "epoch": 1.71, + "learning_rate": 2.1439271569024283e-05, + "loss": 1.1253, + "step": 453000 + }, + { + "epoch": 1.72, + "learning_rate": 2.1407747586208637e-05, + "loss": 1.1318, + "step": 453500 + }, + { + "epoch": 1.72, + "learning_rate": 2.137622360339299e-05, + "loss": 1.124, + "step": 454000 + }, + { + "epoch": 1.72, + "learning_rate": 2.1344699620577343e-05, + "loss": 1.1271, + "step": 454500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1313175637761697e-05, + "loss": 1.1284, + "step": 455000 + }, + { + "epoch": 1.72, + "learning_rate": 2.128165165494605e-05, + "loss": 1.1278, + "step": 455500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1250127672130404e-05, + "loss": 1.1375, + "step": 456000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1218603689314757e-05, + "loss": 1.1224, + "step": 456500 + }, + { + "epoch": 1.73, + "learning_rate": 2.118707970649911e-05, + "loss": 1.1182, + "step": 457000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1155555723683464e-05, + "loss": 1.1236, + "step": 457500 + }, + { + "epoch": 1.73, + "learning_rate": 2.112403174086782e-05, + "loss": 1.1216, + "step": 458000 + }, + { + "epoch": 1.73, + "learning_rate": 2.109250775805217e-05, + "loss": 1.1148, + "step": 458500 + }, + { + "epoch": 1.74, + "learning_rate": 2.1060983775236524e-05, + "loss": 1.1259, + "step": 459000 + }, + { + "epoch": 1.74, + "learning_rate": 2.102945979242088e-05, + "loss": 1.1209, + "step": 459500 + }, + { + "epoch": 1.74, + "learning_rate": 2.099793580960523e-05, + "loss": 1.1278, + "step": 460000 + }, + { + "epoch": 1.74, + "learning_rate": 2.0966411826789588e-05, + "loss": 1.1265, + "step": 460500 + }, + { + "epoch": 1.74, + "learning_rate": 2.0934887843973938e-05, + "loss": 1.1267, + "step": 461000 + }, + { + "epoch": 1.75, + "learning_rate": 2.090336386115829e-05, + "loss": 1.1236, + "step": 461500 + }, + { + "epoch": 1.75, + "learning_rate": 2.087183987834265e-05, + "loss": 1.1223, + "step": 462000 + }, + { + "epoch": 1.75, + "learning_rate": 2.0840315895527e-05, + "loss": 1.1282, + "step": 462500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0808791912711352e-05, + "loss": 1.1196, + "step": 463000 + }, + { + "epoch": 1.75, + "learning_rate": 2.077726792989571e-05, + "loss": 1.1282, + "step": 463500 + }, + { + "epoch": 1.76, + "learning_rate": 2.074574394708006e-05, + "loss": 1.1278, + "step": 464000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0714219964264416e-05, + "loss": 1.1204, + "step": 464500 + }, + { + "epoch": 1.76, + "learning_rate": 2.068269598144877e-05, + "loss": 1.1181, + "step": 465000 + }, + { + "epoch": 1.76, + "learning_rate": 2.065117199863312e-05, + "loss": 1.1217, + "step": 465500 + }, + { + "epoch": 1.76, + "learning_rate": 2.0619648015817476e-05, + "loss": 1.1149, + "step": 466000 + }, + { + "epoch": 1.76, + "learning_rate": 2.058812403300183e-05, + "loss": 1.1294, + "step": 466500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0556600050186183e-05, + "loss": 1.1255, + "step": 467000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0525076067370536e-05, + "loss": 1.115, + "step": 467500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0493552084554886e-05, + "loss": 1.1232, + "step": 468000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0462028101739243e-05, + "loss": 1.1268, + "step": 468500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0430504118923596e-05, + "loss": 1.1208, + "step": 469000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0398980136107947e-05, + "loss": 1.1205, + "step": 469500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0367456153292303e-05, + "loss": 1.127, + "step": 470000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0335932170476657e-05, + "loss": 1.1257, + "step": 470500 + }, + { + "epoch": 1.78, + "learning_rate": 2.030440818766101e-05, + "loss": 1.1201, + "step": 471000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0272884204845364e-05, + "loss": 1.128, + "step": 471500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0241360222029717e-05, + "loss": 1.1132, + "step": 472000 + }, + { + "epoch": 1.79, + "learning_rate": 2.020983623921407e-05, + "loss": 1.1115, + "step": 472500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0178312256398424e-05, + "loss": 1.1164, + "step": 473000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0146788273582777e-05, + "loss": 1.1181, + "step": 473500 + }, + { + "epoch": 1.79, + "learning_rate": 2.011526429076713e-05, + "loss": 1.1184, + "step": 474000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0083740307951484e-05, + "loss": 1.1179, + "step": 474500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0052216325135838e-05, + "loss": 1.1156, + "step": 475000 + }, + { + "epoch": 1.8, + "learning_rate": 2.002069234232019e-05, + "loss": 1.1261, + "step": 475500 + }, + { + "epoch": 1.8, + "learning_rate": 1.9989168359504545e-05, + "loss": 1.1247, + "step": 476000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9957644376688898e-05, + "loss": 1.1248, + "step": 476500 + }, + { + "epoch": 1.8, + "learning_rate": 1.992612039387325e-05, + "loss": 1.1202, + "step": 477000 + }, + { + "epoch": 1.81, + "learning_rate": 1.9894596411057605e-05, + "loss": 1.1165, + "step": 477500 + }, + { + "epoch": 1.81, + "learning_rate": 1.986307242824196e-05, + "loss": 1.1134, + "step": 478000 + }, + { + "epoch": 1.81, + "learning_rate": 1.9831548445426312e-05, + "loss": 1.1234, + "step": 478500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9800024462610665e-05, + "loss": 1.1199, + "step": 479000 + }, + { + "epoch": 1.81, + "learning_rate": 1.976850047979502e-05, + "loss": 1.1192, + "step": 479500 + }, + { + "epoch": 1.82, + "learning_rate": 1.9736976496979372e-05, + "loss": 1.1215, + "step": 480000 + }, + { + "epoch": 1.82, + "learning_rate": 1.970545251416373e-05, + "loss": 1.1169, + "step": 480500 + }, + { + "epoch": 1.82, + "learning_rate": 1.967392853134808e-05, + "loss": 1.1111, + "step": 481000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9642404548532432e-05, + "loss": 1.1167, + "step": 481500 + }, + { + "epoch": 1.82, + "learning_rate": 1.961088056571679e-05, + "loss": 1.1197, + "step": 482000 + }, + { + "epoch": 1.83, + "learning_rate": 1.957935658290114e-05, + "loss": 1.1203, + "step": 482500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9547832600085496e-05, + "loss": 1.1139, + "step": 483000 + }, + { + "epoch": 1.83, + "learning_rate": 1.9516308617269846e-05, + "loss": 1.1198, + "step": 483500 + }, + { + "epoch": 1.83, + "learning_rate": 1.94847846344542e-05, + "loss": 1.117, + "step": 484000 + }, + { + "epoch": 1.83, + "learning_rate": 1.9453260651638556e-05, + "loss": 1.1217, + "step": 484500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9421736668822906e-05, + "loss": 1.1194, + "step": 485000 + }, + { + "epoch": 1.84, + "learning_rate": 1.939021268600726e-05, + "loss": 1.1257, + "step": 485500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9358688703191617e-05, + "loss": 1.109, + "step": 486000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9327164720375967e-05, + "loss": 1.1206, + "step": 486500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9295640737560324e-05, + "loss": 1.1119, + "step": 487000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9264116754744677e-05, + "loss": 1.1128, + "step": 487500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9232592771929027e-05, + "loss": 1.1165, + "step": 488000 + }, + { + "epoch": 1.85, + "learning_rate": 1.9201068789113384e-05, + "loss": 1.1098, + "step": 488500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9169544806297737e-05, + "loss": 1.1197, + "step": 489000 + }, + { + "epoch": 1.85, + "learning_rate": 1.9138020823482087e-05, + "loss": 1.1133, + "step": 489500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9106496840666444e-05, + "loss": 1.1117, + "step": 490000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9074972857850798e-05, + "loss": 1.1185, + "step": 490500 + }, + { + "epoch": 1.86, + "learning_rate": 1.904344887503515e-05, + "loss": 1.1189, + "step": 491000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9011924892219504e-05, + "loss": 1.1308, + "step": 491500 + }, + { + "epoch": 1.86, + "learning_rate": 1.8980400909403855e-05, + "loss": 1.112, + "step": 492000 + }, + { + "epoch": 1.86, + "learning_rate": 1.894887692658821e-05, + "loss": 1.1164, + "step": 492500 + }, + { + "epoch": 1.86, + "learning_rate": 1.8917352943772565e-05, + "loss": 1.1147, + "step": 493000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8885828960956918e-05, + "loss": 1.1175, + "step": 493500 + }, + { + "epoch": 1.87, + "learning_rate": 1.885430497814127e-05, + "loss": 1.1223, + "step": 494000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8822780995325625e-05, + "loss": 1.1118, + "step": 494500 + }, + { + "epoch": 1.87, + "learning_rate": 1.879125701250998e-05, + "loss": 1.1173, + "step": 495000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8759733029694332e-05, + "loss": 1.1157, + "step": 495500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8728209046878685e-05, + "loss": 1.1181, + "step": 496000 + }, + { + "epoch": 1.88, + "learning_rate": 1.869668506406304e-05, + "loss": 1.109, + "step": 496500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8665161081247392e-05, + "loss": 1.1123, + "step": 497000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8633637098431746e-05, + "loss": 1.1169, + "step": 497500 + }, + { + "epoch": 1.88, + "learning_rate": 1.86021131156161e-05, + "loss": 1.1142, + "step": 498000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8570589132800453e-05, + "loss": 1.1084, + "step": 498500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8539065149984806e-05, + "loss": 1.1145, + "step": 499000 + }, + { + "epoch": 1.89, + "learning_rate": 1.850754116716916e-05, + "loss": 1.1159, + "step": 499500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8476017184353513e-05, + "loss": 1.1052, + "step": 500000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8444493201537866e-05, + "loss": 1.1137, + "step": 500500 + }, + { + "epoch": 1.9, + "learning_rate": 1.841296921872222e-05, + "loss": 1.1116, + "step": 501000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8381445235906573e-05, + "loss": 1.1164, + "step": 501500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8349921253090927e-05, + "loss": 1.1186, + "step": 502000 + }, + { + "epoch": 1.9, + "learning_rate": 1.831839727027528e-05, + "loss": 1.1086, + "step": 502500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8286873287459637e-05, + "loss": 1.1108, + "step": 503000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8255349304643987e-05, + "loss": 1.1076, + "step": 503500 + }, + { + "epoch": 1.91, + "learning_rate": 1.822382532182834e-05, + "loss": 1.1165, + "step": 504000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8192301339012697e-05, + "loss": 1.1067, + "step": 504500 + }, + { + "epoch": 1.91, + "learning_rate": 1.8160777356197047e-05, + "loss": 1.1057, + "step": 505000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8129253373381404e-05, + "loss": 1.1001, + "step": 505500 + }, + { + "epoch": 1.91, + "learning_rate": 1.8097729390565754e-05, + "loss": 1.1077, + "step": 506000 + }, + { + "epoch": 1.92, + "learning_rate": 1.8066205407750108e-05, + "loss": 1.1201, + "step": 506500 + }, + { + "epoch": 1.92, + "learning_rate": 1.8034681424934464e-05, + "loss": 1.1105, + "step": 507000 + }, + { + "epoch": 1.92, + "learning_rate": 1.8003157442118814e-05, + "loss": 1.1053, + "step": 507500 + }, + { + "epoch": 1.92, + "learning_rate": 1.7971633459303168e-05, + "loss": 1.1007, + "step": 508000 + }, + { + "epoch": 1.92, + "learning_rate": 1.7940109476487525e-05, + "loss": 1.1128, + "step": 508500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7908585493671875e-05, + "loss": 1.1074, + "step": 509000 + }, + { + "epoch": 1.93, + "learning_rate": 1.787706151085623e-05, + "loss": 1.0999, + "step": 509500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7845537528040585e-05, + "loss": 1.1167, + "step": 510000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7814013545224935e-05, + "loss": 1.1109, + "step": 510500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7782489562409292e-05, + "loss": 1.115, + "step": 511000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7750965579593645e-05, + "loss": 1.1081, + "step": 511500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7719441596777995e-05, + "loss": 1.1029, + "step": 512000 + }, + { + "epoch": 1.94, + "learning_rate": 1.7687917613962352e-05, + "loss": 1.1092, + "step": 512500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7656393631146706e-05, + "loss": 1.1162, + "step": 513000 + }, + { + "epoch": 1.94, + "learning_rate": 1.762486964833106e-05, + "loss": 1.1019, + "step": 513500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7593345665515412e-05, + "loss": 1.1105, + "step": 514000 + }, + { + "epoch": 1.95, + "learning_rate": 1.7561821682699763e-05, + "loss": 1.1034, + "step": 514500 + }, + { + "epoch": 1.95, + "learning_rate": 1.753029769988412e-05, + "loss": 1.1069, + "step": 515000 + }, + { + "epoch": 1.95, + "learning_rate": 1.7498773717068473e-05, + "loss": 1.1044, + "step": 515500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7467249734252826e-05, + "loss": 1.1081, + "step": 516000 + }, + { + "epoch": 1.95, + "learning_rate": 1.743572575143718e-05, + "loss": 1.109, + "step": 516500 + }, + { + "epoch": 1.96, + "learning_rate": 1.7404201768621533e-05, + "loss": 1.1102, + "step": 517000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7372677785805887e-05, + "loss": 1.0989, + "step": 517500 + }, + { + "epoch": 1.96, + "learning_rate": 1.734115380299024e-05, + "loss": 1.0978, + "step": 518000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7309629820174593e-05, + "loss": 1.1085, + "step": 518500 + }, + { + "epoch": 1.96, + "learning_rate": 1.7278105837358947e-05, + "loss": 1.1097, + "step": 519000 + }, + { + "epoch": 1.97, + "learning_rate": 1.72465818545433e-05, + "loss": 1.1037, + "step": 519500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7215057871727654e-05, + "loss": 1.1099, + "step": 520000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7183533888912007e-05, + "loss": 1.1201, + "step": 520500 + }, + { + "epoch": 1.97, + "learning_rate": 1.715200990609636e-05, + "loss": 1.1012, + "step": 521000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7120485923280714e-05, + "loss": 1.1104, + "step": 521500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7088961940465067e-05, + "loss": 1.11, + "step": 522000 + }, + { + "epoch": 1.98, + "learning_rate": 1.705743795764942e-05, + "loss": 1.1128, + "step": 522500 + }, + { + "epoch": 1.98, + "learning_rate": 1.7025913974833774e-05, + "loss": 1.1035, + "step": 523000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6994389992018128e-05, + "loss": 1.0976, + "step": 523500 + }, + { + "epoch": 1.98, + "learning_rate": 1.696286600920248e-05, + "loss": 1.1013, + "step": 524000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6931342026386835e-05, + "loss": 1.1072, + "step": 524500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6899818043571188e-05, + "loss": 1.1065, + "step": 525000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6868294060755545e-05, + "loss": 1.0969, + "step": 525500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6836770077939895e-05, + "loss": 1.1157, + "step": 526000 + }, + { + "epoch": 1.99, + "learning_rate": 1.680524609512425e-05, + "loss": 1.1016, + "step": 526500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6773722112308605e-05, + "loss": 1.0994, + "step": 527000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6742198129492955e-05, + "loss": 1.1045, + "step": 527500 + }, + { + "epoch": 2.0, + "learning_rate": 1.671067414667731e-05, + "loss": 1.1107, + "step": 528000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6679150163861666e-05, + "loss": 1.0976, + "step": 528500 + }, + { + "epoch": 2.0, + "learning_rate": 1.6647626181046016e-05, + "loss": 1.0981, + "step": 529000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6616102198230372e-05, + "loss": 1.0969, + "step": 529500 + }, + { + "epoch": 2.0, + "learning_rate": 1.6584578215414722e-05, + "loss": 1.1059, + "step": 530000 + }, + { + "epoch": 2.01, + "learning_rate": 1.6553054232599076e-05, + "loss": 1.1098, + "step": 530500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6521530249783433e-05, + "loss": 1.0973, + "step": 531000 + }, + { + "epoch": 2.01, + "learning_rate": 1.6490006266967783e-05, + "loss": 1.0941, + "step": 531500 + }, + { + "epoch": 2.01, + "learning_rate": 1.645848228415214e-05, + "loss": 1.1041, + "step": 532000 + }, + { + "epoch": 2.01, + "learning_rate": 1.6426958301336493e-05, + "loss": 1.0995, + "step": 532500 + }, + { + "epoch": 2.02, + "learning_rate": 1.6395434318520843e-05, + "loss": 1.1055, + "step": 533000 + }, + { + "epoch": 2.02, + "learning_rate": 1.63639103357052e-05, + "loss": 1.0983, + "step": 533500 + }, + { + "epoch": 2.02, + "learning_rate": 1.6332386352889553e-05, + "loss": 1.0968, + "step": 534000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6300862370073903e-05, + "loss": 1.1039, + "step": 534500 + }, + { + "epoch": 2.02, + "learning_rate": 1.626933838725826e-05, + "loss": 1.0923, + "step": 535000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6237814404442614e-05, + "loss": 1.0991, + "step": 535500 + }, + { + "epoch": 2.03, + "learning_rate": 1.6206290421626967e-05, + "loss": 1.1015, + "step": 536000 + }, + { + "epoch": 2.03, + "learning_rate": 1.617476643881132e-05, + "loss": 1.1005, + "step": 536500 + }, + { + "epoch": 2.03, + "learning_rate": 1.614324245599567e-05, + "loss": 1.1046, + "step": 537000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6111718473180027e-05, + "loss": 1.0857, + "step": 537500 + }, + { + "epoch": 2.04, + "learning_rate": 1.608019449036438e-05, + "loss": 1.0974, + "step": 538000 + }, + { + "epoch": 2.04, + "learning_rate": 1.6048670507548734e-05, + "loss": 1.0986, + "step": 538500 + }, + { + "epoch": 2.04, + "learning_rate": 1.6017146524733088e-05, + "loss": 1.0967, + "step": 539000 + }, + { + "epoch": 2.04, + "learning_rate": 1.598562254191744e-05, + "loss": 1.0952, + "step": 539500 + }, + { + "epoch": 2.04, + "learning_rate": 1.5954098559101795e-05, + "loss": 1.0955, + "step": 540000 + }, + { + "epoch": 2.04, + "learning_rate": 1.5922574576286148e-05, + "loss": 1.0955, + "step": 540500 + }, + { + "epoch": 2.05, + "learning_rate": 1.58910505934705e-05, + "loss": 1.098, + "step": 541000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5859526610654855e-05, + "loss": 1.0937, + "step": 541500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5828002627839208e-05, + "loss": 1.09, + "step": 542000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5796478645023562e-05, + "loss": 1.0914, + "step": 542500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5764954662207915e-05, + "loss": 1.0964, + "step": 543000 + }, + { + "epoch": 2.06, + "learning_rate": 1.573343067939227e-05, + "loss": 1.092, + "step": 543500 + }, + { + "epoch": 2.06, + "learning_rate": 1.5701906696576622e-05, + "loss": 1.0949, + "step": 544000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5670382713760975e-05, + "loss": 1.0893, + "step": 544500 + }, + { + "epoch": 2.06, + "learning_rate": 1.563885873094533e-05, + "loss": 1.1014, + "step": 545000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5607334748129682e-05, + "loss": 1.089, + "step": 545500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5575810765314036e-05, + "loss": 1.088, + "step": 546000 + }, + { + "epoch": 2.07, + "learning_rate": 1.554428678249839e-05, + "loss": 1.1041, + "step": 546500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5512762799682743e-05, + "loss": 1.0843, + "step": 547000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5481238816867096e-05, + "loss": 1.1017, + "step": 547500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5449714834051453e-05, + "loss": 1.0963, + "step": 548000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5418190851235803e-05, + "loss": 1.0915, + "step": 548500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5386666868420156e-05, + "loss": 1.0932, + "step": 549000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5355142885604513e-05, + "loss": 1.0918, + "step": 549500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5323618902788863e-05, + "loss": 1.0964, + "step": 550000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5292094919973217e-05, + "loss": 1.0858, + "step": 550500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5260570937157574e-05, + "loss": 1.0971, + "step": 551000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5229046954341925e-05, + "loss": 1.0964, + "step": 551500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5197522971526279e-05, + "loss": 1.0904, + "step": 552000 + }, + { + "epoch": 2.09, + "learning_rate": 1.516599898871063e-05, + "loss": 1.099, + "step": 552500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5134475005894986e-05, + "loss": 1.0942, + "step": 553000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5102951023079339e-05, + "loss": 1.0921, + "step": 553500 + }, + { + "epoch": 2.1, + "learning_rate": 1.507142704026369e-05, + "loss": 1.0842, + "step": 554000 + }, + { + "epoch": 2.1, + "learning_rate": 1.5039903057448046e-05, + "loss": 1.0905, + "step": 554500 + }, + { + "epoch": 2.1, + "learning_rate": 1.5008379074632401e-05, + "loss": 1.0972, + "step": 555000 + }, + { + "epoch": 2.1, + "learning_rate": 1.4976855091816753e-05, + "loss": 1.0933, + "step": 555500 + }, + { + "epoch": 2.1, + "learning_rate": 1.4945331109001106e-05, + "loss": 1.0964, + "step": 556000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4913807126185461e-05, + "loss": 1.0975, + "step": 556500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4882283143369813e-05, + "loss": 1.1022, + "step": 557000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4850759160554168e-05, + "loss": 1.0903, + "step": 557500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4819235177738522e-05, + "loss": 1.0947, + "step": 558000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4787711194922873e-05, + "loss": 1.0968, + "step": 558500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4756187212107228e-05, + "loss": 1.0953, + "step": 559000 + }, + { + "epoch": 2.12, + "learning_rate": 1.472466322929158e-05, + "loss": 1.0967, + "step": 559500 + }, + { + "epoch": 2.12, + "learning_rate": 1.4693139246475934e-05, + "loss": 1.0867, + "step": 560000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4661615263660289e-05, + "loss": 1.0931, + "step": 560500 + }, + { + "epoch": 2.12, + "learning_rate": 1.463009128084464e-05, + "loss": 1.0917, + "step": 561000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4598567298028996e-05, + "loss": 1.0869, + "step": 561500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4567043315213349e-05, + "loss": 1.089, + "step": 562000 + }, + { + "epoch": 2.13, + "learning_rate": 1.4535519332397701e-05, + "loss": 1.0868, + "step": 562500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4503995349582056e-05, + "loss": 1.0973, + "step": 563000 + }, + { + "epoch": 2.13, + "learning_rate": 1.447247136676641e-05, + "loss": 1.0955, + "step": 563500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4440947383950761e-05, + "loss": 1.0871, + "step": 564000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4409423401135116e-05, + "loss": 1.0893, + "step": 564500 + }, + { + "epoch": 2.14, + "learning_rate": 1.4377899418319471e-05, + "loss": 1.0931, + "step": 565000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4346375435503823e-05, + "loss": 1.0915, + "step": 565500 + }, + { + "epoch": 2.14, + "learning_rate": 1.4314851452688177e-05, + "loss": 1.0816, + "step": 566000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4283327469872532e-05, + "loss": 1.0903, + "step": 566500 + }, + { + "epoch": 2.14, + "learning_rate": 1.4251803487056883e-05, + "loss": 1.0905, + "step": 567000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4220279504241239e-05, + "loss": 1.0958, + "step": 567500 + }, + { + "epoch": 2.15, + "learning_rate": 1.418875552142559e-05, + "loss": 1.0923, + "step": 568000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4157231538609944e-05, + "loss": 1.0845, + "step": 568500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4125707555794299e-05, + "loss": 1.0887, + "step": 569000 + }, + { + "epoch": 2.15, + "learning_rate": 1.409418357297865e-05, + "loss": 1.0908, + "step": 569500 + }, + { + "epoch": 2.16, + "learning_rate": 1.4062659590163004e-05, + "loss": 1.0826, + "step": 570000 + }, + { + "epoch": 2.16, + "learning_rate": 1.403113560734736e-05, + "loss": 1.0899, + "step": 570500 + }, + { + "epoch": 2.16, + "learning_rate": 1.3999611624531711e-05, + "loss": 1.0909, + "step": 571000 + }, + { + "epoch": 2.16, + "learning_rate": 1.3968087641716066e-05, + "loss": 1.0922, + "step": 571500 + }, + { + "epoch": 2.16, + "learning_rate": 1.393656365890042e-05, + "loss": 1.0922, + "step": 572000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3905039676084771e-05, + "loss": 1.0844, + "step": 572500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3873515693269126e-05, + "loss": 1.0919, + "step": 573000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3841991710453482e-05, + "loss": 1.0872, + "step": 573500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3810467727637833e-05, + "loss": 1.0807, + "step": 574000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3778943744822187e-05, + "loss": 1.0824, + "step": 574500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3747419762006538e-05, + "loss": 1.0874, + "step": 575000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3715895779190894e-05, + "loss": 1.0842, + "step": 575500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3684371796375247e-05, + "loss": 1.0887, + "step": 576000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3652847813559599e-05, + "loss": 1.0903, + "step": 576500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3621323830743954e-05, + "loss": 1.0836, + "step": 577000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3589799847928309e-05, + "loss": 1.0807, + "step": 577500 + }, + { + "epoch": 2.19, + "learning_rate": 1.355827586511266e-05, + "loss": 1.0868, + "step": 578000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3526751882297014e-05, + "loss": 1.0871, + "step": 578500 + }, + { + "epoch": 2.19, + "learning_rate": 1.349522789948137e-05, + "loss": 1.0815, + "step": 579000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3463703916665721e-05, + "loss": 1.0849, + "step": 579500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3432179933850074e-05, + "loss": 1.0831, + "step": 580000 + }, + { + "epoch": 2.2, + "learning_rate": 1.340065595103443e-05, + "loss": 1.0814, + "step": 580500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3369131968218781e-05, + "loss": 1.0887, + "step": 581000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3337607985403136e-05, + "loss": 1.0825, + "step": 581500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3306084002587488e-05, + "loss": 1.0776, + "step": 582000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3274560019771842e-05, + "loss": 1.0856, + "step": 582500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3243036036956197e-05, + "loss": 1.0776, + "step": 583000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3211512054140549e-05, + "loss": 1.0791, + "step": 583500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3179988071324904e-05, + "loss": 1.0893, + "step": 584000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3148464088509257e-05, + "loss": 1.0837, + "step": 584500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3116940105693609e-05, + "loss": 1.0708, + "step": 585000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3085416122877964e-05, + "loss": 1.0831, + "step": 585500 + }, + { + "epoch": 2.22, + "learning_rate": 1.3053892140062317e-05, + "loss": 1.0876, + "step": 586000 + }, + { + "epoch": 2.22, + "learning_rate": 1.302236815724667e-05, + "loss": 1.0829, + "step": 586500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2990844174431024e-05, + "loss": 1.0824, + "step": 587000 + }, + { + "epoch": 2.22, + "learning_rate": 1.295932019161538e-05, + "loss": 1.0865, + "step": 587500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2927796208799731e-05, + "loss": 1.0822, + "step": 588000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2896272225984085e-05, + "loss": 1.0788, + "step": 588500 + }, + { + "epoch": 2.23, + "learning_rate": 1.286474824316844e-05, + "loss": 1.0901, + "step": 589000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2833224260352791e-05, + "loss": 1.0816, + "step": 589500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2801700277537147e-05, + "loss": 1.0801, + "step": 590000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2770176294721498e-05, + "loss": 1.0791, + "step": 590500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2738652311905852e-05, + "loss": 1.0807, + "step": 591000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2707128329090207e-05, + "loss": 1.0823, + "step": 591500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2675604346274559e-05, + "loss": 1.0822, + "step": 592000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2644080363458912e-05, + "loss": 1.0845, + "step": 592500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2612556380643267e-05, + "loss": 1.0854, + "step": 593000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2581032397827619e-05, + "loss": 1.0809, + "step": 593500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2549508415011974e-05, + "loss": 1.0927, + "step": 594000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2517984432196328e-05, + "loss": 1.0874, + "step": 594500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2486460449380681e-05, + "loss": 1.0818, + "step": 595000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2454936466565034e-05, + "loss": 1.0845, + "step": 595500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2423412483749388e-05, + "loss": 1.0699, + "step": 596000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2391888500933741e-05, + "loss": 1.0745, + "step": 596500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2360364518118095e-05, + "loss": 1.0893, + "step": 597000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2328840535302448e-05, + "loss": 1.0815, + "step": 597500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2297316552486802e-05, + "loss": 1.0809, + "step": 598000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2265792569671155e-05, + "loss": 1.0717, + "step": 598500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2234268586855508e-05, + "loss": 1.0795, + "step": 599000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2202744604039862e-05, + "loss": 1.0765, + "step": 599500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2171220621224215e-05, + "loss": 1.0786, + "step": 600000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2139696638408569e-05, + "loss": 1.0821, + "step": 600500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2108172655592922e-05, + "loss": 1.0718, + "step": 601000 + }, + { + "epoch": 2.28, + "learning_rate": 1.2076648672777276e-05, + "loss": 1.0853, + "step": 601500 + }, + { + "epoch": 2.28, + "learning_rate": 1.204512468996163e-05, + "loss": 1.0858, + "step": 602000 + }, + { + "epoch": 2.28, + "learning_rate": 1.2013600707145983e-05, + "loss": 1.0842, + "step": 602500 + }, + { + "epoch": 2.28, + "learning_rate": 1.1982076724330336e-05, + "loss": 1.0794, + "step": 603000 + }, + { + "epoch": 2.28, + "learning_rate": 1.195055274151469e-05, + "loss": 1.0829, + "step": 603500 + }, + { + "epoch": 2.28, + "learning_rate": 1.1919028758699045e-05, + "loss": 1.0785, + "step": 604000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1887504775883398e-05, + "loss": 1.0776, + "step": 604500 + }, + { + "epoch": 2.29, + "learning_rate": 1.185598079306775e-05, + "loss": 1.0774, + "step": 605000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1824456810252105e-05, + "loss": 1.077, + "step": 605500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1792932827436458e-05, + "loss": 1.0753, + "step": 606000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1761408844620812e-05, + "loss": 1.0839, + "step": 606500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1729884861805163e-05, + "loss": 1.075, + "step": 607000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1698360878989519e-05, + "loss": 1.0784, + "step": 607500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1666836896173872e-05, + "loss": 1.0788, + "step": 608000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1635312913358225e-05, + "loss": 1.0812, + "step": 608500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1603788930542579e-05, + "loss": 1.0786, + "step": 609000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1572264947726932e-05, + "loss": 1.0876, + "step": 609500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1540740964911286e-05, + "loss": 1.0838, + "step": 610000 + }, + { + "epoch": 2.31, + "learning_rate": 1.150921698209564e-05, + "loss": 1.0773, + "step": 610500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1477692999279993e-05, + "loss": 1.072, + "step": 611000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1446169016464346e-05, + "loss": 1.0727, + "step": 611500 + }, + { + "epoch": 2.32, + "learning_rate": 1.14146450336487e-05, + "loss": 1.0839, + "step": 612000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1383121050833055e-05, + "loss": 1.0851, + "step": 612500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1351597068017406e-05, + "loss": 1.078, + "step": 613000 + }, + { + "epoch": 2.32, + "learning_rate": 1.132007308520176e-05, + "loss": 1.0738, + "step": 613500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1288549102386115e-05, + "loss": 1.077, + "step": 614000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1257025119570468e-05, + "loss": 1.0792, + "step": 614500 + }, + { + "epoch": 2.33, + "learning_rate": 1.122550113675482e-05, + "loss": 1.0807, + "step": 615000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1193977153939174e-05, + "loss": 1.078, + "step": 615500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1162453171123529e-05, + "loss": 1.0773, + "step": 616000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1130929188307882e-05, + "loss": 1.0827, + "step": 616500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1099405205492234e-05, + "loss": 1.0712, + "step": 617000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1067881222676589e-05, + "loss": 1.0739, + "step": 617500 + }, + { + "epoch": 2.34, + "learning_rate": 1.1036357239860942e-05, + "loss": 1.0767, + "step": 618000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1004833257045296e-05, + "loss": 1.0806, + "step": 618500 + }, + { + "epoch": 2.34, + "learning_rate": 1.0973309274229648e-05, + "loss": 1.078, + "step": 619000 + }, + { + "epoch": 2.34, + "learning_rate": 1.0941785291414003e-05, + "loss": 1.0696, + "step": 619500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0910261308598356e-05, + "loss": 1.0727, + "step": 620000 + }, + { + "epoch": 2.35, + "learning_rate": 1.087873732578271e-05, + "loss": 1.0634, + "step": 620500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0847213342967063e-05, + "loss": 1.0717, + "step": 621000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0815689360151416e-05, + "loss": 1.0734, + "step": 621500 + }, + { + "epoch": 2.35, + "learning_rate": 1.078416537733577e-05, + "loss": 1.0742, + "step": 622000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0752641394520123e-05, + "loss": 1.0776, + "step": 622500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0721117411704477e-05, + "loss": 1.0619, + "step": 623000 + }, + { + "epoch": 2.36, + "learning_rate": 1.068959342888883e-05, + "loss": 1.072, + "step": 623500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0658069446073184e-05, + "loss": 1.0676, + "step": 624000 + }, + { + "epoch": 2.36, + "learning_rate": 1.0626545463257539e-05, + "loss": 1.0761, + "step": 624500 + }, + { + "epoch": 2.36, + "learning_rate": 1.059502148044189e-05, + "loss": 1.0741, + "step": 625000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0563497497626244e-05, + "loss": 1.0759, + "step": 625500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0531973514810597e-05, + "loss": 1.0691, + "step": 626000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0500449531994953e-05, + "loss": 1.0722, + "step": 626500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0468925549179304e-05, + "loss": 1.0736, + "step": 627000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0437401566363658e-05, + "loss": 1.0748, + "step": 627500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0405877583548013e-05, + "loss": 1.0707, + "step": 628000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0374353600732366e-05, + "loss": 1.0712, + "step": 628500 + }, + { + "epoch": 2.38, + "learning_rate": 1.034282961791672e-05, + "loss": 1.0782, + "step": 629000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0311305635101071e-05, + "loss": 1.0667, + "step": 629500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0279781652285427e-05, + "loss": 1.0692, + "step": 630000 + }, + { + "epoch": 2.39, + "learning_rate": 1.024825766946978e-05, + "loss": 1.0671, + "step": 630500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0216733686654133e-05, + "loss": 1.0794, + "step": 631000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0185209703838487e-05, + "loss": 1.069, + "step": 631500 + }, + { + "epoch": 2.39, + "learning_rate": 1.015368572102284e-05, + "loss": 1.0762, + "step": 632000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0122161738207194e-05, + "loss": 1.0761, + "step": 632500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0090637755391547e-05, + "loss": 1.0722, + "step": 633000 + }, + { + "epoch": 2.4, + "learning_rate": 1.00591137725759e-05, + "loss": 1.0746, + "step": 633500 + }, + { + "epoch": 2.4, + "learning_rate": 1.0027589789760254e-05, + "loss": 1.0727, + "step": 634000 + }, + { + "epoch": 2.4, + "learning_rate": 9.996065806944607e-06, + "loss": 1.0706, + "step": 634500 + }, + { + "epoch": 2.4, + "learning_rate": 9.964541824128961e-06, + "loss": 1.0719, + "step": 635000 + }, + { + "epoch": 2.4, + "learning_rate": 9.933017841313314e-06, + "loss": 1.0761, + "step": 635500 + }, + { + "epoch": 2.41, + "learning_rate": 9.901493858497668e-06, + "loss": 1.0737, + "step": 636000 + }, + { + "epoch": 2.41, + "learning_rate": 9.869969875682023e-06, + "loss": 1.0711, + "step": 636500 + }, + { + "epoch": 2.41, + "learning_rate": 9.838445892866376e-06, + "loss": 1.075, + "step": 637000 + }, + { + "epoch": 2.41, + "learning_rate": 9.806921910050728e-06, + "loss": 1.072, + "step": 637500 + }, + { + "epoch": 2.41, + "learning_rate": 9.775397927235082e-06, + "loss": 1.0734, + "step": 638000 + }, + { + "epoch": 2.42, + "learning_rate": 9.743873944419437e-06, + "loss": 1.0737, + "step": 638500 + }, + { + "epoch": 2.42, + "learning_rate": 9.71234996160379e-06, + "loss": 1.0697, + "step": 639000 + }, + { + "epoch": 2.42, + "learning_rate": 9.680825978788142e-06, + "loss": 1.0767, + "step": 639500 + }, + { + "epoch": 2.42, + "learning_rate": 9.649301995972497e-06, + "loss": 1.0688, + "step": 640000 + }, + { + "epoch": 2.42, + "learning_rate": 9.61777801315685e-06, + "loss": 1.0666, + "step": 640500 + }, + { + "epoch": 2.42, + "learning_rate": 9.586254030341204e-06, + "loss": 1.0604, + "step": 641000 + }, + { + "epoch": 2.43, + "learning_rate": 9.554730047525556e-06, + "loss": 1.0782, + "step": 641500 + }, + { + "epoch": 2.43, + "learning_rate": 9.52320606470991e-06, + "loss": 1.0756, + "step": 642000 + }, + { + "epoch": 2.43, + "learning_rate": 9.491682081894264e-06, + "loss": 1.0665, + "step": 642500 + }, + { + "epoch": 2.43, + "learning_rate": 9.460158099078618e-06, + "loss": 1.0649, + "step": 643000 + }, + { + "epoch": 2.43, + "learning_rate": 9.428634116262971e-06, + "loss": 1.068, + "step": 643500 + }, + { + "epoch": 2.44, + "learning_rate": 9.397110133447324e-06, + "loss": 1.0761, + "step": 644000 + }, + { + "epoch": 2.44, + "learning_rate": 9.365586150631678e-06, + "loss": 1.0633, + "step": 644500 + }, + { + "epoch": 2.44, + "learning_rate": 9.334062167816031e-06, + "loss": 1.0723, + "step": 645000 + }, + { + "epoch": 2.44, + "learning_rate": 9.302538185000385e-06, + "loss": 1.064, + "step": 645500 + }, + { + "epoch": 2.44, + "learning_rate": 9.271014202184738e-06, + "loss": 1.0751, + "step": 646000 + }, + { + "epoch": 2.45, + "learning_rate": 9.239490219369092e-06, + "loss": 1.0682, + "step": 646500 + }, + { + "epoch": 2.45, + "learning_rate": 9.207966236553447e-06, + "loss": 1.0813, + "step": 647000 + }, + { + "epoch": 2.45, + "learning_rate": 9.176442253737799e-06, + "loss": 1.073, + "step": 647500 + }, + { + "epoch": 2.45, + "learning_rate": 9.144918270922152e-06, + "loss": 1.0689, + "step": 648000 + }, + { + "epoch": 2.45, + "learning_rate": 9.113394288106505e-06, + "loss": 1.0619, + "step": 648500 + }, + { + "epoch": 2.46, + "learning_rate": 9.08187030529086e-06, + "loss": 1.0722, + "step": 649000 + }, + { + "epoch": 2.46, + "learning_rate": 9.050346322475212e-06, + "loss": 1.065, + "step": 649500 + }, + { + "epoch": 2.46, + "learning_rate": 9.018822339659566e-06, + "loss": 1.0622, + "step": 650000 + }, + { + "epoch": 2.46, + "learning_rate": 8.98729835684392e-06, + "loss": 1.0621, + "step": 650500 + }, + { + "epoch": 2.46, + "learning_rate": 8.955774374028274e-06, + "loss": 1.0701, + "step": 651000 + }, + { + "epoch": 2.46, + "learning_rate": 8.924250391212628e-06, + "loss": 1.0673, + "step": 651500 + }, + { + "epoch": 2.47, + "learning_rate": 8.892726408396981e-06, + "loss": 1.063, + "step": 652000 + }, + { + "epoch": 2.47, + "learning_rate": 8.861202425581335e-06, + "loss": 1.0746, + "step": 652500 + }, + { + "epoch": 2.47, + "learning_rate": 8.829678442765688e-06, + "loss": 1.0715, + "step": 653000 + }, + { + "epoch": 2.47, + "learning_rate": 8.798154459950041e-06, + "loss": 1.064, + "step": 653500 + }, + { + "epoch": 2.47, + "learning_rate": 8.766630477134395e-06, + "loss": 1.0637, + "step": 654000 + }, + { + "epoch": 2.48, + "learning_rate": 8.735106494318748e-06, + "loss": 1.0663, + "step": 654500 + }, + { + "epoch": 2.48, + "learning_rate": 8.703582511503102e-06, + "loss": 1.0693, + "step": 655000 + }, + { + "epoch": 2.48, + "learning_rate": 8.672058528687455e-06, + "loss": 1.0673, + "step": 655500 + }, + { + "epoch": 2.48, + "learning_rate": 8.640534545871809e-06, + "loss": 1.0606, + "step": 656000 + }, + { + "epoch": 2.48, + "learning_rate": 8.609010563056162e-06, + "loss": 1.0609, + "step": 656500 + }, + { + "epoch": 2.49, + "learning_rate": 8.577486580240515e-06, + "loss": 1.0726, + "step": 657000 + }, + { + "epoch": 2.49, + "learning_rate": 8.545962597424869e-06, + "loss": 1.0634, + "step": 657500 + }, + { + "epoch": 2.49, + "learning_rate": 8.514438614609222e-06, + "loss": 1.0589, + "step": 658000 + }, + { + "epoch": 2.49, + "learning_rate": 8.482914631793576e-06, + "loss": 1.0693, + "step": 658500 + }, + { + "epoch": 2.49, + "learning_rate": 8.451390648977931e-06, + "loss": 1.0608, + "step": 659000 + }, + { + "epoch": 2.49, + "learning_rate": 8.419866666162284e-06, + "loss": 1.07, + "step": 659500 + }, + { + "epoch": 2.5, + "learning_rate": 8.388342683346636e-06, + "loss": 1.0728, + "step": 660000 + }, + { + "epoch": 2.5, + "learning_rate": 8.35681870053099e-06, + "loss": 1.0619, + "step": 660500 + }, + { + "epoch": 2.5, + "learning_rate": 8.325294717715345e-06, + "loss": 1.0645, + "step": 661000 + }, + { + "epoch": 2.5, + "learning_rate": 8.293770734899698e-06, + "loss": 1.0606, + "step": 661500 + }, + { + "epoch": 2.5, + "learning_rate": 8.26224675208405e-06, + "loss": 1.0631, + "step": 662000 + }, + { + "epoch": 2.51, + "learning_rate": 8.230722769268405e-06, + "loss": 1.0733, + "step": 662500 + }, + { + "epoch": 2.51, + "learning_rate": 8.199198786452758e-06, + "loss": 1.064, + "step": 663000 + }, + { + "epoch": 2.51, + "learning_rate": 8.167674803637112e-06, + "loss": 1.0628, + "step": 663500 + }, + { + "epoch": 2.51, + "learning_rate": 8.136150820821464e-06, + "loss": 1.0653, + "step": 664000 + }, + { + "epoch": 2.51, + "learning_rate": 8.104626838005819e-06, + "loss": 1.0561, + "step": 664500 + }, + { + "epoch": 2.52, + "learning_rate": 8.073102855190172e-06, + "loss": 1.0631, + "step": 665000 + }, + { + "epoch": 2.52, + "learning_rate": 8.041578872374526e-06, + "loss": 1.0613, + "step": 665500 + }, + { + "epoch": 2.52, + "learning_rate": 8.010054889558879e-06, + "loss": 1.0615, + "step": 666000 + }, + { + "epoch": 2.52, + "learning_rate": 7.978530906743232e-06, + "loss": 1.0692, + "step": 666500 + }, + { + "epoch": 2.52, + "learning_rate": 7.947006923927586e-06, + "loss": 1.0685, + "step": 667000 + }, + { + "epoch": 2.53, + "learning_rate": 7.91548294111194e-06, + "loss": 1.0625, + "step": 667500 + }, + { + "epoch": 2.53, + "learning_rate": 7.883958958296293e-06, + "loss": 1.0702, + "step": 668000 + }, + { + "epoch": 2.53, + "learning_rate": 7.852434975480646e-06, + "loss": 1.0648, + "step": 668500 + }, + { + "epoch": 2.53, + "learning_rate": 7.820910992665e-06, + "loss": 1.0631, + "step": 669000 + }, + { + "epoch": 2.53, + "learning_rate": 7.789387009849355e-06, + "loss": 1.0653, + "step": 669500 + }, + { + "epoch": 2.53, + "learning_rate": 7.757863027033707e-06, + "loss": 1.0647, + "step": 670000 + }, + { + "epoch": 2.54, + "learning_rate": 7.72633904421806e-06, + "loss": 1.0595, + "step": 670500 + }, + { + "epoch": 2.54, + "learning_rate": 7.694815061402415e-06, + "loss": 1.0665, + "step": 671000 + }, + { + "epoch": 2.54, + "learning_rate": 7.663291078586769e-06, + "loss": 1.0611, + "step": 671500 + }, + { + "epoch": 2.54, + "learning_rate": 7.63176709577112e-06, + "loss": 1.0597, + "step": 672000 + }, + { + "epoch": 2.54, + "learning_rate": 7.600243112955474e-06, + "loss": 1.0616, + "step": 672500 + }, + { + "epoch": 2.55, + "learning_rate": 7.568719130139829e-06, + "loss": 1.0685, + "step": 673000 + }, + { + "epoch": 2.55, + "learning_rate": 7.537195147324181e-06, + "loss": 1.0672, + "step": 673500 + }, + { + "epoch": 2.55, + "learning_rate": 7.505671164508535e-06, + "loss": 1.0586, + "step": 674000 + }, + { + "epoch": 2.55, + "learning_rate": 7.474147181692889e-06, + "loss": 1.063, + "step": 674500 + }, + { + "epoch": 2.55, + "learning_rate": 7.4426231988772426e-06, + "loss": 1.0588, + "step": 675000 + }, + { + "epoch": 2.56, + "learning_rate": 7.411099216061595e-06, + "loss": 1.0593, + "step": 675500 + }, + { + "epoch": 2.56, + "learning_rate": 7.379575233245949e-06, + "loss": 1.0564, + "step": 676000 + }, + { + "epoch": 2.56, + "learning_rate": 7.348051250430303e-06, + "loss": 1.065, + "step": 676500 + }, + { + "epoch": 2.56, + "learning_rate": 7.316527267614656e-06, + "loss": 1.0657, + "step": 677000 + }, + { + "epoch": 2.56, + "learning_rate": 7.28500328479901e-06, + "loss": 1.0555, + "step": 677500 + }, + { + "epoch": 2.56, + "learning_rate": 7.253479301983364e-06, + "loss": 1.062, + "step": 678000 + }, + { + "epoch": 2.57, + "learning_rate": 7.221955319167717e-06, + "loss": 1.0612, + "step": 678500 + }, + { + "epoch": 2.57, + "learning_rate": 7.19043133635207e-06, + "loss": 1.0648, + "step": 679000 + }, + { + "epoch": 2.57, + "learning_rate": 7.1589073535364235e-06, + "loss": 1.0595, + "step": 679500 + }, + { + "epoch": 2.57, + "learning_rate": 7.127383370720778e-06, + "loss": 1.0601, + "step": 680000 + }, + { + "epoch": 2.57, + "learning_rate": 7.09585938790513e-06, + "loss": 1.0595, + "step": 680500 + }, + { + "epoch": 2.58, + "learning_rate": 7.064335405089484e-06, + "loss": 1.0532, + "step": 681000 + }, + { + "epoch": 2.58, + "learning_rate": 7.032811422273838e-06, + "loss": 1.0577, + "step": 681500 + }, + { + "epoch": 2.58, + "learning_rate": 7.0012874394581915e-06, + "loss": 1.0652, + "step": 682000 + }, + { + "epoch": 2.58, + "learning_rate": 6.969763456642545e-06, + "loss": 1.0537, + "step": 682500 + }, + { + "epoch": 2.58, + "learning_rate": 6.9382394738268975e-06, + "loss": 1.0668, + "step": 683000 + }, + { + "epoch": 2.59, + "learning_rate": 6.906715491011252e-06, + "loss": 1.07, + "step": 683500 + }, + { + "epoch": 2.59, + "learning_rate": 6.875191508195605e-06, + "loss": 1.0588, + "step": 684000 + }, + { + "epoch": 2.59, + "learning_rate": 6.843667525379959e-06, + "loss": 1.0543, + "step": 684500 + }, + { + "epoch": 2.59, + "learning_rate": 6.812143542564313e-06, + "loss": 1.0557, + "step": 685000 + }, + { + "epoch": 2.59, + "learning_rate": 6.780619559748666e-06, + "loss": 1.0475, + "step": 685500 + }, + { + "epoch": 2.6, + "learning_rate": 6.749095576933019e-06, + "loss": 1.0608, + "step": 686000 + }, + { + "epoch": 2.6, + "learning_rate": 6.7175715941173724e-06, + "loss": 1.0565, + "step": 686500 + }, + { + "epoch": 2.6, + "learning_rate": 6.686047611301727e-06, + "loss": 1.0513, + "step": 687000 + }, + { + "epoch": 2.6, + "learning_rate": 6.65452362848608e-06, + "loss": 1.0601, + "step": 687500 + }, + { + "epoch": 2.6, + "learning_rate": 6.622999645670433e-06, + "loss": 1.0524, + "step": 688000 + }, + { + "epoch": 2.6, + "learning_rate": 6.591475662854787e-06, + "loss": 1.0611, + "step": 688500 + }, + { + "epoch": 2.61, + "learning_rate": 6.5599516800391405e-06, + "loss": 1.0514, + "step": 689000 + }, + { + "epoch": 2.61, + "learning_rate": 6.528427697223494e-06, + "loss": 1.0609, + "step": 689500 + }, + { + "epoch": 2.61, + "learning_rate": 6.496903714407848e-06, + "loss": 1.0616, + "step": 690000 + }, + { + "epoch": 2.61, + "learning_rate": 6.465379731592202e-06, + "loss": 1.0495, + "step": 690500 + }, + { + "epoch": 2.61, + "learning_rate": 6.433855748776554e-06, + "loss": 1.0681, + "step": 691000 + }, + { + "epoch": 2.62, + "learning_rate": 6.402331765960908e-06, + "loss": 1.0629, + "step": 691500 + }, + { + "epoch": 2.62, + "learning_rate": 6.370807783145262e-06, + "loss": 1.0577, + "step": 692000 + }, + { + "epoch": 2.62, + "learning_rate": 6.339283800329615e-06, + "loss": 1.0577, + "step": 692500 + }, + { + "epoch": 2.62, + "learning_rate": 6.307759817513968e-06, + "loss": 1.062, + "step": 693000 + }, + { + "epoch": 2.62, + "learning_rate": 6.276235834698323e-06, + "loss": 1.0612, + "step": 693500 + }, + { + "epoch": 2.63, + "learning_rate": 6.244711851882676e-06, + "loss": 1.0552, + "step": 694000 + }, + { + "epoch": 2.63, + "learning_rate": 6.213187869067029e-06, + "loss": 1.0621, + "step": 694500 + }, + { + "epoch": 2.63, + "learning_rate": 6.1816638862513826e-06, + "loss": 1.0647, + "step": 695000 + }, + { + "epoch": 2.63, + "learning_rate": 6.150139903435736e-06, + "loss": 1.0584, + "step": 695500 + }, + { + "epoch": 2.63, + "learning_rate": 6.1186159206200894e-06, + "loss": 1.0503, + "step": 696000 + }, + { + "epoch": 2.63, + "learning_rate": 6.087091937804444e-06, + "loss": 1.057, + "step": 696500 + }, + { + "epoch": 2.64, + "learning_rate": 6.055567954988796e-06, + "loss": 1.0624, + "step": 697000 + }, + { + "epoch": 2.64, + "learning_rate": 6.024043972173151e-06, + "loss": 1.0536, + "step": 697500 + }, + { + "epoch": 2.64, + "learning_rate": 5.992519989357503e-06, + "loss": 1.0544, + "step": 698000 + }, + { + "epoch": 2.64, + "learning_rate": 5.9609960065418575e-06, + "loss": 1.046, + "step": 698500 + }, + { + "epoch": 2.64, + "learning_rate": 5.92947202372621e-06, + "loss": 1.06, + "step": 699000 + }, + { + "epoch": 2.65, + "learning_rate": 5.897948040910564e-06, + "loss": 1.0442, + "step": 699500 + }, + { + "epoch": 2.65, + "learning_rate": 5.866424058094918e-06, + "loss": 1.0605, + "step": 700000 + }, + { + "epoch": 2.65, + "learning_rate": 5.834900075279271e-06, + "loss": 1.0518, + "step": 700500 + }, + { + "epoch": 2.65, + "learning_rate": 5.803376092463625e-06, + "loss": 1.0561, + "step": 701000 + }, + { + "epoch": 2.65, + "learning_rate": 5.771852109647978e-06, + "loss": 1.0522, + "step": 701500 + }, + { + "epoch": 2.66, + "learning_rate": 5.7403281268323315e-06, + "loss": 1.0593, + "step": 702000 + }, + { + "epoch": 2.66, + "learning_rate": 5.708804144016686e-06, + "loss": 1.055, + "step": 702500 + }, + { + "epoch": 2.66, + "learning_rate": 5.677280161201038e-06, + "loss": 1.0623, + "step": 703000 + }, + { + "epoch": 2.66, + "learning_rate": 5.645756178385393e-06, + "loss": 1.0621, + "step": 703500 + }, + { + "epoch": 2.66, + "learning_rate": 5.614232195569745e-06, + "loss": 1.0551, + "step": 704000 + }, + { + "epoch": 2.67, + "learning_rate": 5.5827082127540995e-06, + "loss": 1.0457, + "step": 704500 + }, + { + "epoch": 2.67, + "learning_rate": 5.551184229938453e-06, + "loss": 1.0516, + "step": 705000 + }, + { + "epoch": 2.67, + "learning_rate": 5.519660247122806e-06, + "loss": 1.0605, + "step": 705500 + }, + { + "epoch": 2.67, + "learning_rate": 5.48813626430716e-06, + "loss": 1.0568, + "step": 706000 + }, + { + "epoch": 2.67, + "learning_rate": 5.456612281491513e-06, + "loss": 1.0498, + "step": 706500 + }, + { + "epoch": 2.67, + "learning_rate": 5.425088298675867e-06, + "loss": 1.0547, + "step": 707000 + }, + { + "epoch": 2.68, + "learning_rate": 5.39356431586022e-06, + "loss": 1.0549, + "step": 707500 + }, + { + "epoch": 2.68, + "learning_rate": 5.362040333044574e-06, + "loss": 1.0595, + "step": 708000 + }, + { + "epoch": 2.68, + "learning_rate": 5.330516350228927e-06, + "loss": 1.0626, + "step": 708500 + }, + { + "epoch": 2.68, + "learning_rate": 5.298992367413281e-06, + "loss": 1.0501, + "step": 709000 + }, + { + "epoch": 2.68, + "learning_rate": 5.267468384597635e-06, + "loss": 1.0593, + "step": 709500 + }, + { + "epoch": 2.69, + "learning_rate": 5.235944401781988e-06, + "loss": 1.0556, + "step": 710000 + }, + { + "epoch": 2.69, + "learning_rate": 5.204420418966342e-06, + "loss": 1.0623, + "step": 710500 + }, + { + "epoch": 2.69, + "learning_rate": 5.172896436150695e-06, + "loss": 1.0494, + "step": 711000 + }, + { + "epoch": 2.69, + "learning_rate": 5.1413724533350485e-06, + "loss": 1.0515, + "step": 711500 + }, + { + "epoch": 2.69, + "learning_rate": 5.109848470519402e-06, + "loss": 1.0591, + "step": 712000 + }, + { + "epoch": 2.7, + "learning_rate": 5.078324487703755e-06, + "loss": 1.0514, + "step": 712500 + }, + { + "epoch": 2.7, + "learning_rate": 5.04680050488811e-06, + "loss": 1.0553, + "step": 713000 + }, + { + "epoch": 2.7, + "learning_rate": 5.015276522072462e-06, + "loss": 1.0578, + "step": 713500 + }, + { + "epoch": 2.7, + "learning_rate": 4.9837525392568165e-06, + "loss": 1.0506, + "step": 714000 + }, + { + "epoch": 2.7, + "learning_rate": 4.952228556441169e-06, + "loss": 1.0539, + "step": 714500 + }, + { + "epoch": 2.7, + "learning_rate": 4.920704573625523e-06, + "loss": 1.0544, + "step": 715000 + }, + { + "epoch": 2.71, + "learning_rate": 4.889180590809877e-06, + "loss": 1.0539, + "step": 715500 + }, + { + "epoch": 2.71, + "learning_rate": 4.85765660799423e-06, + "loss": 1.0479, + "step": 716000 + }, + { + "epoch": 2.71, + "learning_rate": 4.826132625178584e-06, + "loss": 1.0552, + "step": 716500 + }, + { + "epoch": 2.71, + "learning_rate": 4.794608642362937e-06, + "loss": 1.0532, + "step": 717000 + }, + { + "epoch": 2.71, + "learning_rate": 4.7630846595472906e-06, + "loss": 1.0511, + "step": 717500 + }, + { + "epoch": 2.72, + "learning_rate": 4.731560676731644e-06, + "loss": 1.0522, + "step": 718000 + }, + { + "epoch": 2.72, + "learning_rate": 4.7000366939159974e-06, + "loss": 1.0478, + "step": 718500 + }, + { + "epoch": 2.72, + "learning_rate": 4.668512711100352e-06, + "loss": 1.0505, + "step": 719000 + }, + { + "epoch": 2.72, + "learning_rate": 4.636988728284704e-06, + "loss": 1.0561, + "step": 719500 + }, + { + "epoch": 2.72, + "learning_rate": 4.605464745469059e-06, + "loss": 1.0526, + "step": 720000 + }, + { + "epoch": 2.73, + "learning_rate": 4.573940762653411e-06, + "loss": 1.0546, + "step": 720500 + }, + { + "epoch": 2.73, + "learning_rate": 4.5424167798377655e-06, + "loss": 1.057, + "step": 721000 + }, + { + "epoch": 2.73, + "learning_rate": 4.510892797022119e-06, + "loss": 1.0569, + "step": 721500 + }, + { + "epoch": 2.73, + "learning_rate": 4.479368814206472e-06, + "loss": 1.0517, + "step": 722000 + }, + { + "epoch": 2.73, + "learning_rate": 4.447844831390826e-06, + "loss": 1.0565, + "step": 722500 + }, + { + "epoch": 2.74, + "learning_rate": 4.416320848575179e-06, + "loss": 1.0603, + "step": 723000 + }, + { + "epoch": 2.74, + "learning_rate": 4.384796865759533e-06, + "loss": 1.0549, + "step": 723500 + }, + { + "epoch": 2.74, + "learning_rate": 4.353272882943886e-06, + "loss": 1.0549, + "step": 724000 + }, + { + "epoch": 2.74, + "learning_rate": 4.3217489001282395e-06, + "loss": 1.0482, + "step": 724500 + }, + { + "epoch": 2.74, + "learning_rate": 4.290224917312594e-06, + "loss": 1.0547, + "step": 725000 + }, + { + "epoch": 2.74, + "learning_rate": 4.258700934496946e-06, + "loss": 1.0559, + "step": 725500 + }, + { + "epoch": 2.75, + "learning_rate": 4.227176951681301e-06, + "loss": 1.0486, + "step": 726000 + }, + { + "epoch": 2.75, + "learning_rate": 4.195652968865653e-06, + "loss": 1.0547, + "step": 726500 + }, + { + "epoch": 2.75, + "learning_rate": 4.1641289860500075e-06, + "loss": 1.0576, + "step": 727000 + }, + { + "epoch": 2.75, + "learning_rate": 4.13260500323436e-06, + "loss": 1.0508, + "step": 727500 + }, + { + "epoch": 2.75, + "learning_rate": 4.101081020418714e-06, + "loss": 1.055, + "step": 728000 + }, + { + "epoch": 2.76, + "learning_rate": 4.069557037603068e-06, + "loss": 1.053, + "step": 728500 + }, + { + "epoch": 2.76, + "learning_rate": 4.038033054787421e-06, + "loss": 1.0527, + "step": 729000 + }, + { + "epoch": 2.76, + "learning_rate": 4.006509071971775e-06, + "loss": 1.0536, + "step": 729500 + }, + { + "epoch": 2.76, + "learning_rate": 3.974985089156128e-06, + "loss": 1.0515, + "step": 730000 + }, + { + "epoch": 2.76, + "learning_rate": 3.943461106340482e-06, + "loss": 1.0488, + "step": 730500 + }, + { + "epoch": 2.77, + "learning_rate": 3.911937123524836e-06, + "loss": 1.0453, + "step": 731000 + }, + { + "epoch": 2.77, + "learning_rate": 3.8804131407091885e-06, + "loss": 1.0514, + "step": 731500 + }, + { + "epoch": 2.77, + "learning_rate": 3.848889157893543e-06, + "loss": 1.0507, + "step": 732000 + }, + { + "epoch": 2.77, + "learning_rate": 3.817365175077896e-06, + "loss": 1.0403, + "step": 732500 + }, + { + "epoch": 2.77, + "learning_rate": 3.7858411922622496e-06, + "loss": 1.0421, + "step": 733000 + }, + { + "epoch": 2.77, + "learning_rate": 3.7543172094466026e-06, + "loss": 1.0459, + "step": 733500 + }, + { + "epoch": 2.78, + "learning_rate": 3.7227932266309565e-06, + "loss": 1.0415, + "step": 734000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6912692438153103e-06, + "loss": 1.057, + "step": 734500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6597452609996634e-06, + "loss": 1.0531, + "step": 735000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6282212781840172e-06, + "loss": 1.0478, + "step": 735500 + }, + { + "epoch": 2.78, + "learning_rate": 3.5966972953683702e-06, + "loss": 1.0532, + "step": 736000 + }, + { + "epoch": 2.79, + "learning_rate": 3.565173312552724e-06, + "loss": 1.0481, + "step": 736500 + }, + { + "epoch": 2.79, + "learning_rate": 3.533649329737077e-06, + "loss": 1.043, + "step": 737000 + }, + { + "epoch": 2.79, + "learning_rate": 3.502125346921431e-06, + "loss": 1.0521, + "step": 737500 + }, + { + "epoch": 2.79, + "learning_rate": 3.470601364105785e-06, + "loss": 1.0489, + "step": 738000 + }, + { + "epoch": 2.79, + "learning_rate": 3.439077381290138e-06, + "loss": 1.0536, + "step": 738500 + }, + { + "epoch": 2.8, + "learning_rate": 3.4075533984744917e-06, + "loss": 1.0472, + "step": 739000 + }, + { + "epoch": 2.8, + "learning_rate": 3.3760294156588447e-06, + "loss": 1.048, + "step": 739500 + }, + { + "epoch": 2.8, + "learning_rate": 3.3445054328431986e-06, + "loss": 1.0474, + "step": 740000 + }, + { + "epoch": 2.8, + "learning_rate": 3.3129814500275524e-06, + "loss": 1.042, + "step": 740500 + }, + { + "epoch": 2.8, + "learning_rate": 3.2814574672119054e-06, + "loss": 1.055, + "step": 741000 + }, + { + "epoch": 2.81, + "learning_rate": 3.2499334843962593e-06, + "loss": 1.048, + "step": 741500 + }, + { + "epoch": 2.81, + "learning_rate": 3.2184095015806123e-06, + "loss": 1.0434, + "step": 742000 + }, + { + "epoch": 2.81, + "learning_rate": 3.186885518764966e-06, + "loss": 1.0595, + "step": 742500 + }, + { + "epoch": 2.81, + "learning_rate": 3.155361535949319e-06, + "loss": 1.0464, + "step": 743000 + }, + { + "epoch": 2.81, + "learning_rate": 3.123837553133673e-06, + "loss": 1.0456, + "step": 743500 + }, + { + "epoch": 2.81, + "learning_rate": 3.0923135703180265e-06, + "loss": 1.0464, + "step": 744000 + }, + { + "epoch": 2.82, + "learning_rate": 3.06078958750238e-06, + "loss": 1.0556, + "step": 744500 + }, + { + "epoch": 2.82, + "learning_rate": 3.0292656046867333e-06, + "loss": 1.0398, + "step": 745000 + }, + { + "epoch": 2.82, + "learning_rate": 2.997741621871087e-06, + "loss": 1.0468, + "step": 745500 + }, + { + "epoch": 2.82, + "learning_rate": 2.9662176390554406e-06, + "loss": 1.0475, + "step": 746000 + }, + { + "epoch": 2.82, + "learning_rate": 2.934693656239794e-06, + "loss": 1.0468, + "step": 746500 + }, + { + "epoch": 2.83, + "learning_rate": 2.9031696734241475e-06, + "loss": 1.0412, + "step": 747000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8716456906085014e-06, + "loss": 1.041, + "step": 747500 + }, + { + "epoch": 2.83, + "learning_rate": 2.840121707792855e-06, + "loss": 1.0431, + "step": 748000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8085977249772082e-06, + "loss": 1.047, + "step": 748500 + }, + { + "epoch": 2.83, + "learning_rate": 2.777073742161562e-06, + "loss": 1.055, + "step": 749000 + }, + { + "epoch": 2.84, + "learning_rate": 2.7455497593459155e-06, + "loss": 1.0456, + "step": 749500 + }, + { + "epoch": 2.84, + "learning_rate": 2.714025776530269e-06, + "loss": 1.0545, + "step": 750000 + }, + { + "epoch": 2.84, + "learning_rate": 2.6825017937146224e-06, + "loss": 1.0522, + "step": 750500 + }, + { + "epoch": 2.84, + "learning_rate": 2.650977810898976e-06, + "loss": 1.0421, + "step": 751000 + }, + { + "epoch": 2.84, + "learning_rate": 2.6194538280833297e-06, + "loss": 1.0401, + "step": 751500 + }, + { + "epoch": 2.84, + "learning_rate": 2.587929845267683e-06, + "loss": 1.0476, + "step": 752000 + }, + { + "epoch": 2.85, + "learning_rate": 2.5564058624520366e-06, + "loss": 1.0394, + "step": 752500 + }, + { + "epoch": 2.85, + "learning_rate": 2.52488187963639e-06, + "loss": 1.0383, + "step": 753000 + }, + { + "epoch": 2.85, + "learning_rate": 2.4933578968207435e-06, + "loss": 1.0448, + "step": 753500 + }, + { + "epoch": 2.85, + "learning_rate": 2.461833914005097e-06, + "loss": 1.056, + "step": 754000 + }, + { + "epoch": 2.85, + "learning_rate": 2.4303099311894503e-06, + "loss": 1.0415, + "step": 754500 + }, + { + "epoch": 2.86, + "learning_rate": 2.398785948373804e-06, + "loss": 1.0383, + "step": 755000 + }, + { + "epoch": 2.86, + "learning_rate": 2.3672619655581576e-06, + "loss": 1.0481, + "step": 755500 + }, + { + "epoch": 2.86, + "learning_rate": 2.335737982742511e-06, + "loss": 1.0483, + "step": 756000 + }, + { + "epoch": 2.86, + "learning_rate": 2.3042139999268645e-06, + "loss": 1.0469, + "step": 756500 + }, + { + "epoch": 2.86, + "learning_rate": 2.272690017111218e-06, + "loss": 1.0493, + "step": 757000 + }, + { + "epoch": 2.87, + "learning_rate": 2.2411660342955714e-06, + "loss": 1.0497, + "step": 757500 + }, + { + "epoch": 2.87, + "learning_rate": 2.2096420514799252e-06, + "loss": 1.0426, + "step": 758000 + }, + { + "epoch": 2.87, + "learning_rate": 2.1781180686642787e-06, + "loss": 1.0399, + "step": 758500 + }, + { + "epoch": 2.87, + "learning_rate": 2.146594085848632e-06, + "loss": 1.0505, + "step": 759000 + }, + { + "epoch": 2.87, + "learning_rate": 2.1150701030329855e-06, + "loss": 1.043, + "step": 759500 + }, + { + "epoch": 2.87, + "learning_rate": 2.083546120217339e-06, + "loss": 1.0407, + "step": 760000 + }, + { + "epoch": 2.88, + "learning_rate": 2.0520221374016924e-06, + "loss": 1.0512, + "step": 760500 + }, + { + "epoch": 2.88, + "learning_rate": 2.0204981545860463e-06, + "loss": 1.0439, + "step": 761000 + }, + { + "epoch": 2.88, + "learning_rate": 1.9889741717703997e-06, + "loss": 1.0465, + "step": 761500 + }, + { + "epoch": 2.88, + "learning_rate": 1.957450188954753e-06, + "loss": 1.0413, + "step": 762000 + }, + { + "epoch": 2.88, + "learning_rate": 1.9259262061391066e-06, + "loss": 1.0398, + "step": 762500 + }, + { + "epoch": 2.89, + "learning_rate": 1.89440222332346e-06, + "loss": 1.0443, + "step": 763000 + }, + { + "epoch": 2.89, + "learning_rate": 1.8628782405078134e-06, + "loss": 1.0505, + "step": 763500 + }, + { + "epoch": 2.89, + "learning_rate": 1.8313542576921669e-06, + "loss": 1.0475, + "step": 764000 + }, + { + "epoch": 2.89, + "learning_rate": 1.7998302748765207e-06, + "loss": 1.0464, + "step": 764500 + }, + { + "epoch": 2.89, + "learning_rate": 1.7683062920608742e-06, + "loss": 1.0379, + "step": 765000 + }, + { + "epoch": 2.9, + "learning_rate": 1.7367823092452276e-06, + "loss": 1.0433, + "step": 765500 + }, + { + "epoch": 2.9, + "learning_rate": 1.705258326429581e-06, + "loss": 1.0483, + "step": 766000 + }, + { + "epoch": 2.9, + "learning_rate": 1.6737343436139347e-06, + "loss": 1.0492, + "step": 766500 + }, + { + "epoch": 2.9, + "learning_rate": 1.6422103607982881e-06, + "loss": 1.0481, + "step": 767000 + }, + { + "epoch": 2.9, + "learning_rate": 1.6106863779826418e-06, + "loss": 1.0435, + "step": 767500 + }, + { + "epoch": 2.91, + "learning_rate": 1.5791623951669952e-06, + "loss": 1.0428, + "step": 768000 + }, + { + "epoch": 2.91, + "learning_rate": 1.5476384123513489e-06, + "loss": 1.0431, + "step": 768500 + }, + { + "epoch": 2.91, + "learning_rate": 1.5161144295357023e-06, + "loss": 1.0402, + "step": 769000 + }, + { + "epoch": 2.91, + "learning_rate": 1.4845904467200557e-06, + "loss": 1.0375, + "step": 769500 + }, + { + "epoch": 2.91, + "learning_rate": 1.4530664639044094e-06, + "loss": 1.039, + "step": 770000 + }, + { + "epoch": 2.91, + "learning_rate": 1.4215424810887628e-06, + "loss": 1.0465, + "step": 770500 + }, + { + "epoch": 2.92, + "learning_rate": 1.3900184982731163e-06, + "loss": 1.0445, + "step": 771000 + }, + { + "epoch": 2.92, + "learning_rate": 1.35849451545747e-06, + "loss": 1.0495, + "step": 771500 + }, + { + "epoch": 2.92, + "learning_rate": 1.3269705326418233e-06, + "loss": 1.0417, + "step": 772000 + }, + { + "epoch": 2.92, + "learning_rate": 1.2954465498261768e-06, + "loss": 1.0418, + "step": 772500 + }, + { + "epoch": 2.92, + "learning_rate": 1.2639225670105304e-06, + "loss": 1.0442, + "step": 773000 + }, + { + "epoch": 2.93, + "learning_rate": 1.2323985841948839e-06, + "loss": 1.0474, + "step": 773500 + }, + { + "epoch": 2.93, + "learning_rate": 1.2008746013792373e-06, + "loss": 1.0429, + "step": 774000 + }, + { + "epoch": 2.93, + "learning_rate": 1.169350618563591e-06, + "loss": 1.0394, + "step": 774500 + }, + { + "epoch": 2.93, + "learning_rate": 1.1378266357479444e-06, + "loss": 1.0453, + "step": 775000 + }, + { + "epoch": 2.93, + "learning_rate": 1.1063026529322978e-06, + "loss": 1.0387, + "step": 775500 + }, + { + "epoch": 2.94, + "learning_rate": 1.0747786701166515e-06, + "loss": 1.0438, + "step": 776000 + }, + { + "epoch": 2.94, + "learning_rate": 1.0432546873010049e-06, + "loss": 1.0435, + "step": 776500 + }, + { + "epoch": 2.94, + "learning_rate": 1.0117307044853583e-06, + "loss": 1.046, + "step": 777000 + }, + { + "epoch": 2.94, + "learning_rate": 9.80206721669712e-07, + "loss": 1.0365, + "step": 777500 + }, + { + "epoch": 2.94, + "learning_rate": 9.486827388540655e-07, + "loss": 1.0443, + "step": 778000 + }, + { + "epoch": 2.94, + "learning_rate": 9.17158756038419e-07, + "loss": 1.0454, + "step": 778500 + }, + { + "epoch": 2.95, + "learning_rate": 8.856347732227724e-07, + "loss": 1.0427, + "step": 779000 + }, + { + "epoch": 2.95, + "learning_rate": 8.54110790407126e-07, + "loss": 1.0434, + "step": 779500 + }, + { + "epoch": 2.95, + "learning_rate": 8.225868075914795e-07, + "loss": 1.0393, + "step": 780000 + }, + { + "epoch": 2.95, + "learning_rate": 7.910628247758329e-07, + "loss": 1.0326, + "step": 780500 + }, + { + "epoch": 2.95, + "learning_rate": 7.595388419601865e-07, + "loss": 1.0494, + "step": 781000 + }, + { + "epoch": 2.96, + "learning_rate": 7.2801485914454e-07, + "loss": 1.0307, + "step": 781500 + }, + { + "epoch": 2.96, + "learning_rate": 6.964908763288935e-07, + "loss": 1.041, + "step": 782000 + }, + { + "epoch": 2.96, + "learning_rate": 6.64966893513247e-07, + "loss": 1.0451, + "step": 782500 + }, + { + "epoch": 2.96, + "learning_rate": 6.334429106976005e-07, + "loss": 1.0507, + "step": 783000 + }, + { + "epoch": 2.96, + "learning_rate": 6.019189278819541e-07, + "loss": 1.0379, + "step": 783500 + }, + { + "epoch": 2.97, + "learning_rate": 5.703949450663076e-07, + "loss": 1.0445, + "step": 784000 + }, + { + "epoch": 2.97, + "learning_rate": 5.388709622506611e-07, + "loss": 1.0423, + "step": 784500 + }, + { + "epoch": 2.97, + "learning_rate": 5.073469794350146e-07, + "loss": 1.0455, + "step": 785000 + }, + { + "epoch": 2.97, + "learning_rate": 4.758229966193681e-07, + "loss": 1.0432, + "step": 785500 + }, + { + "epoch": 2.97, + "learning_rate": 4.4429901380372166e-07, + "loss": 1.0409, + "step": 786000 + }, + { + "epoch": 2.98, + "learning_rate": 4.127750309880751e-07, + "loss": 1.0406, + "step": 786500 + }, + { + "epoch": 2.98, + "learning_rate": 3.8125104817242864e-07, + "loss": 1.0428, + "step": 787000 + }, + { + "epoch": 2.98, + "learning_rate": 3.497270653567821e-07, + "loss": 1.0337, + "step": 787500 + }, + { + "epoch": 2.98, + "learning_rate": 3.1820308254113567e-07, + "loss": 1.0387, + "step": 788000 + }, + { + "epoch": 2.98, + "learning_rate": 2.8667909972548916e-07, + "loss": 1.0403, + "step": 788500 + }, + { + "epoch": 2.98, + "learning_rate": 2.551551169098427e-07, + "loss": 1.046, + "step": 789000 + }, + { + "epoch": 2.99, + "learning_rate": 2.236311340941962e-07, + "loss": 1.0402, + "step": 789500 + }, + { + "epoch": 2.99, + "learning_rate": 1.921071512785497e-07, + "loss": 1.0478, + "step": 790000 + }, + { + "epoch": 2.99, + "learning_rate": 1.6058316846290322e-07, + "loss": 1.0432, + "step": 790500 + }, + { + "epoch": 2.99, + "learning_rate": 1.290591856472567e-07, + "loss": 1.0443, + "step": 791000 + }, + { + "epoch": 2.99, + "learning_rate": 9.753520283161024e-08, + "loss": 1.0442, + "step": 791500 + }, + { + "epoch": 3.0, + "learning_rate": 6.601122001596375e-08, + "loss": 1.0512, + "step": 792000 + }, + { + "epoch": 3.0, + "learning_rate": 3.448723720031726e-08, + "loss": 1.0413, + "step": 792500 + }, + { + "epoch": 3.0, + "learning_rate": 2.96325438467077e-09, + "loss": 1.0338, + "step": 793000 + }, + { + "epoch": 3.0, + "step": 793047, + "total_flos": 3.339750619344292e+18, + "train_loss": 0.9585861873120558, + "train_runtime": 247104.2719, + "train_samples_per_second": 51.35, + "train_steps_per_second": 3.209 + } + ], + "max_steps": 793047, + "num_train_epochs": 3, + "total_flos": 3.339750619344292e+18, + "trial_name": null, + "trial_params": null +}