{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 793047, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9968476017184356e-05, "loss": 2.2908, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.993695203436871e-05, "loss": 1.9499, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.990542805155306e-05, "loss": 1.8515, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9873904068737416e-05, "loss": 1.7764, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.984238008592177e-05, "loss": 1.7312, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.981085610310612e-05, "loss": 1.7093, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.9779332120290476e-05, "loss": 1.6757, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.974780813747483e-05, "loss": 1.6496, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.971628415465918e-05, "loss": 1.6336, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.968476017184354e-05, "loss": 1.6159, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.965323618902789e-05, "loss": 1.6152, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.9621712206212244e-05, "loss": 1.5905, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.95901882233966e-05, "loss": 1.5704, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.955866424058096e-05, "loss": 1.5667, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.9527140257765304e-05, "loss": 1.5538, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.949561627494966e-05, "loss": 1.5581, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.946409229213401e-05, "loss": 1.5497, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.9432568309318364e-05, "loss": 1.5373, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.9401044326502724e-05, "loss": 1.5261, "step": 9500 }, { "epoch": 0.04, "learning_rate": 4.936952034368707e-05, "loss": 1.5222, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.9337996360871425e-05, "loss": 1.5082, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.930647237805578e-05, "loss": 1.5114, "step": 11000 }, { "epoch": 0.04, "learning_rate": 4.927494839524013e-05, "loss": 1.5152, "step": 11500 }, { "epoch": 0.05, "learning_rate": 4.9243424412424485e-05, "loss": 1.5012, "step": 12000 }, { "epoch": 0.05, "learning_rate": 4.9211900429608845e-05, "loss": 1.4883, "step": 12500 }, { "epoch": 0.05, "learning_rate": 4.918037644679319e-05, "loss": 1.5023, "step": 13000 }, { "epoch": 0.05, "learning_rate": 4.9148852463977545e-05, "loss": 1.4903, "step": 13500 }, { "epoch": 0.05, "learning_rate": 4.91173284811619e-05, "loss": 1.4818, "step": 14000 }, { "epoch": 0.05, "learning_rate": 4.908580449834625e-05, "loss": 1.4761, "step": 14500 }, { "epoch": 0.06, "learning_rate": 4.905428051553061e-05, "loss": 1.4685, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.902275653271496e-05, "loss": 1.4781, "step": 15500 }, { "epoch": 0.06, "learning_rate": 4.899123254989931e-05, "loss": 1.474, "step": 16000 }, { "epoch": 0.06, "learning_rate": 4.8959708567083666e-05, "loss": 1.4616, "step": 16500 }, { "epoch": 0.06, "learning_rate": 4.892818458426802e-05, "loss": 1.4584, "step": 17000 }, { "epoch": 0.07, "learning_rate": 4.889666060145238e-05, "loss": 1.4573, "step": 17500 }, { "epoch": 0.07, "learning_rate": 4.886513661863673e-05, "loss": 1.4563, "step": 18000 }, { "epoch": 0.07, "learning_rate": 4.883361263582108e-05, "loss": 1.4433, "step": 18500 }, { "epoch": 0.07, "learning_rate": 4.880208865300543e-05, "loss": 1.4416, "step": 19000 }, { "epoch": 0.07, "learning_rate": 4.8770564670189786e-05, "loss": 1.4432, "step": 19500 }, { "epoch": 0.08, "learning_rate": 4.8739040687374147e-05, "loss": 1.4404, "step": 20000 }, { "epoch": 0.08, "learning_rate": 4.87075167045585e-05, "loss": 1.4457, "step": 20500 }, { "epoch": 0.08, "learning_rate": 4.8675992721742853e-05, "loss": 1.4489, "step": 21000 }, { "epoch": 0.08, "learning_rate": 4.86444687389272e-05, "loss": 1.4411, "step": 21500 }, { "epoch": 0.08, "learning_rate": 4.8612944756111554e-05, "loss": 1.4365, "step": 22000 }, { "epoch": 0.09, "learning_rate": 4.858142077329591e-05, "loss": 1.4291, "step": 22500 }, { "epoch": 0.09, "learning_rate": 4.854989679048027e-05, "loss": 1.4225, "step": 23000 }, { "epoch": 0.09, "learning_rate": 4.851837280766462e-05, "loss": 1.4206, "step": 23500 }, { "epoch": 0.09, "learning_rate": 4.848684882484897e-05, "loss": 1.4259, "step": 24000 }, { "epoch": 0.09, "learning_rate": 4.845532484203332e-05, "loss": 1.4254, "step": 24500 }, { "epoch": 0.09, "learning_rate": 4.8423800859217674e-05, "loss": 1.4368, "step": 25000 }, { "epoch": 0.1, "learning_rate": 4.8392276876402034e-05, "loss": 1.4213, "step": 25500 }, { "epoch": 0.1, "learning_rate": 4.836075289358639e-05, "loss": 1.4223, "step": 26000 }, { "epoch": 0.1, "learning_rate": 4.832922891077074e-05, "loss": 1.4233, "step": 26500 }, { "epoch": 0.1, "learning_rate": 4.829770492795509e-05, "loss": 1.4174, "step": 27000 }, { "epoch": 0.1, "learning_rate": 4.826618094513944e-05, "loss": 1.4244, "step": 27500 }, { "epoch": 0.11, "learning_rate": 4.82346569623238e-05, "loss": 1.4063, "step": 28000 }, { "epoch": 0.11, "learning_rate": 4.8203132979508155e-05, "loss": 1.4058, "step": 28500 }, { "epoch": 0.11, "learning_rate": 4.817160899669251e-05, "loss": 1.4124, "step": 29000 }, { "epoch": 0.11, "learning_rate": 4.814008501387686e-05, "loss": 1.3993, "step": 29500 }, { "epoch": 0.11, "learning_rate": 4.810856103106121e-05, "loss": 1.4086, "step": 30000 }, { "epoch": 0.12, "learning_rate": 4.807703704824557e-05, "loss": 1.4062, "step": 30500 }, { "epoch": 0.12, "learning_rate": 4.804551306542992e-05, "loss": 1.3945, "step": 31000 }, { "epoch": 0.12, "learning_rate": 4.8013989082614276e-05, "loss": 1.4016, "step": 31500 }, { "epoch": 0.12, "learning_rate": 4.798246509979863e-05, "loss": 1.4041, "step": 32000 }, { "epoch": 0.12, "learning_rate": 4.7950941116982976e-05, "loss": 1.4049, "step": 32500 }, { "epoch": 0.12, "learning_rate": 4.7919417134167336e-05, "loss": 1.4018, "step": 33000 }, { "epoch": 0.13, "learning_rate": 4.788789315135169e-05, "loss": 1.3937, "step": 33500 }, { "epoch": 0.13, "learning_rate": 4.785636916853604e-05, "loss": 1.3996, "step": 34000 }, { "epoch": 0.13, "learning_rate": 4.7824845185720396e-05, "loss": 1.3855, "step": 34500 }, { "epoch": 0.13, "learning_rate": 4.779332120290475e-05, "loss": 1.392, "step": 35000 }, { "epoch": 0.13, "learning_rate": 4.7761797220089096e-05, "loss": 1.3924, "step": 35500 }, { "epoch": 0.14, "learning_rate": 4.7730273237273457e-05, "loss": 1.3935, "step": 36000 }, { "epoch": 0.14, "learning_rate": 4.769874925445781e-05, "loss": 1.3912, "step": 36500 }, { "epoch": 0.14, "learning_rate": 4.7667225271642163e-05, "loss": 1.3808, "step": 37000 }, { "epoch": 0.14, "learning_rate": 4.763570128882652e-05, "loss": 1.3806, "step": 37500 }, { "epoch": 0.14, "learning_rate": 4.760417730601087e-05, "loss": 1.3829, "step": 38000 }, { "epoch": 0.15, "learning_rate": 4.7572653323195224e-05, "loss": 1.3838, "step": 38500 }, { "epoch": 0.15, "learning_rate": 4.754112934037958e-05, "loss": 1.3819, "step": 39000 }, { "epoch": 0.15, "learning_rate": 4.750960535756393e-05, "loss": 1.3824, "step": 39500 }, { "epoch": 0.15, "learning_rate": 4.7478081374748284e-05, "loss": 1.3787, "step": 40000 }, { "epoch": 0.15, "learning_rate": 4.744655739193264e-05, "loss": 1.3675, "step": 40500 }, { "epoch": 0.16, "learning_rate": 4.741503340911699e-05, "loss": 1.3719, "step": 41000 }, { "epoch": 0.16, "learning_rate": 4.7383509426301344e-05, "loss": 1.3732, "step": 41500 }, { "epoch": 0.16, "learning_rate": 4.73519854434857e-05, "loss": 1.3658, "step": 42000 }, { "epoch": 0.16, "learning_rate": 4.732046146067005e-05, "loss": 1.3841, "step": 42500 }, { "epoch": 0.16, "learning_rate": 4.7288937477854405e-05, "loss": 1.3722, "step": 43000 }, { "epoch": 0.16, "learning_rate": 4.725741349503876e-05, "loss": 1.3592, "step": 43500 }, { "epoch": 0.17, "learning_rate": 4.722588951222311e-05, "loss": 1.3792, "step": 44000 }, { "epoch": 0.17, "learning_rate": 4.7194365529407465e-05, "loss": 1.3712, "step": 44500 }, { "epoch": 0.17, "learning_rate": 4.716284154659182e-05, "loss": 1.374, "step": 45000 }, { "epoch": 0.17, "learning_rate": 4.713131756377617e-05, "loss": 1.3716, "step": 45500 }, { "epoch": 0.17, "learning_rate": 4.7099793580960525e-05, "loss": 1.367, "step": 46000 }, { "epoch": 0.18, "learning_rate": 4.706826959814488e-05, "loss": 1.364, "step": 46500 }, { "epoch": 0.18, "learning_rate": 4.703674561532923e-05, "loss": 1.3562, "step": 47000 }, { "epoch": 0.18, "learning_rate": 4.7005221632513586e-05, "loss": 1.3589, "step": 47500 }, { "epoch": 0.18, "learning_rate": 4.697369764969794e-05, "loss": 1.3608, "step": 48000 }, { "epoch": 0.18, "learning_rate": 4.694217366688229e-05, "loss": 1.3528, "step": 48500 }, { "epoch": 0.19, "learning_rate": 4.6910649684066646e-05, "loss": 1.3687, "step": 49000 }, { "epoch": 0.19, "learning_rate": 4.6879125701251e-05, "loss": 1.3676, "step": 49500 }, { "epoch": 0.19, "learning_rate": 4.684760171843535e-05, "loss": 1.3578, "step": 50000 }, { "epoch": 0.19, "learning_rate": 4.6816077735619706e-05, "loss": 1.3606, "step": 50500 }, { "epoch": 0.19, "learning_rate": 4.678455375280406e-05, "loss": 1.3575, "step": 51000 }, { "epoch": 0.19, "learning_rate": 4.675302976998841e-05, "loss": 1.3589, "step": 51500 }, { "epoch": 0.2, "learning_rate": 4.672150578717277e-05, "loss": 1.3624, "step": 52000 }, { "epoch": 0.2, "learning_rate": 4.668998180435712e-05, "loss": 1.353, "step": 52500 }, { "epoch": 0.2, "learning_rate": 4.665845782154147e-05, "loss": 1.3658, "step": 53000 }, { "epoch": 0.2, "learning_rate": 4.662693383872583e-05, "loss": 1.3542, "step": 53500 }, { "epoch": 0.2, "learning_rate": 4.659540985591018e-05, "loss": 1.3519, "step": 54000 }, { "epoch": 0.21, "learning_rate": 4.656388587309454e-05, "loss": 1.3473, "step": 54500 }, { "epoch": 0.21, "learning_rate": 4.653236189027889e-05, "loss": 1.3512, "step": 55000 }, { "epoch": 0.21, "learning_rate": 4.650083790746324e-05, "loss": 1.3439, "step": 55500 }, { "epoch": 0.21, "learning_rate": 4.6469313924647594e-05, "loss": 1.3446, "step": 56000 }, { "epoch": 0.21, "learning_rate": 4.643778994183195e-05, "loss": 1.3454, "step": 56500 }, { "epoch": 0.22, "learning_rate": 4.64062659590163e-05, "loss": 1.35, "step": 57000 }, { "epoch": 0.22, "learning_rate": 4.637474197620066e-05, "loss": 1.3424, "step": 57500 }, { "epoch": 0.22, "learning_rate": 4.634321799338501e-05, "loss": 1.3411, "step": 58000 }, { "epoch": 0.22, "learning_rate": 4.631169401056936e-05, "loss": 1.3423, "step": 58500 }, { "epoch": 0.22, "learning_rate": 4.6280170027753715e-05, "loss": 1.337, "step": 59000 }, { "epoch": 0.23, "learning_rate": 4.624864604493807e-05, "loss": 1.3488, "step": 59500 }, { "epoch": 0.23, "learning_rate": 4.621712206212243e-05, "loss": 1.3368, "step": 60000 }, { "epoch": 0.23, "learning_rate": 4.618559807930678e-05, "loss": 1.3392, "step": 60500 }, { "epoch": 0.23, "learning_rate": 4.615407409649113e-05, "loss": 1.3441, "step": 61000 }, { "epoch": 0.23, "learning_rate": 4.612255011367548e-05, "loss": 1.3338, "step": 61500 }, { "epoch": 0.23, "learning_rate": 4.6091026130859835e-05, "loss": 1.3369, "step": 62000 }, { "epoch": 0.24, "learning_rate": 4.6059502148044195e-05, "loss": 1.3319, "step": 62500 }, { "epoch": 0.24, "learning_rate": 4.602797816522855e-05, "loss": 1.3293, "step": 63000 }, { "epoch": 0.24, "learning_rate": 4.5996454182412896e-05, "loss": 1.3421, "step": 63500 }, { "epoch": 0.24, "learning_rate": 4.596493019959725e-05, "loss": 1.3262, "step": 64000 }, { "epoch": 0.24, "learning_rate": 4.59334062167816e-05, "loss": 1.3422, "step": 64500 }, { "epoch": 0.25, "learning_rate": 4.590188223396596e-05, "loss": 1.3312, "step": 65000 }, { "epoch": 0.25, "learning_rate": 4.5870358251150316e-05, "loss": 1.3497, "step": 65500 }, { "epoch": 0.25, "learning_rate": 4.583883426833467e-05, "loss": 1.3355, "step": 66000 }, { "epoch": 0.25, "learning_rate": 4.5807310285519016e-05, "loss": 1.3249, "step": 66500 }, { "epoch": 0.25, "learning_rate": 4.577578630270337e-05, "loss": 1.3319, "step": 67000 }, { "epoch": 0.26, "learning_rate": 4.574426231988772e-05, "loss": 1.326, "step": 67500 }, { "epoch": 0.26, "learning_rate": 4.571273833707208e-05, "loss": 1.3303, "step": 68000 }, { "epoch": 0.26, "learning_rate": 4.568121435425644e-05, "loss": 1.3296, "step": 68500 }, { "epoch": 0.26, "learning_rate": 4.564969037144078e-05, "loss": 1.3301, "step": 69000 }, { "epoch": 0.26, "learning_rate": 4.561816638862514e-05, "loss": 1.3399, "step": 69500 }, { "epoch": 0.26, "learning_rate": 4.558664240580949e-05, "loss": 1.3273, "step": 70000 }, { "epoch": 0.27, "learning_rate": 4.555511842299385e-05, "loss": 1.3289, "step": 70500 }, { "epoch": 0.27, "learning_rate": 4.5523594440178204e-05, "loss": 1.3256, "step": 71000 }, { "epoch": 0.27, "learning_rate": 4.549207045736256e-05, "loss": 1.3309, "step": 71500 }, { "epoch": 0.27, "learning_rate": 4.5460546474546904e-05, "loss": 1.3286, "step": 72000 }, { "epoch": 0.27, "learning_rate": 4.542902249173126e-05, "loss": 1.3211, "step": 72500 }, { "epoch": 0.28, "learning_rate": 4.539749850891562e-05, "loss": 1.3264, "step": 73000 }, { "epoch": 0.28, "learning_rate": 4.536597452609997e-05, "loss": 1.3232, "step": 73500 }, { "epoch": 0.28, "learning_rate": 4.5334450543284324e-05, "loss": 1.3264, "step": 74000 }, { "epoch": 0.28, "learning_rate": 4.530292656046868e-05, "loss": 1.3244, "step": 74500 }, { "epoch": 0.28, "learning_rate": 4.5271402577653025e-05, "loss": 1.3139, "step": 75000 }, { "epoch": 0.29, "learning_rate": 4.5239878594837385e-05, "loss": 1.3191, "step": 75500 }, { "epoch": 0.29, "learning_rate": 4.520835461202174e-05, "loss": 1.3205, "step": 76000 }, { "epoch": 0.29, "learning_rate": 4.517683062920609e-05, "loss": 1.3061, "step": 76500 }, { "epoch": 0.29, "learning_rate": 4.5145306646390445e-05, "loss": 1.3169, "step": 77000 }, { "epoch": 0.29, "learning_rate": 4.511378266357479e-05, "loss": 1.3283, "step": 77500 }, { "epoch": 0.3, "learning_rate": 4.5082258680759145e-05, "loss": 1.3166, "step": 78000 }, { "epoch": 0.3, "learning_rate": 4.5050734697943505e-05, "loss": 1.3215, "step": 78500 }, { "epoch": 0.3, "learning_rate": 4.501921071512786e-05, "loss": 1.327, "step": 79000 }, { "epoch": 0.3, "learning_rate": 4.498768673231221e-05, "loss": 1.3162, "step": 79500 }, { "epoch": 0.3, "learning_rate": 4.4956162749496566e-05, "loss": 1.3175, "step": 80000 }, { "epoch": 0.3, "learning_rate": 4.492463876668091e-05, "loss": 1.3113, "step": 80500 }, { "epoch": 0.31, "learning_rate": 4.489311478386527e-05, "loss": 1.3117, "step": 81000 }, { "epoch": 0.31, "learning_rate": 4.4861590801049626e-05, "loss": 1.3138, "step": 81500 }, { "epoch": 0.31, "learning_rate": 4.483006681823398e-05, "loss": 1.3192, "step": 82000 }, { "epoch": 0.31, "learning_rate": 4.479854283541833e-05, "loss": 1.3126, "step": 82500 }, { "epoch": 0.31, "learning_rate": 4.4767018852602686e-05, "loss": 1.3262, "step": 83000 }, { "epoch": 0.32, "learning_rate": 4.473549486978704e-05, "loss": 1.313, "step": 83500 }, { "epoch": 0.32, "learning_rate": 4.470397088697139e-05, "loss": 1.3137, "step": 84000 }, { "epoch": 0.32, "learning_rate": 4.467244690415575e-05, "loss": 1.3055, "step": 84500 }, { "epoch": 0.32, "learning_rate": 4.46409229213401e-05, "loss": 1.3179, "step": 85000 }, { "epoch": 0.32, "learning_rate": 4.4609398938524453e-05, "loss": 1.3109, "step": 85500 }, { "epoch": 0.33, "learning_rate": 4.457787495570881e-05, "loss": 1.3089, "step": 86000 }, { "epoch": 0.33, "learning_rate": 4.454635097289316e-05, "loss": 1.318, "step": 86500 }, { "epoch": 0.33, "learning_rate": 4.4514826990077514e-05, "loss": 1.3125, "step": 87000 }, { "epoch": 0.33, "learning_rate": 4.448330300726187e-05, "loss": 1.3045, "step": 87500 }, { "epoch": 0.33, "learning_rate": 4.445177902444622e-05, "loss": 1.3032, "step": 88000 }, { "epoch": 0.33, "learning_rate": 4.4420255041630574e-05, "loss": 1.3108, "step": 88500 }, { "epoch": 0.34, "learning_rate": 4.438873105881493e-05, "loss": 1.3126, "step": 89000 }, { "epoch": 0.34, "learning_rate": 4.435720707599928e-05, "loss": 1.3027, "step": 89500 }, { "epoch": 0.34, "learning_rate": 4.4325683093183634e-05, "loss": 1.3019, "step": 90000 }, { "epoch": 0.34, "learning_rate": 4.429415911036799e-05, "loss": 1.3121, "step": 90500 }, { "epoch": 0.34, "learning_rate": 4.426263512755234e-05, "loss": 1.3049, "step": 91000 }, { "epoch": 0.35, "learning_rate": 4.4231111144736695e-05, "loss": 1.3035, "step": 91500 }, { "epoch": 0.35, "learning_rate": 4.419958716192105e-05, "loss": 1.3013, "step": 92000 }, { "epoch": 0.35, "learning_rate": 4.41680631791054e-05, "loss": 1.3063, "step": 92500 }, { "epoch": 0.35, "learning_rate": 4.4136539196289755e-05, "loss": 1.3069, "step": 93000 }, { "epoch": 0.35, "learning_rate": 4.410501521347411e-05, "loss": 1.2973, "step": 93500 }, { "epoch": 0.36, "learning_rate": 4.407349123065846e-05, "loss": 1.3056, "step": 94000 }, { "epoch": 0.36, "learning_rate": 4.4041967247842815e-05, "loss": 1.3061, "step": 94500 }, { "epoch": 0.36, "learning_rate": 4.401044326502717e-05, "loss": 1.3061, "step": 95000 }, { "epoch": 0.36, "learning_rate": 4.397891928221152e-05, "loss": 1.2963, "step": 95500 }, { "epoch": 0.36, "learning_rate": 4.3947395299395876e-05, "loss": 1.2948, "step": 96000 }, { "epoch": 0.37, "learning_rate": 4.391587131658023e-05, "loss": 1.3008, "step": 96500 }, { "epoch": 0.37, "learning_rate": 4.388434733376459e-05, "loss": 1.2946, "step": 97000 }, { "epoch": 0.37, "learning_rate": 4.3852823350948936e-05, "loss": 1.2885, "step": 97500 }, { "epoch": 0.37, "learning_rate": 4.382129936813329e-05, "loss": 1.2894, "step": 98000 }, { "epoch": 0.37, "learning_rate": 4.378977538531764e-05, "loss": 1.2977, "step": 98500 }, { "epoch": 0.37, "learning_rate": 4.3758251402501996e-05, "loss": 1.2929, "step": 99000 }, { "epoch": 0.38, "learning_rate": 4.372672741968635e-05, "loss": 1.2893, "step": 99500 }, { "epoch": 0.38, "learning_rate": 4.36952034368707e-05, "loss": 1.2903, "step": 100000 }, { "epoch": 0.38, "learning_rate": 4.3663679454055057e-05, "loss": 1.294, "step": 100500 }, { "epoch": 0.38, "learning_rate": 4.363215547123941e-05, "loss": 1.296, "step": 101000 }, { "epoch": 0.38, "learning_rate": 4.3600631488423763e-05, "loss": 1.2844, "step": 101500 }, { "epoch": 0.39, "learning_rate": 4.356910750560812e-05, "loss": 1.2975, "step": 102000 }, { "epoch": 0.39, "learning_rate": 4.353758352279248e-05, "loss": 1.2952, "step": 102500 }, { "epoch": 0.39, "learning_rate": 4.3506059539976824e-05, "loss": 1.2894, "step": 103000 }, { "epoch": 0.39, "learning_rate": 4.347453555716118e-05, "loss": 1.2931, "step": 103500 }, { "epoch": 0.39, "learning_rate": 4.344301157434553e-05, "loss": 1.2907, "step": 104000 }, { "epoch": 0.4, "learning_rate": 4.3411487591529884e-05, "loss": 1.3006, "step": 104500 }, { "epoch": 0.4, "learning_rate": 4.3379963608714244e-05, "loss": 1.2961, "step": 105000 }, { "epoch": 0.4, "learning_rate": 4.33484396258986e-05, "loss": 1.2926, "step": 105500 }, { "epoch": 0.4, "learning_rate": 4.3316915643082944e-05, "loss": 1.2992, "step": 106000 }, { "epoch": 0.4, "learning_rate": 4.32853916602673e-05, "loss": 1.2856, "step": 106500 }, { "epoch": 0.4, "learning_rate": 4.325386767745165e-05, "loss": 1.2947, "step": 107000 }, { "epoch": 0.41, "learning_rate": 4.322234369463601e-05, "loss": 1.2989, "step": 107500 }, { "epoch": 0.41, "learning_rate": 4.3190819711820365e-05, "loss": 1.2981, "step": 108000 }, { "epoch": 0.41, "learning_rate": 4.315929572900471e-05, "loss": 1.284, "step": 108500 }, { "epoch": 0.41, "learning_rate": 4.3127771746189065e-05, "loss": 1.282, "step": 109000 }, { "epoch": 0.41, "learning_rate": 4.309624776337342e-05, "loss": 1.281, "step": 109500 }, { "epoch": 0.42, "learning_rate": 4.306472378055777e-05, "loss": 1.2902, "step": 110000 }, { "epoch": 0.42, "learning_rate": 4.303319979774213e-05, "loss": 1.2881, "step": 110500 }, { "epoch": 0.42, "learning_rate": 4.3001675814926486e-05, "loss": 1.2849, "step": 111000 }, { "epoch": 0.42, "learning_rate": 4.297015183211083e-05, "loss": 1.2867, "step": 111500 }, { "epoch": 0.42, "learning_rate": 4.2938627849295186e-05, "loss": 1.2795, "step": 112000 }, { "epoch": 0.43, "learning_rate": 4.290710386647954e-05, "loss": 1.2809, "step": 112500 }, { "epoch": 0.43, "learning_rate": 4.28755798836639e-05, "loss": 1.2786, "step": 113000 }, { "epoch": 0.43, "learning_rate": 4.284405590084825e-05, "loss": 1.2806, "step": 113500 }, { "epoch": 0.43, "learning_rate": 4.2812531918032606e-05, "loss": 1.2939, "step": 114000 }, { "epoch": 0.43, "learning_rate": 4.278100793521695e-05, "loss": 1.2797, "step": 114500 }, { "epoch": 0.44, "learning_rate": 4.2749483952401306e-05, "loss": 1.2836, "step": 115000 }, { "epoch": 0.44, "learning_rate": 4.2717959969585666e-05, "loss": 1.274, "step": 115500 }, { "epoch": 0.44, "learning_rate": 4.268643598677002e-05, "loss": 1.2819, "step": 116000 }, { "epoch": 0.44, "learning_rate": 4.265491200395437e-05, "loss": 1.2758, "step": 116500 }, { "epoch": 0.44, "learning_rate": 4.262338802113872e-05, "loss": 1.2819, "step": 117000 }, { "epoch": 0.44, "learning_rate": 4.2591864038323073e-05, "loss": 1.2888, "step": 117500 }, { "epoch": 0.45, "learning_rate": 4.2560340055507434e-05, "loss": 1.2723, "step": 118000 }, { "epoch": 0.45, "learning_rate": 4.252881607269179e-05, "loss": 1.2848, "step": 118500 }, { "epoch": 0.45, "learning_rate": 4.249729208987614e-05, "loss": 1.2675, "step": 119000 }, { "epoch": 0.45, "learning_rate": 4.2465768107060494e-05, "loss": 1.2842, "step": 119500 }, { "epoch": 0.45, "learning_rate": 4.243424412424484e-05, "loss": 1.2751, "step": 120000 }, { "epoch": 0.46, "learning_rate": 4.24027201414292e-05, "loss": 1.278, "step": 120500 }, { "epoch": 0.46, "learning_rate": 4.2371196158613554e-05, "loss": 1.2786, "step": 121000 }, { "epoch": 0.46, "learning_rate": 4.233967217579791e-05, "loss": 1.2816, "step": 121500 }, { "epoch": 0.46, "learning_rate": 4.230814819298226e-05, "loss": 1.2707, "step": 122000 }, { "epoch": 0.46, "learning_rate": 4.227662421016661e-05, "loss": 1.2715, "step": 122500 }, { "epoch": 0.47, "learning_rate": 4.224510022735096e-05, "loss": 1.2801, "step": 123000 }, { "epoch": 0.47, "learning_rate": 4.221357624453532e-05, "loss": 1.2818, "step": 123500 }, { "epoch": 0.47, "learning_rate": 4.2182052261719675e-05, "loss": 1.2813, "step": 124000 }, { "epoch": 0.47, "learning_rate": 4.215052827890403e-05, "loss": 1.2805, "step": 124500 }, { "epoch": 0.47, "learning_rate": 4.211900429608838e-05, "loss": 1.2766, "step": 125000 }, { "epoch": 0.47, "learning_rate": 4.208748031327273e-05, "loss": 1.2736, "step": 125500 }, { "epoch": 0.48, "learning_rate": 4.205595633045709e-05, "loss": 1.2794, "step": 126000 }, { "epoch": 0.48, "learning_rate": 4.202443234764144e-05, "loss": 1.2829, "step": 126500 }, { "epoch": 0.48, "learning_rate": 4.1992908364825795e-05, "loss": 1.274, "step": 127000 }, { "epoch": 0.48, "learning_rate": 4.196138438201015e-05, "loss": 1.2724, "step": 127500 }, { "epoch": 0.48, "learning_rate": 4.19298603991945e-05, "loss": 1.2762, "step": 128000 }, { "epoch": 0.49, "learning_rate": 4.1898336416378856e-05, "loss": 1.2685, "step": 128500 }, { "epoch": 0.49, "learning_rate": 4.186681243356321e-05, "loss": 1.2634, "step": 129000 }, { "epoch": 0.49, "learning_rate": 4.183528845074756e-05, "loss": 1.2662, "step": 129500 }, { "epoch": 0.49, "learning_rate": 4.1803764467931916e-05, "loss": 1.2812, "step": 130000 }, { "epoch": 0.49, "learning_rate": 4.177224048511627e-05, "loss": 1.2663, "step": 130500 }, { "epoch": 0.5, "learning_rate": 4.174071650230062e-05, "loss": 1.279, "step": 131000 }, { "epoch": 0.5, "learning_rate": 4.1709192519484976e-05, "loss": 1.2679, "step": 131500 }, { "epoch": 0.5, "learning_rate": 4.167766853666933e-05, "loss": 1.2692, "step": 132000 }, { "epoch": 0.5, "learning_rate": 4.164614455385368e-05, "loss": 1.2751, "step": 132500 }, { "epoch": 0.5, "learning_rate": 4.161462057103804e-05, "loss": 1.272, "step": 133000 }, { "epoch": 0.51, "learning_rate": 4.158309658822239e-05, "loss": 1.2676, "step": 133500 }, { "epoch": 0.51, "learning_rate": 4.1551572605406744e-05, "loss": 1.2581, "step": 134000 }, { "epoch": 0.51, "learning_rate": 4.15200486225911e-05, "loss": 1.2685, "step": 134500 }, { "epoch": 0.51, "learning_rate": 4.148852463977545e-05, "loss": 1.2698, "step": 135000 }, { "epoch": 0.51, "learning_rate": 4.1457000656959804e-05, "loss": 1.2633, "step": 135500 }, { "epoch": 0.51, "learning_rate": 4.142547667414416e-05, "loss": 1.267, "step": 136000 }, { "epoch": 0.52, "learning_rate": 4.139395269132851e-05, "loss": 1.2745, "step": 136500 }, { "epoch": 0.52, "learning_rate": 4.1362428708512864e-05, "loss": 1.2688, "step": 137000 }, { "epoch": 0.52, "learning_rate": 4.133090472569722e-05, "loss": 1.2691, "step": 137500 }, { "epoch": 0.52, "learning_rate": 4.129938074288157e-05, "loss": 1.2614, "step": 138000 }, { "epoch": 0.52, "learning_rate": 4.1267856760065924e-05, "loss": 1.2656, "step": 138500 }, { "epoch": 0.53, "learning_rate": 4.123633277725028e-05, "loss": 1.2741, "step": 139000 }, { "epoch": 0.53, "learning_rate": 4.120480879443463e-05, "loss": 1.2673, "step": 139500 }, { "epoch": 0.53, "learning_rate": 4.1173284811618985e-05, "loss": 1.2641, "step": 140000 }, { "epoch": 0.53, "learning_rate": 4.114176082880334e-05, "loss": 1.2654, "step": 140500 }, { "epoch": 0.53, "learning_rate": 4.111023684598769e-05, "loss": 1.2634, "step": 141000 }, { "epoch": 0.54, "learning_rate": 4.1078712863172045e-05, "loss": 1.2698, "step": 141500 }, { "epoch": 0.54, "learning_rate": 4.1047188880356405e-05, "loss": 1.2551, "step": 142000 }, { "epoch": 0.54, "learning_rate": 4.101566489754075e-05, "loss": 1.2571, "step": 142500 }, { "epoch": 0.54, "learning_rate": 4.0984140914725105e-05, "loss": 1.2685, "step": 143000 }, { "epoch": 0.54, "learning_rate": 4.095261693190946e-05, "loss": 1.2608, "step": 143500 }, { "epoch": 0.54, "learning_rate": 4.092109294909381e-05, "loss": 1.2628, "step": 144000 }, { "epoch": 0.55, "learning_rate": 4.0889568966278166e-05, "loss": 1.2666, "step": 144500 }, { "epoch": 0.55, "learning_rate": 4.085804498346252e-05, "loss": 1.2568, "step": 145000 }, { "epoch": 0.55, "learning_rate": 4.082652100064687e-05, "loss": 1.2561, "step": 145500 }, { "epoch": 0.55, "learning_rate": 4.0794997017831226e-05, "loss": 1.2623, "step": 146000 }, { "epoch": 0.55, "learning_rate": 4.076347303501558e-05, "loss": 1.2662, "step": 146500 }, { "epoch": 0.56, "learning_rate": 4.073194905219993e-05, "loss": 1.2621, "step": 147000 }, { "epoch": 0.56, "learning_rate": 4.070042506938429e-05, "loss": 1.2644, "step": 147500 }, { "epoch": 0.56, "learning_rate": 4.066890108656864e-05, "loss": 1.2604, "step": 148000 }, { "epoch": 0.56, "learning_rate": 4.063737710375299e-05, "loss": 1.2581, "step": 148500 }, { "epoch": 0.56, "learning_rate": 4.060585312093735e-05, "loss": 1.2607, "step": 149000 }, { "epoch": 0.57, "learning_rate": 4.05743291381217e-05, "loss": 1.2517, "step": 149500 }, { "epoch": 0.57, "learning_rate": 4.054280515530606e-05, "loss": 1.2597, "step": 150000 }, { "epoch": 0.57, "learning_rate": 4.0511281172490414e-05, "loss": 1.2582, "step": 150500 }, { "epoch": 0.57, "learning_rate": 4.047975718967476e-05, "loss": 1.249, "step": 151000 }, { "epoch": 0.57, "learning_rate": 4.0448233206859114e-05, "loss": 1.2691, "step": 151500 }, { "epoch": 0.57, "learning_rate": 4.041670922404347e-05, "loss": 1.2587, "step": 152000 }, { "epoch": 0.58, "learning_rate": 4.038518524122783e-05, "loss": 1.2517, "step": 152500 }, { "epoch": 0.58, "learning_rate": 4.035366125841218e-05, "loss": 1.2535, "step": 153000 }, { "epoch": 0.58, "learning_rate": 4.032213727559653e-05, "loss": 1.2532, "step": 153500 }, { "epoch": 0.58, "learning_rate": 4.029061329278088e-05, "loss": 1.2524, "step": 154000 }, { "epoch": 0.58, "learning_rate": 4.0259089309965234e-05, "loss": 1.2587, "step": 154500 }, { "epoch": 0.59, "learning_rate": 4.022756532714959e-05, "loss": 1.2537, "step": 155000 }, { "epoch": 0.59, "learning_rate": 4.019604134433395e-05, "loss": 1.2535, "step": 155500 }, { "epoch": 0.59, "learning_rate": 4.01645173615183e-05, "loss": 1.2587, "step": 156000 }, { "epoch": 0.59, "learning_rate": 4.013299337870265e-05, "loss": 1.2454, "step": 156500 }, { "epoch": 0.59, "learning_rate": 4.0101469395887e-05, "loss": 1.245, "step": 157000 }, { "epoch": 0.6, "learning_rate": 4.0069945413071355e-05, "loss": 1.2485, "step": 157500 }, { "epoch": 0.6, "learning_rate": 4.0038421430255715e-05, "loss": 1.2613, "step": 158000 }, { "epoch": 0.6, "learning_rate": 4.000689744744007e-05, "loss": 1.2438, "step": 158500 }, { "epoch": 0.6, "learning_rate": 3.997537346462442e-05, "loss": 1.2495, "step": 159000 }, { "epoch": 0.6, "learning_rate": 3.994384948180877e-05, "loss": 1.2535, "step": 159500 }, { "epoch": 0.61, "learning_rate": 3.991232549899312e-05, "loss": 1.2556, "step": 160000 }, { "epoch": 0.61, "learning_rate": 3.988080151617748e-05, "loss": 1.2549, "step": 160500 }, { "epoch": 0.61, "learning_rate": 3.9849277533361836e-05, "loss": 1.2447, "step": 161000 }, { "epoch": 0.61, "learning_rate": 3.981775355054619e-05, "loss": 1.2486, "step": 161500 }, { "epoch": 0.61, "learning_rate": 3.9786229567730536e-05, "loss": 1.253, "step": 162000 }, { "epoch": 0.61, "learning_rate": 3.975470558491489e-05, "loss": 1.2464, "step": 162500 }, { "epoch": 0.62, "learning_rate": 3.972318160209925e-05, "loss": 1.2555, "step": 163000 }, { "epoch": 0.62, "learning_rate": 3.96916576192836e-05, "loss": 1.2475, "step": 163500 }, { "epoch": 0.62, "learning_rate": 3.9660133636467956e-05, "loss": 1.2646, "step": 164000 }, { "epoch": 0.62, "learning_rate": 3.962860965365231e-05, "loss": 1.2571, "step": 164500 }, { "epoch": 0.62, "learning_rate": 3.9597085670836657e-05, "loss": 1.2415, "step": 165000 }, { "epoch": 0.63, "learning_rate": 3.956556168802101e-05, "loss": 1.2467, "step": 165500 }, { "epoch": 0.63, "learning_rate": 3.953403770520537e-05, "loss": 1.2416, "step": 166000 }, { "epoch": 0.63, "learning_rate": 3.9502513722389724e-05, "loss": 1.249, "step": 166500 }, { "epoch": 0.63, "learning_rate": 3.947098973957408e-05, "loss": 1.2416, "step": 167000 }, { "epoch": 0.63, "learning_rate": 3.9439465756758424e-05, "loss": 1.2555, "step": 167500 }, { "epoch": 0.64, "learning_rate": 3.940794177394278e-05, "loss": 1.2515, "step": 168000 }, { "epoch": 0.64, "learning_rate": 3.937641779112714e-05, "loss": 1.2445, "step": 168500 }, { "epoch": 0.64, "learning_rate": 3.934489380831149e-05, "loss": 1.2498, "step": 169000 }, { "epoch": 0.64, "learning_rate": 3.9313369825495844e-05, "loss": 1.2459, "step": 169500 }, { "epoch": 0.64, "learning_rate": 3.92818458426802e-05, "loss": 1.2501, "step": 170000 }, { "epoch": 0.64, "learning_rate": 3.9250321859864544e-05, "loss": 1.2514, "step": 170500 }, { "epoch": 0.65, "learning_rate": 3.9218797877048905e-05, "loss": 1.2483, "step": 171000 }, { "epoch": 0.65, "learning_rate": 3.918727389423326e-05, "loss": 1.2369, "step": 171500 }, { "epoch": 0.65, "learning_rate": 3.915574991141761e-05, "loss": 1.2338, "step": 172000 }, { "epoch": 0.65, "learning_rate": 3.9124225928601965e-05, "loss": 1.2439, "step": 172500 }, { "epoch": 0.65, "learning_rate": 3.909270194578632e-05, "loss": 1.2419, "step": 173000 }, { "epoch": 0.66, "learning_rate": 3.906117796297067e-05, "loss": 1.2442, "step": 173500 }, { "epoch": 0.66, "learning_rate": 3.9029653980155025e-05, "loss": 1.2567, "step": 174000 }, { "epoch": 0.66, "learning_rate": 3.899812999733938e-05, "loss": 1.2435, "step": 174500 }, { "epoch": 0.66, "learning_rate": 3.896660601452373e-05, "loss": 1.2391, "step": 175000 }, { "epoch": 0.66, "learning_rate": 3.8935082031708086e-05, "loss": 1.2427, "step": 175500 }, { "epoch": 0.67, "learning_rate": 3.890355804889244e-05, "loss": 1.2344, "step": 176000 }, { "epoch": 0.67, "learning_rate": 3.887203406607679e-05, "loss": 1.237, "step": 176500 }, { "epoch": 0.67, "learning_rate": 3.8840510083261146e-05, "loss": 1.2406, "step": 177000 }, { "epoch": 0.67, "learning_rate": 3.88089861004455e-05, "loss": 1.2416, "step": 177500 }, { "epoch": 0.67, "learning_rate": 3.877746211762985e-05, "loss": 1.2482, "step": 178000 }, { "epoch": 0.68, "learning_rate": 3.8745938134814206e-05, "loss": 1.2516, "step": 178500 }, { "epoch": 0.68, "learning_rate": 3.871441415199856e-05, "loss": 1.2459, "step": 179000 }, { "epoch": 0.68, "learning_rate": 3.868289016918291e-05, "loss": 1.2477, "step": 179500 }, { "epoch": 0.68, "learning_rate": 3.8651366186367266e-05, "loss": 1.2469, "step": 180000 }, { "epoch": 0.68, "learning_rate": 3.861984220355162e-05, "loss": 1.2431, "step": 180500 }, { "epoch": 0.68, "learning_rate": 3.858831822073597e-05, "loss": 1.2371, "step": 181000 }, { "epoch": 0.69, "learning_rate": 3.855679423792033e-05, "loss": 1.2475, "step": 181500 }, { "epoch": 0.69, "learning_rate": 3.852527025510468e-05, "loss": 1.2437, "step": 182000 }, { "epoch": 0.69, "learning_rate": 3.8493746272289034e-05, "loss": 1.246, "step": 182500 }, { "epoch": 0.69, "learning_rate": 3.846222228947339e-05, "loss": 1.2453, "step": 183000 }, { "epoch": 0.69, "learning_rate": 3.843069830665774e-05, "loss": 1.2343, "step": 183500 }, { "epoch": 0.7, "learning_rate": 3.8399174323842094e-05, "loss": 1.2378, "step": 184000 }, { "epoch": 0.7, "learning_rate": 3.836765034102645e-05, "loss": 1.2409, "step": 184500 }, { "epoch": 0.7, "learning_rate": 3.83361263582108e-05, "loss": 1.2416, "step": 185000 }, { "epoch": 0.7, "learning_rate": 3.8304602375395154e-05, "loss": 1.237, "step": 185500 }, { "epoch": 0.7, "learning_rate": 3.827307839257951e-05, "loss": 1.2444, "step": 186000 }, { "epoch": 0.71, "learning_rate": 3.824155440976386e-05, "loss": 1.2306, "step": 186500 }, { "epoch": 0.71, "learning_rate": 3.8210030426948215e-05, "loss": 1.2276, "step": 187000 }, { "epoch": 0.71, "learning_rate": 3.817850644413257e-05, "loss": 1.2383, "step": 187500 }, { "epoch": 0.71, "learning_rate": 3.814698246131692e-05, "loss": 1.2423, "step": 188000 }, { "epoch": 0.71, "learning_rate": 3.8115458478501275e-05, "loss": 1.2351, "step": 188500 }, { "epoch": 0.71, "learning_rate": 3.808393449568563e-05, "loss": 1.2293, "step": 189000 }, { "epoch": 0.72, "learning_rate": 3.805241051286998e-05, "loss": 1.2369, "step": 189500 }, { "epoch": 0.72, "learning_rate": 3.802088653005434e-05, "loss": 1.2348, "step": 190000 }, { "epoch": 0.72, "learning_rate": 3.798936254723869e-05, "loss": 1.2385, "step": 190500 }, { "epoch": 0.72, "learning_rate": 3.795783856442304e-05, "loss": 1.2393, "step": 191000 }, { "epoch": 0.72, "learning_rate": 3.7926314581607395e-05, "loss": 1.2429, "step": 191500 }, { "epoch": 0.73, "learning_rate": 3.789479059879175e-05, "loss": 1.23, "step": 192000 }, { "epoch": 0.73, "learning_rate": 3.786326661597611e-05, "loss": 1.2318, "step": 192500 }, { "epoch": 0.73, "learning_rate": 3.7831742633160456e-05, "loss": 1.2271, "step": 193000 }, { "epoch": 0.73, "learning_rate": 3.780021865034481e-05, "loss": 1.2345, "step": 193500 }, { "epoch": 0.73, "learning_rate": 3.776869466752916e-05, "loss": 1.2294, "step": 194000 }, { "epoch": 0.74, "learning_rate": 3.7737170684713516e-05, "loss": 1.2528, "step": 194500 }, { "epoch": 0.74, "learning_rate": 3.7705646701897876e-05, "loss": 1.23, "step": 195000 }, { "epoch": 0.74, "learning_rate": 3.767412271908223e-05, "loss": 1.2373, "step": 195500 }, { "epoch": 0.74, "learning_rate": 3.7642598736266576e-05, "loss": 1.2306, "step": 196000 }, { "epoch": 0.74, "learning_rate": 3.761107475345093e-05, "loss": 1.2324, "step": 196500 }, { "epoch": 0.75, "learning_rate": 3.757955077063528e-05, "loss": 1.2337, "step": 197000 }, { "epoch": 0.75, "learning_rate": 3.7548026787819643e-05, "loss": 1.2266, "step": 197500 }, { "epoch": 0.75, "learning_rate": 3.7516502805004e-05, "loss": 1.2244, "step": 198000 }, { "epoch": 0.75, "learning_rate": 3.7484978822188344e-05, "loss": 1.2234, "step": 198500 }, { "epoch": 0.75, "learning_rate": 3.74534548393727e-05, "loss": 1.2287, "step": 199000 }, { "epoch": 0.75, "learning_rate": 3.742193085655705e-05, "loss": 1.2306, "step": 199500 }, { "epoch": 0.76, "learning_rate": 3.7390406873741404e-05, "loss": 1.2294, "step": 200000 }, { "epoch": 0.76, "learning_rate": 3.7358882890925764e-05, "loss": 1.2289, "step": 200500 }, { "epoch": 0.76, "learning_rate": 3.732735890811012e-05, "loss": 1.2345, "step": 201000 }, { "epoch": 0.76, "learning_rate": 3.7295834925294464e-05, "loss": 1.2345, "step": 201500 }, { "epoch": 0.76, "learning_rate": 3.726431094247882e-05, "loss": 1.2319, "step": 202000 }, { "epoch": 0.77, "learning_rate": 3.723278695966317e-05, "loss": 1.22, "step": 202500 }, { "epoch": 0.77, "learning_rate": 3.720126297684753e-05, "loss": 1.2248, "step": 203000 }, { "epoch": 0.77, "learning_rate": 3.7169738994031885e-05, "loss": 1.2157, "step": 203500 }, { "epoch": 0.77, "learning_rate": 3.713821501121624e-05, "loss": 1.2358, "step": 204000 }, { "epoch": 0.77, "learning_rate": 3.7106691028400585e-05, "loss": 1.2328, "step": 204500 }, { "epoch": 0.78, "learning_rate": 3.707516704558494e-05, "loss": 1.2153, "step": 205000 }, { "epoch": 0.78, "learning_rate": 3.70436430627693e-05, "loss": 1.2313, "step": 205500 }, { "epoch": 0.78, "learning_rate": 3.701211907995365e-05, "loss": 1.2219, "step": 206000 }, { "epoch": 0.78, "learning_rate": 3.6980595097138005e-05, "loss": 1.2297, "step": 206500 }, { "epoch": 0.78, "learning_rate": 3.694907111432235e-05, "loss": 1.2327, "step": 207000 }, { "epoch": 0.78, "learning_rate": 3.6917547131506705e-05, "loss": 1.2318, "step": 207500 }, { "epoch": 0.79, "learning_rate": 3.6886023148691066e-05, "loss": 1.2226, "step": 208000 }, { "epoch": 0.79, "learning_rate": 3.685449916587542e-05, "loss": 1.2297, "step": 208500 }, { "epoch": 0.79, "learning_rate": 3.682297518305977e-05, "loss": 1.224, "step": 209000 }, { "epoch": 0.79, "learning_rate": 3.6791451200244126e-05, "loss": 1.2257, "step": 209500 }, { "epoch": 0.79, "learning_rate": 3.675992721742847e-05, "loss": 1.2258, "step": 210000 }, { "epoch": 0.8, "learning_rate": 3.6728403234612826e-05, "loss": 1.2274, "step": 210500 }, { "epoch": 0.8, "learning_rate": 3.6696879251797186e-05, "loss": 1.2217, "step": 211000 }, { "epoch": 0.8, "learning_rate": 3.666535526898154e-05, "loss": 1.2228, "step": 211500 }, { "epoch": 0.8, "learning_rate": 3.663383128616589e-05, "loss": 1.2263, "step": 212000 }, { "epoch": 0.8, "learning_rate": 3.6602307303350247e-05, "loss": 1.215, "step": 212500 }, { "epoch": 0.81, "learning_rate": 3.657078332053459e-05, "loss": 1.2297, "step": 213000 }, { "epoch": 0.81, "learning_rate": 3.6539259337718953e-05, "loss": 1.2213, "step": 213500 }, { "epoch": 0.81, "learning_rate": 3.650773535490331e-05, "loss": 1.2106, "step": 214000 }, { "epoch": 0.81, "learning_rate": 3.647621137208766e-05, "loss": 1.2199, "step": 214500 }, { "epoch": 0.81, "learning_rate": 3.6444687389272014e-05, "loss": 1.2183, "step": 215000 }, { "epoch": 0.82, "learning_rate": 3.641316340645636e-05, "loss": 1.2175, "step": 215500 }, { "epoch": 0.82, "learning_rate": 3.638163942364072e-05, "loss": 1.2245, "step": 216000 }, { "epoch": 0.82, "learning_rate": 3.6350115440825074e-05, "loss": 1.2266, "step": 216500 }, { "epoch": 0.82, "learning_rate": 3.631859145800943e-05, "loss": 1.2147, "step": 217000 }, { "epoch": 0.82, "learning_rate": 3.628706747519378e-05, "loss": 1.2244, "step": 217500 }, { "epoch": 0.82, "learning_rate": 3.6255543492378134e-05, "loss": 1.2199, "step": 218000 }, { "epoch": 0.83, "learning_rate": 3.622401950956249e-05, "loss": 1.2264, "step": 218500 }, { "epoch": 0.83, "learning_rate": 3.619249552674684e-05, "loss": 1.2274, "step": 219000 }, { "epoch": 0.83, "learning_rate": 3.6160971543931195e-05, "loss": 1.2145, "step": 219500 }, { "epoch": 0.83, "learning_rate": 3.612944756111555e-05, "loss": 1.2281, "step": 220000 }, { "epoch": 0.83, "learning_rate": 3.60979235782999e-05, "loss": 1.225, "step": 220500 }, { "epoch": 0.84, "learning_rate": 3.606639959548425e-05, "loss": 1.2272, "step": 221000 }, { "epoch": 0.84, "learning_rate": 3.603487561266861e-05, "loss": 1.2137, "step": 221500 }, { "epoch": 0.84, "learning_rate": 3.600335162985296e-05, "loss": 1.2131, "step": 222000 }, { "epoch": 0.84, "learning_rate": 3.5971827647037315e-05, "loss": 1.2113, "step": 222500 }, { "epoch": 0.84, "learning_rate": 3.594030366422167e-05, "loss": 1.2021, "step": 223000 }, { "epoch": 0.85, "learning_rate": 3.590877968140602e-05, "loss": 1.2248, "step": 223500 }, { "epoch": 0.85, "learning_rate": 3.5877255698590376e-05, "loss": 1.2194, "step": 224000 }, { "epoch": 0.85, "learning_rate": 3.584573171577473e-05, "loss": 1.2115, "step": 224500 }, { "epoch": 0.85, "learning_rate": 3.581420773295908e-05, "loss": 1.2223, "step": 225000 }, { "epoch": 0.85, "learning_rate": 3.5782683750143436e-05, "loss": 1.2088, "step": 225500 }, { "epoch": 0.85, "learning_rate": 3.575115976732779e-05, "loss": 1.218, "step": 226000 }, { "epoch": 0.86, "learning_rate": 3.571963578451214e-05, "loss": 1.2144, "step": 226500 }, { "epoch": 0.86, "learning_rate": 3.5688111801696496e-05, "loss": 1.2152, "step": 227000 }, { "epoch": 0.86, "learning_rate": 3.565658781888085e-05, "loss": 1.2081, "step": 227500 }, { "epoch": 0.86, "learning_rate": 3.56250638360652e-05, "loss": 1.2153, "step": 228000 }, { "epoch": 0.86, "learning_rate": 3.5593539853249557e-05, "loss": 1.2123, "step": 228500 }, { "epoch": 0.87, "learning_rate": 3.556201587043391e-05, "loss": 1.2165, "step": 229000 }, { "epoch": 0.87, "learning_rate": 3.553049188761826e-05, "loss": 1.2143, "step": 229500 }, { "epoch": 0.87, "learning_rate": 3.549896790480262e-05, "loss": 1.2039, "step": 230000 }, { "epoch": 0.87, "learning_rate": 3.546744392198697e-05, "loss": 1.2169, "step": 230500 }, { "epoch": 0.87, "learning_rate": 3.5435919939171324e-05, "loss": 1.2179, "step": 231000 }, { "epoch": 0.88, "learning_rate": 3.540439595635568e-05, "loss": 1.2178, "step": 231500 }, { "epoch": 0.88, "learning_rate": 3.537287197354003e-05, "loss": 1.2184, "step": 232000 }, { "epoch": 0.88, "learning_rate": 3.5341347990724384e-05, "loss": 1.2141, "step": 232500 }, { "epoch": 0.88, "learning_rate": 3.530982400790874e-05, "loss": 1.2104, "step": 233000 }, { "epoch": 0.88, "learning_rate": 3.527830002509309e-05, "loss": 1.2114, "step": 233500 }, { "epoch": 0.89, "learning_rate": 3.5246776042277444e-05, "loss": 1.2092, "step": 234000 }, { "epoch": 0.89, "learning_rate": 3.52152520594618e-05, "loss": 1.2129, "step": 234500 }, { "epoch": 0.89, "learning_rate": 3.518372807664616e-05, "loss": 1.2038, "step": 235000 }, { "epoch": 0.89, "learning_rate": 3.5152204093830505e-05, "loss": 1.213, "step": 235500 }, { "epoch": 0.89, "learning_rate": 3.512068011101486e-05, "loss": 1.2105, "step": 236000 }, { "epoch": 0.89, "learning_rate": 3.508915612819921e-05, "loss": 1.2051, "step": 236500 }, { "epoch": 0.9, "learning_rate": 3.5057632145383565e-05, "loss": 1.2105, "step": 237000 }, { "epoch": 0.9, "learning_rate": 3.5026108162567925e-05, "loss": 1.2066, "step": 237500 }, { "epoch": 0.9, "learning_rate": 3.499458417975227e-05, "loss": 1.2162, "step": 238000 }, { "epoch": 0.9, "learning_rate": 3.4963060196936625e-05, "loss": 1.2113, "step": 238500 }, { "epoch": 0.9, "learning_rate": 3.493153621412098e-05, "loss": 1.2056, "step": 239000 }, { "epoch": 0.91, "learning_rate": 3.490001223130533e-05, "loss": 1.2125, "step": 239500 }, { "epoch": 0.91, "learning_rate": 3.486848824848969e-05, "loss": 1.2104, "step": 240000 }, { "epoch": 0.91, "learning_rate": 3.4836964265674046e-05, "loss": 1.2174, "step": 240500 }, { "epoch": 0.91, "learning_rate": 3.480544028285839e-05, "loss": 1.2045, "step": 241000 }, { "epoch": 0.91, "learning_rate": 3.4773916300042746e-05, "loss": 1.2054, "step": 241500 }, { "epoch": 0.92, "learning_rate": 3.47423923172271e-05, "loss": 1.2001, "step": 242000 }, { "epoch": 0.92, "learning_rate": 3.471086833441145e-05, "loss": 1.1995, "step": 242500 }, { "epoch": 0.92, "learning_rate": 3.467934435159581e-05, "loss": 1.2107, "step": 243000 }, { "epoch": 0.92, "learning_rate": 3.464782036878016e-05, "loss": 1.2138, "step": 243500 }, { "epoch": 0.92, "learning_rate": 3.461629638596451e-05, "loss": 1.2026, "step": 244000 }, { "epoch": 0.92, "learning_rate": 3.4584772403148866e-05, "loss": 1.2116, "step": 244500 }, { "epoch": 0.93, "learning_rate": 3.455324842033322e-05, "loss": 1.2067, "step": 245000 }, { "epoch": 0.93, "learning_rate": 3.452172443751758e-05, "loss": 1.2112, "step": 245500 }, { "epoch": 0.93, "learning_rate": 3.4490200454701934e-05, "loss": 1.1996, "step": 246000 }, { "epoch": 0.93, "learning_rate": 3.445867647188628e-05, "loss": 1.1978, "step": 246500 }, { "epoch": 0.93, "learning_rate": 3.4427152489070634e-05, "loss": 1.2188, "step": 247000 }, { "epoch": 0.94, "learning_rate": 3.439562850625499e-05, "loss": 1.2098, "step": 247500 }, { "epoch": 0.94, "learning_rate": 3.436410452343935e-05, "loss": 1.2038, "step": 248000 }, { "epoch": 0.94, "learning_rate": 3.43325805406237e-05, "loss": 1.1974, "step": 248500 }, { "epoch": 0.94, "learning_rate": 3.4301056557808054e-05, "loss": 1.1995, "step": 249000 }, { "epoch": 0.94, "learning_rate": 3.42695325749924e-05, "loss": 1.2148, "step": 249500 }, { "epoch": 0.95, "learning_rate": 3.4238008592176754e-05, "loss": 1.2018, "step": 250000 }, { "epoch": 0.95, "learning_rate": 3.4206484609361114e-05, "loss": 1.2058, "step": 250500 }, { "epoch": 0.95, "learning_rate": 3.417496062654547e-05, "loss": 1.2057, "step": 251000 }, { "epoch": 0.95, "learning_rate": 3.414343664372982e-05, "loss": 1.2059, "step": 251500 }, { "epoch": 0.95, "learning_rate": 3.411191266091417e-05, "loss": 1.2017, "step": 252000 }, { "epoch": 0.96, "learning_rate": 3.408038867809852e-05, "loss": 1.2028, "step": 252500 }, { "epoch": 0.96, "learning_rate": 3.4048864695282875e-05, "loss": 1.2041, "step": 253000 }, { "epoch": 0.96, "learning_rate": 3.4017340712467235e-05, "loss": 1.1936, "step": 253500 }, { "epoch": 0.96, "learning_rate": 3.398581672965159e-05, "loss": 1.2032, "step": 254000 }, { "epoch": 0.96, "learning_rate": 3.395429274683594e-05, "loss": 1.2067, "step": 254500 }, { "epoch": 0.96, "learning_rate": 3.392276876402029e-05, "loss": 1.1945, "step": 255000 }, { "epoch": 0.97, "learning_rate": 3.389124478120464e-05, "loss": 1.2049, "step": 255500 }, { "epoch": 0.97, "learning_rate": 3.3859720798389e-05, "loss": 1.1975, "step": 256000 }, { "epoch": 0.97, "learning_rate": 3.3828196815573356e-05, "loss": 1.1973, "step": 256500 }, { "epoch": 0.97, "learning_rate": 3.379667283275771e-05, "loss": 1.2014, "step": 257000 }, { "epoch": 0.97, "learning_rate": 3.376514884994206e-05, "loss": 1.1997, "step": 257500 }, { "epoch": 0.98, "learning_rate": 3.373362486712641e-05, "loss": 1.2044, "step": 258000 }, { "epoch": 0.98, "learning_rate": 3.370210088431077e-05, "loss": 1.2105, "step": 258500 }, { "epoch": 0.98, "learning_rate": 3.367057690149512e-05, "loss": 1.1934, "step": 259000 }, { "epoch": 0.98, "learning_rate": 3.3639052918679476e-05, "loss": 1.1967, "step": 259500 }, { "epoch": 0.98, "learning_rate": 3.360752893586383e-05, "loss": 1.201, "step": 260000 }, { "epoch": 0.99, "learning_rate": 3.3576004953048176e-05, "loss": 1.2017, "step": 260500 }, { "epoch": 0.99, "learning_rate": 3.354448097023254e-05, "loss": 1.2053, "step": 261000 }, { "epoch": 0.99, "learning_rate": 3.351295698741689e-05, "loss": 1.1973, "step": 261500 }, { "epoch": 0.99, "learning_rate": 3.3481433004601243e-05, "loss": 1.1972, "step": 262000 }, { "epoch": 0.99, "learning_rate": 3.34499090217856e-05, "loss": 1.1902, "step": 262500 }, { "epoch": 0.99, "learning_rate": 3.341838503896995e-05, "loss": 1.2027, "step": 263000 }, { "epoch": 1.0, "learning_rate": 3.3386861056154304e-05, "loss": 1.1981, "step": 263500 }, { "epoch": 1.0, "learning_rate": 3.335533707333866e-05, "loss": 1.201, "step": 264000 }, { "epoch": 1.0, "learning_rate": 3.332381309052301e-05, "loss": 1.1885, "step": 264500 }, { "epoch": 1.0, "learning_rate": 3.3292289107707364e-05, "loss": 1.1934, "step": 265000 }, { "epoch": 1.0, "learning_rate": 3.326076512489172e-05, "loss": 1.1933, "step": 265500 }, { "epoch": 1.01, "learning_rate": 3.322924114207607e-05, "loss": 1.1872, "step": 266000 }, { "epoch": 1.01, "learning_rate": 3.3197717159260424e-05, "loss": 1.1967, "step": 266500 }, { "epoch": 1.01, "learning_rate": 3.316619317644478e-05, "loss": 1.201, "step": 267000 }, { "epoch": 1.01, "learning_rate": 3.313466919362913e-05, "loss": 1.1889, "step": 267500 }, { "epoch": 1.01, "learning_rate": 3.3103145210813485e-05, "loss": 1.1834, "step": 268000 }, { "epoch": 1.02, "learning_rate": 3.307162122799784e-05, "loss": 1.1921, "step": 268500 }, { "epoch": 1.02, "learning_rate": 3.304009724518219e-05, "loss": 1.1926, "step": 269000 }, { "epoch": 1.02, "learning_rate": 3.3008573262366545e-05, "loss": 1.191, "step": 269500 }, { "epoch": 1.02, "learning_rate": 3.29770492795509e-05, "loss": 1.1933, "step": 270000 }, { "epoch": 1.02, "learning_rate": 3.294552529673525e-05, "loss": 1.1884, "step": 270500 }, { "epoch": 1.03, "learning_rate": 3.2914001313919605e-05, "loss": 1.1909, "step": 271000 }, { "epoch": 1.03, "learning_rate": 3.288247733110396e-05, "loss": 1.1941, "step": 271500 }, { "epoch": 1.03, "learning_rate": 3.285095334828831e-05, "loss": 1.194, "step": 272000 }, { "epoch": 1.03, "learning_rate": 3.2819429365472666e-05, "loss": 1.1948, "step": 272500 }, { "epoch": 1.03, "learning_rate": 3.278790538265702e-05, "loss": 1.1893, "step": 273000 }, { "epoch": 1.03, "learning_rate": 3.275638139984137e-05, "loss": 1.1865, "step": 273500 }, { "epoch": 1.04, "learning_rate": 3.2724857417025726e-05, "loss": 1.1893, "step": 274000 }, { "epoch": 1.04, "learning_rate": 3.269333343421008e-05, "loss": 1.1983, "step": 274500 }, { "epoch": 1.04, "learning_rate": 3.266180945139443e-05, "loss": 1.191, "step": 275000 }, { "epoch": 1.04, "learning_rate": 3.2630285468578786e-05, "loss": 1.1934, "step": 275500 }, { "epoch": 1.04, "learning_rate": 3.259876148576314e-05, "loss": 1.1913, "step": 276000 }, { "epoch": 1.05, "learning_rate": 3.256723750294749e-05, "loss": 1.1924, "step": 276500 }, { "epoch": 1.05, "learning_rate": 3.2535713520131847e-05, "loss": 1.188, "step": 277000 }, { "epoch": 1.05, "learning_rate": 3.25041895373162e-05, "loss": 1.1844, "step": 277500 }, { "epoch": 1.05, "learning_rate": 3.2472665554500553e-05, "loss": 1.196, "step": 278000 }, { "epoch": 1.05, "learning_rate": 3.244114157168491e-05, "loss": 1.1861, "step": 278500 }, { "epoch": 1.06, "learning_rate": 3.240961758886926e-05, "loss": 1.1922, "step": 279000 }, { "epoch": 1.06, "learning_rate": 3.2378093606053614e-05, "loss": 1.1899, "step": 279500 }, { "epoch": 1.06, "learning_rate": 3.2346569623237974e-05, "loss": 1.1898, "step": 280000 }, { "epoch": 1.06, "learning_rate": 3.231504564042232e-05, "loss": 1.1885, "step": 280500 }, { "epoch": 1.06, "learning_rate": 3.2283521657606674e-05, "loss": 1.1911, "step": 281000 }, { "epoch": 1.06, "learning_rate": 3.225199767479103e-05, "loss": 1.1901, "step": 281500 }, { "epoch": 1.07, "learning_rate": 3.222047369197538e-05, "loss": 1.192, "step": 282000 }, { "epoch": 1.07, "learning_rate": 3.218894970915974e-05, "loss": 1.1859, "step": 282500 }, { "epoch": 1.07, "learning_rate": 3.215742572634409e-05, "loss": 1.1943, "step": 283000 }, { "epoch": 1.07, "learning_rate": 3.212590174352844e-05, "loss": 1.1932, "step": 283500 }, { "epoch": 1.07, "learning_rate": 3.2094377760712795e-05, "loss": 1.1895, "step": 284000 }, { "epoch": 1.08, "learning_rate": 3.206285377789715e-05, "loss": 1.1864, "step": 284500 }, { "epoch": 1.08, "learning_rate": 3.203132979508151e-05, "loss": 1.1811, "step": 285000 }, { "epoch": 1.08, "learning_rate": 3.199980581226586e-05, "loss": 1.188, "step": 285500 }, { "epoch": 1.08, "learning_rate": 3.196828182945021e-05, "loss": 1.1901, "step": 286000 }, { "epoch": 1.08, "learning_rate": 3.193675784663456e-05, "loss": 1.1814, "step": 286500 }, { "epoch": 1.09, "learning_rate": 3.1905233863818915e-05, "loss": 1.1816, "step": 287000 }, { "epoch": 1.09, "learning_rate": 3.187370988100327e-05, "loss": 1.1975, "step": 287500 }, { "epoch": 1.09, "learning_rate": 3.184218589818763e-05, "loss": 1.1889, "step": 288000 }, { "epoch": 1.09, "learning_rate": 3.181066191537198e-05, "loss": 1.1843, "step": 288500 }, { "epoch": 1.09, "learning_rate": 3.177913793255633e-05, "loss": 1.1848, "step": 289000 }, { "epoch": 1.1, "learning_rate": 3.174761394974068e-05, "loss": 1.1829, "step": 289500 }, { "epoch": 1.1, "learning_rate": 3.1716089966925036e-05, "loss": 1.1828, "step": 290000 }, { "epoch": 1.1, "learning_rate": 3.1684565984109396e-05, "loss": 1.1915, "step": 290500 }, { "epoch": 1.1, "learning_rate": 3.165304200129375e-05, "loss": 1.1803, "step": 291000 }, { "epoch": 1.1, "learning_rate": 3.1621518018478096e-05, "loss": 1.1891, "step": 291500 }, { "epoch": 1.1, "learning_rate": 3.158999403566245e-05, "loss": 1.18, "step": 292000 }, { "epoch": 1.11, "learning_rate": 3.15584700528468e-05, "loss": 1.1817, "step": 292500 }, { "epoch": 1.11, "learning_rate": 3.152694607003116e-05, "loss": 1.1788, "step": 293000 }, { "epoch": 1.11, "learning_rate": 3.149542208721552e-05, "loss": 1.1777, "step": 293500 }, { "epoch": 1.11, "learning_rate": 3.146389810439987e-05, "loss": 1.172, "step": 294000 }, { "epoch": 1.11, "learning_rate": 3.143237412158422e-05, "loss": 1.1754, "step": 294500 }, { "epoch": 1.12, "learning_rate": 3.140085013876857e-05, "loss": 1.1897, "step": 295000 }, { "epoch": 1.12, "learning_rate": 3.136932615595293e-05, "loss": 1.178, "step": 295500 }, { "epoch": 1.12, "learning_rate": 3.1337802173137284e-05, "loss": 1.1772, "step": 296000 }, { "epoch": 1.12, "learning_rate": 3.130627819032164e-05, "loss": 1.1848, "step": 296500 }, { "epoch": 1.12, "learning_rate": 3.1274754207505984e-05, "loss": 1.1804, "step": 297000 }, { "epoch": 1.13, "learning_rate": 3.124323022469034e-05, "loss": 1.179, "step": 297500 }, { "epoch": 1.13, "learning_rate": 3.121170624187469e-05, "loss": 1.1881, "step": 298000 }, { "epoch": 1.13, "learning_rate": 3.118018225905905e-05, "loss": 1.1759, "step": 298500 }, { "epoch": 1.13, "learning_rate": 3.1148658276243405e-05, "loss": 1.181, "step": 299000 }, { "epoch": 1.13, "learning_rate": 3.111713429342776e-05, "loss": 1.1858, "step": 299500 }, { "epoch": 1.13, "learning_rate": 3.1085610310612105e-05, "loss": 1.1794, "step": 300000 }, { "epoch": 1.14, "learning_rate": 3.105408632779646e-05, "loss": 1.1764, "step": 300500 }, { "epoch": 1.14, "learning_rate": 3.102256234498082e-05, "loss": 1.1845, "step": 301000 }, { "epoch": 1.14, "learning_rate": 3.099103836216517e-05, "loss": 1.1801, "step": 301500 }, { "epoch": 1.14, "learning_rate": 3.0959514379349525e-05, "loss": 1.1797, "step": 302000 }, { "epoch": 1.14, "learning_rate": 3.092799039653388e-05, "loss": 1.183, "step": 302500 }, { "epoch": 1.15, "learning_rate": 3.0896466413718225e-05, "loss": 1.1838, "step": 303000 }, { "epoch": 1.15, "learning_rate": 3.0864942430902585e-05, "loss": 1.1831, "step": 303500 }, { "epoch": 1.15, "learning_rate": 3.083341844808694e-05, "loss": 1.1847, "step": 304000 }, { "epoch": 1.15, "learning_rate": 3.080189446527129e-05, "loss": 1.1718, "step": 304500 }, { "epoch": 1.15, "learning_rate": 3.0770370482455646e-05, "loss": 1.1844, "step": 305000 }, { "epoch": 1.16, "learning_rate": 3.073884649963999e-05, "loss": 1.1784, "step": 305500 }, { "epoch": 1.16, "learning_rate": 3.070732251682435e-05, "loss": 1.1768, "step": 306000 }, { "epoch": 1.16, "learning_rate": 3.0675798534008706e-05, "loss": 1.1803, "step": 306500 }, { "epoch": 1.16, "learning_rate": 3.064427455119306e-05, "loss": 1.1822, "step": 307000 }, { "epoch": 1.16, "learning_rate": 3.061275056837741e-05, "loss": 1.169, "step": 307500 }, { "epoch": 1.17, "learning_rate": 3.0581226585561766e-05, "loss": 1.176, "step": 308000 }, { "epoch": 1.17, "learning_rate": 3.054970260274611e-05, "loss": 1.1807, "step": 308500 }, { "epoch": 1.17, "learning_rate": 3.051817861993047e-05, "loss": 1.1791, "step": 309000 }, { "epoch": 1.17, "learning_rate": 3.0486654637114827e-05, "loss": 1.1832, "step": 309500 }, { "epoch": 1.17, "learning_rate": 3.045513065429918e-05, "loss": 1.1734, "step": 310000 }, { "epoch": 1.17, "learning_rate": 3.0423606671483534e-05, "loss": 1.1731, "step": 310500 }, { "epoch": 1.18, "learning_rate": 3.039208268866789e-05, "loss": 1.1725, "step": 311000 }, { "epoch": 1.18, "learning_rate": 3.0360558705852237e-05, "loss": 1.1691, "step": 311500 }, { "epoch": 1.18, "learning_rate": 3.0329034723036594e-05, "loss": 1.1698, "step": 312000 }, { "epoch": 1.18, "learning_rate": 3.0297510740220947e-05, "loss": 1.1782, "step": 312500 }, { "epoch": 1.18, "learning_rate": 3.02659867574053e-05, "loss": 1.1785, "step": 313000 }, { "epoch": 1.19, "learning_rate": 3.0234462774589654e-05, "loss": 1.1708, "step": 313500 }, { "epoch": 1.19, "learning_rate": 3.0202938791774004e-05, "loss": 1.1863, "step": 314000 }, { "epoch": 1.19, "learning_rate": 3.0171414808958358e-05, "loss": 1.1716, "step": 314500 }, { "epoch": 1.19, "learning_rate": 3.0139890826142714e-05, "loss": 1.1781, "step": 315000 }, { "epoch": 1.19, "learning_rate": 3.0108366843327068e-05, "loss": 1.1697, "step": 315500 }, { "epoch": 1.2, "learning_rate": 3.007684286051142e-05, "loss": 1.177, "step": 316000 }, { "epoch": 1.2, "learning_rate": 3.0045318877695778e-05, "loss": 1.171, "step": 316500 }, { "epoch": 1.2, "learning_rate": 3.0013794894880125e-05, "loss": 1.1679, "step": 317000 }, { "epoch": 1.2, "learning_rate": 2.998227091206448e-05, "loss": 1.173, "step": 317500 }, { "epoch": 1.2, "learning_rate": 2.9950746929248835e-05, "loss": 1.1747, "step": 318000 }, { "epoch": 1.2, "learning_rate": 2.991922294643319e-05, "loss": 1.1665, "step": 318500 }, { "epoch": 1.21, "learning_rate": 2.9887698963617545e-05, "loss": 1.1717, "step": 319000 }, { "epoch": 1.21, "learning_rate": 2.9856174980801892e-05, "loss": 1.1799, "step": 319500 }, { "epoch": 1.21, "learning_rate": 2.982465099798625e-05, "loss": 1.1683, "step": 320000 }, { "epoch": 1.21, "learning_rate": 2.9793127015170602e-05, "loss": 1.1765, "step": 320500 }, { "epoch": 1.21, "learning_rate": 2.9761603032354956e-05, "loss": 1.1657, "step": 321000 }, { "epoch": 1.22, "learning_rate": 2.9730079049539313e-05, "loss": 1.1745, "step": 321500 }, { "epoch": 1.22, "learning_rate": 2.9698555066723666e-05, "loss": 1.1861, "step": 322000 }, { "epoch": 1.22, "learning_rate": 2.9667031083908016e-05, "loss": 1.1799, "step": 322500 }, { "epoch": 1.22, "learning_rate": 2.963550710109237e-05, "loss": 1.168, "step": 323000 }, { "epoch": 1.22, "learning_rate": 2.9603983118276723e-05, "loss": 1.1693, "step": 323500 }, { "epoch": 1.23, "learning_rate": 2.957245913546108e-05, "loss": 1.1737, "step": 324000 }, { "epoch": 1.23, "learning_rate": 2.9540935152645433e-05, "loss": 1.1775, "step": 324500 }, { "epoch": 1.23, "learning_rate": 2.9509411169829787e-05, "loss": 1.1732, "step": 325000 }, { "epoch": 1.23, "learning_rate": 2.9477887187014137e-05, "loss": 1.1633, "step": 325500 }, { "epoch": 1.23, "learning_rate": 2.944636320419849e-05, "loss": 1.1749, "step": 326000 }, { "epoch": 1.24, "learning_rate": 2.9414839221382844e-05, "loss": 1.1698, "step": 326500 }, { "epoch": 1.24, "learning_rate": 2.93833152385672e-05, "loss": 1.1632, "step": 327000 }, { "epoch": 1.24, "learning_rate": 2.9351791255751554e-05, "loss": 1.1621, "step": 327500 }, { "epoch": 1.24, "learning_rate": 2.9320267272935904e-05, "loss": 1.1646, "step": 328000 }, { "epoch": 1.24, "learning_rate": 2.9288743290120257e-05, "loss": 1.1695, "step": 328500 }, { "epoch": 1.24, "learning_rate": 2.925721930730461e-05, "loss": 1.1661, "step": 329000 }, { "epoch": 1.25, "learning_rate": 2.9225695324488968e-05, "loss": 1.1669, "step": 329500 }, { "epoch": 1.25, "learning_rate": 2.919417134167332e-05, "loss": 1.1718, "step": 330000 }, { "epoch": 1.25, "learning_rate": 2.9162647358857674e-05, "loss": 1.1702, "step": 330500 }, { "epoch": 1.25, "learning_rate": 2.9131123376042024e-05, "loss": 1.1697, "step": 331000 }, { "epoch": 1.25, "learning_rate": 2.9099599393226378e-05, "loss": 1.1607, "step": 331500 }, { "epoch": 1.26, "learning_rate": 2.9068075410410735e-05, "loss": 1.168, "step": 332000 }, { "epoch": 1.26, "learning_rate": 2.9036551427595088e-05, "loss": 1.1717, "step": 332500 }, { "epoch": 1.26, "learning_rate": 2.900502744477944e-05, "loss": 1.1638, "step": 333000 }, { "epoch": 1.26, "learning_rate": 2.89735034619638e-05, "loss": 1.1763, "step": 333500 }, { "epoch": 1.26, "learning_rate": 2.8941979479148145e-05, "loss": 1.1716, "step": 334000 }, { "epoch": 1.27, "learning_rate": 2.8910455496332502e-05, "loss": 1.1693, "step": 334500 }, { "epoch": 1.27, "learning_rate": 2.8878931513516855e-05, "loss": 1.1638, "step": 335000 }, { "epoch": 1.27, "learning_rate": 2.884740753070121e-05, "loss": 1.1697, "step": 335500 }, { "epoch": 1.27, "learning_rate": 2.8815883547885562e-05, "loss": 1.1701, "step": 336000 }, { "epoch": 1.27, "learning_rate": 2.8784359565069912e-05, "loss": 1.163, "step": 336500 }, { "epoch": 1.27, "learning_rate": 2.8752835582254266e-05, "loss": 1.1738, "step": 337000 }, { "epoch": 1.28, "learning_rate": 2.8721311599438622e-05, "loss": 1.1666, "step": 337500 }, { "epoch": 1.28, "learning_rate": 2.8689787616622976e-05, "loss": 1.167, "step": 338000 }, { "epoch": 1.28, "learning_rate": 2.865826363380733e-05, "loss": 1.1655, "step": 338500 }, { "epoch": 1.28, "learning_rate": 2.8626739650991686e-05, "loss": 1.1576, "step": 339000 }, { "epoch": 1.28, "learning_rate": 2.8595215668176033e-05, "loss": 1.1653, "step": 339500 }, { "epoch": 1.29, "learning_rate": 2.856369168536039e-05, "loss": 1.1618, "step": 340000 }, { "epoch": 1.29, "learning_rate": 2.8532167702544743e-05, "loss": 1.1714, "step": 340500 }, { "epoch": 1.29, "learning_rate": 2.8500643719729097e-05, "loss": 1.1704, "step": 341000 }, { "epoch": 1.29, "learning_rate": 2.8469119736913453e-05, "loss": 1.1672, "step": 341500 }, { "epoch": 1.29, "learning_rate": 2.8437595754097807e-05, "loss": 1.1655, "step": 342000 }, { "epoch": 1.3, "learning_rate": 2.8406071771282157e-05, "loss": 1.1657, "step": 342500 }, { "epoch": 1.3, "learning_rate": 2.837454778846651e-05, "loss": 1.1631, "step": 343000 }, { "epoch": 1.3, "learning_rate": 2.8343023805650864e-05, "loss": 1.1749, "step": 343500 }, { "epoch": 1.3, "learning_rate": 2.831149982283522e-05, "loss": 1.1672, "step": 344000 }, { "epoch": 1.3, "learning_rate": 2.8279975840019574e-05, "loss": 1.154, "step": 344500 }, { "epoch": 1.31, "learning_rate": 2.8248451857203924e-05, "loss": 1.1668, "step": 345000 }, { "epoch": 1.31, "learning_rate": 2.8216927874388277e-05, "loss": 1.1682, "step": 345500 }, { "epoch": 1.31, "learning_rate": 2.818540389157263e-05, "loss": 1.1652, "step": 346000 }, { "epoch": 1.31, "learning_rate": 2.8153879908756984e-05, "loss": 1.1582, "step": 346500 }, { "epoch": 1.31, "learning_rate": 2.812235592594134e-05, "loss": 1.1633, "step": 347000 }, { "epoch": 1.31, "learning_rate": 2.8090831943125695e-05, "loss": 1.1594, "step": 347500 }, { "epoch": 1.32, "learning_rate": 2.8059307960310045e-05, "loss": 1.1639, "step": 348000 }, { "epoch": 1.32, "learning_rate": 2.8027783977494398e-05, "loss": 1.1659, "step": 348500 }, { "epoch": 1.32, "learning_rate": 2.799625999467875e-05, "loss": 1.1676, "step": 349000 }, { "epoch": 1.32, "learning_rate": 2.796473601186311e-05, "loss": 1.1583, "step": 349500 }, { "epoch": 1.32, "learning_rate": 2.7933212029047462e-05, "loss": 1.1621, "step": 350000 }, { "epoch": 1.33, "learning_rate": 2.7901688046231812e-05, "loss": 1.1594, "step": 350500 }, { "epoch": 1.33, "learning_rate": 2.7870164063416165e-05, "loss": 1.161, "step": 351000 }, { "epoch": 1.33, "learning_rate": 2.783864008060052e-05, "loss": 1.1571, "step": 351500 }, { "epoch": 1.33, "learning_rate": 2.7807116097784876e-05, "loss": 1.1655, "step": 352000 }, { "epoch": 1.33, "learning_rate": 2.777559211496923e-05, "loss": 1.1636, "step": 352500 }, { "epoch": 1.34, "learning_rate": 2.7744068132153582e-05, "loss": 1.1568, "step": 353000 }, { "epoch": 1.34, "learning_rate": 2.7712544149337932e-05, "loss": 1.161, "step": 353500 }, { "epoch": 1.34, "learning_rate": 2.7681020166522286e-05, "loss": 1.1611, "step": 354000 }, { "epoch": 1.34, "learning_rate": 2.7649496183706643e-05, "loss": 1.156, "step": 354500 }, { "epoch": 1.34, "learning_rate": 2.7617972200890996e-05, "loss": 1.1614, "step": 355000 }, { "epoch": 1.34, "learning_rate": 2.758644821807535e-05, "loss": 1.1648, "step": 355500 }, { "epoch": 1.35, "learning_rate": 2.7554924235259706e-05, "loss": 1.1559, "step": 356000 }, { "epoch": 1.35, "learning_rate": 2.7523400252444053e-05, "loss": 1.1606, "step": 356500 }, { "epoch": 1.35, "learning_rate": 2.749187626962841e-05, "loss": 1.1581, "step": 357000 }, { "epoch": 1.35, "learning_rate": 2.7460352286812763e-05, "loss": 1.1583, "step": 357500 }, { "epoch": 1.35, "learning_rate": 2.7428828303997117e-05, "loss": 1.1593, "step": 358000 }, { "epoch": 1.36, "learning_rate": 2.739730432118147e-05, "loss": 1.1539, "step": 358500 }, { "epoch": 1.36, "learning_rate": 2.736578033836582e-05, "loss": 1.1579, "step": 359000 }, { "epoch": 1.36, "learning_rate": 2.7334256355550174e-05, "loss": 1.1578, "step": 359500 }, { "epoch": 1.36, "learning_rate": 2.730273237273453e-05, "loss": 1.16, "step": 360000 }, { "epoch": 1.36, "learning_rate": 2.7271208389918884e-05, "loss": 1.1641, "step": 360500 }, { "epoch": 1.37, "learning_rate": 2.7239684407103237e-05, "loss": 1.1561, "step": 361000 }, { "epoch": 1.37, "learning_rate": 2.7208160424287594e-05, "loss": 1.164, "step": 361500 }, { "epoch": 1.37, "learning_rate": 2.717663644147194e-05, "loss": 1.153, "step": 362000 }, { "epoch": 1.37, "learning_rate": 2.7145112458656298e-05, "loss": 1.1539, "step": 362500 }, { "epoch": 1.37, "learning_rate": 2.711358847584065e-05, "loss": 1.1532, "step": 363000 }, { "epoch": 1.38, "learning_rate": 2.7082064493025005e-05, "loss": 1.1611, "step": 363500 }, { "epoch": 1.38, "learning_rate": 2.705054051020936e-05, "loss": 1.1558, "step": 364000 }, { "epoch": 1.38, "learning_rate": 2.7019016527393715e-05, "loss": 1.1565, "step": 364500 }, { "epoch": 1.38, "learning_rate": 2.6987492544578065e-05, "loss": 1.1502, "step": 365000 }, { "epoch": 1.38, "learning_rate": 2.6955968561762418e-05, "loss": 1.1487, "step": 365500 }, { "epoch": 1.38, "learning_rate": 2.6924444578946772e-05, "loss": 1.1519, "step": 366000 }, { "epoch": 1.39, "learning_rate": 2.689292059613113e-05, "loss": 1.1592, "step": 366500 }, { "epoch": 1.39, "learning_rate": 2.6861396613315482e-05, "loss": 1.1537, "step": 367000 }, { "epoch": 1.39, "learning_rate": 2.6829872630499832e-05, "loss": 1.1574, "step": 367500 }, { "epoch": 1.39, "learning_rate": 2.6798348647684185e-05, "loss": 1.153, "step": 368000 }, { "epoch": 1.39, "learning_rate": 2.676682466486854e-05, "loss": 1.1462, "step": 368500 }, { "epoch": 1.4, "learning_rate": 2.6735300682052892e-05, "loss": 1.1511, "step": 369000 }, { "epoch": 1.4, "learning_rate": 2.670377669923725e-05, "loss": 1.1518, "step": 369500 }, { "epoch": 1.4, "learning_rate": 2.6672252716421603e-05, "loss": 1.1539, "step": 370000 }, { "epoch": 1.4, "learning_rate": 2.6640728733605953e-05, "loss": 1.1613, "step": 370500 }, { "epoch": 1.4, "learning_rate": 2.6609204750790306e-05, "loss": 1.1626, "step": 371000 }, { "epoch": 1.41, "learning_rate": 2.657768076797466e-05, "loss": 1.1557, "step": 371500 }, { "epoch": 1.41, "learning_rate": 2.6546156785159016e-05, "loss": 1.1559, "step": 372000 }, { "epoch": 1.41, "learning_rate": 2.651463280234337e-05, "loss": 1.1476, "step": 372500 }, { "epoch": 1.41, "learning_rate": 2.648310881952772e-05, "loss": 1.146, "step": 373000 }, { "epoch": 1.41, "learning_rate": 2.6451584836712073e-05, "loss": 1.1551, "step": 373500 }, { "epoch": 1.41, "learning_rate": 2.6420060853896427e-05, "loss": 1.1516, "step": 374000 }, { "epoch": 1.42, "learning_rate": 2.6388536871080784e-05, "loss": 1.1531, "step": 374500 }, { "epoch": 1.42, "learning_rate": 2.6357012888265137e-05, "loss": 1.1548, "step": 375000 }, { "epoch": 1.42, "learning_rate": 2.632548890544949e-05, "loss": 1.1588, "step": 375500 }, { "epoch": 1.42, "learning_rate": 2.629396492263384e-05, "loss": 1.1569, "step": 376000 }, { "epoch": 1.42, "learning_rate": 2.6262440939818194e-05, "loss": 1.1547, "step": 376500 }, { "epoch": 1.43, "learning_rate": 2.623091695700255e-05, "loss": 1.1444, "step": 377000 }, { "epoch": 1.43, "learning_rate": 2.6199392974186904e-05, "loss": 1.1588, "step": 377500 }, { "epoch": 1.43, "learning_rate": 2.6167868991371258e-05, "loss": 1.1513, "step": 378000 }, { "epoch": 1.43, "learning_rate": 2.6136345008555614e-05, "loss": 1.1513, "step": 378500 }, { "epoch": 1.43, "learning_rate": 2.610482102573996e-05, "loss": 1.1551, "step": 379000 }, { "epoch": 1.44, "learning_rate": 2.6073297042924314e-05, "loss": 1.1521, "step": 379500 }, { "epoch": 1.44, "learning_rate": 2.604177306010867e-05, "loss": 1.154, "step": 380000 }, { "epoch": 1.44, "learning_rate": 2.6010249077293025e-05, "loss": 1.157, "step": 380500 }, { "epoch": 1.44, "learning_rate": 2.5978725094477378e-05, "loss": 1.1499, "step": 381000 }, { "epoch": 1.44, "learning_rate": 2.5947201111661728e-05, "loss": 1.1456, "step": 381500 }, { "epoch": 1.45, "learning_rate": 2.591567712884608e-05, "loss": 1.1486, "step": 382000 }, { "epoch": 1.45, "learning_rate": 2.588415314603044e-05, "loss": 1.1507, "step": 382500 }, { "epoch": 1.45, "learning_rate": 2.5852629163214792e-05, "loss": 1.1544, "step": 383000 }, { "epoch": 1.45, "learning_rate": 2.5821105180399145e-05, "loss": 1.1478, "step": 383500 }, { "epoch": 1.45, "learning_rate": 2.5789581197583502e-05, "loss": 1.1473, "step": 384000 }, { "epoch": 1.45, "learning_rate": 2.575805721476785e-05, "loss": 1.1528, "step": 384500 }, { "epoch": 1.46, "learning_rate": 2.5726533231952206e-05, "loss": 1.1443, "step": 385000 }, { "epoch": 1.46, "learning_rate": 2.569500924913656e-05, "loss": 1.1494, "step": 385500 }, { "epoch": 1.46, "learning_rate": 2.5663485266320913e-05, "loss": 1.148, "step": 386000 }, { "epoch": 1.46, "learning_rate": 2.563196128350527e-05, "loss": 1.1475, "step": 386500 }, { "epoch": 1.46, "learning_rate": 2.5600437300689623e-05, "loss": 1.1492, "step": 387000 }, { "epoch": 1.47, "learning_rate": 2.5568913317873973e-05, "loss": 1.1483, "step": 387500 }, { "epoch": 1.47, "learning_rate": 2.5537389335058326e-05, "loss": 1.1499, "step": 388000 }, { "epoch": 1.47, "learning_rate": 2.550586535224268e-05, "loss": 1.1446, "step": 388500 }, { "epoch": 1.47, "learning_rate": 2.5474341369427037e-05, "loss": 1.142, "step": 389000 }, { "epoch": 1.47, "learning_rate": 2.544281738661139e-05, "loss": 1.1489, "step": 389500 }, { "epoch": 1.48, "learning_rate": 2.541129340379574e-05, "loss": 1.1574, "step": 390000 }, { "epoch": 1.48, "learning_rate": 2.5379769420980093e-05, "loss": 1.1388, "step": 390500 }, { "epoch": 1.48, "learning_rate": 2.5348245438164447e-05, "loss": 1.1489, "step": 391000 }, { "epoch": 1.48, "learning_rate": 2.53167214553488e-05, "loss": 1.1436, "step": 391500 }, { "epoch": 1.48, "learning_rate": 2.5285197472533157e-05, "loss": 1.1476, "step": 392000 }, { "epoch": 1.48, "learning_rate": 2.525367348971751e-05, "loss": 1.1431, "step": 392500 }, { "epoch": 1.49, "learning_rate": 2.522214950690186e-05, "loss": 1.1512, "step": 393000 }, { "epoch": 1.49, "learning_rate": 2.5190625524086214e-05, "loss": 1.1446, "step": 393500 }, { "epoch": 1.49, "learning_rate": 2.5159101541270568e-05, "loss": 1.1529, "step": 394000 }, { "epoch": 1.49, "learning_rate": 2.5127577558454924e-05, "loss": 1.1408, "step": 394500 }, { "epoch": 1.49, "learning_rate": 2.5096053575639278e-05, "loss": 1.1436, "step": 395000 }, { "epoch": 1.5, "learning_rate": 2.506452959282363e-05, "loss": 1.1522, "step": 395500 }, { "epoch": 1.5, "learning_rate": 2.503300561000798e-05, "loss": 1.1495, "step": 396000 }, { "epoch": 1.5, "learning_rate": 2.5001481627192335e-05, "loss": 1.1469, "step": 396500 }, { "epoch": 1.5, "learning_rate": 2.496995764437669e-05, "loss": 1.1473, "step": 397000 }, { "epoch": 1.5, "learning_rate": 2.4938433661561045e-05, "loss": 1.1339, "step": 397500 }, { "epoch": 1.51, "learning_rate": 2.4906909678745395e-05, "loss": 1.1437, "step": 398000 }, { "epoch": 1.51, "learning_rate": 2.4875385695929752e-05, "loss": 1.1392, "step": 398500 }, { "epoch": 1.51, "learning_rate": 2.4843861713114105e-05, "loss": 1.1421, "step": 399000 }, { "epoch": 1.51, "learning_rate": 2.481233773029846e-05, "loss": 1.1483, "step": 399500 }, { "epoch": 1.51, "learning_rate": 2.4780813747482812e-05, "loss": 1.137, "step": 400000 }, { "epoch": 1.52, "learning_rate": 2.4749289764667162e-05, "loss": 1.1475, "step": 400500 }, { "epoch": 1.52, "learning_rate": 2.471776578185152e-05, "loss": 1.1365, "step": 401000 }, { "epoch": 1.52, "learning_rate": 2.4686241799035872e-05, "loss": 1.1503, "step": 401500 }, { "epoch": 1.52, "learning_rate": 2.4654717816220222e-05, "loss": 1.149, "step": 402000 }, { "epoch": 1.52, "learning_rate": 2.462319383340458e-05, "loss": 1.147, "step": 402500 }, { "epoch": 1.52, "learning_rate": 2.4591669850588933e-05, "loss": 1.1412, "step": 403000 }, { "epoch": 1.53, "learning_rate": 2.4560145867773286e-05, "loss": 1.1497, "step": 403500 }, { "epoch": 1.53, "learning_rate": 2.452862188495764e-05, "loss": 1.1409, "step": 404000 }, { "epoch": 1.53, "learning_rate": 2.4497097902141993e-05, "loss": 1.1371, "step": 404500 }, { "epoch": 1.53, "learning_rate": 2.4465573919326347e-05, "loss": 1.1587, "step": 405000 }, { "epoch": 1.53, "learning_rate": 2.44340499365107e-05, "loss": 1.144, "step": 405500 }, { "epoch": 1.54, "learning_rate": 2.4402525953695053e-05, "loss": 1.1456, "step": 406000 }, { "epoch": 1.54, "learning_rate": 2.4371001970879407e-05, "loss": 1.1413, "step": 406500 }, { "epoch": 1.54, "learning_rate": 2.433947798806376e-05, "loss": 1.1506, "step": 407000 }, { "epoch": 1.54, "learning_rate": 2.4307954005248114e-05, "loss": 1.1397, "step": 407500 }, { "epoch": 1.54, "learning_rate": 2.4276430022432467e-05, "loss": 1.1385, "step": 408000 }, { "epoch": 1.55, "learning_rate": 2.424490603961682e-05, "loss": 1.1399, "step": 408500 }, { "epoch": 1.55, "learning_rate": 2.4213382056801174e-05, "loss": 1.1373, "step": 409000 }, { "epoch": 1.55, "learning_rate": 2.4181858073985527e-05, "loss": 1.1437, "step": 409500 }, { "epoch": 1.55, "learning_rate": 2.415033409116988e-05, "loss": 1.1429, "step": 410000 }, { "epoch": 1.55, "learning_rate": 2.4118810108354234e-05, "loss": 1.1454, "step": 410500 }, { "epoch": 1.55, "learning_rate": 2.4087286125538588e-05, "loss": 1.1479, "step": 411000 }, { "epoch": 1.56, "learning_rate": 2.4055762142722945e-05, "loss": 1.1334, "step": 411500 }, { "epoch": 1.56, "learning_rate": 2.4024238159907295e-05, "loss": 1.1325, "step": 412000 }, { "epoch": 1.56, "learning_rate": 2.3992714177091648e-05, "loss": 1.1382, "step": 412500 }, { "epoch": 1.56, "learning_rate": 2.3961190194276005e-05, "loss": 1.1407, "step": 413000 }, { "epoch": 1.56, "learning_rate": 2.3929666211460355e-05, "loss": 1.1441, "step": 413500 }, { "epoch": 1.57, "learning_rate": 2.389814222864471e-05, "loss": 1.1395, "step": 414000 }, { "epoch": 1.57, "learning_rate": 2.3866618245829065e-05, "loss": 1.1428, "step": 414500 }, { "epoch": 1.57, "learning_rate": 2.3835094263013415e-05, "loss": 1.1471, "step": 415000 }, { "epoch": 1.57, "learning_rate": 2.3803570280197772e-05, "loss": 1.1455, "step": 415500 }, { "epoch": 1.57, "learning_rate": 2.3772046297382122e-05, "loss": 1.1428, "step": 416000 }, { "epoch": 1.58, "learning_rate": 2.3740522314566476e-05, "loss": 1.1418, "step": 416500 }, { "epoch": 1.58, "learning_rate": 2.3708998331750832e-05, "loss": 1.1344, "step": 417000 }, { "epoch": 1.58, "learning_rate": 2.3677474348935182e-05, "loss": 1.1401, "step": 417500 }, { "epoch": 1.58, "learning_rate": 2.3645950366119536e-05, "loss": 1.1418, "step": 418000 }, { "epoch": 1.58, "learning_rate": 2.3614426383303893e-05, "loss": 1.137, "step": 418500 }, { "epoch": 1.59, "learning_rate": 2.3582902400488243e-05, "loss": 1.1241, "step": 419000 }, { "epoch": 1.59, "learning_rate": 2.35513784176726e-05, "loss": 1.1377, "step": 419500 }, { "epoch": 1.59, "learning_rate": 2.3519854434856953e-05, "loss": 1.1325, "step": 420000 }, { "epoch": 1.59, "learning_rate": 2.3488330452041303e-05, "loss": 1.147, "step": 420500 }, { "epoch": 1.59, "learning_rate": 2.345680646922566e-05, "loss": 1.135, "step": 421000 }, { "epoch": 1.59, "learning_rate": 2.3425282486410013e-05, "loss": 1.1351, "step": 421500 }, { "epoch": 1.6, "learning_rate": 2.3393758503594367e-05, "loss": 1.1491, "step": 422000 }, { "epoch": 1.6, "learning_rate": 2.336223452077872e-05, "loss": 1.1423, "step": 422500 }, { "epoch": 1.6, "learning_rate": 2.333071053796307e-05, "loss": 1.1393, "step": 423000 }, { "epoch": 1.6, "learning_rate": 2.3299186555147427e-05, "loss": 1.1271, "step": 423500 }, { "epoch": 1.6, "learning_rate": 2.326766257233178e-05, "loss": 1.1357, "step": 424000 }, { "epoch": 1.61, "learning_rate": 2.323613858951613e-05, "loss": 1.1344, "step": 424500 }, { "epoch": 1.61, "learning_rate": 2.3204614606700487e-05, "loss": 1.1291, "step": 425000 }, { "epoch": 1.61, "learning_rate": 2.317309062388484e-05, "loss": 1.1355, "step": 425500 }, { "epoch": 1.61, "learning_rate": 2.3141566641069194e-05, "loss": 1.1287, "step": 426000 }, { "epoch": 1.61, "learning_rate": 2.3110042658253548e-05, "loss": 1.135, "step": 426500 }, { "epoch": 1.62, "learning_rate": 2.30785186754379e-05, "loss": 1.1446, "step": 427000 }, { "epoch": 1.62, "learning_rate": 2.3046994692622255e-05, "loss": 1.1432, "step": 427500 }, { "epoch": 1.62, "learning_rate": 2.3015470709806608e-05, "loss": 1.1303, "step": 428000 }, { "epoch": 1.62, "learning_rate": 2.298394672699096e-05, "loss": 1.1442, "step": 428500 }, { "epoch": 1.62, "learning_rate": 2.2952422744175315e-05, "loss": 1.1309, "step": 429000 }, { "epoch": 1.62, "learning_rate": 2.2920898761359668e-05, "loss": 1.1268, "step": 429500 }, { "epoch": 1.63, "learning_rate": 2.288937477854402e-05, "loss": 1.1306, "step": 430000 }, { "epoch": 1.63, "learning_rate": 2.2857850795728375e-05, "loss": 1.1389, "step": 430500 }, { "epoch": 1.63, "learning_rate": 2.282632681291273e-05, "loss": 1.1343, "step": 431000 }, { "epoch": 1.63, "learning_rate": 2.2794802830097082e-05, "loss": 1.1333, "step": 431500 }, { "epoch": 1.63, "learning_rate": 2.2763278847281435e-05, "loss": 1.1383, "step": 432000 }, { "epoch": 1.64, "learning_rate": 2.273175486446579e-05, "loss": 1.1341, "step": 432500 }, { "epoch": 1.64, "learning_rate": 2.2700230881650142e-05, "loss": 1.1343, "step": 433000 }, { "epoch": 1.64, "learning_rate": 2.2668706898834496e-05, "loss": 1.1287, "step": 433500 }, { "epoch": 1.64, "learning_rate": 2.2637182916018853e-05, "loss": 1.1397, "step": 434000 }, { "epoch": 1.64, "learning_rate": 2.2605658933203203e-05, "loss": 1.136, "step": 434500 }, { "epoch": 1.65, "learning_rate": 2.2574134950387556e-05, "loss": 1.1247, "step": 435000 }, { "epoch": 1.65, "learning_rate": 2.2542610967571913e-05, "loss": 1.1283, "step": 435500 }, { "epoch": 1.65, "learning_rate": 2.2511086984756263e-05, "loss": 1.1349, "step": 436000 }, { "epoch": 1.65, "learning_rate": 2.2479563001940616e-05, "loss": 1.1261, "step": 436500 }, { "epoch": 1.65, "learning_rate": 2.2448039019124973e-05, "loss": 1.1287, "step": 437000 }, { "epoch": 1.66, "learning_rate": 2.2416515036309323e-05, "loss": 1.1357, "step": 437500 }, { "epoch": 1.66, "learning_rate": 2.238499105349368e-05, "loss": 1.1362, "step": 438000 }, { "epoch": 1.66, "learning_rate": 2.235346707067803e-05, "loss": 1.1276, "step": 438500 }, { "epoch": 1.66, "learning_rate": 2.2321943087862384e-05, "loss": 1.1325, "step": 439000 }, { "epoch": 1.66, "learning_rate": 2.229041910504674e-05, "loss": 1.1226, "step": 439500 }, { "epoch": 1.66, "learning_rate": 2.225889512223109e-05, "loss": 1.1207, "step": 440000 }, { "epoch": 1.67, "learning_rate": 2.2227371139415444e-05, "loss": 1.1301, "step": 440500 }, { "epoch": 1.67, "learning_rate": 2.21958471565998e-05, "loss": 1.1343, "step": 441000 }, { "epoch": 1.67, "learning_rate": 2.216432317378415e-05, "loss": 1.1279, "step": 441500 }, { "epoch": 1.67, "learning_rate": 2.2132799190968508e-05, "loss": 1.124, "step": 442000 }, { "epoch": 1.67, "learning_rate": 2.210127520815286e-05, "loss": 1.1229, "step": 442500 }, { "epoch": 1.68, "learning_rate": 2.206975122533721e-05, "loss": 1.1377, "step": 443000 }, { "epoch": 1.68, "learning_rate": 2.2038227242521568e-05, "loss": 1.1277, "step": 443500 }, { "epoch": 1.68, "learning_rate": 2.200670325970592e-05, "loss": 1.1284, "step": 444000 }, { "epoch": 1.68, "learning_rate": 2.1975179276890275e-05, "loss": 1.1266, "step": 444500 }, { "epoch": 1.68, "learning_rate": 2.1943655294074628e-05, "loss": 1.1242, "step": 445000 }, { "epoch": 1.69, "learning_rate": 2.1912131311258978e-05, "loss": 1.119, "step": 445500 }, { "epoch": 1.69, "learning_rate": 2.1880607328443335e-05, "loss": 1.1285, "step": 446000 }, { "epoch": 1.69, "learning_rate": 2.184908334562769e-05, "loss": 1.133, "step": 446500 }, { "epoch": 1.69, "learning_rate": 2.181755936281204e-05, "loss": 1.1283, "step": 447000 }, { "epoch": 1.69, "learning_rate": 2.1786035379996395e-05, "loss": 1.1312, "step": 447500 }, { "epoch": 1.69, "learning_rate": 2.175451139718075e-05, "loss": 1.1295, "step": 448000 }, { "epoch": 1.7, "learning_rate": 2.1722987414365102e-05, "loss": 1.1331, "step": 448500 }, { "epoch": 1.7, "learning_rate": 2.1691463431549456e-05, "loss": 1.1275, "step": 449000 }, { "epoch": 1.7, "learning_rate": 2.165993944873381e-05, "loss": 1.1229, "step": 449500 }, { "epoch": 1.7, "learning_rate": 2.1628415465918163e-05, "loss": 1.1197, "step": 450000 }, { "epoch": 1.7, "learning_rate": 2.1596891483102516e-05, "loss": 1.1211, "step": 450500 }, { "epoch": 1.71, "learning_rate": 2.156536750028687e-05, "loss": 1.1299, "step": 451000 }, { "epoch": 1.71, "learning_rate": 2.1533843517471223e-05, "loss": 1.1399, "step": 451500 }, { "epoch": 1.71, "learning_rate": 2.1502319534655576e-05, "loss": 1.1205, "step": 452000 }, { "epoch": 1.71, "learning_rate": 2.147079555183993e-05, "loss": 1.1271, "step": 452500 }, { "epoch": 1.71, "learning_rate": 2.1439271569024283e-05, "loss": 1.1253, "step": 453000 }, { "epoch": 1.72, "learning_rate": 2.1407747586208637e-05, "loss": 1.1318, "step": 453500 }, { "epoch": 1.72, "learning_rate": 2.137622360339299e-05, "loss": 1.124, "step": 454000 }, { "epoch": 1.72, "learning_rate": 2.1344699620577343e-05, "loss": 1.1271, "step": 454500 }, { "epoch": 1.72, "learning_rate": 2.1313175637761697e-05, "loss": 1.1284, "step": 455000 }, { "epoch": 1.72, "learning_rate": 2.128165165494605e-05, "loss": 1.1278, "step": 455500 }, { "epoch": 1.72, "learning_rate": 2.1250127672130404e-05, "loss": 1.1375, "step": 456000 }, { "epoch": 1.73, "learning_rate": 2.1218603689314757e-05, "loss": 1.1224, "step": 456500 }, { "epoch": 1.73, "learning_rate": 2.118707970649911e-05, "loss": 1.1182, "step": 457000 }, { "epoch": 1.73, "learning_rate": 2.1155555723683464e-05, "loss": 1.1236, "step": 457500 }, { "epoch": 1.73, "learning_rate": 2.112403174086782e-05, "loss": 1.1216, "step": 458000 }, { "epoch": 1.73, "learning_rate": 2.109250775805217e-05, "loss": 1.1148, "step": 458500 }, { "epoch": 1.74, "learning_rate": 2.1060983775236524e-05, "loss": 1.1259, "step": 459000 }, { "epoch": 1.74, "learning_rate": 2.102945979242088e-05, "loss": 1.1209, "step": 459500 }, { "epoch": 1.74, "learning_rate": 2.099793580960523e-05, "loss": 1.1278, "step": 460000 }, { "epoch": 1.74, "learning_rate": 2.0966411826789588e-05, "loss": 1.1265, "step": 460500 }, { "epoch": 1.74, "learning_rate": 2.0934887843973938e-05, "loss": 1.1267, "step": 461000 }, { "epoch": 1.75, "learning_rate": 2.090336386115829e-05, "loss": 1.1236, "step": 461500 }, { "epoch": 1.75, "learning_rate": 2.087183987834265e-05, "loss": 1.1223, "step": 462000 }, { "epoch": 1.75, "learning_rate": 2.0840315895527e-05, "loss": 1.1282, "step": 462500 }, { "epoch": 1.75, "learning_rate": 2.0808791912711352e-05, "loss": 1.1196, "step": 463000 }, { "epoch": 1.75, "learning_rate": 2.077726792989571e-05, "loss": 1.1282, "step": 463500 }, { "epoch": 1.76, "learning_rate": 2.074574394708006e-05, "loss": 1.1278, "step": 464000 }, { "epoch": 1.76, "learning_rate": 2.0714219964264416e-05, "loss": 1.1204, "step": 464500 }, { "epoch": 1.76, "learning_rate": 2.068269598144877e-05, "loss": 1.1181, "step": 465000 }, { "epoch": 1.76, "learning_rate": 2.065117199863312e-05, "loss": 1.1217, "step": 465500 }, { "epoch": 1.76, "learning_rate": 2.0619648015817476e-05, "loss": 1.1149, "step": 466000 }, { "epoch": 1.76, "learning_rate": 2.058812403300183e-05, "loss": 1.1294, "step": 466500 }, { "epoch": 1.77, "learning_rate": 2.0556600050186183e-05, "loss": 1.1255, "step": 467000 }, { "epoch": 1.77, "learning_rate": 2.0525076067370536e-05, "loss": 1.115, "step": 467500 }, { "epoch": 1.77, "learning_rate": 2.0493552084554886e-05, "loss": 1.1232, "step": 468000 }, { "epoch": 1.77, "learning_rate": 2.0462028101739243e-05, "loss": 1.1268, "step": 468500 }, { "epoch": 1.77, "learning_rate": 2.0430504118923596e-05, "loss": 1.1208, "step": 469000 }, { "epoch": 1.78, "learning_rate": 2.0398980136107947e-05, "loss": 1.1205, "step": 469500 }, { "epoch": 1.78, "learning_rate": 2.0367456153292303e-05, "loss": 1.127, "step": 470000 }, { "epoch": 1.78, "learning_rate": 2.0335932170476657e-05, "loss": 1.1257, "step": 470500 }, { "epoch": 1.78, "learning_rate": 2.030440818766101e-05, "loss": 1.1201, "step": 471000 }, { "epoch": 1.78, "learning_rate": 2.0272884204845364e-05, "loss": 1.128, "step": 471500 }, { "epoch": 1.79, "learning_rate": 2.0241360222029717e-05, "loss": 1.1132, "step": 472000 }, { "epoch": 1.79, "learning_rate": 2.020983623921407e-05, "loss": 1.1115, "step": 472500 }, { "epoch": 1.79, "learning_rate": 2.0178312256398424e-05, "loss": 1.1164, "step": 473000 }, { "epoch": 1.79, "learning_rate": 2.0146788273582777e-05, "loss": 1.1181, "step": 473500 }, { "epoch": 1.79, "learning_rate": 2.011526429076713e-05, "loss": 1.1184, "step": 474000 }, { "epoch": 1.79, "learning_rate": 2.0083740307951484e-05, "loss": 1.1179, "step": 474500 }, { "epoch": 1.8, "learning_rate": 2.0052216325135838e-05, "loss": 1.1156, "step": 475000 }, { "epoch": 1.8, "learning_rate": 2.002069234232019e-05, "loss": 1.1261, "step": 475500 }, { "epoch": 1.8, "learning_rate": 1.9989168359504545e-05, "loss": 1.1247, "step": 476000 }, { "epoch": 1.8, "learning_rate": 1.9957644376688898e-05, "loss": 1.1248, "step": 476500 }, { "epoch": 1.8, "learning_rate": 1.992612039387325e-05, "loss": 1.1202, "step": 477000 }, { "epoch": 1.81, "learning_rate": 1.9894596411057605e-05, "loss": 1.1165, "step": 477500 }, { "epoch": 1.81, "learning_rate": 1.986307242824196e-05, "loss": 1.1134, "step": 478000 }, { "epoch": 1.81, "learning_rate": 1.9831548445426312e-05, "loss": 1.1234, "step": 478500 }, { "epoch": 1.81, "learning_rate": 1.9800024462610665e-05, "loss": 1.1199, "step": 479000 }, { "epoch": 1.81, "learning_rate": 1.976850047979502e-05, "loss": 1.1192, "step": 479500 }, { "epoch": 1.82, "learning_rate": 1.9736976496979372e-05, "loss": 1.1215, "step": 480000 }, { "epoch": 1.82, "learning_rate": 1.970545251416373e-05, "loss": 1.1169, "step": 480500 }, { "epoch": 1.82, "learning_rate": 1.967392853134808e-05, "loss": 1.1111, "step": 481000 }, { "epoch": 1.82, "learning_rate": 1.9642404548532432e-05, "loss": 1.1167, "step": 481500 }, { "epoch": 1.82, "learning_rate": 1.961088056571679e-05, "loss": 1.1197, "step": 482000 }, { "epoch": 1.83, "learning_rate": 1.957935658290114e-05, "loss": 1.1203, "step": 482500 }, { "epoch": 1.83, "learning_rate": 1.9547832600085496e-05, "loss": 1.1139, "step": 483000 }, { "epoch": 1.83, "learning_rate": 1.9516308617269846e-05, "loss": 1.1198, "step": 483500 }, { "epoch": 1.83, "learning_rate": 1.94847846344542e-05, "loss": 1.117, "step": 484000 }, { "epoch": 1.83, "learning_rate": 1.9453260651638556e-05, "loss": 1.1217, "step": 484500 }, { "epoch": 1.83, "learning_rate": 1.9421736668822906e-05, "loss": 1.1194, "step": 485000 }, { "epoch": 1.84, "learning_rate": 1.939021268600726e-05, "loss": 1.1257, "step": 485500 }, { "epoch": 1.84, "learning_rate": 1.9358688703191617e-05, "loss": 1.109, "step": 486000 }, { "epoch": 1.84, "learning_rate": 1.9327164720375967e-05, "loss": 1.1206, "step": 486500 }, { "epoch": 1.84, "learning_rate": 1.9295640737560324e-05, "loss": 1.1119, "step": 487000 }, { "epoch": 1.84, "learning_rate": 1.9264116754744677e-05, "loss": 1.1128, "step": 487500 }, { "epoch": 1.85, "learning_rate": 1.9232592771929027e-05, "loss": 1.1165, "step": 488000 }, { "epoch": 1.85, "learning_rate": 1.9201068789113384e-05, "loss": 1.1098, "step": 488500 }, { "epoch": 1.85, "learning_rate": 1.9169544806297737e-05, "loss": 1.1197, "step": 489000 }, { "epoch": 1.85, "learning_rate": 1.9138020823482087e-05, "loss": 1.1133, "step": 489500 }, { "epoch": 1.85, "learning_rate": 1.9106496840666444e-05, "loss": 1.1117, "step": 490000 }, { "epoch": 1.86, "learning_rate": 1.9074972857850798e-05, "loss": 1.1185, "step": 490500 }, { "epoch": 1.86, "learning_rate": 1.904344887503515e-05, "loss": 1.1189, "step": 491000 }, { "epoch": 1.86, "learning_rate": 1.9011924892219504e-05, "loss": 1.1308, "step": 491500 }, { "epoch": 1.86, "learning_rate": 1.8980400909403855e-05, "loss": 1.112, "step": 492000 }, { "epoch": 1.86, "learning_rate": 1.894887692658821e-05, "loss": 1.1164, "step": 492500 }, { "epoch": 1.86, "learning_rate": 1.8917352943772565e-05, "loss": 1.1147, "step": 493000 }, { "epoch": 1.87, "learning_rate": 1.8885828960956918e-05, "loss": 1.1175, "step": 493500 }, { "epoch": 1.87, "learning_rate": 1.885430497814127e-05, "loss": 1.1223, "step": 494000 }, { "epoch": 1.87, "learning_rate": 1.8822780995325625e-05, "loss": 1.1118, "step": 494500 }, { "epoch": 1.87, "learning_rate": 1.879125701250998e-05, "loss": 1.1173, "step": 495000 }, { "epoch": 1.87, "learning_rate": 1.8759733029694332e-05, "loss": 1.1157, "step": 495500 }, { "epoch": 1.88, "learning_rate": 1.8728209046878685e-05, "loss": 1.1181, "step": 496000 }, { "epoch": 1.88, "learning_rate": 1.869668506406304e-05, "loss": 1.109, "step": 496500 }, { "epoch": 1.88, "learning_rate": 1.8665161081247392e-05, "loss": 1.1123, "step": 497000 }, { "epoch": 1.88, "learning_rate": 1.8633637098431746e-05, "loss": 1.1169, "step": 497500 }, { "epoch": 1.88, "learning_rate": 1.86021131156161e-05, "loss": 1.1142, "step": 498000 }, { "epoch": 1.89, "learning_rate": 1.8570589132800453e-05, "loss": 1.1084, "step": 498500 }, { "epoch": 1.89, "learning_rate": 1.8539065149984806e-05, "loss": 1.1145, "step": 499000 }, { "epoch": 1.89, "learning_rate": 1.850754116716916e-05, "loss": 1.1159, "step": 499500 }, { "epoch": 1.89, "learning_rate": 1.8476017184353513e-05, "loss": 1.1052, "step": 500000 }, { "epoch": 1.89, "learning_rate": 1.8444493201537866e-05, "loss": 1.1137, "step": 500500 }, { "epoch": 1.9, "learning_rate": 1.841296921872222e-05, "loss": 1.1116, "step": 501000 }, { "epoch": 1.9, "learning_rate": 1.8381445235906573e-05, "loss": 1.1164, "step": 501500 }, { "epoch": 1.9, "learning_rate": 1.8349921253090927e-05, "loss": 1.1186, "step": 502000 }, { "epoch": 1.9, "learning_rate": 1.831839727027528e-05, "loss": 1.1086, "step": 502500 }, { "epoch": 1.9, "learning_rate": 1.8286873287459637e-05, "loss": 1.1108, "step": 503000 }, { "epoch": 1.9, "learning_rate": 1.8255349304643987e-05, "loss": 1.1076, "step": 503500 }, { "epoch": 1.91, "learning_rate": 1.822382532182834e-05, "loss": 1.1165, "step": 504000 }, { "epoch": 1.91, "learning_rate": 1.8192301339012697e-05, "loss": 1.1067, "step": 504500 }, { "epoch": 1.91, "learning_rate": 1.8160777356197047e-05, "loss": 1.1057, "step": 505000 }, { "epoch": 1.91, "learning_rate": 1.8129253373381404e-05, "loss": 1.1001, "step": 505500 }, { "epoch": 1.91, "learning_rate": 1.8097729390565754e-05, "loss": 1.1077, "step": 506000 }, { "epoch": 1.92, "learning_rate": 1.8066205407750108e-05, "loss": 1.1201, "step": 506500 }, { "epoch": 1.92, "learning_rate": 1.8034681424934464e-05, "loss": 1.1105, "step": 507000 }, { "epoch": 1.92, "learning_rate": 1.8003157442118814e-05, "loss": 1.1053, "step": 507500 }, { "epoch": 1.92, "learning_rate": 1.7971633459303168e-05, "loss": 1.1007, "step": 508000 }, { "epoch": 1.92, "learning_rate": 1.7940109476487525e-05, "loss": 1.1128, "step": 508500 }, { "epoch": 1.93, "learning_rate": 1.7908585493671875e-05, "loss": 1.1074, "step": 509000 }, { "epoch": 1.93, "learning_rate": 1.787706151085623e-05, "loss": 1.0999, "step": 509500 }, { "epoch": 1.93, "learning_rate": 1.7845537528040585e-05, "loss": 1.1167, "step": 510000 }, { "epoch": 1.93, "learning_rate": 1.7814013545224935e-05, "loss": 1.1109, "step": 510500 }, { "epoch": 1.93, "learning_rate": 1.7782489562409292e-05, "loss": 1.115, "step": 511000 }, { "epoch": 1.93, "learning_rate": 1.7750965579593645e-05, "loss": 1.1081, "step": 511500 }, { "epoch": 1.94, "learning_rate": 1.7719441596777995e-05, "loss": 1.1029, "step": 512000 }, { "epoch": 1.94, "learning_rate": 1.7687917613962352e-05, "loss": 1.1092, "step": 512500 }, { "epoch": 1.94, "learning_rate": 1.7656393631146706e-05, "loss": 1.1162, "step": 513000 }, { "epoch": 1.94, "learning_rate": 1.762486964833106e-05, "loss": 1.1019, "step": 513500 }, { "epoch": 1.94, "learning_rate": 1.7593345665515412e-05, "loss": 1.1105, "step": 514000 }, { "epoch": 1.95, "learning_rate": 1.7561821682699763e-05, "loss": 1.1034, "step": 514500 }, { "epoch": 1.95, "learning_rate": 1.753029769988412e-05, "loss": 1.1069, "step": 515000 }, { "epoch": 1.95, "learning_rate": 1.7498773717068473e-05, "loss": 1.1044, "step": 515500 }, { "epoch": 1.95, "learning_rate": 1.7467249734252826e-05, "loss": 1.1081, "step": 516000 }, { "epoch": 1.95, "learning_rate": 1.743572575143718e-05, "loss": 1.109, "step": 516500 }, { "epoch": 1.96, "learning_rate": 1.7404201768621533e-05, "loss": 1.1102, "step": 517000 }, { "epoch": 1.96, "learning_rate": 1.7372677785805887e-05, "loss": 1.0989, "step": 517500 }, { "epoch": 1.96, "learning_rate": 1.734115380299024e-05, "loss": 1.0978, "step": 518000 }, { "epoch": 1.96, "learning_rate": 1.7309629820174593e-05, "loss": 1.1085, "step": 518500 }, { "epoch": 1.96, "learning_rate": 1.7278105837358947e-05, "loss": 1.1097, "step": 519000 }, { "epoch": 1.97, "learning_rate": 1.72465818545433e-05, "loss": 1.1037, "step": 519500 }, { "epoch": 1.97, "learning_rate": 1.7215057871727654e-05, "loss": 1.1099, "step": 520000 }, { "epoch": 1.97, "learning_rate": 1.7183533888912007e-05, "loss": 1.1201, "step": 520500 }, { "epoch": 1.97, "learning_rate": 1.715200990609636e-05, "loss": 1.1012, "step": 521000 }, { "epoch": 1.97, "learning_rate": 1.7120485923280714e-05, "loss": 1.1104, "step": 521500 }, { "epoch": 1.97, "learning_rate": 1.7088961940465067e-05, "loss": 1.11, "step": 522000 }, { "epoch": 1.98, "learning_rate": 1.705743795764942e-05, "loss": 1.1128, "step": 522500 }, { "epoch": 1.98, "learning_rate": 1.7025913974833774e-05, "loss": 1.1035, "step": 523000 }, { "epoch": 1.98, "learning_rate": 1.6994389992018128e-05, "loss": 1.0976, "step": 523500 }, { "epoch": 1.98, "learning_rate": 1.696286600920248e-05, "loss": 1.1013, "step": 524000 }, { "epoch": 1.98, "learning_rate": 1.6931342026386835e-05, "loss": 1.1072, "step": 524500 }, { "epoch": 1.99, "learning_rate": 1.6899818043571188e-05, "loss": 1.1065, "step": 525000 }, { "epoch": 1.99, "learning_rate": 1.6868294060755545e-05, "loss": 1.0969, "step": 525500 }, { "epoch": 1.99, "learning_rate": 1.6836770077939895e-05, "loss": 1.1157, "step": 526000 }, { "epoch": 1.99, "learning_rate": 1.680524609512425e-05, "loss": 1.1016, "step": 526500 }, { "epoch": 1.99, "learning_rate": 1.6773722112308605e-05, "loss": 1.0994, "step": 527000 }, { "epoch": 2.0, "learning_rate": 1.6742198129492955e-05, "loss": 1.1045, "step": 527500 }, { "epoch": 2.0, "learning_rate": 1.671067414667731e-05, "loss": 1.1107, "step": 528000 }, { "epoch": 2.0, "learning_rate": 1.6679150163861666e-05, "loss": 1.0976, "step": 528500 }, { "epoch": 2.0, "learning_rate": 1.6647626181046016e-05, "loss": 1.0981, "step": 529000 }, { "epoch": 2.0, "learning_rate": 1.6616102198230372e-05, "loss": 1.0969, "step": 529500 }, { "epoch": 2.0, "learning_rate": 1.6584578215414722e-05, "loss": 1.1059, "step": 530000 }, { "epoch": 2.01, "learning_rate": 1.6553054232599076e-05, "loss": 1.1098, "step": 530500 }, { "epoch": 2.01, "learning_rate": 1.6521530249783433e-05, "loss": 1.0973, "step": 531000 }, { "epoch": 2.01, "learning_rate": 1.6490006266967783e-05, "loss": 1.0941, "step": 531500 }, { "epoch": 2.01, "learning_rate": 1.645848228415214e-05, "loss": 1.1041, "step": 532000 }, { "epoch": 2.01, "learning_rate": 1.6426958301336493e-05, "loss": 1.0995, "step": 532500 }, { "epoch": 2.02, "learning_rate": 1.6395434318520843e-05, "loss": 1.1055, "step": 533000 }, { "epoch": 2.02, "learning_rate": 1.63639103357052e-05, "loss": 1.0983, "step": 533500 }, { "epoch": 2.02, "learning_rate": 1.6332386352889553e-05, "loss": 1.0968, "step": 534000 }, { "epoch": 2.02, "learning_rate": 1.6300862370073903e-05, "loss": 1.1039, "step": 534500 }, { "epoch": 2.02, "learning_rate": 1.626933838725826e-05, "loss": 1.0923, "step": 535000 }, { "epoch": 2.03, "learning_rate": 1.6237814404442614e-05, "loss": 1.0991, "step": 535500 }, { "epoch": 2.03, "learning_rate": 1.6206290421626967e-05, "loss": 1.1015, "step": 536000 }, { "epoch": 2.03, "learning_rate": 1.617476643881132e-05, "loss": 1.1005, "step": 536500 }, { "epoch": 2.03, "learning_rate": 1.614324245599567e-05, "loss": 1.1046, "step": 537000 }, { "epoch": 2.03, "learning_rate": 1.6111718473180027e-05, "loss": 1.0857, "step": 537500 }, { "epoch": 2.04, "learning_rate": 1.608019449036438e-05, "loss": 1.0974, "step": 538000 }, { "epoch": 2.04, "learning_rate": 1.6048670507548734e-05, "loss": 1.0986, "step": 538500 }, { "epoch": 2.04, "learning_rate": 1.6017146524733088e-05, "loss": 1.0967, "step": 539000 }, { "epoch": 2.04, "learning_rate": 1.598562254191744e-05, "loss": 1.0952, "step": 539500 }, { "epoch": 2.04, "learning_rate": 1.5954098559101795e-05, "loss": 1.0955, "step": 540000 }, { "epoch": 2.04, "learning_rate": 1.5922574576286148e-05, "loss": 1.0955, "step": 540500 }, { "epoch": 2.05, "learning_rate": 1.58910505934705e-05, "loss": 1.098, "step": 541000 }, { "epoch": 2.05, "learning_rate": 1.5859526610654855e-05, "loss": 1.0937, "step": 541500 }, { "epoch": 2.05, "learning_rate": 1.5828002627839208e-05, "loss": 1.09, "step": 542000 }, { "epoch": 2.05, "learning_rate": 1.5796478645023562e-05, "loss": 1.0914, "step": 542500 }, { "epoch": 2.05, "learning_rate": 1.5764954662207915e-05, "loss": 1.0964, "step": 543000 }, { "epoch": 2.06, "learning_rate": 1.573343067939227e-05, "loss": 1.092, "step": 543500 }, { "epoch": 2.06, "learning_rate": 1.5701906696576622e-05, "loss": 1.0949, "step": 544000 }, { "epoch": 2.06, "learning_rate": 1.5670382713760975e-05, "loss": 1.0893, "step": 544500 }, { "epoch": 2.06, "learning_rate": 1.563885873094533e-05, "loss": 1.1014, "step": 545000 }, { "epoch": 2.06, "learning_rate": 1.5607334748129682e-05, "loss": 1.089, "step": 545500 }, { "epoch": 2.07, "learning_rate": 1.5575810765314036e-05, "loss": 1.088, "step": 546000 }, { "epoch": 2.07, "learning_rate": 1.554428678249839e-05, "loss": 1.1041, "step": 546500 }, { "epoch": 2.07, "learning_rate": 1.5512762799682743e-05, "loss": 1.0843, "step": 547000 }, { "epoch": 2.07, "learning_rate": 1.5481238816867096e-05, "loss": 1.1017, "step": 547500 }, { "epoch": 2.07, "learning_rate": 1.5449714834051453e-05, "loss": 1.0963, "step": 548000 }, { "epoch": 2.07, "learning_rate": 1.5418190851235803e-05, "loss": 1.0915, "step": 548500 }, { "epoch": 2.08, "learning_rate": 1.5386666868420156e-05, "loss": 1.0932, "step": 549000 }, { "epoch": 2.08, "learning_rate": 1.5355142885604513e-05, "loss": 1.0918, "step": 549500 }, { "epoch": 2.08, "learning_rate": 1.5323618902788863e-05, "loss": 1.0964, "step": 550000 }, { "epoch": 2.08, "learning_rate": 1.5292094919973217e-05, "loss": 1.0858, "step": 550500 }, { "epoch": 2.08, "learning_rate": 1.5260570937157574e-05, "loss": 1.0971, "step": 551000 }, { "epoch": 2.09, "learning_rate": 1.5229046954341925e-05, "loss": 1.0964, "step": 551500 }, { "epoch": 2.09, "learning_rate": 1.5197522971526279e-05, "loss": 1.0904, "step": 552000 }, { "epoch": 2.09, "learning_rate": 1.516599898871063e-05, "loss": 1.099, "step": 552500 }, { "epoch": 2.09, "learning_rate": 1.5134475005894986e-05, "loss": 1.0942, "step": 553000 }, { "epoch": 2.09, "learning_rate": 1.5102951023079339e-05, "loss": 1.0921, "step": 553500 }, { "epoch": 2.1, "learning_rate": 1.507142704026369e-05, "loss": 1.0842, "step": 554000 }, { "epoch": 2.1, "learning_rate": 1.5039903057448046e-05, "loss": 1.0905, "step": 554500 }, { "epoch": 2.1, "learning_rate": 1.5008379074632401e-05, "loss": 1.0972, "step": 555000 }, { "epoch": 2.1, "learning_rate": 1.4976855091816753e-05, "loss": 1.0933, "step": 555500 }, { "epoch": 2.1, "learning_rate": 1.4945331109001106e-05, "loss": 1.0964, "step": 556000 }, { "epoch": 2.11, "learning_rate": 1.4913807126185461e-05, "loss": 1.0975, "step": 556500 }, { "epoch": 2.11, "learning_rate": 1.4882283143369813e-05, "loss": 1.1022, "step": 557000 }, { "epoch": 2.11, "learning_rate": 1.4850759160554168e-05, "loss": 1.0903, "step": 557500 }, { "epoch": 2.11, "learning_rate": 1.4819235177738522e-05, "loss": 1.0947, "step": 558000 }, { "epoch": 2.11, "learning_rate": 1.4787711194922873e-05, "loss": 1.0968, "step": 558500 }, { "epoch": 2.11, "learning_rate": 1.4756187212107228e-05, "loss": 1.0953, "step": 559000 }, { "epoch": 2.12, "learning_rate": 1.472466322929158e-05, "loss": 1.0967, "step": 559500 }, { "epoch": 2.12, "learning_rate": 1.4693139246475934e-05, "loss": 1.0867, "step": 560000 }, { "epoch": 2.12, "learning_rate": 1.4661615263660289e-05, "loss": 1.0931, "step": 560500 }, { "epoch": 2.12, "learning_rate": 1.463009128084464e-05, "loss": 1.0917, "step": 561000 }, { "epoch": 2.12, "learning_rate": 1.4598567298028996e-05, "loss": 1.0869, "step": 561500 }, { "epoch": 2.13, "learning_rate": 1.4567043315213349e-05, "loss": 1.089, "step": 562000 }, { "epoch": 2.13, "learning_rate": 1.4535519332397701e-05, "loss": 1.0868, "step": 562500 }, { "epoch": 2.13, "learning_rate": 1.4503995349582056e-05, "loss": 1.0973, "step": 563000 }, { "epoch": 2.13, "learning_rate": 1.447247136676641e-05, "loss": 1.0955, "step": 563500 }, { "epoch": 2.13, "learning_rate": 1.4440947383950761e-05, "loss": 1.0871, "step": 564000 }, { "epoch": 2.14, "learning_rate": 1.4409423401135116e-05, "loss": 1.0893, "step": 564500 }, { "epoch": 2.14, "learning_rate": 1.4377899418319471e-05, "loss": 1.0931, "step": 565000 }, { "epoch": 2.14, "learning_rate": 1.4346375435503823e-05, "loss": 1.0915, "step": 565500 }, { "epoch": 2.14, "learning_rate": 1.4314851452688177e-05, "loss": 1.0816, "step": 566000 }, { "epoch": 2.14, "learning_rate": 1.4283327469872532e-05, "loss": 1.0903, "step": 566500 }, { "epoch": 2.14, "learning_rate": 1.4251803487056883e-05, "loss": 1.0905, "step": 567000 }, { "epoch": 2.15, "learning_rate": 1.4220279504241239e-05, "loss": 1.0958, "step": 567500 }, { "epoch": 2.15, "learning_rate": 1.418875552142559e-05, "loss": 1.0923, "step": 568000 }, { "epoch": 2.15, "learning_rate": 1.4157231538609944e-05, "loss": 1.0845, "step": 568500 }, { "epoch": 2.15, "learning_rate": 1.4125707555794299e-05, "loss": 1.0887, "step": 569000 }, { "epoch": 2.15, "learning_rate": 1.409418357297865e-05, "loss": 1.0908, "step": 569500 }, { "epoch": 2.16, "learning_rate": 1.4062659590163004e-05, "loss": 1.0826, "step": 570000 }, { "epoch": 2.16, "learning_rate": 1.403113560734736e-05, "loss": 1.0899, "step": 570500 }, { "epoch": 2.16, "learning_rate": 1.3999611624531711e-05, "loss": 1.0909, "step": 571000 }, { "epoch": 2.16, "learning_rate": 1.3968087641716066e-05, "loss": 1.0922, "step": 571500 }, { "epoch": 2.16, "learning_rate": 1.393656365890042e-05, "loss": 1.0922, "step": 572000 }, { "epoch": 2.17, "learning_rate": 1.3905039676084771e-05, "loss": 1.0844, "step": 572500 }, { "epoch": 2.17, "learning_rate": 1.3873515693269126e-05, "loss": 1.0919, "step": 573000 }, { "epoch": 2.17, "learning_rate": 1.3841991710453482e-05, "loss": 1.0872, "step": 573500 }, { "epoch": 2.17, "learning_rate": 1.3810467727637833e-05, "loss": 1.0807, "step": 574000 }, { "epoch": 2.17, "learning_rate": 1.3778943744822187e-05, "loss": 1.0824, "step": 574500 }, { "epoch": 2.18, "learning_rate": 1.3747419762006538e-05, "loss": 1.0874, "step": 575000 }, { "epoch": 2.18, "learning_rate": 1.3715895779190894e-05, "loss": 1.0842, "step": 575500 }, { "epoch": 2.18, "learning_rate": 1.3684371796375247e-05, "loss": 1.0887, "step": 576000 }, { "epoch": 2.18, "learning_rate": 1.3652847813559599e-05, "loss": 1.0903, "step": 576500 }, { "epoch": 2.18, "learning_rate": 1.3621323830743954e-05, "loss": 1.0836, "step": 577000 }, { "epoch": 2.18, "learning_rate": 1.3589799847928309e-05, "loss": 1.0807, "step": 577500 }, { "epoch": 2.19, "learning_rate": 1.355827586511266e-05, "loss": 1.0868, "step": 578000 }, { "epoch": 2.19, "learning_rate": 1.3526751882297014e-05, "loss": 1.0871, "step": 578500 }, { "epoch": 2.19, "learning_rate": 1.349522789948137e-05, "loss": 1.0815, "step": 579000 }, { "epoch": 2.19, "learning_rate": 1.3463703916665721e-05, "loss": 1.0849, "step": 579500 }, { "epoch": 2.19, "learning_rate": 1.3432179933850074e-05, "loss": 1.0831, "step": 580000 }, { "epoch": 2.2, "learning_rate": 1.340065595103443e-05, "loss": 1.0814, "step": 580500 }, { "epoch": 2.2, "learning_rate": 1.3369131968218781e-05, "loss": 1.0887, "step": 581000 }, { "epoch": 2.2, "learning_rate": 1.3337607985403136e-05, "loss": 1.0825, "step": 581500 }, { "epoch": 2.2, "learning_rate": 1.3306084002587488e-05, "loss": 1.0776, "step": 582000 }, { "epoch": 2.2, "learning_rate": 1.3274560019771842e-05, "loss": 1.0856, "step": 582500 }, { "epoch": 2.21, "learning_rate": 1.3243036036956197e-05, "loss": 1.0776, "step": 583000 }, { "epoch": 2.21, "learning_rate": 1.3211512054140549e-05, "loss": 1.0791, "step": 583500 }, { "epoch": 2.21, "learning_rate": 1.3179988071324904e-05, "loss": 1.0893, "step": 584000 }, { "epoch": 2.21, "learning_rate": 1.3148464088509257e-05, "loss": 1.0837, "step": 584500 }, { "epoch": 2.21, "learning_rate": 1.3116940105693609e-05, "loss": 1.0708, "step": 585000 }, { "epoch": 2.21, "learning_rate": 1.3085416122877964e-05, "loss": 1.0831, "step": 585500 }, { "epoch": 2.22, "learning_rate": 1.3053892140062317e-05, "loss": 1.0876, "step": 586000 }, { "epoch": 2.22, "learning_rate": 1.302236815724667e-05, "loss": 1.0829, "step": 586500 }, { "epoch": 2.22, "learning_rate": 1.2990844174431024e-05, "loss": 1.0824, "step": 587000 }, { "epoch": 2.22, "learning_rate": 1.295932019161538e-05, "loss": 1.0865, "step": 587500 }, { "epoch": 2.22, "learning_rate": 1.2927796208799731e-05, "loss": 1.0822, "step": 588000 }, { "epoch": 2.23, "learning_rate": 1.2896272225984085e-05, "loss": 1.0788, "step": 588500 }, { "epoch": 2.23, "learning_rate": 1.286474824316844e-05, "loss": 1.0901, "step": 589000 }, { "epoch": 2.23, "learning_rate": 1.2833224260352791e-05, "loss": 1.0816, "step": 589500 }, { "epoch": 2.23, "learning_rate": 1.2801700277537147e-05, "loss": 1.0801, "step": 590000 }, { "epoch": 2.23, "learning_rate": 1.2770176294721498e-05, "loss": 1.0791, "step": 590500 }, { "epoch": 2.24, "learning_rate": 1.2738652311905852e-05, "loss": 1.0807, "step": 591000 }, { "epoch": 2.24, "learning_rate": 1.2707128329090207e-05, "loss": 1.0823, "step": 591500 }, { "epoch": 2.24, "learning_rate": 1.2675604346274559e-05, "loss": 1.0822, "step": 592000 }, { "epoch": 2.24, "learning_rate": 1.2644080363458912e-05, "loss": 1.0845, "step": 592500 }, { "epoch": 2.24, "learning_rate": 1.2612556380643267e-05, "loss": 1.0854, "step": 593000 }, { "epoch": 2.25, "learning_rate": 1.2581032397827619e-05, "loss": 1.0809, "step": 593500 }, { "epoch": 2.25, "learning_rate": 1.2549508415011974e-05, "loss": 1.0927, "step": 594000 }, { "epoch": 2.25, "learning_rate": 1.2517984432196328e-05, "loss": 1.0874, "step": 594500 }, { "epoch": 2.25, "learning_rate": 1.2486460449380681e-05, "loss": 1.0818, "step": 595000 }, { "epoch": 2.25, "learning_rate": 1.2454936466565034e-05, "loss": 1.0845, "step": 595500 }, { "epoch": 2.25, "learning_rate": 1.2423412483749388e-05, "loss": 1.0699, "step": 596000 }, { "epoch": 2.26, "learning_rate": 1.2391888500933741e-05, "loss": 1.0745, "step": 596500 }, { "epoch": 2.26, "learning_rate": 1.2360364518118095e-05, "loss": 1.0893, "step": 597000 }, { "epoch": 2.26, "learning_rate": 1.2328840535302448e-05, "loss": 1.0815, "step": 597500 }, { "epoch": 2.26, "learning_rate": 1.2297316552486802e-05, "loss": 1.0809, "step": 598000 }, { "epoch": 2.26, "learning_rate": 1.2265792569671155e-05, "loss": 1.0717, "step": 598500 }, { "epoch": 2.27, "learning_rate": 1.2234268586855508e-05, "loss": 1.0795, "step": 599000 }, { "epoch": 2.27, "learning_rate": 1.2202744604039862e-05, "loss": 1.0765, "step": 599500 }, { "epoch": 2.27, "learning_rate": 1.2171220621224215e-05, "loss": 1.0786, "step": 600000 }, { "epoch": 2.27, "learning_rate": 1.2139696638408569e-05, "loss": 1.0821, "step": 600500 }, { "epoch": 2.27, "learning_rate": 1.2108172655592922e-05, "loss": 1.0718, "step": 601000 }, { "epoch": 2.28, "learning_rate": 1.2076648672777276e-05, "loss": 1.0853, "step": 601500 }, { "epoch": 2.28, "learning_rate": 1.204512468996163e-05, "loss": 1.0858, "step": 602000 }, { "epoch": 2.28, "learning_rate": 1.2013600707145983e-05, "loss": 1.0842, "step": 602500 }, { "epoch": 2.28, "learning_rate": 1.1982076724330336e-05, "loss": 1.0794, "step": 603000 }, { "epoch": 2.28, "learning_rate": 1.195055274151469e-05, "loss": 1.0829, "step": 603500 }, { "epoch": 2.28, "learning_rate": 1.1919028758699045e-05, "loss": 1.0785, "step": 604000 }, { "epoch": 2.29, "learning_rate": 1.1887504775883398e-05, "loss": 1.0776, "step": 604500 }, { "epoch": 2.29, "learning_rate": 1.185598079306775e-05, "loss": 1.0774, "step": 605000 }, { "epoch": 2.29, "learning_rate": 1.1824456810252105e-05, "loss": 1.077, "step": 605500 }, { "epoch": 2.29, "learning_rate": 1.1792932827436458e-05, "loss": 1.0753, "step": 606000 }, { "epoch": 2.29, "learning_rate": 1.1761408844620812e-05, "loss": 1.0839, "step": 606500 }, { "epoch": 2.3, "learning_rate": 1.1729884861805163e-05, "loss": 1.075, "step": 607000 }, { "epoch": 2.3, "learning_rate": 1.1698360878989519e-05, "loss": 1.0784, "step": 607500 }, { "epoch": 2.3, "learning_rate": 1.1666836896173872e-05, "loss": 1.0788, "step": 608000 }, { "epoch": 2.3, "learning_rate": 1.1635312913358225e-05, "loss": 1.0812, "step": 608500 }, { "epoch": 2.3, "learning_rate": 1.1603788930542579e-05, "loss": 1.0786, "step": 609000 }, { "epoch": 2.31, "learning_rate": 1.1572264947726932e-05, "loss": 1.0876, "step": 609500 }, { "epoch": 2.31, "learning_rate": 1.1540740964911286e-05, "loss": 1.0838, "step": 610000 }, { "epoch": 2.31, "learning_rate": 1.150921698209564e-05, "loss": 1.0773, "step": 610500 }, { "epoch": 2.31, "learning_rate": 1.1477692999279993e-05, "loss": 1.072, "step": 611000 }, { "epoch": 2.31, "learning_rate": 1.1446169016464346e-05, "loss": 1.0727, "step": 611500 }, { "epoch": 2.32, "learning_rate": 1.14146450336487e-05, "loss": 1.0839, "step": 612000 }, { "epoch": 2.32, "learning_rate": 1.1383121050833055e-05, "loss": 1.0851, "step": 612500 }, { "epoch": 2.32, "learning_rate": 1.1351597068017406e-05, "loss": 1.078, "step": 613000 }, { "epoch": 2.32, "learning_rate": 1.132007308520176e-05, "loss": 1.0738, "step": 613500 }, { "epoch": 2.32, "learning_rate": 1.1288549102386115e-05, "loss": 1.077, "step": 614000 }, { "epoch": 2.32, "learning_rate": 1.1257025119570468e-05, "loss": 1.0792, "step": 614500 }, { "epoch": 2.33, "learning_rate": 1.122550113675482e-05, "loss": 1.0807, "step": 615000 }, { "epoch": 2.33, "learning_rate": 1.1193977153939174e-05, "loss": 1.078, "step": 615500 }, { "epoch": 2.33, "learning_rate": 1.1162453171123529e-05, "loss": 1.0773, "step": 616000 }, { "epoch": 2.33, "learning_rate": 1.1130929188307882e-05, "loss": 1.0827, "step": 616500 }, { "epoch": 2.33, "learning_rate": 1.1099405205492234e-05, "loss": 1.0712, "step": 617000 }, { "epoch": 2.34, "learning_rate": 1.1067881222676589e-05, "loss": 1.0739, "step": 617500 }, { "epoch": 2.34, "learning_rate": 1.1036357239860942e-05, "loss": 1.0767, "step": 618000 }, { "epoch": 2.34, "learning_rate": 1.1004833257045296e-05, "loss": 1.0806, "step": 618500 }, { "epoch": 2.34, "learning_rate": 1.0973309274229648e-05, "loss": 1.078, "step": 619000 }, { "epoch": 2.34, "learning_rate": 1.0941785291414003e-05, "loss": 1.0696, "step": 619500 }, { "epoch": 2.35, "learning_rate": 1.0910261308598356e-05, "loss": 1.0727, "step": 620000 }, { "epoch": 2.35, "learning_rate": 1.087873732578271e-05, "loss": 1.0634, "step": 620500 }, { "epoch": 2.35, "learning_rate": 1.0847213342967063e-05, "loss": 1.0717, "step": 621000 }, { "epoch": 2.35, "learning_rate": 1.0815689360151416e-05, "loss": 1.0734, "step": 621500 }, { "epoch": 2.35, "learning_rate": 1.078416537733577e-05, "loss": 1.0742, "step": 622000 }, { "epoch": 2.35, "learning_rate": 1.0752641394520123e-05, "loss": 1.0776, "step": 622500 }, { "epoch": 2.36, "learning_rate": 1.0721117411704477e-05, "loss": 1.0619, "step": 623000 }, { "epoch": 2.36, "learning_rate": 1.068959342888883e-05, "loss": 1.072, "step": 623500 }, { "epoch": 2.36, "learning_rate": 1.0658069446073184e-05, "loss": 1.0676, "step": 624000 }, { "epoch": 2.36, "learning_rate": 1.0626545463257539e-05, "loss": 1.0761, "step": 624500 }, { "epoch": 2.36, "learning_rate": 1.059502148044189e-05, "loss": 1.0741, "step": 625000 }, { "epoch": 2.37, "learning_rate": 1.0563497497626244e-05, "loss": 1.0759, "step": 625500 }, { "epoch": 2.37, "learning_rate": 1.0531973514810597e-05, "loss": 1.0691, "step": 626000 }, { "epoch": 2.37, "learning_rate": 1.0500449531994953e-05, "loss": 1.0722, "step": 626500 }, { "epoch": 2.37, "learning_rate": 1.0468925549179304e-05, "loss": 1.0736, "step": 627000 }, { "epoch": 2.37, "learning_rate": 1.0437401566363658e-05, "loss": 1.0748, "step": 627500 }, { "epoch": 2.38, "learning_rate": 1.0405877583548013e-05, "loss": 1.0707, "step": 628000 }, { "epoch": 2.38, "learning_rate": 1.0374353600732366e-05, "loss": 1.0712, "step": 628500 }, { "epoch": 2.38, "learning_rate": 1.034282961791672e-05, "loss": 1.0782, "step": 629000 }, { "epoch": 2.38, "learning_rate": 1.0311305635101071e-05, "loss": 1.0667, "step": 629500 }, { "epoch": 2.38, "learning_rate": 1.0279781652285427e-05, "loss": 1.0692, "step": 630000 }, { "epoch": 2.39, "learning_rate": 1.024825766946978e-05, "loss": 1.0671, "step": 630500 }, { "epoch": 2.39, "learning_rate": 1.0216733686654133e-05, "loss": 1.0794, "step": 631000 }, { "epoch": 2.39, "learning_rate": 1.0185209703838487e-05, "loss": 1.069, "step": 631500 }, { "epoch": 2.39, "learning_rate": 1.015368572102284e-05, "loss": 1.0762, "step": 632000 }, { "epoch": 2.39, "learning_rate": 1.0122161738207194e-05, "loss": 1.0761, "step": 632500 }, { "epoch": 2.39, "learning_rate": 1.0090637755391547e-05, "loss": 1.0722, "step": 633000 }, { "epoch": 2.4, "learning_rate": 1.00591137725759e-05, "loss": 1.0746, "step": 633500 }, { "epoch": 2.4, "learning_rate": 1.0027589789760254e-05, "loss": 1.0727, "step": 634000 }, { "epoch": 2.4, "learning_rate": 9.996065806944607e-06, "loss": 1.0706, "step": 634500 }, { "epoch": 2.4, "learning_rate": 9.964541824128961e-06, "loss": 1.0719, "step": 635000 }, { "epoch": 2.4, "learning_rate": 9.933017841313314e-06, "loss": 1.0761, "step": 635500 }, { "epoch": 2.41, "learning_rate": 9.901493858497668e-06, "loss": 1.0737, "step": 636000 }, { "epoch": 2.41, "learning_rate": 9.869969875682023e-06, "loss": 1.0711, "step": 636500 }, { "epoch": 2.41, "learning_rate": 9.838445892866376e-06, "loss": 1.075, "step": 637000 }, { "epoch": 2.41, "learning_rate": 9.806921910050728e-06, "loss": 1.072, "step": 637500 }, { "epoch": 2.41, "learning_rate": 9.775397927235082e-06, "loss": 1.0734, "step": 638000 }, { "epoch": 2.42, "learning_rate": 9.743873944419437e-06, "loss": 1.0737, "step": 638500 }, { "epoch": 2.42, "learning_rate": 9.71234996160379e-06, "loss": 1.0697, "step": 639000 }, { "epoch": 2.42, "learning_rate": 9.680825978788142e-06, "loss": 1.0767, "step": 639500 }, { "epoch": 2.42, "learning_rate": 9.649301995972497e-06, "loss": 1.0688, "step": 640000 }, { "epoch": 2.42, "learning_rate": 9.61777801315685e-06, "loss": 1.0666, "step": 640500 }, { "epoch": 2.42, "learning_rate": 9.586254030341204e-06, "loss": 1.0604, "step": 641000 }, { "epoch": 2.43, "learning_rate": 9.554730047525556e-06, "loss": 1.0782, "step": 641500 }, { "epoch": 2.43, "learning_rate": 9.52320606470991e-06, "loss": 1.0756, "step": 642000 }, { "epoch": 2.43, "learning_rate": 9.491682081894264e-06, "loss": 1.0665, "step": 642500 }, { "epoch": 2.43, "learning_rate": 9.460158099078618e-06, "loss": 1.0649, "step": 643000 }, { "epoch": 2.43, "learning_rate": 9.428634116262971e-06, "loss": 1.068, "step": 643500 }, { "epoch": 2.44, "learning_rate": 9.397110133447324e-06, "loss": 1.0761, "step": 644000 }, { "epoch": 2.44, "learning_rate": 9.365586150631678e-06, "loss": 1.0633, "step": 644500 }, { "epoch": 2.44, "learning_rate": 9.334062167816031e-06, "loss": 1.0723, "step": 645000 }, { "epoch": 2.44, "learning_rate": 9.302538185000385e-06, "loss": 1.064, "step": 645500 }, { "epoch": 2.44, "learning_rate": 9.271014202184738e-06, "loss": 1.0751, "step": 646000 }, { "epoch": 2.45, "learning_rate": 9.239490219369092e-06, "loss": 1.0682, "step": 646500 }, { "epoch": 2.45, "learning_rate": 9.207966236553447e-06, "loss": 1.0813, "step": 647000 }, { "epoch": 2.45, "learning_rate": 9.176442253737799e-06, "loss": 1.073, "step": 647500 }, { "epoch": 2.45, "learning_rate": 9.144918270922152e-06, "loss": 1.0689, "step": 648000 }, { "epoch": 2.45, "learning_rate": 9.113394288106505e-06, "loss": 1.0619, "step": 648500 }, { "epoch": 2.46, "learning_rate": 9.08187030529086e-06, "loss": 1.0722, "step": 649000 }, { "epoch": 2.46, "learning_rate": 9.050346322475212e-06, "loss": 1.065, "step": 649500 }, { "epoch": 2.46, "learning_rate": 9.018822339659566e-06, "loss": 1.0622, "step": 650000 }, { "epoch": 2.46, "learning_rate": 8.98729835684392e-06, "loss": 1.0621, "step": 650500 }, { "epoch": 2.46, "learning_rate": 8.955774374028274e-06, "loss": 1.0701, "step": 651000 }, { "epoch": 2.46, "learning_rate": 8.924250391212628e-06, "loss": 1.0673, "step": 651500 }, { "epoch": 2.47, "learning_rate": 8.892726408396981e-06, "loss": 1.063, "step": 652000 }, { "epoch": 2.47, "learning_rate": 8.861202425581335e-06, "loss": 1.0746, "step": 652500 }, { "epoch": 2.47, "learning_rate": 8.829678442765688e-06, "loss": 1.0715, "step": 653000 }, { "epoch": 2.47, "learning_rate": 8.798154459950041e-06, "loss": 1.064, "step": 653500 }, { "epoch": 2.47, "learning_rate": 8.766630477134395e-06, "loss": 1.0637, "step": 654000 }, { "epoch": 2.48, "learning_rate": 8.735106494318748e-06, "loss": 1.0663, "step": 654500 }, { "epoch": 2.48, "learning_rate": 8.703582511503102e-06, "loss": 1.0693, "step": 655000 }, { "epoch": 2.48, "learning_rate": 8.672058528687455e-06, "loss": 1.0673, "step": 655500 }, { "epoch": 2.48, "learning_rate": 8.640534545871809e-06, "loss": 1.0606, "step": 656000 }, { "epoch": 2.48, "learning_rate": 8.609010563056162e-06, "loss": 1.0609, "step": 656500 }, { "epoch": 2.49, "learning_rate": 8.577486580240515e-06, "loss": 1.0726, "step": 657000 }, { "epoch": 2.49, "learning_rate": 8.545962597424869e-06, "loss": 1.0634, "step": 657500 }, { "epoch": 2.49, "learning_rate": 8.514438614609222e-06, "loss": 1.0589, "step": 658000 }, { "epoch": 2.49, "learning_rate": 8.482914631793576e-06, "loss": 1.0693, "step": 658500 }, { "epoch": 2.49, "learning_rate": 8.451390648977931e-06, "loss": 1.0608, "step": 659000 }, { "epoch": 2.49, "learning_rate": 8.419866666162284e-06, "loss": 1.07, "step": 659500 }, { "epoch": 2.5, "learning_rate": 8.388342683346636e-06, "loss": 1.0728, "step": 660000 }, { "epoch": 2.5, "learning_rate": 8.35681870053099e-06, "loss": 1.0619, "step": 660500 }, { "epoch": 2.5, "learning_rate": 8.325294717715345e-06, "loss": 1.0645, "step": 661000 }, { "epoch": 2.5, "learning_rate": 8.293770734899698e-06, "loss": 1.0606, "step": 661500 }, { "epoch": 2.5, "learning_rate": 8.26224675208405e-06, "loss": 1.0631, "step": 662000 }, { "epoch": 2.51, "learning_rate": 8.230722769268405e-06, "loss": 1.0733, "step": 662500 }, { "epoch": 2.51, "learning_rate": 8.199198786452758e-06, "loss": 1.064, "step": 663000 }, { "epoch": 2.51, "learning_rate": 8.167674803637112e-06, "loss": 1.0628, "step": 663500 }, { "epoch": 2.51, "learning_rate": 8.136150820821464e-06, "loss": 1.0653, "step": 664000 }, { "epoch": 2.51, "learning_rate": 8.104626838005819e-06, "loss": 1.0561, "step": 664500 }, { "epoch": 2.52, "learning_rate": 8.073102855190172e-06, "loss": 1.0631, "step": 665000 }, { "epoch": 2.52, "learning_rate": 8.041578872374526e-06, "loss": 1.0613, "step": 665500 }, { "epoch": 2.52, "learning_rate": 8.010054889558879e-06, "loss": 1.0615, "step": 666000 }, { "epoch": 2.52, "learning_rate": 7.978530906743232e-06, "loss": 1.0692, "step": 666500 }, { "epoch": 2.52, "learning_rate": 7.947006923927586e-06, "loss": 1.0685, "step": 667000 }, { "epoch": 2.53, "learning_rate": 7.91548294111194e-06, "loss": 1.0625, "step": 667500 }, { "epoch": 2.53, "learning_rate": 7.883958958296293e-06, "loss": 1.0702, "step": 668000 }, { "epoch": 2.53, "learning_rate": 7.852434975480646e-06, "loss": 1.0648, "step": 668500 }, { "epoch": 2.53, "learning_rate": 7.820910992665e-06, "loss": 1.0631, "step": 669000 }, { "epoch": 2.53, "learning_rate": 7.789387009849355e-06, "loss": 1.0653, "step": 669500 }, { "epoch": 2.53, "learning_rate": 7.757863027033707e-06, "loss": 1.0647, "step": 670000 }, { "epoch": 2.54, "learning_rate": 7.72633904421806e-06, "loss": 1.0595, "step": 670500 }, { "epoch": 2.54, "learning_rate": 7.694815061402415e-06, "loss": 1.0665, "step": 671000 }, { "epoch": 2.54, "learning_rate": 7.663291078586769e-06, "loss": 1.0611, "step": 671500 }, { "epoch": 2.54, "learning_rate": 7.63176709577112e-06, "loss": 1.0597, "step": 672000 }, { "epoch": 2.54, "learning_rate": 7.600243112955474e-06, "loss": 1.0616, "step": 672500 }, { "epoch": 2.55, "learning_rate": 7.568719130139829e-06, "loss": 1.0685, "step": 673000 }, { "epoch": 2.55, "learning_rate": 7.537195147324181e-06, "loss": 1.0672, "step": 673500 }, { "epoch": 2.55, "learning_rate": 7.505671164508535e-06, "loss": 1.0586, "step": 674000 }, { "epoch": 2.55, "learning_rate": 7.474147181692889e-06, "loss": 1.063, "step": 674500 }, { "epoch": 2.55, "learning_rate": 7.4426231988772426e-06, "loss": 1.0588, "step": 675000 }, { "epoch": 2.56, "learning_rate": 7.411099216061595e-06, "loss": 1.0593, "step": 675500 }, { "epoch": 2.56, "learning_rate": 7.379575233245949e-06, "loss": 1.0564, "step": 676000 }, { "epoch": 2.56, "learning_rate": 7.348051250430303e-06, "loss": 1.065, "step": 676500 }, { "epoch": 2.56, "learning_rate": 7.316527267614656e-06, "loss": 1.0657, "step": 677000 }, { "epoch": 2.56, "learning_rate": 7.28500328479901e-06, "loss": 1.0555, "step": 677500 }, { "epoch": 2.56, "learning_rate": 7.253479301983364e-06, "loss": 1.062, "step": 678000 }, { "epoch": 2.57, "learning_rate": 7.221955319167717e-06, "loss": 1.0612, "step": 678500 }, { "epoch": 2.57, "learning_rate": 7.19043133635207e-06, "loss": 1.0648, "step": 679000 }, { "epoch": 2.57, "learning_rate": 7.1589073535364235e-06, "loss": 1.0595, "step": 679500 }, { "epoch": 2.57, "learning_rate": 7.127383370720778e-06, "loss": 1.0601, "step": 680000 }, { "epoch": 2.57, "learning_rate": 7.09585938790513e-06, "loss": 1.0595, "step": 680500 }, { "epoch": 2.58, "learning_rate": 7.064335405089484e-06, "loss": 1.0532, "step": 681000 }, { "epoch": 2.58, "learning_rate": 7.032811422273838e-06, "loss": 1.0577, "step": 681500 }, { "epoch": 2.58, "learning_rate": 7.0012874394581915e-06, "loss": 1.0652, "step": 682000 }, { "epoch": 2.58, "learning_rate": 6.969763456642545e-06, "loss": 1.0537, "step": 682500 }, { "epoch": 2.58, "learning_rate": 6.9382394738268975e-06, "loss": 1.0668, "step": 683000 }, { "epoch": 2.59, "learning_rate": 6.906715491011252e-06, "loss": 1.07, "step": 683500 }, { "epoch": 2.59, "learning_rate": 6.875191508195605e-06, "loss": 1.0588, "step": 684000 }, { "epoch": 2.59, "learning_rate": 6.843667525379959e-06, "loss": 1.0543, "step": 684500 }, { "epoch": 2.59, "learning_rate": 6.812143542564313e-06, "loss": 1.0557, "step": 685000 }, { "epoch": 2.59, "learning_rate": 6.780619559748666e-06, "loss": 1.0475, "step": 685500 }, { "epoch": 2.6, "learning_rate": 6.749095576933019e-06, "loss": 1.0608, "step": 686000 }, { "epoch": 2.6, "learning_rate": 6.7175715941173724e-06, "loss": 1.0565, "step": 686500 }, { "epoch": 2.6, "learning_rate": 6.686047611301727e-06, "loss": 1.0513, "step": 687000 }, { "epoch": 2.6, "learning_rate": 6.65452362848608e-06, "loss": 1.0601, "step": 687500 }, { "epoch": 2.6, "learning_rate": 6.622999645670433e-06, "loss": 1.0524, "step": 688000 }, { "epoch": 2.6, "learning_rate": 6.591475662854787e-06, "loss": 1.0611, "step": 688500 }, { "epoch": 2.61, "learning_rate": 6.5599516800391405e-06, "loss": 1.0514, "step": 689000 }, { "epoch": 2.61, "learning_rate": 6.528427697223494e-06, "loss": 1.0609, "step": 689500 }, { "epoch": 2.61, "learning_rate": 6.496903714407848e-06, "loss": 1.0616, "step": 690000 }, { "epoch": 2.61, "learning_rate": 6.465379731592202e-06, "loss": 1.0495, "step": 690500 }, { "epoch": 2.61, "learning_rate": 6.433855748776554e-06, "loss": 1.0681, "step": 691000 }, { "epoch": 2.62, "learning_rate": 6.402331765960908e-06, "loss": 1.0629, "step": 691500 }, { "epoch": 2.62, "learning_rate": 6.370807783145262e-06, "loss": 1.0577, "step": 692000 }, { "epoch": 2.62, "learning_rate": 6.339283800329615e-06, "loss": 1.0577, "step": 692500 }, { "epoch": 2.62, "learning_rate": 6.307759817513968e-06, "loss": 1.062, "step": 693000 }, { "epoch": 2.62, "learning_rate": 6.276235834698323e-06, "loss": 1.0612, "step": 693500 }, { "epoch": 2.63, "learning_rate": 6.244711851882676e-06, "loss": 1.0552, "step": 694000 }, { "epoch": 2.63, "learning_rate": 6.213187869067029e-06, "loss": 1.0621, "step": 694500 }, { "epoch": 2.63, "learning_rate": 6.1816638862513826e-06, "loss": 1.0647, "step": 695000 }, { "epoch": 2.63, "learning_rate": 6.150139903435736e-06, "loss": 1.0584, "step": 695500 }, { "epoch": 2.63, "learning_rate": 6.1186159206200894e-06, "loss": 1.0503, "step": 696000 }, { "epoch": 2.63, "learning_rate": 6.087091937804444e-06, "loss": 1.057, "step": 696500 }, { "epoch": 2.64, "learning_rate": 6.055567954988796e-06, "loss": 1.0624, "step": 697000 }, { "epoch": 2.64, "learning_rate": 6.024043972173151e-06, "loss": 1.0536, "step": 697500 }, { "epoch": 2.64, "learning_rate": 5.992519989357503e-06, "loss": 1.0544, "step": 698000 }, { "epoch": 2.64, "learning_rate": 5.9609960065418575e-06, "loss": 1.046, "step": 698500 }, { "epoch": 2.64, "learning_rate": 5.92947202372621e-06, "loss": 1.06, "step": 699000 }, { "epoch": 2.65, "learning_rate": 5.897948040910564e-06, "loss": 1.0442, "step": 699500 }, { "epoch": 2.65, "learning_rate": 5.866424058094918e-06, "loss": 1.0605, "step": 700000 }, { "epoch": 2.65, "learning_rate": 5.834900075279271e-06, "loss": 1.0518, "step": 700500 }, { "epoch": 2.65, "learning_rate": 5.803376092463625e-06, "loss": 1.0561, "step": 701000 }, { "epoch": 2.65, "learning_rate": 5.771852109647978e-06, "loss": 1.0522, "step": 701500 }, { "epoch": 2.66, "learning_rate": 5.7403281268323315e-06, "loss": 1.0593, "step": 702000 }, { "epoch": 2.66, "learning_rate": 5.708804144016686e-06, "loss": 1.055, "step": 702500 }, { "epoch": 2.66, "learning_rate": 5.677280161201038e-06, "loss": 1.0623, "step": 703000 }, { "epoch": 2.66, "learning_rate": 5.645756178385393e-06, "loss": 1.0621, "step": 703500 }, { "epoch": 2.66, "learning_rate": 5.614232195569745e-06, "loss": 1.0551, "step": 704000 }, { "epoch": 2.67, "learning_rate": 5.5827082127540995e-06, "loss": 1.0457, "step": 704500 }, { "epoch": 2.67, "learning_rate": 5.551184229938453e-06, "loss": 1.0516, "step": 705000 }, { "epoch": 2.67, "learning_rate": 5.519660247122806e-06, "loss": 1.0605, "step": 705500 }, { "epoch": 2.67, "learning_rate": 5.48813626430716e-06, "loss": 1.0568, "step": 706000 }, { "epoch": 2.67, "learning_rate": 5.456612281491513e-06, "loss": 1.0498, "step": 706500 }, { "epoch": 2.67, "learning_rate": 5.425088298675867e-06, "loss": 1.0547, "step": 707000 }, { "epoch": 2.68, "learning_rate": 5.39356431586022e-06, "loss": 1.0549, "step": 707500 }, { "epoch": 2.68, "learning_rate": 5.362040333044574e-06, "loss": 1.0595, "step": 708000 }, { "epoch": 2.68, "learning_rate": 5.330516350228927e-06, "loss": 1.0626, "step": 708500 }, { "epoch": 2.68, "learning_rate": 5.298992367413281e-06, "loss": 1.0501, "step": 709000 }, { "epoch": 2.68, "learning_rate": 5.267468384597635e-06, "loss": 1.0593, "step": 709500 }, { "epoch": 2.69, "learning_rate": 5.235944401781988e-06, "loss": 1.0556, "step": 710000 }, { "epoch": 2.69, "learning_rate": 5.204420418966342e-06, "loss": 1.0623, "step": 710500 }, { "epoch": 2.69, "learning_rate": 5.172896436150695e-06, "loss": 1.0494, "step": 711000 }, { "epoch": 2.69, "learning_rate": 5.1413724533350485e-06, "loss": 1.0515, "step": 711500 }, { "epoch": 2.69, "learning_rate": 5.109848470519402e-06, "loss": 1.0591, "step": 712000 }, { "epoch": 2.7, "learning_rate": 5.078324487703755e-06, "loss": 1.0514, "step": 712500 }, { "epoch": 2.7, "learning_rate": 5.04680050488811e-06, "loss": 1.0553, "step": 713000 }, { "epoch": 2.7, "learning_rate": 5.015276522072462e-06, "loss": 1.0578, "step": 713500 }, { "epoch": 2.7, "learning_rate": 4.9837525392568165e-06, "loss": 1.0506, "step": 714000 }, { "epoch": 2.7, "learning_rate": 4.952228556441169e-06, "loss": 1.0539, "step": 714500 }, { "epoch": 2.7, "learning_rate": 4.920704573625523e-06, "loss": 1.0544, "step": 715000 }, { "epoch": 2.71, "learning_rate": 4.889180590809877e-06, "loss": 1.0539, "step": 715500 }, { "epoch": 2.71, "learning_rate": 4.85765660799423e-06, "loss": 1.0479, "step": 716000 }, { "epoch": 2.71, "learning_rate": 4.826132625178584e-06, "loss": 1.0552, "step": 716500 }, { "epoch": 2.71, "learning_rate": 4.794608642362937e-06, "loss": 1.0532, "step": 717000 }, { "epoch": 2.71, "learning_rate": 4.7630846595472906e-06, "loss": 1.0511, "step": 717500 }, { "epoch": 2.72, "learning_rate": 4.731560676731644e-06, "loss": 1.0522, "step": 718000 }, { "epoch": 2.72, "learning_rate": 4.7000366939159974e-06, "loss": 1.0478, "step": 718500 }, { "epoch": 2.72, "learning_rate": 4.668512711100352e-06, "loss": 1.0505, "step": 719000 }, { "epoch": 2.72, "learning_rate": 4.636988728284704e-06, "loss": 1.0561, "step": 719500 }, { "epoch": 2.72, "learning_rate": 4.605464745469059e-06, "loss": 1.0526, "step": 720000 }, { "epoch": 2.73, "learning_rate": 4.573940762653411e-06, "loss": 1.0546, "step": 720500 }, { "epoch": 2.73, "learning_rate": 4.5424167798377655e-06, "loss": 1.057, "step": 721000 }, { "epoch": 2.73, "learning_rate": 4.510892797022119e-06, "loss": 1.0569, "step": 721500 }, { "epoch": 2.73, "learning_rate": 4.479368814206472e-06, "loss": 1.0517, "step": 722000 }, { "epoch": 2.73, "learning_rate": 4.447844831390826e-06, "loss": 1.0565, "step": 722500 }, { "epoch": 2.74, "learning_rate": 4.416320848575179e-06, "loss": 1.0603, "step": 723000 }, { "epoch": 2.74, "learning_rate": 4.384796865759533e-06, "loss": 1.0549, "step": 723500 }, { "epoch": 2.74, "learning_rate": 4.353272882943886e-06, "loss": 1.0549, "step": 724000 }, { "epoch": 2.74, "learning_rate": 4.3217489001282395e-06, "loss": 1.0482, "step": 724500 }, { "epoch": 2.74, "learning_rate": 4.290224917312594e-06, "loss": 1.0547, "step": 725000 }, { "epoch": 2.74, "learning_rate": 4.258700934496946e-06, "loss": 1.0559, "step": 725500 }, { "epoch": 2.75, "learning_rate": 4.227176951681301e-06, "loss": 1.0486, "step": 726000 }, { "epoch": 2.75, "learning_rate": 4.195652968865653e-06, "loss": 1.0547, "step": 726500 }, { "epoch": 2.75, "learning_rate": 4.1641289860500075e-06, "loss": 1.0576, "step": 727000 }, { "epoch": 2.75, "learning_rate": 4.13260500323436e-06, "loss": 1.0508, "step": 727500 }, { "epoch": 2.75, "learning_rate": 4.101081020418714e-06, "loss": 1.055, "step": 728000 }, { "epoch": 2.76, "learning_rate": 4.069557037603068e-06, "loss": 1.053, "step": 728500 }, { "epoch": 2.76, "learning_rate": 4.038033054787421e-06, "loss": 1.0527, "step": 729000 }, { "epoch": 2.76, "learning_rate": 4.006509071971775e-06, "loss": 1.0536, "step": 729500 }, { "epoch": 2.76, "learning_rate": 3.974985089156128e-06, "loss": 1.0515, "step": 730000 }, { "epoch": 2.76, "learning_rate": 3.943461106340482e-06, "loss": 1.0488, "step": 730500 }, { "epoch": 2.77, "learning_rate": 3.911937123524836e-06, "loss": 1.0453, "step": 731000 }, { "epoch": 2.77, "learning_rate": 3.8804131407091885e-06, "loss": 1.0514, "step": 731500 }, { "epoch": 2.77, "learning_rate": 3.848889157893543e-06, "loss": 1.0507, "step": 732000 }, { "epoch": 2.77, "learning_rate": 3.817365175077896e-06, "loss": 1.0403, "step": 732500 }, { "epoch": 2.77, "learning_rate": 3.7858411922622496e-06, "loss": 1.0421, "step": 733000 }, { "epoch": 2.77, "learning_rate": 3.7543172094466026e-06, "loss": 1.0459, "step": 733500 }, { "epoch": 2.78, "learning_rate": 3.7227932266309565e-06, "loss": 1.0415, "step": 734000 }, { "epoch": 2.78, "learning_rate": 3.6912692438153103e-06, "loss": 1.057, "step": 734500 }, { "epoch": 2.78, "learning_rate": 3.6597452609996634e-06, "loss": 1.0531, "step": 735000 }, { "epoch": 2.78, "learning_rate": 3.6282212781840172e-06, "loss": 1.0478, "step": 735500 }, { "epoch": 2.78, "learning_rate": 3.5966972953683702e-06, "loss": 1.0532, "step": 736000 }, { "epoch": 2.79, "learning_rate": 3.565173312552724e-06, "loss": 1.0481, "step": 736500 }, { "epoch": 2.79, "learning_rate": 3.533649329737077e-06, "loss": 1.043, "step": 737000 }, { "epoch": 2.79, "learning_rate": 3.502125346921431e-06, "loss": 1.0521, "step": 737500 }, { "epoch": 2.79, "learning_rate": 3.470601364105785e-06, "loss": 1.0489, "step": 738000 }, { "epoch": 2.79, "learning_rate": 3.439077381290138e-06, "loss": 1.0536, "step": 738500 }, { "epoch": 2.8, "learning_rate": 3.4075533984744917e-06, "loss": 1.0472, "step": 739000 }, { "epoch": 2.8, "learning_rate": 3.3760294156588447e-06, "loss": 1.048, "step": 739500 }, { "epoch": 2.8, "learning_rate": 3.3445054328431986e-06, "loss": 1.0474, "step": 740000 }, { "epoch": 2.8, "learning_rate": 3.3129814500275524e-06, "loss": 1.042, "step": 740500 }, { "epoch": 2.8, "learning_rate": 3.2814574672119054e-06, "loss": 1.055, "step": 741000 }, { "epoch": 2.81, "learning_rate": 3.2499334843962593e-06, "loss": 1.048, "step": 741500 }, { "epoch": 2.81, "learning_rate": 3.2184095015806123e-06, "loss": 1.0434, "step": 742000 }, { "epoch": 2.81, "learning_rate": 3.186885518764966e-06, "loss": 1.0595, "step": 742500 }, { "epoch": 2.81, "learning_rate": 3.155361535949319e-06, "loss": 1.0464, "step": 743000 }, { "epoch": 2.81, "learning_rate": 3.123837553133673e-06, "loss": 1.0456, "step": 743500 }, { "epoch": 2.81, "learning_rate": 3.0923135703180265e-06, "loss": 1.0464, "step": 744000 }, { "epoch": 2.82, "learning_rate": 3.06078958750238e-06, "loss": 1.0556, "step": 744500 }, { "epoch": 2.82, "learning_rate": 3.0292656046867333e-06, "loss": 1.0398, "step": 745000 }, { "epoch": 2.82, "learning_rate": 2.997741621871087e-06, "loss": 1.0468, "step": 745500 }, { "epoch": 2.82, "learning_rate": 2.9662176390554406e-06, "loss": 1.0475, "step": 746000 }, { "epoch": 2.82, "learning_rate": 2.934693656239794e-06, "loss": 1.0468, "step": 746500 }, { "epoch": 2.83, "learning_rate": 2.9031696734241475e-06, "loss": 1.0412, "step": 747000 }, { "epoch": 2.83, "learning_rate": 2.8716456906085014e-06, "loss": 1.041, "step": 747500 }, { "epoch": 2.83, "learning_rate": 2.840121707792855e-06, "loss": 1.0431, "step": 748000 }, { "epoch": 2.83, "learning_rate": 2.8085977249772082e-06, "loss": 1.047, "step": 748500 }, { "epoch": 2.83, "learning_rate": 2.777073742161562e-06, "loss": 1.055, "step": 749000 }, { "epoch": 2.84, "learning_rate": 2.7455497593459155e-06, "loss": 1.0456, "step": 749500 }, { "epoch": 2.84, "learning_rate": 2.714025776530269e-06, "loss": 1.0545, "step": 750000 }, { "epoch": 2.84, "learning_rate": 2.6825017937146224e-06, "loss": 1.0522, "step": 750500 }, { "epoch": 2.84, "learning_rate": 2.650977810898976e-06, "loss": 1.0421, "step": 751000 }, { "epoch": 2.84, "learning_rate": 2.6194538280833297e-06, "loss": 1.0401, "step": 751500 }, { "epoch": 2.84, "learning_rate": 2.587929845267683e-06, "loss": 1.0476, "step": 752000 }, { "epoch": 2.85, "learning_rate": 2.5564058624520366e-06, "loss": 1.0394, "step": 752500 }, { "epoch": 2.85, "learning_rate": 2.52488187963639e-06, "loss": 1.0383, "step": 753000 }, { "epoch": 2.85, "learning_rate": 2.4933578968207435e-06, "loss": 1.0448, "step": 753500 }, { "epoch": 2.85, "learning_rate": 2.461833914005097e-06, "loss": 1.056, "step": 754000 }, { "epoch": 2.85, "learning_rate": 2.4303099311894503e-06, "loss": 1.0415, "step": 754500 }, { "epoch": 2.86, "learning_rate": 2.398785948373804e-06, "loss": 1.0383, "step": 755000 }, { "epoch": 2.86, "learning_rate": 2.3672619655581576e-06, "loss": 1.0481, "step": 755500 }, { "epoch": 2.86, "learning_rate": 2.335737982742511e-06, "loss": 1.0483, "step": 756000 }, { "epoch": 2.86, "learning_rate": 2.3042139999268645e-06, "loss": 1.0469, "step": 756500 }, { "epoch": 2.86, "learning_rate": 2.272690017111218e-06, "loss": 1.0493, "step": 757000 }, { "epoch": 2.87, "learning_rate": 2.2411660342955714e-06, "loss": 1.0497, "step": 757500 }, { "epoch": 2.87, "learning_rate": 2.2096420514799252e-06, "loss": 1.0426, "step": 758000 }, { "epoch": 2.87, "learning_rate": 2.1781180686642787e-06, "loss": 1.0399, "step": 758500 }, { "epoch": 2.87, "learning_rate": 2.146594085848632e-06, "loss": 1.0505, "step": 759000 }, { "epoch": 2.87, "learning_rate": 2.1150701030329855e-06, "loss": 1.043, "step": 759500 }, { "epoch": 2.87, "learning_rate": 2.083546120217339e-06, "loss": 1.0407, "step": 760000 }, { "epoch": 2.88, "learning_rate": 2.0520221374016924e-06, "loss": 1.0512, "step": 760500 }, { "epoch": 2.88, "learning_rate": 2.0204981545860463e-06, "loss": 1.0439, "step": 761000 }, { "epoch": 2.88, "learning_rate": 1.9889741717703997e-06, "loss": 1.0465, "step": 761500 }, { "epoch": 2.88, "learning_rate": 1.957450188954753e-06, "loss": 1.0413, "step": 762000 }, { "epoch": 2.88, "learning_rate": 1.9259262061391066e-06, "loss": 1.0398, "step": 762500 }, { "epoch": 2.89, "learning_rate": 1.89440222332346e-06, "loss": 1.0443, "step": 763000 }, { "epoch": 2.89, "learning_rate": 1.8628782405078134e-06, "loss": 1.0505, "step": 763500 }, { "epoch": 2.89, "learning_rate": 1.8313542576921669e-06, "loss": 1.0475, "step": 764000 }, { "epoch": 2.89, "learning_rate": 1.7998302748765207e-06, "loss": 1.0464, "step": 764500 }, { "epoch": 2.89, "learning_rate": 1.7683062920608742e-06, "loss": 1.0379, "step": 765000 }, { "epoch": 2.9, "learning_rate": 1.7367823092452276e-06, "loss": 1.0433, "step": 765500 }, { "epoch": 2.9, "learning_rate": 1.705258326429581e-06, "loss": 1.0483, "step": 766000 }, { "epoch": 2.9, "learning_rate": 1.6737343436139347e-06, "loss": 1.0492, "step": 766500 }, { "epoch": 2.9, "learning_rate": 1.6422103607982881e-06, "loss": 1.0481, "step": 767000 }, { "epoch": 2.9, "learning_rate": 1.6106863779826418e-06, "loss": 1.0435, "step": 767500 }, { "epoch": 2.91, "learning_rate": 1.5791623951669952e-06, "loss": 1.0428, "step": 768000 }, { "epoch": 2.91, "learning_rate": 1.5476384123513489e-06, "loss": 1.0431, "step": 768500 }, { "epoch": 2.91, "learning_rate": 1.5161144295357023e-06, "loss": 1.0402, "step": 769000 }, { "epoch": 2.91, "learning_rate": 1.4845904467200557e-06, "loss": 1.0375, "step": 769500 }, { "epoch": 2.91, "learning_rate": 1.4530664639044094e-06, "loss": 1.039, "step": 770000 }, { "epoch": 2.91, "learning_rate": 1.4215424810887628e-06, "loss": 1.0465, "step": 770500 }, { "epoch": 2.92, "learning_rate": 1.3900184982731163e-06, "loss": 1.0445, "step": 771000 }, { "epoch": 2.92, "learning_rate": 1.35849451545747e-06, "loss": 1.0495, "step": 771500 }, { "epoch": 2.92, "learning_rate": 1.3269705326418233e-06, "loss": 1.0417, "step": 772000 }, { "epoch": 2.92, "learning_rate": 1.2954465498261768e-06, "loss": 1.0418, "step": 772500 }, { "epoch": 2.92, "learning_rate": 1.2639225670105304e-06, "loss": 1.0442, "step": 773000 }, { "epoch": 2.93, "learning_rate": 1.2323985841948839e-06, "loss": 1.0474, "step": 773500 }, { "epoch": 2.93, "learning_rate": 1.2008746013792373e-06, "loss": 1.0429, "step": 774000 }, { "epoch": 2.93, "learning_rate": 1.169350618563591e-06, "loss": 1.0394, "step": 774500 }, { "epoch": 2.93, "learning_rate": 1.1378266357479444e-06, "loss": 1.0453, "step": 775000 }, { "epoch": 2.93, "learning_rate": 1.1063026529322978e-06, "loss": 1.0387, "step": 775500 }, { "epoch": 2.94, "learning_rate": 1.0747786701166515e-06, "loss": 1.0438, "step": 776000 }, { "epoch": 2.94, "learning_rate": 1.0432546873010049e-06, "loss": 1.0435, "step": 776500 }, { "epoch": 2.94, "learning_rate": 1.0117307044853583e-06, "loss": 1.046, "step": 777000 }, { "epoch": 2.94, "learning_rate": 9.80206721669712e-07, "loss": 1.0365, "step": 777500 }, { "epoch": 2.94, "learning_rate": 9.486827388540655e-07, "loss": 1.0443, "step": 778000 }, { "epoch": 2.94, "learning_rate": 9.17158756038419e-07, "loss": 1.0454, "step": 778500 }, { "epoch": 2.95, "learning_rate": 8.856347732227724e-07, "loss": 1.0427, "step": 779000 }, { "epoch": 2.95, "learning_rate": 8.54110790407126e-07, "loss": 1.0434, "step": 779500 }, { "epoch": 2.95, "learning_rate": 8.225868075914795e-07, "loss": 1.0393, "step": 780000 }, { "epoch": 2.95, "learning_rate": 7.910628247758329e-07, "loss": 1.0326, "step": 780500 }, { "epoch": 2.95, "learning_rate": 7.595388419601865e-07, "loss": 1.0494, "step": 781000 }, { "epoch": 2.96, "learning_rate": 7.2801485914454e-07, "loss": 1.0307, "step": 781500 }, { "epoch": 2.96, "learning_rate": 6.964908763288935e-07, "loss": 1.041, "step": 782000 }, { "epoch": 2.96, "learning_rate": 6.64966893513247e-07, "loss": 1.0451, "step": 782500 }, { "epoch": 2.96, "learning_rate": 6.334429106976005e-07, "loss": 1.0507, "step": 783000 }, { "epoch": 2.96, "learning_rate": 6.019189278819541e-07, "loss": 1.0379, "step": 783500 }, { "epoch": 2.97, "learning_rate": 5.703949450663076e-07, "loss": 1.0445, "step": 784000 }, { "epoch": 2.97, "learning_rate": 5.388709622506611e-07, "loss": 1.0423, "step": 784500 }, { "epoch": 2.97, "learning_rate": 5.073469794350146e-07, "loss": 1.0455, "step": 785000 }, { "epoch": 2.97, "learning_rate": 4.758229966193681e-07, "loss": 1.0432, "step": 785500 }, { "epoch": 2.97, "learning_rate": 4.4429901380372166e-07, "loss": 1.0409, "step": 786000 }, { "epoch": 2.98, "learning_rate": 4.127750309880751e-07, "loss": 1.0406, "step": 786500 }, { "epoch": 2.98, "learning_rate": 3.8125104817242864e-07, "loss": 1.0428, "step": 787000 }, { "epoch": 2.98, "learning_rate": 3.497270653567821e-07, "loss": 1.0337, "step": 787500 }, { "epoch": 2.98, "learning_rate": 3.1820308254113567e-07, "loss": 1.0387, "step": 788000 }, { "epoch": 2.98, "learning_rate": 2.8667909972548916e-07, "loss": 1.0403, "step": 788500 }, { "epoch": 2.98, "learning_rate": 2.551551169098427e-07, "loss": 1.046, "step": 789000 }, { "epoch": 2.99, "learning_rate": 2.236311340941962e-07, "loss": 1.0402, "step": 789500 }, { "epoch": 2.99, "learning_rate": 1.921071512785497e-07, "loss": 1.0478, "step": 790000 }, { "epoch": 2.99, "learning_rate": 1.6058316846290322e-07, "loss": 1.0432, "step": 790500 }, { "epoch": 2.99, "learning_rate": 1.290591856472567e-07, "loss": 1.0443, "step": 791000 }, { "epoch": 2.99, "learning_rate": 9.753520283161024e-08, "loss": 1.0442, "step": 791500 }, { "epoch": 3.0, "learning_rate": 6.601122001596375e-08, "loss": 1.0512, "step": 792000 }, { "epoch": 3.0, "learning_rate": 3.448723720031726e-08, "loss": 1.0413, "step": 792500 }, { "epoch": 3.0, "learning_rate": 2.96325438467077e-09, "loss": 1.0338, "step": 793000 }, { "epoch": 3.0, "step": 793047, "total_flos": 3.339750619344292e+18, "train_loss": 0.9585861873120558, "train_runtime": 247104.2719, "train_samples_per_second": 51.35, "train_steps_per_second": 3.209 } ], "max_steps": 793047, "num_train_epochs": 3, "total_flos": 3.339750619344292e+18, "trial_name": null, "trial_params": null }