{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 53830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 4.953836150845253e-05, "loss": 3.2037, "step": 500 }, { "epoch": 0.19, "learning_rate": 4.9073936466654286e-05, "loss": 3.166, "step": 1000 }, { "epoch": 0.28, "learning_rate": 4.860951142485603e-05, "loss": 3.1434, "step": 1500 }, { "epoch": 0.37, "learning_rate": 4.814508638305778e-05, "loss": 3.1104, "step": 2000 }, { "epoch": 0.46, "learning_rate": 4.7680661341259524e-05, "loss": 3.0987, "step": 2500 }, { "epoch": 0.56, "learning_rate": 4.721623629946127e-05, "loss": 3.0677, "step": 3000 }, { "epoch": 0.65, "learning_rate": 4.6751811257663017e-05, "loss": 3.0787, "step": 3500 }, { "epoch": 0.74, "learning_rate": 4.628738621586476e-05, "loss": 3.047, "step": 4000 }, { "epoch": 0.84, "learning_rate": 4.582389002415011e-05, "loss": 3.0487, "step": 4500 }, { "epoch": 0.93, "learning_rate": 4.5359464982351854e-05, "loss": 3.0396, "step": 5000 }, { "epoch": 1.02, "learning_rate": 4.489596879063719e-05, "loss": 2.983, "step": 5500 }, { "epoch": 1.11, "learning_rate": 4.443154374883894e-05, "loss": 2.8114, "step": 6000 }, { "epoch": 1.21, "learning_rate": 4.3967118707040685e-05, "loss": 2.8242, "step": 6500 }, { "epoch": 1.3, "learning_rate": 4.350269366524243e-05, "loss": 2.8101, "step": 7000 }, { "epoch": 1.39, "learning_rate": 4.303826862344418e-05, "loss": 2.8055, "step": 7500 }, { "epoch": 1.49, "learning_rate": 4.2573843581645924e-05, "loss": 2.821, "step": 8000 }, { "epoch": 1.58, "learning_rate": 4.210941853984767e-05, "loss": 2.8286, "step": 8500 }, { "epoch": 1.67, "learning_rate": 4.1644993498049416e-05, "loss": 2.8026, "step": 9000 }, { "epoch": 1.76, "learning_rate": 4.118056845625116e-05, "loss": 2.8078, "step": 9500 }, { "epoch": 1.86, "learning_rate": 4.0716143414452915e-05, "loss": 2.7948, "step": 10000 }, { "epoch": 1.95, "learning_rate": 4.0251718372654655e-05, "loss": 2.8053, "step": 10500 }, { "epoch": 2.04, "learning_rate": 3.97872933308564e-05, "loss": 2.6906, "step": 11000 }, { "epoch": 2.14, "learning_rate": 3.9323797139141746e-05, "loss": 2.5819, "step": 11500 }, { "epoch": 2.23, "learning_rate": 3.8859372097343485e-05, "loss": 2.5768, "step": 12000 }, { "epoch": 2.32, "learning_rate": 3.839587590562883e-05, "loss": 2.597, "step": 12500 }, { "epoch": 2.42, "learning_rate": 3.7931450863830584e-05, "loss": 2.5986, "step": 13000 }, { "epoch": 2.51, "learning_rate": 3.746702582203232e-05, "loss": 2.6172, "step": 13500 }, { "epoch": 2.6, "learning_rate": 3.700260078023407e-05, "loss": 2.5917, "step": 14000 }, { "epoch": 2.69, "learning_rate": 3.653817573843582e-05, "loss": 2.6019, "step": 14500 }, { "epoch": 2.79, "learning_rate": 3.607375069663756e-05, "loss": 2.598, "step": 15000 }, { "epoch": 2.88, "learning_rate": 3.560932565483931e-05, "loss": 2.5893, "step": 15500 }, { "epoch": 2.97, "learning_rate": 3.514490061304106e-05, "loss": 2.589, "step": 16000 }, { "epoch": 3.07, "learning_rate": 3.468047557124281e-05, "loss": 2.4718, "step": 16500 }, { "epoch": 3.16, "learning_rate": 3.4216979379528146e-05, "loss": 2.41, "step": 17000 }, { "epoch": 3.25, "learning_rate": 3.375255433772989e-05, "loss": 2.3767, "step": 17500 }, { "epoch": 3.34, "learning_rate": 3.328812929593164e-05, "loss": 2.3999, "step": 18000 }, { "epoch": 3.44, "learning_rate": 3.2823704254133384e-05, "loss": 2.4211, "step": 18500 }, { "epoch": 3.53, "learning_rate": 3.236020806241873e-05, "loss": 2.4202, "step": 19000 }, { "epoch": 3.62, "learning_rate": 3.1895783020620476e-05, "loss": 2.4131, "step": 19500 }, { "epoch": 3.72, "learning_rate": 3.1431357978822215e-05, "loss": 2.4155, "step": 20000 }, { "epoch": 3.81, "learning_rate": 3.096693293702397e-05, "loss": 2.3968, "step": 20500 }, { "epoch": 3.9, "learning_rate": 3.050343674530931e-05, "loss": 2.4189, "step": 21000 }, { "epoch": 3.99, "learning_rate": 3.0039011703511056e-05, "loss": 2.406, "step": 21500 }, { "epoch": 4.09, "learning_rate": 2.95745866617128e-05, "loss": 2.2482, "step": 22000 }, { "epoch": 4.18, "learning_rate": 2.911016161991455e-05, "loss": 2.2432, "step": 22500 }, { "epoch": 4.27, "learning_rate": 2.8646665428199894e-05, "loss": 2.2532, "step": 23000 }, { "epoch": 4.37, "learning_rate": 2.8182240386401637e-05, "loss": 2.2649, "step": 23500 }, { "epoch": 4.46, "learning_rate": 2.7717815344603383e-05, "loss": 2.2365, "step": 24000 }, { "epoch": 4.55, "learning_rate": 2.7253390302805126e-05, "loss": 2.233, "step": 24500 }, { "epoch": 4.64, "learning_rate": 2.6788965261006875e-05, "loss": 2.2569, "step": 25000 }, { "epoch": 4.74, "learning_rate": 2.632454021920862e-05, "loss": 2.2609, "step": 25500 }, { "epoch": 4.83, "learning_rate": 2.5860115177410364e-05, "loss": 2.256, "step": 26000 }, { "epoch": 4.92, "learning_rate": 2.5395690135612117e-05, "loss": 2.2553, "step": 26500 }, { "epoch": 5.02, "learning_rate": 2.4933122793981055e-05, "loss": 2.1951, "step": 27000 }, { "epoch": 5.11, "learning_rate": 2.4468697752182798e-05, "loss": 2.1061, "step": 27500 }, { "epoch": 5.2, "learning_rate": 2.4004272710384544e-05, "loss": 2.0723, "step": 28000 }, { "epoch": 5.29, "learning_rate": 2.353984766858629e-05, "loss": 2.0796, "step": 28500 }, { "epoch": 5.39, "learning_rate": 2.3075422626788036e-05, "loss": 2.1206, "step": 29000 }, { "epoch": 5.48, "learning_rate": 2.261192643507338e-05, "loss": 2.1276, "step": 29500 }, { "epoch": 5.57, "learning_rate": 2.2147501393275128e-05, "loss": 2.1108, "step": 30000 }, { "epoch": 5.67, "learning_rate": 2.168400520156047e-05, "loss": 2.1241, "step": 30500 }, { "epoch": 5.76, "learning_rate": 2.1219580159762216e-05, "loss": 2.1392, "step": 31000 }, { "epoch": 5.85, "learning_rate": 2.0755155117963962e-05, "loss": 2.1427, "step": 31500 }, { "epoch": 5.94, "learning_rate": 2.0291658926249304e-05, "loss": 2.0999, "step": 32000 }, { "epoch": 6.04, "learning_rate": 1.982723388445105e-05, "loss": 2.0637, "step": 32500 }, { "epoch": 6.13, "learning_rate": 1.9362808842652796e-05, "loss": 1.9977, "step": 33000 }, { "epoch": 6.22, "learning_rate": 1.8898383800854543e-05, "loss": 1.98, "step": 33500 }, { "epoch": 6.32, "learning_rate": 1.843395875905629e-05, "loss": 1.9836, "step": 34000 }, { "epoch": 6.41, "learning_rate": 1.7969533717258035e-05, "loss": 2.0058, "step": 34500 }, { "epoch": 6.5, "learning_rate": 1.750510867545978e-05, "loss": 1.9843, "step": 35000 }, { "epoch": 6.59, "learning_rate": 1.7040683633661527e-05, "loss": 1.9899, "step": 35500 }, { "epoch": 6.69, "learning_rate": 1.6576258591863277e-05, "loss": 2.0094, "step": 36000 }, { "epoch": 6.78, "learning_rate": 1.611183355006502e-05, "loss": 2.0092, "step": 36500 }, { "epoch": 6.87, "learning_rate": 1.5647408508266766e-05, "loss": 2.0013, "step": 37000 }, { "epoch": 6.97, "learning_rate": 1.5182983466468512e-05, "loss": 2.0088, "step": 37500 }, { "epoch": 7.06, "learning_rate": 1.4719487274753854e-05, "loss": 1.9496, "step": 38000 }, { "epoch": 7.15, "learning_rate": 1.4255062232955602e-05, "loss": 1.8963, "step": 38500 }, { "epoch": 7.25, "learning_rate": 1.379063719115735e-05, "loss": 1.8895, "step": 39000 }, { "epoch": 7.34, "learning_rate": 1.3326212149359094e-05, "loss": 1.9075, "step": 39500 }, { "epoch": 7.43, "learning_rate": 1.2862715957644436e-05, "loss": 1.9191, "step": 40000 }, { "epoch": 7.52, "learning_rate": 1.2398290915846184e-05, "loss": 1.9075, "step": 40500 }, { "epoch": 7.62, "learning_rate": 1.193386587404793e-05, "loss": 1.9071, "step": 41000 }, { "epoch": 7.71, "learning_rate": 1.1469440832249676e-05, "loss": 1.9168, "step": 41500 }, { "epoch": 7.8, "learning_rate": 1.100501579045142e-05, "loss": 1.9142, "step": 42000 }, { "epoch": 7.9, "learning_rate": 1.0540590748653167e-05, "loss": 1.8864, "step": 42500 }, { "epoch": 7.99, "learning_rate": 1.0076165706854915e-05, "loss": 1.9086, "step": 43000 }, { "epoch": 8.08, "learning_rate": 9.61174066505666e-06, "loss": 1.8377, "step": 43500 }, { "epoch": 8.17, "learning_rate": 9.148244473342003e-06, "loss": 1.8389, "step": 44000 }, { "epoch": 8.27, "learning_rate": 8.683819431543749e-06, "loss": 1.8276, "step": 44500 }, { "epoch": 8.36, "learning_rate": 8.219394389745495e-06, "loss": 1.8399, "step": 45000 }, { "epoch": 8.45, "learning_rate": 7.754969347947241e-06, "loss": 1.8389, "step": 45500 }, { "epoch": 8.55, "learning_rate": 7.29240200631618e-06, "loss": 1.8308, "step": 46000 }, { "epoch": 8.64, "learning_rate": 6.827976964517926e-06, "loss": 1.8329, "step": 46500 }, { "epoch": 8.73, "learning_rate": 6.363551922719674e-06, "loss": 1.8351, "step": 47000 }, { "epoch": 8.82, "learning_rate": 5.8991268809214195e-06, "loss": 1.8573, "step": 47500 }, { "epoch": 8.92, "learning_rate": 5.434701839123166e-06, "loss": 1.8221, "step": 48000 }, { "epoch": 9.01, "learning_rate": 4.9712056474085085e-06, "loss": 1.8384, "step": 48500 }, { "epoch": 9.1, "learning_rate": 4.506780605610255e-06, "loss": 1.7756, "step": 49000 }, { "epoch": 9.2, "learning_rate": 4.042355563812001e-06, "loss": 1.7839, "step": 49500 }, { "epoch": 9.29, "learning_rate": 3.577930522013747e-06, "loss": 1.7839, "step": 50000 }, { "epoch": 9.38, "learning_rate": 3.113505480215493e-06, "loss": 1.8112, "step": 50500 }, { "epoch": 9.47, "learning_rate": 2.6490804384172397e-06, "loss": 1.8078, "step": 51000 }, { "epoch": 9.57, "learning_rate": 2.184655396618986e-06, "loss": 1.7745, "step": 51500 }, { "epoch": 9.66, "learning_rate": 1.720230354820732e-06, "loss": 1.7955, "step": 52000 }, { "epoch": 9.75, "learning_rate": 1.2558053130224781e-06, "loss": 1.7978, "step": 52500 }, { "epoch": 9.85, "learning_rate": 7.923091213078211e-07, "loss": 1.8062, "step": 53000 }, { "epoch": 9.94, "learning_rate": 3.2788407950956715e-07, "loss": 1.7702, "step": 53500 }, { "epoch": 10.0, "step": 53830, "total_flos": 9.998391677288448e+16, "train_loss": 2.279561182284652, "train_runtime": 34779.9602, "train_samples_per_second": 1.548, "train_steps_per_second": 1.548 } ], "max_steps": 53830, "num_train_epochs": 10, "total_flos": 9.998391677288448e+16, "trial_name": null, "trial_params": null }