{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 31110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 4.919639987142398e-05, "loss": 3.736, "step": 500 }, { "epoch": 0.32, "learning_rate": 4.839279974284796e-05, "loss": 2.694, "step": 1000 }, { "epoch": 0.48, "learning_rate": 4.758919961427194e-05, "loss": 2.3793, "step": 1500 }, { "epoch": 0.64, "learning_rate": 4.678559948569592e-05, "loss": 2.2157, "step": 2000 }, { "epoch": 0.8, "learning_rate": 4.59819993571199e-05, "loss": 2.0865, "step": 2500 }, { "epoch": 0.96, "learning_rate": 4.517839922854388e-05, "loss": 1.9888, "step": 3000 }, { "epoch": 1.13, "learning_rate": 4.4374799099967856e-05, "loss": 1.7791, "step": 3500 }, { "epoch": 1.29, "learning_rate": 4.357119897139183e-05, "loss": 1.7181, "step": 4000 }, { "epoch": 1.45, "learning_rate": 4.2767598842815816e-05, "loss": 1.6788, "step": 4500 }, { "epoch": 1.61, "learning_rate": 4.196399871423979e-05, "loss": 1.6565, "step": 5000 }, { "epoch": 1.77, "learning_rate": 4.1160398585663775e-05, "loss": 1.6128, "step": 5500 }, { "epoch": 1.93, "learning_rate": 4.035679845708776e-05, "loss": 1.5839, "step": 6000 }, { "epoch": 2.09, "learning_rate": 3.9553198328511734e-05, "loss": 1.4628, "step": 6500 }, { "epoch": 2.25, "learning_rate": 3.874959819993572e-05, "loss": 1.3677, "step": 7000 }, { "epoch": 2.41, "learning_rate": 3.7945998071359693e-05, "loss": 1.3458, "step": 7500 }, { "epoch": 2.57, "learning_rate": 3.7142397942783676e-05, "loss": 1.3436, "step": 8000 }, { "epoch": 2.73, "learning_rate": 3.633879781420765e-05, "loss": 1.3476, "step": 8500 }, { "epoch": 2.89, "learning_rate": 3.5535197685631636e-05, "loss": 1.3265, "step": 9000 }, { "epoch": 3.05, "learning_rate": 3.473159755705561e-05, "loss": 1.2552, "step": 9500 }, { "epoch": 3.21, "learning_rate": 3.392799742847959e-05, "loss": 1.1139, "step": 10000 }, { "epoch": 3.38, "learning_rate": 3.312439729990357e-05, "loss": 1.1205, "step": 10500 }, { "epoch": 3.54, "learning_rate": 3.232079717132755e-05, "loss": 1.1202, "step": 11000 }, { "epoch": 3.7, "learning_rate": 3.151719704275153e-05, "loss": 1.1305, "step": 11500 }, { "epoch": 3.86, "learning_rate": 3.071359691417551e-05, "loss": 1.1291, "step": 12000 }, { "epoch": 4.02, "learning_rate": 2.9909996785599486e-05, "loss": 1.1071, "step": 12500 }, { "epoch": 4.18, "learning_rate": 2.9106396657023466e-05, "loss": 0.9143, "step": 13000 }, { "epoch": 4.34, "learning_rate": 2.8302796528447446e-05, "loss": 0.9409, "step": 13500 }, { "epoch": 4.5, "learning_rate": 2.7499196399871425e-05, "loss": 0.9381, "step": 14000 }, { "epoch": 4.66, "learning_rate": 2.6695596271295405e-05, "loss": 0.96, "step": 14500 }, { "epoch": 4.82, "learning_rate": 2.589199614271938e-05, "loss": 0.9616, "step": 15000 }, { "epoch": 4.98, "learning_rate": 2.508839601414336e-05, "loss": 0.948, "step": 15500 }, { "epoch": 5.14, "learning_rate": 2.4284795885567344e-05, "loss": 0.7787, "step": 16000 }, { "epoch": 5.3, "learning_rate": 2.3481195756991323e-05, "loss": 0.7821, "step": 16500 }, { "epoch": 5.46, "learning_rate": 2.2677595628415303e-05, "loss": 0.7927, "step": 17000 }, { "epoch": 5.63, "learning_rate": 2.1873995499839283e-05, "loss": 0.7907, "step": 17500 }, { "epoch": 5.79, "learning_rate": 2.107039537126326e-05, "loss": 0.7904, "step": 18000 }, { "epoch": 5.95, "learning_rate": 2.026679524268724e-05, "loss": 0.804, "step": 18500 }, { "epoch": 6.11, "learning_rate": 1.9463195114111218e-05, "loss": 0.6899, "step": 19000 }, { "epoch": 6.27, "learning_rate": 1.8659594985535198e-05, "loss": 0.6512, "step": 19500 }, { "epoch": 6.43, "learning_rate": 1.7855994856959177e-05, "loss": 0.6662, "step": 20000 }, { "epoch": 6.59, "learning_rate": 1.7052394728383157e-05, "loss": 0.6688, "step": 20500 }, { "epoch": 6.75, "learning_rate": 1.6248794599807137e-05, "loss": 0.6663, "step": 21000 }, { "epoch": 6.91, "learning_rate": 1.5445194471231116e-05, "loss": 0.6584, "step": 21500 }, { "epoch": 7.07, "learning_rate": 1.4641594342655096e-05, "loss": 0.6115, "step": 22000 }, { "epoch": 7.23, "learning_rate": 1.3837994214079075e-05, "loss": 0.5433, "step": 22500 }, { "epoch": 7.39, "learning_rate": 1.3034394085503055e-05, "loss": 0.5653, "step": 23000 }, { "epoch": 7.55, "learning_rate": 1.2230793956927033e-05, "loss": 0.5509, "step": 23500 }, { "epoch": 7.71, "learning_rate": 1.1427193828351013e-05, "loss": 0.553, "step": 24000 }, { "epoch": 7.88, "learning_rate": 1.0623593699774992e-05, "loss": 0.5659, "step": 24500 }, { "epoch": 8.04, "learning_rate": 9.819993571198972e-06, "loss": 0.5369, "step": 25000 }, { "epoch": 8.2, "learning_rate": 9.016393442622952e-06, "loss": 0.4594, "step": 25500 }, { "epoch": 8.36, "learning_rate": 8.212793314046931e-06, "loss": 0.4692, "step": 26000 }, { "epoch": 8.52, "learning_rate": 7.40919318547091e-06, "loss": 0.4773, "step": 26500 }, { "epoch": 8.68, "learning_rate": 6.605593056894889e-06, "loss": 0.479, "step": 27000 }, { "epoch": 8.84, "learning_rate": 5.801992928318869e-06, "loss": 0.4784, "step": 27500 }, { "epoch": 9.0, "learning_rate": 4.998392799742848e-06, "loss": 0.4781, "step": 28000 }, { "epoch": 9.16, "learning_rate": 4.194792671166828e-06, "loss": 0.423, "step": 28500 }, { "epoch": 9.32, "learning_rate": 3.391192542590807e-06, "loss": 0.4236, "step": 29000 }, { "epoch": 9.48, "learning_rate": 2.5875924140147865e-06, "loss": 0.4197, "step": 29500 }, { "epoch": 9.64, "learning_rate": 1.7839922854387657e-06, "loss": 0.4091, "step": 30000 }, { "epoch": 9.8, "learning_rate": 9.80392156862745e-07, "loss": 0.4076, "step": 30500 }, { "epoch": 9.96, "learning_rate": 1.7679202828672455e-07, "loss": 0.4182, "step": 31000 }, { "epoch": 10.0, "step": 31110, "total_flos": 4.532670747485798e+16, "train_loss": 1.0457585005237526, "train_runtime": 9612.1472, "train_samples_per_second": 32.359, "train_steps_per_second": 3.237 } ], "max_steps": 31110, "num_train_epochs": 10, "total_flos": 4.532670747485798e+16, "trial_name": null, "trial_params": null }