{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 31110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 4.919639987142398e-05,
      "loss": 3.736,
      "step": 500
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.839279974284796e-05,
      "loss": 2.694,
      "step": 1000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.758919961427194e-05,
      "loss": 2.3793,
      "step": 1500
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.678559948569592e-05,
      "loss": 2.2157,
      "step": 2000
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.59819993571199e-05,
      "loss": 2.0865,
      "step": 2500
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.517839922854388e-05,
      "loss": 1.9888,
      "step": 3000
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.4374799099967856e-05,
      "loss": 1.7791,
      "step": 3500
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.357119897139183e-05,
      "loss": 1.7181,
      "step": 4000
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.2767598842815816e-05,
      "loss": 1.6788,
      "step": 4500
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.196399871423979e-05,
      "loss": 1.6565,
      "step": 5000
    },
    {
      "epoch": 1.77,
      "learning_rate": 4.1160398585663775e-05,
      "loss": 1.6128,
      "step": 5500
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.035679845708776e-05,
      "loss": 1.5839,
      "step": 6000
    },
    {
      "epoch": 2.09,
      "learning_rate": 3.9553198328511734e-05,
      "loss": 1.4628,
      "step": 6500
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.874959819993572e-05,
      "loss": 1.3677,
      "step": 7000
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.7945998071359693e-05,
      "loss": 1.3458,
      "step": 7500
    },
    {
      "epoch": 2.57,
      "learning_rate": 3.7142397942783676e-05,
      "loss": 1.3436,
      "step": 8000
    },
    {
      "epoch": 2.73,
      "learning_rate": 3.633879781420765e-05,
      "loss": 1.3476,
      "step": 8500
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.5535197685631636e-05,
      "loss": 1.3265,
      "step": 9000
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.473159755705561e-05,
      "loss": 1.2552,
      "step": 9500
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.392799742847959e-05,
      "loss": 1.1139,
      "step": 10000
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.312439729990357e-05,
      "loss": 1.1205,
      "step": 10500
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.232079717132755e-05,
      "loss": 1.1202,
      "step": 11000
    },
    {
      "epoch": 3.7,
      "learning_rate": 3.151719704275153e-05,
      "loss": 1.1305,
      "step": 11500
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.071359691417551e-05,
      "loss": 1.1291,
      "step": 12000
    },
    {
      "epoch": 4.02,
      "learning_rate": 2.9909996785599486e-05,
      "loss": 1.1071,
      "step": 12500
    },
    {
      "epoch": 4.18,
      "learning_rate": 2.9106396657023466e-05,
      "loss": 0.9143,
      "step": 13000
    },
    {
      "epoch": 4.34,
      "learning_rate": 2.8302796528447446e-05,
      "loss": 0.9409,
      "step": 13500
    },
    {
      "epoch": 4.5,
      "learning_rate": 2.7499196399871425e-05,
      "loss": 0.9381,
      "step": 14000
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.6695596271295405e-05,
      "loss": 0.96,
      "step": 14500
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.589199614271938e-05,
      "loss": 0.9616,
      "step": 15000
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.508839601414336e-05,
      "loss": 0.948,
      "step": 15500
    },
    {
      "epoch": 5.14,
      "learning_rate": 2.4284795885567344e-05,
      "loss": 0.7787,
      "step": 16000
    },
    {
      "epoch": 5.3,
      "learning_rate": 2.3481195756991323e-05,
      "loss": 0.7821,
      "step": 16500
    },
    {
      "epoch": 5.46,
      "learning_rate": 2.2677595628415303e-05,
      "loss": 0.7927,
      "step": 17000
    },
    {
      "epoch": 5.63,
      "learning_rate": 2.1873995499839283e-05,
      "loss": 0.7907,
      "step": 17500
    },
    {
      "epoch": 5.79,
      "learning_rate": 2.107039537126326e-05,
      "loss": 0.7904,
      "step": 18000
    },
    {
      "epoch": 5.95,
      "learning_rate": 2.026679524268724e-05,
      "loss": 0.804,
      "step": 18500
    },
    {
      "epoch": 6.11,
      "learning_rate": 1.9463195114111218e-05,
      "loss": 0.6899,
      "step": 19000
    },
    {
      "epoch": 6.27,
      "learning_rate": 1.8659594985535198e-05,
      "loss": 0.6512,
      "step": 19500
    },
    {
      "epoch": 6.43,
      "learning_rate": 1.7855994856959177e-05,
      "loss": 0.6662,
      "step": 20000
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.7052394728383157e-05,
      "loss": 0.6688,
      "step": 20500
    },
    {
      "epoch": 6.75,
      "learning_rate": 1.6248794599807137e-05,
      "loss": 0.6663,
      "step": 21000
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.5445194471231116e-05,
      "loss": 0.6584,
      "step": 21500
    },
    {
      "epoch": 7.07,
      "learning_rate": 1.4641594342655096e-05,
      "loss": 0.6115,
      "step": 22000
    },
    {
      "epoch": 7.23,
      "learning_rate": 1.3837994214079075e-05,
      "loss": 0.5433,
      "step": 22500
    },
    {
      "epoch": 7.39,
      "learning_rate": 1.3034394085503055e-05,
      "loss": 0.5653,
      "step": 23000
    },
    {
      "epoch": 7.55,
      "learning_rate": 1.2230793956927033e-05,
      "loss": 0.5509,
      "step": 23500
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.1427193828351013e-05,
      "loss": 0.553,
      "step": 24000
    },
    {
      "epoch": 7.88,
      "learning_rate": 1.0623593699774992e-05,
      "loss": 0.5659,
      "step": 24500
    },
    {
      "epoch": 8.04,
      "learning_rate": 9.819993571198972e-06,
      "loss": 0.5369,
      "step": 25000
    },
    {
      "epoch": 8.2,
      "learning_rate": 9.016393442622952e-06,
      "loss": 0.4594,
      "step": 25500
    },
    {
      "epoch": 8.36,
      "learning_rate": 8.212793314046931e-06,
      "loss": 0.4692,
      "step": 26000
    },
    {
      "epoch": 8.52,
      "learning_rate": 7.40919318547091e-06,
      "loss": 0.4773,
      "step": 26500
    },
    {
      "epoch": 8.68,
      "learning_rate": 6.605593056894889e-06,
      "loss": 0.479,
      "step": 27000
    },
    {
      "epoch": 8.84,
      "learning_rate": 5.801992928318869e-06,
      "loss": 0.4784,
      "step": 27500
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.998392799742848e-06,
      "loss": 0.4781,
      "step": 28000
    },
    {
      "epoch": 9.16,
      "learning_rate": 4.194792671166828e-06,
      "loss": 0.423,
      "step": 28500
    },
    {
      "epoch": 9.32,
      "learning_rate": 3.391192542590807e-06,
      "loss": 0.4236,
      "step": 29000
    },
    {
      "epoch": 9.48,
      "learning_rate": 2.5875924140147865e-06,
      "loss": 0.4197,
      "step": 29500
    },
    {
      "epoch": 9.64,
      "learning_rate": 1.7839922854387657e-06,
      "loss": 0.4091,
      "step": 30000
    },
    {
      "epoch": 9.8,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.4076,
      "step": 30500
    },
    {
      "epoch": 9.96,
      "learning_rate": 1.7679202828672455e-07,
      "loss": 0.4182,
      "step": 31000
    },
    {
      "epoch": 10.0,
      "step": 31110,
      "total_flos": 4.532670747485798e+16,
      "train_loss": 1.0457585005237526,
      "train_runtime": 9612.1472,
      "train_samples_per_second": 32.359,
      "train_steps_per_second": 3.237
    }
  ],
  "max_steps": 31110,
  "num_train_epochs": 10,
  "total_flos": 4.532670747485798e+16,
  "trial_name": null,
  "trial_params": null
}