|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 2700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.8478, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.3793, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 16.2484, |
|
"eval_gen_len": 18.4167, |
|
"eval_loss": 0.3601939380168915, |
|
"eval_meteor": 0.4221, |
|
"eval_runtime": 13.5749, |
|
"eval_samples_per_second": 4.42, |
|
"eval_steps_per_second": 2.21, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3099, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.274, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.2437, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 17.2012, |
|
"eval_gen_len": 18.45, |
|
"eval_loss": 0.32714077830314636, |
|
"eval_meteor": 0.4235, |
|
"eval_runtime": 11.4881, |
|
"eval_samples_per_second": 5.223, |
|
"eval_steps_per_second": 2.611, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.1758, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.1911, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 17.6526, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.3079971969127655, |
|
"eval_meteor": 0.4244, |
|
"eval_runtime": 11.4927, |
|
"eval_samples_per_second": 5.221, |
|
"eval_steps_per_second": 2.61, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.136, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.1508, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 17.6733, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.30199024081230164, |
|
"eval_meteor": 0.4227, |
|
"eval_runtime": 11.3901, |
|
"eval_samples_per_second": 5.268, |
|
"eval_steps_per_second": 2.634, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.1336, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0853, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.1042, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 17.1567, |
|
"eval_gen_len": 18.45, |
|
"eval_loss": 0.43005862832069397, |
|
"eval_meteor": 0.421, |
|
"eval_runtime": 11.4383, |
|
"eval_samples_per_second": 5.246, |
|
"eval_steps_per_second": 2.623, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.0795, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0776, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.0712, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 17.5338, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.39506202936172485, |
|
"eval_meteor": 0.4211, |
|
"eval_runtime": 11.4467, |
|
"eval_samples_per_second": 5.242, |
|
"eval_steps_per_second": 2.621, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0548, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0516, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 17.1366, |
|
"eval_gen_len": 18.4667, |
|
"eval_loss": 0.44624677300453186, |
|
"eval_meteor": 0.4185, |
|
"eval_runtime": 11.4875, |
|
"eval_samples_per_second": 5.223, |
|
"eval_steps_per_second": 2.612, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0557, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.0485, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.043, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 17.6692, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.4671081006526947, |
|
"eval_meteor": 0.4217, |
|
"eval_runtime": 11.4117, |
|
"eval_samples_per_second": 5.258, |
|
"eval_steps_per_second": 2.629, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0412, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0402, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0243, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 17.5591, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.5049145221710205, |
|
"eval_meteor": 0.4214, |
|
"eval_runtime": 11.458, |
|
"eval_samples_per_second": 5.237, |
|
"eval_steps_per_second": 2.618, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0475, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0324, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0313, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 17.4599, |
|
"eval_gen_len": 18.5, |
|
"eval_loss": 0.5202592015266418, |
|
"eval_meteor": 0.4218, |
|
"eval_runtime": 11.5675, |
|
"eval_samples_per_second": 5.187, |
|
"eval_steps_per_second": 2.593, |
|
"step": 2700 |
|
} |
|
], |
|
"max_steps": 2700, |
|
"num_train_epochs": 10, |
|
"total_flos": 163370071388160.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|