|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 3930, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.0452, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0331772565841675, |
|
"eval_runtime": 41.1909, |
|
"eval_samples_per_second": 19.058, |
|
"eval_steps_per_second": 1.214, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.3638676844783716e-05, |
|
"loss": 1.5229, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.1159, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.912860095500946, |
|
"eval_runtime": 38.6643, |
|
"eval_samples_per_second": 20.303, |
|
"eval_steps_per_second": 1.293, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.727735368956743e-05, |
|
"loss": 1.0464, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.1479, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8561409115791321, |
|
"eval_runtime": 38.5663, |
|
"eval_samples_per_second": 20.355, |
|
"eval_steps_per_second": 1.296, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 3.091603053435115e-05, |
|
"loss": 0.9477, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.1678, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8148155808448792, |
|
"eval_runtime": 38.0933, |
|
"eval_samples_per_second": 20.607, |
|
"eval_steps_per_second": 1.313, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.1791, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7879176735877991, |
|
"eval_runtime": 38.3603, |
|
"eval_samples_per_second": 20.464, |
|
"eval_steps_per_second": 1.303, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.455470737913486e-05, |
|
"loss": 0.8888, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.1628, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7707892656326294, |
|
"eval_runtime": 38.214, |
|
"eval_samples_per_second": 20.542, |
|
"eval_steps_per_second": 1.308, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.8193384223918574e-05, |
|
"loss": 0.8517, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.1582, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7597165107727051, |
|
"eval_runtime": 38.4006, |
|
"eval_samples_per_second": 20.442, |
|
"eval_steps_per_second": 1.302, |
|
"step": 2751 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 1.1832061068702292e-05, |
|
"loss": 0.8279, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.1683, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7492462992668152, |
|
"eval_runtime": 38.3938, |
|
"eval_samples_per_second": 20.446, |
|
"eval_steps_per_second": 1.302, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 5.470737913486006e-06, |
|
"loss": 0.8107, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.1704, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7442331314086914, |
|
"eval_runtime": 38.261, |
|
"eval_samples_per_second": 20.517, |
|
"eval_steps_per_second": 1.307, |
|
"step": 3537 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.1683, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7417959570884705, |
|
"eval_runtime": 38.3788, |
|
"eval_samples_per_second": 20.454, |
|
"eval_steps_per_second": 1.303, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3930, |
|
"total_flos": 2.232877359218688e+16, |
|
"train_loss": 0.9653779240964934, |
|
"train_runtime": 2075.6207, |
|
"train_samples_per_second": 30.251, |
|
"train_steps_per_second": 1.893 |
|
} |
|
], |
|
"max_steps": 3930, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.232877359218688e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|