|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 17865, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.893374919891357, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6352, |
|
"step": 3573 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.46136122941970825, |
|
"eval_rouge1": 0.3871, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.3846, |
|
"eval_rougeLsum": 0.3884, |
|
"eval_runtime": 336.5055, |
|
"eval_samples_per_second": 2.22, |
|
"eval_steps_per_second": 0.279, |
|
"step": 3573 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.110297679901123, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4361, |
|
"step": 7146 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.4357260465621948, |
|
"eval_rouge1": 0.3574, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.3543, |
|
"eval_rougeLsum": 0.3544, |
|
"eval_runtime": 327.4958, |
|
"eval_samples_per_second": 2.281, |
|
"eval_steps_per_second": 0.287, |
|
"step": 7146 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.471202850341797, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3391, |
|
"step": 10719 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.4478732943534851, |
|
"eval_rouge1": 0.3973, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.3975, |
|
"eval_rougeLsum": 0.4009, |
|
"eval_runtime": 333.0142, |
|
"eval_samples_per_second": 2.243, |
|
"eval_steps_per_second": 0.282, |
|
"step": 10719 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.917109966278076, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2686, |
|
"step": 14292 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.4638811945915222, |
|
"eval_rouge1": 0.4113, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.4102, |
|
"eval_rougeLsum": 0.4115, |
|
"eval_runtime": 334.1059, |
|
"eval_samples_per_second": 2.236, |
|
"eval_steps_per_second": 0.281, |
|
"step": 14292 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.981204092502594, |
|
"learning_rate": 0.0, |
|
"loss": 0.2221, |
|
"step": 17865 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 1.0, |
|
"eval_loss": 0.48620468378067017, |
|
"eval_rouge1": 0.3867, |
|
"eval_rouge2": 0.0, |
|
"eval_rougeL": 0.3833, |
|
"eval_rougeLsum": 0.386, |
|
"eval_runtime": 329.3468, |
|
"eval_samples_per_second": 2.268, |
|
"eval_steps_per_second": 0.285, |
|
"step": 17865 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 17865, |
|
"total_flos": 6.55200095561257e+16, |
|
"train_loss": 0.38020934802358836, |
|
"train_runtime": 9310.5317, |
|
"train_samples_per_second": 7.674, |
|
"train_steps_per_second": 1.919 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 17865, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 6.55200095561257e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|