|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9923224568138194, |
|
"eval_steps": 500, |
|
"global_step": 1560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8966, |
|
"eval_gen_len": 19.970909090909092, |
|
"eval_loss": 1.5708835124969482, |
|
"eval_precision": 0.9093, |
|
"eval_recall": 0.8846, |
|
"eval_rouge1": 0.4119, |
|
"eval_rouge2": 0.2002, |
|
"eval_rougeL": 0.3529, |
|
"eval_rougeLsum": 0.3527, |
|
"eval_runtime": 231.9368, |
|
"eval_samples_per_second": 11.857, |
|
"eval_steps_per_second": 0.742, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.3589743589743592e-05, |
|
"loss": 1.8155, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9003, |
|
"eval_gen_len": 19.918545454545455, |
|
"eval_loss": 1.5360783338546753, |
|
"eval_precision": 0.9123, |
|
"eval_recall": 0.8889, |
|
"eval_rouge1": 0.4331, |
|
"eval_rouge2": 0.2157, |
|
"eval_rougeL": 0.3717, |
|
"eval_rougeLsum": 0.3717, |
|
"eval_runtime": 228.1491, |
|
"eval_samples_per_second": 12.054, |
|
"eval_steps_per_second": 0.754, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.17948717948718e-06, |
|
"loss": 1.5875, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.899, |
|
"eval_gen_len": 19.954545454545453, |
|
"eval_loss": 1.5030012130737305, |
|
"eval_precision": 0.9117, |
|
"eval_recall": 0.8871, |
|
"eval_rouge1": 0.4263, |
|
"eval_rouge2": 0.2129, |
|
"eval_rougeL": 0.3671, |
|
"eval_rougeLsum": 0.3673, |
|
"eval_runtime": 228.8181, |
|
"eval_samples_per_second": 12.018, |
|
"eval_steps_per_second": 0.752, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.4978, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_f1": 0.9002, |
|
"eval_gen_len": 19.925454545454546, |
|
"eval_loss": 1.499870777130127, |
|
"eval_precision": 0.9125, |
|
"eval_recall": 0.8885, |
|
"eval_rouge1": 0.4331, |
|
"eval_rouge2": 0.2164, |
|
"eval_rougeL": 0.3724, |
|
"eval_rougeLsum": 0.3725, |
|
"eval_runtime": 229.5833, |
|
"eval_samples_per_second": 11.978, |
|
"eval_steps_per_second": 0.749, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"step": 1560, |
|
"total_flos": 2.163117129644114e+17, |
|
"train_loss": 1.6275942337818634, |
|
"train_runtime": 5933.9925, |
|
"train_samples_per_second": 33.704, |
|
"train_steps_per_second": 0.263 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 2.163117129644114e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|