{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9923224568138194, "eval_steps": 500, "global_step": 1560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_f1": 0.8966, "eval_gen_len": 19.970909090909092, "eval_loss": 1.5708835124969482, "eval_precision": 0.9093, "eval_recall": 0.8846, "eval_rouge1": 0.4119, "eval_rouge2": 0.2002, "eval_rougeL": 0.3529, "eval_rougeLsum": 0.3527, "eval_runtime": 231.9368, "eval_samples_per_second": 11.857, "eval_steps_per_second": 0.742, "step": 390 }, { "epoch": 1.28, "learning_rate": 1.3589743589743592e-05, "loss": 1.8155, "step": 500 }, { "epoch": 2.0, "eval_f1": 0.9003, "eval_gen_len": 19.918545454545455, "eval_loss": 1.5360783338546753, "eval_precision": 0.9123, "eval_recall": 0.8889, "eval_rouge1": 0.4331, "eval_rouge2": 0.2157, "eval_rougeL": 0.3717, "eval_rougeLsum": 0.3717, "eval_runtime": 228.1491, "eval_samples_per_second": 12.054, "eval_steps_per_second": 0.754, "step": 781 }, { "epoch": 2.56, "learning_rate": 7.17948717948718e-06, "loss": 1.5875, "step": 1000 }, { "epoch": 3.0, "eval_f1": 0.899, "eval_gen_len": 19.954545454545453, "eval_loss": 1.5030012130737305, "eval_precision": 0.9117, "eval_recall": 0.8871, "eval_rouge1": 0.4263, "eval_rouge2": 0.2129, "eval_rougeL": 0.3671, "eval_rougeLsum": 0.3673, "eval_runtime": 228.8181, "eval_samples_per_second": 12.018, "eval_steps_per_second": 0.752, "step": 1172 }, { "epoch": 3.84, "learning_rate": 7.692307692307694e-07, "loss": 1.4978, "step": 1500 }, { "epoch": 3.99, "eval_f1": 0.9002, "eval_gen_len": 19.925454545454546, "eval_loss": 1.499870777130127, "eval_precision": 0.9125, "eval_recall": 0.8885, "eval_rouge1": 0.4331, "eval_rouge2": 0.2164, "eval_rougeL": 0.3724, "eval_rougeLsum": 0.3725, "eval_runtime": 229.5833, "eval_samples_per_second": 11.978, "eval_steps_per_second": 0.749, "step": 1560 }, { "epoch": 3.99, "step": 1560, "total_flos": 2.163117129644114e+17, "train_loss": 1.6275942337818634, "train_runtime": 5933.9925, "train_samples_per_second": 33.704, "train_steps_per_second": 0.263 } ], "logging_steps": 500, "max_steps": 1560, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 2.163117129644114e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }