{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 4460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.7220906615257263, "learning_rate": 0.0008, "loss": 0.8166, "step": 892 }, { "epoch": 1.0, "eval_gen_len": 1.0, "eval_loss": 0.541189432144165, "eval_rouge1": 0.6958, "eval_rouge2": 0.0, "eval_rougeL": 0.6933, "eval_rougeLsum": 0.6944, "eval_runtime": 38.2682, "eval_samples_per_second": 19.599, "eval_steps_per_second": 0.627, "step": 892 }, { "epoch": 2.0, "grad_norm": 0.7783016562461853, "learning_rate": 0.0006, "loss": 0.6181, "step": 1784 }, { "epoch": 2.0, "eval_gen_len": 1.0, "eval_loss": 0.5291976928710938, "eval_rouge1": 0.6767, "eval_rouge2": 0.0, "eval_rougeL": 0.6749, "eval_rougeLsum": 0.6733, "eval_runtime": 38.0243, "eval_samples_per_second": 19.724, "eval_steps_per_second": 0.631, "step": 1784 }, { "epoch": 3.0, "grad_norm": 0.717491865158081, "learning_rate": 0.0004, "loss": 0.5768, "step": 2676 }, { "epoch": 3.0, "eval_gen_len": 1.0, "eval_loss": 0.4997425377368927, "eval_rouge1": 0.6835, "eval_rouge2": 0.0, "eval_rougeL": 0.6819, "eval_rougeLsum": 0.6808, "eval_runtime": 37.8683, "eval_samples_per_second": 19.805, "eval_steps_per_second": 0.634, "step": 2676 }, { "epoch": 4.0, "grad_norm": 0.5613729953765869, "learning_rate": 0.0002, "loss": 0.5516, "step": 3568 }, { "epoch": 4.0, "eval_gen_len": 1.0, "eval_loss": 0.5060973763465881, "eval_rouge1": 0.6866, "eval_rouge2": 0.0, "eval_rougeL": 0.6872, "eval_rougeLsum": 0.6829, "eval_runtime": 37.6793, "eval_samples_per_second": 19.905, "eval_steps_per_second": 0.637, "step": 3568 }, { "epoch": 5.0, "grad_norm": 0.717795193195343, "learning_rate": 0.0, "loss": 0.5337, "step": 4460 }, { "epoch": 5.0, "eval_gen_len": 1.0, "eval_loss": 0.49971678853034973, "eval_rouge1": 0.7062, "eval_rouge2": 0.0, "eval_rougeL": 0.7054, "eval_rougeLsum": 0.7026, "eval_runtime": 37.7942, "eval_samples_per_second": 19.844, "eval_steps_per_second": 0.635, "step": 4460 }, { "epoch": 5.0, "step": 4460, "total_flos": 4.90238608932864e+16, "train_loss": 0.6193654355446854, "train_runtime": 3915.3347, "train_samples_per_second": 18.213, "train_steps_per_second": 1.139 } ], "logging_steps": 500, "max_steps": 4460, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 4.90238608932864e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }