{ "best_metric": 2.578360080718994, "best_model_checkpoint": "output/sum-41/checkpoint-108", "epoch": 6.0, "global_step": 108, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28, "learning_rate": 0.00011269523002449659, "loss": 3.268, "step": 5 }, { "epoch": 0.56, "learning_rate": 5.668773501204858e-05, "loss": 2.9209, "step": 10 }, { "epoch": 0.83, "learning_rate": 9.190657300387505e-06, "loss": 2.9717, "step": 15 }, { "epoch": 1.0, "eval_loss": 2.928959369659424, "eval_runtime": 1.1458, "eval_samples_per_second": 22.691, "eval_steps_per_second": 3.491, "step": 18 }, { "epoch": 1.11, "learning_rate": 4.137086214086682e-06, "loss": 2.7011, "step": 20 }, { "epoch": 1.39, "learning_rate": 4.513741816785908e-05, "loss": 2.8934, "step": 25 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.793, "step": 30 }, { "epoch": 1.94, "learning_rate": 0.00013615781185663748, "loss": 2.7355, "step": 35 }, { "epoch": 2.0, "eval_loss": 2.6775753498077393, "eval_runtime": 1.2274, "eval_samples_per_second": 22.813, "eval_steps_per_second": 3.259, "step": 36 }, { "epoch": 2.22, "learning_rate": 0.0001211506487979619, "loss": 2.6552, "step": 40 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 2.4943, "step": 45 }, { "epoch": 2.78, "learning_rate": 1.6049351202038163e-05, "loss": 2.2804, "step": 50 }, { "epoch": 3.0, "eval_loss": 2.632376194000244, "eval_runtime": 1.2528, "eval_samples_per_second": 22.349, "eval_steps_per_second": 3.193, "step": 54 }, { "epoch": 3.06, "learning_rate": 1.0421881433625223e-06, "loss": 2.6341, "step": 55 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 2.3676, "step": 60 }, { "epoch": 3.61, "learning_rate": 9.206258183214083e-05, "loss": 2.2182, "step": 65 }, { "epoch": 3.89, "learning_rate": 0.00013306291378591332, "loss": 2.4212, "step": 70 }, { "epoch": 4.0, "eval_loss": 2.5963516235351562, "eval_runtime": 1.2572, "eval_samples_per_second": 22.272, "eval_steps_per_second": 3.182, "step": 72 }, { "epoch": 4.17, "learning_rate": 0.00012800934269961248, "loss": 2.2587, "step": 75 }, { "epoch": 4.44, "learning_rate": 8.051226498795145e-05, "loss": 2.1767, "step": 80 }, { "epoch": 4.72, "learning_rate": 2.4504769975503385e-05, "loss": 2.1842, "step": 85 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 2.2178, "step": 90 }, { "epoch": 5.0, "eval_loss": 2.578657388687134, "eval_runtime": 1.2377, "eval_samples_per_second": 22.622, "eval_steps_per_second": 3.232, "step": 90 }, { "epoch": 5.28, "learning_rate": 2.4504769975503317e-05, "loss": 2.0438, "step": 95 }, { "epoch": 5.56, "learning_rate": 8.051226498795124e-05, "loss": 2.1724, "step": 100 }, { "epoch": 5.83, "learning_rate": 0.00012800934269961248, "loss": 1.9973, "step": 105 }, { "epoch": 6.0, "eval_loss": 2.578360080718994, "eval_runtime": 1.2443, "eval_samples_per_second": 22.503, "eval_steps_per_second": 3.215, "step": 108 } ], "max_steps": 198, "num_train_epochs": 11, "total_flos": 110787821568000.0, "trial_name": null, "trial_params": null }