{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 91, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.857142857142857e-05, "loss": 1.8496, "step": 2 }, { "epoch": 0.09, "learning_rate": 5.714285714285714e-05, "loss": 1.9673, "step": 4 }, { "epoch": 0.13, "learning_rate": 8.571428571428571e-05, "loss": 1.8491, "step": 6 }, { "epoch": 0.18, "learning_rate": 0.00011428571428571428, "loss": 1.9002, "step": 8 }, { "epoch": 0.22, "learning_rate": 0.00014285714285714287, "loss": 1.873, "step": 10 }, { "epoch": 0.26, "learning_rate": 0.00017142857142857143, "loss": 1.9039, "step": 12 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 1.8956, "step": 14 }, { "epoch": 0.35, "learning_rate": 0.0001966942148760331, "loss": 1.8618, "step": 16 }, { "epoch": 0.4, "learning_rate": 0.0001933884297520661, "loss": 1.7929, "step": 18 }, { "epoch": 0.44, "learning_rate": 0.0001900826446280992, "loss": 1.852, "step": 20 }, { "epoch": 0.48, "learning_rate": 0.00018677685950413224, "loss": 1.8218, "step": 22 }, { "epoch": 0.53, "learning_rate": 0.00018347107438016532, "loss": 1.8421, "step": 24 }, { "epoch": 0.57, "learning_rate": 0.00018016528925619835, "loss": 1.7531, "step": 26 }, { "epoch": 0.62, "learning_rate": 0.00017685950413223143, "loss": 1.7133, "step": 28 }, { "epoch": 0.66, "learning_rate": 0.00017520661157024794, "loss": 2.2706, "step": 30 }, { "epoch": 0.7, "learning_rate": 0.00017190082644628102, "loss": 1.7739, "step": 32 }, { "epoch": 0.75, "learning_rate": 0.00016859504132231404, "loss": 1.7082, "step": 34 }, { "epoch": 0.79, "learning_rate": 0.00016528925619834712, "loss": 1.7831, "step": 36 }, { "epoch": 0.84, "learning_rate": 0.00016198347107438017, "loss": 1.778, "step": 38 }, { "epoch": 0.88, "learning_rate": 0.00015867768595041322, "loss": 1.7482, "step": 40 }, { "epoch": 0.92, "learning_rate": 0.00015537190082644627, "loss": 1.8466, "step": 42 }, { "epoch": 0.97, "learning_rate": 0.00015206611570247935, "loss": 1.7918, "step": 44 }, { "epoch": 1.01, "learning_rate": 0.0001487603305785124, "loss": 1.7058, "step": 46 }, { "epoch": 1.05, "learning_rate": 0.00014545454545454546, "loss": 1.7294, "step": 48 }, { "epoch": 1.1, "learning_rate": 0.0001421487603305785, "loss": 1.7285, "step": 50 }, { "epoch": 1.14, "learning_rate": 0.0001388429752066116, "loss": 1.6392, "step": 52 }, { "epoch": 1.19, "learning_rate": 0.00013553719008264464, "loss": 1.712, "step": 54 }, { "epoch": 1.23, "learning_rate": 0.0001322314049586777, "loss": 1.6992, "step": 56 }, { "epoch": 1.27, "learning_rate": 0.00012892561983471074, "loss": 1.7677, "step": 58 }, { "epoch": 1.32, "learning_rate": 0.00012561983471074382, "loss": 1.7096, "step": 60 }, { "epoch": 1.36, "learning_rate": 0.00012231404958677685, "loss": 1.7602, "step": 62 }, { "epoch": 1.41, "learning_rate": 0.00011900826446280992, "loss": 1.7128, "step": 64 }, { "epoch": 1.45, "learning_rate": 0.00011570247933884298, "loss": 1.6605, "step": 66 }, { "epoch": 1.49, "learning_rate": 0.00011239669421487604, "loss": 1.6559, "step": 68 }, { "epoch": 1.54, "learning_rate": 0.00010909090909090909, "loss": 1.6775, "step": 70 }, { "epoch": 1.58, "learning_rate": 0.00010578512396694216, "loss": 1.8318, "step": 72 }, { "epoch": 1.63, "learning_rate": 0.00010247933884297521, "loss": 1.7568, "step": 74 }, { "epoch": 1.67, "learning_rate": 9.917355371900827e-05, "loss": 1.7349, "step": 76 }, { "epoch": 1.71, 
"learning_rate": 9.586776859504133e-05, "loss": 1.7956, "step": 78 }, { "epoch": 1.76, "learning_rate": 9.256198347107439e-05, "loss": 1.684, "step": 80 }, { "epoch": 1.8, "learning_rate": 8.925619834710744e-05, "loss": 1.6579, "step": 82 }, { "epoch": 1.85, "learning_rate": 8.595041322314051e-05, "loss": 1.7188, "step": 84 }, { "epoch": 1.89, "learning_rate": 8.264462809917356e-05, "loss": 1.6533, "step": 86 }, { "epoch": 1.93, "learning_rate": 8.099173553719009e-05, "loss": 1.9246, "step": 88 }, { "epoch": 1.98, "learning_rate": 7.768595041322314e-05, "loss": 1.7969, "step": 90 } ], "logging_steps": 2, "max_steps": 135, "num_train_epochs": 3, "save_steps": 500, "total_flos": 5.946469637318246e+16, "trial_name": null, "trial_params": null }