{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8075370121130552, "eval_steps": 20, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.9393939393939395e-05, "loss": 1.8863, "step": 20 }, { "epoch": 0.05, "eval_loss": 1.8873858451843262, "eval_runtime": 239.4635, "eval_samples_per_second": 1.424, "eval_steps_per_second": 0.359, "step": 20 }, { "epoch": 0.11, "learning_rate": 1.8585858585858588e-05, "loss": 1.8793, "step": 40 }, { "epoch": 0.11, "eval_loss": 1.8863250017166138, "eval_runtime": 239.6014, "eval_samples_per_second": 1.423, "eval_steps_per_second": 0.359, "step": 40 }, { "epoch": 0.16, "learning_rate": 1.7777777777777777e-05, "loss": 1.8959, "step": 60 }, { "epoch": 0.16, "eval_loss": 1.8841357231140137, "eval_runtime": 239.357, "eval_samples_per_second": 1.425, "eval_steps_per_second": 0.359, "step": 60 }, { "epoch": 0.22, "learning_rate": 1.6969696969696972e-05, "loss": 1.8621, "step": 80 }, { "epoch": 0.22, "eval_loss": 1.8818137645721436, "eval_runtime": 238.5328, "eval_samples_per_second": 1.43, "eval_steps_per_second": 0.361, "step": 80 }, { "epoch": 0.27, "learning_rate": 1.616161616161616e-05, "loss": 1.909, "step": 100 }, { "epoch": 0.27, "eval_loss": 1.879331111907959, "eval_runtime": 236.4968, "eval_samples_per_second": 1.442, "eval_steps_per_second": 0.364, "step": 100 }, { "epoch": 0.32, "learning_rate": 1.5353535353535354e-05, "loss": 1.8668, "step": 120 }, { "epoch": 0.32, "eval_loss": 1.876706838607788, "eval_runtime": 236.7299, "eval_samples_per_second": 1.44, "eval_steps_per_second": 0.363, "step": 120 }, { "epoch": 0.38, "learning_rate": 1.4545454545454546e-05, "loss": 1.8485, "step": 140 }, { "epoch": 0.38, "eval_loss": 1.8741997480392456, "eval_runtime": 235.9214, "eval_samples_per_second": 1.445, "eval_steps_per_second": 0.365, "step": 140 }, { "epoch": 0.43, "learning_rate": 1.3737373737373739e-05, "loss": 1.8689, "step": 160 }, { "epoch": 0.43, "eval_loss": 1.8719853162765503, "eval_runtime": 236.0536, "eval_samples_per_second": 1.445, "eval_steps_per_second": 0.364, "step": 160 }, { "epoch": 0.48, "learning_rate": 1.2929292929292931e-05, "loss": 1.8971, "step": 180 }, { "epoch": 0.48, "eval_loss": 1.8698476552963257, "eval_runtime": 238.0229, "eval_samples_per_second": 1.433, "eval_steps_per_second": 0.361, "step": 180 }, { "epoch": 0.54, "learning_rate": 1.2121212121212122e-05, "loss": 1.841, "step": 200 }, { "epoch": 0.54, "eval_loss": 1.8679039478302002, "eval_runtime": 240.0449, "eval_samples_per_second": 1.421, "eval_steps_per_second": 0.358, "step": 200 }, { "epoch": 0.59, "learning_rate": 1.1313131313131314e-05, "loss": 1.8491, "step": 220 }, { "epoch": 0.59, "eval_loss": 1.8661690950393677, "eval_runtime": 239.9828, "eval_samples_per_second": 1.421, "eval_steps_per_second": 0.358, "step": 220 }, { "epoch": 0.65, "learning_rate": 1.0505050505050507e-05, "loss": 1.8359, "step": 240 }, { "epoch": 0.65, "eval_loss": 1.8646190166473389, "eval_runtime": 239.8868, "eval_samples_per_second": 1.422, "eval_steps_per_second": 0.359, "step": 240 }, { "epoch": 0.7, "learning_rate": 9.696969696969698e-06, "loss": 1.8449, "step": 260 }, { "epoch": 0.7, "eval_loss": 1.8632733821868896, "eval_runtime": 240.1384, "eval_samples_per_second": 1.42, "eval_steps_per_second": 0.358, "step": 260 }, { "epoch": 0.75, "learning_rate": 8.888888888888888e-06, "loss": 1.8539, "step": 280 }, { "epoch": 0.75, "eval_loss": 1.862046241760254, "eval_runtime": 240.0518, "eval_samples_per_second": 1.421, "eval_steps_per_second": 0.358, "step": 280 }, { "epoch": 0.81, "learning_rate": 8.08080808080808e-06, "loss": 1.8268, "step": 300 }, { "epoch": 0.81, "eval_loss": 1.8610001802444458, "eval_runtime": 239.6398, "eval_samples_per_second": 1.423, "eval_steps_per_second": 0.359, "step": 300 } ], "logging_steps": 20, "max_steps": 500, "num_train_epochs": 2, "save_steps": 20, "total_flos": 2.089574637502464e+17, "trial_name": null, "trial_params": null }