{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1713, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9883720930232557e-05, "loss": 0.8354, "step": 114 }, { "epoch": 0.4, "learning_rate": 2.890979883192732e-05, "loss": 0.5722, "step": 228 }, { "epoch": 0.6, "learning_rate": 2.6690460739779364e-05, "loss": 0.4889, "step": 342 }, { "epoch": 0.8, "learning_rate": 2.4471122647631407e-05, "loss": 0.457, "step": 456 }, { "epoch": 1.0, "learning_rate": 2.2251784555483453e-05, "loss": 0.3767, "step": 570 }, { "epoch": 1.2, "learning_rate": 2.0032446463335497e-05, "loss": 0.3405, "step": 684 }, { "epoch": 1.4, "learning_rate": 1.781310837118754e-05, "loss": 0.324, "step": 798 }, { "epoch": 1.6, "learning_rate": 1.5593770279039583e-05, "loss": 0.3289, "step": 912 }, { "epoch": 1.8, "learning_rate": 1.3374432186891629e-05, "loss": 0.3386, "step": 1026 }, { "epoch": 2.0, "learning_rate": 1.1155094094743672e-05, "loss": 0.3321, "step": 1140 }, { "epoch": 2.2, "learning_rate": 8.935756002595717e-06, "loss": 0.3055, "step": 1254 }, { "epoch": 2.4, "learning_rate": 6.716417910447762e-06, "loss": 0.304, "step": 1368 }, { "epoch": 2.6, "learning_rate": 4.4970798182998056e-06, "loss": 0.2991, "step": 1482 }, { "epoch": 2.8, "learning_rate": 2.2777417261518495e-06, "loss": 0.3181, "step": 1596 }, { "epoch": 2.99, "learning_rate": 5.840363400389358e-08, "loss": 0.3057, "step": 1710 } ], "logging_steps": 114, "max_steps": 1713, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.3925820288466944e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }