{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.17173278378842521, "eval_steps": 50, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.3773773773773775e-05, "loss": 0.9928, "step": 50 }, { "epoch": 0.01, "eval_loss": 0.8941472172737122, "eval_runtime": 5245.4303, "eval_samples_per_second": 0.987, "eval_steps_per_second": 0.123, "step": 50 }, { "epoch": 0.02, "learning_rate": 2.2522522522522523e-05, "loss": 0.877, "step": 100 }, { "epoch": 0.02, "eval_loss": 0.8646272420883179, "eval_runtime": 5219.884, "eval_samples_per_second": 0.992, "eval_steps_per_second": 0.124, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.1271271271271275e-05, "loss": 0.8642, "step": 150 }, { "epoch": 0.03, "eval_loss": 0.8599761128425598, "eval_runtime": 5214.4761, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 150 }, { "epoch": 0.03, "learning_rate": 2.0020020020020023e-05, "loss": 0.8576, "step": 200 }, { "epoch": 0.03, "eval_loss": 0.8569617867469788, "eval_runtime": 5214.0664, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.8768768768768768e-05, "loss": 0.8349, "step": 250 }, { "epoch": 0.04, "eval_loss": 0.8535052537918091, "eval_runtime": 5222.9322, "eval_samples_per_second": 0.991, "eval_steps_per_second": 0.124, "step": 250 }, { "epoch": 0.05, "learning_rate": 1.7517517517517516e-05, "loss": 0.85, "step": 300 }, { "epoch": 0.05, "eval_loss": 0.8515381813049316, "eval_runtime": 5214.0267, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 300 }, { "epoch": 0.06, "learning_rate": 1.6266266266266268e-05, "loss": 0.8548, "step": 350 }, { "epoch": 0.06, "eval_loss": 0.8497709035873413, "eval_runtime": 5213.6211, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.5015015015015016e-05, "loss": 0.8366, "step": 400 }, { "epoch": 0.07, "eval_loss": 0.8475283980369568, "eval_runtime": 5213.1999, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 400 }, { "epoch": 0.08, "learning_rate": 1.3763763763763765e-05, "loss": 0.8408, "step": 450 }, { "epoch": 0.08, "eval_loss": 0.8464268445968628, "eval_runtime": 5213.2855, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 450 }, { "epoch": 0.09, "learning_rate": 1.2512512512512515e-05, "loss": 0.8645, "step": 500 }, { "epoch": 0.09, "eval_loss": 0.8458148241043091, "eval_runtime": 5213.7104, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 500 }, { "epoch": 0.09, "learning_rate": 1.1261261261261261e-05, "loss": 0.8507, "step": 550 }, { "epoch": 0.09, "eval_loss": 0.8435949087142944, "eval_runtime": 5210.7804, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 550 }, { "epoch": 0.1, "learning_rate": 1.0010010010010011e-05, "loss": 0.8592, "step": 600 }, { "epoch": 0.1, "eval_loss": 0.8434337973594666, "eval_runtime": 5210.4901, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 600 }, { "epoch": 0.11, "learning_rate": 8.758758758758758e-06, "loss": 0.8326, "step": 650 }, { "epoch": 0.11, "eval_loss": 0.8415650129318237, "eval_runtime": 5212.5986, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 650 }, { "epoch": 0.12, "learning_rate": 7.507507507507508e-06, "loss": 0.8272, "step": 700 }, { "epoch": 0.12, "eval_loss": 0.8408710360527039, "eval_runtime": 5217.8607, "eval_samples_per_second": 0.992, "eval_steps_per_second": 0.124, "step": 700 }, { "epoch": 0.13, "learning_rate": 6.256256256256257e-06, "loss": 0.82, "step": 750 }, { "epoch": 0.13, "eval_loss": 0.8401119709014893, "eval_runtime": 5220.4921, "eval_samples_per_second": 0.991, "eval_steps_per_second": 0.124, "step": 750 }, { "epoch": 0.14, "learning_rate": 5.005005005005006e-06, "loss": 0.826, "step": 800 }, { "epoch": 0.14, "eval_loss": 0.8393945097923279, "eval_runtime": 5287.3794, "eval_samples_per_second": 0.979, "eval_steps_per_second": 0.122, "step": 800 }, { "epoch": 0.15, "learning_rate": 3.753753753753754e-06, "loss": 0.8468, "step": 850 }, { "epoch": 0.15, "eval_loss": 0.8389515280723572, "eval_runtime": 5212.07, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 850 }, { "epoch": 0.15, "learning_rate": 2.502502502502503e-06, "loss": 0.8438, "step": 900 }, { "epoch": 0.15, "eval_loss": 0.8384743928909302, "eval_runtime": 5211.9686, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.124, "step": 900 }, { "epoch": 0.16, "learning_rate": 1.2512512512512514e-06, "loss": 0.8384, "step": 950 }, { "epoch": 0.16, "eval_loss": 0.838046669960022, "eval_runtime": 5209.2343, "eval_samples_per_second": 0.994, "eval_steps_per_second": 0.124, "step": 950 }, { "epoch": 0.17, "learning_rate": 0.0, "loss": 0.8527, "step": 1000 }, { "epoch": 0.17, "eval_loss": 0.8379368782043457, "eval_runtime": 5209.3452, "eval_samples_per_second": 0.994, "eval_steps_per_second": 0.124, "step": 1000 } ], "logging_steps": 50, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "total_flos": 1.76841553870848e+17, "trial_name": null, "trial_params": null }