{ "best_metric": 1.3409229516983032, "best_model_checkpoint": "results/checkpoint-2000", "epoch": 1.4553392759687103, "eval_steps": 250, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18191740949608878, "grad_norm": 0.2860318124294281, "learning_rate": 2.976310216494586e-05, "loss": 1.4647, "step": 250 }, { "epoch": 0.18191740949608878, "eval_loss": 1.352885127067566, "eval_runtime": 13.6056, "eval_samples_per_second": 163.241, "eval_steps_per_second": 20.433, "step": 250 }, { "epoch": 0.36383481899217757, "grad_norm": 0.2909524440765381, "learning_rate": 2.8342402320213494e-05, "loss": 1.4643, "step": 500 }, { "epoch": 0.36383481899217757, "eval_loss": 1.3520773649215698, "eval_runtime": 13.6233, "eval_samples_per_second": 163.029, "eval_steps_per_second": 20.406, "step": 500 }, { "epoch": 0.5457522284882663, "grad_norm": 0.2827763557434082, "learning_rate": 2.5756526053283042e-05, "loss": 1.4622, "step": 750 }, { "epoch": 0.5457522284882663, "eval_loss": 1.350306749343872, "eval_runtime": 13.6314, "eval_samples_per_second": 162.932, "eval_steps_per_second": 20.394, "step": 750 }, { "epoch": 0.7276696379843551, "grad_norm": 0.29242751002311707, "learning_rate": 2.223129492047081e-05, "loss": 1.4625, "step": 1000 }, { "epoch": 0.7276696379843551, "eval_loss": 1.3474653959274292, "eval_runtime": 13.6357, "eval_samples_per_second": 162.882, "eval_steps_per_second": 20.388, "step": 1000 }, { "epoch": 0.9095870474804438, "grad_norm": 0.2826482653617859, "learning_rate": 1.8074563242173716e-05, "loss": 1.4591, "step": 1250 }, { "epoch": 0.9095870474804438, "eval_loss": 1.3454625606536865, "eval_runtime": 13.6252, "eval_samples_per_second": 163.006, "eval_steps_per_second": 20.403, "step": 1250 }, { "epoch": 1.0915044569765326, "grad_norm": 0.28281259536743164, "learning_rate": 1.3649333544377501e-05, "loss": 1.4496, "step": 1500 }, { "epoch": 1.0915044569765326, "eval_loss": 1.3437364101409912, "eval_runtime": 13.6378, "eval_samples_per_second": 162.856, "eval_steps_per_second": 20.384, "step": 1500 }, { "epoch": 1.2734218664726216, "grad_norm": 0.2805255651473999, "learning_rate": 9.34205597173652e-06, "loss": 1.4453, "step": 1750 }, { "epoch": 1.2734218664726216, "eval_loss": 1.3426544666290283, "eval_runtime": 13.6433, "eval_samples_per_second": 162.791, "eval_steps_per_second": 20.376, "step": 1750 }, { "epoch": 1.4553392759687103, "grad_norm": 0.2710939645767212, "learning_rate": 5.528880047481714e-06, "loss": 1.4438, "step": 2000 }, { "epoch": 1.4553392759687103, "eval_loss": 1.3409229516983032, "eval_runtime": 13.6488, "eval_samples_per_second": 162.725, "eval_steps_per_second": 20.368, "step": 2000 } ], "logging_steps": 250, "max_steps": 2748, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.67219584303104e+17, "train_batch_size": 40, "trial_name": null, "trial_params": null }