{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.991869918699187, "eval_steps": 500, "global_step": 61, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016260162601626018, "grad_norm": 44.75566557733456, "learning_rate": 2.8571428571428573e-06, "loss": 1.819, "step": 1 }, { "epoch": 0.08130081300813008, "grad_norm": 22.155759748420145, "learning_rate": 1.4285714285714287e-05, "loss": 1.6903, "step": 5 }, { "epoch": 0.16260162601626016, "grad_norm": 13.109775732485431, "learning_rate": 1.9848077530122083e-05, "loss": 1.4186, "step": 10 }, { "epoch": 0.24390243902439024, "grad_norm": 13.52084711445301, "learning_rate": 1.8936326403234125e-05, "loss": 1.2585, "step": 15 }, { "epoch": 0.3252032520325203, "grad_norm": 12.039835289226906, "learning_rate": 1.7273736415730488e-05, "loss": 1.0465, "step": 20 }, { "epoch": 0.4065040650406504, "grad_norm": 8.6077622745051, "learning_rate": 1.5000000000000002e-05, "loss": 0.8583, "step": 25 }, { "epoch": 0.4878048780487805, "grad_norm": 5.273120012817056, "learning_rate": 1.2306158707424402e-05, "loss": 0.7416, "step": 30 }, { "epoch": 0.5691056910569106, "grad_norm": 2.4961015181305304, "learning_rate": 9.418551710895243e-06, "loss": 0.699, "step": 35 }, { "epoch": 0.6504065040650406, "grad_norm": 1.9961638134484518, "learning_rate": 6.579798566743314e-06, "loss": 0.6797, "step": 40 }, { "epoch": 0.7317073170731707, "grad_norm": 1.6758021856566212, "learning_rate": 4.028414082972141e-06, "loss": 0.6653, "step": 45 }, { "epoch": 0.8130081300813008, "grad_norm": 1.466022130424723, "learning_rate": 1.9787680724495617e-06, "loss": 0.6514, "step": 50 }, { "epoch": 0.8943089430894309, "grad_norm": 1.2570236883108519, "learning_rate": 6.030737921409169e-07, "loss": 0.6458, "step": 55 }, { "epoch": 0.975609756097561, "grad_norm": 1.2304481698504517, "learning_rate": 1.6918417287318245e-08, "loss": 0.6421, "step": 60 }, { "epoch": 0.991869918699187, "eval_loss": 2.0331625938415527, "eval_runtime": 0.7095, "eval_samples_per_second": 33.828, "eval_steps_per_second": 1.409, "step": 61 }, { "epoch": 0.991869918699187, "step": 61, "total_flos": 25439628165120.0, "train_loss": 0.9141011159928119, "train_runtime": 665.6488, "train_samples_per_second": 47.124, "train_steps_per_second": 0.092 } ], "logging_steps": 5, "max_steps": 61, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 25439628165120.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }