{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.876543209876543, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9876543209876543, "grad_norm": 8.28109359741211, "learning_rate": 4.5e-05, "loss": 0.8062, "step": 20 }, { "epoch": 0.9876543209876543, "eval_accuracy": 0.818815331010453, "eval_loss": 0.4432358145713806, "eval_runtime": 2.3752, "eval_samples_per_second": 120.833, "eval_steps_per_second": 3.789, "step": 20 }, { "epoch": 1.9753086419753085, "grad_norm": 9.780616760253906, "learning_rate": 4e-05, "loss": 0.4153, "step": 40 }, { "epoch": 1.9753086419753085, "eval_accuracy": 0.8536585365853658, "eval_loss": 0.3407208323478699, "eval_runtime": 2.4373, "eval_samples_per_second": 117.752, "eval_steps_per_second": 3.693, "step": 40 }, { "epoch": 2.962962962962963, "grad_norm": 6.261844635009766, "learning_rate": 3.5e-05, "loss": 0.3213, "step": 60 }, { "epoch": 2.962962962962963, "eval_accuracy": 0.9372822299651568, "eval_loss": 0.1876000016927719, "eval_runtime": 2.4811, "eval_samples_per_second": 115.675, "eval_steps_per_second": 3.627, "step": 60 }, { "epoch": 4.0, "grad_norm": 10.462350845336914, "learning_rate": 2.975e-05, "loss": 0.2633, "step": 81 }, { "epoch": 4.0, "eval_accuracy": 0.9442508710801394, "eval_loss": 0.15536989271640778, "eval_runtime": 2.8656, "eval_samples_per_second": 100.154, "eval_steps_per_second": 3.141, "step": 81 }, { "epoch": 4.987654320987654, "grad_norm": 6.831620693206787, "learning_rate": 2.4750000000000002e-05, "loss": 0.2201, "step": 101 }, { "epoch": 4.987654320987654, "eval_accuracy": 0.9547038327526133, "eval_loss": 0.13280798494815826, "eval_runtime": 2.4786, "eval_samples_per_second": 115.792, "eval_steps_per_second": 3.631, "step": 101 }, { "epoch": 5.9753086419753085, "grad_norm": 8.320969581604004, "learning_rate": 1.9750000000000002e-05, "loss": 0.2087, "step": 121 }, { "epoch": 5.9753086419753085, "eval_accuracy": 0.9721254355400697, "eval_loss": 0.08554696291685104, "eval_runtime": 2.6901, "eval_samples_per_second": 106.686, "eval_steps_per_second": 3.346, "step": 121 }, { "epoch": 6.962962962962963, "grad_norm": 5.462257385253906, "learning_rate": 1.475e-05, "loss": 0.1797, "step": 141 }, { "epoch": 6.962962962962963, "eval_accuracy": 0.9442508710801394, "eval_loss": 0.12809309363365173, "eval_runtime": 2.4948, "eval_samples_per_second": 115.041, "eval_steps_per_second": 3.608, "step": 141 }, { "epoch": 8.0, "grad_norm": 6.069087982177734, "learning_rate": 9.5e-06, "loss": 0.1478, "step": 162 }, { "epoch": 8.0, "eval_accuracy": 0.9721254355400697, "eval_loss": 0.08397921919822693, "eval_runtime": 2.5152, "eval_samples_per_second": 114.108, "eval_steps_per_second": 3.578, "step": 162 }, { "epoch": 8.987654320987655, "grad_norm": 12.428985595703125, "learning_rate": 4.5e-06, "loss": 0.1545, "step": 182 }, { "epoch": 8.987654320987655, "eval_accuracy": 0.9686411149825784, "eval_loss": 0.08367497473955154, "eval_runtime": 2.4429, "eval_samples_per_second": 117.485, "eval_steps_per_second": 3.684, "step": 182 }, { "epoch": 9.876543209876543, "grad_norm": 2.292888641357422, "learning_rate": 0.0, "loss": 0.1315, "step": 200 }, { "epoch": 9.876543209876543, "eval_accuracy": 0.9721254355400697, "eval_loss": 0.07933783531188965, "eval_runtime": 2.7805, "eval_samples_per_second": 103.217, "eval_steps_per_second": 3.237, "step": 200 }, { "epoch": 9.876543209876543, "step": 200, "total_flos": 6.343354306682266e+17, "train_loss": 0.2855896496772766, "train_runtime": 464.8772, "train_samples_per_second": 55.563, "train_steps_per_second": 0.43 }, { "epoch": 9.876543209876543, "eval_accuracy": 0.9562524196670538, "eval_loss": 0.11971130222082138, "eval_runtime": 24.8934, "eval_samples_per_second": 103.763, "eval_steps_per_second": 3.254, "step": 200 }, { "epoch": 9.876543209876543, "eval_accuracy": 0.9562524196670538, "eval_loss": 0.12554492056369781, "eval_runtime": 27.9535, "eval_samples_per_second": 92.403, "eval_steps_per_second": 2.898, "step": 200 }, { "epoch": 9.876543209876543, "eval_accuracy": 0.9721254355400697, "eval_loss": 0.07933783531188965, "eval_runtime": 2.5048, "eval_samples_per_second": 114.582, "eval_steps_per_second": 3.593, "step": 200 }, { "epoch": 9.876543209876543, "eval_accuracy": 0.9519938056523423, "eval_loss": 0.1284445822238922, "eval_runtime": 22.1837, "eval_samples_per_second": 116.437, "eval_steps_per_second": 3.651, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.343354306682266e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }