|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.876543209876543, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"grad_norm": 8.28109359741211, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.8062, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9876543209876543, |
|
"eval_accuracy": 0.818815331010453, |
|
"eval_loss": 0.4432358145713806, |
|
"eval_runtime": 2.3752, |
|
"eval_samples_per_second": 120.833, |
|
"eval_steps_per_second": 3.789, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.9753086419753085, |
|
"grad_norm": 9.780616760253906, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4153, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.9753086419753085, |
|
"eval_accuracy": 0.8536585365853658, |
|
"eval_loss": 0.3407208323478699, |
|
"eval_runtime": 2.4373, |
|
"eval_samples_per_second": 117.752, |
|
"eval_steps_per_second": 3.693, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 6.261844635009766, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3213, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"eval_accuracy": 0.9372822299651568, |
|
"eval_loss": 0.1876000016927719, |
|
"eval_runtime": 2.4811, |
|
"eval_samples_per_second": 115.675, |
|
"eval_steps_per_second": 3.627, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 10.462350845336914, |
|
"learning_rate": 2.975e-05, |
|
"loss": 0.2633, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9442508710801394, |
|
"eval_loss": 0.15536989271640778, |
|
"eval_runtime": 2.8656, |
|
"eval_samples_per_second": 100.154, |
|
"eval_steps_per_second": 3.141, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 4.987654320987654, |
|
"grad_norm": 6.831620693206787, |
|
"learning_rate": 2.4750000000000002e-05, |
|
"loss": 0.2201, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 4.987654320987654, |
|
"eval_accuracy": 0.9547038327526133, |
|
"eval_loss": 0.13280798494815826, |
|
"eval_runtime": 2.4786, |
|
"eval_samples_per_second": 115.792, |
|
"eval_steps_per_second": 3.631, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 5.9753086419753085, |
|
"grad_norm": 8.320969581604004, |
|
"learning_rate": 1.9750000000000002e-05, |
|
"loss": 0.2087, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 5.9753086419753085, |
|
"eval_accuracy": 0.9721254355400697, |
|
"eval_loss": 0.08554696291685104, |
|
"eval_runtime": 2.6901, |
|
"eval_samples_per_second": 106.686, |
|
"eval_steps_per_second": 3.346, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 6.962962962962963, |
|
"grad_norm": 5.462257385253906, |
|
"learning_rate": 1.475e-05, |
|
"loss": 0.1797, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 6.962962962962963, |
|
"eval_accuracy": 0.9442508710801394, |
|
"eval_loss": 0.12809309363365173, |
|
"eval_runtime": 2.4948, |
|
"eval_samples_per_second": 115.041, |
|
"eval_steps_per_second": 3.608, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 6.069087982177734, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.1478, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9721254355400697, |
|
"eval_loss": 0.08397921919822693, |
|
"eval_runtime": 2.5152, |
|
"eval_samples_per_second": 114.108, |
|
"eval_steps_per_second": 3.578, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 8.987654320987655, |
|
"grad_norm": 12.428985595703125, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.1545, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 8.987654320987655, |
|
"eval_accuracy": 0.9686411149825784, |
|
"eval_loss": 0.08367497473955154, |
|
"eval_runtime": 2.4429, |
|
"eval_samples_per_second": 117.485, |
|
"eval_steps_per_second": 3.684, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"grad_norm": 2.292888641357422, |
|
"learning_rate": 0.0, |
|
"loss": 0.1315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"eval_accuracy": 0.9721254355400697, |
|
"eval_loss": 0.07933783531188965, |
|
"eval_runtime": 2.7805, |
|
"eval_samples_per_second": 103.217, |
|
"eval_steps_per_second": 3.237, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"step": 200, |
|
"total_flos": 6.343354306682266e+17, |
|
"train_loss": 0.2855896496772766, |
|
"train_runtime": 464.8772, |
|
"train_samples_per_second": 55.563, |
|
"train_steps_per_second": 0.43 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"eval_accuracy": 0.9562524196670538, |
|
"eval_loss": 0.11971130222082138, |
|
"eval_runtime": 24.8934, |
|
"eval_samples_per_second": 103.763, |
|
"eval_steps_per_second": 3.254, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"eval_accuracy": 0.9562524196670538, |
|
"eval_loss": 0.12554492056369781, |
|
"eval_runtime": 27.9535, |
|
"eval_samples_per_second": 92.403, |
|
"eval_steps_per_second": 2.898, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"eval_accuracy": 0.9721254355400697, |
|
"eval_loss": 0.07933783531188965, |
|
"eval_runtime": 2.5048, |
|
"eval_samples_per_second": 114.582, |
|
"eval_steps_per_second": 3.593, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.876543209876543, |
|
"eval_accuracy": 0.9519938056523423, |
|
"eval_loss": 0.1284445822238922, |
|
"eval_runtime": 22.1837, |
|
"eval_samples_per_second": 116.437, |
|
"eval_steps_per_second": 3.651, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.343354306682266e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|