|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.1847133757961785, |
|
"eval_steps": 25, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.3797595190380762e-05, |
|
"loss": 1.5869, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.343457579612732, |
|
"eval_runtime": 45.0202, |
|
"eval_samples_per_second": 6.775, |
|
"eval_steps_per_second": 0.444, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.2545090180360722e-05, |
|
"loss": 0.998, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.230006456375122, |
|
"eval_runtime": 45.0029, |
|
"eval_samples_per_second": 6.777, |
|
"eval_steps_per_second": 0.444, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.1292585170340683e-05, |
|
"loss": 0.8627, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.1211909055709839, |
|
"eval_runtime": 45.0426, |
|
"eval_samples_per_second": 6.771, |
|
"eval_steps_per_second": 0.444, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0040080160320643e-05, |
|
"loss": 0.8958, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.1045145988464355, |
|
"eval_runtime": 45.2025, |
|
"eval_samples_per_second": 6.747, |
|
"eval_steps_per_second": 0.442, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.87875751503006e-05, |
|
"loss": 0.8205, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.0842006206512451, |
|
"eval_runtime": 45.2525, |
|
"eval_samples_per_second": 6.74, |
|
"eval_steps_per_second": 0.442, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.7535070140280564e-05, |
|
"loss": 0.8141, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.0894731283187866, |
|
"eval_runtime": 45.1805, |
|
"eval_samples_per_second": 6.751, |
|
"eval_steps_per_second": 0.443, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.628256513026052e-05, |
|
"loss": 0.6384, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 1.1109672784805298, |
|
"eval_runtime": 45.2193, |
|
"eval_samples_per_second": 6.745, |
|
"eval_steps_per_second": 0.442, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.5030060120240483e-05, |
|
"loss": 0.5813, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 1.1189604997634888, |
|
"eval_runtime": 45.2591, |
|
"eval_samples_per_second": 6.739, |
|
"eval_steps_per_second": 0.442, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.3777555110220442e-05, |
|
"loss": 0.568, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 1.1027684211730957, |
|
"eval_runtime": 45.2328, |
|
"eval_samples_per_second": 6.743, |
|
"eval_steps_per_second": 0.442, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.25250501002004e-05, |
|
"loss": 0.5605, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 1.1073968410491943, |
|
"eval_runtime": 45.4023, |
|
"eval_samples_per_second": 6.718, |
|
"eval_steps_per_second": 0.441, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.1272545090180361e-05, |
|
"loss": 0.6342, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.091808557510376, |
|
"eval_runtime": 45.3362, |
|
"eval_samples_per_second": 6.728, |
|
"eval_steps_per_second": 0.441, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0020040080160322e-05, |
|
"loss": 0.5254, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 1.1066254377365112, |
|
"eval_runtime": 45.2929, |
|
"eval_samples_per_second": 6.734, |
|
"eval_steps_per_second": 0.442, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 8.767535070140282e-06, |
|
"loss": 0.4553, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 1.1803815364837646, |
|
"eval_runtime": 45.0968, |
|
"eval_samples_per_second": 6.763, |
|
"eval_steps_per_second": 0.443, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.515030060120242e-06, |
|
"loss": 0.3712, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 1.1674922704696655, |
|
"eval_runtime": 45.0974, |
|
"eval_samples_per_second": 6.763, |
|
"eval_steps_per_second": 0.443, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.2625250501002e-06, |
|
"loss": 0.363, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.180160403251648, |
|
"eval_runtime": 45.1473, |
|
"eval_samples_per_second": 6.756, |
|
"eval_steps_per_second": 0.443, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 5.010020040080161e-06, |
|
"loss": 0.3732, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.1637239456176758, |
|
"eval_runtime": 45.18, |
|
"eval_samples_per_second": 6.751, |
|
"eval_steps_per_second": 0.443, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.757515030060121e-06, |
|
"loss": 0.4188, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 1.1796648502349854, |
|
"eval_runtime": 45.1853, |
|
"eval_samples_per_second": 6.75, |
|
"eval_steps_per_second": 0.443, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.5050100200400804e-06, |
|
"loss": 0.3395, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 1.179069995880127, |
|
"eval_runtime": 45.3296, |
|
"eval_samples_per_second": 6.728, |
|
"eval_steps_per_second": 0.441, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.2525050100200402e-06, |
|
"loss": 0.3736, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 1.176107406616211, |
|
"eval_runtime": 45.2686, |
|
"eval_samples_per_second": 6.738, |
|
"eval_steps_per_second": 0.442, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0, |
|
"loss": 0.2538, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 1.2055637836456299, |
|
"eval_runtime": 45.3063, |
|
"eval_samples_per_second": 6.732, |
|
"eval_steps_per_second": 0.441, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 25, |
|
"total_flos": 6.571164044781158e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|