|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 48.0, |
|
"eval_steps": 500, |
|
"global_step": 57180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002399244332493703, |
|
"loss": 1.697, |
|
"step": 1906 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0004798488664987406, |
|
"loss": 1.6518, |
|
"step": 3812 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0007192695214105793, |
|
"loss": 1.6587, |
|
"step": 5718 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0009591939546599496, |
|
"loss": 1.7073, |
|
"step": 7624 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.001198866498740554, |
|
"loss": 1.7625, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0014389168765743072, |
|
"loss": 1.8284, |
|
"step": 11436 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.001480142737195634, |
|
"loss": 1.8887, |
|
"step": 13342 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0014534844668345928, |
|
"loss": 1.9438, |
|
"step": 15248 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0014268122026308424, |
|
"loss": 1.9359, |
|
"step": 17154 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0014001679261125106, |
|
"loss": 2.0, |
|
"step": 19060 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0013735236495941786, |
|
"loss": 2.5228, |
|
"step": 20966 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 33.1374, |
|
"step": 22872 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 24778 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 26684 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 28590 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 30496 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 32402 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 34308 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 36214 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 38120 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 40026 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 41932 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 43838 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 45744 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 47650 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 49556 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 51462 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 53368 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 55274 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.0013571228659389869, |
|
"loss": 0.0, |
|
"step": 57180 |
|
} |
|
], |
|
"logging_steps": 1906, |
|
"max_steps": 119100, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 2.0003746684323496e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|