|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 235, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0005454545454545455, |
|
"loss": 1.1356, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.001090909090909091, |
|
"loss": 0.707, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0016363636363636365, |
|
"loss": 0.4786, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00197979797979798, |
|
"loss": 0.3701, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0019191919191919192, |
|
"loss": 0.3513, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0018585858585858585, |
|
"loss": 0.2933, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.001797979797979798, |
|
"loss": 0.2733, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0017373737373737375, |
|
"loss": 0.2281, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.0016767676767676766, |
|
"loss": 0.1794, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.0016161616161616162, |
|
"loss": 0.15, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0015555555555555557, |
|
"loss": 0.1268, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.001494949494949495, |
|
"loss": 0.1058, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0014343434343434343, |
|
"loss": 0.091, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0013737373737373738, |
|
"loss": 0.0752, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.0013131313131313131, |
|
"loss": 0.07, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 0.0012525252525252527, |
|
"loss": 0.0628, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 0.0011919191919191917, |
|
"loss": 0.0569, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 0.0011313131313131313, |
|
"loss": 0.0447, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 0.0010707070707070708, |
|
"loss": 0.0399, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 0.00101010101010101, |
|
"loss": 0.0355, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 0.0009494949494949495, |
|
"loss": 0.0307, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 0.0008888888888888888, |
|
"loss": 0.0253, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 0.0008282828282828283, |
|
"loss": 0.021, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 18.38, |
|
"learning_rate": 0.0007676767676767677, |
|
"loss": 0.0187, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 0.0007070707070707071, |
|
"loss": 0.0157, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 0.0006464646464646465, |
|
"loss": 0.013, |
|
"step": 234 |
|
} |
|
], |
|
"logging_steps": 9, |
|
"max_steps": 330, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 6.559465734144e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|