|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 328, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019393939393939395, |
|
"loss": 0.5722, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00017898305084745764, |
|
"loss": 0.6073, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00015728813559322036, |
|
"loss": 0.6517, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00013559322033898305, |
|
"loss": 0.4895, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00011389830508474577, |
|
"loss": 0.6097, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.220338983050847e-05, |
|
"loss": 0.6041, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 7.05084745762712e-05, |
|
"loss": 0.4871, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.88135593220339e-05, |
|
"loss": 0.601, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.711864406779661e-05, |
|
"loss": 0.5946, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.423728813559322e-06, |
|
"loss": 0.5771, |
|
"step": 320 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 328, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 1.016694903743447e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|