|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.895193977996526, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8093032233159624e-05, |
|
"loss": 5.93, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.617062343176993e-05, |
|
"loss": 5.1165, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4240494113105579e-05, |
|
"loss": 4.748, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 4.125702381134033, |
|
"eval_runtime": 102.7952, |
|
"eval_samples_per_second": 299.518, |
|
"eval_steps_per_second": 37.443, |
|
"step": 1727 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2314225053078557e-05, |
|
"loss": 4.5216, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.0384095734414205e-05, |
|
"loss": 4.3356, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.457826674387185e-06, |
|
"loss": 4.1988, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 3.5067617893218994, |
|
"eval_runtime": 103.1393, |
|
"eval_samples_per_second": 298.519, |
|
"eval_steps_per_second": 37.318, |
|
"step": 3454 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.527697355722834e-06, |
|
"loss": 4.0849, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.597568037058483e-06, |
|
"loss": 3.9975, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.6674387183941326e-06, |
|
"loss": 3.9402, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 7.37309399729782e-07, |
|
"loss": 3.8973, |
|
"step": 5000 |
|
} |
|
], |
|
"max_steps": 5181, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.74331103920128e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|