|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9446693657219973, |
|
"eval_steps": 50, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.4132792949676514, |
|
"eval_runtime": 24.6579, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.527, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.3802086114883423, |
|
"eval_runtime": 24.6958, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.526, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.3620883226394653, |
|
"eval_runtime": 24.6721, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.527, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.34859037399292, |
|
"eval_runtime": 24.7313, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.526, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.3401516675949097, |
|
"eval_runtime": 24.6981, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.526, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.3350552320480347, |
|
"eval_runtime": 24.7526, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.525, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.3288078308105469, |
|
"eval_runtime": 24.7114, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.526, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.3192832469940186, |
|
"eval_runtime": 24.7246, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.526, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.3144173622131348, |
|
"eval_runtime": 24.6861, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.527, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.224606580829757e-06, |
|
"loss": 1.3853, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.3092302083969116, |
|
"eval_runtime": 24.7017, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.526, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.303202509880066, |
|
"eval_runtime": 24.6861, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.527, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.29935884475708, |
|
"eval_runtime": 24.6791, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.527, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.2970906496047974, |
|
"eval_runtime": 24.7065, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.526, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.2959811687469482, |
|
"eval_runtime": 24.7115, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.526, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 700, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 4.49645833728e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|