|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 4992, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.4750000000000002e-05, |
|
"loss": 1.0323, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.975e-05, |
|
"loss": 0.8721, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6721768379211426, |
|
"eval_loss": 0.8143573999404907, |
|
"eval_runtime": 184.4068, |
|
"eval_samples_per_second": 54.456, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.38001002004008e-05, |
|
"loss": 0.7668, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.7537575150300605e-05, |
|
"loss": 0.6107, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7067317366600037, |
|
"eval_loss": 0.8285614252090454, |
|
"eval_runtime": 184.2958, |
|
"eval_samples_per_second": 54.488, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.12875751503006e-05, |
|
"loss": 0.5935, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.5025050100200405e-05, |
|
"loss": 0.2335, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.8762525050100204e-05, |
|
"loss": 0.2216, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.731925904750824, |
|
"eval_loss": 1.0460597276687622, |
|
"eval_runtime": 184.6236, |
|
"eval_samples_per_second": 54.392, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1372, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 6.2374749498998e-06, |
|
"loss": 0.0771, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7498506307601929, |
|
"eval_loss": 1.6021397113800049, |
|
"eval_runtime": 184.1793, |
|
"eval_samples_per_second": 54.523, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 4992, |
|
"total_flos": 0, |
|
"train_runtime": 4895.7804, |
|
"train_samples_per_second": 1.02 |
|
} |
|
], |
|
"max_steps": 4992, |
|
"num_train_epochs": 4, |
|
"total_flos": 0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|