{
  "best_metric": 0.915035799522673,
  "best_model_checkpoint": "./save_models/qnli/roberta-base_lr1e-05/checkpoint-2214",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 3690,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8865871121718377,
      "eval_loss": 0.28326648473739624,
      "eval_runtime": 8.5588,
      "eval_samples_per_second": 1223.884,
      "eval_steps_per_second": 4.79,
      "step": 369
    },
    {
      "epoch": 1.36,
      "learning_rate": 9.198385236447522e-06,
      "loss": 0.4241,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9020525059665871,
      "eval_loss": 0.24534918367862701,
      "eval_runtime": 8.5693,
      "eval_samples_per_second": 1222.389,
      "eval_steps_per_second": 4.785,
      "step": 738
    },
    {
      "epoch": 2.71,
      "learning_rate": 7.756632064590542e-06,
      "loss": 0.2508,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9043436754176611,
      "eval_loss": 0.2600307762622833,
      "eval_runtime": 8.557,
      "eval_samples_per_second": 1224.147,
      "eval_steps_per_second": 4.791,
      "step": 1107
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9120763723150358,
      "eval_loss": 0.23106025159358978,
      "eval_runtime": 8.5249,
      "eval_samples_per_second": 1228.76,
      "eval_steps_per_second": 4.809,
      "step": 1476
    },
    {
      "epoch": 4.07,
      "learning_rate": 6.314878892733565e-06,
      "loss": 0.2044,
      "step": 1500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9064439140811456,
      "eval_loss": 0.2730174660682678,
      "eval_runtime": 8.491,
      "eval_samples_per_second": 1233.666,
      "eval_steps_per_second": 4.829,
      "step": 1845
    },
    {
      "epoch": 5.42,
      "learning_rate": 4.873125720876586e-06,
      "loss": 0.1703,
      "step": 2000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.915035799522673,
      "eval_loss": 0.2521490752696991,
      "eval_runtime": 8.662,
      "eval_samples_per_second": 1209.303,
      "eval_steps_per_second": 4.733,
      "step": 2214
    },
    {
      "epoch": 6.78,
      "learning_rate": 3.431372549019608e-06,
      "loss": 0.1503,
      "step": 2500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9115990453460621,
      "eval_loss": 0.26341933012008667,
      "eval_runtime": 8.515,
      "eval_samples_per_second": 1230.176,
      "eval_steps_per_second": 4.815,
      "step": 2583
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9114081145584726,
      "eval_loss": 0.2649821639060974,
      "eval_runtime": 8.6484,
      "eval_samples_per_second": 1211.201,
      "eval_steps_per_second": 4.741,
      "step": 2952
    },
    {
      "epoch": 8.13,
      "learning_rate": 1.9896193771626298e-06,
      "loss": 0.1338,
      "step": 3000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9138902147971361,
      "eval_loss": 0.2706736922264099,
      "eval_runtime": 8.5199,
      "eval_samples_per_second": 1229.479,
      "eval_steps_per_second": 4.812,
      "step": 3321
    },
    {
      "epoch": 9.49,
      "learning_rate": 5.478662053056517e-07,
      "loss": 0.1216,
      "step": 3500
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9143675417661098,
      "eval_loss": 0.2753640413284302,
      "eval_runtime": 8.4934,
      "eval_samples_per_second": 1233.316,
      "eval_steps_per_second": 4.827,
      "step": 3690
    },
    {
      "epoch": 10.0,
      "step": 3690,
      "total_flos": 6.001437167232744e+16,
      "train_loss": 0.20311757839792144,
      "train_runtime": 1451.9977,
      "train_samples_per_second": 649.23,
      "train_steps_per_second": 2.541
    }
  ],
  "logging_steps": 500,
  "max_steps": 3690,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 6.001437167232744e+16,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}