|
{ |
|
"best_metric": 0.9868823000898472, |
|
"best_model_checkpoint": "pasha/checkpoint-1000", |
|
"epoch": 21.27659574468085, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.9566055930568949, |
|
"eval_f1": 0.9482633863965269, |
|
"eval_loss": 0.2661653161048889, |
|
"eval_precision": 0.9523982558139535, |
|
"eval_recall": 0.944164265129683, |
|
"eval_runtime": 15.4131, |
|
"eval_samples_per_second": 12.262, |
|
"eval_steps_per_second": 0.779, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_accuracy": 0.9850530376084861, |
|
"eval_f1": 0.9795185052102047, |
|
"eval_loss": 0.1026068776845932, |
|
"eval_precision": 0.9770609318996416, |
|
"eval_recall": 0.9819884726224783, |
|
"eval_runtime": 15.204, |
|
"eval_samples_per_second": 12.431, |
|
"eval_steps_per_second": 0.789, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_accuracy": 0.9884281581485053, |
|
"eval_f1": 0.9849137931034483, |
|
"eval_loss": 0.07217290997505188, |
|
"eval_precision": 0.9820916905444126, |
|
"eval_recall": 0.9877521613832853, |
|
"eval_runtime": 15.2143, |
|
"eval_samples_per_second": 12.423, |
|
"eval_steps_per_second": 0.789, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"eval_accuracy": 0.9891513982642237, |
|
"eval_f1": 0.9857785778577858, |
|
"eval_loss": 0.060767240822315216, |
|
"eval_precision": 0.9852464915437208, |
|
"eval_recall": 0.9863112391930836, |
|
"eval_runtime": 15.3275, |
|
"eval_samples_per_second": 12.331, |
|
"eval_steps_per_second": 0.783, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2962, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"eval_accuracy": 0.9889103182256509, |
|
"eval_f1": 0.9854185418541853, |
|
"eval_loss": 0.060581281781196594, |
|
"eval_precision": 0.9848866498740554, |
|
"eval_recall": 0.9859510086455331, |
|
"eval_runtime": 14.8027, |
|
"eval_samples_per_second": 12.768, |
|
"eval_steps_per_second": 0.811, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"eval_accuracy": 0.9920443587270974, |
|
"eval_f1": 0.988501616960115, |
|
"eval_loss": 0.0517994724214077, |
|
"eval_precision": 0.986021505376344, |
|
"eval_recall": 0.9909942363112392, |
|
"eval_runtime": 14.7335, |
|
"eval_samples_per_second": 12.828, |
|
"eval_steps_per_second": 0.814, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"eval_accuracy": 0.9922854387656702, |
|
"eval_f1": 0.988679245283019, |
|
"eval_loss": 0.052589546889066696, |
|
"eval_precision": 0.9863750448189316, |
|
"eval_recall": 0.9909942363112392, |
|
"eval_runtime": 14.9339, |
|
"eval_samples_per_second": 12.656, |
|
"eval_steps_per_second": 0.804, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_accuracy": 0.991321118611379, |
|
"eval_f1": 0.9872416891284815, |
|
"eval_loss": 0.05428989231586456, |
|
"eval_precision": 0.984940839010398, |
|
"eval_recall": 0.9895533141210374, |
|
"eval_runtime": 14.8022, |
|
"eval_samples_per_second": 12.768, |
|
"eval_steps_per_second": 0.811, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"eval_accuracy": 0.9910800385728061, |
|
"eval_f1": 0.9867002156721782, |
|
"eval_loss": 0.05573796480894089, |
|
"eval_precision": 0.9845767575322812, |
|
"eval_recall": 0.9888328530259366, |
|
"eval_runtime": 14.7741, |
|
"eval_samples_per_second": 12.793, |
|
"eval_steps_per_second": 0.812, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 0.0, |
|
"loss": 0.0255, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"eval_accuracy": 0.9908389585342333, |
|
"eval_f1": 0.9868823000898472, |
|
"eval_loss": 0.05581057444214821, |
|
"eval_precision": 0.9845822875582646, |
|
"eval_recall": 0.989193083573487, |
|
"eval_runtime": 15.103, |
|
"eval_samples_per_second": 12.514, |
|
"eval_steps_per_second": 0.795, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"step": 1000, |
|
"total_flos": 4247054450688000.0, |
|
"train_loss": 0.1608761215209961, |
|
"train_runtime": 1880.5995, |
|
"train_samples_per_second": 8.508, |
|
"train_steps_per_second": 0.532 |
|
} |
|
], |
|
"max_steps": 1000, |
|
"num_train_epochs": 22, |
|
"total_flos": 4247054450688000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|