pasha / trainer_state.json
mijungkim's picture
End of training
a831e94
raw
history blame
4.52 kB
{
"best_metric": 0.9868823000898472,
"best_model_checkpoint": "pasha/checkpoint-1000",
"epoch": 21.27659574468085,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.13,
"eval_accuracy": 0.9566055930568949,
"eval_f1": 0.9482633863965269,
"eval_loss": 0.2661653161048889,
"eval_precision": 0.9523982558139535,
"eval_recall": 0.944164265129683,
"eval_runtime": 15.4131,
"eval_samples_per_second": 12.262,
"eval_steps_per_second": 0.779,
"step": 100
},
{
"epoch": 4.26,
"eval_accuracy": 0.9850530376084861,
"eval_f1": 0.9795185052102047,
"eval_loss": 0.1026068776845932,
"eval_precision": 0.9770609318996416,
"eval_recall": 0.9819884726224783,
"eval_runtime": 15.204,
"eval_samples_per_second": 12.431,
"eval_steps_per_second": 0.789,
"step": 200
},
{
"epoch": 6.38,
"eval_accuracy": 0.9884281581485053,
"eval_f1": 0.9849137931034483,
"eval_loss": 0.07217290997505188,
"eval_precision": 0.9820916905444126,
"eval_recall": 0.9877521613832853,
"eval_runtime": 15.2143,
"eval_samples_per_second": 12.423,
"eval_steps_per_second": 0.789,
"step": 300
},
{
"epoch": 8.51,
"eval_accuracy": 0.9891513982642237,
"eval_f1": 0.9857785778577858,
"eval_loss": 0.060767240822315216,
"eval_precision": 0.9852464915437208,
"eval_recall": 0.9863112391930836,
"eval_runtime": 15.3275,
"eval_samples_per_second": 12.331,
"eval_steps_per_second": 0.783,
"step": 400
},
{
"epoch": 10.64,
"learning_rate": 5e-06,
"loss": 0.2962,
"step": 500
},
{
"epoch": 10.64,
"eval_accuracy": 0.9889103182256509,
"eval_f1": 0.9854185418541853,
"eval_loss": 0.060581281781196594,
"eval_precision": 0.9848866498740554,
"eval_recall": 0.9859510086455331,
"eval_runtime": 14.8027,
"eval_samples_per_second": 12.768,
"eval_steps_per_second": 0.811,
"step": 500
},
{
"epoch": 12.77,
"eval_accuracy": 0.9920443587270974,
"eval_f1": 0.988501616960115,
"eval_loss": 0.0517994724214077,
"eval_precision": 0.986021505376344,
"eval_recall": 0.9909942363112392,
"eval_runtime": 14.7335,
"eval_samples_per_second": 12.828,
"eval_steps_per_second": 0.814,
"step": 600
},
{
"epoch": 14.89,
"eval_accuracy": 0.9922854387656702,
"eval_f1": 0.988679245283019,
"eval_loss": 0.052589546889066696,
"eval_precision": 0.9863750448189316,
"eval_recall": 0.9909942363112392,
"eval_runtime": 14.9339,
"eval_samples_per_second": 12.656,
"eval_steps_per_second": 0.804,
"step": 700
},
{
"epoch": 17.02,
"eval_accuracy": 0.991321118611379,
"eval_f1": 0.9872416891284815,
"eval_loss": 0.05428989231586456,
"eval_precision": 0.984940839010398,
"eval_recall": 0.9895533141210374,
"eval_runtime": 14.8022,
"eval_samples_per_second": 12.768,
"eval_steps_per_second": 0.811,
"step": 800
},
{
"epoch": 19.15,
"eval_accuracy": 0.9910800385728061,
"eval_f1": 0.9867002156721782,
"eval_loss": 0.05573796480894089,
"eval_precision": 0.9845767575322812,
"eval_recall": 0.9888328530259366,
"eval_runtime": 14.7741,
"eval_samples_per_second": 12.793,
"eval_steps_per_second": 0.812,
"step": 900
},
{
"epoch": 21.28,
"learning_rate": 0.0,
"loss": 0.0255,
"step": 1000
},
{
"epoch": 21.28,
"eval_accuracy": 0.9908389585342333,
"eval_f1": 0.9868823000898472,
"eval_loss": 0.05581057444214821,
"eval_precision": 0.9845822875582646,
"eval_recall": 0.989193083573487,
"eval_runtime": 15.103,
"eval_samples_per_second": 12.514,
"eval_steps_per_second": 0.795,
"step": 1000
},
{
"epoch": 21.28,
"step": 1000,
"total_flos": 4247054450688000.0,
"train_loss": 0.1608761215209961,
"train_runtime": 1880.5995,
"train_samples_per_second": 8.508,
"train_steps_per_second": 0.532
}
],
"max_steps": 1000,
"num_train_epochs": 22,
"total_flos": 4247054450688000.0,
"trial_name": null,
"trial_params": null
}