|
{ |
|
"best_metric": 1.098163366317749, |
|
"best_model_checkpoint": "hBERTv1_mnli/checkpoint-19942", |
|
"epoch": 18.0, |
|
"global_step": 27612, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.1001, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0993714332580566, |
|
"eval_runtime": 12.6071, |
|
"eval_samples_per_second": 778.528, |
|
"eval_steps_per_second": 3.093, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.0988, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0989975929260254, |
|
"eval_runtime": 12.5819, |
|
"eval_samples_per_second": 780.088, |
|
"eval_steps_per_second": 3.1, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.0987, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.099189043045044, |
|
"eval_runtime": 12.6822, |
|
"eval_samples_per_second": 773.921, |
|
"eval_steps_per_second": 3.075, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.0987, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0986238718032837, |
|
"eval_runtime": 12.5133, |
|
"eval_samples_per_second": 784.368, |
|
"eval_steps_per_second": 3.117, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.50006518904824e-05, |
|
"loss": 1.0987, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0984646081924438, |
|
"eval_runtime": 12.6008, |
|
"eval_samples_per_second": 778.92, |
|
"eval_steps_per_second": 3.095, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.40013037809648e-05, |
|
"loss": 1.0986, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0987476110458374, |
|
"eval_runtime": 12.4519, |
|
"eval_samples_per_second": 788.232, |
|
"eval_steps_per_second": 3.132, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3003259452411996e-05, |
|
"loss": 1.105, |
|
"step": 10738 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0986319780349731, |
|
"eval_runtime": 12.6436, |
|
"eval_samples_per_second": 776.285, |
|
"eval_steps_per_second": 3.085, |
|
"step": 10738 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2003259452412e-05, |
|
"loss": 1.1045, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0985896587371826, |
|
"eval_runtime": 12.6272, |
|
"eval_samples_per_second": 777.291, |
|
"eval_steps_per_second": 3.089, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1003259452411995e-05, |
|
"loss": 1.0988, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.3273560876209883, |
|
"eval_loss": 1.0983130931854248, |
|
"eval_runtime": 12.4879, |
|
"eval_samples_per_second": 785.962, |
|
"eval_steps_per_second": 3.123, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.0003259452412e-05, |
|
"loss": 1.0987, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.098677158355713, |
|
"eval_runtime": 12.4301, |
|
"eval_samples_per_second": 789.616, |
|
"eval_steps_per_second": 3.138, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9003259452411994e-05, |
|
"loss": 1.0987, |
|
"step": 16874 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0991103649139404, |
|
"eval_runtime": 12.5586, |
|
"eval_samples_per_second": 781.533, |
|
"eval_steps_per_second": 3.105, |
|
"step": 16874 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8003911342894394e-05, |
|
"loss": 1.0986, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0986063480377197, |
|
"eval_runtime": 12.5872, |
|
"eval_samples_per_second": 779.762, |
|
"eval_steps_per_second": 3.098, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.700391134289439e-05, |
|
"loss": 1.0986, |
|
"step": 19942 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.098163366317749, |
|
"eval_runtime": 12.4723, |
|
"eval_samples_per_second": 786.944, |
|
"eval_steps_per_second": 3.127, |
|
"step": 19942 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.600456323337679e-05, |
|
"loss": 1.0986, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0988693237304688, |
|
"eval_runtime": 12.5684, |
|
"eval_samples_per_second": 780.924, |
|
"eval_steps_per_second": 3.103, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.500521512385919e-05, |
|
"loss": 1.0986, |
|
"step": 23010 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.098739504814148, |
|
"eval_runtime": 12.6125, |
|
"eval_samples_per_second": 778.196, |
|
"eval_steps_per_second": 3.092, |
|
"step": 23010 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.4005215123859194e-05, |
|
"loss": 1.0986, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.0985974073410034, |
|
"eval_runtime": 12.4698, |
|
"eval_samples_per_second": 787.103, |
|
"eval_steps_per_second": 3.128, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.300586701434159e-05, |
|
"loss": 1.0986, |
|
"step": 26078 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.3544574630667346, |
|
"eval_loss": 1.098615288734436, |
|
"eval_runtime": 12.5264, |
|
"eval_samples_per_second": 783.545, |
|
"eval_steps_per_second": 3.113, |
|
"step": 26078 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.200586701434159e-05, |
|
"loss": 1.0986, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.31818644931227713, |
|
"eval_loss": 1.0982955694198608, |
|
"eval_runtime": 12.8565, |
|
"eval_samples_per_second": 763.425, |
|
"eval_steps_per_second": 3.033, |
|
"step": 27612 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"step": 27612, |
|
"total_flos": 8.928550500811407e+17, |
|
"train_loss": 1.0994189529164742, |
|
"train_runtime": 25959.7521, |
|
"train_samples_per_second": 756.367, |
|
"train_steps_per_second": 2.955 |
|
} |
|
], |
|
"max_steps": 76700, |
|
"num_train_epochs": 50, |
|
"total_flos": 8.928550500811407e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|