{
  "best_metric": 0.4402828514575958,
  "best_model_checkpoint": "./runtime-text-classification/electra-small-discriminator-CoLA/checkpoint-68",
  "epoch": 8.0,
  "global_step": 136,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.29,
      "learning_rate": 5.714285714285715e-05,
      "loss": 0.6849,
      "step": 5
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.989329134654207e-05,
      "loss": 0.6448,
      "step": 10
    },
    {
      "epoch": 0.88,
      "learning_rate": 7.924324356368777e-05,
      "loss": 0.6139,
      "step": 15
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5997378826141357,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 0.8941,
      "eval_samples_per_second": 1166.585,
      "eval_steps_per_second": 73.82,
      "step": 17
    },
    {
      "epoch": 1.18,
      "learning_rate": 7.801204537254331e-05,
      "loss": 0.5956,
      "step": 20
    },
    {
      "epoch": 1.47,
      "learning_rate": 7.621792949972588e-05,
      "loss": 0.5731,
      "step": 25
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.388746487984242e-05,
      "loss": 0.5315,
      "step": 30
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.4889530539512634,
      "eval_matthews_correlation": 0.5153742778418894,
      "eval_runtime": 0.9571,
      "eval_samples_per_second": 1089.761,
      "eval_steps_per_second": 68.959,
      "step": 34
    },
    {
      "epoch": 2.06,
      "learning_rate": 7.105516319802298e-05,
      "loss": 0.4982,
      "step": 35
    },
    {
      "epoch": 2.35,
      "learning_rate": 6.776296780882537e-05,
      "loss": 0.457,
      "step": 40
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.405963260007481e-05,
      "loss": 0.4453,
      "step": 45
    },
    {
      "epoch": 2.94,
      "learning_rate": 6.000000000000001e-05,
      "loss": 0.4244,
      "step": 50
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.4469350576400757,
      "eval_matthews_correlation": 0.5432575763528743,
      "eval_runtime": 0.926,
      "eval_samples_per_second": 1126.29,
      "eval_steps_per_second": 71.271,
      "step": 51
    },
    {
      "epoch": 3.24,
      "learning_rate": 5.564418881960624e-05,
      "loss": 0.3968,
      "step": 55
    },
    {
      "epoch": 3.53,
      "learning_rate": 5.105670395747087e-05,
      "loss": 0.3774,
      "step": 60
    },
    {
      "epoch": 3.82,
      "learning_rate": 4.6305481151278946e-05,
      "loss": 0.3568,
      "step": 65
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.4402828514575958,
      "eval_matthews_correlation": 0.5510400717227824,
      "eval_runtime": 0.9239,
      "eval_samples_per_second": 1128.958,
      "eval_steps_per_second": 71.439,
      "step": 68
    },
    {
      "epoch": 4.12,
      "learning_rate": 4.1460880922306367e-05,
      "loss": 0.342,
      "step": 70
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.6594646611454575e-05,
      "loss": 0.3341,
      "step": 75
    },
    {
      "epoch": 4.71,
      "learning_rate": 3.177884193720798e-05,
      "loss": 0.3171,
      "step": 80
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.7084783809142164e-05,
      "loss": 0.319,
      "step": 85
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.4517458379268646,
      "eval_matthews_correlation": 0.5653604748370356,
      "eval_runtime": 0.9654,
      "eval_samples_per_second": 1080.428,
      "eval_steps_per_second": 68.368,
      "step": 85
    },
    {
      "epoch": 5.29,
      "learning_rate": 2.2581986200872356e-05,
      "loss": 0.3019,
      "step": 90
    },
    {
      "epoch": 5.59,
      "learning_rate": 1.833713072255262e-05,
      "loss": 0.2781,
      "step": 95
    },
    {
      "epoch": 5.88,
      "learning_rate": 1.4413079137644358e-05,
      "loss": 0.2887,
      "step": 100
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.46560344099998474,
      "eval_matthews_correlation": 0.5727969336224868,
      "eval_runtime": 0.9477,
      "eval_samples_per_second": 1100.569,
      "eval_steps_per_second": 69.643,
      "step": 102
    },
    {
      "epoch": 6.18,
      "learning_rate": 1.0867942447523366e-05,
      "loss": 0.3025,
      "step": 105
    },
    {
      "epoch": 6.47,
      "learning_rate": 7.75422032978549e-06,
      "loss": 0.2775,
      "step": 110
    },
    {
      "epoch": 6.76,
      "learning_rate": 5.118023674247692e-06,
      "loss": 0.2771,
      "step": 115
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.4558466076850891,
      "eval_matthews_correlation": 0.5882977917441249,
      "eval_runtime": 1.0446,
      "eval_samples_per_second": 998.496,
      "eval_steps_per_second": 63.184,
      "step": 119
    },
    {
      "epoch": 7.06,
      "learning_rate": 2.9983917300539224e-06,
      "loss": 0.2732,
      "step": 120
    },
    {
      "epoch": 7.35,
      "learning_rate": 1.426713976205658e-06,
      "loss": 0.2865,
      "step": 125
    },
    {
      "epoch": 7.65,
      "learning_rate": 4.2626527699558996e-07,
      "loss": 0.2695,
      "step": 130
    },
    {
      "epoch": 7.94,
      "learning_rate": 1.1861206194425657e-08,
      "loss": 0.2729,
      "step": 135
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.4568850100040436,
      "eval_matthews_correlation": 0.5857509882742485,
      "eval_runtime": 0.8945,
      "eval_samples_per_second": 1165.969,
      "eval_steps_per_second": 73.781,
      "step": 136
    },
    {
      "epoch": 8.0,
      "step": 136,
      "total_flos": 503134700437504.0,
      "train_loss": 0.39676498534048305,
      "train_runtime": 212.9463,
      "train_samples_per_second": 321.245,
      "train_steps_per_second": 0.639
    }
  ],
  "max_steps": 136,
  "num_train_epochs": 8,
  "total_flos": 503134700437504.0,
  "trial_name": null,
  "trial_params": null
}