cares-roberta-clinical / trainer_state.json
chizhik
re-trained model after eliminating annotation errors
4c29c9d
{
"best_metric": 0.8328207869559483,
"best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-9/checkpoint-2272",
"epoch": 32.0,
"global_step": 2272,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.0,
"eval_loss": 0.29745519161224365,
"eval_macro_f1": 0.0,
"eval_macro_precision": 0.0,
"eval_macro_recall": 0.0,
"eval_micro_f1": 0.0,
"eval_micro_precision": 0.0,
"eval_micro_recall": 0.0,
"eval_runtime": 2.6698,
"eval_samples_per_second": 361.828,
"eval_steps_per_second": 11.611,
"step": 142
},
{
"epoch": 4.0,
"eval_loss": 0.2143602967262268,
"eval_macro_f1": 0.20760247776727883,
"eval_macro_precision": 0.2427021619684663,
"eval_macro_recall": 0.19452630677199997,
"eval_micro_f1": 0.5696619950535862,
"eval_micro_precision": 0.7601760176017601,
"eval_micro_recall": 0.45550428477257743,
"eval_runtime": 2.6814,
"eval_samples_per_second": 360.254,
"eval_steps_per_second": 11.561,
"step": 284
},
{
"epoch": 6.0,
"eval_loss": 0.15995128452777863,
"eval_macro_f1": 0.32385167310781526,
"eval_macro_precision": 0.42763364413607774,
"eval_macro_recall": 0.2931228227099484,
"eval_micro_f1": 0.6900908014212397,
"eval_micro_precision": 0.860236220472441,
"eval_micro_recall": 0.5761371127224786,
"eval_runtime": 2.674,
"eval_samples_per_second": 361.255,
"eval_steps_per_second": 11.593,
"step": 426
},
{
"epoch": 7.04,
"learning_rate": 4.40741152818786e-05,
"loss": 0.2773,
"step": 500
},
{
"epoch": 8.0,
"eval_loss": 0.13520723581314087,
"eval_macro_f1": 0.4425371105794407,
"eval_macro_precision": 0.49169850541474464,
"eval_macro_recall": 0.41330740987880255,
"eval_micro_f1": 0.7547309833024118,
"eval_micro_precision": 0.8633276740237691,
"eval_micro_recall": 0.6704021094264997,
"eval_runtime": 2.6984,
"eval_samples_per_second": 357.994,
"eval_steps_per_second": 11.488,
"step": 568
},
{
"epoch": 10.0,
"eval_loss": 0.10962910205125809,
"eval_macro_f1": 0.5655053778813368,
"eval_macro_precision": 0.6696587937430376,
"eval_macro_recall": 0.5338063461136978,
"eval_micro_f1": 0.8189806678383128,
"eval_micro_precision": 0.8772590361445783,
"eval_micro_recall": 0.7679630850362558,
"eval_runtime": 2.6744,
"eval_samples_per_second": 361.208,
"eval_steps_per_second": 11.592,
"step": 710
},
{
"epoch": 12.0,
"eval_loss": 0.0956677794456482,
"eval_macro_f1": 0.6602463081601572,
"eval_macro_precision": 0.799820247637967,
"eval_macro_recall": 0.5970945139878616,
"eval_micro_f1": 0.8437173686042465,
"eval_micro_precision": 0.8938053097345132,
"eval_micro_recall": 0.7989452867501649,
"eval_runtime": 2.6891,
"eval_samples_per_second": 359.223,
"eval_steps_per_second": 11.528,
"step": 852
},
{
"epoch": 14.0,
"eval_loss": 0.09418635815382004,
"eval_macro_f1": 0.709171253471717,
"eval_macro_precision": 0.8508463365856882,
"eval_macro_recall": 0.6566230286073916,
"eval_micro_f1": 0.8551865799383773,
"eval_micro_precision": 0.8896011396011396,
"eval_micro_recall": 0.8233355306526038,
"eval_runtime": 2.6733,
"eval_samples_per_second": 361.353,
"eval_steps_per_second": 11.596,
"step": 994
},
{
"epoch": 14.08,
"learning_rate": 4.073516715446356e-05,
"loss": 0.068,
"step": 1000
},
{
"epoch": 16.0,
"eval_loss": 0.09525582939386368,
"eval_macro_f1": 0.776452344525862,
"eval_macro_precision": 0.8422867152488994,
"eval_macro_recall": 0.7394950574942071,
"eval_micro_f1": 0.8662207357859532,
"eval_micro_precision": 0.8791581805838425,
"eval_micro_recall": 0.8536585365853658,
"eval_runtime": 2.6899,
"eval_samples_per_second": 359.119,
"eval_steps_per_second": 11.525,
"step": 1136
},
{
"epoch": 18.0,
"eval_loss": 0.09120669960975647,
"eval_macro_f1": 0.7799395687308482,
"eval_macro_precision": 0.8259713713716451,
"eval_macro_recall": 0.7560566435810081,
"eval_micro_f1": 0.8660743665679499,
"eval_micro_precision": 0.8646517739816032,
"eval_micro_recall": 0.8675016479894528,
"eval_runtime": 2.6882,
"eval_samples_per_second": 359.349,
"eval_steps_per_second": 11.532,
"step": 1278
},
{
"epoch": 20.0,
"eval_loss": 0.09322977066040039,
"eval_macro_f1": 0.7717526983031062,
"eval_macro_precision": 0.8213810289257493,
"eval_macro_recall": 0.747806253729357,
"eval_micro_f1": 0.8603205757278378,
"eval_micro_precision": 0.8538961038961039,
"eval_micro_recall": 0.8668424522083059,
"eval_runtime": 2.6911,
"eval_samples_per_second": 358.966,
"eval_steps_per_second": 11.52,
"step": 1420
},
{
"epoch": 21.13,
"learning_rate": 3.739621902704851e-05,
"loss": 0.0222,
"step": 1500
},
{
"epoch": 22.0,
"eval_loss": 0.10442250967025757,
"eval_macro_f1": 0.765921285849591,
"eval_macro_precision": 0.7782427042161657,
"eval_macro_recall": 0.774471020810195,
"eval_micro_f1": 0.8516003879728419,
"eval_micro_precision": 0.8356598984771574,
"eval_micro_recall": 0.8681608437705999,
"eval_runtime": 2.6921,
"eval_samples_per_second": 358.824,
"eval_steps_per_second": 11.515,
"step": 1562
},
{
"epoch": 24.0,
"eval_loss": 0.09634574502706528,
"eval_macro_f1": 0.805432074935829,
"eval_macro_precision": 0.9021531097855335,
"eval_macro_recall": 0.7712696478949495,
"eval_micro_f1": 0.8704318936877077,
"eval_micro_precision": 0.8774279973208305,
"eval_micro_recall": 0.8635464733025708,
"eval_runtime": 2.6753,
"eval_samples_per_second": 361.083,
"eval_steps_per_second": 11.588,
"step": 1704
},
{
"epoch": 26.0,
"eval_loss": 0.09631907194852829,
"eval_macro_f1": 0.8054621279402976,
"eval_macro_precision": 0.8819456215653025,
"eval_macro_recall": 0.782841919580731,
"eval_micro_f1": 0.8722700198544011,
"eval_micro_precision": 0.8757475083056478,
"eval_micro_recall": 0.8688200395517469,
"eval_runtime": 2.6758,
"eval_samples_per_second": 361.016,
"eval_steps_per_second": 11.585,
"step": 1846
},
{
"epoch": 28.0,
"eval_loss": 0.09827031195163727,
"eval_macro_f1": 0.824449032097833,
"eval_macro_precision": 0.8941626132550853,
"eval_macro_recall": 0.7913713438345684,
"eval_micro_f1": 0.8718459495351926,
"eval_micro_precision": 0.8782608695652174,
"eval_micro_recall": 0.8655240606460118,
"eval_runtime": 2.6758,
"eval_samples_per_second": 361.008,
"eval_steps_per_second": 11.585,
"step": 1988
},
{
"epoch": 28.17,
"learning_rate": 3.4057270899633464e-05,
"loss": 0.011,
"step": 2000
},
{
"epoch": 30.0,
"eval_loss": 0.10059890896081924,
"eval_macro_f1": 0.8165877105799546,
"eval_macro_precision": 0.884478008536637,
"eval_macro_recall": 0.7957804401453603,
"eval_micro_f1": 0.8693088765149033,
"eval_micro_precision": 0.8639322916666666,
"eval_micro_recall": 0.8747528015820699,
"eval_runtime": 2.6759,
"eval_samples_per_second": 361.006,
"eval_steps_per_second": 11.585,
"step": 2130
},
{
"epoch": 32.0,
"eval_loss": 0.10830199718475342,
"eval_macro_f1": 0.8328207869559483,
"eval_macro_precision": 0.8739781063350807,
"eval_macro_recall": 0.8212976019561394,
"eval_micro_f1": 0.8676422764227643,
"eval_micro_precision": 0.8562259306803595,
"eval_micro_recall": 0.8793671720500988,
"eval_runtime": 2.674,
"eval_samples_per_second": 361.259,
"eval_steps_per_second": 11.593,
"step": 2272
}
],
"max_steps": 7100,
"num_train_epochs": 100,
"total_flos": 1.673289313217472e+16,
"trial_name": null,
"trial_params": {
"adam_epsilon": 1.724439344881123e-07,
"learning_rate": 4.540969453284462e-05,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"seed": 324,
"warmup_steps": 300,
"weight_decay": 0.00598936569463419
}
}