|
{ |
|
"best_metric": 0.8328207869559483, |
|
"best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-9/checkpoint-2272", |
|
"epoch": 32.0, |
|
"global_step": 2272, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.29745519161224365, |
|
"eval_macro_f1": 0.0, |
|
"eval_macro_precision": 0.0, |
|
"eval_macro_recall": 0.0, |
|
"eval_micro_f1": 0.0, |
|
"eval_micro_precision": 0.0, |
|
"eval_micro_recall": 0.0, |
|
"eval_runtime": 2.6698, |
|
"eval_samples_per_second": 361.828, |
|
"eval_steps_per_second": 11.611, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.2143602967262268, |
|
"eval_macro_f1": 0.20760247776727883, |
|
"eval_macro_precision": 0.2427021619684663, |
|
"eval_macro_recall": 0.19452630677199997, |
|
"eval_micro_f1": 0.5696619950535862, |
|
"eval_micro_precision": 0.7601760176017601, |
|
"eval_micro_recall": 0.45550428477257743, |
|
"eval_runtime": 2.6814, |
|
"eval_samples_per_second": 360.254, |
|
"eval_steps_per_second": 11.561, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.15995128452777863, |
|
"eval_macro_f1": 0.32385167310781526, |
|
"eval_macro_precision": 0.42763364413607774, |
|
"eval_macro_recall": 0.2931228227099484, |
|
"eval_micro_f1": 0.6900908014212397, |
|
"eval_micro_precision": 0.860236220472441, |
|
"eval_micro_recall": 0.5761371127224786, |
|
"eval_runtime": 2.674, |
|
"eval_samples_per_second": 361.255, |
|
"eval_steps_per_second": 11.593, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 4.40741152818786e-05, |
|
"loss": 0.2773, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.13520723581314087, |
|
"eval_macro_f1": 0.4425371105794407, |
|
"eval_macro_precision": 0.49169850541474464, |
|
"eval_macro_recall": 0.41330740987880255, |
|
"eval_micro_f1": 0.7547309833024118, |
|
"eval_micro_precision": 0.8633276740237691, |
|
"eval_micro_recall": 0.6704021094264997, |
|
"eval_runtime": 2.6984, |
|
"eval_samples_per_second": 357.994, |
|
"eval_steps_per_second": 11.488, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.10962910205125809, |
|
"eval_macro_f1": 0.5655053778813368, |
|
"eval_macro_precision": 0.6696587937430376, |
|
"eval_macro_recall": 0.5338063461136978, |
|
"eval_micro_f1": 0.8189806678383128, |
|
"eval_micro_precision": 0.8772590361445783, |
|
"eval_micro_recall": 0.7679630850362558, |
|
"eval_runtime": 2.6744, |
|
"eval_samples_per_second": 361.208, |
|
"eval_steps_per_second": 11.592, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.0956677794456482, |
|
"eval_macro_f1": 0.6602463081601572, |
|
"eval_macro_precision": 0.799820247637967, |
|
"eval_macro_recall": 0.5970945139878616, |
|
"eval_micro_f1": 0.8437173686042465, |
|
"eval_micro_precision": 0.8938053097345132, |
|
"eval_micro_recall": 0.7989452867501649, |
|
"eval_runtime": 2.6891, |
|
"eval_samples_per_second": 359.223, |
|
"eval_steps_per_second": 11.528, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.09418635815382004, |
|
"eval_macro_f1": 0.709171253471717, |
|
"eval_macro_precision": 0.8508463365856882, |
|
"eval_macro_recall": 0.6566230286073916, |
|
"eval_micro_f1": 0.8551865799383773, |
|
"eval_micro_precision": 0.8896011396011396, |
|
"eval_micro_recall": 0.8233355306526038, |
|
"eval_runtime": 2.6733, |
|
"eval_samples_per_second": 361.353, |
|
"eval_steps_per_second": 11.596, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 4.073516715446356e-05, |
|
"loss": 0.068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.09525582939386368, |
|
"eval_macro_f1": 0.776452344525862, |
|
"eval_macro_precision": 0.8422867152488994, |
|
"eval_macro_recall": 0.7394950574942071, |
|
"eval_micro_f1": 0.8662207357859532, |
|
"eval_micro_precision": 0.8791581805838425, |
|
"eval_micro_recall": 0.8536585365853658, |
|
"eval_runtime": 2.6899, |
|
"eval_samples_per_second": 359.119, |
|
"eval_steps_per_second": 11.525, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.09120669960975647, |
|
"eval_macro_f1": 0.7799395687308482, |
|
"eval_macro_precision": 0.8259713713716451, |
|
"eval_macro_recall": 0.7560566435810081, |
|
"eval_micro_f1": 0.8660743665679499, |
|
"eval_micro_precision": 0.8646517739816032, |
|
"eval_micro_recall": 0.8675016479894528, |
|
"eval_runtime": 2.6882, |
|
"eval_samples_per_second": 359.349, |
|
"eval_steps_per_second": 11.532, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.09322977066040039, |
|
"eval_macro_f1": 0.7717526983031062, |
|
"eval_macro_precision": 0.8213810289257493, |
|
"eval_macro_recall": 0.747806253729357, |
|
"eval_micro_f1": 0.8603205757278378, |
|
"eval_micro_precision": 0.8538961038961039, |
|
"eval_micro_recall": 0.8668424522083059, |
|
"eval_runtime": 2.6911, |
|
"eval_samples_per_second": 358.966, |
|
"eval_steps_per_second": 11.52, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"learning_rate": 3.739621902704851e-05, |
|
"loss": 0.0222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.10442250967025757, |
|
"eval_macro_f1": 0.765921285849591, |
|
"eval_macro_precision": 0.7782427042161657, |
|
"eval_macro_recall": 0.774471020810195, |
|
"eval_micro_f1": 0.8516003879728419, |
|
"eval_micro_precision": 0.8356598984771574, |
|
"eval_micro_recall": 0.8681608437705999, |
|
"eval_runtime": 2.6921, |
|
"eval_samples_per_second": 358.824, |
|
"eval_steps_per_second": 11.515, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.09634574502706528, |
|
"eval_macro_f1": 0.805432074935829, |
|
"eval_macro_precision": 0.9021531097855335, |
|
"eval_macro_recall": 0.7712696478949495, |
|
"eval_micro_f1": 0.8704318936877077, |
|
"eval_micro_precision": 0.8774279973208305, |
|
"eval_micro_recall": 0.8635464733025708, |
|
"eval_runtime": 2.6753, |
|
"eval_samples_per_second": 361.083, |
|
"eval_steps_per_second": 11.588, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.09631907194852829, |
|
"eval_macro_f1": 0.8054621279402976, |
|
"eval_macro_precision": 0.8819456215653025, |
|
"eval_macro_recall": 0.782841919580731, |
|
"eval_micro_f1": 0.8722700198544011, |
|
"eval_micro_precision": 0.8757475083056478, |
|
"eval_micro_recall": 0.8688200395517469, |
|
"eval_runtime": 2.6758, |
|
"eval_samples_per_second": 361.016, |
|
"eval_steps_per_second": 11.585, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.09827031195163727, |
|
"eval_macro_f1": 0.824449032097833, |
|
"eval_macro_precision": 0.8941626132550853, |
|
"eval_macro_recall": 0.7913713438345684, |
|
"eval_micro_f1": 0.8718459495351926, |
|
"eval_micro_precision": 0.8782608695652174, |
|
"eval_micro_recall": 0.8655240606460118, |
|
"eval_runtime": 2.6758, |
|
"eval_samples_per_second": 361.008, |
|
"eval_steps_per_second": 11.585, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 3.4057270899633464e-05, |
|
"loss": 0.011, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.10059890896081924, |
|
"eval_macro_f1": 0.8165877105799546, |
|
"eval_macro_precision": 0.884478008536637, |
|
"eval_macro_recall": 0.7957804401453603, |
|
"eval_micro_f1": 0.8693088765149033, |
|
"eval_micro_precision": 0.8639322916666666, |
|
"eval_micro_recall": 0.8747528015820699, |
|
"eval_runtime": 2.6759, |
|
"eval_samples_per_second": 361.006, |
|
"eval_steps_per_second": 11.585, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.10830199718475342, |
|
"eval_macro_f1": 0.8328207869559483, |
|
"eval_macro_precision": 0.8739781063350807, |
|
"eval_macro_recall": 0.8212976019561394, |
|
"eval_micro_f1": 0.8676422764227643, |
|
"eval_micro_precision": 0.8562259306803595, |
|
"eval_micro_recall": 0.8793671720500988, |
|
"eval_runtime": 2.674, |
|
"eval_samples_per_second": 361.259, |
|
"eval_steps_per_second": 11.593, |
|
"step": 2272 |
|
} |
|
], |
|
"max_steps": 7100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.673289313217472e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 1.724439344881123e-07, |
|
"learning_rate": 4.540969453284462e-05, |
|
"per_device_eval_batch_size": 32, |
|
"per_device_train_batch_size": 32, |
|
"seed": 324, |
|
"warmup_steps": 300, |
|
"weight_decay": 0.00598936569463419 |
|
} |
|
} |
|
|