|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9963985594237696, |
|
"global_step": 624, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 1.0855, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.9105, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 0.4616, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.3415, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 0.2655, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.265, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.919786096256685e-05, |
|
"loss": 0.2216, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.8128342245989304e-05, |
|
"loss": 0.2317, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 0.194, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.598930481283423e-05, |
|
"loss": 0.1895, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.491978609625669e-05, |
|
"loss": 0.1928, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.385026737967914e-05, |
|
"loss": 0.1852, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2780748663101606e-05, |
|
"loss": 0.1881, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.171122994652407e-05, |
|
"loss": 0.1763, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0641711229946525e-05, |
|
"loss": 0.1852, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.957219251336899e-05, |
|
"loss": 0.1757, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8502673796791445e-05, |
|
"loss": 0.1675, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.743315508021391e-05, |
|
"loss": 0.1535, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_Macro F1": 0.571105133243632, |
|
"eval_Macro Precision": 0.5530940794496672, |
|
"eval_Macro Recall": 0.5924688273721617, |
|
"eval_Micro F1": 0.9622476495882223, |
|
"eval_Micro Precision": 0.9622476495882224, |
|
"eval_Micro Recall": 0.9622476495882224, |
|
"eval_Weighted F1": 0.9596774503435738, |
|
"eval_Weighted Precision": 0.9577464168056835, |
|
"eval_Weighted Recall": 0.9622476495882224, |
|
"eval_accuracy": 0.9622476495882224, |
|
"eval_loss": 0.11255443841218948, |
|
"eval_runtime": 2995.4719, |
|
"eval_samples_per_second": 4.581, |
|
"eval_steps_per_second": 0.143, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.1777, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.1577, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4224598930481284e-05, |
|
"loss": 0.1372, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3155080213903747e-05, |
|
"loss": 0.1446, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.20855614973262e-05, |
|
"loss": 0.1367, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.1016042780748666e-05, |
|
"loss": 0.1283, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9946524064171122e-05, |
|
"loss": 0.1521, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8877005347593582e-05, |
|
"loss": 0.1703, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7807486631016045e-05, |
|
"loss": 0.1481, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6737967914438505e-05, |
|
"loss": 0.1353, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5668449197860968e-05, |
|
"loss": 0.1283, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4598930481283424e-05, |
|
"loss": 0.1317, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.1344, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2459893048128343e-05, |
|
"loss": 0.12, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1390374331550803e-05, |
|
"loss": 0.1232, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0320855614973263e-05, |
|
"loss": 0.1463, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9251336898395722e-05, |
|
"loss": 0.1195, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_Macro F1": 0.8501706517182783, |
|
"eval_Macro Precision": 0.9287080582761646, |
|
"eval_Macro Recall": 0.8036955912090624, |
|
"eval_Micro F1": 0.973835726259019, |
|
"eval_Micro Precision": 0.973835726259019, |
|
"eval_Micro Recall": 0.973835726259019, |
|
"eval_Weighted F1": 0.9735756522364626, |
|
"eval_Weighted Precision": 0.9741111567981213, |
|
"eval_Weighted Recall": 0.973835726259019, |
|
"eval_accuracy": 0.973835726259019, |
|
"eval_loss": 0.08434688299894333, |
|
"eval_runtime": 2834.0055, |
|
"eval_samples_per_second": 4.842, |
|
"eval_steps_per_second": 0.151, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.1102, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7112299465240642e-05, |
|
"loss": 0.1248, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.60427807486631e-05, |
|
"loss": 0.108, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4973262032085561e-05, |
|
"loss": 0.118, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3903743315508022e-05, |
|
"loss": 0.1131, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2834224598930484e-05, |
|
"loss": 0.1177, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.1409, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0695187165775402e-05, |
|
"loss": 0.1261, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.625668449197861e-06, |
|
"loss": 0.1004, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.556149732620321e-06, |
|
"loss": 0.1269, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.4866310160427806e-06, |
|
"loss": 0.1265, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.417112299465242e-06, |
|
"loss": 0.098, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.347593582887701e-06, |
|
"loss": 0.126, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2780748663101604e-06, |
|
"loss": 0.0876, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.208556149732621e-06, |
|
"loss": 0.1014, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1390374331550802e-06, |
|
"loss": 0.1111, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0695187165775401e-06, |
|
"loss": 0.1361, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0979, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_Macro F1": 0.9154020288755031, |
|
"eval_Macro Precision": 0.9314436651998532, |
|
"eval_Macro Recall": 0.9019139504496776, |
|
"eval_Micro F1": 0.976678084687705, |
|
"eval_Micro Precision": 0.976678084687705, |
|
"eval_Micro Recall": 0.976678084687705, |
|
"eval_Weighted F1": 0.9768442651021538, |
|
"eval_Weighted Precision": 0.9771017843393915, |
|
"eval_Weighted Recall": 0.976678084687705, |
|
"eval_accuracy": 0.976678084687705, |
|
"eval_loss": 0.07859531044960022, |
|
"eval_runtime": 3028.7856, |
|
"eval_samples_per_second": 4.53, |
|
"eval_steps_per_second": 0.142, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 624, |
|
"total_flos": 6.186805817770303e+18, |
|
"train_loss": 0.17273353269466987, |
|
"train_runtime": 60970.1196, |
|
"train_samples_per_second": 1.311, |
|
"train_steps_per_second": 0.01 |
|
} |
|
], |
|
"max_steps": 624, |
|
"num_train_epochs": 3, |
|
"total_flos": 6.186805817770303e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|