{ "best_metric": 0.8875182540327614, "best_model_checkpoint": "cls_comment-phobert-base-v2-v2.2/checkpoint-2100", "epoch": 27.36842105263158, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.05, "grad_norm": 1.7893420457839966, "learning_rate": 2.5e-06, "loss": 1.663, "step": 100 }, { "epoch": 1.05, "eval_accuracy": 0.5041240514681623, "eval_f1_score": 0.11172040652189807, "eval_loss": 1.4870415925979614, "eval_precision": 0.08402067524469371, "eval_recall": 0.16666666666666666, "eval_runtime": 5.0768, "eval_samples_per_second": 597.028, "eval_steps_per_second": 9.455, "step": 100 }, { "epoch": 2.11, "grad_norm": 2.4307305812835693, "learning_rate": 5e-06, "loss": 1.294, "step": 200 }, { "epoch": 2.11, "eval_accuracy": 0.6974595842956121, "eval_f1_score": 0.39113317224801986, "eval_loss": 0.9955642223358154, "eval_precision": 0.49018024013865347, "eval_recall": 0.3899919974680958, "eval_runtime": 5.1701, "eval_samples_per_second": 586.259, "eval_steps_per_second": 9.284, "step": 200 }, { "epoch": 3.16, "grad_norm": 3.8080785274505615, "learning_rate": 7.500000000000001e-06, "loss": 0.898, "step": 300 }, { "epoch": 3.16, "eval_accuracy": 0.8231606730451996, "eval_f1_score": 0.5499484227307703, "eval_loss": 0.6778652667999268, "eval_precision": 0.535367810981718, "eval_recall": 0.5697240354177526, "eval_runtime": 5.2797, "eval_samples_per_second": 574.084, "eval_steps_per_second": 9.091, "step": 300 }, { "epoch": 4.21, "grad_norm": 4.932918548583984, "learning_rate": 1e-05, "loss": 0.6411, "step": 400 }, { "epoch": 4.21, "eval_accuracy": 0.8568129330254042, "eval_f1_score": 0.5739966477156212, "eval_loss": 0.5163660645484924, "eval_precision": 0.5613296135162277, "eval_recall": 0.5895128268802295, "eval_runtime": 5.1719, "eval_samples_per_second": 586.047, "eval_steps_per_second": 9.281, "step": 400 }, { "epoch": 5.26, "grad_norm": 3.761213541030884, "learning_rate": 9.722222222222223e-06, "loss": 0.5031, "step": 500 }, { "epoch": 5.26, "eval_accuracy": 0.8937644341801386, "eval_f1_score": 0.7180655496783576, "eval_loss": 0.4105552136898041, "eval_precision": 0.7319397223704595, "eval_recall": 0.7114016304859992, "eval_runtime": 5.1958, "eval_samples_per_second": 583.36, "eval_steps_per_second": 9.238, "step": 500 }, { "epoch": 6.32, "grad_norm": 5.329339504241943, "learning_rate": 9.444444444444445e-06, "loss": 0.38, "step": 600 }, { "epoch": 6.32, "eval_accuracy": 0.9096007918178819, "eval_f1_score": 0.8325922485904789, "eval_loss": 0.3474419116973877, "eval_precision": 0.8738654107630431, "eval_recall": 0.8144519804951473, "eval_runtime": 5.1729, "eval_samples_per_second": 585.934, "eval_steps_per_second": 9.279, "step": 600 }, { "epoch": 7.37, "grad_norm": 5.044618129730225, "learning_rate": 9.166666666666666e-06, "loss": 0.2927, "step": 700 }, { "epoch": 7.37, "eval_accuracy": 0.9142197294622237, "eval_f1_score": 0.8598244657605173, "eval_loss": 0.3109701871871948, "eval_precision": 0.8809676600022426, "eval_recall": 0.8455076124995076, "eval_runtime": 6.0257, "eval_samples_per_second": 503.009, "eval_steps_per_second": 7.966, "step": 700 }, { "epoch": 8.42, "grad_norm": 4.748721599578857, "learning_rate": 8.888888888888888e-06, "loss": 0.2532, "step": 800 }, { "epoch": 8.42, "eval_accuracy": 0.9188386671065655, "eval_f1_score": 0.8702202017241888, "eval_loss": 0.3045769929885864, "eval_precision": 0.8880655151226781, "eval_recall": 0.85512094950349, "eval_runtime": 5.1742, "eval_samples_per_second": 585.795, "eval_steps_per_second": 9.277, "step": 800 }, { "epoch": 9.47, "grad_norm": 4.976996898651123, "learning_rate": 8.611111111111112e-06, "loss": 0.2049, "step": 900 }, { "epoch": 9.47, "eval_accuracy": 0.9218079841636424, "eval_f1_score": 0.8688677581871923, "eval_loss": 0.28507497906684875, "eval_precision": 0.890238230846033, "eval_recall": 0.8539333664213857, "eval_runtime": 5.1661, "eval_samples_per_second": 586.706, "eval_steps_per_second": 9.291, "step": 900 }, { "epoch": 10.53, "grad_norm": 5.230645656585693, "learning_rate": 8.333333333333334e-06, "loss": 0.1785, "step": 1000 }, { "epoch": 10.53, "eval_accuracy": 0.9251072253381722, "eval_f1_score": 0.8768747602558463, "eval_loss": 0.2801537811756134, "eval_precision": 0.9044956542270928, "eval_recall": 0.8561455297131971, "eval_runtime": 5.2425, "eval_samples_per_second": 578.155, "eval_steps_per_second": 9.156, "step": 1000 }, { "epoch": 11.58, "grad_norm": 5.533747673034668, "learning_rate": 8.055555555555557e-06, "loss": 0.1511, "step": 1100 }, { "epoch": 11.58, "eval_accuracy": 0.9231276806334543, "eval_f1_score": 0.8744252567641876, "eval_loss": 0.28748995065689087, "eval_precision": 0.8770259775941726, "eval_recall": 0.8748474059729406, "eval_runtime": 5.2089, "eval_samples_per_second": 581.891, "eval_steps_per_second": 9.215, "step": 1100 }, { "epoch": 12.63, "grad_norm": 5.897303104400635, "learning_rate": 7.77777777777778e-06, "loss": 0.1392, "step": 1200 }, { "epoch": 12.63, "eval_accuracy": 0.9264269218079841, "eval_f1_score": 0.8774620123424183, "eval_loss": 0.2811359465122223, "eval_precision": 0.9005448213619175, "eval_recall": 0.8596724578451488, "eval_runtime": 5.1974, "eval_samples_per_second": 583.175, "eval_steps_per_second": 9.235, "step": 1200 }, { "epoch": 13.68, "grad_norm": 3.5150811672210693, "learning_rate": 7.500000000000001e-06, "loss": 0.1166, "step": 1300 }, { "epoch": 13.68, "eval_accuracy": 0.9247773012207192, "eval_f1_score": 0.875133246925271, "eval_loss": 0.27574026584625244, "eval_precision": 0.8785572499470087, "eval_recall": 0.8746042908646392, "eval_runtime": 5.198, "eval_samples_per_second": 583.104, "eval_steps_per_second": 9.234, "step": 1300 }, { "epoch": 14.74, "grad_norm": 5.184170722961426, "learning_rate": 7.222222222222223e-06, "loss": 0.1087, "step": 1400 }, { "epoch": 14.74, "eval_accuracy": 0.9257670735730782, "eval_f1_score": 0.8803669741747352, "eval_loss": 0.272718608379364, "eval_precision": 0.8857591375966419, "eval_recall": 0.8761427115927698, "eval_runtime": 5.2326, "eval_samples_per_second": 579.256, "eval_steps_per_second": 9.173, "step": 1400 }, { "epoch": 15.79, "grad_norm": 2.6620497703552246, "learning_rate": 6.944444444444445e-06, "loss": 0.0918, "step": 1500 }, { "epoch": 15.79, "eval_accuracy": 0.9284064665127021, "eval_f1_score": 0.8829680164086238, "eval_loss": 0.286248117685318, "eval_precision": 0.8987862948246464, "eval_recall": 0.8711650603773539, "eval_runtime": 5.184, "eval_samples_per_second": 584.68, "eval_steps_per_second": 9.259, "step": 1500 }, { "epoch": 16.84, "grad_norm": 1.70383882522583, "learning_rate": 6.666666666666667e-06, "loss": 0.0824, "step": 1600 }, { "epoch": 16.84, "eval_accuracy": 0.929066314747608, "eval_f1_score": 0.8832576833606276, "eval_loss": 0.2915453612804413, "eval_precision": 0.9008601442216951, "eval_recall": 0.868938029158805, "eval_runtime": 5.1614, "eval_samples_per_second": 587.245, "eval_steps_per_second": 9.3, "step": 1600 }, { "epoch": 17.89, "grad_norm": 7.806049823760986, "learning_rate": 6.3888888888888885e-06, "loss": 0.0745, "step": 1700 }, { "epoch": 17.89, "eval_accuracy": 0.929066314747608, "eval_f1_score": 0.8796971687211418, "eval_loss": 0.2994127869606018, "eval_precision": 0.8847125167911893, "eval_recall": 0.8796360078693516, "eval_runtime": 5.1775, "eval_samples_per_second": 585.421, "eval_steps_per_second": 9.271, "step": 1700 }, { "epoch": 18.95, "grad_norm": 9.288145065307617, "learning_rate": 6.111111111111112e-06, "loss": 0.0743, "step": 1800 }, { "epoch": 18.95, "eval_accuracy": 0.9254371494556252, "eval_f1_score": 0.8782515232850638, "eval_loss": 0.3092294931411743, "eval_precision": 0.8909710349230974, "eval_recall": 0.8686312796266131, "eval_runtime": 5.2161, "eval_samples_per_second": 581.085, "eval_steps_per_second": 9.202, "step": 1800 }, { "epoch": 20.0, "grad_norm": 0.29120656847953796, "learning_rate": 5.833333333333334e-06, "loss": 0.0636, "step": 1900 }, { "epoch": 20.0, "eval_accuracy": 0.929066314747608, "eval_f1_score": 0.8810930807565414, "eval_loss": 0.3142439126968384, "eval_precision": 0.8915600488637722, "eval_recall": 0.8743221351307482, "eval_runtime": 5.1856, "eval_samples_per_second": 584.505, "eval_steps_per_second": 9.256, "step": 1900 }, { "epoch": 21.05, "grad_norm": 3.0424692630767822, "learning_rate": 5.555555555555557e-06, "loss": 0.0605, "step": 2000 }, { "epoch": 21.05, "eval_accuracy": 0.929066314747608, "eval_f1_score": 0.8823037485745876, "eval_loss": 0.30989953875541687, "eval_precision": 0.8974412044321375, "eval_recall": 0.8700202850817763, "eval_runtime": 5.2587, "eval_samples_per_second": 576.383, "eval_steps_per_second": 9.128, "step": 2000 }, { "epoch": 22.11, "grad_norm": 0.18823295831680298, "learning_rate": 5.2777777777777785e-06, "loss": 0.0501, "step": 2100 }, { "epoch": 22.11, "eval_accuracy": 0.9317057076872319, "eval_f1_score": 0.8875182540327614, "eval_loss": 0.3163454234600067, "eval_precision": 0.9014604415235737, "eval_recall": 0.8777053165164634, "eval_runtime": 5.1733, "eval_samples_per_second": 585.895, "eval_steps_per_second": 9.278, "step": 2100 }, { "epoch": 23.16, "grad_norm": 0.22511476278305054, "learning_rate": 5e-06, "loss": 0.0519, "step": 2200 }, { "epoch": 23.16, "eval_accuracy": 0.929726162982514, "eval_f1_score": 0.883696747826972, "eval_loss": 0.3289995491504669, "eval_precision": 0.9010520871368185, "eval_recall": 0.8692223543133784, "eval_runtime": 5.2183, "eval_samples_per_second": 580.844, "eval_steps_per_second": 9.198, "step": 2200 }, { "epoch": 24.21, "grad_norm": 8.104951858520508, "learning_rate": 4.722222222222222e-06, "loss": 0.0464, "step": 2300 }, { "epoch": 24.21, "eval_accuracy": 0.9274166941603431, "eval_f1_score": 0.8804834286728226, "eval_loss": 0.34061628580093384, "eval_precision": 0.8871850911314553, "eval_recall": 0.8771793667200236, "eval_runtime": 5.1789, "eval_samples_per_second": 585.258, "eval_steps_per_second": 9.268, "step": 2300 }, { "epoch": 25.26, "grad_norm": 2.2458958625793457, "learning_rate": 4.444444444444444e-06, "loss": 0.0432, "step": 2400 }, { "epoch": 25.26, "eval_accuracy": 0.9284064665127021, "eval_f1_score": 0.8809944439022924, "eval_loss": 0.3304508328437805, "eval_precision": 0.8875581681302606, "eval_recall": 0.8774637238579275, "eval_runtime": 5.2785, "eval_samples_per_second": 574.218, "eval_steps_per_second": 9.094, "step": 2400 }, { "epoch": 26.32, "grad_norm": 8.245588302612305, "learning_rate": 4.166666666666667e-06, "loss": 0.0404, "step": 2500 }, { "epoch": 26.32, "eval_accuracy": 0.9293962388650611, "eval_f1_score": 0.8825762014276358, "eval_loss": 0.3377806544303894, "eval_precision": 0.890131895776857, "eval_recall": 0.8785119382363503, "eval_runtime": 5.1758, "eval_samples_per_second": 585.606, "eval_steps_per_second": 9.274, "step": 2500 }, { "epoch": 27.37, "grad_norm": 5.161190032958984, "learning_rate": 3.88888888888889e-06, "loss": 0.0416, "step": 2600 }, { "epoch": 27.37, "eval_accuracy": 0.9284064665127021, "eval_f1_score": 0.8829688694414259, "eval_loss": 0.343578040599823, "eval_precision": 0.897684321914049, "eval_recall": 0.8726474609617948, "eval_runtime": 5.2526, "eval_samples_per_second": 577.048, "eval_steps_per_second": 9.138, "step": 2600 }, { "epoch": 27.37, "step": 2600, "total_flos": 5414987099379456.0, "train_loss": 0.2901941172893231, "train_runtime": 2142.8815, "train_samples_per_second": 238.931, "train_steps_per_second": 1.867 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 43, "save_steps": 100, "total_flos": 5414987099379456.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }