tiennguyenbnbk's picture
End of training
d4e7d77 verified
{
"best_metric": 0.9319305111443131,
"best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-3700",
"epoch": 34.78260869565217,
"eval_steps": 100,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8695652173913043,
"grad_norm": 1.2353936433792114,
"learning_rate": 2.5e-06,
"loss": 1.8571,
"step": 100
},
{
"epoch": 0.8695652173913043,
"eval_accuracy": 0.3985861881457314,
"eval_f1_score": 0.08142634970006665,
"eval_loss": 1.6996082067489624,
"eval_precision": 0.05694088402081877,
"eval_recall": 0.14285714285714285,
"eval_runtime": 6.7198,
"eval_samples_per_second": 547.337,
"eval_steps_per_second": 8.631,
"step": 100
},
{
"epoch": 1.7391304347826086,
"grad_norm": 2.164222240447998,
"learning_rate": 5e-06,
"loss": 1.552,
"step": 200
},
{
"epoch": 1.7391304347826086,
"eval_accuracy": 0.6150081566068516,
"eval_f1_score": 0.25519317041574235,
"eval_loss": 1.287759780883789,
"eval_precision": 0.27379414130499563,
"eval_recall": 0.28595668999910956,
"eval_runtime": 6.6372,
"eval_samples_per_second": 554.145,
"eval_steps_per_second": 8.739,
"step": 200
},
{
"epoch": 2.608695652173913,
"grad_norm": 5.309168815612793,
"learning_rate": 7.500000000000001e-06,
"loss": 1.1701,
"step": 300
},
{
"epoch": 2.608695652173913,
"eval_accuracy": 0.7746057640021751,
"eval_f1_score": 0.5380183437718388,
"eval_loss": 0.9308958649635315,
"eval_precision": 0.5797370934127978,
"eval_recall": 0.5249138773909011,
"eval_runtime": 6.5583,
"eval_samples_per_second": 560.819,
"eval_steps_per_second": 8.844,
"step": 300
},
{
"epoch": 3.4782608695652173,
"grad_norm": 6.065152645111084,
"learning_rate": 1e-05,
"loss": 0.8958,
"step": 400
},
{
"epoch": 3.4782608695652173,
"eval_accuracy": 0.8371397498640566,
"eval_f1_score": 0.6099241108684058,
"eval_loss": 0.7467954754829407,
"eval_precision": 0.6113332602554015,
"eval_recall": 0.6120619930302185,
"eval_runtime": 6.7113,
"eval_samples_per_second": 548.029,
"eval_steps_per_second": 8.642,
"step": 400
},
{
"epoch": 4.3478260869565215,
"grad_norm": 6.0916032791137695,
"learning_rate": 9.722222222222223e-06,
"loss": 0.7463,
"step": 500
},
{
"epoch": 4.3478260869565215,
"eval_accuracy": 0.8640565524741708,
"eval_f1_score": 0.6758460592010713,
"eval_loss": 0.6539974808692932,
"eval_precision": 0.7556366523817865,
"eval_recall": 0.6741270736536388,
"eval_runtime": 6.6507,
"eval_samples_per_second": 553.021,
"eval_steps_per_second": 8.721,
"step": 500
},
{
"epoch": 5.217391304347826,
"grad_norm": 5.243598461151123,
"learning_rate": 9.444444444444445e-06,
"loss": 0.6489,
"step": 600
},
{
"epoch": 5.217391304347826,
"eval_accuracy": 0.8866231647634584,
"eval_f1_score": 0.7501544920720539,
"eval_loss": 0.5884435772895813,
"eval_precision": 0.7611116878989773,
"eval_recall": 0.7443224665881154,
"eval_runtime": 6.5994,
"eval_samples_per_second": 557.321,
"eval_steps_per_second": 8.789,
"step": 600
},
{
"epoch": 6.086956521739131,
"grad_norm": 4.099198341369629,
"learning_rate": 9.166666666666666e-06,
"loss": 0.5604,
"step": 700
},
{
"epoch": 6.086956521739131,
"eval_accuracy": 0.9010331702011963,
"eval_f1_score": 0.8349576474350455,
"eval_loss": 0.5296739339828491,
"eval_precision": 0.906039613501241,
"eval_recall": 0.8195735271786126,
"eval_runtime": 6.5841,
"eval_samples_per_second": 558.615,
"eval_steps_per_second": 8.809,
"step": 700
},
{
"epoch": 6.956521739130435,
"grad_norm": 7.3427557945251465,
"learning_rate": 8.888888888888888e-06,
"loss": 0.4907,
"step": 800
},
{
"epoch": 6.956521739130435,
"eval_accuracy": 0.9170744970092441,
"eval_f1_score": 0.8961939258721013,
"eval_loss": 0.4928275942802429,
"eval_precision": 0.9189847475576503,
"eval_recall": 0.8769461858418355,
"eval_runtime": 6.5949,
"eval_samples_per_second": 557.707,
"eval_steps_per_second": 8.795,
"step": 800
},
{
"epoch": 7.826086956521739,
"grad_norm": 5.116893768310547,
"learning_rate": 8.611111111111112e-06,
"loss": 0.4428,
"step": 900
},
{
"epoch": 7.826086956521739,
"eval_accuracy": 0.921968461120174,
"eval_f1_score": 0.9048377528141078,
"eval_loss": 0.46924909949302673,
"eval_precision": 0.9169978450568272,
"eval_recall": 0.8957923531168384,
"eval_runtime": 6.6744,
"eval_samples_per_second": 551.061,
"eval_steps_per_second": 8.69,
"step": 900
},
{
"epoch": 8.695652173913043,
"grad_norm": 3.862825393676758,
"learning_rate": 8.333333333333334e-06,
"loss": 0.4086,
"step": 1000
},
{
"epoch": 8.695652173913043,
"eval_accuracy": 0.9235997824904839,
"eval_f1_score": 0.9073197371323319,
"eval_loss": 0.4600367546081543,
"eval_precision": 0.8977722877003532,
"eval_recall": 0.9183169396463787,
"eval_runtime": 6.5594,
"eval_samples_per_second": 560.722,
"eval_steps_per_second": 8.842,
"step": 1000
},
{
"epoch": 9.565217391304348,
"grad_norm": 3.6134090423583984,
"learning_rate": 8.055555555555557e-06,
"loss": 0.3892,
"step": 1100
},
{
"epoch": 9.565217391304348,
"eval_accuracy": 0.9293094072865687,
"eval_f1_score": 0.9155883832712851,
"eval_loss": 0.45303475856781006,
"eval_precision": 0.9155788758247504,
"eval_recall": 0.9158764039642001,
"eval_runtime": 6.5742,
"eval_samples_per_second": 559.46,
"eval_steps_per_second": 8.822,
"step": 1100
},
{
"epoch": 10.434782608695652,
"grad_norm": 6.361451148986816,
"learning_rate": 7.77777777777778e-06,
"loss": 0.3659,
"step": 1200
},
{
"epoch": 10.434782608695652,
"eval_accuracy": 0.9257748776508973,
"eval_f1_score": 0.915368183730898,
"eval_loss": 0.4573982357978821,
"eval_precision": 0.9071440635058929,
"eval_recall": 0.9257672878056209,
"eval_runtime": 6.6383,
"eval_samples_per_second": 554.054,
"eval_steps_per_second": 8.737,
"step": 1200
},
{
"epoch": 11.304347826086957,
"grad_norm": 6.6150312423706055,
"learning_rate": 7.500000000000001e-06,
"loss": 0.3577,
"step": 1300
},
{
"epoch": 11.304347826086957,
"eval_accuracy": 0.9287656334964655,
"eval_f1_score": 0.9158955580622885,
"eval_loss": 0.45325955748558044,
"eval_precision": 0.9151528325914676,
"eval_recall": 0.9176855536409609,
"eval_runtime": 6.6572,
"eval_samples_per_second": 552.485,
"eval_steps_per_second": 8.712,
"step": 1300
},
{
"epoch": 12.173913043478262,
"grad_norm": 5.580691814422607,
"learning_rate": 7.222222222222223e-06,
"loss": 0.338,
"step": 1400
},
{
"epoch": 12.173913043478262,
"eval_accuracy": 0.933931484502447,
"eval_f1_score": 0.9203137031893097,
"eval_loss": 0.44535157084465027,
"eval_precision": 0.9127875535751154,
"eval_recall": 0.9284587781330601,
"eval_runtime": 6.607,
"eval_samples_per_second": 556.686,
"eval_steps_per_second": 8.779,
"step": 1400
},
{
"epoch": 13.043478260869565,
"grad_norm": 3.8554763793945312,
"learning_rate": 6.944444444444445e-06,
"loss": 0.3302,
"step": 1500
},
{
"epoch": 13.043478260869565,
"eval_accuracy": 0.9312126155519304,
"eval_f1_score": 0.9179345627885324,
"eval_loss": 0.4539467692375183,
"eval_precision": 0.919620307390688,
"eval_recall": 0.9172409654840804,
"eval_runtime": 6.6136,
"eval_samples_per_second": 556.129,
"eval_steps_per_second": 8.77,
"step": 1500
},
{
"epoch": 13.91304347826087,
"grad_norm": 11.763055801391602,
"learning_rate": 6.666666666666667e-06,
"loss": 0.3186,
"step": 1600
},
{
"epoch": 13.91304347826087,
"eval_accuracy": 0.9320282762370854,
"eval_f1_score": 0.9219566242363927,
"eval_loss": 0.4532802700996399,
"eval_precision": 0.9298453666516142,
"eval_recall": 0.914622870958479,
"eval_runtime": 6.6786,
"eval_samples_per_second": 550.714,
"eval_steps_per_second": 8.684,
"step": 1600
},
{
"epoch": 14.782608695652174,
"grad_norm": 4.790759563446045,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.3146,
"step": 1700
},
{
"epoch": 14.782608695652174,
"eval_accuracy": 0.935562805872757,
"eval_f1_score": 0.9245866574233509,
"eval_loss": 0.4484989047050476,
"eval_precision": 0.9280507721712984,
"eval_recall": 0.9224481039362212,
"eval_runtime": 6.5869,
"eval_samples_per_second": 558.384,
"eval_steps_per_second": 8.805,
"step": 1700
},
{
"epoch": 15.652173913043478,
"grad_norm": 5.715475559234619,
"learning_rate": 6.111111111111112e-06,
"loss": 0.3093,
"step": 1800
},
{
"epoch": 15.652173913043478,
"eval_accuracy": 0.9325720500271887,
"eval_f1_score": 0.9193696012739166,
"eval_loss": 0.45573264360427856,
"eval_precision": 0.9290964969084188,
"eval_recall": 0.9124598145426113,
"eval_runtime": 6.6184,
"eval_samples_per_second": 555.721,
"eval_steps_per_second": 8.763,
"step": 1800
},
{
"epoch": 16.52173913043478,
"grad_norm": 7.8289079666137695,
"learning_rate": 5.833333333333334e-06,
"loss": 0.3019,
"step": 1900
},
{
"epoch": 16.52173913043478,
"eval_accuracy": 0.9290375203915171,
"eval_f1_score": 0.9169347716468026,
"eval_loss": 0.46843111515045166,
"eval_precision": 0.9128246753731022,
"eval_recall": 0.923370474591805,
"eval_runtime": 6.6032,
"eval_samples_per_second": 557.0,
"eval_steps_per_second": 8.784,
"step": 1900
},
{
"epoch": 17.391304347826086,
"grad_norm": 5.883206844329834,
"learning_rate": 5.555555555555557e-06,
"loss": 0.2985,
"step": 2000
},
{
"epoch": 17.391304347826086,
"eval_accuracy": 0.9347471451876019,
"eval_f1_score": 0.92475047525614,
"eval_loss": 0.4544869661331177,
"eval_precision": 0.9259386916015938,
"eval_recall": 0.9237552045152712,
"eval_runtime": 6.6237,
"eval_samples_per_second": 555.275,
"eval_steps_per_second": 8.756,
"step": 2000
},
{
"epoch": 18.26086956521739,
"grad_norm": 8.993196487426758,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.2959,
"step": 2100
},
{
"epoch": 18.26086956521739,
"eval_accuracy": 0.9333877107123436,
"eval_f1_score": 0.9219602449474701,
"eval_loss": 0.46893206238746643,
"eval_precision": 0.92490748541024,
"eval_recall": 0.9208243882860574,
"eval_runtime": 6.7132,
"eval_samples_per_second": 547.875,
"eval_steps_per_second": 8.64,
"step": 2100
},
{
"epoch": 19.130434782608695,
"grad_norm": 6.87612771987915,
"learning_rate": 5e-06,
"loss": 0.2891,
"step": 2200
},
{
"epoch": 19.130434782608695,
"eval_accuracy": 0.9385535617183252,
"eval_f1_score": 0.9262085591552154,
"eval_loss": 0.4558440148830414,
"eval_precision": 0.9360271609767613,
"eval_recall": 0.9180112460726695,
"eval_runtime": 6.5974,
"eval_samples_per_second": 557.491,
"eval_steps_per_second": 8.791,
"step": 2200
},
{
"epoch": 20.0,
"grad_norm": 8.078782081604004,
"learning_rate": 4.722222222222222e-06,
"loss": 0.2905,
"step": 2300
},
{
"epoch": 20.0,
"eval_accuracy": 0.9358346927678086,
"eval_f1_score": 0.9227437963525783,
"eval_loss": 0.45897340774536133,
"eval_precision": 0.9307760201675803,
"eval_recall": 0.9162662496043394,
"eval_runtime": 6.6433,
"eval_samples_per_second": 553.643,
"eval_steps_per_second": 8.731,
"step": 2300
},
{
"epoch": 20.869565217391305,
"grad_norm": 6.95852518081665,
"learning_rate": 4.444444444444444e-06,
"loss": 0.2875,
"step": 2400
},
{
"epoch": 20.869565217391305,
"eval_accuracy": 0.9306688417618271,
"eval_f1_score": 0.9192946959387055,
"eval_loss": 0.4796580672264099,
"eval_precision": 0.9267958791034487,
"eval_recall": 0.9145913486548557,
"eval_runtime": 6.5982,
"eval_samples_per_second": 557.426,
"eval_steps_per_second": 8.79,
"step": 2400
},
{
"epoch": 21.73913043478261,
"grad_norm": 5.104602336883545,
"learning_rate": 4.166666666666667e-06,
"loss": 0.2812,
"step": 2500
},
{
"epoch": 21.73913043478261,
"eval_accuracy": 0.935562805872757,
"eval_f1_score": 0.9246927415584684,
"eval_loss": 0.46965479850769043,
"eval_precision": 0.9241768322453005,
"eval_recall": 0.9257346798390873,
"eval_runtime": 6.6685,
"eval_samples_per_second": 551.547,
"eval_steps_per_second": 8.698,
"step": 2500
},
{
"epoch": 22.608695652173914,
"grad_norm": 1.9872806072235107,
"learning_rate": 3.88888888888889e-06,
"loss": 0.2789,
"step": 2600
},
{
"epoch": 22.608695652173914,
"eval_accuracy": 0.9380097879282219,
"eval_f1_score": 0.9255376836601789,
"eval_loss": 0.46675482392311096,
"eval_precision": 0.9271185300366118,
"eval_recall": 0.9250032647695392,
"eval_runtime": 6.6231,
"eval_samples_per_second": 555.328,
"eval_steps_per_second": 8.757,
"step": 2600
},
{
"epoch": 23.47826086956522,
"grad_norm": 4.836044788360596,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.2785,
"step": 2700
},
{
"epoch": 23.47826086956522,
"eval_accuracy": 0.9382816748232735,
"eval_f1_score": 0.9293325929996209,
"eval_loss": 0.4671032130718231,
"eval_precision": 0.9288859327813286,
"eval_recall": 0.9301070328166979,
"eval_runtime": 6.6888,
"eval_samples_per_second": 549.877,
"eval_steps_per_second": 8.671,
"step": 2700
},
{
"epoch": 24.347826086956523,
"grad_norm": 2.3744585514068604,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.2773,
"step": 2800
},
{
"epoch": 24.347826086956523,
"eval_accuracy": 0.9390973355084284,
"eval_f1_score": 0.9293323313487988,
"eval_loss": 0.46571776270866394,
"eval_precision": 0.9327721009515447,
"eval_recall": 0.927360363082818,
"eval_runtime": 6.6343,
"eval_samples_per_second": 554.389,
"eval_steps_per_second": 8.742,
"step": 2800
},
{
"epoch": 25.217391304347824,
"grad_norm": 4.463809490203857,
"learning_rate": 3.055555555555556e-06,
"loss": 0.2814,
"step": 2900
},
{
"epoch": 25.217391304347824,
"eval_accuracy": 0.9361065796628603,
"eval_f1_score": 0.9259199302104238,
"eval_loss": 0.47015631198883057,
"eval_precision": 0.924434133110186,
"eval_recall": 0.9285494955527653,
"eval_runtime": 6.6115,
"eval_samples_per_second": 556.303,
"eval_steps_per_second": 8.773,
"step": 2900
},
{
"epoch": 26.08695652173913,
"grad_norm": 11.97252082824707,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.2744,
"step": 3000
},
{
"epoch": 26.08695652173913,
"eval_accuracy": 0.9352909189777052,
"eval_f1_score": 0.9273773946315609,
"eval_loss": 0.4731716811656952,
"eval_precision": 0.9272954212892072,
"eval_recall": 0.929014459015713,
"eval_runtime": 6.6523,
"eval_samples_per_second": 552.888,
"eval_steps_per_second": 8.719,
"step": 3000
},
{
"epoch": 26.956521739130434,
"grad_norm": 8.698735237121582,
"learning_rate": 2.5e-06,
"loss": 0.2772,
"step": 3100
},
{
"epoch": 26.956521739130434,
"eval_accuracy": 0.9388254486133768,
"eval_f1_score": 0.9280828388852939,
"eval_loss": 0.46764281392097473,
"eval_precision": 0.9264283970809257,
"eval_recall": 0.9301128430609281,
"eval_runtime": 6.6577,
"eval_samples_per_second": 552.441,
"eval_steps_per_second": 8.712,
"step": 3100
},
{
"epoch": 27.82608695652174,
"grad_norm": 9.868401527404785,
"learning_rate": 2.222222222222222e-06,
"loss": 0.2736,
"step": 3200
},
{
"epoch": 27.82608695652174,
"eval_accuracy": 0.9393692224034802,
"eval_f1_score": 0.9280880031444269,
"eval_loss": 0.46609246730804443,
"eval_precision": 0.9325449452470522,
"eval_recall": 0.9241991833476634,
"eval_runtime": 6.6629,
"eval_samples_per_second": 552.016,
"eval_steps_per_second": 8.705,
"step": 3200
},
{
"epoch": 28.695652173913043,
"grad_norm": 13.52723217010498,
"learning_rate": 1.944444444444445e-06,
"loss": 0.2754,
"step": 3300
},
{
"epoch": 28.695652173913043,
"eval_accuracy": 0.9366503534529635,
"eval_f1_score": 0.925681100132558,
"eval_loss": 0.4745844602584839,
"eval_precision": 0.9287802003680891,
"eval_recall": 0.9233293953593443,
"eval_runtime": 6.6962,
"eval_samples_per_second": 549.264,
"eval_steps_per_second": 8.662,
"step": 3300
},
{
"epoch": 29.565217391304348,
"grad_norm": 0.09545432776212692,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.2717,
"step": 3400
},
{
"epoch": 29.565217391304348,
"eval_accuracy": 0.9380097879282219,
"eval_f1_score": 0.9283345025534331,
"eval_loss": 0.46884092688560486,
"eval_precision": 0.9315263692536144,
"eval_recall": 0.925530596287616,
"eval_runtime": 6.6275,
"eval_samples_per_second": 554.958,
"eval_steps_per_second": 8.751,
"step": 3400
},
{
"epoch": 30.434782608695652,
"grad_norm": 1.8131216764450073,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.27,
"step": 3500
},
{
"epoch": 30.434782608695652,
"eval_accuracy": 0.9388254486133768,
"eval_f1_score": 0.9303895468552515,
"eval_loss": 0.4696621894836426,
"eval_precision": 0.9308188528131611,
"eval_recall": 0.9307349860958422,
"eval_runtime": 6.6391,
"eval_samples_per_second": 553.987,
"eval_steps_per_second": 8.736,
"step": 3500
},
{
"epoch": 31.304347826086957,
"grad_norm": 1.2362151145935059,
"learning_rate": 1.111111111111111e-06,
"loss": 0.2674,
"step": 3600
},
{
"epoch": 31.304347826086957,
"eval_accuracy": 0.9390973355084284,
"eval_f1_score": 0.9273897248556044,
"eval_loss": 0.466818243265152,
"eval_precision": 0.9240182135116143,
"eval_recall": 0.9311490745870729,
"eval_runtime": 6.6874,
"eval_samples_per_second": 549.992,
"eval_steps_per_second": 8.673,
"step": 3600
},
{
"epoch": 32.17391304347826,
"grad_norm": 6.612317085266113,
"learning_rate": 8.333333333333333e-07,
"loss": 0.2693,
"step": 3700
},
{
"epoch": 32.17391304347826,
"eval_accuracy": 0.9407286568787384,
"eval_f1_score": 0.9319305111443131,
"eval_loss": 0.46566537022590637,
"eval_precision": 0.9319435555053062,
"eval_recall": 0.9325682397024007,
"eval_runtime": 6.6594,
"eval_samples_per_second": 552.298,
"eval_steps_per_second": 8.709,
"step": 3700
},
{
"epoch": 33.04347826086956,
"grad_norm": 3.6944832801818848,
"learning_rate": 5.555555555555555e-07,
"loss": 0.2685,
"step": 3800
},
{
"epoch": 33.04347826086956,
"eval_accuracy": 0.9401848830886351,
"eval_f1_score": 0.9298408543588726,
"eval_loss": 0.46719253063201904,
"eval_precision": 0.9297325884730158,
"eval_recall": 0.9303720415200172,
"eval_runtime": 6.6751,
"eval_samples_per_second": 551.006,
"eval_steps_per_second": 8.689,
"step": 3800
},
{
"epoch": 33.91304347826087,
"grad_norm": 0.5045357346534729,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.268,
"step": 3900
},
{
"epoch": 33.91304347826087,
"eval_accuracy": 0.94100054377379,
"eval_f1_score": 0.9316787163771599,
"eval_loss": 0.46683618426322937,
"eval_precision": 0.9328330606111085,
"eval_recall": 0.9311209947261558,
"eval_runtime": 6.6584,
"eval_samples_per_second": 552.388,
"eval_steps_per_second": 8.711,
"step": 3900
},
{
"epoch": 34.78260869565217,
"grad_norm": 0.15777729451656342,
"learning_rate": 0.0,
"loss": 0.272,
"step": 4000
},
{
"epoch": 34.78260869565217,
"eval_accuracy": 0.9401848830886351,
"eval_f1_score": 0.9309979115269789,
"eval_loss": 0.46536290645599365,
"eval_precision": 0.9324573806162856,
"eval_recall": 0.9300452091014509,
"eval_runtime": 6.6565,
"eval_samples_per_second": 552.546,
"eval_steps_per_second": 8.713,
"step": 4000
},
{
"epoch": 34.78260869565217,
"step": 4000,
"total_flos": 8662131210539100.0,
"train_loss": 0.4411096167564392,
"train_runtime": 3432.8242,
"train_samples_per_second": 149.148,
"train_steps_per_second": 1.165
}
],
"logging_steps": 100,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 100,
"total_flos": 8662131210539100.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}