{
  "best_metric": 0.9319305111443131,
  "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-3700",
  "epoch": 34.78260869565217,
  "eval_steps": 100,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 1.2353936433792114,
      "learning_rate": 2.5e-06,
      "loss": 1.8571,
      "step": 100
    },
    {
      "epoch": 0.8695652173913043,
      "eval_accuracy": 0.3985861881457314,
      "eval_f1_score": 0.08142634970006665,
      "eval_loss": 1.6996082067489624,
      "eval_precision": 0.05694088402081877,
      "eval_recall": 0.14285714285714285,
      "eval_runtime": 6.7198,
      "eval_samples_per_second": 547.337,
      "eval_steps_per_second": 8.631,
      "step": 100
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 2.164222240447998,
      "learning_rate": 5e-06,
      "loss": 1.552,
      "step": 200
    },
    {
      "epoch": 1.7391304347826086,
      "eval_accuracy": 0.6150081566068516,
      "eval_f1_score": 0.25519317041574235,
      "eval_loss": 1.287759780883789,
      "eval_precision": 0.27379414130499563,
      "eval_recall": 0.28595668999910956,
      "eval_runtime": 6.6372,
      "eval_samples_per_second": 554.145,
      "eval_steps_per_second": 8.739,
      "step": 200
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 5.309168815612793,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.1701,
      "step": 300
    },
    {
      "epoch": 2.608695652173913,
      "eval_accuracy": 0.7746057640021751,
      "eval_f1_score": 0.5380183437718388,
      "eval_loss": 0.9308958649635315,
      "eval_precision": 0.5797370934127978,
      "eval_recall": 0.5249138773909011,
      "eval_runtime": 6.5583,
      "eval_samples_per_second": 560.819,
      "eval_steps_per_second": 8.844,
      "step": 300
    },
    {
      "epoch": 3.4782608695652173,
      "grad_norm": 6.065152645111084,
      "learning_rate": 1e-05,
      "loss": 0.8958,
      "step": 400
    },
    {
      "epoch": 3.4782608695652173,
      "eval_accuracy": 0.8371397498640566,
      "eval_f1_score": 0.6099241108684058,
      "eval_loss": 0.7467954754829407,
      "eval_precision": 0.6113332602554015,
      "eval_recall": 0.6120619930302185,
      "eval_runtime": 6.7113,
      "eval_samples_per_second": 548.029,
      "eval_steps_per_second": 8.642,
      "step": 400
    },
    {
      "epoch": 4.3478260869565215,
      "grad_norm": 6.0916032791137695,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.7463,
      "step": 500
    },
    {
      "epoch": 4.3478260869565215,
      "eval_accuracy": 0.8640565524741708,
      "eval_f1_score": 0.6758460592010713,
      "eval_loss": 0.6539974808692932,
      "eval_precision": 0.7556366523817865,
      "eval_recall": 0.6741270736536388,
      "eval_runtime": 6.6507,
      "eval_samples_per_second": 553.021,
      "eval_steps_per_second": 8.721,
      "step": 500
    },
    {
      "epoch": 5.217391304347826,
      "grad_norm": 5.243598461151123,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.6489,
      "step": 600
    },
    {
      "epoch": 5.217391304347826,
      "eval_accuracy": 0.8866231647634584,
      "eval_f1_score": 0.7501544920720539,
      "eval_loss": 0.5884435772895813,
      "eval_precision": 0.7611116878989773,
      "eval_recall": 0.7443224665881154,
      "eval_runtime": 6.5994,
      "eval_samples_per_second": 557.321,
      "eval_steps_per_second": 8.789,
      "step": 600
    },
    {
      "epoch": 6.086956521739131,
      "grad_norm": 4.099198341369629,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.5604,
      "step": 700
    },
    {
      "epoch": 6.086956521739131,
      "eval_accuracy": 0.9010331702011963,
      "eval_f1_score": 0.8349576474350455,
      "eval_loss": 0.5296739339828491,
      "eval_precision": 0.906039613501241,
      "eval_recall": 0.8195735271786126,
      "eval_runtime": 6.5841,
      "eval_samples_per_second": 558.615,
      "eval_steps_per_second": 8.809,
      "step": 700
    },
    {
      "epoch": 6.956521739130435,
      "grad_norm": 7.3427557945251465,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.4907,
      "step": 800
    },
    {
      "epoch": 6.956521739130435,
      "eval_accuracy": 0.9170744970092441,
      "eval_f1_score": 0.8961939258721013,
      "eval_loss": 0.4928275942802429,
      "eval_precision": 0.9189847475576503,
      "eval_recall": 0.8769461858418355,
      "eval_runtime": 6.5949,
      "eval_samples_per_second": 557.707,
      "eval_steps_per_second": 8.795,
      "step": 800
    },
    {
      "epoch": 7.826086956521739,
      "grad_norm": 5.116893768310547,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.4428,
      "step": 900
    },
    {
      "epoch": 7.826086956521739,
      "eval_accuracy": 0.921968461120174,
      "eval_f1_score": 0.9048377528141078,
      "eval_loss": 0.46924909949302673,
      "eval_precision": 0.9169978450568272,
      "eval_recall": 0.8957923531168384,
      "eval_runtime": 6.6744,
      "eval_samples_per_second": 551.061,
      "eval_steps_per_second": 8.69,
      "step": 900
    },
    {
      "epoch": 8.695652173913043,
      "grad_norm": 3.862825393676758,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.4086,
      "step": 1000
    },
    {
      "epoch": 8.695652173913043,
      "eval_accuracy": 0.9235997824904839,
      "eval_f1_score": 0.9073197371323319,
      "eval_loss": 0.4600367546081543,
      "eval_precision": 0.8977722877003532,
      "eval_recall": 0.9183169396463787,
      "eval_runtime": 6.5594,
      "eval_samples_per_second": 560.722,
      "eval_steps_per_second": 8.842,
      "step": 1000
    },
    {
      "epoch": 9.565217391304348,
      "grad_norm": 3.6134090423583984,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.3892,
      "step": 1100
    },
    {
      "epoch": 9.565217391304348,
      "eval_accuracy": 0.9293094072865687,
      "eval_f1_score": 0.9155883832712851,
      "eval_loss": 0.45303475856781006,
      "eval_precision": 0.9155788758247504,
      "eval_recall": 0.9158764039642001,
      "eval_runtime": 6.5742,
      "eval_samples_per_second": 559.46,
      "eval_steps_per_second": 8.822,
      "step": 1100
    },
    {
      "epoch": 10.434782608695652,
      "grad_norm": 6.361451148986816,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.3659,
      "step": 1200
    },
    {
      "epoch": 10.434782608695652,
      "eval_accuracy": 0.9257748776508973,
      "eval_f1_score": 0.915368183730898,
      "eval_loss": 0.4573982357978821,
      "eval_precision": 0.9071440635058929,
      "eval_recall": 0.9257672878056209,
      "eval_runtime": 6.6383,
      "eval_samples_per_second": 554.054,
      "eval_steps_per_second": 8.737,
      "step": 1200
    },
    {
      "epoch": 11.304347826086957,
      "grad_norm": 6.6150312423706055,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3577,
      "step": 1300
    },
    {
      "epoch": 11.304347826086957,
      "eval_accuracy": 0.9287656334964655,
      "eval_f1_score": 0.9158955580622885,
      "eval_loss": 0.45325955748558044,
      "eval_precision": 0.9151528325914676,
      "eval_recall": 0.9176855536409609,
      "eval_runtime": 6.6572,
      "eval_samples_per_second": 552.485,
      "eval_steps_per_second": 8.712,
      "step": 1300
    },
    {
      "epoch": 12.173913043478262,
      "grad_norm": 5.580691814422607,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.338,
      "step": 1400
    },
    {
      "epoch": 12.173913043478262,
      "eval_accuracy": 0.933931484502447,
      "eval_f1_score": 0.9203137031893097,
      "eval_loss": 0.44535157084465027,
      "eval_precision": 0.9127875535751154,
      "eval_recall": 0.9284587781330601,
      "eval_runtime": 6.607,
      "eval_samples_per_second": 556.686,
      "eval_steps_per_second": 8.779,
      "step": 1400
    },
    {
      "epoch": 13.043478260869565,
      "grad_norm": 3.8554763793945312,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.3302,
      "step": 1500
    },
    {
      "epoch": 13.043478260869565,
      "eval_accuracy": 0.9312126155519304,
      "eval_f1_score": 0.9179345627885324,
      "eval_loss": 0.4539467692375183,
      "eval_precision": 0.919620307390688,
      "eval_recall": 0.9172409654840804,
      "eval_runtime": 6.6136,
      "eval_samples_per_second": 556.129,
      "eval_steps_per_second": 8.77,
      "step": 1500
    },
    {
      "epoch": 13.91304347826087,
      "grad_norm": 11.763055801391602,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.3186,
      "step": 1600
    },
    {
      "epoch": 13.91304347826087,
      "eval_accuracy": 0.9320282762370854,
      "eval_f1_score": 0.9219566242363927,
      "eval_loss": 0.4532802700996399,
      "eval_precision": 0.9298453666516142,
      "eval_recall": 0.914622870958479,
      "eval_runtime": 6.6786,
      "eval_samples_per_second": 550.714,
      "eval_steps_per_second": 8.684,
      "step": 1600
    },
    {
      "epoch": 14.782608695652174,
      "grad_norm": 4.790759563446045,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.3146,
      "step": 1700
    },
    {
      "epoch": 14.782608695652174,
      "eval_accuracy": 0.935562805872757,
      "eval_f1_score": 0.9245866574233509,
      "eval_loss": 0.4484989047050476,
      "eval_precision": 0.9280507721712984,
      "eval_recall": 0.9224481039362212,
      "eval_runtime": 6.5869,
      "eval_samples_per_second": 558.384,
      "eval_steps_per_second": 8.805,
      "step": 1700
    },
    {
      "epoch": 15.652173913043478,
      "grad_norm": 5.715475559234619,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.3093,
      "step": 1800
    },
    {
      "epoch": 15.652173913043478,
      "eval_accuracy": 0.9325720500271887,
      "eval_f1_score": 0.9193696012739166,
      "eval_loss": 0.45573264360427856,
      "eval_precision": 0.9290964969084188,
      "eval_recall": 0.9124598145426113,
      "eval_runtime": 6.6184,
      "eval_samples_per_second": 555.721,
      "eval_steps_per_second": 8.763,
      "step": 1800
    },
    {
      "epoch": 16.52173913043478,
      "grad_norm": 7.8289079666137695,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.3019,
      "step": 1900
    },
    {
      "epoch": 16.52173913043478,
      "eval_accuracy": 0.9290375203915171,
      "eval_f1_score": 0.9169347716468026,
      "eval_loss": 0.46843111515045166,
      "eval_precision": 0.9128246753731022,
      "eval_recall": 0.923370474591805,
      "eval_runtime": 6.6032,
      "eval_samples_per_second": 557.0,
      "eval_steps_per_second": 8.784,
      "step": 1900
    },
    {
      "epoch": 17.391304347826086,
      "grad_norm": 5.883206844329834,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.2985,
      "step": 2000
    },
    {
      "epoch": 17.391304347826086,
      "eval_accuracy": 0.9347471451876019,
      "eval_f1_score": 0.92475047525614,
      "eval_loss": 0.4544869661331177,
      "eval_precision": 0.9259386916015938,
      "eval_recall": 0.9237552045152712,
      "eval_runtime": 6.6237,
      "eval_samples_per_second": 555.275,
      "eval_steps_per_second": 8.756,
      "step": 2000
    },
    {
      "epoch": 18.26086956521739,
      "grad_norm": 8.993196487426758,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.2959,
      "step": 2100
    },
    {
      "epoch": 18.26086956521739,
      "eval_accuracy": 0.9333877107123436,
      "eval_f1_score": 0.9219602449474701,
      "eval_loss": 0.46893206238746643,
      "eval_precision": 0.92490748541024,
      "eval_recall": 0.9208243882860574,
      "eval_runtime": 6.7132,
      "eval_samples_per_second": 547.875,
      "eval_steps_per_second": 8.64,
      "step": 2100
    },
    {
      "epoch": 19.130434782608695,
      "grad_norm": 6.87612771987915,
      "learning_rate": 5e-06,
      "loss": 0.2891,
      "step": 2200
    },
    {
      "epoch": 19.130434782608695,
      "eval_accuracy": 0.9385535617183252,
      "eval_f1_score": 0.9262085591552154,
      "eval_loss": 0.4558440148830414,
      "eval_precision": 0.9360271609767613,
      "eval_recall": 0.9180112460726695,
      "eval_runtime": 6.5974,
      "eval_samples_per_second": 557.491,
      "eval_steps_per_second": 8.791,
      "step": 2200
    },
    {
      "epoch": 20.0,
      "grad_norm": 8.078782081604004,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.2905,
      "step": 2300
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9358346927678086,
      "eval_f1_score": 0.9227437963525783,
      "eval_loss": 0.45897340774536133,
      "eval_precision": 0.9307760201675803,
      "eval_recall": 0.9162662496043394,
      "eval_runtime": 6.6433,
      "eval_samples_per_second": 553.643,
      "eval_steps_per_second": 8.731,
      "step": 2300
    },
    {
      "epoch": 20.869565217391305,
      "grad_norm": 6.95852518081665,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.2875,
      "step": 2400
    },
    {
      "epoch": 20.869565217391305,
      "eval_accuracy": 0.9306688417618271,
      "eval_f1_score": 0.9192946959387055,
      "eval_loss": 0.4796580672264099,
      "eval_precision": 0.9267958791034487,
      "eval_recall": 0.9145913486548557,
      "eval_runtime": 6.5982,
      "eval_samples_per_second": 557.426,
      "eval_steps_per_second": 8.79,
      "step": 2400
    },
    {
      "epoch": 21.73913043478261,
      "grad_norm": 5.104602336883545,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.2812,
      "step": 2500
    },
    {
      "epoch": 21.73913043478261,
      "eval_accuracy": 0.935562805872757,
      "eval_f1_score": 0.9246927415584684,
      "eval_loss": 0.46965479850769043,
      "eval_precision": 0.9241768322453005,
      "eval_recall": 0.9257346798390873,
      "eval_runtime": 6.6685,
      "eval_samples_per_second": 551.547,
      "eval_steps_per_second": 8.698,
      "step": 2500
    },
    {
      "epoch": 22.608695652173914,
      "grad_norm": 1.9872806072235107,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.2789,
      "step": 2600
    },
    {
      "epoch": 22.608695652173914,
      "eval_accuracy": 0.9380097879282219,
      "eval_f1_score": 0.9255376836601789,
      "eval_loss": 0.46675482392311096,
      "eval_precision": 0.9271185300366118,
      "eval_recall": 0.9250032647695392,
      "eval_runtime": 6.6231,
      "eval_samples_per_second": 555.328,
      "eval_steps_per_second": 8.757,
      "step": 2600
    },
    {
      "epoch": 23.47826086956522,
      "grad_norm": 4.836044788360596,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.2785,
      "step": 2700
    },
    {
      "epoch": 23.47826086956522,
      "eval_accuracy": 0.9382816748232735,
      "eval_f1_score": 0.9293325929996209,
      "eval_loss": 0.4671032130718231,
      "eval_precision": 0.9288859327813286,
      "eval_recall": 0.9301070328166979,
      "eval_runtime": 6.6888,
      "eval_samples_per_second": 549.877,
      "eval_steps_per_second": 8.671,
      "step": 2700
    },
    {
      "epoch": 24.347826086956523,
      "grad_norm": 2.3744585514068604,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.2773,
      "step": 2800
    },
    {
      "epoch": 24.347826086956523,
      "eval_accuracy": 0.9390973355084284,
      "eval_f1_score": 0.9293323313487988,
      "eval_loss": 0.46571776270866394,
      "eval_precision": 0.9327721009515447,
      "eval_recall": 0.927360363082818,
      "eval_runtime": 6.6343,
      "eval_samples_per_second": 554.389,
      "eval_steps_per_second": 8.742,
      "step": 2800
    },
    {
      "epoch": 25.217391304347824,
      "grad_norm": 4.463809490203857,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.2814,
      "step": 2900
    },
    {
      "epoch": 25.217391304347824,
      "eval_accuracy": 0.9361065796628603,
      "eval_f1_score": 0.9259199302104238,
      "eval_loss": 0.47015631198883057,
      "eval_precision": 0.924434133110186,
      "eval_recall": 0.9285494955527653,
      "eval_runtime": 6.6115,
      "eval_samples_per_second": 556.303,
      "eval_steps_per_second": 8.773,
      "step": 2900
    },
    {
      "epoch": 26.08695652173913,
      "grad_norm": 11.97252082824707,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.2744,
      "step": 3000
    },
    {
      "epoch": 26.08695652173913,
      "eval_accuracy": 0.9352909189777052,
      "eval_f1_score": 0.9273773946315609,
      "eval_loss": 0.4731716811656952,
      "eval_precision": 0.9272954212892072,
      "eval_recall": 0.929014459015713,
      "eval_runtime": 6.6523,
      "eval_samples_per_second": 552.888,
      "eval_steps_per_second": 8.719,
      "step": 3000
    },
    {
      "epoch": 26.956521739130434,
      "grad_norm": 8.698735237121582,
      "learning_rate": 2.5e-06,
      "loss": 0.2772,
      "step": 3100
    },
    {
      "epoch": 26.956521739130434,
      "eval_accuracy": 0.9388254486133768,
      "eval_f1_score": 0.9280828388852939,
      "eval_loss": 0.46764281392097473,
      "eval_precision": 0.9264283970809257,
      "eval_recall": 0.9301128430609281,
      "eval_runtime": 6.6577,
      "eval_samples_per_second": 552.441,
      "eval_steps_per_second": 8.712,
      "step": 3100
    },
    {
      "epoch": 27.82608695652174,
      "grad_norm": 9.868401527404785,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.2736,
      "step": 3200
    },
    {
      "epoch": 27.82608695652174,
      "eval_accuracy": 0.9393692224034802,
      "eval_f1_score": 0.9280880031444269,
      "eval_loss": 0.46609246730804443,
      "eval_precision": 0.9325449452470522,
      "eval_recall": 0.9241991833476634,
      "eval_runtime": 6.6629,
      "eval_samples_per_second": 552.016,
      "eval_steps_per_second": 8.705,
      "step": 3200
    },
    {
      "epoch": 28.695652173913043,
      "grad_norm": 13.52723217010498,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.2754,
      "step": 3300
    },
    {
      "epoch": 28.695652173913043,
      "eval_accuracy": 0.9366503534529635,
      "eval_f1_score": 0.925681100132558,
      "eval_loss": 0.4745844602584839,
      "eval_precision": 0.9287802003680891,
      "eval_recall": 0.9233293953593443,
      "eval_runtime": 6.6962,
      "eval_samples_per_second": 549.264,
      "eval_steps_per_second": 8.662,
      "step": 3300
    },
    {
      "epoch": 29.565217391304348,
      "grad_norm": 0.09545432776212692,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.2717,
      "step": 3400
    },
    {
      "epoch": 29.565217391304348,
      "eval_accuracy": 0.9380097879282219,
      "eval_f1_score": 0.9283345025534331,
      "eval_loss": 0.46884092688560486,
      "eval_precision": 0.9315263692536144,
      "eval_recall": 0.925530596287616,
      "eval_runtime": 6.6275,
      "eval_samples_per_second": 554.958,
      "eval_steps_per_second": 8.751,
      "step": 3400
    },
    {
      "epoch": 30.434782608695652,
      "grad_norm": 1.8131216764450073,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.27,
      "step": 3500
    },
    {
      "epoch": 30.434782608695652,
      "eval_accuracy": 0.9388254486133768,
      "eval_f1_score": 0.9303895468552515,
      "eval_loss": 0.4696621894836426,
      "eval_precision": 0.9308188528131611,
      "eval_recall": 0.9307349860958422,
      "eval_runtime": 6.6391,
      "eval_samples_per_second": 553.987,
      "eval_steps_per_second": 8.736,
      "step": 3500
    },
    {
      "epoch": 31.304347826086957,
      "grad_norm": 1.2362151145935059,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.2674,
      "step": 3600
    },
    {
      "epoch": 31.304347826086957,
      "eval_accuracy": 0.9390973355084284,
      "eval_f1_score": 0.9273897248556044,
      "eval_loss": 0.466818243265152,
      "eval_precision": 0.9240182135116143,
      "eval_recall": 0.9311490745870729,
      "eval_runtime": 6.6874,
      "eval_samples_per_second": 549.992,
      "eval_steps_per_second": 8.673,
      "step": 3600
    },
    {
      "epoch": 32.17391304347826,
      "grad_norm": 6.612317085266113,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.2693,
      "step": 3700
    },
    {
      "epoch": 32.17391304347826,
      "eval_accuracy": 0.9407286568787384,
      "eval_f1_score": 0.9319305111443131,
      "eval_loss": 0.46566537022590637,
      "eval_precision": 0.9319435555053062,
      "eval_recall": 0.9325682397024007,
      "eval_runtime": 6.6594,
      "eval_samples_per_second": 552.298,
      "eval_steps_per_second": 8.709,
      "step": 3700
    },
    {
      "epoch": 33.04347826086956,
      "grad_norm": 3.6944832801818848,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.2685,
      "step": 3800
    },
    {
      "epoch": 33.04347826086956,
      "eval_accuracy": 0.9401848830886351,
      "eval_f1_score": 0.9298408543588726,
      "eval_loss": 0.46719253063201904,
      "eval_precision": 0.9297325884730158,
      "eval_recall": 0.9303720415200172,
      "eval_runtime": 6.6751,
      "eval_samples_per_second": 551.006,
      "eval_steps_per_second": 8.689,
      "step": 3800
    },
    {
      "epoch": 33.91304347826087,
      "grad_norm": 0.5045357346534729,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.268,
      "step": 3900
    },
    {
      "epoch": 33.91304347826087,
      "eval_accuracy": 0.94100054377379,
      "eval_f1_score": 0.9316787163771599,
      "eval_loss": 0.46683618426322937,
      "eval_precision": 0.9328330606111085,
      "eval_recall": 0.9311209947261558,
      "eval_runtime": 6.6584,
      "eval_samples_per_second": 552.388,
      "eval_steps_per_second": 8.711,
      "step": 3900
    },
    {
      "epoch": 34.78260869565217,
      "grad_norm": 0.15777729451656342,
      "learning_rate": 0.0,
      "loss": 0.272,
      "step": 4000
    },
    {
      "epoch": 34.78260869565217,
      "eval_accuracy": 0.9401848830886351,
      "eval_f1_score": 0.9309979115269789,
      "eval_loss": 0.46536290645599365,
      "eval_precision": 0.9324573806162856,
      "eval_recall": 0.9300452091014509,
      "eval_runtime": 6.6565,
      "eval_samples_per_second": 552.546,
      "eval_steps_per_second": 8.713,
      "step": 4000
    },
    {
      "epoch": 34.78260869565217,
      "step": 4000,
      "total_flos": 8662131210539100.0,
      "train_loss": 0.4411096167564392,
      "train_runtime": 3432.8242,
      "train_samples_per_second": 149.148,
      "train_steps_per_second": 1.165
    }
  ],
  "logging_steps": 100,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 35,
  "save_steps": 100,
  "total_flos": 8662131210539100.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}