{ "best_metric": 0.9261695691084951, "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-2000", "epoch": 26.08695652173913, "eval_steps": 100, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8695652173913043, "grad_norm": 2.4986989498138428, "learning_rate": 2.5e-06, "loss": 1.8947, "step": 100 }, { "epoch": 0.8695652173913043, "eval_accuracy": 0.4001087251970644, "eval_f1_score": 0.08320411950694513, "eval_loss": 1.68748140335083, "eval_precision": 0.14637409036074248, "eval_recall": 0.14367816091954025, "eval_runtime": 6.6876, "eval_samples_per_second": 550.125, "eval_steps_per_second": 8.673, "step": 100 }, { "epoch": 1.7391304347826086, "grad_norm": 1.596021294593811, "learning_rate": 5e-06, "loss": 1.5395, "step": 200 }, { "epoch": 1.7391304347826086, "eval_accuracy": 0.5849415602065778, "eval_f1_score": 0.2355807809182458, "eval_loss": 1.2897096872329712, "eval_precision": 0.27516139357553443, "eval_recall": 0.26320585050663253, "eval_runtime": 6.7112, "eval_samples_per_second": 548.189, "eval_steps_per_second": 8.642, "step": 200 }, { "epoch": 2.608695652173913, "grad_norm": 5.161496162414551, "learning_rate": 7.500000000000001e-06, "loss": 1.1205, "step": 300 }, { "epoch": 2.608695652173913, "eval_accuracy": 0.7999456374014677, "eval_f1_score": 0.5833178086765388, "eval_loss": 0.8468331098556519, "eval_precision": 0.5889764394952819, "eval_recall": 0.5810488238671468, "eval_runtime": 6.6686, "eval_samples_per_second": 551.694, "eval_steps_per_second": 8.698, "step": 300 }, { "epoch": 3.4782608695652173, "grad_norm": 5.227330207824707, "learning_rate": 1e-05, "loss": 0.82, "step": 400 }, { "epoch": 3.4782608695652173, "eval_accuracy": 0.8369122044033704, "eval_f1_score": 0.6179371343772609, "eval_loss": 0.6537477374076843, "eval_precision": 0.6062100200393906, "eval_recall": 0.6355302315827523, "eval_runtime": 6.728, "eval_samples_per_second": 546.823, "eval_steps_per_second": 8.621, "step": 400 }, { "epoch": 4.3478260869565215, "grad_norm": 5.6816534996032715, "learning_rate": 9.722222222222223e-06, "loss": 0.6232, "step": 500 }, { "epoch": 4.3478260869565215, "eval_accuracy": 0.8537646099483556, "eval_f1_score": 0.633743239294036, "eval_loss": 0.537100613117218, "eval_precision": 0.7525070200257705, "eval_recall": 0.6518017678843925, "eval_runtime": 6.7932, "eval_samples_per_second": 541.573, "eval_steps_per_second": 8.538, "step": 500 }, { "epoch": 5.217391304347826, "grad_norm": 5.096814155578613, "learning_rate": 9.444444444444445e-06, "loss": 0.5148, "step": 600 }, { "epoch": 5.217391304347826, "eval_accuracy": 0.872791519434629, "eval_f1_score": 0.7299293979398146, "eval_loss": 0.46505650877952576, "eval_precision": 0.7548552896750885, "eval_recall": 0.7210618976649555, "eval_runtime": 6.7028, "eval_samples_per_second": 548.875, "eval_steps_per_second": 8.653, "step": 600 }, { "epoch": 6.086956521739131, "grad_norm": 5.458530902862549, "learning_rate": 9.166666666666666e-06, "loss": 0.4204, "step": 700 }, { "epoch": 6.086956521739131, "eval_accuracy": 0.8869257950530035, "eval_f1_score": 0.7654329783869755, "eval_loss": 0.40097591280937195, "eval_precision": 0.8914471413846636, "eval_recall": 0.7712133932759179, "eval_runtime": 6.7443, "eval_samples_per_second": 545.497, "eval_steps_per_second": 8.6, "step": 700 }, { "epoch": 6.956521739130435, "grad_norm": 6.144416809082031, "learning_rate": 8.888888888888888e-06, "loss": 0.3421, "step": 800 }, { "epoch": 6.956521739130435, "eval_accuracy": 0.9051372655612938, "eval_f1_score": 0.8713582894968701, "eval_loss": 0.3648029565811157, "eval_precision": 0.8940734807154502, "eval_recall": 0.8588405388993653, "eval_runtime": 6.7352, "eval_samples_per_second": 546.232, "eval_steps_per_second": 8.611, "step": 800 }, { "epoch": 7.826086956521739, "grad_norm": 9.907292366027832, "learning_rate": 8.611111111111112e-06, "loss": 0.2841, "step": 900 }, { "epoch": 7.826086956521739, "eval_accuracy": 0.9181842892090242, "eval_f1_score": 0.9006880118200489, "eval_loss": 0.3239505887031555, "eval_precision": 0.8978148514278343, "eval_recall": 0.9038252102525616, "eval_runtime": 6.7315, "eval_samples_per_second": 546.532, "eval_steps_per_second": 8.616, "step": 900 }, { "epoch": 8.695652173913043, "grad_norm": 6.941843032836914, "learning_rate": 8.333333333333334e-06, "loss": 0.2319, "step": 1000 }, { "epoch": 8.695652173913043, "eval_accuracy": 0.9203587931503125, "eval_f1_score": 0.9060754755748909, "eval_loss": 0.3025033473968506, "eval_precision": 0.9175362378163865, "eval_recall": 0.8975903509513042, "eval_runtime": 6.6908, "eval_samples_per_second": 549.861, "eval_steps_per_second": 8.669, "step": 1000 }, { "epoch": 9.565217391304348, "grad_norm": 4.255012035369873, "learning_rate": 8.055555555555557e-06, "loss": 0.205, "step": 1100 }, { "epoch": 9.565217391304348, "eval_accuracy": 0.9209024191356346, "eval_f1_score": 0.9098640550303895, "eval_loss": 0.29862046241760254, "eval_precision": 0.9123097696068861, "eval_recall": 0.9086287269577242, "eval_runtime": 6.7134, "eval_samples_per_second": 548.01, "eval_steps_per_second": 8.639, "step": 1100 }, { "epoch": 10.434782608695652, "grad_norm": 5.848569393157959, "learning_rate": 7.77777777777778e-06, "loss": 0.1783, "step": 1200 }, { "epoch": 10.434782608695652, "eval_accuracy": 0.9206306061429737, "eval_f1_score": 0.9104384776037051, "eval_loss": 0.3047122657299042, "eval_precision": 0.9024848857165658, "eval_recall": 0.9207396220750284, "eval_runtime": 6.6561, "eval_samples_per_second": 552.726, "eval_steps_per_second": 8.714, "step": 1200 }, { "epoch": 11.304347826086957, "grad_norm": 7.340043544769287, "learning_rate": 7.500000000000001e-06, "loss": 0.1587, "step": 1300 }, { "epoch": 11.304347826086957, "eval_accuracy": 0.9296004349007883, "eval_f1_score": 0.9202832724978299, "eval_loss": 0.2757803201675415, "eval_precision": 0.9233347498988893, "eval_recall": 0.917658614989255, "eval_runtime": 6.6787, "eval_samples_per_second": 550.859, "eval_steps_per_second": 8.684, "step": 1300 }, { "epoch": 12.173913043478262, "grad_norm": 5.315700054168701, "learning_rate": 7.222222222222223e-06, "loss": 0.1286, "step": 1400 }, { "epoch": 12.173913043478262, "eval_accuracy": 0.9266104919815167, "eval_f1_score": 0.9144278995332229, "eval_loss": 0.29267847537994385, "eval_precision": 0.9100638576136009, "eval_recall": 0.9198715425139269, "eval_runtime": 6.7676, "eval_samples_per_second": 543.619, "eval_steps_per_second": 8.57, "step": 1400 }, { "epoch": 13.043478260869565, "grad_norm": 5.173799514770508, "learning_rate": 6.944444444444445e-06, "loss": 0.1221, "step": 1500 }, { "epoch": 13.043478260869565, "eval_accuracy": 0.9317749388420766, "eval_f1_score": 0.9245023460604546, "eval_loss": 0.28211963176727295, "eval_precision": 0.9309417300478454, "eval_recall": 0.9193579289135766, "eval_runtime": 6.7091, "eval_samples_per_second": 548.359, "eval_steps_per_second": 8.645, "step": 1500 }, { "epoch": 13.91304347826087, "grad_norm": 8.639619827270508, "learning_rate": 6.666666666666667e-06, "loss": 0.1087, "step": 1600 }, { "epoch": 13.91304347826087, "eval_accuracy": 0.9293286219081273, "eval_f1_score": 0.9159607873769989, "eval_loss": 0.27890825271606445, "eval_precision": 0.9090390134661626, "eval_recall": 0.9236924050215896, "eval_runtime": 6.7017, "eval_samples_per_second": 548.966, "eval_steps_per_second": 8.655, "step": 1600 }, { "epoch": 14.782608695652174, "grad_norm": 6.424872398376465, "learning_rate": 6.3888888888888885e-06, "loss": 0.0982, "step": 1700 }, { "epoch": 14.782608695652174, "eval_accuracy": 0.9290568089154662, "eval_f1_score": 0.9196461825352048, "eval_loss": 0.2833573520183563, "eval_precision": 0.9187836187318232, "eval_recall": 0.9213402050339831, "eval_runtime": 6.7096, "eval_samples_per_second": 548.32, "eval_steps_per_second": 8.644, "step": 1700 }, { "epoch": 15.652173913043478, "grad_norm": 4.618613243103027, "learning_rate": 6.111111111111112e-06, "loss": 0.089, "step": 1800 }, { "epoch": 15.652173913043478, "eval_accuracy": 0.9298722478934494, "eval_f1_score": 0.9202166850732406, "eval_loss": 0.28280356526374817, "eval_precision": 0.9151663252588741, "eval_recall": 0.9260674008256092, "eval_runtime": 6.7345, "eval_samples_per_second": 546.292, "eval_steps_per_second": 8.612, "step": 1800 }, { "epoch": 16.52173913043478, "grad_norm": 1.9568698406219482, "learning_rate": 5.833333333333334e-06, "loss": 0.0795, "step": 1900 }, { "epoch": 16.52173913043478, "eval_accuracy": 0.9331340038053819, "eval_f1_score": 0.9244095368032713, "eval_loss": 0.273701936006546, "eval_precision": 0.925343846727414, "eval_recall": 0.9238732382441093, "eval_runtime": 6.7425, "eval_samples_per_second": 545.641, "eval_steps_per_second": 8.602, "step": 1900 }, { "epoch": 17.391304347826086, "grad_norm": 2.161759614944458, "learning_rate": 5.555555555555557e-06, "loss": 0.0684, "step": 2000 }, { "epoch": 17.391304347826086, "eval_accuracy": 0.9323185648273987, "eval_f1_score": 0.9261695691084951, "eval_loss": 0.2873239815235138, "eval_precision": 0.9319834922740233, "eval_recall": 0.9216726996777184, "eval_runtime": 6.7577, "eval_samples_per_second": 544.415, "eval_steps_per_second": 8.583, "step": 2000 }, { "epoch": 18.26086956521739, "grad_norm": 4.607916355133057, "learning_rate": 5.2777777777777785e-06, "loss": 0.0673, "step": 2100 }, { "epoch": 18.26086956521739, "eval_accuracy": 0.9320467518347377, "eval_f1_score": 0.925184613434992, "eval_loss": 0.2904324531555176, "eval_precision": 0.9332741752610002, "eval_recall": 0.9184445089519725, "eval_runtime": 6.7294, "eval_samples_per_second": 546.702, "eval_steps_per_second": 8.619, "step": 2100 }, { "epoch": 19.130434782608695, "grad_norm": 6.327251434326172, "learning_rate": 5e-06, "loss": 0.0571, "step": 2200 }, { "epoch": 19.130434782608695, "eval_accuracy": 0.9293286219081273, "eval_f1_score": 0.9221668516434853, "eval_loss": 0.3166205883026123, "eval_precision": 0.925137476734381, "eval_recall": 0.920952609526737, "eval_runtime": 6.7037, "eval_samples_per_second": 548.799, "eval_steps_per_second": 8.652, "step": 2200 }, { "epoch": 20.0, "grad_norm": 9.082805633544922, "learning_rate": 4.722222222222222e-06, "loss": 0.0561, "step": 2300 }, { "epoch": 20.0, "eval_accuracy": 0.9317749388420766, "eval_f1_score": 0.9221280725480369, "eval_loss": 0.2922111749649048, "eval_precision": 0.9150274852978553, "eval_recall": 0.9297539237688469, "eval_runtime": 6.7503, "eval_samples_per_second": 545.017, "eval_steps_per_second": 8.592, "step": 2300 }, { "epoch": 20.869565217391305, "grad_norm": 5.283856391906738, "learning_rate": 4.444444444444444e-06, "loss": 0.0511, "step": 2400 }, { "epoch": 20.869565217391305, "eval_accuracy": 0.9315031258494156, "eval_f1_score": 0.9190655007648246, "eval_loss": 0.29927295446395874, "eval_precision": 0.9088064828335735, "eval_recall": 0.9303236730969998, "eval_runtime": 6.7281, "eval_samples_per_second": 546.809, "eval_steps_per_second": 8.621, "step": 2400 }, { "epoch": 21.73913043478261, "grad_norm": 6.0074896812438965, "learning_rate": 4.166666666666667e-06, "loss": 0.0442, "step": 2500 }, { "epoch": 21.73913043478261, "eval_accuracy": 0.9266104919815167, "eval_f1_score": 0.9161795338292905, "eval_loss": 0.32011494040489197, "eval_precision": 0.9060451440252857, "eval_recall": 0.9280493422296427, "eval_runtime": 6.7127, "eval_samples_per_second": 548.067, "eval_steps_per_second": 8.64, "step": 2500 }, { "epoch": 22.608695652173914, "grad_norm": 3.1078407764434814, "learning_rate": 3.88888888888889e-06, "loss": 0.0447, "step": 2600 }, { "epoch": 22.608695652173914, "eval_accuracy": 0.928241369937483, "eval_f1_score": 0.9137497551284842, "eval_loss": 0.3155056834220886, "eval_precision": 0.9009580466238951, "eval_recall": 0.9281730038314259, "eval_runtime": 6.7337, "eval_samples_per_second": 546.354, "eval_steps_per_second": 8.613, "step": 2600 }, { "epoch": 23.47826086956522, "grad_norm": 2.9584195613861084, "learning_rate": 3.6111111111111115e-06, "loss": 0.0415, "step": 2700 }, { "epoch": 23.47826086956522, "eval_accuracy": 0.9334058167980429, "eval_f1_score": 0.9226018260362496, "eval_loss": 0.30177852511405945, "eval_precision": 0.9185179495480513, "eval_recall": 0.9269833265460256, "eval_runtime": 6.7411, "eval_samples_per_second": 545.757, "eval_steps_per_second": 8.604, "step": 2700 }, { "epoch": 24.347826086956523, "grad_norm": 12.190321922302246, "learning_rate": 3.3333333333333333e-06, "loss": 0.0359, "step": 2800 }, { "epoch": 24.347826086956523, "eval_accuracy": 0.9298722478934494, "eval_f1_score": 0.9177278989948806, "eval_loss": 0.31918126344680786, "eval_precision": 0.9062664068560837, "eval_recall": 0.9308234663752396, "eval_runtime": 6.7802, "eval_samples_per_second": 542.606, "eval_steps_per_second": 8.554, "step": 2800 }, { "epoch": 25.217391304347824, "grad_norm": 0.2598835527896881, "learning_rate": 3.055555555555556e-06, "loss": 0.0369, "step": 2900 }, { "epoch": 25.217391304347824, "eval_accuracy": 0.933677629790704, "eval_f1_score": 0.9210521238209074, "eval_loss": 0.3063570559024811, "eval_precision": 0.9140578271273506, "eval_recall": 0.9285610502121662, "eval_runtime": 6.7729, "eval_samples_per_second": 543.197, "eval_steps_per_second": 8.564, "step": 2900 }, { "epoch": 26.08695652173913, "grad_norm": 0.24433408677577972, "learning_rate": 2.7777777777777783e-06, "loss": 0.0296, "step": 3000 }, { "epoch": 26.08695652173913, "eval_accuracy": 0.9328621908127208, "eval_f1_score": 0.9237047805131925, "eval_loss": 0.311038613319397, "eval_precision": 0.9198460229141495, "eval_recall": 0.9279424126946928, "eval_runtime": 6.8161, "eval_samples_per_second": 539.754, "eval_steps_per_second": 8.509, "step": 3000 }, { "epoch": 26.08695652173913, "step": 3000, "total_flos": 6579999363349350.0, "train_loss": 0.31640464369455973, "train_runtime": 3238.6183, "train_samples_per_second": 158.092, "train_steps_per_second": 1.235 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 100, "total_flos": 6579999363349350.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }