{ "best_metric": 0.9842381786339754, "best_model_checkpoint": "teacher-status-van-tiny-256-2/checkpoint-420", "epoch": 29.714285714285715, "eval_steps": 500, "global_step": 780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "learning_rate": 6.41025641025641e-06, "loss": 0.6928, "step": 10 }, { "epoch": 0.76, "learning_rate": 1.282051282051282e-05, "loss": 0.6896, "step": 20 }, { "epoch": 0.99, "eval_accuracy": 0.7700534759358288, "eval_f1_score": 0.8700906344410876, "eval_loss": 0.670672595500946, "eval_precision": 0.7700534759358288, "eval_recall": 1.0, "eval_runtime": 2.6151, "eval_samples_per_second": 143.014, "eval_steps_per_second": 4.589, "step": 26 }, { "epoch": 1.14, "learning_rate": 1.923076923076923e-05, "loss": 0.6763, "step": 30 }, { "epoch": 1.52, "learning_rate": 2.564102564102564e-05, "loss": 0.6376, "step": 40 }, { "epoch": 1.9, "learning_rate": 3.205128205128206e-05, "loss": 0.5438, "step": 50 }, { "epoch": 1.98, "eval_accuracy": 0.7700534759358288, "eval_f1_score": 0.8700906344410876, "eval_loss": 0.43023183941841125, "eval_precision": 0.7700534759358288, "eval_recall": 1.0, "eval_runtime": 2.5825, "eval_samples_per_second": 144.822, "eval_steps_per_second": 4.647, "step": 52 }, { "epoch": 2.29, "learning_rate": 3.846153846153846e-05, "loss": 0.4252, "step": 60 }, { "epoch": 2.67, "learning_rate": 4.4871794871794874e-05, "loss": 0.3756, "step": 70 }, { "epoch": 2.97, "eval_accuracy": 0.8850267379679144, "eval_f1_score": 0.9284525790349417, "eval_loss": 0.2762328088283539, "eval_precision": 0.8913738019169329, "eval_recall": 0.96875, "eval_runtime": 2.711, "eval_samples_per_second": 137.957, "eval_steps_per_second": 4.426, "step": 78 }, { "epoch": 3.05, "learning_rate": 4.985754985754986e-05, "loss": 0.3572, "step": 80 }, { "epoch": 3.43, "learning_rate": 4.9145299145299147e-05, "loss": 0.3217, "step": 90 }, { "epoch": 3.81, "learning_rate": 4.8433048433048433e-05, "loss": 0.3017, "step": 100 }, { "epoch": 4.0, "eval_accuracy": 0.9224598930481284, "eval_f1_score": 0.9502572898799314, "eval_loss": 0.20020952820777893, "eval_precision": 0.9389830508474576, "eval_recall": 0.9618055555555556, "eval_runtime": 2.7006, "eval_samples_per_second": 138.487, "eval_steps_per_second": 4.443, "step": 105 }, { "epoch": 4.19, "learning_rate": 4.772079772079772e-05, "loss": 0.2563, "step": 110 }, { "epoch": 4.57, "learning_rate": 4.700854700854701e-05, "loss": 0.2548, "step": 120 }, { "epoch": 4.95, "learning_rate": 4.62962962962963e-05, "loss": 0.257, "step": 130 }, { "epoch": 4.99, "eval_accuracy": 0.9385026737967914, "eval_f1_score": 0.9605488850771869, "eval_loss": 0.17939455807209015, "eval_precision": 0.9491525423728814, "eval_recall": 0.9722222222222222, "eval_runtime": 2.4102, "eval_samples_per_second": 155.173, "eval_steps_per_second": 4.979, "step": 131 }, { "epoch": 5.33, "learning_rate": 4.558404558404559e-05, "loss": 0.2484, "step": 140 }, { "epoch": 5.71, "learning_rate": 4.4871794871794874e-05, "loss": 0.2345, "step": 150 }, { "epoch": 5.98, "eval_accuracy": 0.9358288770053476, "eval_f1_score": 0.9581881533101045, "eval_loss": 0.14852212369441986, "eval_precision": 0.9615384615384616, "eval_recall": 0.9548611111111112, "eval_runtime": 2.3968, "eval_samples_per_second": 156.039, "eval_steps_per_second": 5.007, "step": 157 }, { "epoch": 6.1, "learning_rate": 4.415954415954416e-05, "loss": 0.2514, "step": 160 }, { "epoch": 6.48, "learning_rate": 4.344729344729345e-05, "loss": 0.2179, "step": 170 }, { "epoch": 6.86, "learning_rate": 4.2735042735042735e-05, "loss": 0.2318, "step": 180 }, { "epoch": 6.97, "eval_accuracy": 0.9438502673796791, "eval_f1_score": 0.9630931458699472, "eval_loss": 0.13021136820316315, "eval_precision": 0.9750889679715302, "eval_recall": 0.9513888888888888, "eval_runtime": 2.3767, "eval_samples_per_second": 157.361, "eval_steps_per_second": 5.049, "step": 183 }, { "epoch": 7.24, "learning_rate": 4.202279202279202e-05, "loss": 0.2151, "step": 190 }, { "epoch": 7.62, "learning_rate": 4.131054131054131e-05, "loss": 0.2311, "step": 200 }, { "epoch": 8.0, "learning_rate": 4.05982905982906e-05, "loss": 0.2173, "step": 210 }, { "epoch": 8.0, "eval_accuracy": 0.9518716577540107, "eval_f1_score": 0.9688581314878894, "eval_loss": 0.12773053348064423, "eval_precision": 0.9655172413793104, "eval_recall": 0.9722222222222222, "eval_runtime": 2.3674, "eval_samples_per_second": 157.977, "eval_steps_per_second": 5.069, "step": 210 }, { "epoch": 8.38, "learning_rate": 3.988603988603989e-05, "loss": 0.1934, "step": 220 }, { "epoch": 8.76, "learning_rate": 3.9173789173789176e-05, "loss": 0.2058, "step": 230 }, { "epoch": 8.99, "eval_accuracy": 0.9572192513368984, "eval_f1_score": 0.9722222222222222, "eval_loss": 0.1269279271364212, "eval_precision": 0.9722222222222222, "eval_recall": 0.9722222222222222, "eval_runtime": 2.2694, "eval_samples_per_second": 164.802, "eval_steps_per_second": 5.288, "step": 236 }, { "epoch": 9.14, "learning_rate": 3.846153846153846e-05, "loss": 0.1959, "step": 240 }, { "epoch": 9.52, "learning_rate": 3.774928774928775e-05, "loss": 0.2016, "step": 250 }, { "epoch": 9.9, "learning_rate": 3.7037037037037037e-05, "loss": 0.1955, "step": 260 }, { "epoch": 9.98, "eval_accuracy": 0.9572192513368984, "eval_f1_score": 0.9724137931034483, "eval_loss": 0.11462008953094482, "eval_precision": 0.9657534246575342, "eval_recall": 0.9791666666666666, "eval_runtime": 2.2006, "eval_samples_per_second": 169.954, "eval_steps_per_second": 5.453, "step": 262 }, { "epoch": 10.29, "learning_rate": 3.6324786324786323e-05, "loss": 0.2038, "step": 270 }, { "epoch": 10.67, "learning_rate": 3.561253561253561e-05, "loss": 0.2083, "step": 280 }, { "epoch": 10.97, "eval_accuracy": 0.9652406417112299, "eval_f1_score": 0.9772329246935202, "eval_loss": 0.1083158627152443, "eval_precision": 0.9858657243816255, "eval_recall": 0.96875, "eval_runtime": 2.2496, "eval_samples_per_second": 166.25, "eval_steps_per_second": 5.334, "step": 288 }, { "epoch": 11.05, "learning_rate": 3.4900284900284904e-05, "loss": 0.2107, "step": 290 }, { "epoch": 11.43, "learning_rate": 3.418803418803419e-05, "loss": 0.1725, "step": 300 }, { "epoch": 11.81, "learning_rate": 3.347578347578348e-05, "loss": 0.1886, "step": 310 }, { "epoch": 12.0, "eval_accuracy": 0.9598930481283422, "eval_f1_score": 0.9740932642487047, "eval_loss": 0.10481037944555283, "eval_precision": 0.9690721649484536, "eval_recall": 0.9791666666666666, "eval_runtime": 2.1714, "eval_samples_per_second": 172.242, "eval_steps_per_second": 5.526, "step": 315 }, { "epoch": 12.19, "learning_rate": 3.2763532763532764e-05, "loss": 0.1703, "step": 320 }, { "epoch": 12.57, "learning_rate": 3.205128205128206e-05, "loss": 0.1817, "step": 330 }, { "epoch": 12.95, "learning_rate": 3.133903133903134e-05, "loss": 0.1618, "step": 340 }, { "epoch": 12.99, "eval_accuracy": 0.9625668449197861, "eval_f1_score": 0.9756944444444444, "eval_loss": 0.10334747284650803, "eval_precision": 0.9756944444444444, "eval_recall": 0.9756944444444444, "eval_runtime": 2.1514, "eval_samples_per_second": 173.842, "eval_steps_per_second": 5.578, "step": 341 }, { "epoch": 13.33, "learning_rate": 3.0626780626780625e-05, "loss": 0.1733, "step": 350 }, { "epoch": 13.71, "learning_rate": 2.9914529914529915e-05, "loss": 0.1908, "step": 360 }, { "epoch": 13.98, "eval_accuracy": 0.9598930481283422, "eval_f1_score": 0.9739130434782608, "eval_loss": 0.10439594089984894, "eval_precision": 0.975609756097561, "eval_recall": 0.9722222222222222, "eval_runtime": 2.2063, "eval_samples_per_second": 169.517, "eval_steps_per_second": 5.439, "step": 367 }, { "epoch": 14.1, "learning_rate": 2.9202279202279202e-05, "loss": 0.1677, "step": 370 }, { "epoch": 14.48, "learning_rate": 2.8490028490028492e-05, "loss": 0.1641, "step": 380 }, { "epoch": 14.86, "learning_rate": 2.777777777777778e-05, "loss": 0.1594, "step": 390 }, { "epoch": 14.97, "eval_accuracy": 0.9625668449197861, "eval_f1_score": 0.9757785467128027, "eval_loss": 0.09152617305517197, "eval_precision": 0.9724137931034482, "eval_recall": 0.9791666666666666, "eval_runtime": 2.146, "eval_samples_per_second": 174.274, "eval_steps_per_second": 5.592, "step": 393 }, { "epoch": 15.24, "learning_rate": 2.706552706552707e-05, "loss": 0.1621, "step": 400 }, { "epoch": 15.62, "learning_rate": 2.6353276353276356e-05, "loss": 0.1698, "step": 410 }, { "epoch": 16.0, "learning_rate": 2.564102564102564e-05, "loss": 0.1474, "step": 420 }, { "epoch": 16.0, "eval_accuracy": 0.9759358288770054, "eval_f1_score": 0.9842381786339754, "eval_loss": 0.09159436821937561, "eval_precision": 0.9929328621908127, "eval_recall": 0.9756944444444444, "eval_runtime": 2.1573, "eval_samples_per_second": 173.367, "eval_steps_per_second": 5.563, "step": 420 }, { "epoch": 16.38, "learning_rate": 2.492877492877493e-05, "loss": 0.1326, "step": 430 }, { "epoch": 16.76, "learning_rate": 2.4216524216524217e-05, "loss": 0.1734, "step": 440 }, { "epoch": 16.99, "eval_accuracy": 0.9652406417112299, "eval_f1_score": 0.9773123909249563, "eval_loss": 0.09513744711875916, "eval_precision": 0.9824561403508771, "eval_recall": 0.9722222222222222, "eval_runtime": 2.1734, "eval_samples_per_second": 172.078, "eval_steps_per_second": 5.521, "step": 446 }, { "epoch": 17.14, "learning_rate": 2.3504273504273504e-05, "loss": 0.1827, "step": 450 }, { "epoch": 17.52, "learning_rate": 2.2792022792022794e-05, "loss": 0.1419, "step": 460 }, { "epoch": 17.9, "learning_rate": 2.207977207977208e-05, "loss": 0.1484, "step": 470 }, { "epoch": 17.98, "eval_accuracy": 0.9705882352941176, "eval_f1_score": 0.9808695652173912, "eval_loss": 0.10494749993085861, "eval_precision": 0.9825783972125436, "eval_recall": 0.9791666666666666, "eval_runtime": 2.155, "eval_samples_per_second": 173.55, "eval_steps_per_second": 5.568, "step": 472 }, { "epoch": 18.29, "learning_rate": 2.1367521367521368e-05, "loss": 0.1515, "step": 480 }, { "epoch": 18.67, "learning_rate": 2.0655270655270654e-05, "loss": 0.1495, "step": 490 }, { "epoch": 18.97, "eval_accuracy": 0.9679144385026738, "eval_f1_score": 0.9790940766550522, "eval_loss": 0.09304243326187134, "eval_precision": 0.9825174825174825, "eval_recall": 0.9756944444444444, "eval_runtime": 2.1676, "eval_samples_per_second": 172.538, "eval_steps_per_second": 5.536, "step": 498 }, { "epoch": 19.05, "learning_rate": 1.9943019943019945e-05, "loss": 0.1525, "step": 500 }, { "epoch": 19.43, "learning_rate": 1.923076923076923e-05, "loss": 0.1575, "step": 510 }, { "epoch": 19.81, "learning_rate": 1.8518518518518518e-05, "loss": 0.1385, "step": 520 }, { "epoch": 20.0, "eval_accuracy": 0.9625668449197861, "eval_f1_score": 0.9758620689655172, "eval_loss": 0.095456063747406, "eval_precision": 0.9691780821917808, "eval_recall": 0.9826388888888888, "eval_runtime": 2.217, "eval_samples_per_second": 168.695, "eval_steps_per_second": 5.413, "step": 525 }, { "epoch": 20.19, "learning_rate": 1.7806267806267805e-05, "loss": 0.139, "step": 530 }, { "epoch": 20.57, "learning_rate": 1.7094017094017095e-05, "loss": 0.1567, "step": 540 }, { "epoch": 20.95, "learning_rate": 1.6381766381766382e-05, "loss": 0.1492, "step": 550 }, { "epoch": 20.99, "eval_accuracy": 0.9598930481283422, "eval_f1_score": 0.9740932642487047, "eval_loss": 0.09114021807909012, "eval_precision": 0.9690721649484536, "eval_recall": 0.9791666666666666, "eval_runtime": 2.1474, "eval_samples_per_second": 174.163, "eval_steps_per_second": 5.588, "step": 551 }, { "epoch": 21.33, "learning_rate": 1.566951566951567e-05, "loss": 0.1664, "step": 560 }, { "epoch": 21.71, "learning_rate": 1.4957264957264958e-05, "loss": 0.1401, "step": 570 }, { "epoch": 21.98, "eval_accuracy": 0.9705882352941176, "eval_f1_score": 0.9808695652173912, "eval_loss": 0.09274759143590927, "eval_precision": 0.9825783972125436, "eval_recall": 0.9791666666666666, "eval_runtime": 2.1665, "eval_samples_per_second": 172.63, "eval_steps_per_second": 5.539, "step": 577 }, { "epoch": 22.1, "learning_rate": 1.4245014245014246e-05, "loss": 0.1538, "step": 580 }, { "epoch": 22.48, "learning_rate": 1.3532763532763535e-05, "loss": 0.1245, "step": 590 }, { "epoch": 22.86, "learning_rate": 1.282051282051282e-05, "loss": 0.1288, "step": 600 }, { "epoch": 22.97, "eval_accuracy": 0.9705882352941176, "eval_f1_score": 0.9808695652173912, "eval_loss": 0.09401033818721771, "eval_precision": 0.9825783972125436, "eval_recall": 0.9791666666666666, "eval_runtime": 2.1911, "eval_samples_per_second": 170.692, "eval_steps_per_second": 5.477, "step": 603 }, { "epoch": 23.24, "learning_rate": 1.2108262108262108e-05, "loss": 0.1422, "step": 610 }, { "epoch": 23.62, "learning_rate": 1.1396011396011397e-05, "loss": 0.1262, "step": 620 }, { "epoch": 24.0, "learning_rate": 1.0683760683760684e-05, "loss": 0.1304, "step": 630 }, { "epoch": 24.0, "eval_accuracy": 0.9652406417112299, "eval_f1_score": 0.9775474956822107, "eval_loss": 0.09132420271635056, "eval_precision": 0.9725085910652921, "eval_recall": 0.9826388888888888, "eval_runtime": 2.1685, "eval_samples_per_second": 172.467, "eval_steps_per_second": 5.534, "step": 630 }, { "epoch": 24.38, "learning_rate": 9.971509971509972e-06, "loss": 0.1542, "step": 640 }, { "epoch": 24.76, "learning_rate": 9.259259259259259e-06, "loss": 0.14, "step": 650 }, { "epoch": 24.99, "eval_accuracy": 0.9652406417112299, "eval_f1_score": 0.9776247848537005, "eval_loss": 0.09787322580814362, "eval_precision": 0.9692832764505119, "eval_recall": 0.9861111111111112, "eval_runtime": 2.1719, "eval_samples_per_second": 172.201, "eval_steps_per_second": 5.525, "step": 656 }, { "epoch": 25.14, "learning_rate": 8.547008547008548e-06, "loss": 0.1529, "step": 660 }, { "epoch": 25.52, "learning_rate": 7.834757834757835e-06, "loss": 0.1367, "step": 670 }, { "epoch": 25.9, "learning_rate": 7.122507122507123e-06, "loss": 0.1461, "step": 680 }, { "epoch": 25.98, "eval_accuracy": 0.9705882352941176, "eval_f1_score": 0.9810017271157168, "eval_loss": 0.08736571669578552, "eval_precision": 0.9759450171821306, "eval_recall": 0.9861111111111112, "eval_runtime": 2.1664, "eval_samples_per_second": 172.637, "eval_steps_per_second": 5.539, "step": 682 }, { "epoch": 26.29, "learning_rate": 6.41025641025641e-06, "loss": 0.1349, "step": 690 }, { "epoch": 26.67, "learning_rate": 5.6980056980056985e-06, "loss": 0.1429, "step": 700 }, { "epoch": 26.97, "eval_accuracy": 0.9705882352941176, "eval_f1_score": 0.9808027923211169, "eval_loss": 0.08370037376880646, "eval_precision": 0.9859649122807017, "eval_recall": 0.9756944444444444, "eval_runtime": 2.1544, "eval_samples_per_second": 173.601, "eval_steps_per_second": 5.57, "step": 708 }, { "epoch": 27.05, "learning_rate": 4.985754985754986e-06, "loss": 0.134, "step": 710 }, { "epoch": 27.43, "learning_rate": 4.273504273504274e-06, "loss": 0.1366, "step": 720 }, { "epoch": 27.81, "learning_rate": 3.5612535612535615e-06, "loss": 0.1444, "step": 730 }, { "epoch": 28.0, "eval_accuracy": 0.9679144385026738, "eval_f1_score": 0.9791666666666666, "eval_loss": 0.08762019872665405, "eval_precision": 0.9791666666666666, "eval_recall": 0.9791666666666666, "eval_runtime": 2.1635, "eval_samples_per_second": 172.868, "eval_steps_per_second": 5.547, "step": 735 }, { "epoch": 28.19, "learning_rate": 2.8490028490028492e-06, "loss": 0.1167, "step": 740 }, { "epoch": 28.57, "learning_rate": 2.136752136752137e-06, "loss": 0.1129, "step": 750 }, { "epoch": 28.95, "learning_rate": 1.4245014245014246e-06, "loss": 0.145, "step": 760 }, { "epoch": 28.99, "eval_accuracy": 0.9705882352941176, "eval_f1_score": 0.9808695652173912, "eval_loss": 0.09031202644109726, "eval_precision": 0.9825783972125436, "eval_recall": 0.9791666666666666, "eval_runtime": 2.1632, "eval_samples_per_second": 172.893, "eval_steps_per_second": 5.547, "step": 761 }, { "epoch": 29.33, "learning_rate": 7.122507122507123e-07, "loss": 0.1334, "step": 770 }, { "epoch": 29.71, "learning_rate": 0.0, "loss": 0.1445, "step": 780 }, { "epoch": 29.71, "eval_accuracy": 0.9679144385026738, "eval_f1_score": 0.9790940766550522, "eval_loss": 0.08815235644578934, "eval_precision": 0.9825174825174825, "eval_recall": 0.9756944444444444, "eval_runtime": 3.4052, "eval_samples_per_second": 109.832, "eval_steps_per_second": 3.524, "step": 780 }, { "epoch": 29.71, "step": 780, "total_flos": 4.5315392030480794e+17, "train_loss": 0.2131147768252935, "train_runtime": 1561.1697, "train_samples_per_second": 64.529, "train_steps_per_second": 0.5 } ], "logging_steps": 10, "max_steps": 780, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 4.5315392030480794e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }