{ "best_metric": 0.9872068230277186, "best_model_checkpoint": "teacher-status-van-tiny-256-0/checkpoint-1140", "epoch": 29.68421052631579, "eval_steps": 500, "global_step": 1410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 3.5460992907801423e-06, "loss": 0.693, "step": 10 }, { "epoch": 0.42, "learning_rate": 7.092198581560285e-06, "loss": 0.692, "step": 20 }, { "epoch": 0.63, "learning_rate": 1.0638297872340426e-05, "loss": 0.6887, "step": 30 }, { "epoch": 0.84, "learning_rate": 1.418439716312057e-05, "loss": 0.6788, "step": 40 }, { "epoch": 0.99, "eval_accuracy": 0.6933333333333334, "eval_f1_score": 0.8188976377952756, "eval_loss": 0.6436793804168701, "eval_precision": 0.6933333333333334, "eval_recall": 1.0, "eval_runtime": 3.9449, "eval_samples_per_second": 171.109, "eval_steps_per_second": 5.577, "step": 47 }, { "epoch": 1.05, "learning_rate": 1.773049645390071e-05, "loss": 0.6568, "step": 50 }, { "epoch": 1.26, "learning_rate": 2.1276595744680852e-05, "loss": 0.6118, "step": 60 }, { "epoch": 1.47, "learning_rate": 2.4822695035460995e-05, "loss": 0.5534, "step": 70 }, { "epoch": 1.68, "learning_rate": 2.836879432624114e-05, "loss": 0.4849, "step": 80 }, { "epoch": 1.89, "learning_rate": 3.191489361702128e-05, "loss": 0.463, "step": 90 }, { "epoch": 2.0, "eval_accuracy": 0.8755555555555555, "eval_f1_score": 0.9161676646706586, "eval_loss": 0.3405998945236206, "eval_precision": 0.8595505617977528, "eval_recall": 0.9807692307692307, "eval_runtime": 4.4067, "eval_samples_per_second": 153.174, "eval_steps_per_second": 4.992, "step": 95 }, { "epoch": 2.11, "learning_rate": 3.546099290780142e-05, "loss": 0.4401, "step": 100 }, { "epoch": 2.32, "learning_rate": 3.900709219858156e-05, "loss": 0.4196, "step": 110 }, { "epoch": 2.53, "learning_rate": 4.2553191489361704e-05, "loss": 0.3933, "step": 120 }, { "epoch": 2.74, "learning_rate": 4.609929078014185e-05, "loss": 0.3977, "step": 130 }, { "epoch": 2.95, "learning_rate": 4.964539007092199e-05, "loss": 0.3596, "step": 140 }, { "epoch": 2.99, "eval_accuracy": 0.9303703703703704, "eval_f1_score": 0.9503695881731784, "eval_loss": 0.2071676254272461, "eval_precision": 0.9394572025052192, "eval_recall": 0.9615384615384616, "eval_runtime": 3.9276, "eval_samples_per_second": 171.861, "eval_steps_per_second": 5.601, "step": 142 }, { "epoch": 3.16, "learning_rate": 4.964539007092199e-05, "loss": 0.3506, "step": 150 }, { "epoch": 3.37, "learning_rate": 4.9251379038613084e-05, "loss": 0.354, "step": 160 }, { "epoch": 3.58, "learning_rate": 4.885736800630418e-05, "loss": 0.3132, "step": 170 }, { "epoch": 3.79, "learning_rate": 4.846335697399527e-05, "loss": 0.3324, "step": 180 }, { "epoch": 4.0, "learning_rate": 4.806934594168637e-05, "loss": 0.3505, "step": 190 }, { "epoch": 4.0, "eval_accuracy": 0.9525925925925925, "eval_f1_score": 0.9661016949152542, "eval_loss": 0.15637531876564026, "eval_precision": 0.957983193277311, "eval_recall": 0.9743589743589743, "eval_runtime": 4.0891, "eval_samples_per_second": 165.073, "eval_steps_per_second": 5.38, "step": 190 }, { "epoch": 4.21, "learning_rate": 4.7675334909377466e-05, "loss": 0.3181, "step": 200 }, { "epoch": 4.42, "learning_rate": 4.728132387706856e-05, "loss": 0.3174, "step": 210 }, { "epoch": 4.63, "learning_rate": 4.6887312844759653e-05, "loss": 0.3161, "step": 220 }, { "epoch": 4.84, "learning_rate": 4.6493301812450754e-05, "loss": 0.2962, "step": 230 }, { "epoch": 4.99, "eval_accuracy": 0.9555555555555556, "eval_f1_score": 0.9680851063829786, "eval_loss": 0.12618891894817352, "eval_precision": 0.9639830508474576, "eval_recall": 0.9722222222222222, "eval_runtime": 4.4801, "eval_samples_per_second": 150.665, "eval_steps_per_second": 4.911, "step": 237 }, { "epoch": 5.05, "learning_rate": 4.609929078014185e-05, "loss": 0.3017, "step": 240 }, { "epoch": 5.26, "learning_rate": 4.570527974783294e-05, "loss": 0.2852, "step": 250 }, { "epoch": 5.47, "learning_rate": 4.5311268715524035e-05, "loss": 0.3222, "step": 260 }, { "epoch": 5.68, "learning_rate": 4.491725768321513e-05, "loss": 0.2662, "step": 270 }, { "epoch": 5.89, "learning_rate": 4.452324665090622e-05, "loss": 0.2762, "step": 280 }, { "epoch": 6.0, "eval_accuracy": 0.9644444444444444, "eval_f1_score": 0.9745222929936307, "eval_loss": 0.10375303775072098, "eval_precision": 0.9683544303797469, "eval_recall": 0.9807692307692307, "eval_runtime": 3.8894, "eval_samples_per_second": 173.548, "eval_steps_per_second": 5.656, "step": 285 }, { "epoch": 6.11, "learning_rate": 4.412923561859732e-05, "loss": 0.2803, "step": 290 }, { "epoch": 6.32, "learning_rate": 4.373522458628842e-05, "loss": 0.2672, "step": 300 }, { "epoch": 6.53, "learning_rate": 4.334121355397952e-05, "loss": 0.2768, "step": 310 }, { "epoch": 6.74, "learning_rate": 4.294720252167061e-05, "loss": 0.2709, "step": 320 }, { "epoch": 6.95, "learning_rate": 4.2553191489361704e-05, "loss": 0.2604, "step": 330 }, { "epoch": 6.99, "eval_accuracy": 0.9718518518518519, "eval_f1_score": 0.979765708200213, "eval_loss": 0.09324438869953156, "eval_precision": 0.9766454352441614, "eval_recall": 0.9829059829059829, "eval_runtime": 3.9359, "eval_samples_per_second": 171.498, "eval_steps_per_second": 5.59, "step": 332 }, { "epoch": 7.16, "learning_rate": 4.21591804570528e-05, "loss": 0.2647, "step": 340 }, { "epoch": 7.37, "learning_rate": 4.176516942474389e-05, "loss": 0.2632, "step": 350 }, { "epoch": 7.58, "learning_rate": 4.1371158392434986e-05, "loss": 0.2688, "step": 360 }, { "epoch": 7.79, "learning_rate": 4.0977147360126086e-05, "loss": 0.2699, "step": 370 }, { "epoch": 8.0, "learning_rate": 4.058313632781718e-05, "loss": 0.2427, "step": 380 }, { "epoch": 8.0, "eval_accuracy": 0.9718518518518519, "eval_f1_score": 0.9796791443850267, "eval_loss": 0.09280087053775787, "eval_precision": 0.9807280513918629, "eval_recall": 0.9786324786324786, "eval_runtime": 4.2848, "eval_samples_per_second": 157.534, "eval_steps_per_second": 5.134, "step": 380 }, { "epoch": 8.21, "learning_rate": 4.018912529550828e-05, "loss": 0.241, "step": 390 }, { "epoch": 8.42, "learning_rate": 3.9795114263199374e-05, "loss": 0.2311, "step": 400 }, { "epoch": 8.63, "learning_rate": 3.940110323089047e-05, "loss": 0.2447, "step": 410 }, { "epoch": 8.84, "learning_rate": 3.900709219858156e-05, "loss": 0.2465, "step": 420 }, { "epoch": 8.99, "eval_accuracy": 0.9718518518518519, "eval_f1_score": 0.9796791443850267, "eval_loss": 0.08982550352811813, "eval_precision": 0.9807280513918629, "eval_recall": 0.9786324786324786, "eval_runtime": 3.8511, "eval_samples_per_second": 175.276, "eval_steps_per_second": 5.713, "step": 427 }, { "epoch": 9.05, "learning_rate": 3.8613081166272655e-05, "loss": 0.2579, "step": 430 }, { "epoch": 9.26, "learning_rate": 3.8219070133963755e-05, "loss": 0.2393, "step": 440 }, { "epoch": 9.47, "learning_rate": 3.782505910165485e-05, "loss": 0.2532, "step": 450 }, { "epoch": 9.68, "learning_rate": 3.743104806934594e-05, "loss": 0.2501, "step": 460 }, { "epoch": 9.89, "learning_rate": 3.7037037037037037e-05, "loss": 0.2519, "step": 470 }, { "epoch": 10.0, "eval_accuracy": 0.9688888888888889, "eval_f1_score": 0.9775401069518715, "eval_loss": 0.09134020656347275, "eval_precision": 0.9785867237687366, "eval_recall": 0.9764957264957265, "eval_runtime": 4.1234, "eval_samples_per_second": 163.699, "eval_steps_per_second": 5.335, "step": 475 }, { "epoch": 10.11, "learning_rate": 3.664302600472813e-05, "loss": 0.2177, "step": 480 }, { "epoch": 10.32, "learning_rate": 3.624901497241923e-05, "loss": 0.2248, "step": 490 }, { "epoch": 10.53, "learning_rate": 3.5855003940110324e-05, "loss": 0.2583, "step": 500 }, { "epoch": 10.74, "learning_rate": 3.546099290780142e-05, "loss": 0.2344, "step": 510 }, { "epoch": 10.95, "learning_rate": 3.506698187549252e-05, "loss": 0.2258, "step": 520 }, { "epoch": 10.99, "eval_accuracy": 0.9733333333333334, "eval_f1_score": 0.980891719745223, "eval_loss": 0.08469923585653305, "eval_precision": 0.9746835443037974, "eval_recall": 0.9871794871794872, "eval_runtime": 4.0414, "eval_samples_per_second": 167.022, "eval_steps_per_second": 5.444, "step": 522 }, { "epoch": 11.16, "learning_rate": 3.467297084318361e-05, "loss": 0.2357, "step": 530 }, { "epoch": 11.37, "learning_rate": 3.4278959810874706e-05, "loss": 0.2057, "step": 540 }, { "epoch": 11.58, "learning_rate": 3.38849487785658e-05, "loss": 0.2107, "step": 550 }, { "epoch": 11.79, "learning_rate": 3.349093774625689e-05, "loss": 0.228, "step": 560 }, { "epoch": 12.0, "learning_rate": 3.309692671394799e-05, "loss": 0.2184, "step": 570 }, { "epoch": 12.0, "eval_accuracy": 0.9792592592592593, "eval_f1_score": 0.9851063829787234, "eval_loss": 0.08121450245380402, "eval_precision": 0.9809322033898306, "eval_recall": 0.9893162393162394, "eval_runtime": 3.8823, "eval_samples_per_second": 173.868, "eval_steps_per_second": 5.667, "step": 570 }, { "epoch": 12.21, "learning_rate": 3.270291568163909e-05, "loss": 0.1992, "step": 580 }, { "epoch": 12.42, "learning_rate": 3.230890464933019e-05, "loss": 0.2289, "step": 590 }, { "epoch": 12.63, "learning_rate": 3.191489361702128e-05, "loss": 0.2029, "step": 600 }, { "epoch": 12.84, "learning_rate": 3.1520882584712375e-05, "loss": 0.2208, "step": 610 }, { "epoch": 12.99, "eval_accuracy": 0.9807407407407407, "eval_f1_score": 0.9861259338313767, "eval_loss": 0.06925630569458008, "eval_precision": 0.9850746268656716, "eval_recall": 0.9871794871794872, "eval_runtime": 4.4316, "eval_samples_per_second": 152.316, "eval_steps_per_second": 4.964, "step": 617 }, { "epoch": 13.05, "learning_rate": 3.112687155240347e-05, "loss": 0.2284, "step": 620 }, { "epoch": 13.26, "learning_rate": 3.073286052009456e-05, "loss": 0.2129, "step": 630 }, { "epoch": 13.47, "learning_rate": 3.033884948778566e-05, "loss": 0.2219, "step": 640 }, { "epoch": 13.68, "learning_rate": 2.9944838455476754e-05, "loss": 0.219, "step": 650 }, { "epoch": 13.89, "learning_rate": 2.9550827423167847e-05, "loss": 0.2201, "step": 660 }, { "epoch": 14.0, "eval_accuracy": 0.9762962962962963, "eval_f1_score": 0.9829424307036249, "eval_loss": 0.06279809772968292, "eval_precision": 0.9808510638297873, "eval_recall": 0.9850427350427351, "eval_runtime": 3.8891, "eval_samples_per_second": 173.56, "eval_steps_per_second": 5.657, "step": 665 }, { "epoch": 14.11, "learning_rate": 2.9156816390858944e-05, "loss": 0.223, "step": 670 }, { "epoch": 14.32, "learning_rate": 2.8762805358550045e-05, "loss": 0.2151, "step": 680 }, { "epoch": 14.53, "learning_rate": 2.836879432624114e-05, "loss": 0.2096, "step": 690 }, { "epoch": 14.74, "learning_rate": 2.7974783293932232e-05, "loss": 0.2081, "step": 700 }, { "epoch": 14.95, "learning_rate": 2.758077226162333e-05, "loss": 0.2251, "step": 710 }, { "epoch": 14.99, "eval_accuracy": 0.9733333333333334, "eval_f1_score": 0.9809725158562368, "eval_loss": 0.08107414096593857, "eval_precision": 0.9707112970711297, "eval_recall": 0.9914529914529915, "eval_runtime": 3.8972, "eval_samples_per_second": 173.203, "eval_steps_per_second": 5.645, "step": 712 }, { "epoch": 15.16, "learning_rate": 2.7186761229314423e-05, "loss": 0.2067, "step": 720 }, { "epoch": 15.37, "learning_rate": 2.6792750197005517e-05, "loss": 0.1864, "step": 730 }, { "epoch": 15.58, "learning_rate": 2.639873916469661e-05, "loss": 0.208, "step": 740 }, { "epoch": 15.79, "learning_rate": 2.6004728132387708e-05, "loss": 0.1931, "step": 750 }, { "epoch": 16.0, "learning_rate": 2.56107171000788e-05, "loss": 0.2135, "step": 760 }, { "epoch": 16.0, "eval_accuracy": 0.9762962962962963, "eval_f1_score": 0.9829424307036249, "eval_loss": 0.0718333050608635, "eval_precision": 0.9808510638297873, "eval_recall": 0.9850427350427351, "eval_runtime": 4.4867, "eval_samples_per_second": 150.444, "eval_steps_per_second": 4.903, "step": 760 }, { "epoch": 16.21, "learning_rate": 2.5216706067769895e-05, "loss": 0.2, "step": 770 }, { "epoch": 16.42, "learning_rate": 2.4822695035460995e-05, "loss": 0.2025, "step": 780 }, { "epoch": 16.63, "learning_rate": 2.442868400315209e-05, "loss": 0.2017, "step": 790 }, { "epoch": 16.84, "learning_rate": 2.4034672970843186e-05, "loss": 0.1851, "step": 800 }, { "epoch": 16.99, "eval_accuracy": 0.9762962962962963, "eval_f1_score": 0.9829787234042553, "eval_loss": 0.07912527769804001, "eval_precision": 0.9788135593220338, "eval_recall": 0.9871794871794872, "eval_runtime": 3.9283, "eval_samples_per_second": 171.83, "eval_steps_per_second": 5.6, "step": 807 }, { "epoch": 17.05, "learning_rate": 2.364066193853428e-05, "loss": 0.1901, "step": 810 }, { "epoch": 17.26, "learning_rate": 2.3246650906225377e-05, "loss": 0.2272, "step": 820 }, { "epoch": 17.47, "learning_rate": 2.285263987391647e-05, "loss": 0.1965, "step": 830 }, { "epoch": 17.68, "learning_rate": 2.2458628841607564e-05, "loss": 0.1854, "step": 840 }, { "epoch": 17.89, "learning_rate": 2.206461780929866e-05, "loss": 0.2152, "step": 850 }, { "epoch": 18.0, "eval_accuracy": 0.9748148148148148, "eval_f1_score": 0.9818181818181818, "eval_loss": 0.0737040787935257, "eval_precision": 0.9828693790149893, "eval_recall": 0.9807692307692307, "eval_runtime": 3.9311, "eval_samples_per_second": 171.706, "eval_steps_per_second": 5.596, "step": 855 }, { "epoch": 18.11, "learning_rate": 2.167060677698976e-05, "loss": 0.1991, "step": 860 }, { "epoch": 18.32, "learning_rate": 2.1276595744680852e-05, "loss": 0.1637, "step": 870 }, { "epoch": 18.53, "learning_rate": 2.0882584712371946e-05, "loss": 0.1906, "step": 880 }, { "epoch": 18.74, "learning_rate": 2.0488573680063043e-05, "loss": 0.1899, "step": 890 }, { "epoch": 18.95, "learning_rate": 2.009456264775414e-05, "loss": 0.1871, "step": 900 }, { "epoch": 18.99, "eval_accuracy": 0.9762962962962963, "eval_f1_score": 0.9829787234042553, "eval_loss": 0.08143588900566101, "eval_precision": 0.9788135593220338, "eval_recall": 0.9871794871794872, "eval_runtime": 4.2729, "eval_samples_per_second": 157.973, "eval_steps_per_second": 5.149, "step": 902 }, { "epoch": 19.16, "learning_rate": 1.9700551615445234e-05, "loss": 0.1708, "step": 910 }, { "epoch": 19.37, "learning_rate": 1.9306540583136327e-05, "loss": 0.1864, "step": 920 }, { "epoch": 19.58, "learning_rate": 1.8912529550827425e-05, "loss": 0.1991, "step": 930 }, { "epoch": 19.79, "learning_rate": 1.8518518518518518e-05, "loss": 0.2152, "step": 940 }, { "epoch": 20.0, "learning_rate": 1.8124507486209615e-05, "loss": 0.1714, "step": 950 }, { "epoch": 20.0, "eval_accuracy": 0.9762962962962963, "eval_f1_score": 0.9830148619957537, "eval_loss": 0.06924613565206528, "eval_precision": 0.9767932489451476, "eval_recall": 0.9893162393162394, "eval_runtime": 3.9174, "eval_samples_per_second": 172.306, "eval_steps_per_second": 5.616, "step": 950 }, { "epoch": 20.21, "learning_rate": 1.773049645390071e-05, "loss": 0.1878, "step": 960 }, { "epoch": 20.42, "learning_rate": 1.7336485421591806e-05, "loss": 0.1782, "step": 970 }, { "epoch": 20.63, "learning_rate": 1.69424743892829e-05, "loss": 0.187, "step": 980 }, { "epoch": 20.84, "learning_rate": 1.6548463356973994e-05, "loss": 0.188, "step": 990 }, { "epoch": 20.99, "eval_accuracy": 0.9777777777777777, "eval_f1_score": 0.983991462113127, "eval_loss": 0.06410012394189835, "eval_precision": 0.9829424307036247, "eval_recall": 0.9850427350427351, "eval_runtime": 4.4961, "eval_samples_per_second": 150.132, "eval_steps_per_second": 4.893, "step": 997 }, { "epoch": 21.05, "learning_rate": 1.6154452324665094e-05, "loss": 0.1543, "step": 1000 }, { "epoch": 21.26, "learning_rate": 1.5760441292356188e-05, "loss": 0.1752, "step": 1010 }, { "epoch": 21.47, "learning_rate": 1.536643026004728e-05, "loss": 0.1606, "step": 1020 }, { "epoch": 21.68, "learning_rate": 1.4972419227738377e-05, "loss": 0.184, "step": 1030 }, { "epoch": 21.89, "learning_rate": 1.4578408195429472e-05, "loss": 0.191, "step": 1040 }, { "epoch": 22.0, "eval_accuracy": 0.9792592592592593, "eval_f1_score": 0.9850746268656716, "eval_loss": 0.06437370181083679, "eval_precision": 0.9829787234042553, "eval_recall": 0.9871794871794872, "eval_runtime": 3.9414, "eval_samples_per_second": 171.258, "eval_steps_per_second": 5.582, "step": 1045 }, { "epoch": 22.11, "learning_rate": 1.418439716312057e-05, "loss": 0.177, "step": 1050 }, { "epoch": 22.32, "learning_rate": 1.3790386130811665e-05, "loss": 0.1727, "step": 1060 }, { "epoch": 22.53, "learning_rate": 1.3396375098502758e-05, "loss": 0.1872, "step": 1070 }, { "epoch": 22.74, "learning_rate": 1.3002364066193854e-05, "loss": 0.1558, "step": 1080 }, { "epoch": 22.95, "learning_rate": 1.2608353033884947e-05, "loss": 0.2025, "step": 1090 }, { "epoch": 22.99, "eval_accuracy": 0.9792592592592593, "eval_f1_score": 0.9850107066381155, "eval_loss": 0.06750550121068954, "eval_precision": 0.9871244635193133, "eval_recall": 0.9829059829059829, "eval_runtime": 3.9141, "eval_samples_per_second": 172.455, "eval_steps_per_second": 5.621, "step": 1092 }, { "epoch": 23.16, "learning_rate": 1.2214342001576045e-05, "loss": 0.1766, "step": 1100 }, { "epoch": 23.37, "learning_rate": 1.182033096926714e-05, "loss": 0.167, "step": 1110 }, { "epoch": 23.58, "learning_rate": 1.1426319936958235e-05, "loss": 0.1995, "step": 1120 }, { "epoch": 23.79, "learning_rate": 1.103230890464933e-05, "loss": 0.1775, "step": 1130 }, { "epoch": 24.0, "learning_rate": 1.0638297872340426e-05, "loss": 0.1753, "step": 1140 }, { "epoch": 24.0, "eval_accuracy": 0.9822222222222222, "eval_f1_score": 0.9872068230277186, "eval_loss": 0.06550905108451843, "eval_precision": 0.9851063829787234, "eval_recall": 0.9893162393162394, "eval_runtime": 4.3235, "eval_samples_per_second": 156.125, "eval_steps_per_second": 5.089, "step": 1140 }, { "epoch": 24.21, "learning_rate": 1.0244286840031522e-05, "loss": 0.1825, "step": 1150 }, { "epoch": 24.42, "learning_rate": 9.850275807722617e-06, "loss": 0.1587, "step": 1160 }, { "epoch": 24.63, "learning_rate": 9.456264775413712e-06, "loss": 0.1846, "step": 1170 }, { "epoch": 24.84, "learning_rate": 9.062253743104808e-06, "loss": 0.1857, "step": 1180 }, { "epoch": 24.99, "eval_accuracy": 0.9792592592592593, "eval_f1_score": 0.9851380042462845, "eval_loss": 0.07306463271379471, "eval_precision": 0.9789029535864979, "eval_recall": 0.9914529914529915, "eval_runtime": 3.9407, "eval_samples_per_second": 171.288, "eval_steps_per_second": 5.583, "step": 1187 }, { "epoch": 25.05, "learning_rate": 8.668242710795903e-06, "loss": 0.1747, "step": 1190 }, { "epoch": 25.26, "learning_rate": 8.274231678486997e-06, "loss": 0.165, "step": 1200 }, { "epoch": 25.47, "learning_rate": 7.880220646178094e-06, "loss": 0.1774, "step": 1210 }, { "epoch": 25.68, "learning_rate": 7.486209613869188e-06, "loss": 0.1762, "step": 1220 }, { "epoch": 25.89, "learning_rate": 7.092198581560285e-06, "loss": 0.2007, "step": 1230 }, { "epoch": 26.0, "eval_accuracy": 0.9792592592592593, "eval_f1_score": 0.9851380042462845, "eval_loss": 0.06768698245286942, "eval_precision": 0.9789029535864979, "eval_recall": 0.9914529914529915, "eval_runtime": 4.3978, "eval_samples_per_second": 153.486, "eval_steps_per_second": 5.002, "step": 1235 }, { "epoch": 26.11, "learning_rate": 6.698187549251379e-06, "loss": 0.1588, "step": 1240 }, { "epoch": 26.32, "learning_rate": 6.304176516942474e-06, "loss": 0.166, "step": 1250 }, { "epoch": 26.53, "learning_rate": 5.91016548463357e-06, "loss": 0.191, "step": 1260 }, { "epoch": 26.74, "learning_rate": 5.516154452324665e-06, "loss": 0.1603, "step": 1270 }, { "epoch": 26.95, "learning_rate": 5.122143420015761e-06, "loss": 0.2086, "step": 1280 }, { "epoch": 26.99, "eval_accuracy": 0.9792592592592593, "eval_f1_score": 0.9851063829787234, "eval_loss": 0.0640312060713768, "eval_precision": 0.9809322033898306, "eval_recall": 0.9893162393162394, "eval_runtime": 3.8919, "eval_samples_per_second": 173.437, "eval_steps_per_second": 5.653, "step": 1282 }, { "epoch": 27.16, "learning_rate": 4.728132387706856e-06, "loss": 0.1807, "step": 1290 }, { "epoch": 27.37, "learning_rate": 4.3341213553979515e-06, "loss": 0.1697, "step": 1300 }, { "epoch": 27.58, "learning_rate": 3.940110323089047e-06, "loss": 0.1799, "step": 1310 }, { "epoch": 27.79, "learning_rate": 3.5460992907801423e-06, "loss": 0.1551, "step": 1320 }, { "epoch": 28.0, "learning_rate": 3.152088258471237e-06, "loss": 0.1666, "step": 1330 }, { "epoch": 28.0, "eval_accuracy": 0.9777777777777777, "eval_f1_score": 0.9840595111583422, "eval_loss": 0.07120572775602341, "eval_precision": 0.9788583509513742, "eval_recall": 0.9893162393162394, "eval_runtime": 3.8788, "eval_samples_per_second": 174.025, "eval_steps_per_second": 5.672, "step": 1330 }, { "epoch": 28.21, "learning_rate": 2.7580772261623327e-06, "loss": 0.1736, "step": 1340 }, { "epoch": 28.42, "learning_rate": 2.364066193853428e-06, "loss": 0.2003, "step": 1350 }, { "epoch": 28.63, "learning_rate": 1.9700551615445235e-06, "loss": 0.1777, "step": 1360 }, { "epoch": 28.84, "learning_rate": 1.5760441292356184e-06, "loss": 0.157, "step": 1370 }, { "epoch": 28.99, "eval_accuracy": 0.9807407407407407, "eval_f1_score": 0.9861554845580405, "eval_loss": 0.06606751680374146, "eval_precision": 0.9830148619957537, "eval_recall": 0.9893162393162394, "eval_runtime": 4.4114, "eval_samples_per_second": 153.014, "eval_steps_per_second": 4.987, "step": 1377 }, { "epoch": 29.05, "learning_rate": 1.182033096926714e-06, "loss": 0.169, "step": 1380 }, { "epoch": 29.26, "learning_rate": 7.880220646178092e-07, "loss": 0.1736, "step": 1390 }, { "epoch": 29.47, "learning_rate": 3.940110323089046e-07, "loss": 0.1748, "step": 1400 }, { "epoch": 29.68, "learning_rate": 0.0, "loss": 0.1758, "step": 1410 }, { "epoch": 29.68, "eval_accuracy": 0.9777777777777777, "eval_f1_score": 0.9840595111583422, "eval_loss": 0.06716117262840271, "eval_precision": 0.9788583509513742, "eval_recall": 0.9893162393162394, "eval_runtime": 4.087, "eval_samples_per_second": 165.159, "eval_steps_per_second": 5.383, "step": 1410 }, { "epoch": 29.68, "step": 1410, "total_flos": 8.181902891907809e+17, "train_loss": 0.24775470698133428, "train_runtime": 3242.8553, "train_samples_per_second": 56.145, "train_steps_per_second": 0.435 } ], "logging_steps": 10, "max_steps": 1410, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 8.181902891907809e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }