{
  "best_metric": 0.9872068230277186,
  "best_model_checkpoint": "teacher-status-van-tiny-256-0/checkpoint-1140",
  "epoch": 29.68421052631579,
  "eval_steps": 500,
  "global_step": 1410,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21,
      "learning_rate": 3.5460992907801423e-06,
      "loss": 0.693,
      "step": 10
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.092198581560285e-06,
      "loss": 0.692,
      "step": 20
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 0.6887,
      "step": 30
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.418439716312057e-05,
      "loss": 0.6788,
      "step": 40
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.6933333333333334,
      "eval_f1_score": 0.8188976377952756,
      "eval_loss": 0.6436793804168701,
      "eval_precision": 0.6933333333333334,
      "eval_recall": 1.0,
      "eval_runtime": 3.9449,
      "eval_samples_per_second": 171.109,
      "eval_steps_per_second": 5.577,
      "step": 47
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.773049645390071e-05,
      "loss": 0.6568,
      "step": 50
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 0.6118,
      "step": 60
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.4822695035460995e-05,
      "loss": 0.5534,
      "step": 70
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.836879432624114e-05,
      "loss": 0.4849,
      "step": 80
    },
    {
      "epoch": 1.89,
      "learning_rate": 3.191489361702128e-05,
      "loss": 0.463,
      "step": 90
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8755555555555555,
      "eval_f1_score": 0.9161676646706586,
      "eval_loss": 0.3405998945236206,
      "eval_precision": 0.8595505617977528,
      "eval_recall": 0.9807692307692307,
      "eval_runtime": 4.4067,
      "eval_samples_per_second": 153.174,
      "eval_steps_per_second": 4.992,
      "step": 95
    },
    {
      "epoch": 2.11,
      "learning_rate": 3.546099290780142e-05,
      "loss": 0.4401,
      "step": 100
    },
    {
      "epoch": 2.32,
      "learning_rate": 3.900709219858156e-05,
      "loss": 0.4196,
      "step": 110
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.2553191489361704e-05,
      "loss": 0.3933,
      "step": 120
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.609929078014185e-05,
      "loss": 0.3977,
      "step": 130
    },
    {
      "epoch": 2.95,
      "learning_rate": 4.964539007092199e-05,
      "loss": 0.3596,
      "step": 140
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.9303703703703704,
      "eval_f1_score": 0.9503695881731784,
      "eval_loss": 0.2071676254272461,
      "eval_precision": 0.9394572025052192,
      "eval_recall": 0.9615384615384616,
      "eval_runtime": 3.9276,
      "eval_samples_per_second": 171.861,
      "eval_steps_per_second": 5.601,
      "step": 142
    },
    {
      "epoch": 3.16,
      "learning_rate": 4.964539007092199e-05,
      "loss": 0.3506,
      "step": 150
    },
    {
      "epoch": 3.37,
      "learning_rate": 4.9251379038613084e-05,
      "loss": 0.354,
      "step": 160
    },
    {
      "epoch": 3.58,
      "learning_rate": 4.885736800630418e-05,
      "loss": 0.3132,
      "step": 170
    },
    {
      "epoch": 3.79,
      "learning_rate": 4.846335697399527e-05,
      "loss": 0.3324,
      "step": 180
    },
    {
      "epoch": 4.0,
      "learning_rate": 4.806934594168637e-05,
      "loss": 0.3505,
      "step": 190
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9525925925925925,
      "eval_f1_score": 0.9661016949152542,
      "eval_loss": 0.15637531876564026,
      "eval_precision": 0.957983193277311,
      "eval_recall": 0.9743589743589743,
      "eval_runtime": 4.0891,
      "eval_samples_per_second": 165.073,
      "eval_steps_per_second": 5.38,
      "step": 190
    },
    {
      "epoch": 4.21,
      "learning_rate": 4.7675334909377466e-05,
      "loss": 0.3181,
      "step": 200
    },
    {
      "epoch": 4.42,
      "learning_rate": 4.728132387706856e-05,
      "loss": 0.3174,
      "step": 210
    },
    {
      "epoch": 4.63,
      "learning_rate": 4.6887312844759653e-05,
      "loss": 0.3161,
      "step": 220
    },
    {
      "epoch": 4.84,
      "learning_rate": 4.6493301812450754e-05,
      "loss": 0.2962,
      "step": 230
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.9555555555555556,
      "eval_f1_score": 0.9680851063829786,
      "eval_loss": 0.12618891894817352,
      "eval_precision": 0.9639830508474576,
      "eval_recall": 0.9722222222222222,
      "eval_runtime": 4.4801,
      "eval_samples_per_second": 150.665,
      "eval_steps_per_second": 4.911,
      "step": 237
    },
    {
      "epoch": 5.05,
      "learning_rate": 4.609929078014185e-05,
      "loss": 0.3017,
      "step": 240
    },
    {
      "epoch": 5.26,
      "learning_rate": 4.570527974783294e-05,
      "loss": 0.2852,
      "step": 250
    },
    {
      "epoch": 5.47,
      "learning_rate": 4.5311268715524035e-05,
      "loss": 0.3222,
      "step": 260
    },
    {
      "epoch": 5.68,
      "learning_rate": 4.491725768321513e-05,
      "loss": 0.2662,
      "step": 270
    },
    {
      "epoch": 5.89,
      "learning_rate": 4.452324665090622e-05,
      "loss": 0.2762,
      "step": 280
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9644444444444444,
      "eval_f1_score": 0.9745222929936307,
      "eval_loss": 0.10375303775072098,
      "eval_precision": 0.9683544303797469,
      "eval_recall": 0.9807692307692307,
      "eval_runtime": 3.8894,
      "eval_samples_per_second": 173.548,
      "eval_steps_per_second": 5.656,
      "step": 285
    },
    {
      "epoch": 6.11,
      "learning_rate": 4.412923561859732e-05,
      "loss": 0.2803,
      "step": 290
    },
    {
      "epoch": 6.32,
      "learning_rate": 4.373522458628842e-05,
      "loss": 0.2672,
      "step": 300
    },
    {
      "epoch": 6.53,
      "learning_rate": 4.334121355397952e-05,
      "loss": 0.2768,
      "step": 310
    },
    {
      "epoch": 6.74,
      "learning_rate": 4.294720252167061e-05,
      "loss": 0.2709,
      "step": 320
    },
    {
      "epoch": 6.95,
      "learning_rate": 4.2553191489361704e-05,
      "loss": 0.2604,
      "step": 330
    },
    {
      "epoch": 6.99,
      "eval_accuracy": 0.9718518518518519,
      "eval_f1_score": 0.979765708200213,
      "eval_loss": 0.09324438869953156,
      "eval_precision": 0.9766454352441614,
      "eval_recall": 0.9829059829059829,
      "eval_runtime": 3.9359,
      "eval_samples_per_second": 171.498,
      "eval_steps_per_second": 5.59,
      "step": 332
    },
    {
      "epoch": 7.16,
      "learning_rate": 4.21591804570528e-05,
      "loss": 0.2647,
      "step": 340
    },
    {
      "epoch": 7.37,
      "learning_rate": 4.176516942474389e-05,
      "loss": 0.2632,
      "step": 350
    },
    {
      "epoch": 7.58,
      "learning_rate": 4.1371158392434986e-05,
      "loss": 0.2688,
      "step": 360
    },
    {
      "epoch": 7.79,
      "learning_rate": 4.0977147360126086e-05,
      "loss": 0.2699,
      "step": 370
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.058313632781718e-05,
      "loss": 0.2427,
      "step": 380
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9718518518518519,
      "eval_f1_score": 0.9796791443850267,
      "eval_loss": 0.09280087053775787,
      "eval_precision": 0.9807280513918629,
      "eval_recall": 0.9786324786324786,
      "eval_runtime": 4.2848,
      "eval_samples_per_second": 157.534,
      "eval_steps_per_second": 5.134,
      "step": 380
    },
    {
      "epoch": 8.21,
      "learning_rate": 4.018912529550828e-05,
      "loss": 0.241,
      "step": 390
    },
    {
      "epoch": 8.42,
      "learning_rate": 3.9795114263199374e-05,
      "loss": 0.2311,
      "step": 400
    },
    {
      "epoch": 8.63,
      "learning_rate": 3.940110323089047e-05,
      "loss": 0.2447,
      "step": 410
    },
    {
      "epoch": 8.84,
      "learning_rate": 3.900709219858156e-05,
      "loss": 0.2465,
      "step": 420
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.9718518518518519,
      "eval_f1_score": 0.9796791443850267,
      "eval_loss": 0.08982550352811813,
      "eval_precision": 0.9807280513918629,
      "eval_recall": 0.9786324786324786,
      "eval_runtime": 3.8511,
      "eval_samples_per_second": 175.276,
      "eval_steps_per_second": 5.713,
      "step": 427
    },
    {
      "epoch": 9.05,
      "learning_rate": 3.8613081166272655e-05,
      "loss": 0.2579,
      "step": 430
    },
    {
      "epoch": 9.26,
      "learning_rate": 3.8219070133963755e-05,
      "loss": 0.2393,
      "step": 440
    },
    {
      "epoch": 9.47,
      "learning_rate": 3.782505910165485e-05,
      "loss": 0.2532,
      "step": 450
    },
    {
      "epoch": 9.68,
      "learning_rate": 3.743104806934594e-05,
      "loss": 0.2501,
      "step": 460
    },
    {
      "epoch": 9.89,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.2519,
      "step": 470
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9688888888888889,
      "eval_f1_score": 0.9775401069518715,
      "eval_loss": 0.09134020656347275,
      "eval_precision": 0.9785867237687366,
      "eval_recall": 0.9764957264957265,
      "eval_runtime": 4.1234,
      "eval_samples_per_second": 163.699,
      "eval_steps_per_second": 5.335,
      "step": 475
    },
    {
      "epoch": 10.11,
      "learning_rate": 3.664302600472813e-05,
      "loss": 0.2177,
      "step": 480
    },
    {
      "epoch": 10.32,
      "learning_rate": 3.624901497241923e-05,
      "loss": 0.2248,
      "step": 490
    },
    {
      "epoch": 10.53,
      "learning_rate": 3.5855003940110324e-05,
      "loss": 0.2583,
      "step": 500
    },
    {
      "epoch": 10.74,
      "learning_rate": 3.546099290780142e-05,
      "loss": 0.2344,
      "step": 510
    },
    {
      "epoch": 10.95,
      "learning_rate": 3.506698187549252e-05,
      "loss": 0.2258,
      "step": 520
    },
    {
      "epoch": 10.99,
      "eval_accuracy": 0.9733333333333334,
      "eval_f1_score": 0.980891719745223,
      "eval_loss": 0.08469923585653305,
      "eval_precision": 0.9746835443037974,
      "eval_recall": 0.9871794871794872,
      "eval_runtime": 4.0414,
      "eval_samples_per_second": 167.022,
      "eval_steps_per_second": 5.444,
      "step": 522
    },
    {
      "epoch": 11.16,
      "learning_rate": 3.467297084318361e-05,
      "loss": 0.2357,
      "step": 530
    },
    {
      "epoch": 11.37,
      "learning_rate": 3.4278959810874706e-05,
      "loss": 0.2057,
      "step": 540
    },
    {
      "epoch": 11.58,
      "learning_rate": 3.38849487785658e-05,
      "loss": 0.2107,
      "step": 550
    },
    {
      "epoch": 11.79,
      "learning_rate": 3.349093774625689e-05,
      "loss": 0.228,
      "step": 560
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.309692671394799e-05,
      "loss": 0.2184,
      "step": 570
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9792592592592593,
      "eval_f1_score": 0.9851063829787234,
      "eval_loss": 0.08121450245380402,
      "eval_precision": 0.9809322033898306,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 3.8823,
      "eval_samples_per_second": 173.868,
      "eval_steps_per_second": 5.667,
      "step": 570
    },
    {
      "epoch": 12.21,
      "learning_rate": 3.270291568163909e-05,
      "loss": 0.1992,
      "step": 580
    },
    {
      "epoch": 12.42,
      "learning_rate": 3.230890464933019e-05,
      "loss": 0.2289,
      "step": 590
    },
    {
      "epoch": 12.63,
      "learning_rate": 3.191489361702128e-05,
      "loss": 0.2029,
      "step": 600
    },
    {
      "epoch": 12.84,
      "learning_rate": 3.1520882584712375e-05,
      "loss": 0.2208,
      "step": 610
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.9807407407407407,
      "eval_f1_score": 0.9861259338313767,
      "eval_loss": 0.06925630569458008,
      "eval_precision": 0.9850746268656716,
      "eval_recall": 0.9871794871794872,
      "eval_runtime": 4.4316,
      "eval_samples_per_second": 152.316,
      "eval_steps_per_second": 4.964,
      "step": 617
    },
    {
      "epoch": 13.05,
      "learning_rate": 3.112687155240347e-05,
      "loss": 0.2284,
      "step": 620
    },
    {
      "epoch": 13.26,
      "learning_rate": 3.073286052009456e-05,
      "loss": 0.2129,
      "step": 630
    },
    {
      "epoch": 13.47,
      "learning_rate": 3.033884948778566e-05,
      "loss": 0.2219,
      "step": 640
    },
    {
      "epoch": 13.68,
      "learning_rate": 2.9944838455476754e-05,
      "loss": 0.219,
      "step": 650
    },
    {
      "epoch": 13.89,
      "learning_rate": 2.9550827423167847e-05,
      "loss": 0.2201,
      "step": 660
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9762962962962963,
      "eval_f1_score": 0.9829424307036249,
      "eval_loss": 0.06279809772968292,
      "eval_precision": 0.9808510638297873,
      "eval_recall": 0.9850427350427351,
      "eval_runtime": 3.8891,
      "eval_samples_per_second": 173.56,
      "eval_steps_per_second": 5.657,
      "step": 665
    },
    {
      "epoch": 14.11,
      "learning_rate": 2.9156816390858944e-05,
      "loss": 0.223,
      "step": 670
    },
    {
      "epoch": 14.32,
      "learning_rate": 2.8762805358550045e-05,
      "loss": 0.2151,
      "step": 680
    },
    {
      "epoch": 14.53,
      "learning_rate": 2.836879432624114e-05,
      "loss": 0.2096,
      "step": 690
    },
    {
      "epoch": 14.74,
      "learning_rate": 2.7974783293932232e-05,
      "loss": 0.2081,
      "step": 700
    },
    {
      "epoch": 14.95,
      "learning_rate": 2.758077226162333e-05,
      "loss": 0.2251,
      "step": 710
    },
    {
      "epoch": 14.99,
      "eval_accuracy": 0.9733333333333334,
      "eval_f1_score": 0.9809725158562368,
      "eval_loss": 0.08107414096593857,
      "eval_precision": 0.9707112970711297,
      "eval_recall": 0.9914529914529915,
      "eval_runtime": 3.8972,
      "eval_samples_per_second": 173.203,
      "eval_steps_per_second": 5.645,
      "step": 712
    },
    {
      "epoch": 15.16,
      "learning_rate": 2.7186761229314423e-05,
      "loss": 0.2067,
      "step": 720
    },
    {
      "epoch": 15.37,
      "learning_rate": 2.6792750197005517e-05,
      "loss": 0.1864,
      "step": 730
    },
    {
      "epoch": 15.58,
      "learning_rate": 2.639873916469661e-05,
      "loss": 0.208,
      "step": 740
    },
    {
      "epoch": 15.79,
      "learning_rate": 2.6004728132387708e-05,
      "loss": 0.1931,
      "step": 750
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.56107171000788e-05,
      "loss": 0.2135,
      "step": 760
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9762962962962963,
      "eval_f1_score": 0.9829424307036249,
      "eval_loss": 0.0718333050608635,
      "eval_precision": 0.9808510638297873,
      "eval_recall": 0.9850427350427351,
      "eval_runtime": 4.4867,
      "eval_samples_per_second": 150.444,
      "eval_steps_per_second": 4.903,
      "step": 760
    },
    {
      "epoch": 16.21,
      "learning_rate": 2.5216706067769895e-05,
      "loss": 0.2,
      "step": 770
    },
    {
      "epoch": 16.42,
      "learning_rate": 2.4822695035460995e-05,
      "loss": 0.2025,
      "step": 780
    },
    {
      "epoch": 16.63,
      "learning_rate": 2.442868400315209e-05,
      "loss": 0.2017,
      "step": 790
    },
    {
      "epoch": 16.84,
      "learning_rate": 2.4034672970843186e-05,
      "loss": 0.1851,
      "step": 800
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.9762962962962963,
      "eval_f1_score": 0.9829787234042553,
      "eval_loss": 0.07912527769804001,
      "eval_precision": 0.9788135593220338,
      "eval_recall": 0.9871794871794872,
      "eval_runtime": 3.9283,
      "eval_samples_per_second": 171.83,
      "eval_steps_per_second": 5.6,
      "step": 807
    },
    {
      "epoch": 17.05,
      "learning_rate": 2.364066193853428e-05,
      "loss": 0.1901,
      "step": 810
    },
    {
      "epoch": 17.26,
      "learning_rate": 2.3246650906225377e-05,
      "loss": 0.2272,
      "step": 820
    },
    {
      "epoch": 17.47,
      "learning_rate": 2.285263987391647e-05,
      "loss": 0.1965,
      "step": 830
    },
    {
      "epoch": 17.68,
      "learning_rate": 2.2458628841607564e-05,
      "loss": 0.1854,
      "step": 840
    },
    {
      "epoch": 17.89,
      "learning_rate": 2.206461780929866e-05,
      "loss": 0.2152,
      "step": 850
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.9748148148148148,
      "eval_f1_score": 0.9818181818181818,
      "eval_loss": 0.0737040787935257,
      "eval_precision": 0.9828693790149893,
      "eval_recall": 0.9807692307692307,
      "eval_runtime": 3.9311,
      "eval_samples_per_second": 171.706,
      "eval_steps_per_second": 5.596,
      "step": 855
    },
    {
      "epoch": 18.11,
      "learning_rate": 2.167060677698976e-05,
      "loss": 0.1991,
      "step": 860
    },
    {
      "epoch": 18.32,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 0.1637,
      "step": 870
    },
    {
      "epoch": 18.53,
      "learning_rate": 2.0882584712371946e-05,
      "loss": 0.1906,
      "step": 880
    },
    {
      "epoch": 18.74,
      "learning_rate": 2.0488573680063043e-05,
      "loss": 0.1899,
      "step": 890
    },
    {
      "epoch": 18.95,
      "learning_rate": 2.009456264775414e-05,
      "loss": 0.1871,
      "step": 900
    },
    {
      "epoch": 18.99,
      "eval_accuracy": 0.9762962962962963,
      "eval_f1_score": 0.9829787234042553,
      "eval_loss": 0.08143588900566101,
      "eval_precision": 0.9788135593220338,
      "eval_recall": 0.9871794871794872,
      "eval_runtime": 4.2729,
      "eval_samples_per_second": 157.973,
      "eval_steps_per_second": 5.149,
      "step": 902
    },
    {
      "epoch": 19.16,
      "learning_rate": 1.9700551615445234e-05,
      "loss": 0.1708,
      "step": 910
    },
    {
      "epoch": 19.37,
      "learning_rate": 1.9306540583136327e-05,
      "loss": 0.1864,
      "step": 920
    },
    {
      "epoch": 19.58,
      "learning_rate": 1.8912529550827425e-05,
      "loss": 0.1991,
      "step": 930
    },
    {
      "epoch": 19.79,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.2152,
      "step": 940
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.8124507486209615e-05,
      "loss": 0.1714,
      "step": 950
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9762962962962963,
      "eval_f1_score": 0.9830148619957537,
      "eval_loss": 0.06924613565206528,
      "eval_precision": 0.9767932489451476,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 3.9174,
      "eval_samples_per_second": 172.306,
      "eval_steps_per_second": 5.616,
      "step": 950
    },
    {
      "epoch": 20.21,
      "learning_rate": 1.773049645390071e-05,
      "loss": 0.1878,
      "step": 960
    },
    {
      "epoch": 20.42,
      "learning_rate": 1.7336485421591806e-05,
      "loss": 0.1782,
      "step": 970
    },
    {
      "epoch": 20.63,
      "learning_rate": 1.69424743892829e-05,
      "loss": 0.187,
      "step": 980
    },
    {
      "epoch": 20.84,
      "learning_rate": 1.6548463356973994e-05,
      "loss": 0.188,
      "step": 990
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.9777777777777777,
      "eval_f1_score": 0.983991462113127,
      "eval_loss": 0.06410012394189835,
      "eval_precision": 0.9829424307036247,
      "eval_recall": 0.9850427350427351,
      "eval_runtime": 4.4961,
      "eval_samples_per_second": 150.132,
      "eval_steps_per_second": 4.893,
      "step": 997
    },
    {
      "epoch": 21.05,
      "learning_rate": 1.6154452324665094e-05,
      "loss": 0.1543,
      "step": 1000
    },
    {
      "epoch": 21.26,
      "learning_rate": 1.5760441292356188e-05,
      "loss": 0.1752,
      "step": 1010
    },
    {
      "epoch": 21.47,
      "learning_rate": 1.536643026004728e-05,
      "loss": 0.1606,
      "step": 1020
    },
    {
      "epoch": 21.68,
      "learning_rate": 1.4972419227738377e-05,
      "loss": 0.184,
      "step": 1030
    },
    {
      "epoch": 21.89,
      "learning_rate": 1.4578408195429472e-05,
      "loss": 0.191,
      "step": 1040
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.9792592592592593,
      "eval_f1_score": 0.9850746268656716,
      "eval_loss": 0.06437370181083679,
      "eval_precision": 0.9829787234042553,
      "eval_recall": 0.9871794871794872,
      "eval_runtime": 3.9414,
      "eval_samples_per_second": 171.258,
      "eval_steps_per_second": 5.582,
      "step": 1045
    },
    {
      "epoch": 22.11,
      "learning_rate": 1.418439716312057e-05,
      "loss": 0.177,
      "step": 1050
    },
    {
      "epoch": 22.32,
      "learning_rate": 1.3790386130811665e-05,
      "loss": 0.1727,
      "step": 1060
    },
    {
      "epoch": 22.53,
      "learning_rate": 1.3396375098502758e-05,
      "loss": 0.1872,
      "step": 1070
    },
    {
      "epoch": 22.74,
      "learning_rate": 1.3002364066193854e-05,
      "loss": 0.1558,
      "step": 1080
    },
    {
      "epoch": 22.95,
      "learning_rate": 1.2608353033884947e-05,
      "loss": 0.2025,
      "step": 1090
    },
    {
      "epoch": 22.99,
      "eval_accuracy": 0.9792592592592593,
      "eval_f1_score": 0.9850107066381155,
      "eval_loss": 0.06750550121068954,
      "eval_precision": 0.9871244635193133,
      "eval_recall": 0.9829059829059829,
      "eval_runtime": 3.9141,
      "eval_samples_per_second": 172.455,
      "eval_steps_per_second": 5.621,
      "step": 1092
    },
    {
      "epoch": 23.16,
      "learning_rate": 1.2214342001576045e-05,
      "loss": 0.1766,
      "step": 1100
    },
    {
      "epoch": 23.37,
      "learning_rate": 1.182033096926714e-05,
      "loss": 0.167,
      "step": 1110
    },
    {
      "epoch": 23.58,
      "learning_rate": 1.1426319936958235e-05,
      "loss": 0.1995,
      "step": 1120
    },
    {
      "epoch": 23.79,
      "learning_rate": 1.103230890464933e-05,
      "loss": 0.1775,
      "step": 1130
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 0.1753,
      "step": 1140
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9822222222222222,
      "eval_f1_score": 0.9872068230277186,
      "eval_loss": 0.06550905108451843,
      "eval_precision": 0.9851063829787234,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 4.3235,
      "eval_samples_per_second": 156.125,
      "eval_steps_per_second": 5.089,
      "step": 1140
    },
    {
      "epoch": 24.21,
      "learning_rate": 1.0244286840031522e-05,
      "loss": 0.1825,
      "step": 1150
    },
    {
      "epoch": 24.42,
      "learning_rate": 9.850275807722617e-06,
      "loss": 0.1587,
      "step": 1160
    },
    {
      "epoch": 24.63,
      "learning_rate": 9.456264775413712e-06,
      "loss": 0.1846,
      "step": 1170
    },
    {
      "epoch": 24.84,
      "learning_rate": 9.062253743104808e-06,
      "loss": 0.1857,
      "step": 1180
    },
    {
      "epoch": 24.99,
      "eval_accuracy": 0.9792592592592593,
      "eval_f1_score": 0.9851380042462845,
      "eval_loss": 0.07306463271379471,
      "eval_precision": 0.9789029535864979,
      "eval_recall": 0.9914529914529915,
      "eval_runtime": 3.9407,
      "eval_samples_per_second": 171.288,
      "eval_steps_per_second": 5.583,
      "step": 1187
    },
    {
      "epoch": 25.05,
      "learning_rate": 8.668242710795903e-06,
      "loss": 0.1747,
      "step": 1190
    },
    {
      "epoch": 25.26,
      "learning_rate": 8.274231678486997e-06,
      "loss": 0.165,
      "step": 1200
    },
    {
      "epoch": 25.47,
      "learning_rate": 7.880220646178094e-06,
      "loss": 0.1774,
      "step": 1210
    },
    {
      "epoch": 25.68,
      "learning_rate": 7.486209613869188e-06,
      "loss": 0.1762,
      "step": 1220
    },
    {
      "epoch": 25.89,
      "learning_rate": 7.092198581560285e-06,
      "loss": 0.2007,
      "step": 1230
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.9792592592592593,
      "eval_f1_score": 0.9851380042462845,
      "eval_loss": 0.06768698245286942,
      "eval_precision": 0.9789029535864979,
      "eval_recall": 0.9914529914529915,
      "eval_runtime": 4.3978,
      "eval_samples_per_second": 153.486,
      "eval_steps_per_second": 5.002,
      "step": 1235
    },
    {
      "epoch": 26.11,
      "learning_rate": 6.698187549251379e-06,
      "loss": 0.1588,
      "step": 1240
    },
    {
      "epoch": 26.32,
      "learning_rate": 6.304176516942474e-06,
      "loss": 0.166,
      "step": 1250
    },
    {
      "epoch": 26.53,
      "learning_rate": 5.91016548463357e-06,
      "loss": 0.191,
      "step": 1260
    },
    {
      "epoch": 26.74,
      "learning_rate": 5.516154452324665e-06,
      "loss": 0.1603,
      "step": 1270
    },
    {
      "epoch": 26.95,
      "learning_rate": 5.122143420015761e-06,
      "loss": 0.2086,
      "step": 1280
    },
    {
      "epoch": 26.99,
      "eval_accuracy": 0.9792592592592593,
      "eval_f1_score": 0.9851063829787234,
      "eval_loss": 0.0640312060713768,
      "eval_precision": 0.9809322033898306,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 3.8919,
      "eval_samples_per_second": 173.437,
      "eval_steps_per_second": 5.653,
      "step": 1282
    },
    {
      "epoch": 27.16,
      "learning_rate": 4.728132387706856e-06,
      "loss": 0.1807,
      "step": 1290
    },
    {
      "epoch": 27.37,
      "learning_rate": 4.3341213553979515e-06,
      "loss": 0.1697,
      "step": 1300
    },
    {
      "epoch": 27.58,
      "learning_rate": 3.940110323089047e-06,
      "loss": 0.1799,
      "step": 1310
    },
    {
      "epoch": 27.79,
      "learning_rate": 3.5460992907801423e-06,
      "loss": 0.1551,
      "step": 1320
    },
    {
      "epoch": 28.0,
      "learning_rate": 3.152088258471237e-06,
      "loss": 0.1666,
      "step": 1330
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.9777777777777777,
      "eval_f1_score": 0.9840595111583422,
      "eval_loss": 0.07120572775602341,
      "eval_precision": 0.9788583509513742,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 3.8788,
      "eval_samples_per_second": 174.025,
      "eval_steps_per_second": 5.672,
      "step": 1330
    },
    {
      "epoch": 28.21,
      "learning_rate": 2.7580772261623327e-06,
      "loss": 0.1736,
      "step": 1340
    },
    {
      "epoch": 28.42,
      "learning_rate": 2.364066193853428e-06,
      "loss": 0.2003,
      "step": 1350
    },
    {
      "epoch": 28.63,
      "learning_rate": 1.9700551615445235e-06,
      "loss": 0.1777,
      "step": 1360
    },
    {
      "epoch": 28.84,
      "learning_rate": 1.5760441292356184e-06,
      "loss": 0.157,
      "step": 1370
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.9807407407407407,
      "eval_f1_score": 0.9861554845580405,
      "eval_loss": 0.06606751680374146,
      "eval_precision": 0.9830148619957537,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 4.4114,
      "eval_samples_per_second": 153.014,
      "eval_steps_per_second": 4.987,
      "step": 1377
    },
    {
      "epoch": 29.05,
      "learning_rate": 1.182033096926714e-06,
      "loss": 0.169,
      "step": 1380
    },
    {
      "epoch": 29.26,
      "learning_rate": 7.880220646178092e-07,
      "loss": 0.1736,
      "step": 1390
    },
    {
      "epoch": 29.47,
      "learning_rate": 3.940110323089046e-07,
      "loss": 0.1748,
      "step": 1400
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.0,
      "loss": 0.1758,
      "step": 1410
    },
    {
      "epoch": 29.68,
      "eval_accuracy": 0.9777777777777777,
      "eval_f1_score": 0.9840595111583422,
      "eval_loss": 0.06716117262840271,
      "eval_precision": 0.9788583509513742,
      "eval_recall": 0.9893162393162394,
      "eval_runtime": 4.087,
      "eval_samples_per_second": 165.159,
      "eval_steps_per_second": 5.383,
      "step": 1410
    },
    {
      "epoch": 29.68,
      "step": 1410,
      "total_flos": 8.181902891907809e+17,
      "train_loss": 0.24775470698133428,
      "train_runtime": 3242.8553,
      "train_samples_per_second": 56.145,
      "train_steps_per_second": 0.435
    }
  ],
  "logging_steps": 10,
  "max_steps": 1410,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 8.181902891907809e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}