{ "best_metric": 0.9622641509433962, "best_model_checkpoint": "wav2vec2-2Class-easy-train-test-large/checkpoint-2520", "epoch": 224.0, "eval_steps": 500, "global_step": 2520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.98, "eval_accuracy": 0.4088050314465409, "eval_loss": 0.7003181576728821, "eval_runtime": 1.8048, "eval_samples_per_second": 88.1, "eval_steps_per_second": 5.541, "step": 11 }, { "epoch": 1.96, "eval_accuracy": 0.4088050314465409, "eval_loss": 0.7001124620437622, "eval_runtime": 1.7728, "eval_samples_per_second": 89.69, "eval_steps_per_second": 5.641, "step": 22 }, { "epoch": 2.93, "eval_accuracy": 0.41509433962264153, "eval_loss": 0.69970703125, "eval_runtime": 1.7593, "eval_samples_per_second": 90.375, "eval_steps_per_second": 5.684, "step": 33 }, { "epoch": 4.0, "eval_accuracy": 0.42138364779874216, "eval_loss": 0.6991450786590576, "eval_runtime": 1.7582, "eval_samples_per_second": 90.433, "eval_steps_per_second": 5.688, "step": 45 }, { "epoch": 4.44, "grad_norm": 0.8353477716445923, "learning_rate": 1.7045454545454546e-06, "loss": 0.6976, "step": 50 }, { "epoch": 4.98, "eval_accuracy": 0.4276729559748428, "eval_loss": 0.6984724998474121, "eval_runtime": 1.7849, "eval_samples_per_second": 89.08, "eval_steps_per_second": 5.603, "step": 56 }, { "epoch": 5.96, "eval_accuracy": 0.44025157232704404, "eval_loss": 0.697744607925415, "eval_runtime": 2.127, "eval_samples_per_second": 74.753, "eval_steps_per_second": 4.701, "step": 67 }, { "epoch": 6.93, "eval_accuracy": 0.44654088050314467, "eval_loss": 0.6968724727630615, "eval_runtime": 2.2513, "eval_samples_per_second": 70.624, "eval_steps_per_second": 4.442, "step": 78 }, { "epoch": 8.0, "eval_accuracy": 0.46540880503144655, "eval_loss": 0.6957085728645325, "eval_runtime": 2.1194, "eval_samples_per_second": 75.021, "eval_steps_per_second": 4.718, "step": 90 }, { "epoch": 8.89, "grad_norm": 0.45805710554122925, "learning_rate": 3.409090909090909e-06, "loss": 0.6952, "step": 100 }, { "epoch": 8.98, "eval_accuracy": 0.46540880503144655, "eval_loss": 0.6945385932922363, "eval_runtime": 2.2918, "eval_samples_per_second": 69.378, "eval_steps_per_second": 4.363, "step": 101 }, { "epoch": 9.96, "eval_accuracy": 0.4779874213836478, "eval_loss": 0.6933900117874146, "eval_runtime": 2.2504, "eval_samples_per_second": 70.654, "eval_steps_per_second": 4.444, "step": 112 }, { "epoch": 10.93, "eval_accuracy": 0.49056603773584906, "eval_loss": 0.692146360874176, "eval_runtime": 2.1543, "eval_samples_per_second": 73.804, "eval_steps_per_second": 4.642, "step": 123 }, { "epoch": 12.0, "eval_accuracy": 0.5471698113207547, "eval_loss": 0.6906170845031738, "eval_runtime": 2.0832, "eval_samples_per_second": 76.326, "eval_steps_per_second": 4.8, "step": 135 }, { "epoch": 12.98, "eval_accuracy": 0.610062893081761, "eval_loss": 0.6892228722572327, "eval_runtime": 2.0269, "eval_samples_per_second": 78.443, "eval_steps_per_second": 4.934, "step": 146 }, { "epoch": 13.33, "grad_norm": 0.6493268609046936, "learning_rate": 5.1136363636363635e-06, "loss": 0.6911, "step": 150 }, { "epoch": 13.96, "eval_accuracy": 0.6037735849056604, "eval_loss": 0.6878040432929993, "eval_runtime": 2.1502, "eval_samples_per_second": 73.946, "eval_steps_per_second": 4.651, "step": 157 }, { "epoch": 14.93, "eval_accuracy": 0.5911949685534591, "eval_loss": 0.6863483190536499, "eval_runtime": 2.0844, "eval_samples_per_second": 76.279, "eval_steps_per_second": 4.797, "step": 168 }, { "epoch": 16.0, "eval_accuracy": 0.5911949685534591, "eval_loss": 0.6847361326217651, "eval_runtime": 2.1372, "eval_samples_per_second": 74.395, "eval_steps_per_second": 4.679, "step": 180 }, { "epoch": 16.98, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6830993294715881, "eval_runtime": 2.3473, "eval_samples_per_second": 67.739, "eval_steps_per_second": 4.26, "step": 191 }, { "epoch": 17.78, "grad_norm": 0.5862739086151123, "learning_rate": 6.818181818181818e-06, "loss": 0.6852, "step": 200 }, { "epoch": 17.96, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6815393567085266, "eval_runtime": 2.1307, "eval_samples_per_second": 74.623, "eval_steps_per_second": 4.693, "step": 202 }, { "epoch": 18.93, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.679994523525238, "eval_runtime": 2.082, "eval_samples_per_second": 76.37, "eval_steps_per_second": 4.803, "step": 213 }, { "epoch": 20.0, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6782289147377014, "eval_runtime": 2.1302, "eval_samples_per_second": 74.641, "eval_steps_per_second": 4.694, "step": 225 }, { "epoch": 20.98, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6765275001525879, "eval_runtime": 2.0229, "eval_samples_per_second": 78.601, "eval_steps_per_second": 4.943, "step": 236 }, { "epoch": 21.96, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6749551892280579, "eval_runtime": 2.0505, "eval_samples_per_second": 77.542, "eval_steps_per_second": 4.877, "step": 247 }, { "epoch": 22.22, "grad_norm": 0.10243403911590576, "learning_rate": 8.522727272727273e-06, "loss": 0.6783, "step": 250 }, { "epoch": 22.93, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6732170581817627, "eval_runtime": 2.0616, "eval_samples_per_second": 77.125, "eval_steps_per_second": 4.851, "step": 258 }, { "epoch": 24.0, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6713252067565918, "eval_runtime": 2.1605, "eval_samples_per_second": 73.595, "eval_steps_per_second": 4.629, "step": 270 }, { "epoch": 24.98, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6694673895835876, "eval_runtime": 2.0526, "eval_samples_per_second": 77.462, "eval_steps_per_second": 4.872, "step": 281 }, { "epoch": 25.96, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6674391031265259, "eval_runtime": 2.1284, "eval_samples_per_second": 74.704, "eval_steps_per_second": 4.698, "step": 292 }, { "epoch": 26.67, "grad_norm": 0.3114006221294403, "learning_rate": 1.0227272727272727e-05, "loss": 0.6676, "step": 300 }, { "epoch": 26.93, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6654335856437683, "eval_runtime": 1.9991, "eval_samples_per_second": 79.535, "eval_steps_per_second": 5.002, "step": 303 }, { "epoch": 28.0, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6630644202232361, "eval_runtime": 2.0451, "eval_samples_per_second": 77.745, "eval_steps_per_second": 4.89, "step": 315 }, { "epoch": 28.98, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6605831980705261, "eval_runtime": 2.0625, "eval_samples_per_second": 77.092, "eval_steps_per_second": 4.849, "step": 326 }, { "epoch": 29.96, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6578991413116455, "eval_runtime": 2.0381, "eval_samples_per_second": 78.014, "eval_steps_per_second": 4.907, "step": 337 }, { "epoch": 30.93, "eval_accuracy": 0.5849056603773585, "eval_loss": 0.6539114713668823, "eval_runtime": 1.9774, "eval_samples_per_second": 80.407, "eval_steps_per_second": 5.057, "step": 348 }, { "epoch": 31.11, "grad_norm": 0.2134709656238556, "learning_rate": 1.1931818181818181e-05, "loss": 0.6516, "step": 350 }, { "epoch": 32.0, "eval_accuracy": 0.5974842767295597, "eval_loss": 0.6492742896080017, "eval_runtime": 2.0601, "eval_samples_per_second": 77.182, "eval_steps_per_second": 4.854, "step": 360 }, { "epoch": 32.98, "eval_accuracy": 0.610062893081761, "eval_loss": 0.6441397070884705, "eval_runtime": 2.0739, "eval_samples_per_second": 76.667, "eval_steps_per_second": 4.822, "step": 371 }, { "epoch": 33.96, "eval_accuracy": 0.6226415094339622, "eval_loss": 0.6348815560340881, "eval_runtime": 2.1526, "eval_samples_per_second": 73.865, "eval_steps_per_second": 4.646, "step": 382 }, { "epoch": 34.93, "eval_accuracy": 0.6289308176100629, "eval_loss": 0.6257140040397644, "eval_runtime": 2.0081, "eval_samples_per_second": 79.179, "eval_steps_per_second": 4.98, "step": 393 }, { "epoch": 35.56, "grad_norm": 0.8974349498748779, "learning_rate": 1.3636363636363637e-05, "loss": 0.6124, "step": 400 }, { "epoch": 36.0, "eval_accuracy": 0.6415094339622641, "eval_loss": 0.611738920211792, "eval_runtime": 1.9854, "eval_samples_per_second": 80.083, "eval_steps_per_second": 5.037, "step": 405 }, { "epoch": 36.98, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.5910706520080566, "eval_runtime": 2.0618, "eval_samples_per_second": 77.117, "eval_steps_per_second": 4.85, "step": 416 }, { "epoch": 37.96, "eval_accuracy": 0.6918238993710691, "eval_loss": 0.5672016143798828, "eval_runtime": 2.0402, "eval_samples_per_second": 77.932, "eval_steps_per_second": 4.901, "step": 427 }, { "epoch": 38.93, "eval_accuracy": 0.7232704402515723, "eval_loss": 0.5392354130744934, "eval_runtime": 2.2936, "eval_samples_per_second": 69.324, "eval_steps_per_second": 4.36, "step": 438 }, { "epoch": 40.0, "grad_norm": 0.7736309170722961, "learning_rate": 1.534090909090909e-05, "loss": 0.5073, "step": 450 }, { "epoch": 40.0, "eval_accuracy": 0.7547169811320755, "eval_loss": 0.5041937232017517, "eval_runtime": 2.1247, "eval_samples_per_second": 74.835, "eval_steps_per_second": 4.707, "step": 450 }, { "epoch": 40.98, "eval_accuracy": 0.7672955974842768, "eval_loss": 0.47902750968933105, "eval_runtime": 2.163, "eval_samples_per_second": 73.509, "eval_steps_per_second": 4.623, "step": 461 }, { "epoch": 41.96, "eval_accuracy": 0.779874213836478, "eval_loss": 0.47594940662384033, "eval_runtime": 2.1321, "eval_samples_per_second": 74.574, "eval_steps_per_second": 4.69, "step": 472 }, { "epoch": 42.93, "eval_accuracy": 0.7987421383647799, "eval_loss": 0.4369964003562927, "eval_runtime": 2.1555, "eval_samples_per_second": 73.765, "eval_steps_per_second": 4.639, "step": 483 }, { "epoch": 44.0, "eval_accuracy": 0.7987421383647799, "eval_loss": 0.43516698479652405, "eval_runtime": 2.032, "eval_samples_per_second": 78.249, "eval_steps_per_second": 4.921, "step": 495 }, { "epoch": 44.44, "grad_norm": 0.4976819157600403, "learning_rate": 1.7045454545454546e-05, "loss": 0.3489, "step": 500 }, { "epoch": 44.98, "eval_accuracy": 0.7987421383647799, "eval_loss": 0.4422326385974884, "eval_runtime": 2.1135, "eval_samples_per_second": 75.231, "eval_steps_per_second": 4.732, "step": 506 }, { "epoch": 45.96, "eval_accuracy": 0.8050314465408805, "eval_loss": 0.41540881991386414, "eval_runtime": 2.0847, "eval_samples_per_second": 76.27, "eval_steps_per_second": 4.797, "step": 517 }, { "epoch": 46.93, "eval_accuracy": 0.8050314465408805, "eval_loss": 0.4131433367729187, "eval_runtime": 1.9752, "eval_samples_per_second": 80.498, "eval_steps_per_second": 5.063, "step": 528 }, { "epoch": 48.0, "eval_accuracy": 0.8113207547169812, "eval_loss": 0.3975575864315033, "eval_runtime": 2.01, "eval_samples_per_second": 79.104, "eval_steps_per_second": 4.975, "step": 540 }, { "epoch": 48.89, "grad_norm": 0.5197520852088928, "learning_rate": 1.8750000000000002e-05, "loss": 0.2962, "step": 550 }, { "epoch": 48.98, "eval_accuracy": 0.8113207547169812, "eval_loss": 0.39397454261779785, "eval_runtime": 2.0261, "eval_samples_per_second": 78.474, "eval_steps_per_second": 4.935, "step": 551 }, { "epoch": 49.96, "eval_accuracy": 0.8238993710691824, "eval_loss": 0.371494859457016, "eval_runtime": 2.0246, "eval_samples_per_second": 78.535, "eval_steps_per_second": 4.939, "step": 562 }, { "epoch": 50.93, "eval_accuracy": 0.8427672955974843, "eval_loss": 0.34951409697532654, "eval_runtime": 2.3286, "eval_samples_per_second": 68.281, "eval_steps_per_second": 4.294, "step": 573 }, { "epoch": 52.0, "eval_accuracy": 0.8364779874213837, "eval_loss": 0.3481156826019287, "eval_runtime": 1.9542, "eval_samples_per_second": 81.362, "eval_steps_per_second": 5.117, "step": 585 }, { "epoch": 52.98, "eval_accuracy": 0.8176100628930818, "eval_loss": 0.3817409873008728, "eval_runtime": 2.0789, "eval_samples_per_second": 76.484, "eval_steps_per_second": 4.81, "step": 596 }, { "epoch": 53.33, "grad_norm": 0.5608111023902893, "learning_rate": 2.0454545454545454e-05, "loss": 0.2573, "step": 600 }, { "epoch": 53.96, "eval_accuracy": 0.8490566037735849, "eval_loss": 0.3412492871284485, "eval_runtime": 2.0746, "eval_samples_per_second": 76.642, "eval_steps_per_second": 4.82, "step": 607 }, { "epoch": 54.93, "eval_accuracy": 0.8490566037735849, "eval_loss": 0.32929155230522156, "eval_runtime": 1.9991, "eval_samples_per_second": 79.538, "eval_steps_per_second": 5.002, "step": 618 }, { "epoch": 56.0, "eval_accuracy": 0.8427672955974843, "eval_loss": 0.3547687232494354, "eval_runtime": 2.1242, "eval_samples_per_second": 74.851, "eval_steps_per_second": 4.708, "step": 630 }, { "epoch": 56.98, "eval_accuracy": 0.8427672955974843, "eval_loss": 0.3044220209121704, "eval_runtime": 2.0508, "eval_samples_per_second": 77.532, "eval_steps_per_second": 4.876, "step": 641 }, { "epoch": 57.78, "grad_norm": 0.894092321395874, "learning_rate": 2.215909090909091e-05, "loss": 0.2279, "step": 650 }, { "epoch": 57.96, "eval_accuracy": 0.8490566037735849, "eval_loss": 0.32347577810287476, "eval_runtime": 2.2095, "eval_samples_per_second": 71.963, "eval_steps_per_second": 4.526, "step": 652 }, { "epoch": 58.93, "eval_accuracy": 0.8490566037735849, "eval_loss": 0.3371436297893524, "eval_runtime": 2.1055, "eval_samples_per_second": 75.518, "eval_steps_per_second": 4.75, "step": 663 }, { "epoch": 60.0, "eval_accuracy": 0.8490566037735849, "eval_loss": 0.31275492906570435, "eval_runtime": 2.1311, "eval_samples_per_second": 74.61, "eval_steps_per_second": 4.692, "step": 675 }, { "epoch": 60.98, "eval_accuracy": 0.8553459119496856, "eval_loss": 0.32111966609954834, "eval_runtime": 2.0639, "eval_samples_per_second": 77.038, "eval_steps_per_second": 4.845, "step": 686 }, { "epoch": 61.96, "eval_accuracy": 0.8616352201257862, "eval_loss": 0.302960604429245, "eval_runtime": 2.0241, "eval_samples_per_second": 78.552, "eval_steps_per_second": 4.94, "step": 697 }, { "epoch": 62.22, "grad_norm": 0.4315973222255707, "learning_rate": 2.3863636363636362e-05, "loss": 0.2167, "step": 700 }, { "epoch": 62.93, "eval_accuracy": 0.8616352201257862, "eval_loss": 0.29696550965309143, "eval_runtime": 2.034, "eval_samples_per_second": 78.169, "eval_steps_per_second": 4.916, "step": 708 }, { "epoch": 64.0, "eval_accuracy": 0.8679245283018868, "eval_loss": 0.29949402809143066, "eval_runtime": 2.095, "eval_samples_per_second": 75.897, "eval_steps_per_second": 4.773, "step": 720 }, { "epoch": 64.98, "eval_accuracy": 0.8742138364779874, "eval_loss": 0.2867083251476288, "eval_runtime": 2.0417, "eval_samples_per_second": 77.876, "eval_steps_per_second": 4.898, "step": 731 }, { "epoch": 65.96, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.26363295316696167, "eval_runtime": 2.1382, "eval_samples_per_second": 74.363, "eval_steps_per_second": 4.677, "step": 742 }, { "epoch": 66.67, "grad_norm": 0.37665870785713196, "learning_rate": 2.556818181818182e-05, "loss": 0.207, "step": 750 }, { "epoch": 66.93, "eval_accuracy": 0.8805031446540881, "eval_loss": 0.28482353687286377, "eval_runtime": 2.1166, "eval_samples_per_second": 75.119, "eval_steps_per_second": 4.724, "step": 753 }, { "epoch": 68.0, "eval_accuracy": 0.8867924528301887, "eval_loss": 0.2750767767429352, "eval_runtime": 2.1981, "eval_samples_per_second": 72.336, "eval_steps_per_second": 4.549, "step": 765 }, { "epoch": 68.98, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.256393700838089, "eval_runtime": 2.033, "eval_samples_per_second": 78.211, "eval_steps_per_second": 4.919, "step": 776 }, { "epoch": 69.96, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.25443732738494873, "eval_runtime": 2.0096, "eval_samples_per_second": 79.121, "eval_steps_per_second": 4.976, "step": 787 }, { "epoch": 70.93, "eval_accuracy": 0.8742138364779874, "eval_loss": 0.2954423129558563, "eval_runtime": 2.1018, "eval_samples_per_second": 75.649, "eval_steps_per_second": 4.758, "step": 798 }, { "epoch": 71.11, "grad_norm": 0.7302255630493164, "learning_rate": 2.7272727272727273e-05, "loss": 0.1899, "step": 800 }, { "epoch": 72.0, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.25169771909713745, "eval_runtime": 2.041, "eval_samples_per_second": 77.904, "eval_steps_per_second": 4.9, "step": 810 }, { "epoch": 72.98, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.2506076693534851, "eval_runtime": 2.0257, "eval_samples_per_second": 78.49, "eval_steps_per_second": 4.936, "step": 821 }, { "epoch": 73.96, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.2434261441230774, "eval_runtime": 2.0325, "eval_samples_per_second": 78.23, "eval_steps_per_second": 4.92, "step": 832 }, { "epoch": 74.93, "eval_accuracy": 0.89937106918239, "eval_loss": 0.23832084238529205, "eval_runtime": 2.1871, "eval_samples_per_second": 72.699, "eval_steps_per_second": 4.572, "step": 843 }, { "epoch": 75.56, "grad_norm": 0.5180615186691284, "learning_rate": 2.897727272727273e-05, "loss": 0.1801, "step": 850 }, { "epoch": 76.0, "eval_accuracy": 0.89937106918239, "eval_loss": 0.23464229702949524, "eval_runtime": 2.026, "eval_samples_per_second": 78.48, "eval_steps_per_second": 4.936, "step": 855 }, { "epoch": 76.98, "eval_accuracy": 0.89937106918239, "eval_loss": 0.22975026071071625, "eval_runtime": 2.0881, "eval_samples_per_second": 76.147, "eval_steps_per_second": 4.789, "step": 866 }, { "epoch": 77.96, "eval_accuracy": 0.9056603773584906, "eval_loss": 0.2403678596019745, "eval_runtime": 2.075, "eval_samples_per_second": 76.626, "eval_steps_per_second": 4.819, "step": 877 }, { "epoch": 78.93, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.2674010097980499, "eval_runtime": 2.037, "eval_samples_per_second": 78.057, "eval_steps_per_second": 4.909, "step": 888 }, { "epoch": 80.0, "grad_norm": 1.2135472297668457, "learning_rate": 2.9924242424242427e-05, "loss": 0.1692, "step": 900 }, { "epoch": 80.0, "eval_accuracy": 0.89937106918239, "eval_loss": 0.2231501042842865, "eval_runtime": 2.0398, "eval_samples_per_second": 77.949, "eval_steps_per_second": 4.902, "step": 900 }, { "epoch": 80.98, "eval_accuracy": 0.89937106918239, "eval_loss": 0.2390480935573578, "eval_runtime": 1.9822, "eval_samples_per_second": 80.213, "eval_steps_per_second": 5.045, "step": 911 }, { "epoch": 81.96, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.20583955943584442, "eval_runtime": 2.0665, "eval_samples_per_second": 76.94, "eval_steps_per_second": 4.839, "step": 922 }, { "epoch": 82.93, "eval_accuracy": 0.9056603773584906, "eval_loss": 0.2114023119211197, "eval_runtime": 2.0736, "eval_samples_per_second": 76.678, "eval_steps_per_second": 4.823, "step": 933 }, { "epoch": 84.0, "eval_accuracy": 0.89937106918239, "eval_loss": 0.24830691516399384, "eval_runtime": 2.0148, "eval_samples_per_second": 78.915, "eval_steps_per_second": 4.963, "step": 945 }, { "epoch": 84.44, "grad_norm": 0.5111488103866577, "learning_rate": 2.9734848484848486e-05, "loss": 0.1691, "step": 950 }, { "epoch": 84.98, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.2259017676115036, "eval_runtime": 2.2201, "eval_samples_per_second": 71.618, "eval_steps_per_second": 4.504, "step": 956 }, { "epoch": 85.96, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.20239894092082977, "eval_runtime": 2.0671, "eval_samples_per_second": 76.918, "eval_steps_per_second": 4.838, "step": 967 }, { "epoch": 86.93, "eval_accuracy": 0.89937106918239, "eval_loss": 0.20193150639533997, "eval_runtime": 2.0416, "eval_samples_per_second": 77.879, "eval_steps_per_second": 4.898, "step": 978 }, { "epoch": 88.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19625458121299744, "eval_runtime": 2.0196, "eval_samples_per_second": 78.73, "eval_steps_per_second": 4.952, "step": 990 }, { "epoch": 88.89, "grad_norm": 0.4683234989643097, "learning_rate": 2.9545454545454545e-05, "loss": 0.1609, "step": 1000 }, { "epoch": 88.98, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.21583892405033112, "eval_runtime": 2.0254, "eval_samples_per_second": 78.503, "eval_steps_per_second": 4.937, "step": 1001 }, { "epoch": 89.96, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.197691410779953, "eval_runtime": 1.9978, "eval_samples_per_second": 79.586, "eval_steps_per_second": 5.005, "step": 1012 }, { "epoch": 90.93, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.19791610538959503, "eval_runtime": 2.0853, "eval_samples_per_second": 76.248, "eval_steps_per_second": 4.795, "step": 1023 }, { "epoch": 92.0, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.20358721911907196, "eval_runtime": 2.1963, "eval_samples_per_second": 72.393, "eval_steps_per_second": 4.553, "step": 1035 }, { "epoch": 92.98, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19769711792469025, "eval_runtime": 2.0089, "eval_samples_per_second": 79.146, "eval_steps_per_second": 4.978, "step": 1046 }, { "epoch": 93.33, "grad_norm": 0.6099847555160522, "learning_rate": 2.9356060606060604e-05, "loss": 0.1516, "step": 1050 }, { "epoch": 93.96, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.1974458247423172, "eval_runtime": 2.1182, "eval_samples_per_second": 75.065, "eval_steps_per_second": 4.721, "step": 1057 }, { "epoch": 94.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.1993919163942337, "eval_runtime": 2.0707, "eval_samples_per_second": 76.787, "eval_steps_per_second": 4.829, "step": 1068 }, { "epoch": 96.0, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.1955273449420929, "eval_runtime": 2.0163, "eval_samples_per_second": 78.858, "eval_steps_per_second": 4.96, "step": 1080 }, { "epoch": 96.98, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.19483698904514313, "eval_runtime": 2.0495, "eval_samples_per_second": 77.581, "eval_steps_per_second": 4.879, "step": 1091 }, { "epoch": 97.78, "grad_norm": 1.0578981637954712, "learning_rate": 2.9166666666666666e-05, "loss": 0.1386, "step": 1100 }, { "epoch": 97.96, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19463855028152466, "eval_runtime": 2.0625, "eval_samples_per_second": 77.091, "eval_steps_per_second": 4.849, "step": 1102 }, { "epoch": 98.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19323910772800446, "eval_runtime": 2.0028, "eval_samples_per_second": 79.389, "eval_steps_per_second": 4.993, "step": 1113 }, { "epoch": 100.0, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.1841806173324585, "eval_runtime": 2.1056, "eval_samples_per_second": 75.512, "eval_steps_per_second": 4.749, "step": 1125 }, { "epoch": 100.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.18839451670646667, "eval_runtime": 1.9858, "eval_samples_per_second": 80.07, "eval_steps_per_second": 5.036, "step": 1136 }, { "epoch": 101.96, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.1899903267621994, "eval_runtime": 2.2196, "eval_samples_per_second": 71.635, "eval_steps_per_second": 4.505, "step": 1147 }, { "epoch": 102.22, "grad_norm": 0.6229210495948792, "learning_rate": 2.897727272727273e-05, "loss": 0.1279, "step": 1150 }, { "epoch": 102.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.184115469455719, "eval_runtime": 2.0229, "eval_samples_per_second": 78.602, "eval_steps_per_second": 4.944, "step": 1158 }, { "epoch": 104.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19207227230072021, "eval_runtime": 1.9639, "eval_samples_per_second": 80.962, "eval_steps_per_second": 5.092, "step": 1170 }, { "epoch": 104.98, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19926591217517853, "eval_runtime": 2.0509, "eval_samples_per_second": 77.526, "eval_steps_per_second": 4.876, "step": 1181 }, { "epoch": 105.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.19455212354660034, "eval_runtime": 2.0496, "eval_samples_per_second": 77.577, "eval_steps_per_second": 4.879, "step": 1192 }, { "epoch": 106.67, "grad_norm": 1.2741256952285767, "learning_rate": 2.8787878787878788e-05, "loss": 0.1258, "step": 1200 }, { "epoch": 106.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.18963727355003357, "eval_runtime": 2.0026, "eval_samples_per_second": 79.395, "eval_steps_per_second": 4.993, "step": 1203 }, { "epoch": 108.0, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.1884273737668991, "eval_runtime": 2.0343, "eval_samples_per_second": 78.16, "eval_steps_per_second": 4.916, "step": 1215 }, { "epoch": 108.98, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.17940251529216766, "eval_runtime": 2.1734, "eval_samples_per_second": 73.156, "eval_steps_per_second": 4.601, "step": 1226 }, { "epoch": 109.96, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.18589730560779572, "eval_runtime": 2.0874, "eval_samples_per_second": 76.17, "eval_steps_per_second": 4.791, "step": 1237 }, { "epoch": 110.93, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.2194768339395523, "eval_runtime": 2.0717, "eval_samples_per_second": 76.747, "eval_steps_per_second": 4.827, "step": 1248 }, { "epoch": 111.11, "grad_norm": 0.3613344430923462, "learning_rate": 2.859848484848485e-05, "loss": 0.1258, "step": 1250 }, { "epoch": 112.0, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.20826272666454315, "eval_runtime": 1.9861, "eval_samples_per_second": 80.057, "eval_steps_per_second": 5.035, "step": 1260 }, { "epoch": 112.98, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.21202689409255981, "eval_runtime": 2.0132, "eval_samples_per_second": 78.98, "eval_steps_per_second": 4.967, "step": 1271 }, { "epoch": 113.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.20663346350193024, "eval_runtime": 2.02, "eval_samples_per_second": 78.711, "eval_steps_per_second": 4.95, "step": 1282 }, { "epoch": 114.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.1931203156709671, "eval_runtime": 2.033, "eval_samples_per_second": 78.208, "eval_steps_per_second": 4.919, "step": 1293 }, { "epoch": 115.56, "grad_norm": 0.7503376007080078, "learning_rate": 2.8409090909090912e-05, "loss": 0.1023, "step": 1300 }, { "epoch": 116.0, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.19000084698200226, "eval_runtime": 2.0014, "eval_samples_per_second": 79.446, "eval_steps_per_second": 4.997, "step": 1305 }, { "epoch": 116.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.20288796722888947, "eval_runtime": 2.0774, "eval_samples_per_second": 76.539, "eval_steps_per_second": 4.814, "step": 1316 }, { "epoch": 117.96, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19505923986434937, "eval_runtime": 2.0552, "eval_samples_per_second": 77.366, "eval_steps_per_second": 4.866, "step": 1327 }, { "epoch": 118.93, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.20838169753551483, "eval_runtime": 2.2371, "eval_samples_per_second": 71.074, "eval_steps_per_second": 4.47, "step": 1338 }, { "epoch": 120.0, "grad_norm": 0.2376416176557541, "learning_rate": 2.821969696969697e-05, "loss": 0.0997, "step": 1350 }, { "epoch": 120.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.2159019112586975, "eval_runtime": 2.0579, "eval_samples_per_second": 77.264, "eval_steps_per_second": 4.859, "step": 1350 }, { "epoch": 120.98, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.21662545204162598, "eval_runtime": 2.0756, "eval_samples_per_second": 76.605, "eval_steps_per_second": 4.818, "step": 1361 }, { "epoch": 121.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.197323277592659, "eval_runtime": 2.0227, "eval_samples_per_second": 78.607, "eval_steps_per_second": 4.944, "step": 1372 }, { "epoch": 122.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.18507684767246246, "eval_runtime": 2.0728, "eval_samples_per_second": 76.706, "eval_steps_per_second": 4.824, "step": 1383 }, { "epoch": 124.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.20666691660881042, "eval_runtime": 1.9717, "eval_samples_per_second": 80.642, "eval_steps_per_second": 5.072, "step": 1395 }, { "epoch": 124.44, "grad_norm": 0.3115290403366089, "learning_rate": 2.803030303030303e-05, "loss": 0.1021, "step": 1400 }, { "epoch": 124.98, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.19534242153167725, "eval_runtime": 2.0497, "eval_samples_per_second": 77.571, "eval_steps_per_second": 4.879, "step": 1406 }, { "epoch": 125.96, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.17650572955608368, "eval_runtime": 2.239, "eval_samples_per_second": 71.015, "eval_steps_per_second": 4.466, "step": 1417 }, { "epoch": 126.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.18782062828540802, "eval_runtime": 2.0533, "eval_samples_per_second": 77.437, "eval_steps_per_second": 4.87, "step": 1428 }, { "epoch": 128.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.20708344876766205, "eval_runtime": 2.0414, "eval_samples_per_second": 77.887, "eval_steps_per_second": 4.899, "step": 1440 }, { "epoch": 128.89, "grad_norm": 1.2413551807403564, "learning_rate": 2.784090909090909e-05, "loss": 0.0883, "step": 1450 }, { "epoch": 128.98, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.2241077572107315, "eval_runtime": 1.9826, "eval_samples_per_second": 80.197, "eval_steps_per_second": 5.044, "step": 1451 }, { "epoch": 129.96, "eval_accuracy": 0.9119496855345912, "eval_loss": 0.23481474816799164, "eval_runtime": 1.9747, "eval_samples_per_second": 80.518, "eval_steps_per_second": 5.064, "step": 1462 }, { "epoch": 130.93, "eval_accuracy": 0.9056603773584906, "eval_loss": 0.24748335778713226, "eval_runtime": 1.9737, "eval_samples_per_second": 80.559, "eval_steps_per_second": 5.067, "step": 1473 }, { "epoch": 132.0, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.21596243977546692, "eval_runtime": 2.0455, "eval_samples_per_second": 77.733, "eval_steps_per_second": 4.889, "step": 1485 }, { "epoch": 132.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.20896825194358826, "eval_runtime": 2.047, "eval_samples_per_second": 77.675, "eval_steps_per_second": 4.885, "step": 1496 }, { "epoch": 133.33, "grad_norm": 0.56540846824646, "learning_rate": 2.7651515151515152e-05, "loss": 0.0769, "step": 1500 }, { "epoch": 133.96, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.21468934416770935, "eval_runtime": 1.9936, "eval_samples_per_second": 79.754, "eval_steps_per_second": 5.016, "step": 1507 }, { "epoch": 134.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.22008037567138672, "eval_runtime": 2.0857, "eval_samples_per_second": 76.234, "eval_steps_per_second": 4.795, "step": 1518 }, { "epoch": 136.0, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.23723578453063965, "eval_runtime": 2.1872, "eval_samples_per_second": 72.695, "eval_steps_per_second": 4.572, "step": 1530 }, { "epoch": 136.98, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.21990692615509033, "eval_runtime": 2.0473, "eval_samples_per_second": 77.664, "eval_steps_per_second": 4.885, "step": 1541 }, { "epoch": 137.78, "grad_norm": 1.0245180130004883, "learning_rate": 2.7462121212121214e-05, "loss": 0.0786, "step": 1550 }, { "epoch": 137.96, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.2087443619966507, "eval_runtime": 2.0577, "eval_samples_per_second": 77.271, "eval_steps_per_second": 4.86, "step": 1552 }, { "epoch": 138.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.18779344856739044, "eval_runtime": 2.0799, "eval_samples_per_second": 76.447, "eval_steps_per_second": 4.808, "step": 1563 }, { "epoch": 140.0, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.1914655864238739, "eval_runtime": 2.043, "eval_samples_per_second": 77.827, "eval_steps_per_second": 4.895, "step": 1575 }, { "epoch": 140.98, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.23168283700942993, "eval_runtime": 2.0313, "eval_samples_per_second": 78.277, "eval_steps_per_second": 4.923, "step": 1586 }, { "epoch": 141.96, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.2865447700023651, "eval_runtime": 2.0095, "eval_samples_per_second": 79.125, "eval_steps_per_second": 4.976, "step": 1597 }, { "epoch": 142.22, "grad_norm": 1.393044352531433, "learning_rate": 2.7272727272727273e-05, "loss": 0.0714, "step": 1600 }, { "epoch": 142.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.22998519241809845, "eval_runtime": 2.1842, "eval_samples_per_second": 72.794, "eval_steps_per_second": 4.578, "step": 1608 }, { "epoch": 144.0, "eval_accuracy": 0.9056603773584906, "eval_loss": 0.27265357971191406, "eval_runtime": 2.0318, "eval_samples_per_second": 78.258, "eval_steps_per_second": 4.922, "step": 1620 }, { "epoch": 144.98, "eval_accuracy": 0.9056603773584906, "eval_loss": 0.28114742040634155, "eval_runtime": 2.0949, "eval_samples_per_second": 75.9, "eval_steps_per_second": 4.774, "step": 1631 }, { "epoch": 145.96, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.21014899015426636, "eval_runtime": 2.0829, "eval_samples_per_second": 76.335, "eval_steps_per_second": 4.801, "step": 1642 }, { "epoch": 146.67, "grad_norm": 1.1527929306030273, "learning_rate": 2.7083333333333335e-05, "loss": 0.0702, "step": 1650 }, { "epoch": 146.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.20363318920135498, "eval_runtime": 2.0224, "eval_samples_per_second": 78.618, "eval_steps_per_second": 4.945, "step": 1653 }, { "epoch": 148.0, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.22154641151428223, "eval_runtime": 2.0286, "eval_samples_per_second": 78.378, "eval_steps_per_second": 4.929, "step": 1665 }, { "epoch": 148.98, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.21356013417243958, "eval_runtime": 1.9745, "eval_samples_per_second": 80.526, "eval_steps_per_second": 5.065, "step": 1676 }, { "epoch": 149.96, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.20560431480407715, "eval_runtime": 2.0343, "eval_samples_per_second": 78.161, "eval_steps_per_second": 4.916, "step": 1687 }, { "epoch": 150.93, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.20028233528137207, "eval_runtime": 2.0476, "eval_samples_per_second": 77.65, "eval_steps_per_second": 4.884, "step": 1698 }, { "epoch": 151.11, "grad_norm": 0.6037131547927856, "learning_rate": 2.6893939393939398e-05, "loss": 0.0676, "step": 1700 }, { "epoch": 152.0, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.22495229542255402, "eval_runtime": 2.0653, "eval_samples_per_second": 76.985, "eval_steps_per_second": 4.842, "step": 1710 }, { "epoch": 152.98, "eval_accuracy": 0.9559748427672956, "eval_loss": 0.1910940259695053, "eval_runtime": 2.2097, "eval_samples_per_second": 71.955, "eval_steps_per_second": 4.525, "step": 1721 }, { "epoch": 153.96, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.2189728170633316, "eval_runtime": 2.049, "eval_samples_per_second": 77.598, "eval_steps_per_second": 4.88, "step": 1732 }, { "epoch": 154.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.1975589245557785, "eval_runtime": 2.0536, "eval_samples_per_second": 77.426, "eval_steps_per_second": 4.87, "step": 1743 }, { "epoch": 155.56, "grad_norm": 0.9841188788414001, "learning_rate": 2.6704545454545453e-05, "loss": 0.0674, "step": 1750 }, { "epoch": 156.0, "eval_accuracy": 0.949685534591195, "eval_loss": 0.18743836879730225, "eval_runtime": 2.0593, "eval_samples_per_second": 77.211, "eval_steps_per_second": 4.856, "step": 1755 }, { "epoch": 156.98, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.2022770792245865, "eval_runtime": 2.0432, "eval_samples_per_second": 77.821, "eval_steps_per_second": 4.894, "step": 1766 }, { "epoch": 157.96, "eval_accuracy": 0.949685534591195, "eval_loss": 0.21527531743049622, "eval_runtime": 1.9951, "eval_samples_per_second": 79.694, "eval_steps_per_second": 5.012, "step": 1777 }, { "epoch": 158.93, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.22451625764369965, "eval_runtime": 2.1442, "eval_samples_per_second": 74.155, "eval_steps_per_second": 4.664, "step": 1788 }, { "epoch": 160.0, "grad_norm": 0.5377254486083984, "learning_rate": 2.6515151515151516e-05, "loss": 0.0548, "step": 1800 }, { "epoch": 160.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.2431740015745163, "eval_runtime": 2.2699, "eval_samples_per_second": 70.046, "eval_steps_per_second": 4.405, "step": 1800 }, { "epoch": 160.98, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.2071038782596588, "eval_runtime": 2.0506, "eval_samples_per_second": 77.538, "eval_steps_per_second": 4.877, "step": 1811 }, { "epoch": 161.96, "eval_accuracy": 0.949685534591195, "eval_loss": 0.18368059396743774, "eval_runtime": 2.2081, "eval_samples_per_second": 72.006, "eval_steps_per_second": 4.529, "step": 1822 }, { "epoch": 162.93, "eval_accuracy": 0.949685534591195, "eval_loss": 0.19161438941955566, "eval_runtime": 1.9999, "eval_samples_per_second": 79.505, "eval_steps_per_second": 5.0, "step": 1833 }, { "epoch": 164.0, "eval_accuracy": 0.949685534591195, "eval_loss": 0.22212089598178864, "eval_runtime": 2.0001, "eval_samples_per_second": 79.497, "eval_steps_per_second": 5.0, "step": 1845 }, { "epoch": 164.44, "grad_norm": 0.5433365702629089, "learning_rate": 2.6325757575757575e-05, "loss": 0.0616, "step": 1850 }, { "epoch": 164.98, "eval_accuracy": 0.949685534591195, "eval_loss": 0.21204246580600739, "eval_runtime": 2.035, "eval_samples_per_second": 78.132, "eval_steps_per_second": 4.914, "step": 1856 }, { "epoch": 165.96, "eval_accuracy": 0.9559748427672956, "eval_loss": 0.18882697820663452, "eval_runtime": 2.0581, "eval_samples_per_second": 77.256, "eval_steps_per_second": 4.859, "step": 1867 }, { "epoch": 166.93, "eval_accuracy": 0.949685534591195, "eval_loss": 0.19714578986167908, "eval_runtime": 2.002, "eval_samples_per_second": 79.422, "eval_steps_per_second": 4.995, "step": 1878 }, { "epoch": 168.0, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.21613995730876923, "eval_runtime": 2.0979, "eval_samples_per_second": 75.789, "eval_steps_per_second": 4.767, "step": 1890 }, { "epoch": 168.89, "grad_norm": 0.4616011083126068, "learning_rate": 2.6136363636363637e-05, "loss": 0.0467, "step": 1900 }, { "epoch": 168.98, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.22824302315711975, "eval_runtime": 2.0023, "eval_samples_per_second": 79.407, "eval_steps_per_second": 4.994, "step": 1901 }, { "epoch": 169.96, "eval_accuracy": 0.9056603773584906, "eval_loss": 0.31181007623672485, "eval_runtime": 2.2272, "eval_samples_per_second": 71.39, "eval_steps_per_second": 4.49, "step": 1912 }, { "epoch": 170.93, "eval_accuracy": 0.949685534591195, "eval_loss": 0.23191651701927185, "eval_runtime": 2.0759, "eval_samples_per_second": 76.592, "eval_steps_per_second": 4.817, "step": 1923 }, { "epoch": 172.0, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.27404358983039856, "eval_runtime": 2.0769, "eval_samples_per_second": 76.555, "eval_steps_per_second": 4.815, "step": 1935 }, { "epoch": 172.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.2666384279727936, "eval_runtime": 2.1046, "eval_samples_per_second": 75.548, "eval_steps_per_second": 4.751, "step": 1946 }, { "epoch": 173.33, "grad_norm": 1.0961925983428955, "learning_rate": 2.59469696969697e-05, "loss": 0.0609, "step": 1950 }, { "epoch": 173.96, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.23152852058410645, "eval_runtime": 2.0323, "eval_samples_per_second": 78.237, "eval_steps_per_second": 4.921, "step": 1957 }, { "epoch": 174.93, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.22292692959308624, "eval_runtime": 2.0749, "eval_samples_per_second": 76.629, "eval_steps_per_second": 4.819, "step": 1968 }, { "epoch": 176.0, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.21578945219516754, "eval_runtime": 2.0472, "eval_samples_per_second": 77.668, "eval_steps_per_second": 4.885, "step": 1980 }, { "epoch": 176.98, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.22257991135120392, "eval_runtime": 2.1698, "eval_samples_per_second": 73.278, "eval_steps_per_second": 4.609, "step": 1991 }, { "epoch": 177.78, "grad_norm": 1.6022953987121582, "learning_rate": 2.575757575757576e-05, "loss": 0.0522, "step": 2000 }, { "epoch": 177.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.22241446375846863, "eval_runtime": 2.0341, "eval_samples_per_second": 78.167, "eval_steps_per_second": 4.916, "step": 2002 }, { "epoch": 178.93, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.21375904977321625, "eval_runtime": 2.1094, "eval_samples_per_second": 75.377, "eval_steps_per_second": 4.741, "step": 2013 }, { "epoch": 180.0, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.21769364178180695, "eval_runtime": 1.9898, "eval_samples_per_second": 79.909, "eval_steps_per_second": 5.026, "step": 2025 }, { "epoch": 180.98, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.19169649481773376, "eval_runtime": 2.1326, "eval_samples_per_second": 74.558, "eval_steps_per_second": 4.689, "step": 2036 }, { "epoch": 181.96, "eval_accuracy": 0.9559748427672956, "eval_loss": 0.19741381704807281, "eval_runtime": 2.1931, "eval_samples_per_second": 72.5, "eval_steps_per_second": 4.56, "step": 2047 }, { "epoch": 182.22, "grad_norm": 0.7399430274963379, "learning_rate": 2.556818181818182e-05, "loss": 0.0515, "step": 2050 }, { "epoch": 182.93, "eval_accuracy": 0.949685534591195, "eval_loss": 0.21981187164783478, "eval_runtime": 2.0417, "eval_samples_per_second": 77.878, "eval_steps_per_second": 4.898, "step": 2058 }, { "epoch": 184.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.24247391521930695, "eval_runtime": 2.1999, "eval_samples_per_second": 72.278, "eval_steps_per_second": 4.546, "step": 2070 }, { "epoch": 184.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.24488882720470428, "eval_runtime": 2.0767, "eval_samples_per_second": 76.565, "eval_steps_per_second": 4.815, "step": 2081 }, { "epoch": 185.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.23463451862335205, "eval_runtime": 2.0674, "eval_samples_per_second": 76.907, "eval_steps_per_second": 4.837, "step": 2092 }, { "epoch": 186.67, "grad_norm": 0.67291659116745, "learning_rate": 2.5378787878787876e-05, "loss": 0.045, "step": 2100 }, { "epoch": 186.93, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.23308323323726654, "eval_runtime": 2.2603, "eval_samples_per_second": 70.346, "eval_steps_per_second": 4.424, "step": 2103 }, { "epoch": 188.0, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.2660614252090454, "eval_runtime": 2.0509, "eval_samples_per_second": 77.527, "eval_steps_per_second": 4.876, "step": 2115 }, { "epoch": 188.98, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.22910529375076294, "eval_runtime": 2.0536, "eval_samples_per_second": 77.423, "eval_steps_per_second": 4.869, "step": 2126 }, { "epoch": 189.96, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.23477251827716827, "eval_runtime": 2.0092, "eval_samples_per_second": 79.134, "eval_steps_per_second": 4.977, "step": 2137 }, { "epoch": 190.93, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.23087622225284576, "eval_runtime": 2.0403, "eval_samples_per_second": 77.929, "eval_steps_per_second": 4.901, "step": 2148 }, { "epoch": 191.11, "grad_norm": 0.11660194396972656, "learning_rate": 2.518939393939394e-05, "loss": 0.0403, "step": 2150 }, { "epoch": 192.0, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.27889564633369446, "eval_runtime": 2.0147, "eval_samples_per_second": 78.921, "eval_steps_per_second": 4.964, "step": 2160 }, { "epoch": 192.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.2540048658847809, "eval_runtime": 2.1082, "eval_samples_per_second": 75.42, "eval_steps_per_second": 4.743, "step": 2171 }, { "epoch": 193.96, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.23720349371433258, "eval_runtime": 2.1791, "eval_samples_per_second": 72.966, "eval_steps_per_second": 4.589, "step": 2182 }, { "epoch": 194.93, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.2507873773574829, "eval_runtime": 1.986, "eval_samples_per_second": 80.061, "eval_steps_per_second": 5.035, "step": 2193 }, { "epoch": 195.56, "grad_norm": 0.8518453240394592, "learning_rate": 2.5e-05, "loss": 0.0476, "step": 2200 }, { "epoch": 196.0, "eval_accuracy": 0.949685534591195, "eval_loss": 0.2193620353937149, "eval_runtime": 2.1819, "eval_samples_per_second": 72.874, "eval_steps_per_second": 4.583, "step": 2205 }, { "epoch": 196.98, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.23066306114196777, "eval_runtime": 2.0482, "eval_samples_per_second": 77.628, "eval_steps_per_second": 4.882, "step": 2216 }, { "epoch": 197.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.2719472646713257, "eval_runtime": 1.9901, "eval_samples_per_second": 79.896, "eval_steps_per_second": 5.025, "step": 2227 }, { "epoch": 198.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.28040099143981934, "eval_runtime": 2.0617, "eval_samples_per_second": 77.122, "eval_steps_per_second": 4.85, "step": 2238 }, { "epoch": 200.0, "grad_norm": 0.09039253741502762, "learning_rate": 2.481060606060606e-05, "loss": 0.0457, "step": 2250 }, { "epoch": 200.0, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.2755438983440399, "eval_runtime": 2.0773, "eval_samples_per_second": 76.541, "eval_steps_per_second": 4.814, "step": 2250 }, { "epoch": 200.98, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.2353052794933319, "eval_runtime": 1.9899, "eval_samples_per_second": 79.904, "eval_steps_per_second": 5.025, "step": 2261 }, { "epoch": 201.96, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.21893078088760376, "eval_runtime": 2.1045, "eval_samples_per_second": 75.552, "eval_steps_per_second": 4.752, "step": 2272 }, { "epoch": 202.93, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.21625204384326935, "eval_runtime": 2.0731, "eval_samples_per_second": 76.697, "eval_steps_per_second": 4.824, "step": 2283 }, { "epoch": 204.0, "eval_accuracy": 0.949685534591195, "eval_loss": 0.2110479772090912, "eval_runtime": 2.1463, "eval_samples_per_second": 74.079, "eval_steps_per_second": 4.659, "step": 2295 }, { "epoch": 204.44, "grad_norm": 0.9943685531616211, "learning_rate": 2.4621212121212123e-05, "loss": 0.0393, "step": 2300 }, { "epoch": 204.98, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.23164410889148712, "eval_runtime": 2.0606, "eval_samples_per_second": 77.162, "eval_steps_per_second": 4.853, "step": 2306 }, { "epoch": 205.96, "eval_accuracy": 0.9308176100628931, "eval_loss": 0.24650876224040985, "eval_runtime": 2.0011, "eval_samples_per_second": 79.455, "eval_steps_per_second": 4.997, "step": 2317 }, { "epoch": 206.93, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.23763243854045868, "eval_runtime": 2.0999, "eval_samples_per_second": 75.719, "eval_steps_per_second": 4.762, "step": 2328 }, { "epoch": 208.0, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.2170635461807251, "eval_runtime": 2.1575, "eval_samples_per_second": 73.697, "eval_steps_per_second": 4.635, "step": 2340 }, { "epoch": 208.89, "grad_norm": 0.46173095703125, "learning_rate": 2.4431818181818185e-05, "loss": 0.0443, "step": 2350 }, { "epoch": 208.98, "eval_accuracy": 0.949685534591195, "eval_loss": 0.23952844738960266, "eval_runtime": 2.0014, "eval_samples_per_second": 79.445, "eval_steps_per_second": 4.997, "step": 2351 }, { "epoch": 209.96, "eval_accuracy": 0.8930817610062893, "eval_loss": 0.2906019687652588, "eval_runtime": 2.0133, "eval_samples_per_second": 78.977, "eval_steps_per_second": 4.967, "step": 2362 }, { "epoch": 210.93, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.2608316242694855, "eval_runtime": 2.1558, "eval_samples_per_second": 73.755, "eval_steps_per_second": 4.639, "step": 2373 }, { "epoch": 212.0, "eval_accuracy": 0.949685534591195, "eval_loss": 0.23210321366786957, "eval_runtime": 2.0606, "eval_samples_per_second": 77.161, "eval_steps_per_second": 4.853, "step": 2385 }, { "epoch": 212.98, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.24640053510665894, "eval_runtime": 2.2148, "eval_samples_per_second": 71.79, "eval_steps_per_second": 4.515, "step": 2396 }, { "epoch": 213.33, "grad_norm": 0.94215327501297, "learning_rate": 2.4242424242424244e-05, "loss": 0.0539, "step": 2400 }, { "epoch": 213.96, "eval_accuracy": 0.9182389937106918, "eval_loss": 0.2441636025905609, "eval_runtime": 2.172, "eval_samples_per_second": 73.203, "eval_steps_per_second": 4.604, "step": 2407 }, { "epoch": 214.93, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.2511676847934723, "eval_runtime": 2.0176, "eval_samples_per_second": 78.806, "eval_steps_per_second": 4.956, "step": 2418 }, { "epoch": 216.0, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.22649481892585754, "eval_runtime": 2.0103, "eval_samples_per_second": 79.091, "eval_steps_per_second": 4.974, "step": 2430 }, { "epoch": 216.98, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.21274729073047638, "eval_runtime": 2.0508, "eval_samples_per_second": 77.529, "eval_steps_per_second": 4.876, "step": 2441 }, { "epoch": 217.78, "grad_norm": 0.7381362318992615, "learning_rate": 2.4053030303030303e-05, "loss": 0.0415, "step": 2450 }, { "epoch": 217.96, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.284365177154541, "eval_runtime": 2.0321, "eval_samples_per_second": 78.244, "eval_steps_per_second": 4.921, "step": 2452 }, { "epoch": 218.93, "eval_accuracy": 0.9433962264150944, "eval_loss": 0.24891048669815063, "eval_runtime": 2.0843, "eval_samples_per_second": 76.285, "eval_steps_per_second": 4.798, "step": 2463 }, { "epoch": 220.0, "eval_accuracy": 0.949685534591195, "eval_loss": 0.21200108528137207, "eval_runtime": 1.9938, "eval_samples_per_second": 79.748, "eval_steps_per_second": 5.016, "step": 2475 }, { "epoch": 220.98, "eval_accuracy": 0.9559748427672956, "eval_loss": 0.2015109807252884, "eval_runtime": 2.2098, "eval_samples_per_second": 71.951, "eval_steps_per_second": 4.525, "step": 2486 }, { "epoch": 221.96, "eval_accuracy": 0.9245283018867925, "eval_loss": 0.25095799565315247, "eval_runtime": 2.0817, "eval_samples_per_second": 76.381, "eval_steps_per_second": 4.804, "step": 2497 }, { "epoch": 222.22, "grad_norm": 0.3756774961948395, "learning_rate": 2.3863636363636362e-05, "loss": 0.0325, "step": 2500 }, { "epoch": 222.93, "eval_accuracy": 0.9371069182389937, "eval_loss": 0.2875436246395111, "eval_runtime": 2.0148, "eval_samples_per_second": 78.915, "eval_steps_per_second": 4.963, "step": 2508 }, { "epoch": 224.0, "eval_accuracy": 0.9622641509433962, "eval_loss": 0.19936275482177734, "eval_runtime": 2.0208, "eval_samples_per_second": 78.682, "eval_steps_per_second": 4.949, "step": 2520 } ], "logging_steps": 50, "max_steps": 8800, "num_input_tokens_seen": 0, "num_train_epochs": 800, "save_steps": 500, "total_flos": 1.406670474295296e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }