{ "best_metric": 0.2633333333333333, "best_model_checkpoint": "swinv2-small-patch4-window16-256-mineral\\checkpoint-6693", "epoch": 480.0, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 5.555555555555556e-07, "loss": 5.6941, "step": 10 }, { "epoch": 0.96, "eval_accuracy": 0.005, "eval_loss": 5.692106246948242, "eval_runtime": 9.9033, "eval_samples_per_second": 60.586, "eval_steps_per_second": 1.919, "step": 18 }, { "epoch": 1.07, "learning_rate": 1.1111111111111112e-06, "loss": 5.6939, "step": 20 }, { "epoch": 1.6, "learning_rate": 1.6666666666666667e-06, "loss": 5.6886, "step": 30 }, { "epoch": 1.97, "eval_accuracy": 0.005, "eval_loss": 5.682541370391846, "eval_runtime": 4.1429, "eval_samples_per_second": 144.826, "eval_steps_per_second": 4.586, "step": 37 }, { "epoch": 2.13, "learning_rate": 2.2222222222222225e-06, "loss": 5.6844, "step": 40 }, { "epoch": 2.67, "learning_rate": 2.777777777777778e-06, "loss": 5.6735, "step": 50 }, { "epoch": 2.99, "eval_accuracy": 0.005, "eval_loss": 5.669071674346924, "eval_runtime": 4.142, "eval_samples_per_second": 144.858, "eval_steps_per_second": 4.587, "step": 56 }, { "epoch": 3.2, "learning_rate": 3.3333333333333333e-06, "loss": 5.6534, "step": 60 }, { "epoch": 3.73, "learning_rate": 3.888888888888889e-06, "loss": 5.6521, "step": 70 }, { "epoch": 4.0, "eval_accuracy": 0.0033333333333333335, "eval_loss": 5.654940605163574, "eval_runtime": 4.1572, "eval_samples_per_second": 144.326, "eval_steps_per_second": 4.57, "step": 75 }, { "epoch": 4.27, "learning_rate": 4.444444444444445e-06, "loss": 5.6431, "step": 80 }, { "epoch": 4.8, "learning_rate": 5e-06, "loss": 5.6394, "step": 90 }, { "epoch": 4.96, "eval_accuracy": 0.0033333333333333335, "eval_loss": 5.641611099243164, "eval_runtime": 4.1535, "eval_samples_per_second": 144.457, "eval_steps_per_second": 4.574, "step": 93 }, { "epoch": 5.33, "learning_rate": 5.555555555555556e-06, "loss": 5.5941, "step": 100 }, { "epoch": 5.87, "learning_rate": 6.111111111111111e-06, "loss": 5.6078, "step": 110 }, { "epoch": 5.97, "eval_accuracy": 0.0033333333333333335, "eval_loss": 5.627758979797363, "eval_runtime": 4.1893, "eval_samples_per_second": 143.22, "eval_steps_per_second": 4.535, "step": 112 }, { "epoch": 6.4, "learning_rate": 6.666666666666667e-06, "loss": 5.5762, "step": 120 }, { "epoch": 6.93, "learning_rate": 7.222222222222222e-06, "loss": 5.5743, "step": 130 }, { "epoch": 6.99, "eval_accuracy": 0.0016666666666666668, "eval_loss": 5.612813472747803, "eval_runtime": 4.224, "eval_samples_per_second": 142.046, "eval_steps_per_second": 4.498, "step": 131 }, { "epoch": 7.47, "learning_rate": 7.777777777777777e-06, "loss": 5.5413, "step": 140 }, { "epoch": 8.0, "learning_rate": 8.333333333333334e-06, "loss": 5.5509, "step": 150 }, { "epoch": 8.0, "eval_accuracy": 0.0016666666666666668, "eval_loss": 5.591813087463379, "eval_runtime": 4.1747, "eval_samples_per_second": 143.723, "eval_steps_per_second": 4.551, "step": 150 }, { "epoch": 8.53, "learning_rate": 8.88888888888889e-06, "loss": 5.5115, "step": 160 }, { "epoch": 8.96, "eval_accuracy": 0.006666666666666667, "eval_loss": 5.569559097290039, "eval_runtime": 4.1739, "eval_samples_per_second": 143.75, "eval_steps_per_second": 4.552, "step": 168 }, { "epoch": 9.07, "learning_rate": 9.444444444444445e-06, "loss": 5.4912, "step": 170 }, { "epoch": 9.6, "learning_rate": 1e-05, "loss": 5.4411, "step": 180 }, { "epoch": 9.97, "eval_accuracy": 0.01, "eval_loss": 5.5439839363098145, "eval_runtime": 4.1644, "eval_samples_per_second": 144.078, "eval_steps_per_second": 4.562, "step": 187 }, { "epoch": 10.13, "learning_rate": 1.0555555555555555e-05, "loss": 5.3942, "step": 190 }, { "epoch": 10.67, "learning_rate": 1.1111111111111112e-05, "loss": 5.3335, "step": 200 }, { "epoch": 10.99, "eval_accuracy": 0.016666666666666666, "eval_loss": 5.513484001159668, "eval_runtime": 4.1932, "eval_samples_per_second": 143.088, "eval_steps_per_second": 4.531, "step": 206 }, { "epoch": 11.2, "learning_rate": 1.1666666666666668e-05, "loss": 5.2998, "step": 210 }, { "epoch": 11.73, "learning_rate": 1.2222222222222222e-05, "loss": 5.2413, "step": 220 }, { "epoch": 12.0, "eval_accuracy": 0.021666666666666667, "eval_loss": 5.464037895202637, "eval_runtime": 4.2106, "eval_samples_per_second": 142.496, "eval_steps_per_second": 4.512, "step": 225 }, { "epoch": 12.27, "learning_rate": 1.2777777777777777e-05, "loss": 5.2175, "step": 230 }, { "epoch": 12.8, "learning_rate": 1.3333333333333333e-05, "loss": 5.1738, "step": 240 }, { "epoch": 12.96, "eval_accuracy": 0.03333333333333333, "eval_loss": 5.408351421356201, "eval_runtime": 4.1793, "eval_samples_per_second": 143.566, "eval_steps_per_second": 4.546, "step": 243 }, { "epoch": 13.33, "learning_rate": 1.388888888888889e-05, "loss": 5.0966, "step": 250 }, { "epoch": 13.87, "learning_rate": 1.4444444444444444e-05, "loss": 5.0222, "step": 260 }, { "epoch": 13.97, "eval_accuracy": 0.045, "eval_loss": 5.3320746421813965, "eval_runtime": 4.1797, "eval_samples_per_second": 143.55, "eval_steps_per_second": 4.546, "step": 262 }, { "epoch": 14.4, "learning_rate": 1.5e-05, "loss": 4.913, "step": 270 }, { "epoch": 14.93, "learning_rate": 1.5555555555555555e-05, "loss": 4.8594, "step": 280 }, { "epoch": 14.99, "eval_accuracy": 0.05333333333333334, "eval_loss": 5.248490333557129, "eval_runtime": 4.1704, "eval_samples_per_second": 143.871, "eval_steps_per_second": 4.556, "step": 281 }, { "epoch": 15.47, "learning_rate": 1.6111111111111115e-05, "loss": 4.7591, "step": 290 }, { "epoch": 16.0, "learning_rate": 1.6666666666666667e-05, "loss": 4.7441, "step": 300 }, { "epoch": 16.0, "eval_accuracy": 0.065, "eval_loss": 5.150908946990967, "eval_runtime": 4.1485, "eval_samples_per_second": 144.629, "eval_steps_per_second": 4.58, "step": 300 }, { "epoch": 16.53, "learning_rate": 1.7222222222222224e-05, "loss": 4.5946, "step": 310 }, { "epoch": 16.96, "eval_accuracy": 0.07166666666666667, "eval_loss": 5.070082187652588, "eval_runtime": 4.1543, "eval_samples_per_second": 144.43, "eval_steps_per_second": 4.574, "step": 318 }, { "epoch": 17.07, "learning_rate": 1.777777777777778e-05, "loss": 4.4995, "step": 320 }, { "epoch": 17.6, "learning_rate": 1.8333333333333333e-05, "loss": 4.3382, "step": 330 }, { "epoch": 17.97, "eval_accuracy": 0.08666666666666667, "eval_loss": 4.976734161376953, "eval_runtime": 4.158, "eval_samples_per_second": 144.301, "eval_steps_per_second": 4.57, "step": 337 }, { "epoch": 18.13, "learning_rate": 1.888888888888889e-05, "loss": 4.3477, "step": 340 }, { "epoch": 18.67, "learning_rate": 1.9444444444444445e-05, "loss": 4.2008, "step": 350 }, { "epoch": 18.99, "eval_accuracy": 0.105, "eval_loss": 4.862234115600586, "eval_runtime": 4.1637, "eval_samples_per_second": 144.103, "eval_steps_per_second": 4.563, "step": 356 }, { "epoch": 19.2, "learning_rate": 2e-05, "loss": 4.0491, "step": 360 }, { "epoch": 19.73, "learning_rate": 2.0555555555555555e-05, "loss": 4.0563, "step": 370 }, { "epoch": 20.0, "eval_accuracy": 0.10333333333333333, "eval_loss": 4.772627830505371, "eval_runtime": 4.1762, "eval_samples_per_second": 143.672, "eval_steps_per_second": 4.55, "step": 375 }, { "epoch": 20.27, "learning_rate": 2.111111111111111e-05, "loss": 3.8101, "step": 380 }, { "epoch": 20.8, "learning_rate": 2.1666666666666667e-05, "loss": 3.8064, "step": 390 }, { "epoch": 20.96, "eval_accuracy": 0.115, "eval_loss": 4.689815044403076, "eval_runtime": 4.151, "eval_samples_per_second": 144.542, "eval_steps_per_second": 4.577, "step": 393 }, { "epoch": 21.33, "learning_rate": 2.2222222222222223e-05, "loss": 3.6083, "step": 400 }, { "epoch": 21.87, "learning_rate": 2.277777777777778e-05, "loss": 3.5584, "step": 410 }, { "epoch": 21.97, "eval_accuracy": 0.125, "eval_loss": 4.599685192108154, "eval_runtime": 4.1649, "eval_samples_per_second": 144.062, "eval_steps_per_second": 4.562, "step": 412 }, { "epoch": 22.4, "learning_rate": 2.3333333333333336e-05, "loss": 3.4029, "step": 420 }, { "epoch": 22.93, "learning_rate": 2.3888888888888892e-05, "loss": 3.3377, "step": 430 }, { "epoch": 22.99, "eval_accuracy": 0.13666666666666666, "eval_loss": 4.484786510467529, "eval_runtime": 4.165, "eval_samples_per_second": 144.057, "eval_steps_per_second": 4.562, "step": 431 }, { "epoch": 23.47, "learning_rate": 2.4444444444444445e-05, "loss": 3.1633, "step": 440 }, { "epoch": 24.0, "learning_rate": 2.5e-05, "loss": 3.1119, "step": 450 }, { "epoch": 24.0, "eval_accuracy": 0.15333333333333332, "eval_loss": 4.405167102813721, "eval_runtime": 4.1901, "eval_samples_per_second": 143.196, "eval_steps_per_second": 4.535, "step": 450 }, { "epoch": 24.53, "learning_rate": 2.5555555555555554e-05, "loss": 2.8686, "step": 460 }, { "epoch": 24.96, "eval_accuracy": 0.15, "eval_loss": 4.37052583694458, "eval_runtime": 4.1543, "eval_samples_per_second": 144.427, "eval_steps_per_second": 4.574, "step": 468 }, { "epoch": 25.07, "learning_rate": 2.6111111111111114e-05, "loss": 3.0138, "step": 470 }, { "epoch": 25.6, "learning_rate": 2.6666666666666667e-05, "loss": 2.7649, "step": 480 }, { "epoch": 25.97, "eval_accuracy": 0.165, "eval_loss": 4.297973155975342, "eval_runtime": 4.1559, "eval_samples_per_second": 144.373, "eval_steps_per_second": 4.572, "step": 487 }, { "epoch": 26.13, "learning_rate": 2.7222222222222223e-05, "loss": 2.6887, "step": 490 }, { "epoch": 26.67, "learning_rate": 2.777777777777778e-05, "loss": 2.5698, "step": 500 }, { "epoch": 26.99, "eval_accuracy": 0.17666666666666667, "eval_loss": 4.236337184906006, "eval_runtime": 4.1581, "eval_samples_per_second": 144.296, "eval_steps_per_second": 4.569, "step": 506 }, { "epoch": 27.2, "learning_rate": 2.8333333333333335e-05, "loss": 2.5301, "step": 510 }, { "epoch": 27.73, "learning_rate": 2.8888888888888888e-05, "loss": 2.4344, "step": 520 }, { "epoch": 28.0, "eval_accuracy": 0.17666666666666667, "eval_loss": 4.17328405380249, "eval_runtime": 4.1589, "eval_samples_per_second": 144.269, "eval_steps_per_second": 4.569, "step": 525 }, { "epoch": 28.27, "learning_rate": 2.9444444444444448e-05, "loss": 2.2843, "step": 530 }, { "epoch": 28.8, "learning_rate": 3e-05, "loss": 2.2186, "step": 540 }, { "epoch": 28.96, "eval_accuracy": 0.17333333333333334, "eval_loss": 4.178333282470703, "eval_runtime": 4.1547, "eval_samples_per_second": 144.413, "eval_steps_per_second": 4.573, "step": 543 }, { "epoch": 29.33, "learning_rate": 3.055555555555556e-05, "loss": 2.1025, "step": 550 }, { "epoch": 29.87, "learning_rate": 3.111111111111111e-05, "loss": 2.0227, "step": 560 }, { "epoch": 29.97, "eval_accuracy": 0.18, "eval_loss": 4.13058614730835, "eval_runtime": 4.1618, "eval_samples_per_second": 144.168, "eval_steps_per_second": 4.565, "step": 562 }, { "epoch": 30.4, "learning_rate": 3.1666666666666666e-05, "loss": 1.8851, "step": 570 }, { "epoch": 30.93, "learning_rate": 3.222222222222223e-05, "loss": 1.9153, "step": 580 }, { "epoch": 30.99, "eval_accuracy": 0.175, "eval_loss": 4.094816207885742, "eval_runtime": 4.1706, "eval_samples_per_second": 143.864, "eval_steps_per_second": 4.556, "step": 581 }, { "epoch": 31.47, "learning_rate": 3.277777777777778e-05, "loss": 1.7959, "step": 590 }, { "epoch": 32.0, "learning_rate": 3.3333333333333335e-05, "loss": 1.7363, "step": 600 }, { "epoch": 32.0, "eval_accuracy": 0.17833333333333334, "eval_loss": 4.061172008514404, "eval_runtime": 4.1527, "eval_samples_per_second": 144.483, "eval_steps_per_second": 4.575, "step": 600 }, { "epoch": 32.53, "learning_rate": 3.388888888888889e-05, "loss": 1.6171, "step": 610 }, { "epoch": 32.96, "eval_accuracy": 0.185, "eval_loss": 4.020925521850586, "eval_runtime": 4.1794, "eval_samples_per_second": 143.562, "eval_steps_per_second": 4.546, "step": 618 }, { "epoch": 33.07, "learning_rate": 3.444444444444445e-05, "loss": 1.5284, "step": 620 }, { "epoch": 33.6, "learning_rate": 3.5e-05, "loss": 1.4865, "step": 630 }, { "epoch": 33.97, "eval_accuracy": 0.185, "eval_loss": 4.019384860992432, "eval_runtime": 4.1555, "eval_samples_per_second": 144.386, "eval_steps_per_second": 4.572, "step": 637 }, { "epoch": 34.13, "learning_rate": 3.555555555555556e-05, "loss": 1.4216, "step": 640 }, { "epoch": 34.67, "learning_rate": 3.611111111111111e-05, "loss": 1.3194, "step": 650 }, { "epoch": 34.99, "eval_accuracy": 0.205, "eval_loss": 3.988067626953125, "eval_runtime": 4.1907, "eval_samples_per_second": 143.174, "eval_steps_per_second": 4.534, "step": 656 }, { "epoch": 35.2, "learning_rate": 3.6666666666666666e-05, "loss": 1.3247, "step": 660 }, { "epoch": 35.73, "learning_rate": 3.722222222222222e-05, "loss": 1.2811, "step": 670 }, { "epoch": 36.0, "eval_accuracy": 0.215, "eval_loss": 3.9861950874328613, "eval_runtime": 4.2139, "eval_samples_per_second": 142.387, "eval_steps_per_second": 4.509, "step": 675 }, { "epoch": 36.27, "learning_rate": 3.777777777777778e-05, "loss": 1.1968, "step": 680 }, { "epoch": 36.8, "learning_rate": 3.8333333333333334e-05, "loss": 1.1703, "step": 690 }, { "epoch": 36.96, "eval_accuracy": 0.20333333333333334, "eval_loss": 3.9904768466949463, "eval_runtime": 4.165, "eval_samples_per_second": 144.057, "eval_steps_per_second": 4.562, "step": 693 }, { "epoch": 37.33, "learning_rate": 3.888888888888889e-05, "loss": 1.1136, "step": 700 }, { "epoch": 37.87, "learning_rate": 3.944444444444445e-05, "loss": 1.114, "step": 710 }, { "epoch": 37.97, "eval_accuracy": 0.21333333333333335, "eval_loss": 3.951385021209717, "eval_runtime": 4.1675, "eval_samples_per_second": 143.97, "eval_steps_per_second": 4.559, "step": 712 }, { "epoch": 38.4, "learning_rate": 4e-05, "loss": 1.0194, "step": 720 }, { "epoch": 38.93, "learning_rate": 4.055555555555556e-05, "loss": 0.9645, "step": 730 }, { "epoch": 38.99, "eval_accuracy": 0.20666666666666667, "eval_loss": 3.9677815437316895, "eval_runtime": 4.1678, "eval_samples_per_second": 143.96, "eval_steps_per_second": 4.559, "step": 731 }, { "epoch": 39.47, "learning_rate": 4.111111111111111e-05, "loss": 0.9037, "step": 740 }, { "epoch": 40.0, "learning_rate": 4.166666666666667e-05, "loss": 0.8976, "step": 750 }, { "epoch": 40.0, "eval_accuracy": 0.21666666666666667, "eval_loss": 3.987384080886841, "eval_runtime": 4.1794, "eval_samples_per_second": 143.56, "eval_steps_per_second": 4.546, "step": 750 }, { "epoch": 40.53, "learning_rate": 4.222222222222222e-05, "loss": 0.8147, "step": 760 }, { "epoch": 40.96, "eval_accuracy": 0.20833333333333334, "eval_loss": 3.925668954849243, "eval_runtime": 4.2286, "eval_samples_per_second": 141.892, "eval_steps_per_second": 4.493, "step": 768 }, { "epoch": 41.07, "learning_rate": 4.277777777777778e-05, "loss": 0.8449, "step": 770 }, { "epoch": 41.6, "learning_rate": 4.3333333333333334e-05, "loss": 0.7239, "step": 780 }, { "epoch": 41.97, "eval_accuracy": 0.22166666666666668, "eval_loss": 3.9394490718841553, "eval_runtime": 4.2239, "eval_samples_per_second": 142.05, "eval_steps_per_second": 4.498, "step": 787 }, { "epoch": 42.13, "learning_rate": 4.388888888888889e-05, "loss": 0.7847, "step": 790 }, { "epoch": 42.67, "learning_rate": 4.4444444444444447e-05, "loss": 0.7732, "step": 800 }, { "epoch": 42.99, "eval_accuracy": 0.215, "eval_loss": 3.9472830295562744, "eval_runtime": 4.1769, "eval_samples_per_second": 143.646, "eval_steps_per_second": 4.549, "step": 806 }, { "epoch": 43.2, "learning_rate": 4.5e-05, "loss": 0.7159, "step": 810 }, { "epoch": 43.73, "learning_rate": 4.555555555555556e-05, "loss": 0.7009, "step": 820 }, { "epoch": 44.0, "eval_accuracy": 0.215, "eval_loss": 3.946096897125244, "eval_runtime": 4.1968, "eval_samples_per_second": 142.965, "eval_steps_per_second": 4.527, "step": 825 }, { "epoch": 44.27, "learning_rate": 4.6111111111111115e-05, "loss": 0.663, "step": 830 }, { "epoch": 44.8, "learning_rate": 4.666666666666667e-05, "loss": 0.5945, "step": 840 }, { "epoch": 44.96, "eval_accuracy": 0.21333333333333335, "eval_loss": 4.0206685066223145, "eval_runtime": 4.207, "eval_samples_per_second": 142.62, "eval_steps_per_second": 4.516, "step": 843 }, { "epoch": 45.33, "learning_rate": 4.722222222222222e-05, "loss": 0.5903, "step": 850 }, { "epoch": 45.87, "learning_rate": 4.7777777777777784e-05, "loss": 0.555, "step": 860 }, { "epoch": 45.97, "eval_accuracy": 0.20833333333333334, "eval_loss": 4.035262107849121, "eval_runtime": 4.1806, "eval_samples_per_second": 143.522, "eval_steps_per_second": 4.545, "step": 862 }, { "epoch": 46.4, "learning_rate": 4.8333333333333334e-05, "loss": 0.5768, "step": 870 }, { "epoch": 46.93, "learning_rate": 4.888888888888889e-05, "loss": 0.5241, "step": 880 }, { "epoch": 46.99, "eval_accuracy": 0.21666666666666667, "eval_loss": 4.023153305053711, "eval_runtime": 4.1743, "eval_samples_per_second": 143.736, "eval_steps_per_second": 4.552, "step": 881 }, { "epoch": 47.47, "learning_rate": 4.9444444444444446e-05, "loss": 0.5324, "step": 890 }, { "epoch": 48.0, "learning_rate": 5e-05, "loss": 0.4789, "step": 900 }, { "epoch": 48.0, "eval_accuracy": 0.22, "eval_loss": 4.002644062042236, "eval_runtime": 4.1605, "eval_samples_per_second": 144.215, "eval_steps_per_second": 4.567, "step": 900 }, { "epoch": 48.53, "learning_rate": 4.9938271604938276e-05, "loss": 0.4284, "step": 910 }, { "epoch": 48.96, "eval_accuracy": 0.22, "eval_loss": 4.0031256675720215, "eval_runtime": 4.1788, "eval_samples_per_second": 143.582, "eval_steps_per_second": 4.547, "step": 918 }, { "epoch": 49.07, "learning_rate": 4.987654320987655e-05, "loss": 0.5321, "step": 920 }, { "epoch": 49.6, "learning_rate": 4.981481481481482e-05, "loss": 0.4701, "step": 930 }, { "epoch": 49.97, "eval_accuracy": 0.215, "eval_loss": 4.057220935821533, "eval_runtime": 4.1731, "eval_samples_per_second": 143.779, "eval_steps_per_second": 4.553, "step": 937 }, { "epoch": 50.13, "learning_rate": 4.9753086419753084e-05, "loss": 0.491, "step": 940 }, { "epoch": 50.67, "learning_rate": 4.969135802469136e-05, "loss": 0.4501, "step": 950 }, { "epoch": 50.99, "eval_accuracy": 0.215, "eval_loss": 4.087738513946533, "eval_runtime": 4.1876, "eval_samples_per_second": 143.279, "eval_steps_per_second": 4.537, "step": 956 }, { "epoch": 51.2, "learning_rate": 4.962962962962963e-05, "loss": 0.4453, "step": 960 }, { "epoch": 51.73, "learning_rate": 4.9567901234567905e-05, "loss": 0.3966, "step": 970 }, { "epoch": 52.0, "eval_accuracy": 0.21666666666666667, "eval_loss": 4.020733833312988, "eval_runtime": 4.1529, "eval_samples_per_second": 144.478, "eval_steps_per_second": 4.575, "step": 975 }, { "epoch": 52.27, "learning_rate": 4.950617283950618e-05, "loss": 0.4295, "step": 980 }, { "epoch": 52.8, "learning_rate": 4.9444444444444446e-05, "loss": 0.3564, "step": 990 }, { "epoch": 52.96, "eval_accuracy": 0.215, "eval_loss": 4.082664966583252, "eval_runtime": 4.1603, "eval_samples_per_second": 144.219, "eval_steps_per_second": 4.567, "step": 993 }, { "epoch": 53.33, "learning_rate": 4.938271604938271e-05, "loss": 0.4013, "step": 1000 }, { "epoch": 53.87, "learning_rate": 4.932098765432099e-05, "loss": 0.3472, "step": 1010 }, { "epoch": 53.97, "eval_accuracy": 0.235, "eval_loss": 4.090172290802002, "eval_runtime": 4.1532, "eval_samples_per_second": 144.465, "eval_steps_per_second": 4.575, "step": 1012 }, { "epoch": 54.4, "learning_rate": 4.925925925925926e-05, "loss": 0.383, "step": 1020 }, { "epoch": 54.93, "learning_rate": 4.9197530864197535e-05, "loss": 0.3731, "step": 1030 }, { "epoch": 54.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.095271587371826, "eval_runtime": 4.2172, "eval_samples_per_second": 142.273, "eval_steps_per_second": 4.505, "step": 1031 }, { "epoch": 55.47, "learning_rate": 4.913580246913581e-05, "loss": 0.3237, "step": 1040 }, { "epoch": 56.0, "learning_rate": 4.9074074074074075e-05, "loss": 0.3161, "step": 1050 }, { "epoch": 56.0, "eval_accuracy": 0.20333333333333334, "eval_loss": 4.165963172912598, "eval_runtime": 4.2197, "eval_samples_per_second": 142.19, "eval_steps_per_second": 4.503, "step": 1050 }, { "epoch": 56.53, "learning_rate": 4.901234567901235e-05, "loss": 0.3352, "step": 1060 }, { "epoch": 56.96, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.115335464477539, "eval_runtime": 4.1813, "eval_samples_per_second": 143.495, "eval_steps_per_second": 4.544, "step": 1068 }, { "epoch": 57.07, "learning_rate": 4.8950617283950616e-05, "loss": 0.3455, "step": 1070 }, { "epoch": 57.6, "learning_rate": 4.888888888888889e-05, "loss": 0.3317, "step": 1080 }, { "epoch": 57.97, "eval_accuracy": 0.21666666666666667, "eval_loss": 4.1095662117004395, "eval_runtime": 4.2026, "eval_samples_per_second": 142.768, "eval_steps_per_second": 4.521, "step": 1087 }, { "epoch": 58.13, "learning_rate": 4.8827160493827164e-05, "loss": 0.3594, "step": 1090 }, { "epoch": 58.67, "learning_rate": 4.876543209876544e-05, "loss": 0.294, "step": 1100 }, { "epoch": 58.99, "eval_accuracy": 0.215, "eval_loss": 4.185626029968262, "eval_runtime": 4.176, "eval_samples_per_second": 143.679, "eval_steps_per_second": 4.55, "step": 1106 }, { "epoch": 59.2, "learning_rate": 4.8703703703703704e-05, "loss": 0.3059, "step": 1110 }, { "epoch": 59.73, "learning_rate": 4.864197530864198e-05, "loss": 0.3299, "step": 1120 }, { "epoch": 60.0, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.1475830078125, "eval_runtime": 4.1797, "eval_samples_per_second": 143.552, "eval_steps_per_second": 4.546, "step": 1125 }, { "epoch": 60.27, "learning_rate": 4.858024691358025e-05, "loss": 0.2965, "step": 1130 }, { "epoch": 60.8, "learning_rate": 4.851851851851852e-05, "loss": 0.2847, "step": 1140 }, { "epoch": 60.96, "eval_accuracy": 0.225, "eval_loss": 4.204588890075684, "eval_runtime": 4.1808, "eval_samples_per_second": 143.515, "eval_steps_per_second": 4.545, "step": 1143 }, { "epoch": 61.33, "learning_rate": 4.845679012345679e-05, "loss": 0.3176, "step": 1150 }, { "epoch": 61.87, "learning_rate": 4.8395061728395067e-05, "loss": 0.2924, "step": 1160 }, { "epoch": 61.97, "eval_accuracy": 0.21833333333333332, "eval_loss": 4.156820297241211, "eval_runtime": 4.1645, "eval_samples_per_second": 144.075, "eval_steps_per_second": 4.562, "step": 1162 }, { "epoch": 62.4, "learning_rate": 4.8333333333333334e-05, "loss": 0.255, "step": 1170 }, { "epoch": 62.93, "learning_rate": 4.827160493827161e-05, "loss": 0.2818, "step": 1180 }, { "epoch": 62.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.151925563812256, "eval_runtime": 4.175, "eval_samples_per_second": 143.711, "eval_steps_per_second": 4.551, "step": 1181 }, { "epoch": 63.47, "learning_rate": 4.820987654320988e-05, "loss": 0.2618, "step": 1190 }, { "epoch": 64.0, "learning_rate": 4.814814814814815e-05, "loss": 0.2698, "step": 1200 }, { "epoch": 64.0, "eval_accuracy": 0.215, "eval_loss": 4.227489471435547, "eval_runtime": 4.1793, "eval_samples_per_second": 143.565, "eval_steps_per_second": 4.546, "step": 1200 }, { "epoch": 64.53, "learning_rate": 4.808641975308642e-05, "loss": 0.2579, "step": 1210 }, { "epoch": 64.96, "eval_accuracy": 0.235, "eval_loss": 4.162615776062012, "eval_runtime": 4.1756, "eval_samples_per_second": 143.693, "eval_steps_per_second": 4.55, "step": 1218 }, { "epoch": 65.07, "learning_rate": 4.8024691358024696e-05, "loss": 0.2555, "step": 1220 }, { "epoch": 65.6, "learning_rate": 4.796296296296296e-05, "loss": 0.2597, "step": 1230 }, { "epoch": 65.97, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.227728843688965, "eval_runtime": 4.1826, "eval_samples_per_second": 143.452, "eval_steps_per_second": 4.543, "step": 1237 }, { "epoch": 66.13, "learning_rate": 4.7901234567901237e-05, "loss": 0.1989, "step": 1240 }, { "epoch": 66.67, "learning_rate": 4.783950617283951e-05, "loss": 0.2443, "step": 1250 }, { "epoch": 66.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.192920207977295, "eval_runtime": 4.1844, "eval_samples_per_second": 143.39, "eval_steps_per_second": 4.541, "step": 1256 }, { "epoch": 67.2, "learning_rate": 4.7777777777777784e-05, "loss": 0.2686, "step": 1260 }, { "epoch": 67.73, "learning_rate": 4.771604938271605e-05, "loss": 0.2532, "step": 1270 }, { "epoch": 68.0, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.277872085571289, "eval_runtime": 4.1836, "eval_samples_per_second": 143.417, "eval_steps_per_second": 4.542, "step": 1275 }, { "epoch": 68.27, "learning_rate": 4.7654320987654325e-05, "loss": 0.2647, "step": 1280 }, { "epoch": 68.8, "learning_rate": 4.759259259259259e-05, "loss": 0.2305, "step": 1290 }, { "epoch": 68.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.244070053100586, "eval_runtime": 4.1785, "eval_samples_per_second": 143.591, "eval_steps_per_second": 4.547, "step": 1293 }, { "epoch": 69.33, "learning_rate": 4.7530864197530866e-05, "loss": 0.2347, "step": 1300 }, { "epoch": 69.87, "learning_rate": 4.746913580246914e-05, "loss": 0.2423, "step": 1310 }, { "epoch": 69.97, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.25825309753418, "eval_runtime": 4.1881, "eval_samples_per_second": 143.264, "eval_steps_per_second": 4.537, "step": 1312 }, { "epoch": 70.4, "learning_rate": 4.740740740740741e-05, "loss": 0.2167, "step": 1320 }, { "epoch": 70.93, "learning_rate": 4.734567901234569e-05, "loss": 0.222, "step": 1330 }, { "epoch": 70.99, "eval_accuracy": 0.23, "eval_loss": 4.293475151062012, "eval_runtime": 4.178, "eval_samples_per_second": 143.61, "eval_steps_per_second": 4.548, "step": 1331 }, { "epoch": 71.47, "learning_rate": 4.7283950617283954e-05, "loss": 0.2781, "step": 1340 }, { "epoch": 72.0, "learning_rate": 4.722222222222222e-05, "loss": 0.2096, "step": 1350 }, { "epoch": 72.0, "eval_accuracy": 0.23, "eval_loss": 4.271422863006592, "eval_runtime": 4.1808, "eval_samples_per_second": 143.513, "eval_steps_per_second": 4.545, "step": 1350 }, { "epoch": 72.53, "learning_rate": 4.7160493827160495e-05, "loss": 0.1776, "step": 1360 }, { "epoch": 72.96, "eval_accuracy": 0.225, "eval_loss": 4.234805583953857, "eval_runtime": 4.1972, "eval_samples_per_second": 142.953, "eval_steps_per_second": 4.527, "step": 1368 }, { "epoch": 73.07, "learning_rate": 4.709876543209877e-05, "loss": 0.1977, "step": 1370 }, { "epoch": 73.6, "learning_rate": 4.703703703703704e-05, "loss": 0.2009, "step": 1380 }, { "epoch": 73.97, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.292957305908203, "eval_runtime": 4.1773, "eval_samples_per_second": 143.633, "eval_steps_per_second": 4.548, "step": 1387 }, { "epoch": 74.13, "learning_rate": 4.6975308641975316e-05, "loss": 0.2292, "step": 1390 }, { "epoch": 74.67, "learning_rate": 4.691358024691358e-05, "loss": 0.2087, "step": 1400 }, { "epoch": 74.99, "eval_accuracy": 0.235, "eval_loss": 4.307061672210693, "eval_runtime": 4.182, "eval_samples_per_second": 143.472, "eval_steps_per_second": 4.543, "step": 1406 }, { "epoch": 75.2, "learning_rate": 4.685185185185185e-05, "loss": 0.2235, "step": 1410 }, { "epoch": 75.73, "learning_rate": 4.6790123456790124e-05, "loss": 0.1818, "step": 1420 }, { "epoch": 76.0, "eval_accuracy": 0.235, "eval_loss": 4.296043872833252, "eval_runtime": 4.1959, "eval_samples_per_second": 142.996, "eval_steps_per_second": 4.528, "step": 1425 }, { "epoch": 76.27, "learning_rate": 4.67283950617284e-05, "loss": 0.2019, "step": 1430 }, { "epoch": 76.8, "learning_rate": 4.666666666666667e-05, "loss": 0.2236, "step": 1440 }, { "epoch": 76.96, "eval_accuracy": 0.24, "eval_loss": 4.290974140167236, "eval_runtime": 4.1784, "eval_samples_per_second": 143.596, "eval_steps_per_second": 4.547, "step": 1443 }, { "epoch": 77.33, "learning_rate": 4.6604938271604945e-05, "loss": 0.2008, "step": 1450 }, { "epoch": 77.87, "learning_rate": 4.654320987654321e-05, "loss": 0.1802, "step": 1460 }, { "epoch": 77.97, "eval_accuracy": 0.25, "eval_loss": 4.289625644683838, "eval_runtime": 4.224, "eval_samples_per_second": 142.044, "eval_steps_per_second": 4.498, "step": 1462 }, { "epoch": 78.4, "learning_rate": 4.648148148148148e-05, "loss": 0.2296, "step": 1470 }, { "epoch": 78.93, "learning_rate": 4.641975308641975e-05, "loss": 0.2037, "step": 1480 }, { "epoch": 78.99, "eval_accuracy": 0.245, "eval_loss": 4.331364154815674, "eval_runtime": 4.2445, "eval_samples_per_second": 141.359, "eval_steps_per_second": 4.476, "step": 1481 }, { "epoch": 79.47, "learning_rate": 4.635802469135803e-05, "loss": 0.1697, "step": 1490 }, { "epoch": 80.0, "learning_rate": 4.62962962962963e-05, "loss": 0.1912, "step": 1500 }, { "epoch": 80.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.261221408843994, "eval_runtime": 4.1986, "eval_samples_per_second": 142.905, "eval_steps_per_second": 4.525, "step": 1500 }, { "epoch": 80.53, "learning_rate": 4.623456790123457e-05, "loss": 0.2305, "step": 1510 }, { "epoch": 80.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.279019832611084, "eval_runtime": 4.1994, "eval_samples_per_second": 142.879, "eval_steps_per_second": 4.524, "step": 1518 }, { "epoch": 81.07, "learning_rate": 4.617283950617284e-05, "loss": 0.1808, "step": 1520 }, { "epoch": 81.6, "learning_rate": 4.6111111111111115e-05, "loss": 0.2188, "step": 1530 }, { "epoch": 81.97, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.306877613067627, "eval_runtime": 4.1818, "eval_samples_per_second": 143.477, "eval_steps_per_second": 4.543, "step": 1537 }, { "epoch": 82.13, "learning_rate": 4.604938271604938e-05, "loss": 0.2251, "step": 1540 }, { "epoch": 82.67, "learning_rate": 4.5987654320987656e-05, "loss": 0.1639, "step": 1550 }, { "epoch": 82.99, "eval_accuracy": 0.21833333333333332, "eval_loss": 4.353877067565918, "eval_runtime": 4.2313, "eval_samples_per_second": 141.801, "eval_steps_per_second": 4.49, "step": 1556 }, { "epoch": 83.2, "learning_rate": 4.592592592592593e-05, "loss": 0.1407, "step": 1560 }, { "epoch": 83.73, "learning_rate": 4.58641975308642e-05, "loss": 0.1741, "step": 1570 }, { "epoch": 84.0, "eval_accuracy": 0.225, "eval_loss": 4.321075916290283, "eval_runtime": 4.2392, "eval_samples_per_second": 141.536, "eval_steps_per_second": 4.482, "step": 1575 }, { "epoch": 84.27, "learning_rate": 4.580246913580247e-05, "loss": 0.1926, "step": 1580 }, { "epoch": 84.8, "learning_rate": 4.5740740740740745e-05, "loss": 0.1937, "step": 1590 }, { "epoch": 84.96, "eval_accuracy": 0.21166666666666667, "eval_loss": 4.357635498046875, "eval_runtime": 4.1967, "eval_samples_per_second": 142.97, "eval_steps_per_second": 4.527, "step": 1593 }, { "epoch": 85.33, "learning_rate": 4.567901234567901e-05, "loss": 0.1932, "step": 1600 }, { "epoch": 85.87, "learning_rate": 4.5617283950617285e-05, "loss": 0.1712, "step": 1610 }, { "epoch": 85.97, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.3434247970581055, "eval_runtime": 4.2166, "eval_samples_per_second": 142.294, "eval_steps_per_second": 4.506, "step": 1612 }, { "epoch": 86.4, "learning_rate": 4.555555555555556e-05, "loss": 0.1574, "step": 1620 }, { "epoch": 86.93, "learning_rate": 4.5493827160493826e-05, "loss": 0.1665, "step": 1630 }, { "epoch": 86.99, "eval_accuracy": 0.21166666666666667, "eval_loss": 4.334897041320801, "eval_runtime": 4.1946, "eval_samples_per_second": 143.042, "eval_steps_per_second": 4.53, "step": 1631 }, { "epoch": 87.47, "learning_rate": 4.54320987654321e-05, "loss": 0.1565, "step": 1640 }, { "epoch": 88.0, "learning_rate": 4.5370370370370374e-05, "loss": 0.1846, "step": 1650 }, { "epoch": 88.0, "eval_accuracy": 0.235, "eval_loss": 4.417025566101074, "eval_runtime": 4.202, "eval_samples_per_second": 142.79, "eval_steps_per_second": 4.522, "step": 1650 }, { "epoch": 88.53, "learning_rate": 4.530864197530865e-05, "loss": 0.1827, "step": 1660 }, { "epoch": 88.96, "eval_accuracy": 0.23, "eval_loss": 4.335045337677002, "eval_runtime": 4.1833, "eval_samples_per_second": 143.428, "eval_steps_per_second": 4.542, "step": 1668 }, { "epoch": 89.07, "learning_rate": 4.5246913580246914e-05, "loss": 0.1504, "step": 1670 }, { "epoch": 89.6, "learning_rate": 4.518518518518519e-05, "loss": 0.1591, "step": 1680 }, { "epoch": 89.97, "eval_accuracy": 0.215, "eval_loss": 4.339655876159668, "eval_runtime": 4.1906, "eval_samples_per_second": 143.178, "eval_steps_per_second": 4.534, "step": 1687 }, { "epoch": 90.13, "learning_rate": 4.5123456790123455e-05, "loss": 0.1328, "step": 1690 }, { "epoch": 90.67, "learning_rate": 4.506172839506173e-05, "loss": 0.1508, "step": 1700 }, { "epoch": 90.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.3272809982299805, "eval_runtime": 4.1836, "eval_samples_per_second": 143.418, "eval_steps_per_second": 4.542, "step": 1706 }, { "epoch": 91.2, "learning_rate": 4.5e-05, "loss": 0.1493, "step": 1710 }, { "epoch": 91.73, "learning_rate": 4.493827160493828e-05, "loss": 0.1808, "step": 1720 }, { "epoch": 92.0, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.3314714431762695, "eval_runtime": 4.1951, "eval_samples_per_second": 143.024, "eval_steps_per_second": 4.529, "step": 1725 }, { "epoch": 92.27, "learning_rate": 4.4876543209876544e-05, "loss": 0.1551, "step": 1730 }, { "epoch": 92.8, "learning_rate": 4.481481481481482e-05, "loss": 0.17, "step": 1740 }, { "epoch": 92.96, "eval_accuracy": 0.24, "eval_loss": 4.275998592376709, "eval_runtime": 4.1877, "eval_samples_per_second": 143.278, "eval_steps_per_second": 4.537, "step": 1743 }, { "epoch": 93.33, "learning_rate": 4.4753086419753084e-05, "loss": 0.1676, "step": 1750 }, { "epoch": 93.87, "learning_rate": 4.469135802469136e-05, "loss": 0.14, "step": 1760 }, { "epoch": 93.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.314431190490723, "eval_runtime": 4.222, "eval_samples_per_second": 142.112, "eval_steps_per_second": 4.5, "step": 1762 }, { "epoch": 94.4, "learning_rate": 4.462962962962963e-05, "loss": 0.1526, "step": 1770 }, { "epoch": 94.93, "learning_rate": 4.4567901234567906e-05, "loss": 0.1734, "step": 1780 }, { "epoch": 94.99, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.366744041442871, "eval_runtime": 4.2453, "eval_samples_per_second": 141.334, "eval_steps_per_second": 4.476, "step": 1781 }, { "epoch": 95.47, "learning_rate": 4.450617283950618e-05, "loss": 0.1472, "step": 1790 }, { "epoch": 96.0, "learning_rate": 4.4444444444444447e-05, "loss": 0.1593, "step": 1800 }, { "epoch": 96.0, "eval_accuracy": 0.225, "eval_loss": 4.390308380126953, "eval_runtime": 4.2118, "eval_samples_per_second": 142.456, "eval_steps_per_second": 4.511, "step": 1800 }, { "epoch": 96.53, "learning_rate": 4.4382716049382714e-05, "loss": 0.1523, "step": 1810 }, { "epoch": 96.96, "eval_accuracy": 0.24, "eval_loss": 4.331397533416748, "eval_runtime": 4.1925, "eval_samples_per_second": 143.113, "eval_steps_per_second": 4.532, "step": 1818 }, { "epoch": 97.07, "learning_rate": 4.432098765432099e-05, "loss": 0.1665, "step": 1820 }, { "epoch": 97.6, "learning_rate": 4.425925925925926e-05, "loss": 0.1599, "step": 1830 }, { "epoch": 97.97, "eval_accuracy": 0.23, "eval_loss": 4.411539077758789, "eval_runtime": 4.1869, "eval_samples_per_second": 143.306, "eval_steps_per_second": 4.538, "step": 1837 }, { "epoch": 98.13, "learning_rate": 4.4197530864197535e-05, "loss": 0.1319, "step": 1840 }, { "epoch": 98.67, "learning_rate": 4.413580246913581e-05, "loss": 0.1352, "step": 1850 }, { "epoch": 98.99, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.362613201141357, "eval_runtime": 4.181, "eval_samples_per_second": 143.506, "eval_steps_per_second": 4.544, "step": 1856 }, { "epoch": 99.2, "learning_rate": 4.4074074074074076e-05, "loss": 0.1625, "step": 1860 }, { "epoch": 99.73, "learning_rate": 4.401234567901234e-05, "loss": 0.1406, "step": 1870 }, { "epoch": 100.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.355536937713623, "eval_runtime": 4.2021, "eval_samples_per_second": 142.787, "eval_steps_per_second": 4.522, "step": 1875 }, { "epoch": 100.27, "learning_rate": 4.3950617283950617e-05, "loss": 0.1404, "step": 1880 }, { "epoch": 100.8, "learning_rate": 4.388888888888889e-05, "loss": 0.1486, "step": 1890 }, { "epoch": 100.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.311557769775391, "eval_runtime": 4.1983, "eval_samples_per_second": 142.914, "eval_steps_per_second": 4.526, "step": 1893 }, { "epoch": 101.33, "learning_rate": 4.3827160493827164e-05, "loss": 0.1458, "step": 1900 }, { "epoch": 101.87, "learning_rate": 4.376543209876544e-05, "loss": 0.149, "step": 1910 }, { "epoch": 101.97, "eval_accuracy": 0.23, "eval_loss": 4.389366149902344, "eval_runtime": 4.2353, "eval_samples_per_second": 141.666, "eval_steps_per_second": 4.486, "step": 1912 }, { "epoch": 102.4, "learning_rate": 4.3703703703703705e-05, "loss": 0.1317, "step": 1920 }, { "epoch": 102.93, "learning_rate": 4.364197530864197e-05, "loss": 0.115, "step": 1930 }, { "epoch": 102.99, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.3754754066467285, "eval_runtime": 4.2516, "eval_samples_per_second": 141.122, "eval_steps_per_second": 4.469, "step": 1931 }, { "epoch": 103.47, "learning_rate": 4.3580246913580246e-05, "loss": 0.1476, "step": 1940 }, { "epoch": 104.0, "learning_rate": 4.351851851851852e-05, "loss": 0.1301, "step": 1950 }, { "epoch": 104.0, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.376509189605713, "eval_runtime": 4.2304, "eval_samples_per_second": 141.83, "eval_steps_per_second": 4.491, "step": 1950 }, { "epoch": 104.53, "learning_rate": 4.345679012345679e-05, "loss": 0.1429, "step": 1960 }, { "epoch": 104.96, "eval_accuracy": 0.235, "eval_loss": 4.402740955352783, "eval_runtime": 4.1986, "eval_samples_per_second": 142.905, "eval_steps_per_second": 4.525, "step": 1968 }, { "epoch": 105.07, "learning_rate": 4.339506172839507e-05, "loss": 0.123, "step": 1970 }, { "epoch": 105.6, "learning_rate": 4.3333333333333334e-05, "loss": 0.1209, "step": 1980 }, { "epoch": 105.97, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.380291938781738, "eval_runtime": 4.2686, "eval_samples_per_second": 140.56, "eval_steps_per_second": 4.451, "step": 1987 }, { "epoch": 106.13, "learning_rate": 4.327160493827161e-05, "loss": 0.1189, "step": 1990 }, { "epoch": 106.67, "learning_rate": 4.3209876543209875e-05, "loss": 0.1287, "step": 2000 }, { "epoch": 106.99, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.323451995849609, "eval_runtime": 5.6461, "eval_samples_per_second": 106.268, "eval_steps_per_second": 3.365, "step": 2006 }, { "epoch": 107.2, "learning_rate": 4.314814814814815e-05, "loss": 0.1318, "step": 2010 }, { "epoch": 107.73, "learning_rate": 4.308641975308642e-05, "loss": 0.1318, "step": 2020 }, { "epoch": 108.0, "eval_accuracy": 0.24, "eval_loss": 4.348374843597412, "eval_runtime": 4.2065, "eval_samples_per_second": 142.635, "eval_steps_per_second": 4.517, "step": 2025 }, { "epoch": 108.27, "learning_rate": 4.3024691358024696e-05, "loss": 0.1328, "step": 2030 }, { "epoch": 108.8, "learning_rate": 4.296296296296296e-05, "loss": 0.1136, "step": 2040 }, { "epoch": 108.96, "eval_accuracy": 0.225, "eval_loss": 4.39766263961792, "eval_runtime": 4.2019, "eval_samples_per_second": 142.792, "eval_steps_per_second": 4.522, "step": 2043 }, { "epoch": 109.33, "learning_rate": 4.290123456790124e-05, "loss": 0.1218, "step": 2050 }, { "epoch": 109.87, "learning_rate": 4.283950617283951e-05, "loss": 0.1326, "step": 2060 }, { "epoch": 109.97, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.397821426391602, "eval_runtime": 4.1836, "eval_samples_per_second": 143.416, "eval_steps_per_second": 4.542, "step": 2062 }, { "epoch": 110.4, "learning_rate": 4.277777777777778e-05, "loss": 0.1223, "step": 2070 }, { "epoch": 110.93, "learning_rate": 4.271604938271605e-05, "loss": 0.1415, "step": 2080 }, { "epoch": 110.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.321408748626709, "eval_runtime": 4.1873, "eval_samples_per_second": 143.29, "eval_steps_per_second": 4.538, "step": 2081 }, { "epoch": 111.47, "learning_rate": 4.2654320987654325e-05, "loss": 0.1281, "step": 2090 }, { "epoch": 112.0, "learning_rate": 4.259259259259259e-05, "loss": 0.1229, "step": 2100 }, { "epoch": 112.0, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.369870662689209, "eval_runtime": 4.2352, "eval_samples_per_second": 141.67, "eval_steps_per_second": 4.486, "step": 2100 }, { "epoch": 112.53, "learning_rate": 4.2530864197530866e-05, "loss": 0.1004, "step": 2110 }, { "epoch": 112.96, "eval_accuracy": 0.25833333333333336, "eval_loss": 4.382783889770508, "eval_runtime": 4.2396, "eval_samples_per_second": 141.524, "eval_steps_per_second": 4.482, "step": 2118 }, { "epoch": 113.07, "learning_rate": 4.246913580246914e-05, "loss": 0.1238, "step": 2120 }, { "epoch": 113.6, "learning_rate": 4.240740740740741e-05, "loss": 0.0961, "step": 2130 }, { "epoch": 113.97, "eval_accuracy": 0.25166666666666665, "eval_loss": 4.356354713439941, "eval_runtime": 4.2272, "eval_samples_per_second": 141.937, "eval_steps_per_second": 4.495, "step": 2137 }, { "epoch": 114.13, "learning_rate": 4.234567901234568e-05, "loss": 0.1181, "step": 2140 }, { "epoch": 114.67, "learning_rate": 4.2283950617283955e-05, "loss": 0.1132, "step": 2150 }, { "epoch": 114.99, "eval_accuracy": 0.25333333333333335, "eval_loss": 4.338386058807373, "eval_runtime": 4.2183, "eval_samples_per_second": 142.239, "eval_steps_per_second": 4.504, "step": 2156 }, { "epoch": 115.2, "learning_rate": 4.222222222222222e-05, "loss": 0.1232, "step": 2160 }, { "epoch": 115.73, "learning_rate": 4.2160493827160495e-05, "loss": 0.1166, "step": 2170 }, { "epoch": 116.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.415248394012451, "eval_runtime": 4.233, "eval_samples_per_second": 141.743, "eval_steps_per_second": 4.489, "step": 2175 }, { "epoch": 116.27, "learning_rate": 4.209876543209877e-05, "loss": 0.129, "step": 2180 }, { "epoch": 116.8, "learning_rate": 4.203703703703704e-05, "loss": 0.1193, "step": 2190 }, { "epoch": 116.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.363390922546387, "eval_runtime": 4.2449, "eval_samples_per_second": 141.345, "eval_steps_per_second": 4.476, "step": 2193 }, { "epoch": 117.33, "learning_rate": 4.197530864197531e-05, "loss": 0.1348, "step": 2200 }, { "epoch": 117.87, "learning_rate": 4.1913580246913584e-05, "loss": 0.096, "step": 2210 }, { "epoch": 117.97, "eval_accuracy": 0.235, "eval_loss": 4.382623672485352, "eval_runtime": 4.236, "eval_samples_per_second": 141.643, "eval_steps_per_second": 4.485, "step": 2212 }, { "epoch": 118.4, "learning_rate": 4.185185185185185e-05, "loss": 0.1375, "step": 2220 }, { "epoch": 118.93, "learning_rate": 4.1790123456790124e-05, "loss": 0.1158, "step": 2230 }, { "epoch": 118.99, "eval_accuracy": 0.235, "eval_loss": 4.452427387237549, "eval_runtime": 4.1862, "eval_samples_per_second": 143.327, "eval_steps_per_second": 4.539, "step": 2231 }, { "epoch": 119.47, "learning_rate": 4.17283950617284e-05, "loss": 0.0863, "step": 2240 }, { "epoch": 120.0, "learning_rate": 4.166666666666667e-05, "loss": 0.099, "step": 2250 }, { "epoch": 120.0, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.497795581817627, "eval_runtime": 4.2433, "eval_samples_per_second": 141.399, "eval_steps_per_second": 4.478, "step": 2250 }, { "epoch": 120.53, "learning_rate": 4.1604938271604946e-05, "loss": 0.1065, "step": 2260 }, { "epoch": 120.96, "eval_accuracy": 0.24, "eval_loss": 4.412367343902588, "eval_runtime": 4.2487, "eval_samples_per_second": 141.22, "eval_steps_per_second": 4.472, "step": 2268 }, { "epoch": 121.07, "learning_rate": 4.154320987654321e-05, "loss": 0.1202, "step": 2270 }, { "epoch": 121.6, "learning_rate": 4.148148148148148e-05, "loss": 0.129, "step": 2280 }, { "epoch": 121.97, "eval_accuracy": 0.235, "eval_loss": 4.381356239318848, "eval_runtime": 4.2226, "eval_samples_per_second": 142.092, "eval_steps_per_second": 4.5, "step": 2287 }, { "epoch": 122.13, "learning_rate": 4.1419753086419754e-05, "loss": 0.1152, "step": 2290 }, { "epoch": 122.67, "learning_rate": 4.135802469135803e-05, "loss": 0.1047, "step": 2300 }, { "epoch": 122.99, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.366286754608154, "eval_runtime": 4.1923, "eval_samples_per_second": 143.119, "eval_steps_per_second": 4.532, "step": 2306 }, { "epoch": 123.2, "learning_rate": 4.12962962962963e-05, "loss": 0.1075, "step": 2310 }, { "epoch": 123.73, "learning_rate": 4.1234567901234575e-05, "loss": 0.101, "step": 2320 }, { "epoch": 124.0, "eval_accuracy": 0.23, "eval_loss": 4.511256694793701, "eval_runtime": 4.2032, "eval_samples_per_second": 142.748, "eval_steps_per_second": 4.52, "step": 2325 }, { "epoch": 124.27, "learning_rate": 4.117283950617284e-05, "loss": 0.1111, "step": 2330 }, { "epoch": 124.8, "learning_rate": 4.111111111111111e-05, "loss": 0.1076, "step": 2340 }, { "epoch": 124.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.455278396606445, "eval_runtime": 4.1874, "eval_samples_per_second": 143.286, "eval_steps_per_second": 4.537, "step": 2343 }, { "epoch": 125.33, "learning_rate": 4.104938271604938e-05, "loss": 0.112, "step": 2350 }, { "epoch": 125.87, "learning_rate": 4.0987654320987657e-05, "loss": 0.1135, "step": 2360 }, { "epoch": 125.97, "eval_accuracy": 0.23, "eval_loss": 4.435062885284424, "eval_runtime": 4.2228, "eval_samples_per_second": 142.087, "eval_steps_per_second": 4.499, "step": 2362 }, { "epoch": 126.4, "learning_rate": 4.092592592592593e-05, "loss": 0.0839, "step": 2370 }, { "epoch": 126.93, "learning_rate": 4.0864197530864204e-05, "loss": 0.1066, "step": 2380 }, { "epoch": 126.99, "eval_accuracy": 0.235, "eval_loss": 4.4874396324157715, "eval_runtime": 4.1945, "eval_samples_per_second": 143.044, "eval_steps_per_second": 4.53, "step": 2381 }, { "epoch": 127.47, "learning_rate": 4.080246913580247e-05, "loss": 0.1007, "step": 2390 }, { "epoch": 128.0, "learning_rate": 4.074074074074074e-05, "loss": 0.1256, "step": 2400 }, { "epoch": 128.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.463526725769043, "eval_runtime": 4.2437, "eval_samples_per_second": 141.387, "eval_steps_per_second": 4.477, "step": 2400 }, { "epoch": 128.53, "learning_rate": 4.067901234567901e-05, "loss": 0.0932, "step": 2410 }, { "epoch": 128.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.457594394683838, "eval_runtime": 4.2373, "eval_samples_per_second": 141.599, "eval_steps_per_second": 4.484, "step": 2418 }, { "epoch": 129.07, "learning_rate": 4.0617283950617286e-05, "loss": 0.1158, "step": 2420 }, { "epoch": 129.6, "learning_rate": 4.055555555555556e-05, "loss": 0.1189, "step": 2430 }, { "epoch": 129.97, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.577010154724121, "eval_runtime": 4.2164, "eval_samples_per_second": 142.301, "eval_steps_per_second": 4.506, "step": 2437 }, { "epoch": 130.13, "learning_rate": 4.049382716049383e-05, "loss": 0.116, "step": 2440 }, { "epoch": 130.67, "learning_rate": 4.04320987654321e-05, "loss": 0.1096, "step": 2450 }, { "epoch": 130.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.492093563079834, "eval_runtime": 4.2174, "eval_samples_per_second": 142.267, "eval_steps_per_second": 4.505, "step": 2456 }, { "epoch": 131.2, "learning_rate": 4.0370370370370374e-05, "loss": 0.1004, "step": 2460 }, { "epoch": 131.73, "learning_rate": 4.030864197530864e-05, "loss": 0.0791, "step": 2470 }, { "epoch": 132.0, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.508973598480225, "eval_runtime": 4.207, "eval_samples_per_second": 142.618, "eval_steps_per_second": 4.516, "step": 2475 }, { "epoch": 132.27, "learning_rate": 4.0246913580246915e-05, "loss": 0.0935, "step": 2480 }, { "epoch": 132.8, "learning_rate": 4.018518518518519e-05, "loss": 0.1152, "step": 2490 }, { "epoch": 132.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.457157611846924, "eval_runtime": 4.1941, "eval_samples_per_second": 143.058, "eval_steps_per_second": 4.53, "step": 2493 }, { "epoch": 133.33, "learning_rate": 4.012345679012346e-05, "loss": 0.091, "step": 2500 }, { "epoch": 133.87, "learning_rate": 4.006172839506173e-05, "loss": 0.1264, "step": 2510 }, { "epoch": 133.97, "eval_accuracy": 0.25, "eval_loss": 4.510921001434326, "eval_runtime": 4.2057, "eval_samples_per_second": 142.663, "eval_steps_per_second": 4.518, "step": 2512 }, { "epoch": 134.4, "learning_rate": 4e-05, "loss": 0.0749, "step": 2520 }, { "epoch": 134.93, "learning_rate": 3.993827160493827e-05, "loss": 0.1009, "step": 2530 }, { "epoch": 134.99, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.523574352264404, "eval_runtime": 4.1971, "eval_samples_per_second": 142.957, "eval_steps_per_second": 4.527, "step": 2531 }, { "epoch": 135.47, "learning_rate": 3.9876543209876544e-05, "loss": 0.0964, "step": 2540 }, { "epoch": 136.0, "learning_rate": 3.981481481481482e-05, "loss": 0.0956, "step": 2550 }, { "epoch": 136.0, "eval_accuracy": 0.245, "eval_loss": 4.478328227996826, "eval_runtime": 4.1949, "eval_samples_per_second": 143.029, "eval_steps_per_second": 4.529, "step": 2550 }, { "epoch": 136.53, "learning_rate": 3.975308641975309e-05, "loss": 0.0919, "step": 2560 }, { "epoch": 136.96, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.5484466552734375, "eval_runtime": 4.2364, "eval_samples_per_second": 141.629, "eval_steps_per_second": 4.485, "step": 2568 }, { "epoch": 137.07, "learning_rate": 3.969135802469136e-05, "loss": 0.1154, "step": 2570 }, { "epoch": 137.6, "learning_rate": 3.962962962962963e-05, "loss": 0.1042, "step": 2580 }, { "epoch": 137.97, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.542301654815674, "eval_runtime": 4.2535, "eval_samples_per_second": 141.061, "eval_steps_per_second": 4.467, "step": 2587 }, { "epoch": 138.13, "learning_rate": 3.9567901234567906e-05, "loss": 0.0859, "step": 2590 }, { "epoch": 138.67, "learning_rate": 3.950617283950617e-05, "loss": 0.1039, "step": 2600 }, { "epoch": 138.99, "eval_accuracy": 0.245, "eval_loss": 4.491814136505127, "eval_runtime": 4.1944, "eval_samples_per_second": 143.049, "eval_steps_per_second": 4.53, "step": 2606 }, { "epoch": 139.2, "learning_rate": 3.944444444444445e-05, "loss": 0.0846, "step": 2610 }, { "epoch": 139.73, "learning_rate": 3.938271604938272e-05, "loss": 0.094, "step": 2620 }, { "epoch": 140.0, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.545647144317627, "eval_runtime": 4.1844, "eval_samples_per_second": 143.39, "eval_steps_per_second": 4.541, "step": 2625 }, { "epoch": 140.27, "learning_rate": 3.932098765432099e-05, "loss": 0.0929, "step": 2630 }, { "epoch": 140.8, "learning_rate": 3.925925925925926e-05, "loss": 0.1056, "step": 2640 }, { "epoch": 140.96, "eval_accuracy": 0.245, "eval_loss": 4.521935939788818, "eval_runtime": 4.1914, "eval_samples_per_second": 143.151, "eval_steps_per_second": 4.533, "step": 2643 }, { "epoch": 141.33, "learning_rate": 3.9197530864197535e-05, "loss": 0.1025, "step": 2650 }, { "epoch": 141.87, "learning_rate": 3.91358024691358e-05, "loss": 0.0918, "step": 2660 }, { "epoch": 141.97, "eval_accuracy": 0.245, "eval_loss": 4.52545166015625, "eval_runtime": 4.2013, "eval_samples_per_second": 142.812, "eval_steps_per_second": 4.522, "step": 2662 }, { "epoch": 142.4, "learning_rate": 3.9074074074074076e-05, "loss": 0.098, "step": 2670 }, { "epoch": 142.93, "learning_rate": 3.901234567901234e-05, "loss": 0.0877, "step": 2680 }, { "epoch": 142.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.492305278778076, "eval_runtime": 4.1813, "eval_samples_per_second": 143.497, "eval_steps_per_second": 4.544, "step": 2681 }, { "epoch": 143.47, "learning_rate": 3.895061728395062e-05, "loss": 0.0936, "step": 2690 }, { "epoch": 144.0, "learning_rate": 3.888888888888889e-05, "loss": 0.105, "step": 2700 }, { "epoch": 144.0, "eval_accuracy": 0.235, "eval_loss": 4.535154819488525, "eval_runtime": 4.1912, "eval_samples_per_second": 143.158, "eval_steps_per_second": 4.533, "step": 2700 }, { "epoch": 144.53, "learning_rate": 3.8827160493827165e-05, "loss": 0.0892, "step": 2710 }, { "epoch": 144.96, "eval_accuracy": 0.245, "eval_loss": 4.471460819244385, "eval_runtime": 4.1911, "eval_samples_per_second": 143.161, "eval_steps_per_second": 4.533, "step": 2718 }, { "epoch": 145.07, "learning_rate": 3.876543209876544e-05, "loss": 0.1041, "step": 2720 }, { "epoch": 145.6, "learning_rate": 3.8703703703703705e-05, "loss": 0.0963, "step": 2730 }, { "epoch": 145.97, "eval_accuracy": 0.245, "eval_loss": 4.506024360656738, "eval_runtime": 4.1827, "eval_samples_per_second": 143.449, "eval_steps_per_second": 4.543, "step": 2737 }, { "epoch": 146.13, "learning_rate": 3.864197530864197e-05, "loss": 0.089, "step": 2740 }, { "epoch": 146.67, "learning_rate": 3.8580246913580246e-05, "loss": 0.095, "step": 2750 }, { "epoch": 146.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.559337615966797, "eval_runtime": 4.1884, "eval_samples_per_second": 143.254, "eval_steps_per_second": 4.536, "step": 2756 }, { "epoch": 147.2, "learning_rate": 3.851851851851852e-05, "loss": 0.0978, "step": 2760 }, { "epoch": 147.73, "learning_rate": 3.8456790123456794e-05, "loss": 0.0997, "step": 2770 }, { "epoch": 148.0, "eval_accuracy": 0.24, "eval_loss": 4.580421447753906, "eval_runtime": 4.2171, "eval_samples_per_second": 142.278, "eval_steps_per_second": 4.505, "step": 2775 }, { "epoch": 148.27, "learning_rate": 3.839506172839507e-05, "loss": 0.0689, "step": 2780 }, { "epoch": 148.8, "learning_rate": 3.8333333333333334e-05, "loss": 0.0839, "step": 2790 }, { "epoch": 148.96, "eval_accuracy": 0.23, "eval_loss": 4.59170389175415, "eval_runtime": 4.2419, "eval_samples_per_second": 141.446, "eval_steps_per_second": 4.479, "step": 2793 }, { "epoch": 149.33, "learning_rate": 3.82716049382716e-05, "loss": 0.1028, "step": 2800 }, { "epoch": 149.87, "learning_rate": 3.8209876543209875e-05, "loss": 0.0924, "step": 2810 }, { "epoch": 149.97, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.593113422393799, "eval_runtime": 4.2519, "eval_samples_per_second": 141.113, "eval_steps_per_second": 4.469, "step": 2812 }, { "epoch": 150.4, "learning_rate": 3.814814814814815e-05, "loss": 0.0694, "step": 2820 }, { "epoch": 150.93, "learning_rate": 3.808641975308642e-05, "loss": 0.0781, "step": 2830 }, { "epoch": 150.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.578421115875244, "eval_runtime": 4.2293, "eval_samples_per_second": 141.867, "eval_steps_per_second": 4.492, "step": 2831 }, { "epoch": 151.47, "learning_rate": 3.80246913580247e-05, "loss": 0.1092, "step": 2840 }, { "epoch": 152.0, "learning_rate": 3.7962962962962964e-05, "loss": 0.0986, "step": 2850 }, { "epoch": 152.0, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.654634475708008, "eval_runtime": 4.2497, "eval_samples_per_second": 141.185, "eval_steps_per_second": 4.471, "step": 2850 }, { "epoch": 152.53, "learning_rate": 3.790123456790123e-05, "loss": 0.0823, "step": 2860 }, { "epoch": 152.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.598492622375488, "eval_runtime": 4.242, "eval_samples_per_second": 141.443, "eval_steps_per_second": 4.479, "step": 2868 }, { "epoch": 153.07, "learning_rate": 3.7839506172839504e-05, "loss": 0.0913, "step": 2870 }, { "epoch": 153.6, "learning_rate": 3.777777777777778e-05, "loss": 0.0887, "step": 2880 }, { "epoch": 153.97, "eval_accuracy": 0.23, "eval_loss": 4.614808559417725, "eval_runtime": 4.2174, "eval_samples_per_second": 142.267, "eval_steps_per_second": 4.505, "step": 2887 }, { "epoch": 154.13, "learning_rate": 3.771604938271605e-05, "loss": 0.0787, "step": 2890 }, { "epoch": 154.67, "learning_rate": 3.7654320987654326e-05, "loss": 0.0671, "step": 2900 }, { "epoch": 154.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.63968563079834, "eval_runtime": 4.1904, "eval_samples_per_second": 143.186, "eval_steps_per_second": 4.534, "step": 2906 }, { "epoch": 155.2, "learning_rate": 3.759259259259259e-05, "loss": 0.104, "step": 2910 }, { "epoch": 155.73, "learning_rate": 3.7530864197530867e-05, "loss": 0.0897, "step": 2920 }, { "epoch": 156.0, "eval_accuracy": 0.235, "eval_loss": 4.583400249481201, "eval_runtime": 4.1921, "eval_samples_per_second": 143.126, "eval_steps_per_second": 4.532, "step": 2925 }, { "epoch": 156.27, "learning_rate": 3.7469135802469134e-05, "loss": 0.0795, "step": 2930 }, { "epoch": 156.8, "learning_rate": 3.740740740740741e-05, "loss": 0.093, "step": 2940 }, { "epoch": 156.96, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.539726257324219, "eval_runtime": 4.203, "eval_samples_per_second": 142.754, "eval_steps_per_second": 4.521, "step": 2943 }, { "epoch": 157.33, "learning_rate": 3.734567901234568e-05, "loss": 0.0903, "step": 2950 }, { "epoch": 157.87, "learning_rate": 3.7283950617283955e-05, "loss": 0.0973, "step": 2960 }, { "epoch": 157.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.55323600769043, "eval_runtime": 4.2018, "eval_samples_per_second": 142.796, "eval_steps_per_second": 4.522, "step": 2962 }, { "epoch": 158.4, "learning_rate": 3.722222222222222e-05, "loss": 0.064, "step": 2970 }, { "epoch": 158.93, "learning_rate": 3.7160493827160496e-05, "loss": 0.1001, "step": 2980 }, { "epoch": 158.99, "eval_accuracy": 0.24, "eval_loss": 4.58270263671875, "eval_runtime": 4.2074, "eval_samples_per_second": 142.605, "eval_steps_per_second": 4.516, "step": 2981 }, { "epoch": 159.47, "learning_rate": 3.709876543209877e-05, "loss": 0.0788, "step": 2990 }, { "epoch": 160.0, "learning_rate": 3.7037037037037037e-05, "loss": 0.0884, "step": 3000 }, { "epoch": 160.0, "eval_accuracy": 0.235, "eval_loss": 4.572762966156006, "eval_runtime": 4.217, "eval_samples_per_second": 142.282, "eval_steps_per_second": 4.506, "step": 3000 }, { "epoch": 160.53, "learning_rate": 3.697530864197531e-05, "loss": 0.084, "step": 3010 }, { "epoch": 160.96, "eval_accuracy": 0.235, "eval_loss": 4.654175758361816, "eval_runtime": 4.2104, "eval_samples_per_second": 142.503, "eval_steps_per_second": 4.513, "step": 3018 }, { "epoch": 161.07, "learning_rate": 3.6913580246913584e-05, "loss": 0.0773, "step": 3020 }, { "epoch": 161.6, "learning_rate": 3.685185185185185e-05, "loss": 0.0902, "step": 3030 }, { "epoch": 161.97, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.636648178100586, "eval_runtime": 4.2002, "eval_samples_per_second": 142.849, "eval_steps_per_second": 4.524, "step": 3037 }, { "epoch": 162.13, "learning_rate": 3.6790123456790125e-05, "loss": 0.0819, "step": 3040 }, { "epoch": 162.67, "learning_rate": 3.67283950617284e-05, "loss": 0.0944, "step": 3050 }, { "epoch": 162.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.5957183837890625, "eval_runtime": 4.2207, "eval_samples_per_second": 142.157, "eval_steps_per_second": 4.502, "step": 3056 }, { "epoch": 163.2, "learning_rate": 3.6666666666666666e-05, "loss": 0.0792, "step": 3060 }, { "epoch": 163.73, "learning_rate": 3.660493827160494e-05, "loss": 0.0828, "step": 3070 }, { "epoch": 164.0, "eval_accuracy": 0.23, "eval_loss": 4.652061462402344, "eval_runtime": 4.2215, "eval_samples_per_second": 142.13, "eval_steps_per_second": 4.501, "step": 3075 }, { "epoch": 164.27, "learning_rate": 3.654320987654321e-05, "loss": 0.0729, "step": 3080 }, { "epoch": 164.8, "learning_rate": 3.648148148148148e-05, "loss": 0.0812, "step": 3090 }, { "epoch": 164.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.676053047180176, "eval_runtime": 4.2058, "eval_samples_per_second": 142.66, "eval_steps_per_second": 4.518, "step": 3093 }, { "epoch": 165.33, "learning_rate": 3.6419753086419754e-05, "loss": 0.0723, "step": 3100 }, { "epoch": 165.87, "learning_rate": 3.635802469135803e-05, "loss": 0.0817, "step": 3110 }, { "epoch": 165.97, "eval_accuracy": 0.225, "eval_loss": 4.627193927764893, "eval_runtime": 4.22, "eval_samples_per_second": 142.182, "eval_steps_per_second": 4.502, "step": 3112 }, { "epoch": 166.4, "learning_rate": 3.62962962962963e-05, "loss": 0.0718, "step": 3120 }, { "epoch": 166.93, "learning_rate": 3.623456790123457e-05, "loss": 0.07, "step": 3130 }, { "epoch": 166.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.653589725494385, "eval_runtime": 4.255, "eval_samples_per_second": 141.01, "eval_steps_per_second": 4.465, "step": 3131 }, { "epoch": 167.47, "learning_rate": 3.617283950617284e-05, "loss": 0.0672, "step": 3140 }, { "epoch": 168.0, "learning_rate": 3.611111111111111e-05, "loss": 0.0746, "step": 3150 }, { "epoch": 168.0, "eval_accuracy": 0.245, "eval_loss": 4.567090034484863, "eval_runtime": 4.2691, "eval_samples_per_second": 140.544, "eval_steps_per_second": 4.451, "step": 3150 }, { "epoch": 168.53, "learning_rate": 3.604938271604938e-05, "loss": 0.0782, "step": 3160 }, { "epoch": 168.96, "eval_accuracy": 0.24, "eval_loss": 4.591490745544434, "eval_runtime": 4.2101, "eval_samples_per_second": 142.516, "eval_steps_per_second": 4.513, "step": 3168 }, { "epoch": 169.07, "learning_rate": 3.598765432098766e-05, "loss": 0.0667, "step": 3170 }, { "epoch": 169.6, "learning_rate": 3.592592592592593e-05, "loss": 0.0677, "step": 3180 }, { "epoch": 169.97, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.637347221374512, "eval_runtime": 4.2173, "eval_samples_per_second": 142.272, "eval_steps_per_second": 4.505, "step": 3187 }, { "epoch": 170.13, "learning_rate": 3.5864197530864205e-05, "loss": 0.0839, "step": 3190 }, { "epoch": 170.67, "learning_rate": 3.580246913580247e-05, "loss": 0.0626, "step": 3200 }, { "epoch": 170.99, "eval_accuracy": 0.25833333333333336, "eval_loss": 4.672304630279541, "eval_runtime": 4.2092, "eval_samples_per_second": 142.546, "eval_steps_per_second": 4.514, "step": 3206 }, { "epoch": 171.2, "learning_rate": 3.574074074074074e-05, "loss": 0.076, "step": 3210 }, { "epoch": 171.73, "learning_rate": 3.567901234567901e-05, "loss": 0.0697, "step": 3220 }, { "epoch": 172.0, "eval_accuracy": 0.245, "eval_loss": 4.681668281555176, "eval_runtime": 4.1945, "eval_samples_per_second": 143.044, "eval_steps_per_second": 4.53, "step": 3225 }, { "epoch": 172.27, "learning_rate": 3.5617283950617286e-05, "loss": 0.0826, "step": 3230 }, { "epoch": 172.8, "learning_rate": 3.555555555555556e-05, "loss": 0.077, "step": 3240 }, { "epoch": 172.96, "eval_accuracy": 0.23, "eval_loss": 4.679342269897461, "eval_runtime": 4.2134, "eval_samples_per_second": 142.403, "eval_steps_per_second": 4.509, "step": 3243 }, { "epoch": 173.33, "learning_rate": 3.5493827160493834e-05, "loss": 0.0766, "step": 3250 }, { "epoch": 173.87, "learning_rate": 3.54320987654321e-05, "loss": 0.068, "step": 3260 }, { "epoch": 173.97, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.711019039154053, "eval_runtime": 4.211, "eval_samples_per_second": 142.484, "eval_steps_per_second": 4.512, "step": 3262 }, { "epoch": 174.4, "learning_rate": 3.537037037037037e-05, "loss": 0.0587, "step": 3270 }, { "epoch": 174.93, "learning_rate": 3.530864197530864e-05, "loss": 0.0875, "step": 3280 }, { "epoch": 174.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.701217174530029, "eval_runtime": 4.2016, "eval_samples_per_second": 142.802, "eval_steps_per_second": 4.522, "step": 3281 }, { "epoch": 175.47, "learning_rate": 3.5246913580246915e-05, "loss": 0.0765, "step": 3290 }, { "epoch": 176.0, "learning_rate": 3.518518518518519e-05, "loss": 0.0787, "step": 3300 }, { "epoch": 176.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.711310863494873, "eval_runtime": 4.2282, "eval_samples_per_second": 141.904, "eval_steps_per_second": 4.494, "step": 3300 }, { "epoch": 176.53, "learning_rate": 3.512345679012346e-05, "loss": 0.0779, "step": 3310 }, { "epoch": 176.96, "eval_accuracy": 0.24, "eval_loss": 4.699758529663086, "eval_runtime": 4.1993, "eval_samples_per_second": 142.882, "eval_steps_per_second": 4.525, "step": 3318 }, { "epoch": 177.07, "learning_rate": 3.506172839506173e-05, "loss": 0.0632, "step": 3320 }, { "epoch": 177.6, "learning_rate": 3.5e-05, "loss": 0.0823, "step": 3330 }, { "epoch": 177.97, "eval_accuracy": 0.24, "eval_loss": 4.709224224090576, "eval_runtime": 4.2154, "eval_samples_per_second": 142.335, "eval_steps_per_second": 4.507, "step": 3337 }, { "epoch": 178.13, "learning_rate": 3.493827160493827e-05, "loss": 0.0819, "step": 3340 }, { "epoch": 178.67, "learning_rate": 3.4876543209876545e-05, "loss": 0.0685, "step": 3350 }, { "epoch": 178.99, "eval_accuracy": 0.245, "eval_loss": 4.676272869110107, "eval_runtime": 4.2277, "eval_samples_per_second": 141.922, "eval_steps_per_second": 4.494, "step": 3356 }, { "epoch": 179.2, "learning_rate": 3.481481481481482e-05, "loss": 0.0592, "step": 3360 }, { "epoch": 179.73, "learning_rate": 3.475308641975309e-05, "loss": 0.0698, "step": 3370 }, { "epoch": 180.0, "eval_accuracy": 0.25666666666666665, "eval_loss": 4.718149185180664, "eval_runtime": 4.2111, "eval_samples_per_second": 142.479, "eval_steps_per_second": 4.512, "step": 3375 }, { "epoch": 180.27, "learning_rate": 3.469135802469136e-05, "loss": 0.074, "step": 3380 }, { "epoch": 180.8, "learning_rate": 3.4629629629629626e-05, "loss": 0.0924, "step": 3390 }, { "epoch": 180.96, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.715093612670898, "eval_runtime": 4.2612, "eval_samples_per_second": 140.804, "eval_steps_per_second": 4.459, "step": 3393 }, { "epoch": 181.33, "learning_rate": 3.45679012345679e-05, "loss": 0.0792, "step": 3400 }, { "epoch": 181.87, "learning_rate": 3.4506172839506174e-05, "loss": 0.084, "step": 3410 }, { "epoch": 181.97, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.723077774047852, "eval_runtime": 4.2506, "eval_samples_per_second": 141.157, "eval_steps_per_second": 4.47, "step": 3412 }, { "epoch": 182.4, "learning_rate": 3.444444444444445e-05, "loss": 0.08, "step": 3420 }, { "epoch": 182.93, "learning_rate": 3.438271604938272e-05, "loss": 0.0508, "step": 3430 }, { "epoch": 182.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.685625076293945, "eval_runtime": 4.2741, "eval_samples_per_second": 140.38, "eval_steps_per_second": 4.445, "step": 3431 }, { "epoch": 183.47, "learning_rate": 3.432098765432099e-05, "loss": 0.0604, "step": 3440 }, { "epoch": 184.0, "learning_rate": 3.425925925925926e-05, "loss": 0.0637, "step": 3450 }, { "epoch": 184.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.704137325286865, "eval_runtime": 4.274, "eval_samples_per_second": 140.382, "eval_steps_per_second": 4.445, "step": 3450 }, { "epoch": 184.53, "learning_rate": 3.419753086419753e-05, "loss": 0.06, "step": 3460 }, { "epoch": 184.96, "eval_accuracy": 0.24, "eval_loss": 4.720521450042725, "eval_runtime": 4.2381, "eval_samples_per_second": 141.572, "eval_steps_per_second": 4.483, "step": 3468 }, { "epoch": 185.07, "learning_rate": 3.41358024691358e-05, "loss": 0.062, "step": 3470 }, { "epoch": 185.6, "learning_rate": 3.4074074074074077e-05, "loss": 0.0659, "step": 3480 }, { "epoch": 185.97, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.725123405456543, "eval_runtime": 4.2202, "eval_samples_per_second": 142.172, "eval_steps_per_second": 4.502, "step": 3487 }, { "epoch": 186.13, "learning_rate": 3.401234567901235e-05, "loss": 0.0788, "step": 3490 }, { "epoch": 186.67, "learning_rate": 3.395061728395062e-05, "loss": 0.0842, "step": 3500 }, { "epoch": 186.99, "eval_accuracy": 0.23, "eval_loss": 4.721489906311035, "eval_runtime": 4.232, "eval_samples_per_second": 141.778, "eval_steps_per_second": 4.49, "step": 3506 }, { "epoch": 187.2, "learning_rate": 3.388888888888889e-05, "loss": 0.0773, "step": 3510 }, { "epoch": 187.73, "learning_rate": 3.3827160493827165e-05, "loss": 0.0733, "step": 3520 }, { "epoch": 188.0, "eval_accuracy": 0.24, "eval_loss": 4.706781387329102, "eval_runtime": 4.2434, "eval_samples_per_second": 141.395, "eval_steps_per_second": 4.478, "step": 3525 }, { "epoch": 188.27, "learning_rate": 3.376543209876543e-05, "loss": 0.0722, "step": 3530 }, { "epoch": 188.8, "learning_rate": 3.3703703703703706e-05, "loss": 0.0647, "step": 3540 }, { "epoch": 188.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.759402751922607, "eval_runtime": 4.2349, "eval_samples_per_second": 141.679, "eval_steps_per_second": 4.486, "step": 3543 }, { "epoch": 189.33, "learning_rate": 3.364197530864198e-05, "loss": 0.0714, "step": 3550 }, { "epoch": 189.87, "learning_rate": 3.3580246913580247e-05, "loss": 0.0569, "step": 3560 }, { "epoch": 189.97, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.783107280731201, "eval_runtime": 4.2157, "eval_samples_per_second": 142.324, "eval_steps_per_second": 4.507, "step": 3562 }, { "epoch": 190.4, "learning_rate": 3.351851851851852e-05, "loss": 0.0673, "step": 3570 }, { "epoch": 190.93, "learning_rate": 3.3456790123456794e-05, "loss": 0.0883, "step": 3580 }, { "epoch": 190.99, "eval_accuracy": 0.235, "eval_loss": 4.721207618713379, "eval_runtime": 4.2273, "eval_samples_per_second": 141.935, "eval_steps_per_second": 4.495, "step": 3581 }, { "epoch": 191.47, "learning_rate": 3.339506172839506e-05, "loss": 0.0704, "step": 3590 }, { "epoch": 192.0, "learning_rate": 3.3333333333333335e-05, "loss": 0.0622, "step": 3600 }, { "epoch": 192.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.6877546310424805, "eval_runtime": 4.2265, "eval_samples_per_second": 141.962, "eval_steps_per_second": 4.495, "step": 3600 }, { "epoch": 192.53, "learning_rate": 3.327160493827161e-05, "loss": 0.057, "step": 3610 }, { "epoch": 192.96, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.665402889251709, "eval_runtime": 4.2317, "eval_samples_per_second": 141.788, "eval_steps_per_second": 4.49, "step": 3618 }, { "epoch": 193.07, "learning_rate": 3.3209876543209876e-05, "loss": 0.0563, "step": 3620 }, { "epoch": 193.6, "learning_rate": 3.314814814814815e-05, "loss": 0.0654, "step": 3630 }, { "epoch": 193.97, "eval_accuracy": 0.25166666666666665, "eval_loss": 4.635808944702148, "eval_runtime": 4.2421, "eval_samples_per_second": 141.438, "eval_steps_per_second": 4.479, "step": 3637 }, { "epoch": 194.13, "learning_rate": 3.308641975308642e-05, "loss": 0.0771, "step": 3640 }, { "epoch": 194.67, "learning_rate": 3.30246913580247e-05, "loss": 0.0868, "step": 3650 }, { "epoch": 194.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.662071704864502, "eval_runtime": 4.2363, "eval_samples_per_second": 141.634, "eval_steps_per_second": 4.485, "step": 3656 }, { "epoch": 195.2, "learning_rate": 3.2962962962962964e-05, "loss": 0.0592, "step": 3660 }, { "epoch": 195.73, "learning_rate": 3.290123456790124e-05, "loss": 0.0789, "step": 3670 }, { "epoch": 196.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.698493003845215, "eval_runtime": 4.2392, "eval_samples_per_second": 141.537, "eval_steps_per_second": 4.482, "step": 3675 }, { "epoch": 196.27, "learning_rate": 3.2839506172839505e-05, "loss": 0.0683, "step": 3680 }, { "epoch": 196.8, "learning_rate": 3.277777777777778e-05, "loss": 0.0657, "step": 3690 }, { "epoch": 196.96, "eval_accuracy": 0.25666666666666665, "eval_loss": 4.663649559020996, "eval_runtime": 4.2398, "eval_samples_per_second": 141.517, "eval_steps_per_second": 4.481, "step": 3693 }, { "epoch": 197.33, "learning_rate": 3.271604938271605e-05, "loss": 0.057, "step": 3700 }, { "epoch": 197.87, "learning_rate": 3.2654320987654326e-05, "loss": 0.0648, "step": 3710 }, { "epoch": 197.97, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.7698283195495605, "eval_runtime": 4.2294, "eval_samples_per_second": 141.864, "eval_steps_per_second": 4.492, "step": 3712 }, { "epoch": 198.4, "learning_rate": 3.25925925925926e-05, "loss": 0.0577, "step": 3720 }, { "epoch": 198.93, "learning_rate": 3.253086419753087e-05, "loss": 0.0635, "step": 3730 }, { "epoch": 198.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.722621440887451, "eval_runtime": 4.2242, "eval_samples_per_second": 142.038, "eval_steps_per_second": 4.498, "step": 3731 }, { "epoch": 199.47, "learning_rate": 3.2469135802469134e-05, "loss": 0.0563, "step": 3740 }, { "epoch": 200.0, "learning_rate": 3.240740740740741e-05, "loss": 0.0637, "step": 3750 }, { "epoch": 200.0, "eval_accuracy": 0.245, "eval_loss": 4.748103618621826, "eval_runtime": 4.2435, "eval_samples_per_second": 141.393, "eval_steps_per_second": 4.477, "step": 3750 }, { "epoch": 200.53, "learning_rate": 3.234567901234568e-05, "loss": 0.0665, "step": 3760 }, { "epoch": 200.96, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.778894901275635, "eval_runtime": 4.2222, "eval_samples_per_second": 142.106, "eval_steps_per_second": 4.5, "step": 3768 }, { "epoch": 201.07, "learning_rate": 3.2283950617283955e-05, "loss": 0.0649, "step": 3770 }, { "epoch": 201.6, "learning_rate": 3.222222222222223e-05, "loss": 0.0799, "step": 3780 }, { "epoch": 201.97, "eval_accuracy": 0.235, "eval_loss": 4.701383590698242, "eval_runtime": 4.2577, "eval_samples_per_second": 140.92, "eval_steps_per_second": 4.462, "step": 3787 }, { "epoch": 202.13, "learning_rate": 3.216049382716049e-05, "loss": 0.049, "step": 3790 }, { "epoch": 202.67, "learning_rate": 3.209876543209876e-05, "loss": 0.064, "step": 3800 }, { "epoch": 202.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.752817153930664, "eval_runtime": 4.2702, "eval_samples_per_second": 140.507, "eval_steps_per_second": 4.449, "step": 3806 }, { "epoch": 203.2, "learning_rate": 3.203703703703704e-05, "loss": 0.0554, "step": 3810 }, { "epoch": 203.73, "learning_rate": 3.197530864197531e-05, "loss": 0.0772, "step": 3820 }, { "epoch": 204.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.740113735198975, "eval_runtime": 4.2544, "eval_samples_per_second": 141.031, "eval_steps_per_second": 4.466, "step": 3825 }, { "epoch": 204.27, "learning_rate": 3.1913580246913585e-05, "loss": 0.0625, "step": 3830 }, { "epoch": 204.8, "learning_rate": 3.185185185185185e-05, "loss": 0.0438, "step": 3840 }, { "epoch": 204.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.76779317855835, "eval_runtime": 4.2439, "eval_samples_per_second": 141.379, "eval_steps_per_second": 4.477, "step": 3843 }, { "epoch": 205.33, "learning_rate": 3.1790123456790125e-05, "loss": 0.0716, "step": 3850 }, { "epoch": 205.87, "learning_rate": 3.172839506172839e-05, "loss": 0.0766, "step": 3860 }, { "epoch": 205.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.71795654296875, "eval_runtime": 4.2289, "eval_samples_per_second": 141.881, "eval_steps_per_second": 4.493, "step": 3862 }, { "epoch": 206.4, "learning_rate": 3.1666666666666666e-05, "loss": 0.08, "step": 3870 }, { "epoch": 206.93, "learning_rate": 3.160493827160494e-05, "loss": 0.0687, "step": 3880 }, { "epoch": 206.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.7058281898498535, "eval_runtime": 4.2979, "eval_samples_per_second": 139.602, "eval_steps_per_second": 4.421, "step": 3881 }, { "epoch": 207.47, "learning_rate": 3.1543209876543214e-05, "loss": 0.0686, "step": 3890 }, { "epoch": 208.0, "learning_rate": 3.148148148148148e-05, "loss": 0.0801, "step": 3900 }, { "epoch": 208.0, "eval_accuracy": 0.235, "eval_loss": 4.7583746910095215, "eval_runtime": 4.3035, "eval_samples_per_second": 139.423, "eval_steps_per_second": 4.415, "step": 3900 }, { "epoch": 208.53, "learning_rate": 3.1419753086419755e-05, "loss": 0.0772, "step": 3910 }, { "epoch": 208.96, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.73037576675415, "eval_runtime": 4.265, "eval_samples_per_second": 140.681, "eval_steps_per_second": 4.455, "step": 3918 }, { "epoch": 209.07, "learning_rate": 3.135802469135803e-05, "loss": 0.0504, "step": 3920 }, { "epoch": 209.6, "learning_rate": 3.1296296296296295e-05, "loss": 0.0663, "step": 3930 }, { "epoch": 209.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.693957805633545, "eval_runtime": 4.2721, "eval_samples_per_second": 140.445, "eval_steps_per_second": 4.447, "step": 3937 }, { "epoch": 210.13, "learning_rate": 3.123456790123457e-05, "loss": 0.0497, "step": 3940 }, { "epoch": 210.67, "learning_rate": 3.117283950617284e-05, "loss": 0.0529, "step": 3950 }, { "epoch": 210.99, "eval_accuracy": 0.235, "eval_loss": 4.694019317626953, "eval_runtime": 4.2603, "eval_samples_per_second": 140.834, "eval_steps_per_second": 4.46, "step": 3956 }, { "epoch": 211.2, "learning_rate": 3.111111111111111e-05, "loss": 0.0559, "step": 3960 }, { "epoch": 211.73, "learning_rate": 3.1049382716049384e-05, "loss": 0.0568, "step": 3970 }, { "epoch": 212.0, "eval_accuracy": 0.235, "eval_loss": 4.733299255371094, "eval_runtime": 4.2654, "eval_samples_per_second": 140.666, "eval_steps_per_second": 4.454, "step": 3975 }, { "epoch": 212.27, "learning_rate": 3.098765432098766e-05, "loss": 0.057, "step": 3980 }, { "epoch": 212.8, "learning_rate": 3.0925925925925924e-05, "loss": 0.0697, "step": 3990 }, { "epoch": 212.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.667250156402588, "eval_runtime": 4.2885, "eval_samples_per_second": 139.909, "eval_steps_per_second": 4.43, "step": 3993 }, { "epoch": 213.33, "learning_rate": 3.08641975308642e-05, "loss": 0.0402, "step": 4000 }, { "epoch": 213.87, "learning_rate": 3.080246913580247e-05, "loss": 0.0394, "step": 4010 }, { "epoch": 213.97, "eval_accuracy": 0.245, "eval_loss": 4.673309326171875, "eval_runtime": 4.2825, "eval_samples_per_second": 140.105, "eval_steps_per_second": 4.437, "step": 4012 }, { "epoch": 214.4, "learning_rate": 3.074074074074074e-05, "loss": 0.0398, "step": 4020 }, { "epoch": 214.93, "learning_rate": 3.067901234567901e-05, "loss": 0.0625, "step": 4030 }, { "epoch": 214.99, "eval_accuracy": 0.225, "eval_loss": 4.738312244415283, "eval_runtime": 4.3054, "eval_samples_per_second": 139.359, "eval_steps_per_second": 4.413, "step": 4031 }, { "epoch": 215.47, "learning_rate": 3.061728395061729e-05, "loss": 0.0626, "step": 4040 }, { "epoch": 216.0, "learning_rate": 3.055555555555556e-05, "loss": 0.0588, "step": 4050 }, { "epoch": 216.0, "eval_accuracy": 0.24, "eval_loss": 4.767359256744385, "eval_runtime": 4.3344, "eval_samples_per_second": 138.426, "eval_steps_per_second": 4.383, "step": 4050 }, { "epoch": 216.53, "learning_rate": 3.0493827160493827e-05, "loss": 0.0594, "step": 4060 }, { "epoch": 216.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.687304496765137, "eval_runtime": 4.285, "eval_samples_per_second": 140.024, "eval_steps_per_second": 4.434, "step": 4068 }, { "epoch": 217.07, "learning_rate": 3.0432098765432098e-05, "loss": 0.0721, "step": 4070 }, { "epoch": 217.6, "learning_rate": 3.037037037037037e-05, "loss": 0.0451, "step": 4080 }, { "epoch": 217.97, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.671844959259033, "eval_runtime": 4.2925, "eval_samples_per_second": 139.78, "eval_steps_per_second": 4.426, "step": 4087 }, { "epoch": 218.13, "learning_rate": 3.0308641975308642e-05, "loss": 0.0445, "step": 4090 }, { "epoch": 218.67, "learning_rate": 3.0246913580246916e-05, "loss": 0.047, "step": 4100 }, { "epoch": 218.99, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.714609146118164, "eval_runtime": 4.2883, "eval_samples_per_second": 139.914, "eval_steps_per_second": 4.431, "step": 4106 }, { "epoch": 219.2, "learning_rate": 3.018518518518519e-05, "loss": 0.0648, "step": 4110 }, { "epoch": 219.73, "learning_rate": 3.012345679012346e-05, "loss": 0.0445, "step": 4120 }, { "epoch": 220.0, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.717392921447754, "eval_runtime": 4.2776, "eval_samples_per_second": 140.266, "eval_steps_per_second": 4.442, "step": 4125 }, { "epoch": 220.27, "learning_rate": 3.0061728395061727e-05, "loss": 0.0438, "step": 4130 }, { "epoch": 220.8, "learning_rate": 3e-05, "loss": 0.0746, "step": 4140 }, { "epoch": 220.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.670175075531006, "eval_runtime": 4.2998, "eval_samples_per_second": 139.542, "eval_steps_per_second": 4.419, "step": 4143 }, { "epoch": 221.33, "learning_rate": 2.993827160493827e-05, "loss": 0.0617, "step": 4150 }, { "epoch": 221.87, "learning_rate": 2.9876543209876545e-05, "loss": 0.0697, "step": 4160 }, { "epoch": 221.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.646183967590332, "eval_runtime": 4.3381, "eval_samples_per_second": 138.308, "eval_steps_per_second": 4.38, "step": 4162 }, { "epoch": 222.4, "learning_rate": 2.981481481481482e-05, "loss": 0.0866, "step": 4170 }, { "epoch": 222.93, "learning_rate": 2.975308641975309e-05, "loss": 0.0562, "step": 4180 }, { "epoch": 222.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.695559024810791, "eval_runtime": 4.3517, "eval_samples_per_second": 137.876, "eval_steps_per_second": 4.366, "step": 4181 }, { "epoch": 223.47, "learning_rate": 2.9691358024691356e-05, "loss": 0.0986, "step": 4190 }, { "epoch": 224.0, "learning_rate": 2.962962962962963e-05, "loss": 0.047, "step": 4200 }, { "epoch": 224.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.727797985076904, "eval_runtime": 4.3288, "eval_samples_per_second": 138.607, "eval_steps_per_second": 4.389, "step": 4200 }, { "epoch": 224.53, "learning_rate": 2.95679012345679e-05, "loss": 0.0612, "step": 4210 }, { "epoch": 224.96, "eval_accuracy": 0.235, "eval_loss": 4.730659484863281, "eval_runtime": 4.3287, "eval_samples_per_second": 138.611, "eval_steps_per_second": 4.389, "step": 4218 }, { "epoch": 225.07, "learning_rate": 2.9506172839506174e-05, "loss": 0.048, "step": 4220 }, { "epoch": 225.6, "learning_rate": 2.9444444444444448e-05, "loss": 0.0625, "step": 4230 }, { "epoch": 225.97, "eval_accuracy": 0.25666666666666665, "eval_loss": 4.667015075683594, "eval_runtime": 4.3594, "eval_samples_per_second": 137.632, "eval_steps_per_second": 4.358, "step": 4237 }, { "epoch": 226.13, "learning_rate": 2.9382716049382718e-05, "loss": 0.0558, "step": 4240 }, { "epoch": 226.67, "learning_rate": 2.9320987654320992e-05, "loss": 0.0739, "step": 4250 }, { "epoch": 226.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.710987091064453, "eval_runtime": 4.3562, "eval_samples_per_second": 137.734, "eval_steps_per_second": 4.362, "step": 4256 }, { "epoch": 227.2, "learning_rate": 2.925925925925926e-05, "loss": 0.054, "step": 4260 }, { "epoch": 227.73, "learning_rate": 2.919753086419753e-05, "loss": 0.0637, "step": 4270 }, { "epoch": 228.0, "eval_accuracy": 0.22, "eval_loss": 4.703871726989746, "eval_runtime": 4.3307, "eval_samples_per_second": 138.545, "eval_steps_per_second": 4.387, "step": 4275 }, { "epoch": 228.27, "learning_rate": 2.9135802469135803e-05, "loss": 0.0491, "step": 4280 }, { "epoch": 228.8, "learning_rate": 2.9074074074074077e-05, "loss": 0.0461, "step": 4290 }, { "epoch": 228.96, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.711916923522949, "eval_runtime": 4.3352, "eval_samples_per_second": 138.402, "eval_steps_per_second": 4.383, "step": 4293 }, { "epoch": 229.33, "learning_rate": 2.9012345679012347e-05, "loss": 0.0544, "step": 4300 }, { "epoch": 229.87, "learning_rate": 2.895061728395062e-05, "loss": 0.0506, "step": 4310 }, { "epoch": 229.97, "eval_accuracy": 0.23, "eval_loss": 4.7098541259765625, "eval_runtime": 4.3688, "eval_samples_per_second": 137.339, "eval_steps_per_second": 4.349, "step": 4312 }, { "epoch": 230.4, "learning_rate": 2.8888888888888888e-05, "loss": 0.0511, "step": 4320 }, { "epoch": 230.93, "learning_rate": 2.882716049382716e-05, "loss": 0.0412, "step": 4330 }, { "epoch": 230.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.671385288238525, "eval_runtime": 4.3317, "eval_samples_per_second": 138.515, "eval_steps_per_second": 4.386, "step": 4331 }, { "epoch": 231.47, "learning_rate": 2.8765432098765432e-05, "loss": 0.0681, "step": 4340 }, { "epoch": 232.0, "learning_rate": 2.8703703703703706e-05, "loss": 0.057, "step": 4350 }, { "epoch": 232.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.692080497741699, "eval_runtime": 4.3407, "eval_samples_per_second": 138.227, "eval_steps_per_second": 4.377, "step": 4350 }, { "epoch": 232.53, "learning_rate": 2.8641975308641977e-05, "loss": 0.0402, "step": 4360 }, { "epoch": 232.96, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.754528045654297, "eval_runtime": 4.3356, "eval_samples_per_second": 138.389, "eval_steps_per_second": 4.382, "step": 4368 }, { "epoch": 233.07, "learning_rate": 2.858024691358025e-05, "loss": 0.0766, "step": 4370 }, { "epoch": 233.6, "learning_rate": 2.851851851851852e-05, "loss": 0.058, "step": 4380 }, { "epoch": 233.97, "eval_accuracy": 0.225, "eval_loss": 4.7573161125183105, "eval_runtime": 4.353, "eval_samples_per_second": 137.837, "eval_steps_per_second": 4.365, "step": 4387 }, { "epoch": 234.13, "learning_rate": 2.8456790123456788e-05, "loss": 0.0749, "step": 4390 }, { "epoch": 234.67, "learning_rate": 2.839506172839506e-05, "loss": 0.0661, "step": 4400 }, { "epoch": 234.99, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.6800408363342285, "eval_runtime": 4.3414, "eval_samples_per_second": 138.205, "eval_steps_per_second": 4.377, "step": 4406 }, { "epoch": 235.2, "learning_rate": 2.8333333333333335e-05, "loss": 0.0613, "step": 4410 }, { "epoch": 235.73, "learning_rate": 2.8271604938271606e-05, "loss": 0.0613, "step": 4420 }, { "epoch": 236.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.653302192687988, "eval_runtime": 4.3257, "eval_samples_per_second": 138.707, "eval_steps_per_second": 4.392, "step": 4425 }, { "epoch": 236.27, "learning_rate": 2.820987654320988e-05, "loss": 0.0555, "step": 4430 }, { "epoch": 236.8, "learning_rate": 2.814814814814815e-05, "loss": 0.0462, "step": 4440 }, { "epoch": 236.96, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.674839496612549, "eval_runtime": 4.3407, "eval_samples_per_second": 138.228, "eval_steps_per_second": 4.377, "step": 4443 }, { "epoch": 237.33, "learning_rate": 2.8086419753086424e-05, "loss": 0.0742, "step": 4450 }, { "epoch": 237.87, "learning_rate": 2.802469135802469e-05, "loss": 0.0494, "step": 4460 }, { "epoch": 237.97, "eval_accuracy": 0.23, "eval_loss": 4.687388896942139, "eval_runtime": 4.3685, "eval_samples_per_second": 137.346, "eval_steps_per_second": 4.349, "step": 4462 }, { "epoch": 238.4, "learning_rate": 2.7962962962962965e-05, "loss": 0.0617, "step": 4470 }, { "epoch": 238.93, "learning_rate": 2.7901234567901235e-05, "loss": 0.0643, "step": 4480 }, { "epoch": 238.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.729123592376709, "eval_runtime": 4.3628, "eval_samples_per_second": 137.527, "eval_steps_per_second": 4.355, "step": 4481 }, { "epoch": 239.47, "learning_rate": 2.783950617283951e-05, "loss": 0.0557, "step": 4490 }, { "epoch": 240.0, "learning_rate": 2.777777777777778e-05, "loss": 0.0422, "step": 4500 }, { "epoch": 240.0, "eval_accuracy": 0.23, "eval_loss": 4.7088117599487305, "eval_runtime": 4.3986, "eval_samples_per_second": 136.407, "eval_steps_per_second": 4.32, "step": 4500 }, { "epoch": 240.53, "learning_rate": 2.7716049382716053e-05, "loss": 0.0376, "step": 4510 }, { "epoch": 240.96, "eval_accuracy": 0.225, "eval_loss": 4.74221658706665, "eval_runtime": 4.4146, "eval_samples_per_second": 135.913, "eval_steps_per_second": 4.304, "step": 4518 }, { "epoch": 241.07, "learning_rate": 2.765432098765432e-05, "loss": 0.0343, "step": 4520 }, { "epoch": 241.6, "learning_rate": 2.7592592592592594e-05, "loss": 0.0696, "step": 4530 }, { "epoch": 241.97, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.801132678985596, "eval_runtime": 4.3976, "eval_samples_per_second": 136.44, "eval_steps_per_second": 4.321, "step": 4537 }, { "epoch": 242.13, "learning_rate": 2.7530864197530864e-05, "loss": 0.0603, "step": 4540 }, { "epoch": 242.67, "learning_rate": 2.7469135802469138e-05, "loss": 0.0609, "step": 4550 }, { "epoch": 242.99, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.801338195800781, "eval_runtime": 4.3836, "eval_samples_per_second": 136.873, "eval_steps_per_second": 4.334, "step": 4556 }, { "epoch": 243.2, "learning_rate": 2.7407407407407408e-05, "loss": 0.0484, "step": 4560 }, { "epoch": 243.73, "learning_rate": 2.7345679012345682e-05, "loss": 0.0637, "step": 4570 }, { "epoch": 244.0, "eval_accuracy": 0.225, "eval_loss": 4.760260105133057, "eval_runtime": 4.4032, "eval_samples_per_second": 136.265, "eval_steps_per_second": 4.315, "step": 4575 }, { "epoch": 244.27, "learning_rate": 2.7283950617283956e-05, "loss": 0.0489, "step": 4580 }, { "epoch": 244.8, "learning_rate": 2.7222222222222223e-05, "loss": 0.0529, "step": 4590 }, { "epoch": 244.96, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.789524078369141, "eval_runtime": 4.388, "eval_samples_per_second": 136.736, "eval_steps_per_second": 4.33, "step": 4593 }, { "epoch": 245.33, "learning_rate": 2.7160493827160493e-05, "loss": 0.0617, "step": 4600 }, { "epoch": 245.87, "learning_rate": 2.7098765432098767e-05, "loss": 0.0603, "step": 4610 }, { "epoch": 245.97, "eval_accuracy": 0.235, "eval_loss": 4.763910293579102, "eval_runtime": 4.3989, "eval_samples_per_second": 136.397, "eval_steps_per_second": 4.319, "step": 4612 }, { "epoch": 246.4, "learning_rate": 2.7037037037037037e-05, "loss": 0.0558, "step": 4620 }, { "epoch": 246.93, "learning_rate": 2.697530864197531e-05, "loss": 0.0365, "step": 4630 }, { "epoch": 246.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.728492259979248, "eval_runtime": 4.3868, "eval_samples_per_second": 136.774, "eval_steps_per_second": 4.331, "step": 4631 }, { "epoch": 247.47, "learning_rate": 2.6913580246913585e-05, "loss": 0.0862, "step": 4640 }, { "epoch": 248.0, "learning_rate": 2.6851851851851855e-05, "loss": 0.0732, "step": 4650 }, { "epoch": 248.0, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.725191116333008, "eval_runtime": 4.4162, "eval_samples_per_second": 135.863, "eval_steps_per_second": 4.302, "step": 4650 }, { "epoch": 248.53, "learning_rate": 2.6790123456790122e-05, "loss": 0.0709, "step": 4660 }, { "epoch": 248.96, "eval_accuracy": 0.23, "eval_loss": 4.761960506439209, "eval_runtime": 4.3965, "eval_samples_per_second": 136.472, "eval_steps_per_second": 4.322, "step": 4668 }, { "epoch": 249.07, "learning_rate": 2.6728395061728396e-05, "loss": 0.0463, "step": 4670 }, { "epoch": 249.6, "learning_rate": 2.6666666666666667e-05, "loss": 0.0485, "step": 4680 }, { "epoch": 249.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.752857208251953, "eval_runtime": 4.404, "eval_samples_per_second": 136.24, "eval_steps_per_second": 4.314, "step": 4687 }, { "epoch": 250.13, "learning_rate": 2.660493827160494e-05, "loss": 0.0453, "step": 4690 }, { "epoch": 250.67, "learning_rate": 2.654320987654321e-05, "loss": 0.0449, "step": 4700 }, { "epoch": 250.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.800561428070068, "eval_runtime": 4.4195, "eval_samples_per_second": 135.761, "eval_steps_per_second": 4.299, "step": 4706 }, { "epoch": 251.2, "learning_rate": 2.6481481481481485e-05, "loss": 0.0629, "step": 4710 }, { "epoch": 251.73, "learning_rate": 2.641975308641975e-05, "loss": 0.0506, "step": 4720 }, { "epoch": 252.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.802790641784668, "eval_runtime": 4.4472, "eval_samples_per_second": 134.916, "eval_steps_per_second": 4.272, "step": 4725 }, { "epoch": 252.27, "learning_rate": 2.6358024691358025e-05, "loss": 0.038, "step": 4730 }, { "epoch": 252.8, "learning_rate": 2.6296296296296296e-05, "loss": 0.0455, "step": 4740 }, { "epoch": 252.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.777773380279541, "eval_runtime": 4.4493, "eval_samples_per_second": 134.853, "eval_steps_per_second": 4.27, "step": 4743 }, { "epoch": 253.33, "learning_rate": 2.623456790123457e-05, "loss": 0.0418, "step": 4750 }, { "epoch": 253.87, "learning_rate": 2.617283950617284e-05, "loss": 0.0594, "step": 4760 }, { "epoch": 253.97, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.743904113769531, "eval_runtime": 4.4293, "eval_samples_per_second": 135.461, "eval_steps_per_second": 4.29, "step": 4762 }, { "epoch": 254.4, "learning_rate": 2.6111111111111114e-05, "loss": 0.0511, "step": 4770 }, { "epoch": 254.93, "learning_rate": 2.6049382716049388e-05, "loss": 0.0551, "step": 4780 }, { "epoch": 254.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.806947708129883, "eval_runtime": 4.4446, "eval_samples_per_second": 134.995, "eval_steps_per_second": 4.275, "step": 4781 }, { "epoch": 255.47, "learning_rate": 2.5987654320987655e-05, "loss": 0.0512, "step": 4790 }, { "epoch": 256.0, "learning_rate": 2.5925925925925925e-05, "loss": 0.0435, "step": 4800 }, { "epoch": 256.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.817090034484863, "eval_runtime": 4.463, "eval_samples_per_second": 134.44, "eval_steps_per_second": 4.257, "step": 4800 }, { "epoch": 256.53, "learning_rate": 2.58641975308642e-05, "loss": 0.042, "step": 4810 }, { "epoch": 256.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.796122074127197, "eval_runtime": 4.4296, "eval_samples_per_second": 135.453, "eval_steps_per_second": 4.289, "step": 4818 }, { "epoch": 257.07, "learning_rate": 2.580246913580247e-05, "loss": 0.0614, "step": 4820 }, { "epoch": 257.6, "learning_rate": 2.5740740740740743e-05, "loss": 0.0403, "step": 4830 }, { "epoch": 257.97, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.817234516143799, "eval_runtime": 4.4455, "eval_samples_per_second": 134.967, "eval_steps_per_second": 4.274, "step": 4837 }, { "epoch": 258.13, "learning_rate": 2.5679012345679017e-05, "loss": 0.0428, "step": 4840 }, { "epoch": 258.67, "learning_rate": 2.5617283950617287e-05, "loss": 0.0524, "step": 4850 }, { "epoch": 258.99, "eval_accuracy": 0.23, "eval_loss": 4.8536577224731445, "eval_runtime": 4.4559, "eval_samples_per_second": 134.654, "eval_steps_per_second": 4.264, "step": 4856 }, { "epoch": 259.2, "learning_rate": 2.5555555555555554e-05, "loss": 0.0495, "step": 4860 }, { "epoch": 259.73, "learning_rate": 2.5493827160493828e-05, "loss": 0.0461, "step": 4870 }, { "epoch": 260.0, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.769797325134277, "eval_runtime": 4.4781, "eval_samples_per_second": 133.984, "eval_steps_per_second": 4.243, "step": 4875 }, { "epoch": 260.27, "learning_rate": 2.5432098765432098e-05, "loss": 0.0547, "step": 4880 }, { "epoch": 260.8, "learning_rate": 2.5370370370370372e-05, "loss": 0.05, "step": 4890 }, { "epoch": 260.96, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.805800437927246, "eval_runtime": 4.4587, "eval_samples_per_second": 134.567, "eval_steps_per_second": 4.261, "step": 4893 }, { "epoch": 261.33, "learning_rate": 2.5308641975308646e-05, "loss": 0.0463, "step": 4900 }, { "epoch": 261.87, "learning_rate": 2.5246913580246916e-05, "loss": 0.0545, "step": 4910 }, { "epoch": 261.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.839805603027344, "eval_runtime": 5.2865, "eval_samples_per_second": 113.497, "eval_steps_per_second": 3.594, "step": 4912 }, { "epoch": 262.4, "learning_rate": 2.5185185185185183e-05, "loss": 0.066, "step": 4920 }, { "epoch": 262.93, "learning_rate": 2.5123456790123457e-05, "loss": 0.0405, "step": 4930 }, { "epoch": 262.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.82278299331665, "eval_runtime": 4.4772, "eval_samples_per_second": 134.011, "eval_steps_per_second": 4.244, "step": 4931 }, { "epoch": 263.47, "learning_rate": 2.5061728395061727e-05, "loss": 0.0466, "step": 4940 }, { "epoch": 264.0, "learning_rate": 2.5e-05, "loss": 0.0615, "step": 4950 }, { "epoch": 264.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.839545726776123, "eval_runtime": 4.4638, "eval_samples_per_second": 134.416, "eval_steps_per_second": 4.257, "step": 4950 }, { "epoch": 264.53, "learning_rate": 2.4938271604938275e-05, "loss": 0.0381, "step": 4960 }, { "epoch": 264.96, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.823088645935059, "eval_runtime": 4.4558, "eval_samples_per_second": 134.657, "eval_steps_per_second": 4.264, "step": 4968 }, { "epoch": 265.07, "learning_rate": 2.4876543209876542e-05, "loss": 0.0609, "step": 4970 }, { "epoch": 265.6, "learning_rate": 2.4814814814814816e-05, "loss": 0.0464, "step": 4980 }, { "epoch": 265.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.818026065826416, "eval_runtime": 4.4544, "eval_samples_per_second": 134.698, "eval_steps_per_second": 4.265, "step": 4987 }, { "epoch": 266.13, "learning_rate": 2.475308641975309e-05, "loss": 0.0454, "step": 4990 }, { "epoch": 266.67, "learning_rate": 2.4691358024691357e-05, "loss": 0.058, "step": 5000 }, { "epoch": 266.99, "eval_accuracy": 0.235, "eval_loss": 4.87436056137085, "eval_runtime": 4.4992, "eval_samples_per_second": 133.357, "eval_steps_per_second": 4.223, "step": 5006 }, { "epoch": 267.2, "learning_rate": 2.462962962962963e-05, "loss": 0.044, "step": 5010 }, { "epoch": 267.73, "learning_rate": 2.4567901234567904e-05, "loss": 0.0553, "step": 5020 }, { "epoch": 268.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.886570453643799, "eval_runtime": 4.4931, "eval_samples_per_second": 133.538, "eval_steps_per_second": 4.229, "step": 5025 }, { "epoch": 268.27, "learning_rate": 2.4506172839506175e-05, "loss": 0.0536, "step": 5030 }, { "epoch": 268.8, "learning_rate": 2.4444444444444445e-05, "loss": 0.0505, "step": 5040 }, { "epoch": 268.96, "eval_accuracy": 0.24, "eval_loss": 4.853390216827393, "eval_runtime": 4.4705, "eval_samples_per_second": 134.214, "eval_steps_per_second": 4.25, "step": 5043 }, { "epoch": 269.33, "learning_rate": 2.438271604938272e-05, "loss": 0.0269, "step": 5050 }, { "epoch": 269.87, "learning_rate": 2.432098765432099e-05, "loss": 0.049, "step": 5060 }, { "epoch": 269.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.870242118835449, "eval_runtime": 4.4681, "eval_samples_per_second": 134.285, "eval_steps_per_second": 4.252, "step": 5062 }, { "epoch": 270.4, "learning_rate": 2.425925925925926e-05, "loss": 0.0464, "step": 5070 }, { "epoch": 270.93, "learning_rate": 2.4197530864197533e-05, "loss": 0.0444, "step": 5080 }, { "epoch": 270.99, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.871464729309082, "eval_runtime": 4.4942, "eval_samples_per_second": 133.505, "eval_steps_per_second": 4.228, "step": 5081 }, { "epoch": 271.47, "learning_rate": 2.4135802469135804e-05, "loss": 0.0433, "step": 5090 }, { "epoch": 272.0, "learning_rate": 2.4074074074074074e-05, "loss": 0.0457, "step": 5100 }, { "epoch": 272.0, "eval_accuracy": 0.225, "eval_loss": 4.827383518218994, "eval_runtime": 4.4878, "eval_samples_per_second": 133.697, "eval_steps_per_second": 4.234, "step": 5100 }, { "epoch": 272.53, "learning_rate": 2.4012345679012348e-05, "loss": 0.0546, "step": 5110 }, { "epoch": 272.96, "eval_accuracy": 0.225, "eval_loss": 4.844120502471924, "eval_runtime": 4.5115, "eval_samples_per_second": 132.993, "eval_steps_per_second": 4.211, "step": 5118 }, { "epoch": 273.07, "learning_rate": 2.3950617283950618e-05, "loss": 0.0427, "step": 5120 }, { "epoch": 273.6, "learning_rate": 2.3888888888888892e-05, "loss": 0.0378, "step": 5130 }, { "epoch": 273.97, "eval_accuracy": 0.225, "eval_loss": 4.822915077209473, "eval_runtime": 4.4872, "eval_samples_per_second": 133.713, "eval_steps_per_second": 4.234, "step": 5137 }, { "epoch": 274.13, "learning_rate": 2.3827160493827162e-05, "loss": 0.0603, "step": 5140 }, { "epoch": 274.67, "learning_rate": 2.3765432098765433e-05, "loss": 0.0374, "step": 5150 }, { "epoch": 274.99, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.805349349975586, "eval_runtime": 4.5038, "eval_samples_per_second": 133.22, "eval_steps_per_second": 4.219, "step": 5156 }, { "epoch": 275.2, "learning_rate": 2.3703703703703707e-05, "loss": 0.0384, "step": 5160 }, { "epoch": 275.73, "learning_rate": 2.3641975308641977e-05, "loss": 0.047, "step": 5170 }, { "epoch": 276.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.861939907073975, "eval_runtime": 4.5272, "eval_samples_per_second": 132.531, "eval_steps_per_second": 4.197, "step": 5175 }, { "epoch": 276.27, "learning_rate": 2.3580246913580247e-05, "loss": 0.0352, "step": 5180 }, { "epoch": 276.8, "learning_rate": 2.351851851851852e-05, "loss": 0.0526, "step": 5190 }, { "epoch": 276.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.87931489944458, "eval_runtime": 4.5397, "eval_samples_per_second": 132.166, "eval_steps_per_second": 4.185, "step": 5193 }, { "epoch": 277.33, "learning_rate": 2.345679012345679e-05, "loss": 0.0406, "step": 5200 }, { "epoch": 277.87, "learning_rate": 2.3395061728395062e-05, "loss": 0.0503, "step": 5210 }, { "epoch": 277.97, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.9059576988220215, "eval_runtime": 4.5301, "eval_samples_per_second": 132.446, "eval_steps_per_second": 4.194, "step": 5212 }, { "epoch": 278.4, "learning_rate": 2.3333333333333336e-05, "loss": 0.043, "step": 5220 }, { "epoch": 278.93, "learning_rate": 2.3271604938271606e-05, "loss": 0.0414, "step": 5230 }, { "epoch": 278.99, "eval_accuracy": 0.24, "eval_loss": 4.86867094039917, "eval_runtime": 4.4935, "eval_samples_per_second": 133.527, "eval_steps_per_second": 4.228, "step": 5231 }, { "epoch": 279.47, "learning_rate": 2.3209876543209877e-05, "loss": 0.0561, "step": 5240 }, { "epoch": 280.0, "learning_rate": 2.314814814814815e-05, "loss": 0.0361, "step": 5250 }, { "epoch": 280.0, "eval_accuracy": 0.24, "eval_loss": 4.853731155395508, "eval_runtime": 4.5103, "eval_samples_per_second": 133.029, "eval_steps_per_second": 4.213, "step": 5250 }, { "epoch": 280.53, "learning_rate": 2.308641975308642e-05, "loss": 0.0449, "step": 5260 }, { "epoch": 280.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.8204240798950195, "eval_runtime": 4.5205, "eval_samples_per_second": 132.729, "eval_steps_per_second": 4.203, "step": 5268 }, { "epoch": 281.07, "learning_rate": 2.302469135802469e-05, "loss": 0.0527, "step": 5270 }, { "epoch": 281.6, "learning_rate": 2.2962962962962965e-05, "loss": 0.0596, "step": 5280 }, { "epoch": 281.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.803044319152832, "eval_runtime": 4.5651, "eval_samples_per_second": 131.433, "eval_steps_per_second": 4.162, "step": 5287 }, { "epoch": 282.13, "learning_rate": 2.2901234567901235e-05, "loss": 0.056, "step": 5290 }, { "epoch": 282.67, "learning_rate": 2.2839506172839506e-05, "loss": 0.0494, "step": 5300 }, { "epoch": 282.99, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.8059892654418945, "eval_runtime": 4.5556, "eval_samples_per_second": 131.706, "eval_steps_per_second": 4.171, "step": 5306 }, { "epoch": 283.2, "learning_rate": 2.277777777777778e-05, "loss": 0.0311, "step": 5310 }, { "epoch": 283.73, "learning_rate": 2.271604938271605e-05, "loss": 0.0483, "step": 5320 }, { "epoch": 284.0, "eval_accuracy": 0.235, "eval_loss": 4.7877960205078125, "eval_runtime": 4.5534, "eval_samples_per_second": 131.77, "eval_steps_per_second": 4.173, "step": 5325 }, { "epoch": 284.27, "learning_rate": 2.2654320987654324e-05, "loss": 0.0606, "step": 5330 }, { "epoch": 284.8, "learning_rate": 2.2592592592592594e-05, "loss": 0.0338, "step": 5340 }, { "epoch": 284.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.825440406799316, "eval_runtime": 4.5446, "eval_samples_per_second": 132.024, "eval_steps_per_second": 4.181, "step": 5343 }, { "epoch": 285.33, "learning_rate": 2.2530864197530865e-05, "loss": 0.0454, "step": 5350 }, { "epoch": 285.87, "learning_rate": 2.246913580246914e-05, "loss": 0.0319, "step": 5360 }, { "epoch": 285.97, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.826366424560547, "eval_runtime": 4.5558, "eval_samples_per_second": 131.701, "eval_steps_per_second": 4.171, "step": 5362 }, { "epoch": 286.4, "learning_rate": 2.240740740740741e-05, "loss": 0.0414, "step": 5370 }, { "epoch": 286.93, "learning_rate": 2.234567901234568e-05, "loss": 0.0454, "step": 5380 }, { "epoch": 286.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.842591762542725, "eval_runtime": 4.55, "eval_samples_per_second": 131.868, "eval_steps_per_second": 4.176, "step": 5381 }, { "epoch": 287.47, "learning_rate": 2.2283950617283953e-05, "loss": 0.0426, "step": 5390 }, { "epoch": 288.0, "learning_rate": 2.2222222222222223e-05, "loss": 0.0409, "step": 5400 }, { "epoch": 288.0, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.819784164428711, "eval_runtime": 4.5681, "eval_samples_per_second": 131.345, "eval_steps_per_second": 4.159, "step": 5400 }, { "epoch": 288.53, "learning_rate": 2.2160493827160494e-05, "loss": 0.0435, "step": 5410 }, { "epoch": 288.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.8339433670043945, "eval_runtime": 4.5995, "eval_samples_per_second": 130.45, "eval_steps_per_second": 4.131, "step": 5418 }, { "epoch": 289.07, "learning_rate": 2.2098765432098767e-05, "loss": 0.0452, "step": 5420 }, { "epoch": 289.6, "learning_rate": 2.2037037037037038e-05, "loss": 0.0498, "step": 5430 }, { "epoch": 289.97, "eval_accuracy": 0.225, "eval_loss": 4.838677883148193, "eval_runtime": 4.6181, "eval_samples_per_second": 129.923, "eval_steps_per_second": 4.114, "step": 5437 }, { "epoch": 290.13, "learning_rate": 2.1975308641975308e-05, "loss": 0.0467, "step": 5440 }, { "epoch": 290.67, "learning_rate": 2.1913580246913582e-05, "loss": 0.0447, "step": 5450 }, { "epoch": 290.99, "eval_accuracy": 0.23, "eval_loss": 4.834191799163818, "eval_runtime": 4.5905, "eval_samples_per_second": 130.706, "eval_steps_per_second": 4.139, "step": 5456 }, { "epoch": 291.2, "learning_rate": 2.1851851851851852e-05, "loss": 0.0441, "step": 5460 }, { "epoch": 291.73, "learning_rate": 2.1790123456790123e-05, "loss": 0.0402, "step": 5470 }, { "epoch": 292.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.849569320678711, "eval_runtime": 4.5792, "eval_samples_per_second": 131.028, "eval_steps_per_second": 4.149, "step": 5475 }, { "epoch": 292.27, "learning_rate": 2.1728395061728397e-05, "loss": 0.0314, "step": 5480 }, { "epoch": 292.8, "learning_rate": 2.1666666666666667e-05, "loss": 0.0366, "step": 5490 }, { "epoch": 292.96, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.867130279541016, "eval_runtime": 4.5608, "eval_samples_per_second": 131.555, "eval_steps_per_second": 4.166, "step": 5493 }, { "epoch": 293.33, "learning_rate": 2.1604938271604937e-05, "loss": 0.0388, "step": 5500 }, { "epoch": 293.87, "learning_rate": 2.154320987654321e-05, "loss": 0.0369, "step": 5510 }, { "epoch": 293.97, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.836596488952637, "eval_runtime": 4.583, "eval_samples_per_second": 130.917, "eval_steps_per_second": 4.146, "step": 5512 }, { "epoch": 294.4, "learning_rate": 2.148148148148148e-05, "loss": 0.0651, "step": 5520 }, { "epoch": 294.93, "learning_rate": 2.1419753086419755e-05, "loss": 0.0361, "step": 5530 }, { "epoch": 294.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.799242973327637, "eval_runtime": 4.6324, "eval_samples_per_second": 129.522, "eval_steps_per_second": 4.102, "step": 5531 }, { "epoch": 295.47, "learning_rate": 2.1358024691358026e-05, "loss": 0.0549, "step": 5540 }, { "epoch": 296.0, "learning_rate": 2.1296296296296296e-05, "loss": 0.0448, "step": 5550 }, { "epoch": 296.0, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.848645210266113, "eval_runtime": 4.6425, "eval_samples_per_second": 129.241, "eval_steps_per_second": 4.093, "step": 5550 }, { "epoch": 296.53, "learning_rate": 2.123456790123457e-05, "loss": 0.055, "step": 5560 }, { "epoch": 296.96, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.897942066192627, "eval_runtime": 4.5933, "eval_samples_per_second": 130.624, "eval_steps_per_second": 4.136, "step": 5568 }, { "epoch": 297.07, "learning_rate": 2.117283950617284e-05, "loss": 0.0427, "step": 5570 }, { "epoch": 297.6, "learning_rate": 2.111111111111111e-05, "loss": 0.0585, "step": 5580 }, { "epoch": 297.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.866022109985352, "eval_runtime": 4.6056, "eval_samples_per_second": 130.276, "eval_steps_per_second": 4.125, "step": 5587 }, { "epoch": 298.13, "learning_rate": 2.1049382716049385e-05, "loss": 0.0478, "step": 5590 }, { "epoch": 298.67, "learning_rate": 2.0987654320987655e-05, "loss": 0.0477, "step": 5600 }, { "epoch": 298.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.8717451095581055, "eval_runtime": 4.6241, "eval_samples_per_second": 129.756, "eval_steps_per_second": 4.109, "step": 5606 }, { "epoch": 299.2, "learning_rate": 2.0925925925925925e-05, "loss": 0.0515, "step": 5610 }, { "epoch": 299.73, "learning_rate": 2.08641975308642e-05, "loss": 0.0247, "step": 5620 }, { "epoch": 300.0, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.883845806121826, "eval_runtime": 4.6098, "eval_samples_per_second": 130.158, "eval_steps_per_second": 4.122, "step": 5625 }, { "epoch": 300.27, "learning_rate": 2.0802469135802473e-05, "loss": 0.0438, "step": 5630 }, { "epoch": 300.8, "learning_rate": 2.074074074074074e-05, "loss": 0.047, "step": 5640 }, { "epoch": 300.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.824845314025879, "eval_runtime": 4.5984, "eval_samples_per_second": 130.482, "eval_steps_per_second": 4.132, "step": 5643 }, { "epoch": 301.33, "learning_rate": 2.0679012345679014e-05, "loss": 0.0497, "step": 5650 }, { "epoch": 301.87, "learning_rate": 2.0617283950617287e-05, "loss": 0.0608, "step": 5660 }, { "epoch": 301.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.832959175109863, "eval_runtime": 4.6004, "eval_samples_per_second": 130.422, "eval_steps_per_second": 4.13, "step": 5662 }, { "epoch": 302.4, "learning_rate": 2.0555555555555555e-05, "loss": 0.0413, "step": 5670 }, { "epoch": 302.93, "learning_rate": 2.0493827160493828e-05, "loss": 0.0417, "step": 5680 }, { "epoch": 302.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.823630332946777, "eval_runtime": 4.5991, "eval_samples_per_second": 130.462, "eval_steps_per_second": 4.131, "step": 5681 }, { "epoch": 303.47, "learning_rate": 2.0432098765432102e-05, "loss": 0.0329, "step": 5690 }, { "epoch": 304.0, "learning_rate": 2.037037037037037e-05, "loss": 0.0494, "step": 5700 }, { "epoch": 304.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.807046413421631, "eval_runtime": 4.6151, "eval_samples_per_second": 130.008, "eval_steps_per_second": 4.117, "step": 5700 }, { "epoch": 304.53, "learning_rate": 2.0308641975308643e-05, "loss": 0.0316, "step": 5710 }, { "epoch": 304.96, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.821282386779785, "eval_runtime": 4.6056, "eval_samples_per_second": 130.275, "eval_steps_per_second": 4.125, "step": 5718 }, { "epoch": 305.07, "learning_rate": 2.0246913580246917e-05, "loss": 0.0473, "step": 5720 }, { "epoch": 305.6, "learning_rate": 2.0185185185185187e-05, "loss": 0.0421, "step": 5730 }, { "epoch": 305.97, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.863409996032715, "eval_runtime": 4.6418, "eval_samples_per_second": 129.259, "eval_steps_per_second": 4.093, "step": 5737 }, { "epoch": 306.13, "learning_rate": 2.0123456790123457e-05, "loss": 0.0382, "step": 5740 }, { "epoch": 306.67, "learning_rate": 2.006172839506173e-05, "loss": 0.0411, "step": 5750 }, { "epoch": 306.99, "eval_accuracy": 0.24, "eval_loss": 4.877004623413086, "eval_runtime": 4.6583, "eval_samples_per_second": 128.802, "eval_steps_per_second": 4.079, "step": 5756 }, { "epoch": 307.2, "learning_rate": 2e-05, "loss": 0.042, "step": 5760 }, { "epoch": 307.73, "learning_rate": 1.9938271604938272e-05, "loss": 0.0404, "step": 5770 }, { "epoch": 308.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.9029860496521, "eval_runtime": 4.6363, "eval_samples_per_second": 129.412, "eval_steps_per_second": 4.098, "step": 5775 }, { "epoch": 308.27, "learning_rate": 1.9876543209876546e-05, "loss": 0.0401, "step": 5780 }, { "epoch": 308.8, "learning_rate": 1.9814814814814816e-05, "loss": 0.0397, "step": 5790 }, { "epoch": 308.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.943274974822998, "eval_runtime": 4.6219, "eval_samples_per_second": 129.817, "eval_steps_per_second": 4.111, "step": 5793 }, { "epoch": 309.33, "learning_rate": 1.9753086419753087e-05, "loss": 0.0467, "step": 5800 }, { "epoch": 309.87, "learning_rate": 1.969135802469136e-05, "loss": 0.053, "step": 5810 }, { "epoch": 309.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.930065631866455, "eval_runtime": 4.6916, "eval_samples_per_second": 127.887, "eval_steps_per_second": 4.05, "step": 5812 }, { "epoch": 310.4, "learning_rate": 1.962962962962963e-05, "loss": 0.0459, "step": 5820 }, { "epoch": 310.93, "learning_rate": 1.95679012345679e-05, "loss": 0.0303, "step": 5830 }, { "epoch": 310.99, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.8961286544799805, "eval_runtime": 4.6651, "eval_samples_per_second": 128.614, "eval_steps_per_second": 4.073, "step": 5831 }, { "epoch": 311.47, "learning_rate": 1.950617283950617e-05, "loss": 0.0314, "step": 5840 }, { "epoch": 312.0, "learning_rate": 1.9444444444444445e-05, "loss": 0.0369, "step": 5850 }, { "epoch": 312.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.856044292449951, "eval_runtime": 4.6488, "eval_samples_per_second": 129.067, "eval_steps_per_second": 4.087, "step": 5850 }, { "epoch": 312.53, "learning_rate": 1.938271604938272e-05, "loss": 0.0423, "step": 5860 }, { "epoch": 312.96, "eval_accuracy": 0.225, "eval_loss": 4.917734146118164, "eval_runtime": 4.6571, "eval_samples_per_second": 128.835, "eval_steps_per_second": 4.08, "step": 5868 }, { "epoch": 313.07, "learning_rate": 1.9320987654320986e-05, "loss": 0.03, "step": 5870 }, { "epoch": 313.6, "learning_rate": 1.925925925925926e-05, "loss": 0.0343, "step": 5880 }, { "epoch": 313.97, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.892765522003174, "eval_runtime": 4.6848, "eval_samples_per_second": 128.074, "eval_steps_per_second": 4.056, "step": 5887 }, { "epoch": 314.13, "learning_rate": 1.9197530864197534e-05, "loss": 0.0299, "step": 5890 }, { "epoch": 314.67, "learning_rate": 1.91358024691358e-05, "loss": 0.0216, "step": 5900 }, { "epoch": 314.99, "eval_accuracy": 0.23, "eval_loss": 4.895847320556641, "eval_runtime": 4.6722, "eval_samples_per_second": 128.42, "eval_steps_per_second": 4.067, "step": 5906 }, { "epoch": 315.2, "learning_rate": 1.9074074074074075e-05, "loss": 0.0604, "step": 5910 }, { "epoch": 315.73, "learning_rate": 1.901234567901235e-05, "loss": 0.0287, "step": 5920 }, { "epoch": 316.0, "eval_accuracy": 0.235, "eval_loss": 4.880258083343506, "eval_runtime": 4.6757, "eval_samples_per_second": 128.323, "eval_steps_per_second": 4.064, "step": 5925 }, { "epoch": 316.27, "learning_rate": 1.8950617283950615e-05, "loss": 0.0269, "step": 5930 }, { "epoch": 316.8, "learning_rate": 1.888888888888889e-05, "loss": 0.0286, "step": 5940 }, { "epoch": 316.96, "eval_accuracy": 0.23, "eval_loss": 4.86151123046875, "eval_runtime": 4.6507, "eval_samples_per_second": 129.012, "eval_steps_per_second": 4.085, "step": 5943 }, { "epoch": 317.33, "learning_rate": 1.8827160493827163e-05, "loss": 0.0478, "step": 5950 }, { "epoch": 317.87, "learning_rate": 1.8765432098765433e-05, "loss": 0.0304, "step": 5960 }, { "epoch": 317.97, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.873566150665283, "eval_runtime": 4.6681, "eval_samples_per_second": 128.532, "eval_steps_per_second": 4.07, "step": 5962 }, { "epoch": 318.4, "learning_rate": 1.8703703703703704e-05, "loss": 0.0346, "step": 5970 }, { "epoch": 318.93, "learning_rate": 1.8641975308641977e-05, "loss": 0.0486, "step": 5980 }, { "epoch": 318.99, "eval_accuracy": 0.22333333333333333, "eval_loss": 4.882538318634033, "eval_runtime": 4.6432, "eval_samples_per_second": 129.222, "eval_steps_per_second": 4.092, "step": 5981 }, { "epoch": 319.47, "learning_rate": 1.8580246913580248e-05, "loss": 0.0391, "step": 5990 }, { "epoch": 320.0, "learning_rate": 1.8518518518518518e-05, "loss": 0.0404, "step": 6000 }, { "epoch": 320.0, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.861847400665283, "eval_runtime": 4.6741, "eval_samples_per_second": 128.368, "eval_steps_per_second": 4.065, "step": 6000 }, { "epoch": 320.53, "learning_rate": 1.8456790123456792e-05, "loss": 0.0439, "step": 6010 }, { "epoch": 320.96, "eval_accuracy": 0.23, "eval_loss": 4.884802341461182, "eval_runtime": 4.66, "eval_samples_per_second": 128.756, "eval_steps_per_second": 4.077, "step": 6018 }, { "epoch": 321.07, "learning_rate": 1.8395061728395062e-05, "loss": 0.0387, "step": 6020 }, { "epoch": 321.6, "learning_rate": 1.8333333333333333e-05, "loss": 0.0428, "step": 6030 }, { "epoch": 321.97, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.897517681121826, "eval_runtime": 4.6751, "eval_samples_per_second": 128.34, "eval_steps_per_second": 4.064, "step": 6037 }, { "epoch": 322.13, "learning_rate": 1.8271604938271607e-05, "loss": 0.0261, "step": 6040 }, { "epoch": 322.67, "learning_rate": 1.8209876543209877e-05, "loss": 0.0498, "step": 6050 }, { "epoch": 322.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.8614420890808105, "eval_runtime": 4.6819, "eval_samples_per_second": 128.153, "eval_steps_per_second": 4.058, "step": 6056 }, { "epoch": 323.2, "learning_rate": 1.814814814814815e-05, "loss": 0.0403, "step": 6060 }, { "epoch": 323.73, "learning_rate": 1.808641975308642e-05, "loss": 0.0314, "step": 6070 }, { "epoch": 324.0, "eval_accuracy": 0.235, "eval_loss": 4.871830463409424, "eval_runtime": 4.6693, "eval_samples_per_second": 128.5, "eval_steps_per_second": 4.069, "step": 6075 }, { "epoch": 324.27, "learning_rate": 1.802469135802469e-05, "loss": 0.0365, "step": 6080 }, { "epoch": 324.8, "learning_rate": 1.7962962962962965e-05, "loss": 0.0334, "step": 6090 }, { "epoch": 324.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.902100563049316, "eval_runtime": 4.7082, "eval_samples_per_second": 127.436, "eval_steps_per_second": 4.035, "step": 6093 }, { "epoch": 325.33, "learning_rate": 1.7901234567901236e-05, "loss": 0.0446, "step": 6100 }, { "epoch": 325.87, "learning_rate": 1.7839506172839506e-05, "loss": 0.0431, "step": 6110 }, { "epoch": 325.97, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.897326946258545, "eval_runtime": 4.7241, "eval_samples_per_second": 127.008, "eval_steps_per_second": 4.022, "step": 6112 }, { "epoch": 326.4, "learning_rate": 1.777777777777778e-05, "loss": 0.0316, "step": 6120 }, { "epoch": 326.93, "learning_rate": 1.771604938271605e-05, "loss": 0.0473, "step": 6130 }, { "epoch": 326.99, "eval_accuracy": 0.24, "eval_loss": 4.867129802703857, "eval_runtime": 4.6999, "eval_samples_per_second": 127.662, "eval_steps_per_second": 4.043, "step": 6131 }, { "epoch": 327.47, "learning_rate": 1.765432098765432e-05, "loss": 0.0365, "step": 6140 }, { "epoch": 328.0, "learning_rate": 1.7592592592592595e-05, "loss": 0.0348, "step": 6150 }, { "epoch": 328.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.905031204223633, "eval_runtime": 4.6993, "eval_samples_per_second": 127.679, "eval_steps_per_second": 4.043, "step": 6150 }, { "epoch": 328.53, "learning_rate": 1.7530864197530865e-05, "loss": 0.0718, "step": 6160 }, { "epoch": 328.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.886887073516846, "eval_runtime": 4.7046, "eval_samples_per_second": 127.535, "eval_steps_per_second": 4.039, "step": 6168 }, { "epoch": 329.07, "learning_rate": 1.7469135802469135e-05, "loss": 0.0418, "step": 6170 }, { "epoch": 329.6, "learning_rate": 1.740740740740741e-05, "loss": 0.0387, "step": 6180 }, { "epoch": 329.97, "eval_accuracy": 0.245, "eval_loss": 4.855226039886475, "eval_runtime": 4.7051, "eval_samples_per_second": 127.52, "eval_steps_per_second": 4.038, "step": 6187 }, { "epoch": 330.13, "learning_rate": 1.734567901234568e-05, "loss": 0.032, "step": 6190 }, { "epoch": 330.67, "learning_rate": 1.728395061728395e-05, "loss": 0.0335, "step": 6200 }, { "epoch": 330.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.893190860748291, "eval_runtime": 4.7105, "eval_samples_per_second": 127.376, "eval_steps_per_second": 4.034, "step": 6206 }, { "epoch": 331.2, "learning_rate": 1.7222222222222224e-05, "loss": 0.0271, "step": 6210 }, { "epoch": 331.73, "learning_rate": 1.7160493827160494e-05, "loss": 0.0355, "step": 6220 }, { "epoch": 332.0, "eval_accuracy": 0.245, "eval_loss": 4.919488906860352, "eval_runtime": 4.7258, "eval_samples_per_second": 126.963, "eval_steps_per_second": 4.02, "step": 6225 }, { "epoch": 332.27, "learning_rate": 1.7098765432098765e-05, "loss": 0.038, "step": 6230 }, { "epoch": 332.8, "learning_rate": 1.7037037037037038e-05, "loss": 0.0407, "step": 6240 }, { "epoch": 332.96, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.9162702560424805, "eval_runtime": 4.7131, "eval_samples_per_second": 127.305, "eval_steps_per_second": 4.031, "step": 6243 }, { "epoch": 333.33, "learning_rate": 1.697530864197531e-05, "loss": 0.0434, "step": 6250 }, { "epoch": 333.87, "learning_rate": 1.6913580246913582e-05, "loss": 0.0471, "step": 6260 }, { "epoch": 333.97, "eval_accuracy": 0.225, "eval_loss": 4.885989189147949, "eval_runtime": 4.7137, "eval_samples_per_second": 127.289, "eval_steps_per_second": 4.031, "step": 6262 }, { "epoch": 334.4, "learning_rate": 1.6851851851851853e-05, "loss": 0.0336, "step": 6270 }, { "epoch": 334.93, "learning_rate": 1.6790123456790123e-05, "loss": 0.0334, "step": 6280 }, { "epoch": 334.99, "eval_accuracy": 0.235, "eval_loss": 4.894328594207764, "eval_runtime": 4.6988, "eval_samples_per_second": 127.693, "eval_steps_per_second": 4.044, "step": 6281 }, { "epoch": 335.47, "learning_rate": 1.6728395061728397e-05, "loss": 0.0441, "step": 6290 }, { "epoch": 336.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.0301, "step": 6300 }, { "epoch": 336.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.922252178192139, "eval_runtime": 4.7453, "eval_samples_per_second": 126.441, "eval_steps_per_second": 4.004, "step": 6300 }, { "epoch": 336.53, "learning_rate": 1.6604938271604938e-05, "loss": 0.0281, "step": 6310 }, { "epoch": 336.96, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.910050392150879, "eval_runtime": 4.7284, "eval_samples_per_second": 126.892, "eval_steps_per_second": 4.018, "step": 6318 }, { "epoch": 337.07, "learning_rate": 1.654320987654321e-05, "loss": 0.0365, "step": 6320 }, { "epoch": 337.6, "learning_rate": 1.6481481481481482e-05, "loss": 0.0305, "step": 6330 }, { "epoch": 337.97, "eval_accuracy": 0.24, "eval_loss": 4.889711856842041, "eval_runtime": 4.7203, "eval_samples_per_second": 127.111, "eval_steps_per_second": 4.025, "step": 6337 }, { "epoch": 338.13, "learning_rate": 1.6419753086419752e-05, "loss": 0.0542, "step": 6340 }, { "epoch": 338.67, "learning_rate": 1.6358024691358026e-05, "loss": 0.0505, "step": 6350 }, { "epoch": 338.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.929032802581787, "eval_runtime": 4.7098, "eval_samples_per_second": 127.395, "eval_steps_per_second": 4.034, "step": 6356 }, { "epoch": 339.2, "learning_rate": 1.62962962962963e-05, "loss": 0.0414, "step": 6360 }, { "epoch": 339.73, "learning_rate": 1.6234567901234567e-05, "loss": 0.024, "step": 6370 }, { "epoch": 340.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.944223880767822, "eval_runtime": 4.7276, "eval_samples_per_second": 126.914, "eval_steps_per_second": 4.019, "step": 6375 }, { "epoch": 340.27, "learning_rate": 1.617283950617284e-05, "loss": 0.0267, "step": 6380 }, { "epoch": 340.8, "learning_rate": 1.6111111111111115e-05, "loss": 0.0504, "step": 6390 }, { "epoch": 340.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.918275833129883, "eval_runtime": 4.709, "eval_samples_per_second": 127.416, "eval_steps_per_second": 4.035, "step": 6393 }, { "epoch": 341.33, "learning_rate": 1.604938271604938e-05, "loss": 0.0413, "step": 6400 }, { "epoch": 341.87, "learning_rate": 1.5987654320987655e-05, "loss": 0.0259, "step": 6410 }, { "epoch": 341.97, "eval_accuracy": 0.235, "eval_loss": 4.883179187774658, "eval_runtime": 4.734, "eval_samples_per_second": 126.742, "eval_steps_per_second": 4.014, "step": 6412 }, { "epoch": 342.4, "learning_rate": 1.5925925925925926e-05, "loss": 0.0338, "step": 6420 }, { "epoch": 342.93, "learning_rate": 1.5864197530864196e-05, "loss": 0.0313, "step": 6430 }, { "epoch": 342.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.895809650421143, "eval_runtime": 4.7766, "eval_samples_per_second": 125.612, "eval_steps_per_second": 3.978, "step": 6431 }, { "epoch": 343.47, "learning_rate": 1.580246913580247e-05, "loss": 0.0213, "step": 6440 }, { "epoch": 344.0, "learning_rate": 1.574074074074074e-05, "loss": 0.0293, "step": 6450 }, { "epoch": 344.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.897879123687744, "eval_runtime": 4.8088, "eval_samples_per_second": 124.771, "eval_steps_per_second": 3.951, "step": 6450 }, { "epoch": 344.53, "learning_rate": 1.5679012345679014e-05, "loss": 0.0427, "step": 6460 }, { "epoch": 344.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.9055495262146, "eval_runtime": 4.8414, "eval_samples_per_second": 123.93, "eval_steps_per_second": 3.924, "step": 6468 }, { "epoch": 345.07, "learning_rate": 1.5617283950617285e-05, "loss": 0.0344, "step": 6470 }, { "epoch": 345.6, "learning_rate": 1.5555555555555555e-05, "loss": 0.0399, "step": 6480 }, { "epoch": 345.97, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.895743370056152, "eval_runtime": 4.7399, "eval_samples_per_second": 126.584, "eval_steps_per_second": 4.009, "step": 6487 }, { "epoch": 346.13, "learning_rate": 1.549382716049383e-05, "loss": 0.0253, "step": 6490 }, { "epoch": 346.67, "learning_rate": 1.54320987654321e-05, "loss": 0.0273, "step": 6500 }, { "epoch": 346.99, "eval_accuracy": 0.24, "eval_loss": 4.8988518714904785, "eval_runtime": 4.7852, "eval_samples_per_second": 125.387, "eval_steps_per_second": 3.971, "step": 6506 }, { "epoch": 347.2, "learning_rate": 1.537037037037037e-05, "loss": 0.0261, "step": 6510 }, { "epoch": 347.73, "learning_rate": 1.5308641975308643e-05, "loss": 0.0388, "step": 6520 }, { "epoch": 348.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.9087018966674805, "eval_runtime": 4.7683, "eval_samples_per_second": 125.83, "eval_steps_per_second": 3.985, "step": 6525 }, { "epoch": 348.27, "learning_rate": 1.5246913580246914e-05, "loss": 0.0323, "step": 6530 }, { "epoch": 348.8, "learning_rate": 1.5185185185185186e-05, "loss": 0.0306, "step": 6540 }, { "epoch": 348.96, "eval_accuracy": 0.22833333333333333, "eval_loss": 4.926441669464111, "eval_runtime": 4.7772, "eval_samples_per_second": 125.598, "eval_steps_per_second": 3.977, "step": 6543 }, { "epoch": 349.33, "learning_rate": 1.5123456790123458e-05, "loss": 0.0249, "step": 6550 }, { "epoch": 349.87, "learning_rate": 1.506172839506173e-05, "loss": 0.0411, "step": 6560 }, { "epoch": 349.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.921908855438232, "eval_runtime": 4.7531, "eval_samples_per_second": 126.234, "eval_steps_per_second": 3.997, "step": 6562 }, { "epoch": 350.4, "learning_rate": 1.5e-05, "loss": 0.031, "step": 6570 }, { "epoch": 350.93, "learning_rate": 1.4938271604938272e-05, "loss": 0.0394, "step": 6580 }, { "epoch": 350.99, "eval_accuracy": 0.24, "eval_loss": 4.89980936050415, "eval_runtime": 4.7836, "eval_samples_per_second": 125.428, "eval_steps_per_second": 3.972, "step": 6581 }, { "epoch": 351.47, "learning_rate": 1.4876543209876545e-05, "loss": 0.0465, "step": 6590 }, { "epoch": 352.0, "learning_rate": 1.4814814814814815e-05, "loss": 0.0507, "step": 6600 }, { "epoch": 352.0, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.930387496948242, "eval_runtime": 4.8303, "eval_samples_per_second": 124.216, "eval_steps_per_second": 3.934, "step": 6600 }, { "epoch": 352.53, "learning_rate": 1.4753086419753087e-05, "loss": 0.0263, "step": 6610 }, { "epoch": 352.96, "eval_accuracy": 0.23, "eval_loss": 4.923248767852783, "eval_runtime": 4.822, "eval_samples_per_second": 124.429, "eval_steps_per_second": 3.94, "step": 6618 }, { "epoch": 353.07, "learning_rate": 1.4691358024691359e-05, "loss": 0.0355, "step": 6620 }, { "epoch": 353.6, "learning_rate": 1.462962962962963e-05, "loss": 0.0395, "step": 6630 }, { "epoch": 353.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.92411470413208, "eval_runtime": 4.7975, "eval_samples_per_second": 125.066, "eval_steps_per_second": 3.96, "step": 6637 }, { "epoch": 354.13, "learning_rate": 1.4567901234567902e-05, "loss": 0.0258, "step": 6640 }, { "epoch": 354.67, "learning_rate": 1.4506172839506174e-05, "loss": 0.0394, "step": 6650 }, { "epoch": 354.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.926273345947266, "eval_runtime": 4.7995, "eval_samples_per_second": 125.013, "eval_steps_per_second": 3.959, "step": 6656 }, { "epoch": 355.2, "learning_rate": 1.4444444444444444e-05, "loss": 0.0345, "step": 6660 }, { "epoch": 355.73, "learning_rate": 1.4382716049382716e-05, "loss": 0.0391, "step": 6670 }, { "epoch": 356.0, "eval_accuracy": 0.26, "eval_loss": 4.927285671234131, "eval_runtime": 4.819, "eval_samples_per_second": 124.507, "eval_steps_per_second": 3.943, "step": 6675 }, { "epoch": 356.27, "learning_rate": 1.4320987654320988e-05, "loss": 0.0274, "step": 6680 }, { "epoch": 356.8, "learning_rate": 1.425925925925926e-05, "loss": 0.0647, "step": 6690 }, { "epoch": 356.96, "eval_accuracy": 0.2633333333333333, "eval_loss": 4.903398513793945, "eval_runtime": 4.8546, "eval_samples_per_second": 123.593, "eval_steps_per_second": 3.914, "step": 6693 }, { "epoch": 357.33, "learning_rate": 1.419753086419753e-05, "loss": 0.0427, "step": 6700 }, { "epoch": 357.87, "learning_rate": 1.4135802469135803e-05, "loss": 0.038, "step": 6710 }, { "epoch": 357.97, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.891026496887207, "eval_runtime": 4.8918, "eval_samples_per_second": 122.655, "eval_steps_per_second": 3.884, "step": 6712 }, { "epoch": 358.4, "learning_rate": 1.4074074074074075e-05, "loss": 0.0254, "step": 6720 }, { "epoch": 358.93, "learning_rate": 1.4012345679012345e-05, "loss": 0.0368, "step": 6730 }, { "epoch": 358.99, "eval_accuracy": 0.245, "eval_loss": 4.883033275604248, "eval_runtime": 4.8388, "eval_samples_per_second": 123.997, "eval_steps_per_second": 3.927, "step": 6731 }, { "epoch": 359.47, "learning_rate": 1.3950617283950617e-05, "loss": 0.0278, "step": 6740 }, { "epoch": 360.0, "learning_rate": 1.388888888888889e-05, "loss": 0.0308, "step": 6750 }, { "epoch": 360.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.886683940887451, "eval_runtime": 4.8656, "eval_samples_per_second": 123.316, "eval_steps_per_second": 3.905, "step": 6750 }, { "epoch": 360.53, "learning_rate": 1.382716049382716e-05, "loss": 0.0346, "step": 6760 }, { "epoch": 360.96, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.86568021774292, "eval_runtime": 4.8606, "eval_samples_per_second": 123.442, "eval_steps_per_second": 3.909, "step": 6768 }, { "epoch": 361.07, "learning_rate": 1.3765432098765432e-05, "loss": 0.0254, "step": 6770 }, { "epoch": 361.6, "learning_rate": 1.3703703703703704e-05, "loss": 0.0279, "step": 6780 }, { "epoch": 361.97, "eval_accuracy": 0.24, "eval_loss": 4.8677825927734375, "eval_runtime": 4.8214, "eval_samples_per_second": 124.446, "eval_steps_per_second": 3.941, "step": 6787 }, { "epoch": 362.13, "learning_rate": 1.3641975308641978e-05, "loss": 0.0367, "step": 6790 }, { "epoch": 362.67, "learning_rate": 1.3580246913580247e-05, "loss": 0.0443, "step": 6800 }, { "epoch": 362.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.872296333312988, "eval_runtime": 4.8453, "eval_samples_per_second": 123.832, "eval_steps_per_second": 3.921, "step": 6806 }, { "epoch": 363.2, "learning_rate": 1.3518518518518519e-05, "loss": 0.0224, "step": 6810 }, { "epoch": 363.73, "learning_rate": 1.3456790123456793e-05, "loss": 0.027, "step": 6820 }, { "epoch": 364.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.875555515289307, "eval_runtime": 4.8571, "eval_samples_per_second": 123.529, "eval_steps_per_second": 3.912, "step": 6825 }, { "epoch": 364.27, "learning_rate": 1.3395061728395061e-05, "loss": 0.0394, "step": 6830 }, { "epoch": 364.8, "learning_rate": 1.3333333333333333e-05, "loss": 0.0447, "step": 6840 }, { "epoch": 364.96, "eval_accuracy": 0.235, "eval_loss": 4.874227046966553, "eval_runtime": 4.8623, "eval_samples_per_second": 123.397, "eval_steps_per_second": 3.908, "step": 6843 }, { "epoch": 365.33, "learning_rate": 1.3271604938271605e-05, "loss": 0.0402, "step": 6850 }, { "epoch": 365.87, "learning_rate": 1.3209876543209876e-05, "loss": 0.028, "step": 6860 }, { "epoch": 365.97, "eval_accuracy": 0.235, "eval_loss": 4.904233455657959, "eval_runtime": 4.8676, "eval_samples_per_second": 123.264, "eval_steps_per_second": 3.903, "step": 6862 }, { "epoch": 366.4, "learning_rate": 1.3148148148148148e-05, "loss": 0.0306, "step": 6870 }, { "epoch": 366.93, "learning_rate": 1.308641975308642e-05, "loss": 0.0483, "step": 6880 }, { "epoch": 366.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.908579349517822, "eval_runtime": 4.8554, "eval_samples_per_second": 123.573, "eval_steps_per_second": 3.913, "step": 6881 }, { "epoch": 367.47, "learning_rate": 1.3024691358024694e-05, "loss": 0.0321, "step": 6890 }, { "epoch": 368.0, "learning_rate": 1.2962962962962962e-05, "loss": 0.034, "step": 6900 }, { "epoch": 368.0, "eval_accuracy": 0.24, "eval_loss": 4.888582229614258, "eval_runtime": 4.9075, "eval_samples_per_second": 122.261, "eval_steps_per_second": 3.872, "step": 6900 }, { "epoch": 368.53, "learning_rate": 1.2901234567901235e-05, "loss": 0.0363, "step": 6910 }, { "epoch": 368.96, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.877806186676025, "eval_runtime": 4.9146, "eval_samples_per_second": 122.085, "eval_steps_per_second": 3.866, "step": 6918 }, { "epoch": 369.07, "learning_rate": 1.2839506172839508e-05, "loss": 0.0401, "step": 6920 }, { "epoch": 369.6, "learning_rate": 1.2777777777777777e-05, "loss": 0.0417, "step": 6930 }, { "epoch": 369.97, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.905084609985352, "eval_runtime": 4.8859, "eval_samples_per_second": 122.802, "eval_steps_per_second": 3.889, "step": 6937 }, { "epoch": 370.13, "learning_rate": 1.2716049382716049e-05, "loss": 0.0338, "step": 6940 }, { "epoch": 370.67, "learning_rate": 1.2654320987654323e-05, "loss": 0.0326, "step": 6950 }, { "epoch": 370.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.911184310913086, "eval_runtime": 4.8779, "eval_samples_per_second": 123.003, "eval_steps_per_second": 3.895, "step": 6956 }, { "epoch": 371.2, "learning_rate": 1.2592592592592592e-05, "loss": 0.0261, "step": 6960 }, { "epoch": 371.73, "learning_rate": 1.2530864197530864e-05, "loss": 0.028, "step": 6970 }, { "epoch": 372.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.911579608917236, "eval_runtime": 4.906, "eval_samples_per_second": 122.299, "eval_steps_per_second": 3.873, "step": 6975 }, { "epoch": 372.27, "learning_rate": 1.2469135802469137e-05, "loss": 0.0196, "step": 6980 }, { "epoch": 372.8, "learning_rate": 1.2407407407407408e-05, "loss": 0.0343, "step": 6990 }, { "epoch": 372.96, "eval_accuracy": 0.245, "eval_loss": 4.910400390625, "eval_runtime": 4.8963, "eval_samples_per_second": 122.541, "eval_steps_per_second": 3.88, "step": 6993 }, { "epoch": 373.33, "learning_rate": 1.2345679012345678e-05, "loss": 0.0246, "step": 7000 }, { "epoch": 373.87, "learning_rate": 1.2283950617283952e-05, "loss": 0.0229, "step": 7010 }, { "epoch": 373.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.940114974975586, "eval_runtime": 4.8862, "eval_samples_per_second": 122.794, "eval_steps_per_second": 3.888, "step": 7012 }, { "epoch": 374.4, "learning_rate": 1.2222222222222222e-05, "loss": 0.0235, "step": 7020 }, { "epoch": 374.93, "learning_rate": 1.2160493827160495e-05, "loss": 0.0337, "step": 7030 }, { "epoch": 374.99, "eval_accuracy": 0.245, "eval_loss": 4.934114933013916, "eval_runtime": 4.9143, "eval_samples_per_second": 122.092, "eval_steps_per_second": 3.866, "step": 7031 }, { "epoch": 375.47, "learning_rate": 1.2098765432098767e-05, "loss": 0.0334, "step": 7040 }, { "epoch": 376.0, "learning_rate": 1.2037037037037037e-05, "loss": 0.0356, "step": 7050 }, { "epoch": 376.0, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.933629035949707, "eval_runtime": 4.9706, "eval_samples_per_second": 120.709, "eval_steps_per_second": 3.822, "step": 7050 }, { "epoch": 376.53, "learning_rate": 1.1975308641975309e-05, "loss": 0.029, "step": 7060 }, { "epoch": 376.96, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.913231372833252, "eval_runtime": 4.888, "eval_samples_per_second": 122.75, "eval_steps_per_second": 3.887, "step": 7068 }, { "epoch": 377.07, "learning_rate": 1.1913580246913581e-05, "loss": 0.0348, "step": 7070 }, { "epoch": 377.6, "learning_rate": 1.1851851851851853e-05, "loss": 0.0272, "step": 7080 }, { "epoch": 377.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.910186290740967, "eval_runtime": 4.9298, "eval_samples_per_second": 121.709, "eval_steps_per_second": 3.854, "step": 7087 }, { "epoch": 378.13, "learning_rate": 1.1790123456790124e-05, "loss": 0.0269, "step": 7090 }, { "epoch": 378.67, "learning_rate": 1.1728395061728396e-05, "loss": 0.0256, "step": 7100 }, { "epoch": 378.99, "eval_accuracy": 0.23166666666666666, "eval_loss": 4.925504207611084, "eval_runtime": 4.948, "eval_samples_per_second": 121.26, "eval_steps_per_second": 3.84, "step": 7106 }, { "epoch": 379.2, "learning_rate": 1.1666666666666668e-05, "loss": 0.0315, "step": 7110 }, { "epoch": 379.73, "learning_rate": 1.1604938271604938e-05, "loss": 0.0276, "step": 7120 }, { "epoch": 380.0, "eval_accuracy": 0.22666666666666666, "eval_loss": 4.928166389465332, "eval_runtime": 4.9278, "eval_samples_per_second": 121.757, "eval_steps_per_second": 3.856, "step": 7125 }, { "epoch": 380.27, "learning_rate": 1.154320987654321e-05, "loss": 0.0286, "step": 7130 }, { "epoch": 380.8, "learning_rate": 1.1481481481481482e-05, "loss": 0.026, "step": 7140 }, { "epoch": 380.96, "eval_accuracy": 0.22, "eval_loss": 4.952660083770752, "eval_runtime": 4.9502, "eval_samples_per_second": 121.208, "eval_steps_per_second": 3.838, "step": 7143 }, { "epoch": 381.33, "learning_rate": 1.1419753086419753e-05, "loss": 0.031, "step": 7150 }, { "epoch": 381.87, "learning_rate": 1.1358024691358025e-05, "loss": 0.0385, "step": 7160 }, { "epoch": 381.97, "eval_accuracy": 0.22166666666666668, "eval_loss": 4.941068649291992, "eval_runtime": 4.9601, "eval_samples_per_second": 120.966, "eval_steps_per_second": 3.831, "step": 7162 }, { "epoch": 382.4, "learning_rate": 1.1296296296296297e-05, "loss": 0.0269, "step": 7170 }, { "epoch": 382.93, "learning_rate": 1.123456790123457e-05, "loss": 0.026, "step": 7180 }, { "epoch": 382.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.952951908111572, "eval_runtime": 4.9497, "eval_samples_per_second": 121.22, "eval_steps_per_second": 3.839, "step": 7181 }, { "epoch": 383.47, "learning_rate": 1.117283950617284e-05, "loss": 0.0262, "step": 7190 }, { "epoch": 384.0, "learning_rate": 1.1111111111111112e-05, "loss": 0.0444, "step": 7200 }, { "epoch": 384.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.9387335777282715, "eval_runtime": 4.9515, "eval_samples_per_second": 121.176, "eval_steps_per_second": 3.837, "step": 7200 }, { "epoch": 384.53, "learning_rate": 1.1049382716049384e-05, "loss": 0.0369, "step": 7210 }, { "epoch": 384.96, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.904226303100586, "eval_runtime": 4.9498, "eval_samples_per_second": 121.217, "eval_steps_per_second": 3.839, "step": 7218 }, { "epoch": 385.07, "learning_rate": 1.0987654320987654e-05, "loss": 0.0258, "step": 7220 }, { "epoch": 385.6, "learning_rate": 1.0925925925925926e-05, "loss": 0.0203, "step": 7230 }, { "epoch": 385.97, "eval_accuracy": 0.23, "eval_loss": 4.886034965515137, "eval_runtime": 4.9753, "eval_samples_per_second": 120.596, "eval_steps_per_second": 3.819, "step": 7237 }, { "epoch": 386.13, "learning_rate": 1.0864197530864198e-05, "loss": 0.0332, "step": 7240 }, { "epoch": 386.67, "learning_rate": 1.0802469135802469e-05, "loss": 0.0238, "step": 7250 }, { "epoch": 386.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.877529144287109, "eval_runtime": 4.9631, "eval_samples_per_second": 120.891, "eval_steps_per_second": 3.828, "step": 7256 }, { "epoch": 387.2, "learning_rate": 1.074074074074074e-05, "loss": 0.0266, "step": 7260 }, { "epoch": 387.73, "learning_rate": 1.0679012345679013e-05, "loss": 0.0315, "step": 7270 }, { "epoch": 388.0, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.864088535308838, "eval_runtime": 4.9534, "eval_samples_per_second": 121.128, "eval_steps_per_second": 3.836, "step": 7275 }, { "epoch": 388.27, "learning_rate": 1.0617283950617285e-05, "loss": 0.0208, "step": 7280 }, { "epoch": 388.8, "learning_rate": 1.0555555555555555e-05, "loss": 0.0349, "step": 7290 }, { "epoch": 388.96, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.86765718460083, "eval_runtime": 4.9642, "eval_samples_per_second": 120.866, "eval_steps_per_second": 3.827, "step": 7293 }, { "epoch": 389.33, "learning_rate": 1.0493827160493827e-05, "loss": 0.0336, "step": 7300 }, { "epoch": 389.87, "learning_rate": 1.04320987654321e-05, "loss": 0.038, "step": 7310 }, { "epoch": 389.97, "eval_accuracy": 0.24, "eval_loss": 4.868815898895264, "eval_runtime": 4.9771, "eval_samples_per_second": 120.552, "eval_steps_per_second": 3.817, "step": 7312 }, { "epoch": 390.4, "learning_rate": 1.037037037037037e-05, "loss": 0.0188, "step": 7320 }, { "epoch": 390.93, "learning_rate": 1.0308641975308644e-05, "loss": 0.0301, "step": 7330 }, { "epoch": 390.99, "eval_accuracy": 0.245, "eval_loss": 4.8932037353515625, "eval_runtime": 4.9575, "eval_samples_per_second": 121.03, "eval_steps_per_second": 3.833, "step": 7331 }, { "epoch": 391.47, "learning_rate": 1.0246913580246914e-05, "loss": 0.0418, "step": 7340 }, { "epoch": 392.0, "learning_rate": 1.0185185185185185e-05, "loss": 0.0363, "step": 7350 }, { "epoch": 392.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.902304172515869, "eval_runtime": 4.9867, "eval_samples_per_second": 120.321, "eval_steps_per_second": 3.81, "step": 7350 }, { "epoch": 392.53, "learning_rate": 1.0123456790123458e-05, "loss": 0.0329, "step": 7360 }, { "epoch": 392.96, "eval_accuracy": 0.24, "eval_loss": 4.882464408874512, "eval_runtime": 4.9782, "eval_samples_per_second": 120.525, "eval_steps_per_second": 3.817, "step": 7368 }, { "epoch": 393.07, "learning_rate": 1.0061728395061729e-05, "loss": 0.0255, "step": 7370 }, { "epoch": 393.6, "learning_rate": 1e-05, "loss": 0.0174, "step": 7380 }, { "epoch": 393.97, "eval_accuracy": 0.24, "eval_loss": 4.87109375, "eval_runtime": 4.9821, "eval_samples_per_second": 120.431, "eval_steps_per_second": 3.814, "step": 7387 }, { "epoch": 394.13, "learning_rate": 9.938271604938273e-06, "loss": 0.0256, "step": 7390 }, { "epoch": 394.67, "learning_rate": 9.876543209876543e-06, "loss": 0.0284, "step": 7400 }, { "epoch": 394.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.876201152801514, "eval_runtime": 5.0563, "eval_samples_per_second": 118.663, "eval_steps_per_second": 3.758, "step": 7406 }, { "epoch": 395.2, "learning_rate": 9.814814814814815e-06, "loss": 0.0458, "step": 7410 }, { "epoch": 395.73, "learning_rate": 9.753086419753086e-06, "loss": 0.0178, "step": 7420 }, { "epoch": 396.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.868426322937012, "eval_runtime": 5.0279, "eval_samples_per_second": 119.333, "eval_steps_per_second": 3.779, "step": 7425 }, { "epoch": 396.27, "learning_rate": 9.69135802469136e-06, "loss": 0.0154, "step": 7430 }, { "epoch": 396.8, "learning_rate": 9.62962962962963e-06, "loss": 0.0359, "step": 7440 }, { "epoch": 396.96, "eval_accuracy": 0.245, "eval_loss": 4.865981578826904, "eval_runtime": 5.0157, "eval_samples_per_second": 119.625, "eval_steps_per_second": 3.788, "step": 7443 }, { "epoch": 397.33, "learning_rate": 9.5679012345679e-06, "loss": 0.0295, "step": 7450 }, { "epoch": 397.87, "learning_rate": 9.506172839506174e-06, "loss": 0.029, "step": 7460 }, { "epoch": 397.97, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.879904270172119, "eval_runtime": 5.0461, "eval_samples_per_second": 118.904, "eval_steps_per_second": 3.765, "step": 7462 }, { "epoch": 398.4, "learning_rate": 9.444444444444445e-06, "loss": 0.0368, "step": 7470 }, { "epoch": 398.93, "learning_rate": 9.382716049382717e-06, "loss": 0.0227, "step": 7480 }, { "epoch": 398.99, "eval_accuracy": 0.25, "eval_loss": 4.884500503540039, "eval_runtime": 5.007, "eval_samples_per_second": 119.833, "eval_steps_per_second": 3.795, "step": 7481 }, { "epoch": 399.47, "learning_rate": 9.320987654320989e-06, "loss": 0.0338, "step": 7490 }, { "epoch": 400.0, "learning_rate": 9.259259259259259e-06, "loss": 0.0135, "step": 7500 }, { "epoch": 400.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.889830112457275, "eval_runtime": 5.0086, "eval_samples_per_second": 119.793, "eval_steps_per_second": 3.793, "step": 7500 }, { "epoch": 400.53, "learning_rate": 9.197530864197531e-06, "loss": 0.0297, "step": 7510 }, { "epoch": 400.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.896719455718994, "eval_runtime": 5.0281, "eval_samples_per_second": 119.329, "eval_steps_per_second": 3.779, "step": 7518 }, { "epoch": 401.07, "learning_rate": 9.135802469135803e-06, "loss": 0.0364, "step": 7520 }, { "epoch": 401.6, "learning_rate": 9.074074074074075e-06, "loss": 0.0263, "step": 7530 }, { "epoch": 401.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.888412952423096, "eval_runtime": 5.0155, "eval_samples_per_second": 119.63, "eval_steps_per_second": 3.788, "step": 7537 }, { "epoch": 402.13, "learning_rate": 9.012345679012346e-06, "loss": 0.035, "step": 7540 }, { "epoch": 402.67, "learning_rate": 8.950617283950618e-06, "loss": 0.0386, "step": 7550 }, { "epoch": 402.99, "eval_accuracy": 0.24, "eval_loss": 4.871886730194092, "eval_runtime": 4.9851, "eval_samples_per_second": 120.358, "eval_steps_per_second": 3.811, "step": 7556 }, { "epoch": 403.2, "learning_rate": 8.88888888888889e-06, "loss": 0.0339, "step": 7560 }, { "epoch": 403.73, "learning_rate": 8.82716049382716e-06, "loss": 0.0298, "step": 7570 }, { "epoch": 404.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.860945701599121, "eval_runtime": 5.0553, "eval_samples_per_second": 118.688, "eval_steps_per_second": 3.758, "step": 7575 }, { "epoch": 404.27, "learning_rate": 8.765432098765432e-06, "loss": 0.0318, "step": 7580 }, { "epoch": 404.8, "learning_rate": 8.703703703703705e-06, "loss": 0.0232, "step": 7590 }, { "epoch": 404.96, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.860249996185303, "eval_runtime": 5.0445, "eval_samples_per_second": 118.942, "eval_steps_per_second": 3.767, "step": 7593 }, { "epoch": 405.33, "learning_rate": 8.641975308641975e-06, "loss": 0.0367, "step": 7600 }, { "epoch": 405.87, "learning_rate": 8.580246913580247e-06, "loss": 0.0232, "step": 7610 }, { "epoch": 405.97, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.866739749908447, "eval_runtime": 5.0222, "eval_samples_per_second": 119.47, "eval_steps_per_second": 3.783, "step": 7612 }, { "epoch": 406.4, "learning_rate": 8.518518518518519e-06, "loss": 0.0223, "step": 7620 }, { "epoch": 406.93, "learning_rate": 8.456790123456791e-06, "loss": 0.032, "step": 7630 }, { "epoch": 406.99, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.8684258460998535, "eval_runtime": 5.0294, "eval_samples_per_second": 119.298, "eval_steps_per_second": 3.778, "step": 7631 }, { "epoch": 407.47, "learning_rate": 8.395061728395062e-06, "loss": 0.0272, "step": 7640 }, { "epoch": 408.0, "learning_rate": 8.333333333333334e-06, "loss": 0.0306, "step": 7650 }, { "epoch": 408.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.875509262084961, "eval_runtime": 5.0495, "eval_samples_per_second": 118.823, "eval_steps_per_second": 3.763, "step": 7650 }, { "epoch": 408.53, "learning_rate": 8.271604938271606e-06, "loss": 0.0299, "step": 7660 }, { "epoch": 408.96, "eval_accuracy": 0.245, "eval_loss": 4.868679046630859, "eval_runtime": 5.094, "eval_samples_per_second": 117.785, "eval_steps_per_second": 3.73, "step": 7668 }, { "epoch": 409.07, "learning_rate": 8.209876543209876e-06, "loss": 0.0402, "step": 7670 }, { "epoch": 409.6, "learning_rate": 8.14814814814815e-06, "loss": 0.0307, "step": 7680 }, { "epoch": 409.97, "eval_accuracy": 0.24, "eval_loss": 4.872376441955566, "eval_runtime": 5.0699, "eval_samples_per_second": 118.345, "eval_steps_per_second": 3.748, "step": 7687 }, { "epoch": 410.13, "learning_rate": 8.08641975308642e-06, "loss": 0.0315, "step": 7690 }, { "epoch": 410.67, "learning_rate": 8.02469135802469e-06, "loss": 0.0304, "step": 7700 }, { "epoch": 410.99, "eval_accuracy": 0.25, "eval_loss": 4.879815101623535, "eval_runtime": 5.0824, "eval_samples_per_second": 118.054, "eval_steps_per_second": 3.738, "step": 7706 }, { "epoch": 411.2, "learning_rate": 7.962962962962963e-06, "loss": 0.039, "step": 7710 }, { "epoch": 411.73, "learning_rate": 7.901234567901235e-06, "loss": 0.0293, "step": 7720 }, { "epoch": 412.0, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.890056133270264, "eval_runtime": 5.1118, "eval_samples_per_second": 117.376, "eval_steps_per_second": 3.717, "step": 7725 }, { "epoch": 412.27, "learning_rate": 7.839506172839507e-06, "loss": 0.0276, "step": 7730 }, { "epoch": 412.8, "learning_rate": 7.777777777777777e-06, "loss": 0.0273, "step": 7740 }, { "epoch": 412.96, "eval_accuracy": 0.24, "eval_loss": 4.902527332305908, "eval_runtime": 5.0871, "eval_samples_per_second": 117.946, "eval_steps_per_second": 3.735, "step": 7743 }, { "epoch": 413.33, "learning_rate": 7.71604938271605e-06, "loss": 0.0334, "step": 7750 }, { "epoch": 413.87, "learning_rate": 7.654320987654322e-06, "loss": 0.0184, "step": 7760 }, { "epoch": 413.97, "eval_accuracy": 0.24, "eval_loss": 4.886964321136475, "eval_runtime": 5.0738, "eval_samples_per_second": 118.254, "eval_steps_per_second": 3.745, "step": 7762 }, { "epoch": 414.4, "learning_rate": 7.592592592592593e-06, "loss": 0.0227, "step": 7770 }, { "epoch": 414.93, "learning_rate": 7.530864197530865e-06, "loss": 0.0377, "step": 7780 }, { "epoch": 414.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.890088081359863, "eval_runtime": 5.0985, "eval_samples_per_second": 117.682, "eval_steps_per_second": 3.727, "step": 7781 }, { "epoch": 415.47, "learning_rate": 7.469135802469136e-06, "loss": 0.0284, "step": 7790 }, { "epoch": 416.0, "learning_rate": 7.4074074074074075e-06, "loss": 0.0278, "step": 7800 }, { "epoch": 416.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.889472484588623, "eval_runtime": 5.0982, "eval_samples_per_second": 117.689, "eval_steps_per_second": 3.727, "step": 7800 }, { "epoch": 416.53, "learning_rate": 7.3456790123456796e-06, "loss": 0.0345, "step": 7810 }, { "epoch": 416.96, "eval_accuracy": 0.25333333333333335, "eval_loss": 4.904553413391113, "eval_runtime": 5.0835, "eval_samples_per_second": 118.028, "eval_steps_per_second": 3.738, "step": 7818 }, { "epoch": 417.07, "learning_rate": 7.283950617283951e-06, "loss": 0.0376, "step": 7820 }, { "epoch": 417.6, "learning_rate": 7.222222222222222e-06, "loss": 0.0301, "step": 7830 }, { "epoch": 417.97, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.900204181671143, "eval_runtime": 5.1232, "eval_samples_per_second": 117.114, "eval_steps_per_second": 3.709, "step": 7837 }, { "epoch": 418.13, "learning_rate": 7.160493827160494e-06, "loss": 0.0204, "step": 7840 }, { "epoch": 418.67, "learning_rate": 7.098765432098765e-06, "loss": 0.0159, "step": 7850 }, { "epoch": 418.99, "eval_accuracy": 0.245, "eval_loss": 4.89817476272583, "eval_runtime": 5.1276, "eval_samples_per_second": 117.015, "eval_steps_per_second": 3.705, "step": 7856 }, { "epoch": 419.2, "learning_rate": 7.0370370370370375e-06, "loss": 0.0217, "step": 7860 }, { "epoch": 419.73, "learning_rate": 6.975308641975309e-06, "loss": 0.0203, "step": 7870 }, { "epoch": 420.0, "eval_accuracy": 0.24833333333333332, "eval_loss": 4.900780200958252, "eval_runtime": 5.1291, "eval_samples_per_second": 116.98, "eval_steps_per_second": 3.704, "step": 7875 }, { "epoch": 420.27, "learning_rate": 6.91358024691358e-06, "loss": 0.0295, "step": 7880 }, { "epoch": 420.8, "learning_rate": 6.851851851851852e-06, "loss": 0.0182, "step": 7890 }, { "epoch": 420.96, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.9113335609436035, "eval_runtime": 5.1351, "eval_samples_per_second": 116.842, "eval_steps_per_second": 3.7, "step": 7893 }, { "epoch": 421.33, "learning_rate": 6.790123456790123e-06, "loss": 0.0321, "step": 7900 }, { "epoch": 421.87, "learning_rate": 6.728395061728396e-06, "loss": 0.0258, "step": 7910 }, { "epoch": 421.97, "eval_accuracy": 0.25, "eval_loss": 4.918017387390137, "eval_runtime": 5.1337, "eval_samples_per_second": 116.875, "eval_steps_per_second": 3.701, "step": 7912 }, { "epoch": 422.4, "learning_rate": 6.666666666666667e-06, "loss": 0.0277, "step": 7920 }, { "epoch": 422.93, "learning_rate": 6.604938271604938e-06, "loss": 0.0266, "step": 7930 }, { "epoch": 422.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.913443565368652, "eval_runtime": 5.1296, "eval_samples_per_second": 116.969, "eval_steps_per_second": 3.704, "step": 7931 }, { "epoch": 423.47, "learning_rate": 6.54320987654321e-06, "loss": 0.037, "step": 7940 }, { "epoch": 424.0, "learning_rate": 6.481481481481481e-06, "loss": 0.0304, "step": 7950 }, { "epoch": 424.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.900519847869873, "eval_runtime": 5.1632, "eval_samples_per_second": 116.207, "eval_steps_per_second": 3.68, "step": 7950 }, { "epoch": 424.53, "learning_rate": 6.419753086419754e-06, "loss": 0.0247, "step": 7960 }, { "epoch": 424.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.893669605255127, "eval_runtime": 5.2111, "eval_samples_per_second": 115.139, "eval_steps_per_second": 3.646, "step": 7968 }, { "epoch": 425.07, "learning_rate": 6.3580246913580246e-06, "loss": 0.0335, "step": 7970 }, { "epoch": 425.6, "learning_rate": 6.296296296296296e-06, "loss": 0.0493, "step": 7980 }, { "epoch": 425.97, "eval_accuracy": 0.245, "eval_loss": 4.883533954620361, "eval_runtime": 5.1746, "eval_samples_per_second": 115.951, "eval_steps_per_second": 3.672, "step": 7987 }, { "epoch": 426.13, "learning_rate": 6.234567901234569e-06, "loss": 0.0157, "step": 7990 }, { "epoch": 426.67, "learning_rate": 6.172839506172839e-06, "loss": 0.0286, "step": 8000 }, { "epoch": 426.99, "eval_accuracy": 0.24, "eval_loss": 4.896754741668701, "eval_runtime": 5.1903, "eval_samples_per_second": 115.6, "eval_steps_per_second": 3.661, "step": 8006 }, { "epoch": 427.2, "learning_rate": 6.111111111111111e-06, "loss": 0.0249, "step": 8010 }, { "epoch": 427.73, "learning_rate": 6.049382716049383e-06, "loss": 0.0228, "step": 8020 }, { "epoch": 428.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.906573295593262, "eval_runtime": 5.2526, "eval_samples_per_second": 114.23, "eval_steps_per_second": 3.617, "step": 8025 }, { "epoch": 428.27, "learning_rate": 5.9876543209876546e-06, "loss": 0.0324, "step": 8030 }, { "epoch": 428.8, "learning_rate": 5.925925925925927e-06, "loss": 0.0362, "step": 8040 }, { "epoch": 428.96, "eval_accuracy": 0.245, "eval_loss": 4.903099536895752, "eval_runtime": 5.2207, "eval_samples_per_second": 114.927, "eval_steps_per_second": 3.639, "step": 8043 }, { "epoch": 429.33, "learning_rate": 5.864197530864198e-06, "loss": 0.0213, "step": 8050 }, { "epoch": 429.87, "learning_rate": 5.802469135802469e-06, "loss": 0.0244, "step": 8060 }, { "epoch": 429.97, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.899692535400391, "eval_runtime": 5.2551, "eval_samples_per_second": 114.174, "eval_steps_per_second": 3.616, "step": 8062 }, { "epoch": 430.4, "learning_rate": 5.740740740740741e-06, "loss": 0.0405, "step": 8070 }, { "epoch": 430.93, "learning_rate": 5.6790123456790125e-06, "loss": 0.0204, "step": 8080 }, { "epoch": 430.99, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.905935287475586, "eval_runtime": 5.2136, "eval_samples_per_second": 115.084, "eval_steps_per_second": 3.644, "step": 8081 }, { "epoch": 431.47, "learning_rate": 5.617283950617285e-06, "loss": 0.0175, "step": 8090 }, { "epoch": 432.0, "learning_rate": 5.555555555555556e-06, "loss": 0.0344, "step": 8100 }, { "epoch": 432.0, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.905205726623535, "eval_runtime": 5.2045, "eval_samples_per_second": 115.286, "eval_steps_per_second": 3.651, "step": 8100 }, { "epoch": 432.53, "learning_rate": 5.493827160493827e-06, "loss": 0.0252, "step": 8110 }, { "epoch": 432.96, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.897459983825684, "eval_runtime": 5.1933, "eval_samples_per_second": 115.533, "eval_steps_per_second": 3.659, "step": 8118 }, { "epoch": 433.07, "learning_rate": 5.432098765432099e-06, "loss": 0.0185, "step": 8120 }, { "epoch": 433.6, "learning_rate": 5.37037037037037e-06, "loss": 0.0242, "step": 8130 }, { "epoch": 433.97, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.896070957183838, "eval_runtime": 5.2428, "eval_samples_per_second": 114.443, "eval_steps_per_second": 3.624, "step": 8137 }, { "epoch": 434.13, "learning_rate": 5.3086419753086425e-06, "loss": 0.0372, "step": 8140 }, { "epoch": 434.67, "learning_rate": 5.246913580246914e-06, "loss": 0.0135, "step": 8150 }, { "epoch": 434.99, "eval_accuracy": 0.24666666666666667, "eval_loss": 4.908581733703613, "eval_runtime": 5.248, "eval_samples_per_second": 114.329, "eval_steps_per_second": 3.62, "step": 8156 }, { "epoch": 435.2, "learning_rate": 5.185185185185185e-06, "loss": 0.0322, "step": 8160 }, { "epoch": 435.73, "learning_rate": 5.123456790123457e-06, "loss": 0.0296, "step": 8170 }, { "epoch": 436.0, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.9134602546691895, "eval_runtime": 5.2357, "eval_samples_per_second": 114.598, "eval_steps_per_second": 3.629, "step": 8175 }, { "epoch": 436.27, "learning_rate": 5.061728395061729e-06, "loss": 0.0164, "step": 8180 }, { "epoch": 436.8, "learning_rate": 5e-06, "loss": 0.0432, "step": 8190 }, { "epoch": 436.96, "eval_accuracy": 0.24333333333333335, "eval_loss": 4.907933712005615, "eval_runtime": 5.268, "eval_samples_per_second": 113.896, "eval_steps_per_second": 3.607, "step": 8193 }, { "epoch": 437.33, "learning_rate": 4.938271604938272e-06, "loss": 0.017, "step": 8200 }, { "epoch": 437.87, "learning_rate": 4.876543209876543e-06, "loss": 0.0242, "step": 8210 }, { "epoch": 437.97, "eval_accuracy": 0.24, "eval_loss": 4.898138046264648, "eval_runtime": 5.2464, "eval_samples_per_second": 114.365, "eval_steps_per_second": 3.622, "step": 8212 }, { "epoch": 438.4, "learning_rate": 4.814814814814815e-06, "loss": 0.026, "step": 8220 }, { "epoch": 438.93, "learning_rate": 4.753086419753087e-06, "loss": 0.0227, "step": 8230 }, { "epoch": 438.99, "eval_accuracy": 0.24, "eval_loss": 4.8857102394104, "eval_runtime": 5.264, "eval_samples_per_second": 113.981, "eval_steps_per_second": 3.609, "step": 8231 }, { "epoch": 439.47, "learning_rate": 4.691358024691358e-06, "loss": 0.0309, "step": 8240 }, { "epoch": 440.0, "learning_rate": 4.6296296296296296e-06, "loss": 0.021, "step": 8250 }, { "epoch": 440.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.887371063232422, "eval_runtime": 5.2856, "eval_samples_per_second": 113.516, "eval_steps_per_second": 3.595, "step": 8250 }, { "epoch": 440.53, "learning_rate": 4.567901234567902e-06, "loss": 0.0244, "step": 8260 }, { "epoch": 440.96, "eval_accuracy": 0.24, "eval_loss": 4.884664058685303, "eval_runtime": 5.2546, "eval_samples_per_second": 114.187, "eval_steps_per_second": 3.616, "step": 8268 }, { "epoch": 441.07, "learning_rate": 4.506172839506173e-06, "loss": 0.0484, "step": 8270 }, { "epoch": 441.6, "learning_rate": 4.444444444444445e-06, "loss": 0.0234, "step": 8280 }, { "epoch": 441.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.896438121795654, "eval_runtime": 5.2769, "eval_samples_per_second": 113.703, "eval_steps_per_second": 3.601, "step": 8287 }, { "epoch": 442.13, "learning_rate": 4.382716049382716e-06, "loss": 0.0293, "step": 8290 }, { "epoch": 442.67, "learning_rate": 4.3209876543209875e-06, "loss": 0.0278, "step": 8300 }, { "epoch": 442.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.916093349456787, "eval_runtime": 5.2536, "eval_samples_per_second": 114.207, "eval_steps_per_second": 3.617, "step": 8306 }, { "epoch": 443.2, "learning_rate": 4.2592592592592596e-06, "loss": 0.0224, "step": 8310 }, { "epoch": 443.73, "learning_rate": 4.197530864197531e-06, "loss": 0.0322, "step": 8320 }, { "epoch": 444.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.921158790588379, "eval_runtime": 5.2757, "eval_samples_per_second": 113.729, "eval_steps_per_second": 3.601, "step": 8325 }, { "epoch": 444.27, "learning_rate": 4.135802469135803e-06, "loss": 0.0201, "step": 8330 }, { "epoch": 444.8, "learning_rate": 4.074074074074075e-06, "loss": 0.038, "step": 8340 }, { "epoch": 444.96, "eval_accuracy": 0.24, "eval_loss": 4.925096035003662, "eval_runtime": 5.3078, "eval_samples_per_second": 113.041, "eval_steps_per_second": 3.58, "step": 8343 }, { "epoch": 445.33, "learning_rate": 4.012345679012345e-06, "loss": 0.035, "step": 8350 }, { "epoch": 445.87, "learning_rate": 3.9506172839506175e-06, "loss": 0.0327, "step": 8360 }, { "epoch": 445.97, "eval_accuracy": 0.24, "eval_loss": 4.933958530426025, "eval_runtime": 5.2639, "eval_samples_per_second": 113.985, "eval_steps_per_second": 3.61, "step": 8362 }, { "epoch": 446.4, "learning_rate": 3.888888888888889e-06, "loss": 0.0226, "step": 8370 }, { "epoch": 446.93, "learning_rate": 3.827160493827161e-06, "loss": 0.0256, "step": 8380 }, { "epoch": 446.99, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.924610614776611, "eval_runtime": 5.2666, "eval_samples_per_second": 113.926, "eval_steps_per_second": 3.608, "step": 8381 }, { "epoch": 447.47, "learning_rate": 3.7654320987654325e-06, "loss": 0.027, "step": 8390 }, { "epoch": 448.0, "learning_rate": 3.7037037037037037e-06, "loss": 0.0327, "step": 8400 }, { "epoch": 448.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.929351329803467, "eval_runtime": 5.2953, "eval_samples_per_second": 113.308, "eval_steps_per_second": 3.588, "step": 8400 }, { "epoch": 448.53, "learning_rate": 3.6419753086419754e-06, "loss": 0.0246, "step": 8410 }, { "epoch": 448.96, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.931128025054932, "eval_runtime": 5.3144, "eval_samples_per_second": 112.901, "eval_steps_per_second": 3.575, "step": 8418 }, { "epoch": 449.07, "learning_rate": 3.580246913580247e-06, "loss": 0.027, "step": 8420 }, { "epoch": 449.6, "learning_rate": 3.5185185185185187e-06, "loss": 0.0239, "step": 8430 }, { "epoch": 449.97, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.92203950881958, "eval_runtime": 5.2899, "eval_samples_per_second": 113.424, "eval_steps_per_second": 3.592, "step": 8437 }, { "epoch": 450.13, "learning_rate": 3.45679012345679e-06, "loss": 0.0313, "step": 8440 }, { "epoch": 450.67, "learning_rate": 3.3950617283950617e-06, "loss": 0.0219, "step": 8450 }, { "epoch": 450.99, "eval_accuracy": 0.24, "eval_loss": 4.920533180236816, "eval_runtime": 5.4385, "eval_samples_per_second": 110.325, "eval_steps_per_second": 3.494, "step": 8456 }, { "epoch": 451.2, "learning_rate": 3.3333333333333333e-06, "loss": 0.0214, "step": 8460 }, { "epoch": 451.73, "learning_rate": 3.271604938271605e-06, "loss": 0.0287, "step": 8470 }, { "epoch": 452.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.924895763397217, "eval_runtime": 5.3449, "eval_samples_per_second": 112.256, "eval_steps_per_second": 3.555, "step": 8475 }, { "epoch": 452.27, "learning_rate": 3.209876543209877e-06, "loss": 0.0226, "step": 8480 }, { "epoch": 452.8, "learning_rate": 3.148148148148148e-06, "loss": 0.0244, "step": 8490 }, { "epoch": 452.96, "eval_accuracy": 0.24, "eval_loss": 4.927518844604492, "eval_runtime": 5.3234, "eval_samples_per_second": 112.711, "eval_steps_per_second": 3.569, "step": 8493 }, { "epoch": 453.33, "learning_rate": 3.0864197530864196e-06, "loss": 0.0215, "step": 8500 }, { "epoch": 453.87, "learning_rate": 3.0246913580246917e-06, "loss": 0.0222, "step": 8510 }, { "epoch": 453.97, "eval_accuracy": 0.24166666666666667, "eval_loss": 4.932238578796387, "eval_runtime": 5.3268, "eval_samples_per_second": 112.637, "eval_steps_per_second": 3.567, "step": 8512 }, { "epoch": 454.4, "learning_rate": 2.9629629629629633e-06, "loss": 0.0269, "step": 8520 }, { "epoch": 454.93, "learning_rate": 2.9012345679012346e-06, "loss": 0.0277, "step": 8530 }, { "epoch": 454.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.931756019592285, "eval_runtime": 5.3343, "eval_samples_per_second": 112.48, "eval_steps_per_second": 3.562, "step": 8531 }, { "epoch": 455.47, "learning_rate": 2.8395061728395062e-06, "loss": 0.0355, "step": 8540 }, { "epoch": 456.0, "learning_rate": 2.777777777777778e-06, "loss": 0.0315, "step": 8550 }, { "epoch": 456.0, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.929112911224365, "eval_runtime": 5.4018, "eval_samples_per_second": 111.075, "eval_steps_per_second": 3.517, "step": 8550 }, { "epoch": 456.53, "learning_rate": 2.7160493827160496e-06, "loss": 0.021, "step": 8560 }, { "epoch": 456.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.929343223571777, "eval_runtime": 5.3767, "eval_samples_per_second": 111.594, "eval_steps_per_second": 3.534, "step": 8568 }, { "epoch": 457.07, "learning_rate": 2.6543209876543212e-06, "loss": 0.0232, "step": 8570 }, { "epoch": 457.6, "learning_rate": 2.5925925925925925e-06, "loss": 0.0288, "step": 8580 }, { "epoch": 457.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.923261642456055, "eval_runtime": 5.3734, "eval_samples_per_second": 111.661, "eval_steps_per_second": 3.536, "step": 8587 }, { "epoch": 458.13, "learning_rate": 2.5308641975308646e-06, "loss": 0.0381, "step": 8590 }, { "epoch": 458.67, "learning_rate": 2.469135802469136e-06, "loss": 0.0229, "step": 8600 }, { "epoch": 458.99, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.923562049865723, "eval_runtime": 5.3963, "eval_samples_per_second": 111.186, "eval_steps_per_second": 3.521, "step": 8606 }, { "epoch": 459.2, "learning_rate": 2.4074074074074075e-06, "loss": 0.0308, "step": 8610 }, { "epoch": 459.73, "learning_rate": 2.345679012345679e-06, "loss": 0.0257, "step": 8620 }, { "epoch": 460.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.922508716583252, "eval_runtime": 5.4351, "eval_samples_per_second": 110.394, "eval_steps_per_second": 3.496, "step": 8625 }, { "epoch": 460.27, "learning_rate": 2.283950617283951e-06, "loss": 0.0193, "step": 8630 }, { "epoch": 460.8, "learning_rate": 2.2222222222222225e-06, "loss": 0.0291, "step": 8640 }, { "epoch": 460.96, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.922194004058838, "eval_runtime": 5.3746, "eval_samples_per_second": 111.636, "eval_steps_per_second": 3.535, "step": 8643 }, { "epoch": 461.33, "learning_rate": 2.1604938271604937e-06, "loss": 0.0292, "step": 8650 }, { "epoch": 461.87, "learning_rate": 2.0987654320987654e-06, "loss": 0.0325, "step": 8660 }, { "epoch": 461.97, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.921637535095215, "eval_runtime": 5.38, "eval_samples_per_second": 111.524, "eval_steps_per_second": 3.532, "step": 8662 }, { "epoch": 462.4, "learning_rate": 2.0370370370370375e-06, "loss": 0.0292, "step": 8670 }, { "epoch": 462.93, "learning_rate": 1.9753086419753087e-06, "loss": 0.0268, "step": 8680 }, { "epoch": 462.99, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.92024564743042, "eval_runtime": 5.3601, "eval_samples_per_second": 111.939, "eval_steps_per_second": 3.545, "step": 8681 }, { "epoch": 463.47, "learning_rate": 1.9135802469135804e-06, "loss": 0.0268, "step": 8690 }, { "epoch": 464.0, "learning_rate": 1.8518518518518519e-06, "loss": 0.0156, "step": 8700 }, { "epoch": 464.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.91749906539917, "eval_runtime": 5.3813, "eval_samples_per_second": 111.497, "eval_steps_per_second": 3.531, "step": 8700 }, { "epoch": 464.53, "learning_rate": 1.7901234567901235e-06, "loss": 0.0196, "step": 8710 }, { "epoch": 464.96, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.914690017700195, "eval_runtime": 5.3651, "eval_samples_per_second": 111.834, "eval_steps_per_second": 3.541, "step": 8718 }, { "epoch": 465.07, "learning_rate": 1.728395061728395e-06, "loss": 0.0199, "step": 8720 }, { "epoch": 465.6, "learning_rate": 1.6666666666666667e-06, "loss": 0.0448, "step": 8730 }, { "epoch": 465.97, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.910024642944336, "eval_runtime": 5.359, "eval_samples_per_second": 111.962, "eval_steps_per_second": 3.545, "step": 8737 }, { "epoch": 466.13, "learning_rate": 1.6049382716049385e-06, "loss": 0.021, "step": 8740 }, { "epoch": 466.67, "learning_rate": 1.5432098765432098e-06, "loss": 0.0232, "step": 8750 }, { "epoch": 466.99, "eval_accuracy": 0.23333333333333334, "eval_loss": 4.908828258514404, "eval_runtime": 5.3987, "eval_samples_per_second": 111.138, "eval_steps_per_second": 3.519, "step": 8756 }, { "epoch": 467.2, "learning_rate": 1.4814814814814817e-06, "loss": 0.0224, "step": 8760 }, { "epoch": 467.73, "learning_rate": 1.4197530864197531e-06, "loss": 0.0274, "step": 8770 }, { "epoch": 468.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.909602165222168, "eval_runtime": 5.4071, "eval_samples_per_second": 110.966, "eval_steps_per_second": 3.514, "step": 8775 }, { "epoch": 468.27, "learning_rate": 1.3580246913580248e-06, "loss": 0.0344, "step": 8780 }, { "epoch": 468.8, "learning_rate": 1.2962962962962962e-06, "loss": 0.029, "step": 8790 }, { "epoch": 468.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.910549640655518, "eval_runtime": 5.3868, "eval_samples_per_second": 111.383, "eval_steps_per_second": 3.527, "step": 8793 }, { "epoch": 469.33, "learning_rate": 1.234567901234568e-06, "loss": 0.0265, "step": 8800 }, { "epoch": 469.87, "learning_rate": 1.1728395061728396e-06, "loss": 0.0337, "step": 8810 }, { "epoch": 469.97, "eval_accuracy": 0.235, "eval_loss": 4.912467956542969, "eval_runtime": 5.3985, "eval_samples_per_second": 111.143, "eval_steps_per_second": 3.52, "step": 8812 }, { "epoch": 470.4, "learning_rate": 1.1111111111111112e-06, "loss": 0.0298, "step": 8820 }, { "epoch": 470.93, "learning_rate": 1.0493827160493827e-06, "loss": 0.0178, "step": 8830 }, { "epoch": 470.99, "eval_accuracy": 0.235, "eval_loss": 4.91201639175415, "eval_runtime": 5.399, "eval_samples_per_second": 111.132, "eval_steps_per_second": 3.519, "step": 8831 }, { "epoch": 471.47, "learning_rate": 9.876543209876544e-07, "loss": 0.0179, "step": 8840 }, { "epoch": 472.0, "learning_rate": 9.259259259259259e-07, "loss": 0.0286, "step": 8850 }, { "epoch": 472.0, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.912468910217285, "eval_runtime": 5.4913, "eval_samples_per_second": 109.263, "eval_steps_per_second": 3.46, "step": 8850 }, { "epoch": 472.53, "learning_rate": 8.641975308641975e-07, "loss": 0.0159, "step": 8860 }, { "epoch": 472.96, "eval_accuracy": 0.23666666666666666, "eval_loss": 4.910217761993408, "eval_runtime": 5.4334, "eval_samples_per_second": 110.428, "eval_steps_per_second": 3.497, "step": 8868 }, { "epoch": 473.07, "learning_rate": 8.024691358024693e-07, "loss": 0.0247, "step": 8870 }, { "epoch": 473.6, "learning_rate": 7.407407407407408e-07, "loss": 0.0318, "step": 8880 }, { "epoch": 473.97, "eval_accuracy": 0.23833333333333334, "eval_loss": 4.9116291999816895, "eval_runtime": 5.4254, "eval_samples_per_second": 110.59, "eval_steps_per_second": 3.502, "step": 8887 }, { "epoch": 474.13, "learning_rate": 6.790123456790124e-07, "loss": 0.0311, "step": 8890 }, { "epoch": 474.67, "learning_rate": 6.17283950617284e-07, "loss": 0.0302, "step": 8900 }, { "epoch": 474.99, "eval_accuracy": 0.24, "eval_loss": 4.911314487457275, "eval_runtime": 5.4263, "eval_samples_per_second": 110.573, "eval_steps_per_second": 3.501, "step": 8906 }, { "epoch": 475.2, "learning_rate": 5.555555555555556e-07, "loss": 0.0305, "step": 8910 }, { "epoch": 475.73, "learning_rate": 4.938271604938272e-07, "loss": 0.0184, "step": 8920 }, { "epoch": 476.0, "eval_accuracy": 0.24, "eval_loss": 4.911987781524658, "eval_runtime": 5.4618, "eval_samples_per_second": 109.854, "eval_steps_per_second": 3.479, "step": 8925 }, { "epoch": 476.27, "learning_rate": 4.3209876543209875e-07, "loss": 0.0415, "step": 8930 }, { "epoch": 476.8, "learning_rate": 3.703703703703704e-07, "loss": 0.025, "step": 8940 }, { "epoch": 476.96, "eval_accuracy": 0.24, "eval_loss": 4.912769794464111, "eval_runtime": 5.4549, "eval_samples_per_second": 109.993, "eval_steps_per_second": 3.483, "step": 8943 }, { "epoch": 477.33, "learning_rate": 3.08641975308642e-07, "loss": 0.013, "step": 8950 }, { "epoch": 477.87, "learning_rate": 2.469135802469136e-07, "loss": 0.027, "step": 8960 }, { "epoch": 477.97, "eval_accuracy": 0.24, "eval_loss": 4.912613868713379, "eval_runtime": 5.4968, "eval_samples_per_second": 109.154, "eval_steps_per_second": 3.457, "step": 8962 }, { "epoch": 478.4, "learning_rate": 1.851851851851852e-07, "loss": 0.0156, "step": 8970 }, { "epoch": 478.93, "learning_rate": 1.234567901234568e-07, "loss": 0.0298, "step": 8980 }, { "epoch": 478.99, "eval_accuracy": 0.24, "eval_loss": 4.913006782531738, "eval_runtime": 5.4672, "eval_samples_per_second": 109.745, "eval_steps_per_second": 3.475, "step": 8981 }, { "epoch": 479.47, "learning_rate": 6.17283950617284e-08, "loss": 0.0213, "step": 8990 }, { "epoch": 480.0, "learning_rate": 0.0, "loss": 0.0349, "step": 9000 }, { "epoch": 480.0, "eval_accuracy": 0.24, "eval_loss": 4.9129838943481445, "eval_runtime": 5.4668, "eval_samples_per_second": 109.753, "eval_steps_per_second": 3.476, "step": 9000 }, { "epoch": 480.0, "step": 9000, "total_flos": 6.6744785965028475e+19, "train_loss": 0.3827814753833744, "train_runtime": 27227.0796, "train_samples_per_second": 44.019, "train_steps_per_second": 0.331 } ], "max_steps": 9000, "num_train_epochs": 500, "total_flos": 6.6744785965028475e+19, "trial_name": null, "trial_params": null }