{ "best_metric": 3.9567816257476807, "best_model_checkpoint": "chinese-roberta-wwm-ext-finetuned-MC-hyper/checkpoint-1375", "epoch": 5.0, "global_step": 6875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.970909090909091e-05, "loss": 1.3244, "step": 20 }, { "epoch": 0.03, "learning_rate": 9.941818181818182e-05, "loss": 0.9671, "step": 40 }, { "epoch": 0.04, "learning_rate": 9.912727272727273e-05, "loss": 0.8194, "step": 60 }, { "epoch": 0.06, "learning_rate": 9.883636363636364e-05, "loss": 0.7545, "step": 80 }, { "epoch": 0.07, "learning_rate": 9.854545454545455e-05, "loss": 0.7631, "step": 100 }, { "epoch": 0.09, "learning_rate": 9.825454545454546e-05, "loss": 0.6651, "step": 120 }, { "epoch": 0.1, "learning_rate": 9.796363636363637e-05, "loss": 0.5622, "step": 140 }, { "epoch": 0.12, "learning_rate": 9.767272727272728e-05, "loss": 0.6096, "step": 160 }, { "epoch": 0.13, "learning_rate": 9.738181818181819e-05, "loss": 0.4859, "step": 180 }, { "epoch": 0.15, "learning_rate": 9.70909090909091e-05, "loss": 0.4879, "step": 200 }, { "epoch": 0.16, "learning_rate": 9.680000000000001e-05, "loss": 0.4388, "step": 220 }, { "epoch": 0.17, "learning_rate": 9.650909090909092e-05, "loss": 0.478, "step": 240 }, { "epoch": 0.19, "learning_rate": 9.621818181818181e-05, "loss": 0.3661, "step": 260 }, { "epoch": 0.2, "learning_rate": 9.592727272727274e-05, "loss": 0.4131, "step": 280 }, { "epoch": 0.22, "learning_rate": 9.563636363636365e-05, "loss": 0.3415, "step": 300 }, { "epoch": 0.23, "learning_rate": 9.534545454545456e-05, "loss": 0.3695, "step": 320 }, { "epoch": 0.25, "learning_rate": 9.505454545454546e-05, "loss": 0.3059, "step": 340 }, { "epoch": 0.26, "learning_rate": 9.476363636363636e-05, "loss": 0.268, "step": 360 }, { "epoch": 0.28, "learning_rate": 9.447272727272728e-05, "loss": 0.3019, "step": 380 }, { "epoch": 0.29, "learning_rate": 9.418181818181818e-05, "loss": 0.2147, "step": 400 }, { "epoch": 0.31, "learning_rate": 9.38909090909091e-05, "loss": 0.3197, "step": 420 }, { "epoch": 0.32, "learning_rate": 9.360000000000001e-05, "loss": 0.2764, "step": 440 }, { "epoch": 0.33, "learning_rate": 9.330909090909091e-05, "loss": 0.2022, "step": 460 }, { "epoch": 0.35, "learning_rate": 9.301818181818183e-05, "loss": 0.2326, "step": 480 }, { "epoch": 0.36, "learning_rate": 9.272727272727273e-05, "loss": 0.3168, "step": 500 }, { "epoch": 0.38, "learning_rate": 9.243636363636365e-05, "loss": 0.2157, "step": 520 }, { "epoch": 0.39, "learning_rate": 9.214545454545455e-05, "loss": 0.2783, "step": 540 }, { "epoch": 0.41, "learning_rate": 9.185454545454545e-05, "loss": 0.1759, "step": 560 }, { "epoch": 0.42, "learning_rate": 9.156363636363636e-05, "loss": 0.169, "step": 580 }, { "epoch": 0.44, "learning_rate": 9.127272727272727e-05, "loss": 0.1731, "step": 600 }, { "epoch": 0.45, "learning_rate": 9.09818181818182e-05, "loss": 0.2469, "step": 620 }, { "epoch": 0.47, "learning_rate": 9.069090909090909e-05, "loss": 0.1617, "step": 640 }, { "epoch": 0.48, "learning_rate": 9.04e-05, "loss": 0.1841, "step": 660 }, { "epoch": 0.49, "learning_rate": 9.010909090909091e-05, "loss": 0.1549, "step": 680 }, { "epoch": 0.51, "learning_rate": 8.981818181818182e-05, "loss": 0.1393, "step": 700 }, { "epoch": 0.52, "learning_rate": 8.952727272727273e-05, "loss": 0.1853, "step": 720 }, { "epoch": 0.54, "learning_rate": 8.923636363636364e-05, "loss": 0.1609, "step": 740 }, { "epoch": 0.55, "learning_rate": 8.894545454545455e-05, "loss": 0.1493, "step": 760 }, { "epoch": 0.57, "learning_rate": 8.865454545454546e-05, "loss": 0.1436, "step": 780 }, { "epoch": 0.58, "learning_rate": 8.836363636363637e-05, "loss": 0.1626, "step": 800 }, { "epoch": 0.6, "learning_rate": 8.807272727272728e-05, "loss": 0.2139, "step": 820 }, { "epoch": 0.61, "learning_rate": 8.778181818181819e-05, "loss": 0.1661, "step": 840 }, { "epoch": 0.63, "learning_rate": 8.74909090909091e-05, "loss": 0.1579, "step": 860 }, { "epoch": 0.64, "learning_rate": 8.72e-05, "loss": 0.1424, "step": 880 }, { "epoch": 0.65, "learning_rate": 8.690909090909091e-05, "loss": 0.073, "step": 900 }, { "epoch": 0.67, "learning_rate": 8.661818181818182e-05, "loss": 0.1764, "step": 920 }, { "epoch": 0.68, "learning_rate": 8.632727272727273e-05, "loss": 0.1296, "step": 940 }, { "epoch": 0.7, "learning_rate": 8.603636363636364e-05, "loss": 0.144, "step": 960 }, { "epoch": 0.71, "learning_rate": 8.574545454545455e-05, "loss": 0.0955, "step": 980 }, { "epoch": 0.73, "learning_rate": 8.545454545454545e-05, "loss": 0.1262, "step": 1000 }, { "epoch": 0.74, "learning_rate": 8.516363636363637e-05, "loss": 0.1599, "step": 1020 }, { "epoch": 0.76, "learning_rate": 8.487272727272728e-05, "loss": 0.0835, "step": 1040 }, { "epoch": 0.77, "learning_rate": 8.458181818181819e-05, "loss": 0.1509, "step": 1060 }, { "epoch": 0.79, "learning_rate": 8.42909090909091e-05, "loss": 0.0916, "step": 1080 }, { "epoch": 0.8, "learning_rate": 8.4e-05, "loss": 0.0863, "step": 1100 }, { "epoch": 0.81, "learning_rate": 8.370909090909092e-05, "loss": 0.1302, "step": 1120 }, { "epoch": 0.83, "learning_rate": 8.341818181818181e-05, "loss": 0.1324, "step": 1140 }, { "epoch": 0.84, "learning_rate": 8.312727272727274e-05, "loss": 0.0838, "step": 1160 }, { "epoch": 0.86, "learning_rate": 8.283636363636365e-05, "loss": 0.1127, "step": 1180 }, { "epoch": 0.87, "learning_rate": 8.254545454545454e-05, "loss": 0.1193, "step": 1200 }, { "epoch": 0.89, "learning_rate": 8.225454545454547e-05, "loss": 0.1404, "step": 1220 }, { "epoch": 0.9, "learning_rate": 8.196363636363636e-05, "loss": 0.125, "step": 1240 }, { "epoch": 0.92, "learning_rate": 8.167272727272728e-05, "loss": 0.0519, "step": 1260 }, { "epoch": 0.93, "learning_rate": 8.138181818181818e-05, "loss": 0.1256, "step": 1280 }, { "epoch": 0.95, "learning_rate": 8.109090909090909e-05, "loss": 0.1016, "step": 1300 }, { "epoch": 0.96, "learning_rate": 8.080000000000001e-05, "loss": 0.0944, "step": 1320 }, { "epoch": 0.97, "learning_rate": 8.050909090909091e-05, "loss": 0.0666, "step": 1340 }, { "epoch": 0.99, "learning_rate": 8.021818181818183e-05, "loss": 0.0903, "step": 1360 }, { "epoch": 1.0, "eval_accuracy": 0.1940000057220459, "eval_loss": 3.9567816257476807, "eval_runtime": 3.3624, "eval_samples_per_second": 148.705, "eval_steps_per_second": 4.759, "step": 1375 }, { "epoch": 1.0, "learning_rate": 7.992727272727273e-05, "loss": 0.1101, "step": 1380 }, { "epoch": 1.02, "learning_rate": 7.963636363636364e-05, "loss": 0.0603, "step": 1400 }, { "epoch": 1.03, "learning_rate": 7.934545454545455e-05, "loss": 0.0622, "step": 1420 }, { "epoch": 1.05, "learning_rate": 7.905454545454546e-05, "loss": 0.0463, "step": 1440 }, { "epoch": 1.06, "learning_rate": 7.876363636363638e-05, "loss": 0.0404, "step": 1460 }, { "epoch": 1.08, "learning_rate": 7.847272727272727e-05, "loss": 0.0476, "step": 1480 }, { "epoch": 1.09, "learning_rate": 7.818181818181818e-05, "loss": 0.0763, "step": 1500 }, { "epoch": 1.11, "learning_rate": 7.789090909090909e-05, "loss": 0.0363, "step": 1520 }, { "epoch": 1.12, "learning_rate": 7.76e-05, "loss": 0.0697, "step": 1540 }, { "epoch": 1.13, "learning_rate": 7.730909090909091e-05, "loss": 0.0838, "step": 1560 }, { "epoch": 1.15, "learning_rate": 7.701818181818182e-05, "loss": 0.0587, "step": 1580 }, { "epoch": 1.16, "learning_rate": 7.672727272727273e-05, "loss": 0.0246, "step": 1600 }, { "epoch": 1.18, "learning_rate": 7.643636363636364e-05, "loss": 0.0461, "step": 1620 }, { "epoch": 1.19, "learning_rate": 7.614545454545455e-05, "loss": 0.0695, "step": 1640 }, { "epoch": 1.21, "learning_rate": 7.585454545454546e-05, "loss": 0.0736, "step": 1660 }, { "epoch": 1.22, "learning_rate": 7.556363636363637e-05, "loss": 0.0261, "step": 1680 }, { "epoch": 1.24, "learning_rate": 7.527272727272728e-05, "loss": 0.0438, "step": 1700 }, { "epoch": 1.25, "learning_rate": 7.498181818181819e-05, "loss": 0.0858, "step": 1720 }, { "epoch": 1.27, "learning_rate": 7.469090909090908e-05, "loss": 0.0248, "step": 1740 }, { "epoch": 1.28, "learning_rate": 7.44e-05, "loss": 0.0615, "step": 1760 }, { "epoch": 1.29, "learning_rate": 7.410909090909092e-05, "loss": 0.0123, "step": 1780 }, { "epoch": 1.31, "learning_rate": 7.381818181818182e-05, "loss": 0.0451, "step": 1800 }, { "epoch": 1.32, "learning_rate": 7.352727272727273e-05, "loss": 0.05, "step": 1820 }, { "epoch": 1.34, "learning_rate": 7.323636363636363e-05, "loss": 0.0415, "step": 1840 }, { "epoch": 1.35, "learning_rate": 7.294545454545455e-05, "loss": 0.0734, "step": 1860 }, { "epoch": 1.37, "learning_rate": 7.265454545454545e-05, "loss": 0.037, "step": 1880 }, { "epoch": 1.38, "learning_rate": 7.236363636363637e-05, "loss": 0.045, "step": 1900 }, { "epoch": 1.4, "learning_rate": 7.207272727272728e-05, "loss": 0.0914, "step": 1920 }, { "epoch": 1.41, "learning_rate": 7.178181818181818e-05, "loss": 0.0288, "step": 1940 }, { "epoch": 1.43, "learning_rate": 7.14909090909091e-05, "loss": 0.0289, "step": 1960 }, { "epoch": 1.44, "learning_rate": 7.12e-05, "loss": 0.0355, "step": 1980 }, { "epoch": 1.45, "learning_rate": 7.090909090909092e-05, "loss": 0.0376, "step": 2000 }, { "epoch": 1.47, "learning_rate": 7.061818181818181e-05, "loss": 0.0375, "step": 2020 }, { "epoch": 1.48, "learning_rate": 7.032727272727272e-05, "loss": 0.0695, "step": 2040 }, { "epoch": 1.5, "learning_rate": 7.003636363636365e-05, "loss": 0.0414, "step": 2060 }, { "epoch": 1.51, "learning_rate": 6.974545454545454e-05, "loss": 0.0308, "step": 2080 }, { "epoch": 1.53, "learning_rate": 6.945454545454547e-05, "loss": 0.0209, "step": 2100 }, { "epoch": 1.54, "learning_rate": 6.916363636363636e-05, "loss": 0.0731, "step": 2120 }, { "epoch": 1.56, "learning_rate": 6.887272727272727e-05, "loss": 0.0409, "step": 2140 }, { "epoch": 1.57, "learning_rate": 6.858181818181818e-05, "loss": 0.0544, "step": 2160 }, { "epoch": 1.59, "learning_rate": 6.829090909090909e-05, "loss": 0.0677, "step": 2180 }, { "epoch": 1.6, "learning_rate": 6.800000000000001e-05, "loss": 0.0345, "step": 2200 }, { "epoch": 1.61, "learning_rate": 6.770909090909091e-05, "loss": 0.0262, "step": 2220 }, { "epoch": 1.63, "learning_rate": 6.741818181818182e-05, "loss": 0.066, "step": 2240 }, { "epoch": 1.64, "learning_rate": 6.712727272727273e-05, "loss": 0.0578, "step": 2260 }, { "epoch": 1.66, "learning_rate": 6.683636363636364e-05, "loss": 0.066, "step": 2280 }, { "epoch": 1.67, "learning_rate": 6.654545454545455e-05, "loss": 0.0565, "step": 2300 }, { "epoch": 1.69, "learning_rate": 6.625454545454546e-05, "loss": 0.0428, "step": 2320 }, { "epoch": 1.7, "learning_rate": 6.596363636363637e-05, "loss": 0.0387, "step": 2340 }, { "epoch": 1.72, "learning_rate": 6.567272727272727e-05, "loss": 0.0294, "step": 2360 }, { "epoch": 1.73, "learning_rate": 6.538181818181818e-05, "loss": 0.0332, "step": 2380 }, { "epoch": 1.75, "learning_rate": 6.50909090909091e-05, "loss": 0.0401, "step": 2400 }, { "epoch": 1.76, "learning_rate": 6.48e-05, "loss": 0.0324, "step": 2420 }, { "epoch": 1.77, "learning_rate": 6.450909090909091e-05, "loss": 0.0162, "step": 2440 }, { "epoch": 1.79, "learning_rate": 6.421818181818182e-05, "loss": 0.0501, "step": 2460 }, { "epoch": 1.8, "learning_rate": 6.392727272727273e-05, "loss": 0.0335, "step": 2480 }, { "epoch": 1.82, "learning_rate": 6.363636363636364e-05, "loss": 0.025, "step": 2500 }, { "epoch": 1.83, "learning_rate": 6.334545454545455e-05, "loss": 0.0402, "step": 2520 }, { "epoch": 1.85, "learning_rate": 6.305454545454546e-05, "loss": 0.0624, "step": 2540 }, { "epoch": 1.86, "learning_rate": 6.276363636363637e-05, "loss": 0.0337, "step": 2560 }, { "epoch": 1.88, "learning_rate": 6.247272727272728e-05, "loss": 0.0264, "step": 2580 }, { "epoch": 1.89, "learning_rate": 6.218181818181819e-05, "loss": 0.057, "step": 2600 }, { "epoch": 1.91, "learning_rate": 6.18909090909091e-05, "loss": 0.0381, "step": 2620 }, { "epoch": 1.92, "learning_rate": 6.16e-05, "loss": 0.0316, "step": 2640 }, { "epoch": 1.93, "learning_rate": 6.130909090909092e-05, "loss": 0.0492, "step": 2660 }, { "epoch": 1.95, "learning_rate": 6.101818181818182e-05, "loss": 0.0405, "step": 2680 }, { "epoch": 1.96, "learning_rate": 6.0727272727272735e-05, "loss": 0.0594, "step": 2700 }, { "epoch": 1.98, "learning_rate": 6.043636363636364e-05, "loss": 0.0263, "step": 2720 }, { "epoch": 1.99, "learning_rate": 6.014545454545455e-05, "loss": 0.0432, "step": 2740 }, { "epoch": 2.0, "eval_accuracy": 0.1720000058412552, "eval_loss": 4.625512599945068, "eval_runtime": 3.3701, "eval_samples_per_second": 148.362, "eval_steps_per_second": 4.748, "step": 2750 }, { "epoch": 2.01, "learning_rate": 5.985454545454545e-05, "loss": 0.0235, "step": 2760 }, { "epoch": 2.02, "learning_rate": 5.9563636363636366e-05, "loss": 0.0293, "step": 2780 }, { "epoch": 2.04, "learning_rate": 5.927272727272728e-05, "loss": 0.0326, "step": 2800 }, { "epoch": 2.05, "learning_rate": 5.8981818181818184e-05, "loss": 0.0124, "step": 2820 }, { "epoch": 2.07, "learning_rate": 5.8690909090909094e-05, "loss": 0.0157, "step": 2840 }, { "epoch": 2.08, "learning_rate": 5.8399999999999997e-05, "loss": 0.0138, "step": 2860 }, { "epoch": 2.09, "learning_rate": 5.810909090909091e-05, "loss": 0.0291, "step": 2880 }, { "epoch": 2.11, "learning_rate": 5.7818181818181815e-05, "loss": 0.0047, "step": 2900 }, { "epoch": 2.12, "learning_rate": 5.752727272727273e-05, "loss": 0.0188, "step": 2920 }, { "epoch": 2.14, "learning_rate": 5.723636363636364e-05, "loss": 0.0277, "step": 2940 }, { "epoch": 2.15, "learning_rate": 5.6945454545454544e-05, "loss": 0.0118, "step": 2960 }, { "epoch": 2.17, "learning_rate": 5.665454545454546e-05, "loss": 0.0277, "step": 2980 }, { "epoch": 2.18, "learning_rate": 5.636363636363636e-05, "loss": 0.006, "step": 3000 }, { "epoch": 2.2, "learning_rate": 5.607272727272728e-05, "loss": 0.0214, "step": 3020 }, { "epoch": 2.21, "learning_rate": 5.578181818181818e-05, "loss": 0.0086, "step": 3040 }, { "epoch": 2.23, "learning_rate": 5.549090909090909e-05, "loss": 0.0091, "step": 3060 }, { "epoch": 2.24, "learning_rate": 5.520000000000001e-05, "loss": 0.0055, "step": 3080 }, { "epoch": 2.25, "learning_rate": 5.490909090909091e-05, "loss": 0.0326, "step": 3100 }, { "epoch": 2.27, "learning_rate": 5.4618181818181826e-05, "loss": 0.0145, "step": 3120 }, { "epoch": 2.28, "learning_rate": 5.432727272727273e-05, "loss": 0.0087, "step": 3140 }, { "epoch": 2.3, "learning_rate": 5.403636363636364e-05, "loss": 0.0084, "step": 3160 }, { "epoch": 2.31, "learning_rate": 5.374545454545454e-05, "loss": 0.0116, "step": 3180 }, { "epoch": 2.33, "learning_rate": 5.3454545454545457e-05, "loss": 0.0254, "step": 3200 }, { "epoch": 2.34, "learning_rate": 5.316363636363637e-05, "loss": 0.0033, "step": 3220 }, { "epoch": 2.36, "learning_rate": 5.2872727272727275e-05, "loss": 0.0246, "step": 3240 }, { "epoch": 2.37, "learning_rate": 5.2581818181818185e-05, "loss": 0.0222, "step": 3260 }, { "epoch": 2.39, "learning_rate": 5.229090909090909e-05, "loss": 0.0093, "step": 3280 }, { "epoch": 2.4, "learning_rate": 5.2000000000000004e-05, "loss": 0.0112, "step": 3300 }, { "epoch": 2.41, "learning_rate": 5.1709090909090906e-05, "loss": 0.0335, "step": 3320 }, { "epoch": 2.43, "learning_rate": 5.141818181818182e-05, "loss": 0.0382, "step": 3340 }, { "epoch": 2.44, "learning_rate": 5.112727272727273e-05, "loss": 0.0264, "step": 3360 }, { "epoch": 2.46, "learning_rate": 5.0836363636363634e-05, "loss": 0.0131, "step": 3380 }, { "epoch": 2.47, "learning_rate": 5.054545454545455e-05, "loss": 0.0198, "step": 3400 }, { "epoch": 2.49, "learning_rate": 5.025454545454545e-05, "loss": 0.0027, "step": 3420 }, { "epoch": 2.5, "learning_rate": 4.996363636363637e-05, "loss": 0.0116, "step": 3440 }, { "epoch": 2.52, "learning_rate": 4.967272727272728e-05, "loss": 0.0066, "step": 3460 }, { "epoch": 2.53, "learning_rate": 4.938181818181818e-05, "loss": 0.04, "step": 3480 }, { "epoch": 2.55, "learning_rate": 4.909090909090909e-05, "loss": 0.0097, "step": 3500 }, { "epoch": 2.56, "learning_rate": 4.88e-05, "loss": 0.0119, "step": 3520 }, { "epoch": 2.57, "learning_rate": 4.850909090909091e-05, "loss": 0.0111, "step": 3540 }, { "epoch": 2.59, "learning_rate": 4.821818181818182e-05, "loss": 0.0311, "step": 3560 }, { "epoch": 2.6, "learning_rate": 4.792727272727273e-05, "loss": 0.0234, "step": 3580 }, { "epoch": 2.62, "learning_rate": 4.763636363636364e-05, "loss": 0.0124, "step": 3600 }, { "epoch": 2.63, "learning_rate": 4.734545454545455e-05, "loss": 0.0068, "step": 3620 }, { "epoch": 2.65, "learning_rate": 4.705454545454546e-05, "loss": 0.0349, "step": 3640 }, { "epoch": 2.66, "learning_rate": 4.6763636363636366e-05, "loss": 0.0112, "step": 3660 }, { "epoch": 2.68, "learning_rate": 4.6472727272727276e-05, "loss": 0.0122, "step": 3680 }, { "epoch": 2.69, "learning_rate": 4.618181818181818e-05, "loss": 0.0071, "step": 3700 }, { "epoch": 2.71, "learning_rate": 4.5890909090909094e-05, "loss": 0.0504, "step": 3720 }, { "epoch": 2.72, "learning_rate": 4.5600000000000004e-05, "loss": 0.0307, "step": 3740 }, { "epoch": 2.73, "learning_rate": 4.530909090909091e-05, "loss": 0.0048, "step": 3760 }, { "epoch": 2.75, "learning_rate": 4.501818181818182e-05, "loss": 0.0136, "step": 3780 }, { "epoch": 2.76, "learning_rate": 4.472727272727273e-05, "loss": 0.038, "step": 3800 }, { "epoch": 2.78, "learning_rate": 4.4436363636363635e-05, "loss": 0.0089, "step": 3820 }, { "epoch": 2.79, "learning_rate": 4.4145454545454544e-05, "loss": 0.0069, "step": 3840 }, { "epoch": 2.81, "learning_rate": 4.385454545454546e-05, "loss": 0.0161, "step": 3860 }, { "epoch": 2.82, "learning_rate": 4.356363636363637e-05, "loss": 0.013, "step": 3880 }, { "epoch": 2.84, "learning_rate": 4.327272727272728e-05, "loss": 0.0049, "step": 3900 }, { "epoch": 2.85, "learning_rate": 4.298181818181818e-05, "loss": 0.0304, "step": 3920 }, { "epoch": 2.87, "learning_rate": 4.269090909090909e-05, "loss": 0.03, "step": 3940 }, { "epoch": 2.88, "learning_rate": 4.24e-05, "loss": 0.0313, "step": 3960 }, { "epoch": 2.89, "learning_rate": 4.210909090909091e-05, "loss": 0.0079, "step": 3980 }, { "epoch": 2.91, "learning_rate": 4.181818181818182e-05, "loss": 0.0189, "step": 4000 }, { "epoch": 2.92, "learning_rate": 4.152727272727273e-05, "loss": 0.0019, "step": 4020 }, { "epoch": 2.94, "learning_rate": 4.123636363636364e-05, "loss": 0.0325, "step": 4040 }, { "epoch": 2.95, "learning_rate": 4.094545454545455e-05, "loss": 0.0276, "step": 4060 }, { "epoch": 2.97, "learning_rate": 4.065454545454546e-05, "loss": 0.0137, "step": 4080 }, { "epoch": 2.98, "learning_rate": 4.0363636363636367e-05, "loss": 0.0169, "step": 4100 }, { "epoch": 3.0, "learning_rate": 4.0072727272727276e-05, "loss": 0.0087, "step": 4120 }, { "epoch": 3.0, "eval_accuracy": 0.17599999904632568, "eval_loss": 6.114068508148193, "eval_runtime": 3.4252, "eval_samples_per_second": 145.975, "eval_steps_per_second": 4.671, "step": 4125 }, { "epoch": 3.01, "learning_rate": 3.978181818181818e-05, "loss": 0.0005, "step": 4140 }, { "epoch": 3.03, "learning_rate": 3.9490909090909095e-05, "loss": 0.0109, "step": 4160 }, { "epoch": 3.04, "learning_rate": 3.9200000000000004e-05, "loss": 0.0128, "step": 4180 }, { "epoch": 3.05, "learning_rate": 3.8909090909090914e-05, "loss": 0.0016, "step": 4200 }, { "epoch": 3.07, "learning_rate": 3.861818181818182e-05, "loss": 0.0009, "step": 4220 }, { "epoch": 3.08, "learning_rate": 3.8327272727272726e-05, "loss": 0.0036, "step": 4240 }, { "epoch": 3.1, "learning_rate": 3.8036363636363635e-05, "loss": 0.0124, "step": 4260 }, { "epoch": 3.11, "learning_rate": 3.7745454545454544e-05, "loss": 0.0128, "step": 4280 }, { "epoch": 3.13, "learning_rate": 3.745454545454546e-05, "loss": 0.0029, "step": 4300 }, { "epoch": 3.14, "learning_rate": 3.716363636363637e-05, "loss": 0.0088, "step": 4320 }, { "epoch": 3.16, "learning_rate": 3.687272727272727e-05, "loss": 0.0129, "step": 4340 }, { "epoch": 3.17, "learning_rate": 3.658181818181818e-05, "loss": 0.0012, "step": 4360 }, { "epoch": 3.19, "learning_rate": 3.629090909090909e-05, "loss": 0.0001, "step": 4380 }, { "epoch": 3.2, "learning_rate": 3.6e-05, "loss": 0.0116, "step": 4400 }, { "epoch": 3.21, "learning_rate": 3.570909090909091e-05, "loss": 0.0005, "step": 4420 }, { "epoch": 3.23, "learning_rate": 3.541818181818182e-05, "loss": 0.0013, "step": 4440 }, { "epoch": 3.24, "learning_rate": 3.512727272727273e-05, "loss": 0.0349, "step": 4460 }, { "epoch": 3.26, "learning_rate": 3.483636363636364e-05, "loss": 0.0013, "step": 4480 }, { "epoch": 3.27, "learning_rate": 3.454545454545455e-05, "loss": 0.0025, "step": 4500 }, { "epoch": 3.29, "learning_rate": 3.425454545454546e-05, "loss": 0.0106, "step": 4520 }, { "epoch": 3.3, "learning_rate": 3.396363636363637e-05, "loss": 0.0107, "step": 4540 }, { "epoch": 3.32, "learning_rate": 3.367272727272727e-05, "loss": 0.0055, "step": 4560 }, { "epoch": 3.33, "learning_rate": 3.338181818181818e-05, "loss": 0.0021, "step": 4580 }, { "epoch": 3.35, "learning_rate": 3.3090909090909095e-05, "loss": 0.0187, "step": 4600 }, { "epoch": 3.36, "learning_rate": 3.2800000000000004e-05, "loss": 0.0005, "step": 4620 }, { "epoch": 3.37, "learning_rate": 3.2509090909090914e-05, "loss": 0.0126, "step": 4640 }, { "epoch": 3.39, "learning_rate": 3.2218181818181816e-05, "loss": 0.0001, "step": 4660 }, { "epoch": 3.4, "learning_rate": 3.1927272727272726e-05, "loss": 0.0022, "step": 4680 }, { "epoch": 3.42, "learning_rate": 3.1636363636363635e-05, "loss": 0.0018, "step": 4700 }, { "epoch": 3.43, "learning_rate": 3.1345454545454545e-05, "loss": 0.001, "step": 4720 }, { "epoch": 3.45, "learning_rate": 3.105454545454546e-05, "loss": 0.0048, "step": 4740 }, { "epoch": 3.46, "learning_rate": 3.0763636363636364e-05, "loss": 0.0025, "step": 4760 }, { "epoch": 3.48, "learning_rate": 3.0472727272727276e-05, "loss": 0.0032, "step": 4780 }, { "epoch": 3.49, "learning_rate": 3.0181818181818182e-05, "loss": 0.0003, "step": 4800 }, { "epoch": 3.51, "learning_rate": 2.9890909090909092e-05, "loss": 0.0125, "step": 4820 }, { "epoch": 3.52, "learning_rate": 2.96e-05, "loss": 0.0009, "step": 4840 }, { "epoch": 3.53, "learning_rate": 2.9309090909090907e-05, "loss": 0.0007, "step": 4860 }, { "epoch": 3.55, "learning_rate": 2.9018181818181823e-05, "loss": 0.0206, "step": 4880 }, { "epoch": 3.56, "learning_rate": 2.872727272727273e-05, "loss": 0.007, "step": 4900 }, { "epoch": 3.58, "learning_rate": 2.843636363636364e-05, "loss": 0.0048, "step": 4920 }, { "epoch": 3.59, "learning_rate": 2.8145454545454548e-05, "loss": 0.0023, "step": 4940 }, { "epoch": 3.61, "learning_rate": 2.7854545454545454e-05, "loss": 0.0015, "step": 4960 }, { "epoch": 3.62, "learning_rate": 2.7563636363636364e-05, "loss": 0.0081, "step": 4980 }, { "epoch": 3.64, "learning_rate": 2.7272727272727273e-05, "loss": 0.0094, "step": 5000 }, { "epoch": 3.65, "learning_rate": 2.6981818181818186e-05, "loss": 0.0001, "step": 5020 }, { "epoch": 3.67, "learning_rate": 2.6690909090909095e-05, "loss": 0.01, "step": 5040 }, { "epoch": 3.68, "learning_rate": 2.64e-05, "loss": 0.0074, "step": 5060 }, { "epoch": 3.69, "learning_rate": 2.610909090909091e-05, "loss": 0.0017, "step": 5080 }, { "epoch": 3.71, "learning_rate": 2.581818181818182e-05, "loss": 0.0112, "step": 5100 }, { "epoch": 3.72, "learning_rate": 2.5527272727272726e-05, "loss": 0.0022, "step": 5120 }, { "epoch": 3.74, "learning_rate": 2.5236363636363636e-05, "loss": 0.0004, "step": 5140 }, { "epoch": 3.75, "learning_rate": 2.494545454545455e-05, "loss": 0.0, "step": 5160 }, { "epoch": 3.77, "learning_rate": 2.4654545454545454e-05, "loss": 0.0134, "step": 5180 }, { "epoch": 3.78, "learning_rate": 2.4363636363636364e-05, "loss": 0.0014, "step": 5200 }, { "epoch": 3.8, "learning_rate": 2.4072727272727273e-05, "loss": 0.0001, "step": 5220 }, { "epoch": 3.81, "learning_rate": 2.3781818181818183e-05, "loss": 0.0075, "step": 5240 }, { "epoch": 3.83, "learning_rate": 2.3490909090909092e-05, "loss": 0.0021, "step": 5260 }, { "epoch": 3.84, "learning_rate": 2.32e-05, "loss": 0.0159, "step": 5280 }, { "epoch": 3.85, "learning_rate": 2.290909090909091e-05, "loss": 0.0001, "step": 5300 }, { "epoch": 3.87, "learning_rate": 2.261818181818182e-05, "loss": 0.0011, "step": 5320 }, { "epoch": 3.88, "learning_rate": 2.2327272727272726e-05, "loss": 0.0052, "step": 5340 }, { "epoch": 3.9, "learning_rate": 2.203636363636364e-05, "loss": 0.0162, "step": 5360 }, { "epoch": 3.91, "learning_rate": 2.1745454545454545e-05, "loss": 0.0066, "step": 5380 }, { "epoch": 3.93, "learning_rate": 2.1454545454545455e-05, "loss": 0.0022, "step": 5400 }, { "epoch": 3.94, "learning_rate": 2.1163636363636367e-05, "loss": 0.0049, "step": 5420 }, { "epoch": 3.96, "learning_rate": 2.0872727272727273e-05, "loss": 0.0147, "step": 5440 }, { "epoch": 3.97, "learning_rate": 2.0581818181818183e-05, "loss": 0.0015, "step": 5460 }, { "epoch": 3.99, "learning_rate": 2.0290909090909092e-05, "loss": 0.0129, "step": 5480 }, { "epoch": 4.0, "learning_rate": 2e-05, "loss": 0.001, "step": 5500 }, { "epoch": 4.0, "eval_accuracy": 0.17399999499320984, "eval_loss": 4.792283058166504, "eval_runtime": 3.3921, "eval_samples_per_second": 147.401, "eval_steps_per_second": 4.717, "step": 5500 }, { "epoch": 4.01, "learning_rate": 1.970909090909091e-05, "loss": 0.0, "step": 5520 }, { "epoch": 4.03, "learning_rate": 1.9418181818181817e-05, "loss": 0.0005, "step": 5540 }, { "epoch": 4.04, "learning_rate": 1.9127272727272726e-05, "loss": 0.0, "step": 5560 }, { "epoch": 4.06, "learning_rate": 1.883636363636364e-05, "loss": 0.0, "step": 5580 }, { "epoch": 4.07, "learning_rate": 1.8545454545454545e-05, "loss": 0.0011, "step": 5600 }, { "epoch": 4.09, "learning_rate": 1.8254545454545455e-05, "loss": 0.0021, "step": 5620 }, { "epoch": 4.1, "learning_rate": 1.7963636363636364e-05, "loss": 0.001, "step": 5640 }, { "epoch": 4.12, "learning_rate": 1.7672727272727274e-05, "loss": 0.0001, "step": 5660 }, { "epoch": 4.13, "learning_rate": 1.7381818181818183e-05, "loss": 0.0044, "step": 5680 }, { "epoch": 4.15, "learning_rate": 1.7090909090909092e-05, "loss": 0.0018, "step": 5700 }, { "epoch": 4.16, "learning_rate": 1.6800000000000002e-05, "loss": 0.0012, "step": 5720 }, { "epoch": 4.17, "learning_rate": 1.650909090909091e-05, "loss": 0.0001, "step": 5740 }, { "epoch": 4.19, "learning_rate": 1.6218181818181817e-05, "loss": 0.0013, "step": 5760 }, { "epoch": 4.2, "learning_rate": 1.5927272727272727e-05, "loss": 0.0, "step": 5780 }, { "epoch": 4.22, "learning_rate": 1.563636363636364e-05, "loss": 0.0003, "step": 5800 }, { "epoch": 4.23, "learning_rate": 1.5345454545454545e-05, "loss": 0.0001, "step": 5820 }, { "epoch": 4.25, "learning_rate": 1.5054545454545455e-05, "loss": 0.0003, "step": 5840 }, { "epoch": 4.26, "learning_rate": 1.4763636363636366e-05, "loss": 0.019, "step": 5860 }, { "epoch": 4.28, "learning_rate": 1.4472727272727274e-05, "loss": 0.001, "step": 5880 }, { "epoch": 4.29, "learning_rate": 1.4181818181818181e-05, "loss": 0.0001, "step": 5900 }, { "epoch": 4.31, "learning_rate": 1.389090909090909e-05, "loss": 0.0001, "step": 5920 }, { "epoch": 4.32, "learning_rate": 1.3600000000000002e-05, "loss": 0.0, "step": 5940 }, { "epoch": 4.33, "learning_rate": 1.330909090909091e-05, "loss": 0.0, "step": 5960 }, { "epoch": 4.35, "learning_rate": 1.3018181818181819e-05, "loss": 0.0064, "step": 5980 }, { "epoch": 4.36, "learning_rate": 1.2727272727272727e-05, "loss": 0.0033, "step": 6000 }, { "epoch": 4.38, "learning_rate": 1.2436363636363636e-05, "loss": 0.0002, "step": 6020 }, { "epoch": 4.39, "learning_rate": 1.2145454545454546e-05, "loss": 0.0001, "step": 6040 }, { "epoch": 4.41, "learning_rate": 1.1854545454545455e-05, "loss": 0.0014, "step": 6060 }, { "epoch": 4.42, "learning_rate": 1.1563636363636364e-05, "loss": 0.002, "step": 6080 }, { "epoch": 4.44, "learning_rate": 1.1272727272727274e-05, "loss": 0.0003, "step": 6100 }, { "epoch": 4.45, "learning_rate": 1.0981818181818182e-05, "loss": 0.0127, "step": 6120 }, { "epoch": 4.47, "learning_rate": 1.0690909090909091e-05, "loss": 0.0007, "step": 6140 }, { "epoch": 4.48, "learning_rate": 1.04e-05, "loss": 0.0086, "step": 6160 }, { "epoch": 4.49, "learning_rate": 1.010909090909091e-05, "loss": 0.0, "step": 6180 }, { "epoch": 4.51, "learning_rate": 9.818181818181818e-06, "loss": 0.0072, "step": 6200 }, { "epoch": 4.52, "learning_rate": 9.527272727272727e-06, "loss": 0.0002, "step": 6220 }, { "epoch": 4.54, "learning_rate": 9.236363636363638e-06, "loss": 0.0, "step": 6240 }, { "epoch": 4.55, "learning_rate": 8.945454545454546e-06, "loss": 0.0, "step": 6260 }, { "epoch": 4.57, "learning_rate": 8.654545454545455e-06, "loss": 0.001, "step": 6280 }, { "epoch": 4.58, "learning_rate": 8.363636363636365e-06, "loss": 0.0001, "step": 6300 }, { "epoch": 4.6, "learning_rate": 8.072727272727274e-06, "loss": 0.0, "step": 6320 }, { "epoch": 4.61, "learning_rate": 7.781818181818182e-06, "loss": 0.001, "step": 6340 }, { "epoch": 4.63, "learning_rate": 7.490909090909091e-06, "loss": 0.0017, "step": 6360 }, { "epoch": 4.64, "learning_rate": 7.2e-06, "loss": 0.0, "step": 6380 }, { "epoch": 4.65, "learning_rate": 6.909090909090909e-06, "loss": 0.0, "step": 6400 }, { "epoch": 4.67, "learning_rate": 6.618181818181818e-06, "loss": 0.0143, "step": 6420 }, { "epoch": 4.68, "learning_rate": 6.327272727272728e-06, "loss": 0.0, "step": 6440 }, { "epoch": 4.7, "learning_rate": 6.0363636363636365e-06, "loss": 0.0, "step": 6460 }, { "epoch": 4.71, "learning_rate": 5.745454545454546e-06, "loss": 0.0029, "step": 6480 }, { "epoch": 4.73, "learning_rate": 5.4545454545454545e-06, "loss": 0.0018, "step": 6500 }, { "epoch": 4.74, "learning_rate": 5.163636363636364e-06, "loss": 0.0, "step": 6520 }, { "epoch": 4.76, "learning_rate": 4.872727272727273e-06, "loss": 0.0001, "step": 6540 }, { "epoch": 4.77, "learning_rate": 4.581818181818182e-06, "loss": 0.0001, "step": 6560 }, { "epoch": 4.79, "learning_rate": 4.290909090909091e-06, "loss": 0.0, "step": 6580 }, { "epoch": 4.8, "learning_rate": 4.000000000000001e-06, "loss": 0.0019, "step": 6600 }, { "epoch": 4.81, "learning_rate": 3.7090909090909092e-06, "loss": 0.0001, "step": 6620 }, { "epoch": 4.83, "learning_rate": 3.4181818181818182e-06, "loss": 0.0003, "step": 6640 }, { "epoch": 4.84, "learning_rate": 3.127272727272727e-06, "loss": 0.0004, "step": 6660 }, { "epoch": 4.86, "learning_rate": 2.8363636363636366e-06, "loss": 0.0037, "step": 6680 }, { "epoch": 4.87, "learning_rate": 2.5454545454545456e-06, "loss": 0.0015, "step": 6700 }, { "epoch": 4.89, "learning_rate": 2.2545454545454546e-06, "loss": 0.0002, "step": 6720 }, { "epoch": 4.9, "learning_rate": 1.9636363636363636e-06, "loss": 0.0144, "step": 6740 }, { "epoch": 4.92, "learning_rate": 1.6727272727272728e-06, "loss": 0.0052, "step": 6760 }, { "epoch": 4.93, "learning_rate": 1.3818181818181818e-06, "loss": 0.0005, "step": 6780 }, { "epoch": 4.95, "learning_rate": 1.090909090909091e-06, "loss": 0.0003, "step": 6800 }, { "epoch": 4.96, "learning_rate": 8.000000000000001e-07, "loss": 0.0082, "step": 6820 }, { "epoch": 4.97, "learning_rate": 5.090909090909092e-07, "loss": 0.0, "step": 6840 }, { "epoch": 4.99, "learning_rate": 2.181818181818182e-07, "loss": 0.0008, "step": 6860 }, { "epoch": 5.0, "eval_accuracy": 0.15800000727176666, "eval_loss": 4.613570213317871, "eval_runtime": 3.3639, "eval_samples_per_second": 148.638, "eval_steps_per_second": 4.756, "step": 6875 }, { "epoch": 5.0, "step": 6875, "total_flos": 2.323364869172544e+16, "train_loss": 0.06812760998129988, "train_runtime": 4390.0209, "train_samples_per_second": 50.114, "train_steps_per_second": 1.566 } ], "max_steps": 6875, "num_train_epochs": 5, "total_flos": 2.323364869172544e+16, "trial_name": null, "trial_params": null }