{ "best_metric": 0.5579236745834351, "best_model_checkpoint": "deberta-v3-large-finetuned-squadv2/checkpoint-3620", "epoch": 2.550269740068661, "eval_steps": 20, "global_step": 5200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.0000000000000002e-07, "loss": 6.0843, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.0000000000000003e-07, "loss": 6.0629, "step": 40 }, { "epoch": 0.03, "learning_rate": 6.000000000000001e-07, "loss": 6.0196, "step": 60 }, { "epoch": 0.04, "learning_rate": 8.000000000000001e-07, "loss": 5.9309, "step": 80 }, { "epoch": 0.05, "learning_rate": 1.0000000000000002e-06, "loss": 5.768, "step": 100 }, { "epoch": 0.06, "learning_rate": 1.2000000000000002e-06, "loss": 5.4721, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.3900000000000002e-06, "loss": 5.1112, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.5900000000000002e-06, "loss": 4.6482, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.79e-06, "loss": 4.2226, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.9900000000000004e-06, "loss": 3.7989, "step": 200 }, { "epoch": 0.11, "learning_rate": 2.19e-06, "loss": 3.4023, "step": 220 }, { "epoch": 0.12, "learning_rate": 2.39e-06, "loss": 3.0793, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.59e-06, "loss": 2.5037, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.7900000000000004e-06, "loss": 1.9528, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.99e-06, "loss": 1.736, "step": 300 }, { "epoch": 0.16, "learning_rate": 3.1900000000000004e-06, "loss": 1.5742, "step": 320 }, { "epoch": 0.17, "learning_rate": 3.3900000000000006e-06, "loss": 1.4485, "step": 340 }, { "epoch": 0.18, "learning_rate": 3.58e-06, "loss": 1.4103, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.7800000000000002e-06, "loss": 1.3205, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.980000000000001e-06, "loss": 1.2793, "step": 400 }, { "epoch": 0.21, "learning_rate": 4.18e-06, "loss": 1.1896, "step": 420 }, { "epoch": 0.22, "learning_rate": 4.38e-06, "loss": 1.1228, "step": 440 }, { "epoch": 0.23, "learning_rate": 4.57e-06, "loss": 1.0661, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.77e-06, "loss": 1.0459, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.970000000000001e-06, "loss": 1.0062, "step": 500 }, { "epoch": 0.26, "learning_rate": 5.1700000000000005e-06, "loss": 0.9441, "step": 520 }, { "epoch": 0.26, "learning_rate": 5.370000000000001e-06, "loss": 0.9871, "step": 540 }, { "epoch": 0.27, "learning_rate": 5.570000000000001e-06, "loss": 0.9291, "step": 560 }, { "epoch": 0.28, "learning_rate": 5.77e-06, "loss": 0.9635, "step": 580 }, { "epoch": 0.29, "learning_rate": 5.9700000000000004e-06, "loss": 0.9874, "step": 600 }, { "epoch": 0.3, "learning_rate": 6.17e-06, "loss": 0.8922, "step": 620 }, { "epoch": 0.31, "learning_rate": 6.370000000000001e-06, "loss": 0.9304, "step": 640 }, { "epoch": 0.32, "learning_rate": 6.570000000000001e-06, "loss": 0.8742, "step": 660 }, { "epoch": 0.33, "learning_rate": 6.770000000000001e-06, "loss": 0.8423, "step": 680 }, { "epoch": 0.34, "learning_rate": 6.97e-06, "loss": 0.8286, "step": 700 }, { "epoch": 0.35, "learning_rate": 7.17e-06, "loss": 0.8192, "step": 720 }, { "epoch": 0.36, "learning_rate": 7.370000000000001e-06, "loss": 0.8451, "step": 740 }, { "epoch": 0.37, "learning_rate": 7.57e-06, "loss": 0.7954, "step": 760 }, { "epoch": 0.38, "learning_rate": 7.77e-06, "loss": 0.8322, "step": 780 }, { "epoch": 0.39, "learning_rate": 7.970000000000002e-06, "loss": 0.8318, "step": 800 }, { "epoch": 0.4, "learning_rate": 8.17e-06, "loss": 0.8396, "step": 820 }, { "epoch": 0.41, "learning_rate": 8.370000000000001e-06, "loss": 0.7922, "step": 840 }, { "epoch": 0.42, "learning_rate": 8.570000000000001e-06, "loss": 0.7931, "step": 860 }, { "epoch": 0.43, "learning_rate": 8.77e-06, "loss": 0.7942, "step": 880 }, { "epoch": 0.44, "learning_rate": 8.97e-06, "loss": 0.8017, "step": 900 }, { "epoch": 0.45, "learning_rate": 9.17e-06, "loss": 0.7346, "step": 920 }, { "epoch": 0.46, "learning_rate": 9.370000000000002e-06, "loss": 0.6792, "step": 940 }, { "epoch": 0.47, "learning_rate": 9.57e-06, "loss": 0.8069, "step": 960 }, { "epoch": 0.48, "learning_rate": 9.770000000000001e-06, "loss": 0.8058, "step": 980 }, { "epoch": 0.49, "learning_rate": 9.970000000000001e-06, "loss": 0.8398, "step": 1000 }, { "epoch": 0.5, "learning_rate": 9.95952380952381e-06, "loss": 0.8148, "step": 1020 }, { "epoch": 0.51, "learning_rate": 9.911904761904762e-06, "loss": 0.7888, "step": 1040 }, { "epoch": 0.52, "learning_rate": 9.864285714285715e-06, "loss": 0.7423, "step": 1060 }, { "epoch": 0.53, "learning_rate": 9.816666666666667e-06, "loss": 0.7855, "step": 1080 }, { "epoch": 0.54, "learning_rate": 9.76904761904762e-06, "loss": 0.7303, "step": 1100 }, { "epoch": 0.55, "learning_rate": 9.721428571428573e-06, "loss": 0.7156, "step": 1120 }, { "epoch": 0.56, "learning_rate": 9.673809523809525e-06, "loss": 0.7542, "step": 1140 }, { "epoch": 0.57, "learning_rate": 9.626190476190477e-06, "loss": 0.723, "step": 1160 }, { "epoch": 0.58, "learning_rate": 9.57857142857143e-06, "loss": 0.7025, "step": 1180 }, { "epoch": 0.59, "learning_rate": 9.530952380952381e-06, "loss": 0.7755, "step": 1200 }, { "epoch": 0.6, "learning_rate": 9.483333333333335e-06, "loss": 0.7225, "step": 1220 }, { "epoch": 0.61, "learning_rate": 9.435714285714286e-06, "loss": 0.7389, "step": 1240 }, { "epoch": 0.62, "learning_rate": 9.388095238095238e-06, "loss": 0.7083, "step": 1260 }, { "epoch": 0.63, "learning_rate": 9.340476190476191e-06, "loss": 0.6863, "step": 1280 }, { "epoch": 0.64, "learning_rate": 9.292857142857144e-06, "loss": 0.7167, "step": 1300 }, { "epoch": 0.65, "learning_rate": 9.245238095238096e-06, "loss": 0.7006, "step": 1320 }, { "epoch": 0.66, "learning_rate": 9.197619047619049e-06, "loss": 0.7058, "step": 1340 }, { "epoch": 0.67, "learning_rate": 9.15e-06, "loss": 0.7068, "step": 1360 }, { "epoch": 0.68, "learning_rate": 9.102380952380954e-06, "loss": 0.713, "step": 1380 }, { "epoch": 0.69, "learning_rate": 9.054761904761905e-06, "loss": 0.743, "step": 1400 }, { "epoch": 0.7, "learning_rate": 9.007142857142857e-06, "loss": 0.6588, "step": 1420 }, { "epoch": 0.71, "learning_rate": 8.95952380952381e-06, "loss": 0.7087, "step": 1440 }, { "epoch": 0.72, "learning_rate": 8.911904761904763e-06, "loss": 0.7172, "step": 1460 }, { "epoch": 0.73, "learning_rate": 8.864285714285715e-06, "loss": 0.6935, "step": 1480 }, { "epoch": 0.74, "learning_rate": 8.816666666666668e-06, "loss": 0.7049, "step": 1500 }, { "epoch": 0.75, "learning_rate": 8.76904761904762e-06, "loss": 0.6749, "step": 1520 }, { "epoch": 0.76, "learning_rate": 8.721428571428571e-06, "loss": 0.6773, "step": 1540 }, { "epoch": 0.77, "learning_rate": 8.673809523809524e-06, "loss": 0.7028, "step": 1560 }, { "epoch": 0.77, "learning_rate": 8.626190476190478e-06, "loss": 0.6754, "step": 1580 }, { "epoch": 0.78, "learning_rate": 8.57857142857143e-06, "loss": 0.744, "step": 1600 }, { "epoch": 0.79, "learning_rate": 8.530952380952382e-06, "loss": 0.6175, "step": 1620 }, { "epoch": 0.8, "learning_rate": 8.483333333333334e-06, "loss": 0.6788, "step": 1640 }, { "epoch": 0.81, "learning_rate": 8.435714285714286e-06, "loss": 0.6728, "step": 1660 }, { "epoch": 0.82, "learning_rate": 8.388095238095239e-06, "loss": 0.7451, "step": 1680 }, { "epoch": 0.83, "learning_rate": 8.34047619047619e-06, "loss": 0.6392, "step": 1700 }, { "epoch": 0.84, "learning_rate": 8.292857142857144e-06, "loss": 0.6522, "step": 1720 }, { "epoch": 0.85, "learning_rate": 8.245238095238097e-06, "loss": 0.6665, "step": 1740 }, { "epoch": 0.86, "learning_rate": 8.197619047619048e-06, "loss": 0.6178, "step": 1760 }, { "epoch": 0.87, "learning_rate": 8.15e-06, "loss": 0.6699, "step": 1780 }, { "epoch": 0.88, "learning_rate": 8.102380952380953e-06, "loss": 0.6543, "step": 1800 }, { "epoch": 0.89, "learning_rate": 8.054761904761905e-06, "loss": 0.6724, "step": 1820 }, { "epoch": 0.9, "learning_rate": 8.007142857142858e-06, "loss": 0.7194, "step": 1840 }, { "epoch": 0.91, "learning_rate": 7.959523809523811e-06, "loss": 0.6695, "step": 1860 }, { "epoch": 0.92, "learning_rate": 7.911904761904763e-06, "loss": 0.6542, "step": 1880 }, { "epoch": 0.93, "learning_rate": 7.864285714285716e-06, "loss": 0.6986, "step": 1900 }, { "epoch": 0.94, "learning_rate": 7.816666666666667e-06, "loss": 0.6527, "step": 1920 }, { "epoch": 0.95, "learning_rate": 7.769047619047619e-06, "loss": 0.6036, "step": 1940 }, { "epoch": 0.96, "learning_rate": 7.721428571428572e-06, "loss": 0.6309, "step": 1960 }, { "epoch": 0.97, "learning_rate": 7.673809523809524e-06, "loss": 0.6647, "step": 1980 }, { "epoch": 0.98, "learning_rate": 7.626190476190477e-06, "loss": 0.6625, "step": 2000 }, { "epoch": 0.99, "learning_rate": 7.5785714285714295e-06, "loss": 0.673, "step": 2020 }, { "epoch": 1.0, "learning_rate": 7.530952380952381e-06, "loss": 0.6615, "step": 2040 }, { "epoch": 1.01, "learning_rate": 7.483333333333333e-06, "loss": 0.5771, "step": 2060 }, { "epoch": 1.02, "learning_rate": 7.435714285714286e-06, "loss": 0.5824, "step": 2080 }, { "epoch": 1.03, "learning_rate": 7.388095238095239e-06, "loss": 0.5038, "step": 2100 }, { "epoch": 1.04, "learning_rate": 7.3404761904761914e-06, "loss": 0.5524, "step": 2120 }, { "epoch": 1.05, "learning_rate": 7.292857142857144e-06, "loss": 0.5903, "step": 2140 }, { "epoch": 1.06, "learning_rate": 7.245238095238096e-06, "loss": 0.609, "step": 2160 }, { "epoch": 1.07, "learning_rate": 7.197619047619048e-06, "loss": 0.5303, "step": 2180 }, { "epoch": 1.08, "learning_rate": 7.15e-06, "loss": 0.5637, "step": 2200 }, { "epoch": 1.09, "learning_rate": 7.1023809523809525e-06, "loss": 0.5513, "step": 2220 }, { "epoch": 1.1, "learning_rate": 7.054761904761906e-06, "loss": 0.5648, "step": 2240 }, { "epoch": 1.11, "learning_rate": 7.007142857142858e-06, "loss": 0.5907, "step": 2260 }, { "epoch": 1.12, "learning_rate": 6.9595238095238106e-06, "loss": 0.5791, "step": 2280 }, { "epoch": 1.13, "learning_rate": 6.911904761904763e-06, "loss": 0.5485, "step": 2300 }, { "epoch": 1.14, "learning_rate": 6.8642857142857145e-06, "loss": 0.5279, "step": 2320 }, { "epoch": 1.15, "learning_rate": 6.816666666666667e-06, "loss": 0.5283, "step": 2340 }, { "epoch": 1.16, "learning_rate": 6.769047619047619e-06, "loss": 0.5294, "step": 2360 }, { "epoch": 1.17, "learning_rate": 6.721428571428572e-06, "loss": 0.4647, "step": 2380 }, { "epoch": 1.18, "learning_rate": 6.673809523809525e-06, "loss": 0.5571, "step": 2400 }, { "epoch": 1.19, "learning_rate": 6.626190476190477e-06, "loss": 0.5188, "step": 2420 }, { "epoch": 1.2, "learning_rate": 6.578571428571429e-06, "loss": 0.5099, "step": 2440 }, { "epoch": 1.21, "learning_rate": 6.530952380952381e-06, "loss": 0.5371, "step": 2460 }, { "epoch": 1.22, "learning_rate": 6.483333333333334e-06, "loss": 0.55, "step": 2480 }, { "epoch": 1.23, "learning_rate": 6.435714285714286e-06, "loss": 0.5546, "step": 2500 }, { "epoch": 1.24, "learning_rate": 6.3880952380952384e-06, "loss": 0.5465, "step": 2520 }, { "epoch": 1.25, "learning_rate": 6.340476190476192e-06, "loss": 0.4944, "step": 2540 }, { "epoch": 1.26, "learning_rate": 6.292857142857144e-06, "loss": 0.5248, "step": 2560 }, { "epoch": 1.27, "learning_rate": 6.245238095238096e-06, "loss": 0.5585, "step": 2580 }, { "epoch": 1.28, "learning_rate": 6.197619047619048e-06, "loss": 0.5562, "step": 2600 }, { "epoch": 1.28, "learning_rate": 6.15e-06, "loss": 0.5402, "step": 2620 }, { "epoch": 1.29, "learning_rate": 6.102380952380953e-06, "loss": 0.5674, "step": 2640 }, { "epoch": 1.3, "learning_rate": 6.054761904761905e-06, "loss": 0.5641, "step": 2660 }, { "epoch": 1.31, "learning_rate": 6.0071428571428584e-06, "loss": 0.4678, "step": 2680 }, { "epoch": 1.32, "learning_rate": 5.959523809523809e-06, "loss": 0.5162, "step": 2700 }, { "epoch": 1.33, "learning_rate": 5.911904761904762e-06, "loss": 0.5826, "step": 2720 }, { "epoch": 1.34, "learning_rate": 5.864285714285715e-06, "loss": 0.518, "step": 2740 }, { "epoch": 1.35, "learning_rate": 5.816666666666667e-06, "loss": 0.512, "step": 2760 }, { "epoch": 1.36, "learning_rate": 5.7690476190476196e-06, "loss": 0.5848, "step": 2780 }, { "epoch": 1.37, "learning_rate": 5.721428571428572e-06, "loss": 0.526, "step": 2800 }, { "epoch": 1.38, "learning_rate": 5.673809523809525e-06, "loss": 0.5419, "step": 2820 }, { "epoch": 1.39, "learning_rate": 5.626190476190476e-06, "loss": 0.5642, "step": 2840 }, { "epoch": 1.4, "learning_rate": 5.578571428571429e-06, "loss": 0.5126, "step": 2860 }, { "epoch": 1.41, "learning_rate": 5.5309523809523815e-06, "loss": 0.5687, "step": 2880 }, { "epoch": 1.42, "learning_rate": 5.483333333333334e-06, "loss": 0.5498, "step": 2900 }, { "epoch": 1.43, "learning_rate": 5.435714285714286e-06, "loss": 0.5213, "step": 2920 }, { "epoch": 1.44, "learning_rate": 5.388095238095239e-06, "loss": 0.575, "step": 2940 }, { "epoch": 1.45, "learning_rate": 5.34047619047619e-06, "loss": 0.5524, "step": 2960 }, { "epoch": 1.46, "learning_rate": 5.292857142857143e-06, "loss": 0.5304, "step": 2980 }, { "epoch": 1.47, "learning_rate": 5.245238095238096e-06, "loss": 0.4706, "step": 3000 }, { "epoch": 1.48, "learning_rate": 5.197619047619048e-06, "loss": 0.5716, "step": 3020 }, { "epoch": 1.49, "learning_rate": 5.150000000000001e-06, "loss": 0.4976, "step": 3040 }, { "epoch": 1.5, "learning_rate": 5.102380952380953e-06, "loss": 0.5022, "step": 3060 }, { "epoch": 1.51, "learning_rate": 5.0547619047619055e-06, "loss": 0.556, "step": 3080 }, { "epoch": 1.52, "learning_rate": 5.007142857142857e-06, "loss": 0.5023, "step": 3100 }, { "epoch": 1.53, "learning_rate": 4.95952380952381e-06, "loss": 0.5613, "step": 3120 }, { "epoch": 1.54, "learning_rate": 4.911904761904762e-06, "loss": 0.6031, "step": 3140 }, { "epoch": 1.55, "learning_rate": 4.864285714285715e-06, "loss": 0.5346, "step": 3160 }, { "epoch": 1.56, "learning_rate": 4.816666666666667e-06, "loss": 0.5665, "step": 3180 }, { "epoch": 1.57, "learning_rate": 4.769047619047619e-06, "loss": 0.5293, "step": 3200 }, { "epoch": 1.57, "eval_loss": 0.5739259123802185, "eval_runtime": 277.0719, "eval_samples_per_second": 43.133, "eval_steps_per_second": 5.392, "step": 3200 }, { "epoch": 1.58, "learning_rate": 4.721428571428572e-06, "loss": 0.5106, "step": 3220 }, { "epoch": 1.58, "eval_loss": 0.5783331990242004, "eval_runtime": 276.9686, "eval_samples_per_second": 43.149, "eval_steps_per_second": 5.394, "step": 3220 }, { "epoch": 1.59, "learning_rate": 4.673809523809525e-06, "loss": 0.5338, "step": 3240 }, { "epoch": 1.59, "eval_loss": 0.5718061327934265, "eval_runtime": 276.8816, "eval_samples_per_second": 43.163, "eval_steps_per_second": 5.396, "step": 3240 }, { "epoch": 1.6, "learning_rate": 4.626190476190476e-06, "loss": 0.5128, "step": 3260 }, { "epoch": 1.6, "eval_loss": 0.5826650261878967, "eval_runtime": 276.9662, "eval_samples_per_second": 43.15, "eval_steps_per_second": 5.394, "step": 3260 }, { "epoch": 1.61, "learning_rate": 4.5785714285714285e-06, "loss": 0.5205, "step": 3280 }, { "epoch": 1.61, "eval_loss": 0.6044849157333374, "eval_runtime": 276.8959, "eval_samples_per_second": 43.161, "eval_steps_per_second": 5.396, "step": 3280 }, { "epoch": 1.62, "learning_rate": 4.530952380952382e-06, "loss": 0.5114, "step": 3300 }, { "epoch": 1.62, "eval_loss": 0.5880448818206787, "eval_runtime": 276.8767, "eval_samples_per_second": 43.164, "eval_steps_per_second": 5.396, "step": 3300 }, { "epoch": 1.63, "learning_rate": 4.483333333333333e-06, "loss": 0.5072, "step": 3320 }, { "epoch": 1.63, "eval_loss": 0.5788173079490662, "eval_runtime": 276.8866, "eval_samples_per_second": 43.162, "eval_steps_per_second": 5.396, "step": 3320 }, { "epoch": 1.64, "learning_rate": 4.435714285714286e-06, "loss": 0.5512, "step": 3340 }, { "epoch": 1.64, "eval_loss": 0.5863245725631714, "eval_runtime": 276.8934, "eval_samples_per_second": 43.161, "eval_steps_per_second": 5.396, "step": 3340 }, { "epoch": 1.65, "learning_rate": 4.388095238095238e-06, "loss": 0.4723, "step": 3360 }, { "epoch": 1.65, "eval_loss": 0.5898299813270569, "eval_runtime": 276.9062, "eval_samples_per_second": 43.159, "eval_steps_per_second": 5.395, "step": 3360 }, { "epoch": 1.66, "learning_rate": 4.340476190476191e-06, "loss": 0.5011, "step": 3380 }, { "epoch": 1.66, "eval_loss": 0.5917273163795471, "eval_runtime": 276.8734, "eval_samples_per_second": 43.164, "eval_steps_per_second": 5.396, "step": 3380 }, { "epoch": 1.67, "learning_rate": 4.292857142857143e-06, "loss": 0.5419, "step": 3400 }, { "epoch": 1.67, "eval_loss": 0.6026594042778015, "eval_runtime": 276.8544, "eval_samples_per_second": 43.167, "eval_steps_per_second": 5.396, "step": 3400 }, { "epoch": 1.68, "learning_rate": 4.245238095238095e-06, "loss": 0.5425, "step": 3420 }, { "epoch": 1.68, "eval_loss": 0.5699217915534973, "eval_runtime": 276.8159, "eval_samples_per_second": 43.173, "eval_steps_per_second": 5.397, "step": 3420 }, { "epoch": 1.69, "learning_rate": 4.1976190476190485e-06, "loss": 0.5703, "step": 3440 }, { "epoch": 1.69, "eval_loss": 0.5897491574287415, "eval_runtime": 276.8228, "eval_samples_per_second": 43.172, "eval_steps_per_second": 5.397, "step": 3440 }, { "epoch": 1.7, "learning_rate": 4.15e-06, "loss": 0.4646, "step": 3460 }, { "epoch": 1.7, "eval_loss": 0.5916581153869629, "eval_runtime": 276.8047, "eval_samples_per_second": 43.175, "eval_steps_per_second": 5.397, "step": 3460 }, { "epoch": 1.71, "learning_rate": 4.1023809523809525e-06, "loss": 0.4652, "step": 3480 }, { "epoch": 1.71, "eval_loss": 0.5745313167572021, "eval_runtime": 276.8115, "eval_samples_per_second": 43.174, "eval_steps_per_second": 5.397, "step": 3480 }, { "epoch": 1.72, "learning_rate": 4.054761904761905e-06, "loss": 0.5323, "step": 3500 }, { "epoch": 1.72, "eval_loss": 0.5859553217887878, "eval_runtime": 276.8471, "eval_samples_per_second": 43.168, "eval_steps_per_second": 5.396, "step": 3500 }, { "epoch": 1.73, "learning_rate": 4.007142857142857e-06, "loss": 0.5129, "step": 3520 }, { "epoch": 1.73, "eval_loss": 0.5655719637870789, "eval_runtime": 276.8529, "eval_samples_per_second": 43.167, "eval_steps_per_second": 5.396, "step": 3520 }, { "epoch": 1.74, "learning_rate": 3.95952380952381e-06, "loss": 0.5441, "step": 3540 }, { "epoch": 1.74, "eval_loss": 0.5642224550247192, "eval_runtime": 276.8896, "eval_samples_per_second": 43.162, "eval_steps_per_second": 5.396, "step": 3540 }, { "epoch": 1.75, "learning_rate": 3.911904761904762e-06, "loss": 0.5624, "step": 3560 }, { "epoch": 1.75, "eval_loss": 0.5872688293457031, "eval_runtime": 276.8743, "eval_samples_per_second": 43.164, "eval_steps_per_second": 5.396, "step": 3560 }, { "epoch": 1.76, "learning_rate": 3.864285714285715e-06, "loss": 0.4645, "step": 3580 }, { "epoch": 1.76, "eval_loss": 0.5890788435935974, "eval_runtime": 276.8469, "eval_samples_per_second": 43.168, "eval_steps_per_second": 5.396, "step": 3580 }, { "epoch": 1.77, "learning_rate": 3.816666666666667e-06, "loss": 0.5577, "step": 3600 }, { "epoch": 1.77, "eval_loss": 0.5816096663475037, "eval_runtime": 276.8584, "eval_samples_per_second": 43.166, "eval_steps_per_second": 5.396, "step": 3600 }, { "epoch": 1.78, "learning_rate": 3.7690476190476192e-06, "loss": 0.5199, "step": 3620 }, { "epoch": 1.78, "eval_loss": 0.5579236745834351, "eval_runtime": 276.8666, "eval_samples_per_second": 43.165, "eval_steps_per_second": 5.396, "step": 3620 }, { "epoch": 1.79, "learning_rate": 3.721428571428572e-06, "loss": 0.5061, "step": 3640 }, { "epoch": 1.79, "eval_loss": 0.5837463140487671, "eval_runtime": 276.8995, "eval_samples_per_second": 43.16, "eval_steps_per_second": 5.395, "step": 3640 }, { "epoch": 1.79, "learning_rate": 3.673809523809524e-06, "loss": 0.484, "step": 3660 }, { "epoch": 1.79, "eval_loss": 0.5721494555473328, "eval_runtime": 277.0285, "eval_samples_per_second": 43.14, "eval_steps_per_second": 5.393, "step": 3660 }, { "epoch": 1.8, "learning_rate": 3.6261904761904764e-06, "loss": 0.5095, "step": 3680 }, { "epoch": 1.8, "eval_loss": 0.5820609927177429, "eval_runtime": 277.1157, "eval_samples_per_second": 43.126, "eval_steps_per_second": 5.391, "step": 3680 }, { "epoch": 1.81, "learning_rate": 3.5785714285714292e-06, "loss": 0.5342, "step": 3700 }, { "epoch": 1.81, "eval_loss": 0.5602211356163025, "eval_runtime": 277.1392, "eval_samples_per_second": 43.123, "eval_steps_per_second": 5.391, "step": 3700 }, { "epoch": 1.82, "learning_rate": 3.530952380952381e-06, "loss": 0.5435, "step": 3720 }, { "epoch": 1.82, "eval_loss": 0.5910717248916626, "eval_runtime": 277.1237, "eval_samples_per_second": 43.125, "eval_steps_per_second": 5.391, "step": 3720 }, { "epoch": 1.83, "learning_rate": 3.4833333333333336e-06, "loss": 0.5288, "step": 3740 }, { "epoch": 1.83, "eval_loss": 0.5647350549697876, "eval_runtime": 277.1519, "eval_samples_per_second": 43.121, "eval_steps_per_second": 5.391, "step": 3740 }, { "epoch": 1.84, "learning_rate": 3.435714285714286e-06, "loss": 0.5476, "step": 3760 }, { "epoch": 1.84, "eval_loss": 0.5733036398887634, "eval_runtime": 277.012, "eval_samples_per_second": 43.143, "eval_steps_per_second": 5.393, "step": 3760 }, { "epoch": 1.85, "learning_rate": 3.388095238095238e-06, "loss": 0.5199, "step": 3780 }, { "epoch": 1.85, "eval_loss": 0.5674840807914734, "eval_runtime": 276.9911, "eval_samples_per_second": 43.146, "eval_steps_per_second": 5.394, "step": 3780 }, { "epoch": 1.86, "learning_rate": 3.3404761904761908e-06, "loss": 0.5067, "step": 3800 }, { "epoch": 1.86, "eval_loss": 0.5838811993598938, "eval_runtime": 276.9655, "eval_samples_per_second": 43.15, "eval_steps_per_second": 5.394, "step": 3800 }, { "epoch": 1.87, "learning_rate": 3.292857142857143e-06, "loss": 0.5418, "step": 3820 }, { "epoch": 1.87, "eval_loss": 0.5757073163986206, "eval_runtime": 277.0012, "eval_samples_per_second": 43.144, "eval_steps_per_second": 5.393, "step": 3820 }, { "epoch": 1.88, "learning_rate": 3.2452380952380955e-06, "loss": 0.4965, "step": 3840 }, { "epoch": 1.88, "eval_loss": 0.5763747692108154, "eval_runtime": 276.9993, "eval_samples_per_second": 43.145, "eval_steps_per_second": 5.394, "step": 3840 }, { "epoch": 1.89, "learning_rate": 3.197619047619048e-06, "loss": 0.5273, "step": 3860 }, { "epoch": 1.89, "eval_loss": 0.5905867218971252, "eval_runtime": 276.9687, "eval_samples_per_second": 43.149, "eval_steps_per_second": 5.394, "step": 3860 }, { "epoch": 1.9, "learning_rate": 3.1500000000000003e-06, "loss": 0.5808, "step": 3880 }, { "epoch": 1.9, "eval_loss": 0.5761615633964539, "eval_runtime": 276.9979, "eval_samples_per_second": 43.145, "eval_steps_per_second": 5.394, "step": 3880 }, { "epoch": 1.91, "learning_rate": 3.1023809523809527e-06, "loss": 0.5161, "step": 3900 }, { "epoch": 1.91, "eval_loss": 0.5611954927444458, "eval_runtime": 276.9358, "eval_samples_per_second": 43.154, "eval_steps_per_second": 5.395, "step": 3900 }, { "epoch": 1.92, "learning_rate": 3.0547619047619047e-06, "loss": 0.4863, "step": 3920 }, { "epoch": 1.92, "eval_loss": 0.5804067254066467, "eval_runtime": 276.9544, "eval_samples_per_second": 43.152, "eval_steps_per_second": 5.394, "step": 3920 }, { "epoch": 1.93, "learning_rate": 3.0071428571428575e-06, "loss": 0.4827, "step": 3940 }, { "epoch": 1.93, "eval_loss": 0.584104597568512, "eval_runtime": 276.9708, "eval_samples_per_second": 43.149, "eval_steps_per_second": 5.394, "step": 3940 }, { "epoch": 1.94, "learning_rate": 2.95952380952381e-06, "loss": 0.4643, "step": 3960 }, { "epoch": 1.94, "eval_loss": 0.5822347402572632, "eval_runtime": 276.9742, "eval_samples_per_second": 43.148, "eval_steps_per_second": 5.394, "step": 3960 }, { "epoch": 1.95, "learning_rate": 2.911904761904762e-06, "loss": 0.5029, "step": 3980 }, { "epoch": 1.95, "eval_loss": 0.6052400469779968, "eval_runtime": 276.9537, "eval_samples_per_second": 43.152, "eval_steps_per_second": 5.394, "step": 3980 }, { "epoch": 1.96, "learning_rate": 2.8642857142857143e-06, "loss": 0.509, "step": 4000 }, { "epoch": 1.96, "eval_loss": 0.5799996852874756, "eval_runtime": 276.9867, "eval_samples_per_second": 43.146, "eval_steps_per_second": 5.394, "step": 4000 }, { "epoch": 1.97, "learning_rate": 2.816666666666667e-06, "loss": 0.5382, "step": 4020 }, { "epoch": 1.97, "eval_loss": 0.5645180940628052, "eval_runtime": 277.0026, "eval_samples_per_second": 43.144, "eval_steps_per_second": 5.393, "step": 4020 }, { "epoch": 1.98, "learning_rate": 2.7690476190476195e-06, "loss": 0.469, "step": 4040 }, { "epoch": 1.98, "eval_loss": 0.5685124397277832, "eval_runtime": 276.9786, "eval_samples_per_second": 43.148, "eval_steps_per_second": 5.394, "step": 4040 }, { "epoch": 1.99, "learning_rate": 2.7214285714285714e-06, "loss": 0.5032, "step": 4060 }, { "epoch": 1.99, "eval_loss": 0.5778502225875854, "eval_runtime": 277.0026, "eval_samples_per_second": 43.144, "eval_steps_per_second": 5.393, "step": 4060 }, { "epoch": 2.0, "learning_rate": 2.6738095238095243e-06, "loss": 0.5171, "step": 4080 }, { "epoch": 2.0, "eval_loss": 0.5685559511184692, "eval_runtime": 277.0003, "eval_samples_per_second": 43.144, "eval_steps_per_second": 5.393, "step": 4080 }, { "epoch": 2.01, "learning_rate": 2.6261904761904767e-06, "loss": 0.3938, "step": 4100 }, { "epoch": 2.01, "eval_loss": 0.5889346599578857, "eval_runtime": 277.0343, "eval_samples_per_second": 43.139, "eval_steps_per_second": 5.393, "step": 4100 }, { "epoch": 2.02, "learning_rate": 2.5785714285714286e-06, "loss": 0.4321, "step": 4120 }, { "epoch": 2.02, "eval_loss": 0.6039115190505981, "eval_runtime": 276.9943, "eval_samples_per_second": 43.145, "eval_steps_per_second": 5.394, "step": 4120 }, { "epoch": 2.03, "learning_rate": 2.530952380952381e-06, "loss": 0.4185, "step": 4140 }, { "epoch": 2.03, "eval_loss": 0.599577009677887, "eval_runtime": 277.0077, "eval_samples_per_second": 43.143, "eval_steps_per_second": 5.393, "step": 4140 }, { "epoch": 2.04, "learning_rate": 2.4833333333333334e-06, "loss": 0.4782, "step": 4160 }, { "epoch": 2.04, "eval_loss": 0.580022931098938, "eval_runtime": 277.0163, "eval_samples_per_second": 43.142, "eval_steps_per_second": 5.393, "step": 4160 }, { "epoch": 2.05, "learning_rate": 2.435714285714286e-06, "loss": 0.424, "step": 4180 }, { "epoch": 2.05, "eval_loss": 0.6373934745788574, "eval_runtime": 277.0011, "eval_samples_per_second": 43.144, "eval_steps_per_second": 5.393, "step": 4180 }, { "epoch": 2.06, "learning_rate": 2.388095238095238e-06, "loss": 0.3766, "step": 4200 }, { "epoch": 2.06, "eval_loss": 0.6096173524856567, "eval_runtime": 276.9796, "eval_samples_per_second": 43.148, "eval_steps_per_second": 5.394, "step": 4200 }, { "epoch": 2.07, "learning_rate": 2.3404761904761906e-06, "loss": 0.415, "step": 4220 }, { "epoch": 2.07, "eval_loss": 0.6220654249191284, "eval_runtime": 277.0595, "eval_samples_per_second": 43.135, "eval_steps_per_second": 5.392, "step": 4220 }, { "epoch": 2.08, "learning_rate": 2.292857142857143e-06, "loss": 0.4352, "step": 4240 }, { "epoch": 2.08, "eval_loss": 0.615013599395752, "eval_runtime": 277.0601, "eval_samples_per_second": 43.135, "eval_steps_per_second": 5.392, "step": 4240 }, { "epoch": 2.09, "learning_rate": 2.2452380952380954e-06, "loss": 0.4336, "step": 4260 }, { "epoch": 2.09, "eval_loss": 0.6055351495742798, "eval_runtime": 277.0825, "eval_samples_per_second": 43.132, "eval_steps_per_second": 5.392, "step": 4260 }, { "epoch": 2.1, "learning_rate": 2.1976190476190478e-06, "loss": 0.4289, "step": 4280 }, { "epoch": 2.1, "eval_loss": 0.6138429641723633, "eval_runtime": 277.0898, "eval_samples_per_second": 43.13, "eval_steps_per_second": 5.392, "step": 4280 }, { "epoch": 2.11, "learning_rate": 2.15e-06, "loss": 0.4433, "step": 4300 }, { "epoch": 2.11, "eval_loss": 0.5946049094200134, "eval_runtime": 277.0747, "eval_samples_per_second": 43.133, "eval_steps_per_second": 5.392, "step": 4300 }, { "epoch": 2.12, "learning_rate": 2.1023809523809526e-06, "loss": 0.4478, "step": 4320 }, { "epoch": 2.12, "eval_loss": 0.611806333065033, "eval_runtime": 277.0164, "eval_samples_per_second": 43.142, "eval_steps_per_second": 5.393, "step": 4320 }, { "epoch": 2.13, "learning_rate": 2.054761904761905e-06, "loss": 0.4787, "step": 4340 }, { "epoch": 2.13, "eval_loss": 0.5969259738922119, "eval_runtime": 277.0423, "eval_samples_per_second": 43.138, "eval_steps_per_second": 5.393, "step": 4340 }, { "epoch": 2.14, "learning_rate": 2.0071428571428573e-06, "loss": 0.4432, "step": 4360 }, { "epoch": 2.14, "eval_loss": 0.6047642230987549, "eval_runtime": 277.0355, "eval_samples_per_second": 43.139, "eval_steps_per_second": 5.393, "step": 4360 }, { "epoch": 2.15, "learning_rate": 1.9595238095238097e-06, "loss": 0.4319, "step": 4380 }, { "epoch": 2.15, "eval_loss": 0.5948361158370972, "eval_runtime": 277.1353, "eval_samples_per_second": 43.123, "eval_steps_per_second": 5.391, "step": 4380 }, { "epoch": 2.16, "learning_rate": 1.911904761904762e-06, "loss": 0.3939, "step": 4400 }, { "epoch": 2.16, "eval_loss": 0.6115566492080688, "eval_runtime": 277.1102, "eval_samples_per_second": 43.127, "eval_steps_per_second": 5.391, "step": 4400 }, { "epoch": 2.17, "learning_rate": 1.8642857142857143e-06, "loss": 0.3921, "step": 4420 }, { "epoch": 2.17, "eval_loss": 0.608245849609375, "eval_runtime": 277.1416, "eval_samples_per_second": 43.122, "eval_steps_per_second": 5.391, "step": 4420 }, { "epoch": 2.18, "learning_rate": 1.816666666666667e-06, "loss": 0.4381, "step": 4440 }, { "epoch": 2.18, "eval_loss": 0.6282362937927246, "eval_runtime": 277.0787, "eval_samples_per_second": 43.132, "eval_steps_per_second": 5.392, "step": 4440 }, { "epoch": 2.19, "learning_rate": 1.769047619047619e-06, "loss": 0.4461, "step": 4460 }, { "epoch": 2.19, "eval_loss": 0.6083888411521912, "eval_runtime": 277.0933, "eval_samples_per_second": 43.13, "eval_steps_per_second": 5.392, "step": 4460 }, { "epoch": 2.2, "learning_rate": 1.7214285714285717e-06, "loss": 0.4012, "step": 4480 }, { "epoch": 2.2, "eval_loss": 0.6091529726982117, "eval_runtime": 277.0771, "eval_samples_per_second": 43.132, "eval_steps_per_second": 5.392, "step": 4480 }, { "epoch": 2.21, "learning_rate": 1.6738095238095239e-06, "loss": 0.3849, "step": 4500 }, { "epoch": 2.21, "eval_loss": 0.6152328848838806, "eval_runtime": 277.2346, "eval_samples_per_second": 43.108, "eval_steps_per_second": 5.389, "step": 4500 }, { "epoch": 2.22, "learning_rate": 1.6261904761904763e-06, "loss": 0.4178, "step": 4520 }, { "epoch": 2.22, "eval_loss": 0.6003779172897339, "eval_runtime": 277.2625, "eval_samples_per_second": 43.104, "eval_steps_per_second": 5.388, "step": 4520 }, { "epoch": 2.23, "learning_rate": 1.5785714285714287e-06, "loss": 0.4163, "step": 4540 }, { "epoch": 2.23, "eval_loss": 0.6059258580207825, "eval_runtime": 277.3365, "eval_samples_per_second": 43.092, "eval_steps_per_second": 5.387, "step": 4540 }, { "epoch": 2.24, "learning_rate": 1.530952380952381e-06, "loss": 0.4006, "step": 4560 }, { "epoch": 2.24, "eval_loss": 0.6115380525588989, "eval_runtime": 277.2201, "eval_samples_per_second": 43.11, "eval_steps_per_second": 5.389, "step": 4560 }, { "epoch": 2.25, "learning_rate": 1.4833333333333337e-06, "loss": 0.4225, "step": 4580 }, { "epoch": 2.25, "eval_loss": 0.6130145192146301, "eval_runtime": 277.4178, "eval_samples_per_second": 43.079, "eval_steps_per_second": 5.385, "step": 4580 }, { "epoch": 2.26, "learning_rate": 1.4357142857142859e-06, "loss": 0.4008, "step": 4600 }, { "epoch": 2.26, "eval_loss": 0.6094552278518677, "eval_runtime": 277.5161, "eval_samples_per_second": 43.064, "eval_steps_per_second": 5.383, "step": 4600 }, { "epoch": 2.27, "learning_rate": 1.388095238095238e-06, "loss": 0.4706, "step": 4620 }, { "epoch": 2.27, "eval_loss": 0.6135911345481873, "eval_runtime": 277.6184, "eval_samples_per_second": 43.048, "eval_steps_per_second": 5.381, "step": 4620 }, { "epoch": 2.28, "learning_rate": 1.3404761904761906e-06, "loss": 0.3902, "step": 4640 }, { "epoch": 2.28, "eval_loss": 0.6103312373161316, "eval_runtime": 277.532, "eval_samples_per_second": 43.062, "eval_steps_per_second": 5.383, "step": 4640 }, { "epoch": 2.29, "learning_rate": 1.2928571428571428e-06, "loss": 0.4048, "step": 4660 }, { "epoch": 2.29, "eval_loss": 0.608475923538208, "eval_runtime": 277.3473, "eval_samples_per_second": 43.09, "eval_steps_per_second": 5.387, "step": 4660 }, { "epoch": 2.3, "learning_rate": 1.2452380952380954e-06, "loss": 0.4411, "step": 4680 }, { "epoch": 2.3, "eval_loss": 0.6138780117034912, "eval_runtime": 277.4196, "eval_samples_per_second": 43.079, "eval_steps_per_second": 5.385, "step": 4680 }, { "epoch": 2.31, "learning_rate": 1.1976190476190478e-06, "loss": 0.403, "step": 4700 }, { "epoch": 2.31, "eval_loss": 0.6047297120094299, "eval_runtime": 277.4316, "eval_samples_per_second": 43.077, "eval_steps_per_second": 5.385, "step": 4700 }, { "epoch": 2.31, "learning_rate": 1.1500000000000002e-06, "loss": 0.4799, "step": 4720 }, { "epoch": 2.31, "eval_loss": 0.6043194532394409, "eval_runtime": 277.378, "eval_samples_per_second": 43.086, "eval_steps_per_second": 5.386, "step": 4720 }, { "epoch": 2.32, "learning_rate": 1.1023809523809524e-06, "loss": 0.4316, "step": 4740 }, { "epoch": 2.32, "eval_loss": 0.5959681868553162, "eval_runtime": 277.4868, "eval_samples_per_second": 43.069, "eval_steps_per_second": 5.384, "step": 4740 }, { "epoch": 2.33, "learning_rate": 1.0547619047619048e-06, "loss": 0.4198, "step": 4760 }, { "epoch": 2.33, "eval_loss": 0.6030734181404114, "eval_runtime": 277.3901, "eval_samples_per_second": 43.084, "eval_steps_per_second": 5.386, "step": 4760 }, { "epoch": 2.34, "learning_rate": 1.0071428571428572e-06, "loss": 0.4254, "step": 4780 }, { "epoch": 2.34, "eval_loss": 0.60329669713974, "eval_runtime": 277.3302, "eval_samples_per_second": 43.093, "eval_steps_per_second": 5.387, "step": 4780 }, { "epoch": 2.35, "learning_rate": 9.595238095238096e-07, "loss": 0.387, "step": 4800 }, { "epoch": 2.35, "eval_loss": 0.611955463886261, "eval_runtime": 277.4816, "eval_samples_per_second": 43.07, "eval_steps_per_second": 5.384, "step": 4800 }, { "epoch": 2.36, "learning_rate": 9.119047619047621e-07, "loss": 0.3882, "step": 4820 }, { "epoch": 2.36, "eval_loss": 0.612755298614502, "eval_runtime": 277.4062, "eval_samples_per_second": 43.081, "eval_steps_per_second": 5.386, "step": 4820 }, { "epoch": 2.37, "learning_rate": 8.642857142857144e-07, "loss": 0.4307, "step": 4840 }, { "epoch": 2.37, "eval_loss": 0.6149932742118835, "eval_runtime": 277.4451, "eval_samples_per_second": 43.075, "eval_steps_per_second": 5.385, "step": 4840 }, { "epoch": 2.38, "learning_rate": 8.166666666666668e-07, "loss": 0.434, "step": 4860 }, { "epoch": 2.38, "eval_loss": 0.6077226400375366, "eval_runtime": 277.2986, "eval_samples_per_second": 43.098, "eval_steps_per_second": 5.388, "step": 4860 }, { "epoch": 2.39, "learning_rate": 7.690476190476191e-07, "loss": 0.4225, "step": 4880 }, { "epoch": 2.39, "eval_loss": 0.6070570945739746, "eval_runtime": 277.4566, "eval_samples_per_second": 43.073, "eval_steps_per_second": 5.385, "step": 4880 }, { "epoch": 2.4, "learning_rate": 7.214285714285715e-07, "loss": 0.4134, "step": 4900 }, { "epoch": 2.4, "eval_loss": 0.6035702228546143, "eval_runtime": 277.4001, "eval_samples_per_second": 43.082, "eval_steps_per_second": 5.386, "step": 4900 }, { "epoch": 2.41, "learning_rate": 6.738095238095238e-07, "loss": 0.3846, "step": 4920 }, { "epoch": 2.41, "eval_loss": 0.612420380115509, "eval_runtime": 277.4205, "eval_samples_per_second": 43.079, "eval_steps_per_second": 5.385, "step": 4920 }, { "epoch": 2.42, "learning_rate": 6.261904761904762e-07, "loss": 0.3943, "step": 4940 }, { "epoch": 2.42, "eval_loss": 0.6291103959083557, "eval_runtime": 277.2666, "eval_samples_per_second": 43.103, "eval_steps_per_second": 5.388, "step": 4940 }, { "epoch": 2.43, "learning_rate": 5.785714285714286e-07, "loss": 0.4455, "step": 4960 }, { "epoch": 2.43, "eval_loss": 0.6184937953948975, "eval_runtime": 277.2863, "eval_samples_per_second": 43.1, "eval_steps_per_second": 5.388, "step": 4960 }, { "epoch": 2.44, "learning_rate": 5.30952380952381e-07, "loss": 0.4104, "step": 4980 }, { "epoch": 2.44, "eval_loss": 0.6063624620437622, "eval_runtime": 277.3406, "eval_samples_per_second": 43.091, "eval_steps_per_second": 5.387, "step": 4980 }, { "epoch": 2.45, "learning_rate": 4.833333333333334e-07, "loss": 0.4158, "step": 5000 }, { "epoch": 2.45, "eval_loss": 0.6095247268676758, "eval_runtime": 277.4398, "eval_samples_per_second": 43.076, "eval_steps_per_second": 5.385, "step": 5000 }, { "epoch": 2.46, "learning_rate": 4.357142857142858e-07, "loss": 0.4135, "step": 5020 }, { "epoch": 2.46, "eval_loss": 0.6154703497886658, "eval_runtime": 277.3849, "eval_samples_per_second": 43.085, "eval_steps_per_second": 5.386, "step": 5020 }, { "epoch": 2.47, "learning_rate": 3.8809523809523813e-07, "loss": 0.3789, "step": 5040 }, { "epoch": 2.47, "eval_loss": 0.6208740472793579, "eval_runtime": 277.33, "eval_samples_per_second": 43.093, "eval_steps_per_second": 5.387, "step": 5040 }, { "epoch": 2.48, "learning_rate": 3.404761904761905e-07, "loss": 0.418, "step": 5060 }, { "epoch": 2.48, "eval_loss": 0.6106104850769043, "eval_runtime": 277.377, "eval_samples_per_second": 43.086, "eval_steps_per_second": 5.386, "step": 5060 }, { "epoch": 2.49, "learning_rate": 2.9285714285714287e-07, "loss": 0.3931, "step": 5080 }, { "epoch": 2.49, "eval_loss": 0.604749858379364, "eval_runtime": 277.377, "eval_samples_per_second": 43.086, "eval_steps_per_second": 5.386, "step": 5080 }, { "epoch": 2.5, "learning_rate": 2.4523809523809526e-07, "loss": 0.4289, "step": 5100 }, { "epoch": 2.5, "eval_loss": 0.6055382490158081, "eval_runtime": 277.4628, "eval_samples_per_second": 43.072, "eval_steps_per_second": 5.385, "step": 5100 }, { "epoch": 2.51, "learning_rate": 1.9761904761904763e-07, "loss": 0.4051, "step": 5120 }, { "epoch": 2.51, "eval_loss": 0.6083624958992004, "eval_runtime": 277.3533, "eval_samples_per_second": 43.089, "eval_steps_per_second": 5.387, "step": 5120 }, { "epoch": 2.52, "learning_rate": 1.5000000000000002e-07, "loss": 0.4217, "step": 5140 }, { "epoch": 2.52, "eval_loss": 0.611778736114502, "eval_runtime": 277.4092, "eval_samples_per_second": 43.081, "eval_steps_per_second": 5.386, "step": 5140 }, { "epoch": 2.53, "learning_rate": 1.023809523809524e-07, "loss": 0.3843, "step": 5160 }, { "epoch": 2.53, "eval_loss": 0.613944411277771, "eval_runtime": 277.3668, "eval_samples_per_second": 43.087, "eval_steps_per_second": 5.386, "step": 5160 }, { "epoch": 2.54, "learning_rate": 5.4761904761904766e-08, "loss": 0.4435, "step": 5180 }, { "epoch": 2.54, "eval_loss": 0.6125811338424683, "eval_runtime": 277.4188, "eval_samples_per_second": 43.079, "eval_steps_per_second": 5.385, "step": 5180 }, { "epoch": 2.55, "learning_rate": 7.142857142857144e-09, "loss": 0.4274, "step": 5200 }, { "epoch": 2.55, "eval_loss": 0.6120193600654602, "eval_runtime": 277.4157, "eval_samples_per_second": 43.08, "eval_steps_per_second": 5.385, "step": 5200 }, { "epoch": 2.55, "step": 5200, "total_flos": 3.09073923350188e+17, "train_loss": 0.820626282783655, "train_runtime": 52836.9017, "train_samples_per_second": 6.299, "train_steps_per_second": 0.098 } ], "logging_steps": 20, "max_steps": 5200, "num_train_epochs": 3, "save_steps": 20, "total_flos": 3.09073923350188e+17, "trial_name": null, "trial_params": null }